Compare commits

..

3 Commits

Author  SHA1        Message                   Date
Weves   9b169350a9  Switch to monotonic       2025-03-07 19:04:47 -08:00
Weves   c1dbb073d0  Small tweaks              2025-03-07 15:53:22 -08:00
Weves   39bfc6ae16  Add basic memory logging  2025-03-07 15:46:14 -08:00
366 changed files with 4233 additions and 15236 deletions

View File

@@ -9,10 +9,6 @@ on:
- cron: "0 16 * * *"
env:
# AWS
AWS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS: ${{ secrets.AWS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS }}
AWS_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS: ${{ secrets.AWS_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS }}
# Confluence
CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
CONFLUENCE_TEST_SPACE: ${{ secrets.CONFLUENCE_TEST_SPACE }}
@@ -49,16 +45,9 @@ env:
SHAREPOINT_CLIENT_SECRET: ${{ secrets.SHAREPOINT_CLIENT_SECRET }}
SHAREPOINT_CLIENT_DIRECTORY_ID: ${{ secrets.SHAREPOINT_CLIENT_DIRECTORY_ID }}
SHAREPOINT_SITE: ${{ secrets.SHAREPOINT_SITE }}
# Github
ACCESS_TOKEN_GITHUB: ${{ secrets.ACCESS_TOKEN_GITHUB }}
# Gitbook
GITBOOK_SPACE_ID: ${{ secrets.GITBOOK_SPACE_ID }}
GITBOOK_API_KEY: ${{ secrets.GITBOOK_API_KEY }}
# Notion
NOTION_INTEGRATION_TOKEN: ${{ secrets.NOTION_INTEGRATION_TOKEN }}
# Highspot
HIGHSPOT_KEY: ${{ secrets.HIGHSPOT_KEY }}
HIGHSPOT_SECRET: ${{ secrets.HIGHSPOT_SECRET }}
jobs:
connectors-check:

View File

@@ -114,4 +114,3 @@ To try the Onyx Enterprise Edition:
## 💡 Contributing
Looking to contribute? Please check out the [Contribution Guide](CONTRIBUTING.md) for more details.

View File

@@ -8,7 +8,7 @@ Edition features outside of personal development or testing purposes. Please reach out to
founders@onyx.app for more information. Please visit https://github.com/onyx-dot-app/onyx"
# Default ONYX_VERSION, typically overridden during builds by GitHub Actions.
ARG ONYX_VERSION=0.0.0-dev
ARG ONYX_VERSION=0.8-dev
# DO_NOT_TRACK is used to disable telemetry for Unstructured
ENV ONYX_VERSION=${ONYX_VERSION} \
DANSWER_RUNNING_IN_DOCKER="true" \
@@ -102,7 +102,6 @@ COPY ./alembic /app/alembic
COPY ./alembic_tenants /app/alembic_tenants
COPY ./alembic.ini /app/alembic.ini
COPY supervisord.conf /usr/etc/supervisord.conf
COPY ./static /app/static
# Escape hatch scripts
COPY ./scripts/debugging /app/scripts/debugging

View File

@@ -7,7 +7,7 @@ You can find it at https://hub.docker.com/r/onyx/onyx-model-server. For more details,
visit https://github.com/onyx-dot-app/onyx."
# Default ONYX_VERSION, typically overridden during builds by GitHub Actions.
ARG ONYX_VERSION=0.0.0-dev
ARG ONYX_VERSION=0.8-dev
ENV ONYX_VERSION=${ONYX_VERSION} \
DANSWER_RUNNING_IN_DOCKER="true"
@@ -31,8 +31,7 @@ RUN python -c "from transformers import AutoTokenizer; \
AutoTokenizer.from_pretrained('distilbert-base-uncased'); \
AutoTokenizer.from_pretrained('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
from huggingface_hub import snapshot_download; \
snapshot_download(repo_id='onyx-dot-app/hybrid-intent-token-classifier'); \
snapshot_download(repo_id='onyx-dot-app/information-content-model'); \
snapshot_download(repo_id='danswer/hybrid-intent-token-classifier', revision='v1.0.3'); \
snapshot_download('nomic-ai/nomic-embed-text-v1'); \
snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
from sentence_transformers import SentenceTransformer; \

View File

@@ -84,7 +84,7 @@ keys = console
keys = generic
[logger_root]
level = INFO
level = WARN
handlers = console
qualname =

View File

@@ -25,9 +25,6 @@ from shared_configs.configs import MULTI_TENANT, POSTGRES_DEFAULT_SCHEMA
from onyx.db.models import Base
from celery.backends.database.session import ResultModelBase # type: ignore
# Make sure in alembic.ini [logger_root] level=INFO is set or most logging will be
# hidden! (defaults to level=WARN)
# Alembic Config object
config = context.config
@@ -39,7 +36,6 @@ if config.config_file_name is not None and config.attributes.get(
target_metadata = [Base.metadata, ResultModelBase.metadata]
EXCLUDE_TABLES = {"kombu_queue", "kombu_message"}
logger = logging.getLogger(__name__)
ssl_context: ssl.SSLContext | None = None
@@ -68,7 +64,7 @@ def include_object(
return True
def get_schema_options() -> tuple[str, bool, bool, bool]:
def get_schema_options() -> tuple[str, bool, bool]:
x_args_raw = context.get_x_argument()
x_args = {}
for arg in x_args_raw:
@@ -80,10 +76,6 @@ def get_schema_options() -> tuple[str, bool, bool, bool]:
create_schema = x_args.get("create_schema", "true").lower() == "true"
upgrade_all_tenants = x_args.get("upgrade_all_tenants", "false").lower() == "true"
# continue on error with individual tenant
# only applies to online migrations
continue_on_error = x_args.get("continue", "false").lower() == "true"
if (
MULTI_TENANT
and schema_name == POSTGRES_DEFAULT_SCHEMA
@@ -94,12 +86,14 @@ def get_schema_options() -> tuple[str, bool, bool, bool]:
"Please specify a tenant-specific schema."
)
return schema_name, create_schema, upgrade_all_tenants, continue_on_error
return schema_name, create_schema, upgrade_all_tenants
def do_run_migrations(
connection: Connection, schema_name: str, create_schema: bool
) -> None:
logger.info(f"About to migrate schema: {schema_name}")
if create_schema:
connection.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{schema_name}"'))
connection.execute(text("COMMIT"))
@@ -140,12 +134,7 @@ def provide_iam_token_for_alembic(
async def run_async_migrations() -> None:
(
schema_name,
create_schema,
upgrade_all_tenants,
continue_on_error,
) = get_schema_options()
schema_name, create_schema, upgrade_all_tenants = get_schema_options()
engine = create_async_engine(
build_connection_string(),
@@ -162,15 +151,9 @@ async def run_async_migrations() -> None:
if upgrade_all_tenants:
tenant_schemas = get_all_tenant_ids()
i_tenant = 0
num_tenants = len(tenant_schemas)
for schema in tenant_schemas:
i_tenant += 1
logger.info(
f"Migrating schema: index={i_tenant} num_tenants={num_tenants} schema={schema}"
)
try:
logger.info(f"Migrating schema: {schema}")
async with engine.connect() as connection:
await connection.run_sync(
do_run_migrations,
@@ -179,12 +162,7 @@ async def run_async_migrations() -> None:
)
except Exception as e:
logger.error(f"Error migrating schema {schema}: {e}")
if not continue_on_error:
logger.error("--continue is not set, raising exception!")
raise
logger.warning("--continue is set, continuing to next schema.")
raise
else:
try:
logger.info(f"Migrating schema: {schema_name}")
@@ -202,11 +180,7 @@ async def run_async_migrations() -> None:
def run_migrations_offline() -> None:
"""This doesn't really get used when we migrate in the cloud."""
logger.info("run_migrations_offline starting.")
schema_name, _, upgrade_all_tenants, continue_on_error = get_schema_options()
schema_name, _, upgrade_all_tenants = get_schema_options()
url = build_connection_string()
if upgrade_all_tenants:
@@ -256,7 +230,6 @@ def run_migrations_offline() -> None:
def run_migrations_online() -> None:
logger.info("run_migrations_online starting.")
asyncio.run(run_async_migrations())
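For reference, a minimal, self-contained sketch of the x-argument parsing that env.py keeps after this change (the continue_on_error flag dropped, a 3-tuple returned). The DEFAULT_SCHEMA constant and the multi_tenant flag below are stand-ins for the repo's POSTGRES_DEFAULT_SCHEMA and MULTI_TENANT config, not the actual values.

```python
DEFAULT_SCHEMA = "public"  # stand-in for POSTGRES_DEFAULT_SCHEMA


def get_schema_options(
    x_args_raw: list[str], multi_tenant: bool = False
) -> tuple[str, bool, bool]:
    # x_args_raw mirrors context.get_x_argument(), e.g. the values passed via
    #   alembic -x schema=tenant_abc -x upgrade_all_tenants=false upgrade head
    x_args: dict[str, str] = {}
    for arg in x_args_raw:
        for pair in arg.split(","):
            if "=" in pair:
                key, value = pair.split("=", 1)
                x_args[key.strip()] = value.strip()

    schema_name = x_args.get("schema", DEFAULT_SCHEMA)
    create_schema = x_args.get("create_schema", "true").lower() == "true"
    upgrade_all_tenants = x_args.get("upgrade_all_tenants", "false").lower() == "true"

    if multi_tenant and schema_name == DEFAULT_SCHEMA and not upgrade_all_tenants:
        raise ValueError("Please specify a tenant-specific schema.")

    return schema_name, create_schema, upgrade_all_tenants


print(get_schema_options(["schema=tenant_abc,create_schema=false"]))
# ('tenant_abc', False, False)
```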

View File

@@ -1,51 +0,0 @@
"""add chunk stats table
Revision ID: 3781a5eb12cb
Revises: df46c75b714e
Create Date: 2025-03-10 10:02:30.586666
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "3781a5eb12cb"
down_revision = "df46c75b714e"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"chunk_stats",
sa.Column("id", sa.String(), primary_key=True, index=True),
sa.Column(
"document_id",
sa.String(),
sa.ForeignKey("document.id"),
nullable=False,
index=True,
),
sa.Column("chunk_in_doc_id", sa.Integer(), nullable=False),
sa.Column("information_content_boost", sa.Float(), nullable=True),
sa.Column(
"last_modified",
sa.DateTime(timezone=True),
nullable=False,
index=True,
server_default=sa.func.now(),
),
sa.Column("last_synced", sa.DateTime(timezone=True), nullable=True, index=True),
sa.UniqueConstraint(
"document_id", "chunk_in_doc_id", name="uq_chunk_stats_doc_chunk"
),
)
op.create_index(
"ix_chunk_sync_status", "chunk_stats", ["last_modified", "last_synced"]
)
def downgrade() -> None:
op.drop_index("ix_chunk_sync_status", table_name="chunk_stats")
op.drop_table("chunk_stats")
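One way a table like this is typically written to (a sketch under assumptions, not the repo's code): insert-or-update keyed on the uq_chunk_stats_doc_chunk constraint defined above. The id format and function name here are hypothetical.

```python
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import insert


def upsert_chunk_boost(
    conn: sa.engine.Connection,
    chunk_stats: sa.Table,  # assumed: reflected/defined as in the migration above
    doc_id: str,
    chunk_idx: int,
    boost: float,
) -> None:
    # Insert a row; on conflict with (document_id, chunk_in_doc_id), update in place.
    stmt = insert(chunk_stats).values(
        id=f"{doc_id}__{chunk_idx}",  # hypothetical id scheme
        document_id=doc_id,
        chunk_in_doc_id=chunk_idx,
        information_content_boost=boost,
    )
    stmt = stmt.on_conflict_do_update(
        constraint="uq_chunk_stats_doc_chunk",
        set_={"information_content_boost": boost, "last_modified": sa.func.now()},
    )
    conn.execute(stmt)
```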

View File

@@ -28,20 +28,6 @@ depends_on = None
def upgrade() -> None:
# First, drop any existing indexes to avoid conflicts
op.execute("COMMIT")
op.execute("DROP INDEX CONCURRENTLY IF EXISTS idx_chat_message_tsv;")
op.execute("COMMIT")
op.execute("DROP INDEX CONCURRENTLY IF EXISTS idx_chat_session_desc_tsv;")
op.execute("COMMIT")
op.execute("DROP INDEX IF EXISTS idx_chat_message_message_lower;")
# Drop existing columns if they exist
op.execute("ALTER TABLE chat_message DROP COLUMN IF EXISTS message_tsv;")
op.execute("ALTER TABLE chat_session DROP COLUMN IF EXISTS description_tsv;")
# Create a GIN index for full-text search on chat_message.message
op.execute(
"""

View File

@@ -5,10 +5,7 @@ Revises: f1ca58b2f2ec
Create Date: 2025-01-29 07:48:46.784041
"""
import logging
from typing import cast
from alembic import op
from sqlalchemy.exc import IntegrityError
from sqlalchemy.sql import text
@@ -18,45 +15,21 @@ down_revision = "f1ca58b2f2ec"
branch_labels = None
depends_on = None
logger = logging.getLogger("alembic.runtime.migration")
def upgrade() -> None:
"""Conflicts on lowercasing will result in the uppercased email getting a
unique integer suffix when converted to lowercase."""
# Get database connection
connection = op.get_bind()
# Fetch all user emails that are not already lowercase
user_emails = connection.execute(
text('SELECT id, email FROM "user" WHERE email != LOWER(email)')
).fetchall()
for user_id, email in user_emails:
email = cast(str, email)
username, domain = email.rsplit("@", 1)
new_email = f"{username.lower()}@{domain.lower()}"
attempt = 1
while True:
try:
# Try updating the email
connection.execute(
text('UPDATE "user" SET email = :new_email WHERE id = :user_id'),
{"new_email": new_email, "user_id": user_id},
)
break # Success, exit loop
except IntegrityError:
next_email = f"{username.lower()}_{attempt}@{domain.lower()}"
# Email conflict occurred, append `_1`, `_2`, etc., to the username
logger.warning(
f"Conflict while lowercasing email: "
f"old_email={email} "
f"conflicting_email={new_email} "
f"next_email={next_email}"
)
new_email = next_email
attempt += 1
# Update all user emails to lowercase
connection.execute(
text(
"""
UPDATE "user"
SET email = LOWER(email)
WHERE email != LOWER(email)
"""
)
)
def downgrade() -> None:
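As an illustration of the conflict rule described in the docstring above (not the migration code itself): when lowercasing an email collides with an existing address, the local part gets a _1, _2, ... suffix until it is unique.

```python
def lowercase_with_suffix(email: str, taken: set[str]) -> str:
    # Append _1, _2, ... to the lowercased local part until no collision remains.
    username, domain = email.rsplit("@", 1)
    candidate = f"{username.lower()}@{domain.lower()}"
    attempt = 1
    while candidate in taken:
        candidate = f"{username.lower()}_{attempt}@{domain.lower()}"
        attempt += 1
    return candidate


existing = {"alice@example.com"}
print(lowercase_with_suffix("Alice@Example.com", existing))  # alice_1@example.com
```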

View File

@@ -1,36 +0,0 @@
"""add_default_vision_provider_to_llm_provider
Revision ID: df46c75b714e
Revises: 3934b1bc7b62
Create Date: 2025-03-11 16:20:19.038945
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "df46c75b714e"
down_revision = "3934b1bc7b62"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"llm_provider",
sa.Column(
"is_default_vision_provider",
sa.Boolean(),
nullable=True,
server_default=sa.false(),
),
)
op.add_column(
"llm_provider", sa.Column("default_vision_model", sa.String(), nullable=True)
)
def downgrade() -> None:
op.drop_column("llm_provider", "default_vision_model")
op.drop_column("llm_provider", "is_default_vision_provider")

View File

@@ -1,33 +0,0 @@
"""add new available tenant table
Revision ID: 3b45e0018bf1
Revises: ac842f85f932
Create Date: 2025-03-06 09:55:18.229910
"""
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision = "3b45e0018bf1"
down_revision = "ac842f85f932"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Create available_tenant table
op.create_table(
"available_tenant",
sa.Column("tenant_id", sa.String(), nullable=False),
sa.Column("alembic_version", sa.String(), nullable=False),
sa.Column("date_created", sa.DateTime(), nullable=False),
sa.PrimaryKeyConstraint("tenant_id"),
)
def downgrade() -> None:
# Drop available_tenant table
op.drop_table("available_tenant")

View File

@@ -1,51 +0,0 @@
"""new column user tenant mapping
Revision ID: ac842f85f932
Revises: 34e3630c7f32
Create Date: 2025-03-03 13:30:14.802874
"""
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision = "ac842f85f932"
down_revision = "34e3630c7f32"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Add active column with default value of True
op.add_column(
"user_tenant_mapping",
sa.Column(
"active",
sa.Boolean(),
nullable=False,
server_default="true",
),
schema="public",
)
op.drop_constraint("uq_email", "user_tenant_mapping", schema="public")
# Create a unique index for active=true records
# This ensures a user can only be active in one tenant at a time
op.execute(
"CREATE UNIQUE INDEX uq_user_active_email_idx ON public.user_tenant_mapping (email) WHERE active = true"
)
def downgrade() -> None:
# Drop the unique index for active=true records
op.execute("DROP INDEX IF EXISTS uq_user_active_email_idx")
op.create_unique_constraint(
"uq_email", "user_tenant_mapping", ["email"], schema="public"
)
# Remove the active column
op.drop_column("user_tenant_mapping", "active", schema="public")
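A small SQLAlchemy sketch (table layout assumed from the migration above) of what the partial unique index enforces: at most one active row per email, while any number of inactive rows may share that email.

```python
import sqlalchemy as sa

metadata = sa.MetaData()

# Assumed shape of public.user_tenant_mapping, reduced to the relevant columns.
user_tenant_mapping = sa.Table(
    "user_tenant_mapping",
    metadata,
    sa.Column("email", sa.String, nullable=False),
    sa.Column("tenant_id", sa.String, nullable=False),
    sa.Column("active", sa.Boolean, nullable=False, server_default=sa.text("true")),
    # Partial unique index: uniqueness only applies to rows where active = true.
    sa.Index(
        "uq_user_active_email_idx",
        "email",
        unique=True,
        postgresql_where=sa.text("active = true"),
    ),
    schema="public",
)
```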

View File

@@ -93,12 +93,12 @@ def _get_access_for_documents(
)
# To avoid collisions of group names between connectors, they need to be prefixed
access_map[document_id] = DocumentAccess.build(
user_emails=list(non_ee_access.user_emails),
user_groups=user_group_info.get(document_id, []),
access_map[document_id] = DocumentAccess(
user_emails=non_ee_access.user_emails,
user_groups=set(user_group_info.get(document_id, [])),
is_public=is_public_anywhere,
external_user_emails=list(ext_u_emails),
external_user_group_ids=list(ext_u_groups),
external_user_emails=ext_u_emails,
external_user_group_ids=ext_u_groups,
)
return access_map

View File

@@ -2,6 +2,7 @@ from ee.onyx.server.query_and_chat.models import OneShotQAResponse
from onyx.chat.models import AllCitations
from onyx.chat.models import LLMRelevanceFilterResponse
from onyx.chat.models import OnyxAnswerPiece
from onyx.chat.models import OnyxContexts
from onyx.chat.models import QADocsResponse
from onyx.chat.models import StreamingError
from onyx.chat.process_message import ChatPacketStream
@@ -31,6 +32,8 @@ def gather_stream_for_answer_api(
response.llm_selected_doc_indices = packet.llm_selected_doc_indices
elif isinstance(packet, AllCitations):
response.citations = packet.citations
elif isinstance(packet, OnyxContexts):
response.contexts = packet
if answer:
response.answer = answer

View File

@@ -25,10 +25,6 @@ SAML_CONF_DIR = os.environ.get("SAML_CONF_DIR") or "/app/ee/onyx/configs/saml_co
#####
# Auto Permission Sync
#####
DEFAULT_PERMISSION_DOC_SYNC_FREQUENCY = int(
os.environ.get("DEFAULT_PERMISSION_DOC_SYNC_FREQUENCY") or 5 * 60
)
# In seconds, default is 5 minutes
CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY = int(
os.environ.get("CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY") or 5 * 60
@@ -43,7 +39,6 @@ CONFLUENCE_ANONYMOUS_ACCESS_IS_PUBLIC = (
CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY = int(
os.environ.get("CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY") or 5 * 60
)
NUM_PERMISSION_WORKERS = int(os.environ.get("NUM_PERMISSION_WORKERS") or 2)
@@ -77,13 +72,6 @@ OAUTH_GOOGLE_DRIVE_CLIENT_SECRET = os.environ.get(
"OAUTH_GOOGLE_DRIVE_CLIENT_SECRET", ""
)
GOOGLE_DRIVE_PERMISSION_GROUP_SYNC_FREQUENCY = int(
os.environ.get("GOOGLE_DRIVE_PERMISSION_GROUP_SYNC_FREQUENCY") or 5 * 60
)
SLACK_PERMISSION_DOC_SYNC_FREQUENCY = int(
os.environ.get("SLACK_PERMISSION_DOC_SYNC_FREQUENCY") or 5 * 60
)
# The posthog client does not accept empty API keys or hosts; however, it fails silently
# when the capture is called. These defaults prevent Posthog issues from breaking the Onyx app

View File

@@ -2,7 +2,6 @@
Rules defined here:
https://confluence.atlassian.com/conf85/check-who-can-view-a-page-1283360557.html
"""
from collections.abc import Generator
from typing import Any
from ee.onyx.configs.app_configs import CONFLUENCE_ANONYMOUS_ACCESS_IS_PUBLIC
@@ -264,11 +263,13 @@ def _fetch_all_page_restrictions(
space_permissions_by_space_key: dict[str, ExternalAccess],
is_cloud: bool,
callback: IndexingHeartbeatInterface | None,
) -> Generator[DocExternalAccess, None, None]:
) -> list[DocExternalAccess]:
"""
For all pages, if a page has restrictions, then use those restrictions.
Otherwise, use the space's restrictions.
"""
document_restrictions: list[DocExternalAccess] = []
for slim_doc in slim_docs:
if callback:
if callback.should_stop():
@@ -285,9 +286,11 @@ def _fetch_all_page_restrictions(
confluence_client=confluence_client,
perm_sync_data=slim_doc.perm_sync_data,
):
yield DocExternalAccess(
doc_id=slim_doc.id,
external_access=restrictions,
document_restrictions.append(
DocExternalAccess(
doc_id=slim_doc.id,
external_access=restrictions,
)
)
# If there are restrictions, then we don't need to use the space's restrictions
continue
@@ -321,9 +324,11 @@ def _fetch_all_page_restrictions(
continue
# If there are no restrictions, then use the space's restrictions
yield DocExternalAccess(
doc_id=slim_doc.id,
external_access=space_permissions,
document_restrictions.append(
DocExternalAccess(
doc_id=slim_doc.id,
external_access=space_permissions,
)
)
if (
not space_permissions.is_public
@@ -337,12 +342,13 @@ def _fetch_all_page_restrictions(
)
logger.debug("Finished fetching all page restrictions for space")
return document_restrictions
def confluence_doc_sync(
cc_pair: ConnectorCredentialPair,
callback: IndexingHeartbeatInterface | None,
) -> Generator[DocExternalAccess, None, None]:
) -> list[DocExternalAccess]:
"""
Adds the external permissions to the documents in postgres
if the document doesn't already exist in postgres, we create
@@ -381,7 +387,7 @@ def confluence_doc_sync(
slim_docs.extend(doc_batch)
logger.debug("Fetching all page restrictions for space")
yield from _fetch_all_page_restrictions(
return _fetch_all_page_restrictions(
confluence_client=confluence_connector.confluence_client,
slim_docs=slim_docs,
space_permissions_by_space_key=space_permissions_by_space_key,

View File

@@ -1,4 +1,3 @@
from collections.abc import Generator
from datetime import datetime
from datetime import timezone
@@ -35,7 +34,7 @@ def _get_slim_doc_generator(
def gmail_doc_sync(
cc_pair: ConnectorCredentialPair,
callback: IndexingHeartbeatInterface | None,
) -> Generator[DocExternalAccess, None, None]:
) -> list[DocExternalAccess]:
"""
Adds the external permissions to the documents in postgres
if the document doesn't already exist in postgres, we create
@@ -49,6 +48,7 @@ def gmail_doc_sync(
cc_pair, gmail_connector, callback=callback
)
document_external_access: list[DocExternalAccess] = []
for slim_doc_batch in slim_doc_generator:
for slim_doc in slim_doc_batch:
if callback:
@@ -60,14 +60,17 @@ def gmail_doc_sync(
if slim_doc.perm_sync_data is None:
logger.warning(f"No permissions found for document {slim_doc.id}")
continue
if user_email := slim_doc.perm_sync_data.get("user_email"):
ext_access = ExternalAccess(
external_user_emails=set([user_email]),
external_user_group_ids=set(),
is_public=False,
)
yield DocExternalAccess(
doc_id=slim_doc.id,
external_access=ext_access,
document_external_access.append(
DocExternalAccess(
doc_id=slim_doc.id,
external_access=ext_access,
)
)
return document_external_access

View File

@@ -1,4 +1,3 @@
from collections.abc import Generator
from datetime import datetime
from datetime import timezone
from typing import Any
@@ -148,7 +147,7 @@ def _get_permissions_from_slim_doc(
def gdrive_doc_sync(
cc_pair: ConnectorCredentialPair,
callback: IndexingHeartbeatInterface | None,
) -> Generator[DocExternalAccess, None, None]:
) -> list[DocExternalAccess]:
"""
Adds the external permissions to the documents in postgres
if the document doesn't already exist in postgres, we create
@@ -162,6 +161,7 @@ def gdrive_doc_sync(
slim_doc_generator = _get_slim_doc_generator(cc_pair, google_drive_connector)
document_external_accesses = []
for slim_doc_batch in slim_doc_generator:
for slim_doc in slim_doc_batch:
if callback:
@@ -174,7 +174,10 @@ def gdrive_doc_sync(
google_drive_connector=google_drive_connector,
slim_doc=slim_doc,
)
yield DocExternalAccess(
external_access=ext_access,
doc_id=slim_doc.id,
document_external_accesses.append(
DocExternalAccess(
external_access=ext_access,
doc_id=slim_doc.id,
)
)
return document_external_accesses

View File

@@ -1,5 +1,3 @@
from collections.abc import Generator
from slack_sdk import WebClient
from ee.onyx.external_permissions.slack.utils import fetch_user_id_to_email_map
@@ -16,6 +14,35 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
def _get_slack_document_ids_and_channels(
cc_pair: ConnectorCredentialPair, callback: IndexingHeartbeatInterface | None
) -> dict[str, list[str]]:
slack_connector = SlackConnector(**cc_pair.connector.connector_specific_config)
slack_connector.load_credentials(cc_pair.credential.credential_json)
slim_doc_generator = slack_connector.retrieve_all_slim_documents(callback=callback)
channel_doc_map: dict[str, list[str]] = {}
for doc_metadata_batch in slim_doc_generator:
for doc_metadata in doc_metadata_batch:
if doc_metadata.perm_sync_data is None:
continue
channel_id = doc_metadata.perm_sync_data["channel_id"]
if channel_id not in channel_doc_map:
channel_doc_map[channel_id] = []
channel_doc_map[channel_id].append(doc_metadata.id)
if callback:
if callback.should_stop():
raise RuntimeError(
"_get_slack_document_ids_and_channels: Stop signal detected"
)
callback.progress("_get_slack_document_ids_and_channels", 1)
return channel_doc_map
def _fetch_workspace_permissions(
user_id_to_email_map: dict[str, str],
) -> ExternalAccess:
@@ -95,37 +122,10 @@ def _fetch_channel_permissions(
return channel_permissions
def _get_slack_document_access(
cc_pair: ConnectorCredentialPair,
channel_permissions: dict[str, ExternalAccess],
callback: IndexingHeartbeatInterface | None,
) -> Generator[DocExternalAccess, None, None]:
slack_connector = SlackConnector(**cc_pair.connector.connector_specific_config)
slack_connector.load_credentials(cc_pair.credential.credential_json)
slim_doc_generator = slack_connector.retrieve_all_slim_documents(callback=callback)
for doc_metadata_batch in slim_doc_generator:
for doc_metadata in doc_metadata_batch:
if doc_metadata.perm_sync_data is None:
continue
channel_id = doc_metadata.perm_sync_data["channel_id"]
yield DocExternalAccess(
external_access=channel_permissions[channel_id],
doc_id=doc_metadata.id,
)
if callback:
if callback.should_stop():
raise RuntimeError("_get_slack_document_access: Stop signal detected")
callback.progress("_get_slack_document_access", 1)
def slack_doc_sync(
cc_pair: ConnectorCredentialPair,
callback: IndexingHeartbeatInterface | None,
) -> Generator[DocExternalAccess, None, None]:
) -> list[DocExternalAccess]:
"""
Adds the external permissions to the documents in postgres
if the document doesn't already exist in postgres, we create
@@ -136,12 +136,9 @@ def slack_doc_sync(
token=cc_pair.credential.credential_json["slack_bot_token"]
)
user_id_to_email_map = fetch_user_id_to_email_map(slack_client)
if not user_id_to_email_map:
raise ValueError(
"No user id to email map found. Please check to make sure that "
"your Slack bot token has the `users:read.email` scope"
)
channel_doc_map = _get_slack_document_ids_and_channels(
cc_pair=cc_pair, callback=callback
)
workspace_permissions = _fetch_workspace_permissions(
user_id_to_email_map=user_id_to_email_map,
)
@@ -151,8 +148,18 @@ def slack_doc_sync(
user_id_to_email_map=user_id_to_email_map,
)
yield from _get_slack_document_access(
cc_pair=cc_pair,
channel_permissions=channel_permissions,
callback=callback,
)
document_external_accesses = []
for channel_id, ext_access in channel_permissions.items():
doc_ids = channel_doc_map.get(channel_id)
if not doc_ids:
# No documents found for this channel_id
continue
for doc_id in doc_ids:
document_external_accesses.append(
DocExternalAccess(
external_access=ext_access,
doc_id=doc_id,
)
)
return document_external_accesses
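A tiny worked example (plain data, illustration only) of the new mapping step at the end of slack_doc_sync: channel-level permissions are fanned out to every document collected for that channel, and channels with no documents are skipped.

```python
channel_doc_map = {"C1": ["doc-a", "doc-b"], "C2": ["doc-c"]}
channel_permissions = {"C1": "perm-1", "C2": "perm-2", "C3": "perm-3"}  # C3 has no docs

fanned_out = [
    (doc_id, perm)
    for channel_id, perm in channel_permissions.items()
    for doc_id in channel_doc_map.get(channel_id, [])
]
print(fanned_out)
# [('doc-a', 'perm-1'), ('doc-b', 'perm-1'), ('doc-c', 'perm-2')]
```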

View File

@@ -1,10 +1,7 @@
from collections.abc import Callable
from collections.abc import Generator
from ee.onyx.configs.app_configs import CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import GOOGLE_DRIVE_PERMISSION_GROUP_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import SLACK_PERMISSION_DOC_SYNC_FREQUENCY
from ee.onyx.db.external_perm import ExternalUserGroup
from ee.onyx.external_permissions.confluence.doc_sync import confluence_doc_sync
from ee.onyx.external_permissions.confluence.group_sync import confluence_group_sync
@@ -26,7 +23,7 @@ DocSyncFuncType = Callable[
ConnectorCredentialPair,
IndexingHeartbeatInterface | None,
],
Generator[DocExternalAccess, None, None],
list[DocExternalAccess],
]
GroupSyncFuncType = Callable[
@@ -68,13 +65,13 @@ GROUP_PERMISSIONS_IS_CC_PAIR_AGNOSTIC: set[DocumentSource] = {
DOC_PERMISSION_SYNC_PERIODS: dict[DocumentSource, int] = {
# Polling is not supported so we fetch all doc permissions every 5 minutes
DocumentSource.CONFLUENCE: CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY,
DocumentSource.SLACK: SLACK_PERMISSION_DOC_SYNC_FREQUENCY,
DocumentSource.SLACK: 5 * 60,
}
# If nothing is specified here, we run the doc_sync every time the celery beat runs
EXTERNAL_GROUP_SYNC_PERIODS: dict[DocumentSource, int] = {
# Polling is not supported so we fetch all group permissions every 30 minutes
DocumentSource.GOOGLE_DRIVE: GOOGLE_DRIVE_PERMISSION_GROUP_SYNC_FREQUENCY,
DocumentSource.GOOGLE_DRIVE: 5 * 60,
DocumentSource.CONFLUENCE: CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY,
}

View File

@@ -64,15 +64,7 @@ def get_application() -> FastAPI:
add_tenant_id_middleware(application, logger)
if AUTH_TYPE == AuthType.CLOUD:
# For Google OAuth, refresh tokens are requested by:
# 1. Adding the right scopes
# 2. Properly configuring OAuth in Google Cloud Console to allow offline access
oauth_client = GoogleOAuth2(
OAUTH_CLIENT_ID,
OAUTH_CLIENT_SECRET,
# Use standard scopes that include profile and email
scopes=["openid", "email", "profile"],
)
oauth_client = GoogleOAuth2(OAUTH_CLIENT_ID, OAUTH_CLIENT_SECRET)
include_auth_router_with_prefix(
application,
create_onyx_oauth_router(
@@ -95,16 +87,6 @@ def get_application() -> FastAPI:
)
if AUTH_TYPE == AuthType.OIDC:
# Ensure we request offline_access for refresh tokens
try:
oidc_scopes = list(OIDC_SCOPE_OVERRIDE or BASE_SCOPES)
if "offline_access" not in oidc_scopes:
oidc_scopes.append("offline_access")
except Exception as e:
logger.warning(f"Error configuring OIDC scopes: {e}")
# Fall back to default scopes if there's an error
oidc_scopes = BASE_SCOPES
include_auth_router_with_prefix(
application,
create_onyx_oauth_router(
@@ -112,8 +94,8 @@ def get_application() -> FastAPI:
OAUTH_CLIENT_ID,
OAUTH_CLIENT_SECRET,
OPENID_CONFIG_URL,
# Use the configured scopes
base_scopes=oidc_scopes,
# BASE_SCOPES is the same as not setting this
base_scopes=OIDC_SCOPE_OVERRIDE or BASE_SCOPES,
),
auth_backend,
USER_AUTH_SECRET,

View File

@@ -15,8 +15,8 @@ from sqlalchemy.orm import Session
from ee.onyx.server.enterprise_settings.models import AnalyticsScriptUpload
from ee.onyx.server.enterprise_settings.models import EnterpriseSettings
from ee.onyx.server.enterprise_settings.store import get_logo_filename
from ee.onyx.server.enterprise_settings.store import get_logotype_filename
from ee.onyx.server.enterprise_settings.store import _LOGO_FILENAME
from ee.onyx.server.enterprise_settings.store import _LOGOTYPE_FILENAME
from ee.onyx.server.enterprise_settings.store import load_analytics_script
from ee.onyx.server.enterprise_settings.store import load_settings
from ee.onyx.server.enterprise_settings.store import store_analytics_script
@@ -28,7 +28,7 @@ from onyx.auth.users import get_user_manager
from onyx.auth.users import UserManager
from onyx.db.engine import get_session
from onyx.db.models import User
from onyx.file_store.file_store import PostgresBackedFileStore
from onyx.file_store.file_store import get_default_file_store
from onyx.utils.logger import setup_logger
admin_router = APIRouter(prefix="/admin/enterprise-settings")
@@ -131,49 +131,31 @@ def put_logo(
upload_logo(file=file, db_session=db_session, is_logotype=is_logotype)
def fetch_logo_helper(db_session: Session) -> Response:
def fetch_logo_or_logotype(is_logotype: bool, db_session: Session) -> Response:
try:
file_store = PostgresBackedFileStore(db_session)
onyx_file = file_store.get_file_with_mime_type(get_logo_filename())
if not onyx_file:
raise ValueError("get_onyx_file returned None!")
file_store = get_default_file_store(db_session)
filename = _LOGOTYPE_FILENAME if is_logotype else _LOGO_FILENAME
file_io = file_store.read_file(filename, mode="b")
# NOTE: specifying "image/jpeg" here, but it still works for pngs
# TODO: do this properly
return Response(content=file_io.read(), media_type="image/jpeg")
except Exception:
raise HTTPException(
status_code=404,
detail="No logo file found",
detail=f"No {'logotype' if is_logotype else 'logo'} file found",
)
else:
return Response(content=onyx_file.data, media_type=onyx_file.mime_type)
def fetch_logotype_helper(db_session: Session) -> Response:
try:
file_store = PostgresBackedFileStore(db_session)
onyx_file = file_store.get_file_with_mime_type(get_logotype_filename())
if not onyx_file:
raise ValueError("get_onyx_file returned None!")
except Exception:
raise HTTPException(
status_code=404,
detail="No logotype file found",
)
else:
return Response(content=onyx_file.data, media_type=onyx_file.mime_type)
@basic_router.get("/logotype")
def fetch_logotype(db_session: Session = Depends(get_session)) -> Response:
return fetch_logotype_helper(db_session)
return fetch_logo_or_logotype(is_logotype=True, db_session=db_session)
@basic_router.get("/logo")
def fetch_logo(
is_logotype: bool = False, db_session: Session = Depends(get_session)
) -> Response:
if is_logotype:
return fetch_logotype_helper(db_session)
return fetch_logo_helper(db_session)
return fetch_logo_or_logotype(is_logotype=is_logotype, db_session=db_session)
@admin_router.put("/custom-analytics-script")

View File

@@ -13,7 +13,6 @@ from ee.onyx.server.enterprise_settings.models import EnterpriseSettings
from onyx.configs.constants import FileOrigin
from onyx.configs.constants import KV_CUSTOM_ANALYTICS_SCRIPT_KEY
from onyx.configs.constants import KV_ENTERPRISE_SETTINGS_KEY
from onyx.configs.constants import ONYX_DEFAULT_APPLICATION_NAME
from onyx.file_store.file_store import get_default_file_store
from onyx.key_value_store.factory import get_kv_store
from onyx.key_value_store.interface import KvKeyNotFoundError
@@ -22,18 +21,8 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
_LOGO_FILENAME = "__logo__"
_LOGOTYPE_FILENAME = "__logotype__"
def load_settings() -> EnterpriseSettings:
"""Loads settings data directly from DB. This should be used primarily
for checking what is actually in the DB, aka for editing and saving back settings.
Runtime settings actually used by the application should be checked with
load_runtime_settings as defaults may be applied at runtime.
"""
dynamic_config_store = get_kv_store()
try:
settings = EnterpriseSettings(
@@ -47,24 +36,9 @@ def load_settings() -> EnterpriseSettings:
def store_settings(settings: EnterpriseSettings) -> None:
"""Stores settings directly to the kv store / db."""
get_kv_store().store(KV_ENTERPRISE_SETTINGS_KEY, settings.model_dump())
def load_runtime_settings() -> EnterpriseSettings:
"""Loads settings from DB and applies any defaults or transformations for use
at runtime.
Should not be stored back to the DB.
"""
enterprise_settings = load_settings()
if not enterprise_settings.application_name:
enterprise_settings.application_name = ONYX_DEFAULT_APPLICATION_NAME
return enterprise_settings
_CUSTOM_ANALYTICS_SECRET_KEY = os.environ.get("CUSTOM_ANALYTICS_SECRET_KEY")
@@ -86,6 +60,10 @@ def store_analytics_script(analytics_script_upload: AnalyticsScriptUpload) -> No
get_kv_store().store(KV_CUSTOM_ANALYTICS_SCRIPT_KEY, analytics_script_upload.script)
_LOGO_FILENAME = "__logo__"
_LOGOTYPE_FILENAME = "__logotype__"
def is_valid_file_type(filename: str) -> bool:
valid_extensions = (".png", ".jpg", ".jpeg")
return filename.endswith(valid_extensions)
@@ -138,11 +116,3 @@ def upload_logo(
file_type=file_type,
)
return True
def get_logo_filename() -> str:
return _LOGO_FILENAME
def get_logotype_filename() -> str:
return _LOGOTYPE_FILENAME

View File

@@ -1,35 +1,26 @@
import re
from typing import cast
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from sqlalchemy.orm import Session
from ee.onyx.server.query_and_chat.models import AgentAnswer
from ee.onyx.server.query_and_chat.models import AgentSubQuery
from ee.onyx.server.query_and_chat.models import AgentSubQuestion
from ee.onyx.server.query_and_chat.models import BasicCreateChatMessageRequest
from ee.onyx.server.query_and_chat.models import (
BasicCreateChatMessageWithHistoryRequest,
)
from ee.onyx.server.query_and_chat.models import ChatBasicResponse
from ee.onyx.server.query_and_chat.models import SimpleDoc
from onyx.auth.users import current_user
from onyx.chat.chat_utils import combine_message_thread
from onyx.chat.chat_utils import create_chat_chain
from onyx.chat.models import AgentAnswerPiece
from onyx.chat.models import AllCitations
from onyx.chat.models import ExtendedToolResponse
from onyx.chat.models import FinalUsedContextDocsResponse
from onyx.chat.models import LlmDoc
from onyx.chat.models import LLMRelevanceFilterResponse
from onyx.chat.models import OnyxAnswerPiece
from onyx.chat.models import QADocsResponse
from onyx.chat.models import RefinedAnswerImprovement
from onyx.chat.models import StreamingError
from onyx.chat.models import SubQueryPiece
from onyx.chat.models import SubQuestionIdentifier
from onyx.chat.models import SubQuestionPiece
from onyx.chat.process_message import ChatPacketStream
from onyx.chat.process_message import stream_chat_message_objects
from onyx.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE
@@ -55,6 +46,25 @@ logger = setup_logger()
router = APIRouter(prefix="/chat")
def _translate_doc_response_to_simple_doc(
doc_response: QADocsResponse,
) -> list[SimpleDoc]:
return [
SimpleDoc(
id=doc.document_id,
semantic_identifier=doc.semantic_identifier,
link=doc.link,
blurb=doc.blurb,
match_highlights=[
highlight for highlight in doc.match_highlights if highlight
],
source_type=doc.source_type,
metadata=doc.metadata,
)
for doc in doc_response.top_documents
]
def _get_final_context_doc_indices(
final_context_docs: list[LlmDoc] | None,
top_docs: list[SavedSearchDoc] | None,
@@ -79,26 +89,14 @@ def _convert_packet_stream_to_response(
final_context_docs: list[LlmDoc] = []
answer = ""
# accumulate stream data with these dicts
agent_sub_questions: dict[tuple[int, int], AgentSubQuestion] = {}
agent_answers: dict[tuple[int, int], AgentAnswer] = {}
agent_sub_queries: dict[tuple[int, int, int], AgentSubQuery] = {}
for packet in packets:
if isinstance(packet, OnyxAnswerPiece) and packet.answer_piece:
answer += packet.answer_piece
elif isinstance(packet, QADocsResponse):
response.top_documents = packet.top_documents
# This is a no-op if agent_sub_questions hasn't already been filled
if packet.level is not None and packet.level_question_num is not None:
id = (packet.level, packet.level_question_num)
if id in agent_sub_questions:
agent_sub_questions[id].document_ids = [
saved_search_doc.document_id
for saved_search_doc in packet.top_documents
]
# TODO: deprecate `simple_search_docs`
response.simple_search_docs = _translate_doc_response_to_simple_doc(packet)
elif isinstance(packet, StreamingError):
response.error_msg = packet.error
elif isinstance(packet, ChatMessageDetail):
@@ -115,104 +113,11 @@ def _convert_packet_stream_to_response(
citation.citation_num: citation.document_id
for citation in packet.citations
}
# agentic packets
elif isinstance(packet, SubQuestionPiece):
if packet.level is not None and packet.level_question_num is not None:
id = (packet.level, packet.level_question_num)
if agent_sub_questions.get(id) is None:
agent_sub_questions[id] = AgentSubQuestion(
level=packet.level,
level_question_num=packet.level_question_num,
sub_question=packet.sub_question,
document_ids=[],
)
else:
agent_sub_questions[id].sub_question += packet.sub_question
elif isinstance(packet, AgentAnswerPiece):
if packet.level is not None and packet.level_question_num is not None:
id = (packet.level, packet.level_question_num)
if agent_answers.get(id) is None:
agent_answers[id] = AgentAnswer(
level=packet.level,
level_question_num=packet.level_question_num,
answer=packet.answer_piece,
answer_type=packet.answer_type,
)
else:
agent_answers[id].answer += packet.answer_piece
elif isinstance(packet, SubQueryPiece):
if packet.level is not None and packet.level_question_num is not None:
sub_query_id = (
packet.level,
packet.level_question_num,
packet.query_id,
)
if agent_sub_queries.get(sub_query_id) is None:
agent_sub_queries[sub_query_id] = AgentSubQuery(
level=packet.level,
level_question_num=packet.level_question_num,
sub_query=packet.sub_query,
query_id=packet.query_id,
)
else:
agent_sub_queries[sub_query_id].sub_query += packet.sub_query
elif isinstance(packet, ExtendedToolResponse):
# we shouldn't get this ... it gets intercepted and translated to QADocsResponse
logger.warning(
"_convert_packet_stream_to_response: Unexpected chat packet type ExtendedToolResponse!"
)
elif isinstance(packet, RefinedAnswerImprovement):
response.agent_refined_answer_improvement = (
packet.refined_answer_improvement
)
else:
logger.warning(
f"_convert_packet_stream_to_response - Unrecognized chat packet: type={type(packet)}"
)
response.final_context_doc_indices = _get_final_context_doc_indices(
final_context_docs, response.top_documents
)
# organize / sort agent metadata for output
if len(agent_sub_questions) > 0:
response.agent_sub_questions = cast(
dict[int, list[AgentSubQuestion]],
SubQuestionIdentifier.make_dict_by_level(agent_sub_questions),
)
if len(agent_answers) > 0:
# return the agent_level_answer from the first level or the last one depending
# on agent_refined_answer_improvement
response.agent_answers = cast(
dict[int, list[AgentAnswer]],
SubQuestionIdentifier.make_dict_by_level(agent_answers),
)
if response.agent_answers:
selected_answer_level = (
0
if not response.agent_refined_answer_improvement
else len(response.agent_answers) - 1
)
level_answers = response.agent_answers[selected_answer_level]
for level_answer in level_answers:
if level_answer.answer_type != "agent_level_answer":
continue
answer = level_answer.answer
break
if len(agent_sub_queries) > 0:
# subqueries are often emitted with trailing whitespace ... clean it up here
# perhaps fix at the source?
for v in agent_sub_queries.values():
v.sub_query = v.sub_query.strip()
response.agent_sub_queries = (
AgentSubQuery.make_dict_by_level_and_question_index(agent_sub_queries)
)
response.answer = answer
if answer:
response.answer_citationless = remove_answer_citations(answer)

View File

@@ -1,5 +1,3 @@
from collections import OrderedDict
from typing import Literal
from uuid import UUID
from pydantic import BaseModel
@@ -8,9 +6,9 @@ from pydantic import model_validator
from ee.onyx.server.manage.models import StandardAnswer
from onyx.chat.models import CitationInfo
from onyx.chat.models import OnyxContexts
from onyx.chat.models import PersonaOverrideConfig
from onyx.chat.models import QADocsResponse
from onyx.chat.models import SubQuestionIdentifier
from onyx.chat.models import ThreadMessage
from onyx.configs.constants import DocumentSource
from onyx.context.search.enums import LLMEvaluationType
@@ -90,64 +88,6 @@ class SimpleDoc(BaseModel):
metadata: dict | None
class AgentSubQuestion(SubQuestionIdentifier):
sub_question: str
document_ids: list[str]
class AgentAnswer(SubQuestionIdentifier):
answer: str
answer_type: Literal["agent_sub_answer", "agent_level_answer"]
class AgentSubQuery(SubQuestionIdentifier):
sub_query: str
query_id: int
@staticmethod
def make_dict_by_level_and_question_index(
original_dict: dict[tuple[int, int, int], "AgentSubQuery"]
) -> dict[int, dict[int, list["AgentSubQuery"]]]:
"""Takes a dict of tuple(level, question num, query_id) to sub queries.
returns a dict of level to dict[question num to list of query_id's]
Ordering is asc for readability.
"""
# In this function, when we sort int | None, we deliberately push None to the end
# map entries to the level_question_dict
level_question_dict: dict[int, dict[int, list["AgentSubQuery"]]] = {}
for k1, obj in original_dict.items():
level = k1[0]
question = k1[1]
if level not in level_question_dict:
level_question_dict[level] = {}
if question not in level_question_dict[level]:
level_question_dict[level][question] = []
level_question_dict[level][question].append(obj)
# sort each query_id list and question_index
for key1, obj1 in level_question_dict.items():
for key2, value2 in obj1.items():
# sort the query_id list of each question_index
level_question_dict[key1][key2] = sorted(
value2, key=lambda o: o.query_id
)
# sort the question_index dict of level
level_question_dict[key1] = OrderedDict(
sorted(level_question_dict[key1].items(), key=lambda x: (x is None, x))
)
# sort the top dict of levels
sorted_dict = OrderedDict(
sorted(level_question_dict.items(), key=lambda x: (x is None, x))
)
return sorted_dict
class ChatBasicResponse(BaseModel):
# This is built piece by piece, any of these can be None as the flow could break
answer: str | None = None
@@ -163,14 +103,10 @@ class ChatBasicResponse(BaseModel):
cited_documents: dict[int, str] | None = None
# FOR BACKWARDS COMPATIBILITY
# TODO: deprecate both of these
simple_search_docs: list[SimpleDoc] | None = None
llm_chunks_indices: list[int] | None = None
# agentic fields
agent_sub_questions: dict[int, list[AgentSubQuestion]] | None = None
agent_answers: dict[int, list[AgentAnswer]] | None = None
agent_sub_queries: dict[int, dict[int, list[AgentSubQuery]]] | None = None
agent_refined_answer_improvement: bool | None = None
class OneShotQARequest(ChunkContext):
# Supports simpler APIs that don't deal with chat histories or message edits
@@ -217,3 +153,4 @@ class OneShotQAResponse(BaseModel):
llm_selected_doc_indices: list[int] | None = None
error_msg: str | None = None
chat_message_id: int | None = None
contexts: OnyxContexts | None = None
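A worked example (shapes assumed, values hypothetical) of the regrouping performed by the removed AgentSubQuery.make_dict_by_level_and_question_index above: keys of (level, question_num, query_id) become {level: {question_num: [entries ordered by query_id]}}.

```python
original = {
    (0, 1, 2): "q0-1-2",
    (0, 1, 1): "q0-1-1",
    (1, 1, 1): "q1-1-1",
}

# Sorting the tuple keys orders by level, then question_num, then query_id,
# so each inner list ends up in ascending query_id order.
regrouped: dict[int, dict[int, list[str]]] = {}
for (level, question, query_id), value in sorted(original.items()):
    regrouped.setdefault(level, {}).setdefault(question, []).append(value)

print(regrouped)
# {0: {1: ['q0-1-1', 'q0-1-2']}, 1: {1: ['q1-1-1']}}
```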

View File

@@ -36,12 +36,8 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
router = APIRouter(prefix="/auth/saml")
# Define non-authenticated user roles that should be re-created during SAML login
NON_AUTHENTICATED_ROLES = {UserRole.SLACK_USER, UserRole.EXT_PERM_USER}
async def upsert_saml_user(email: str) -> User:
logger.debug(f"Attempting to upsert SAML user with email: {email}")
get_async_session_context = contextlib.asynccontextmanager(
get_async_session
) # type:ignore
@@ -52,13 +48,9 @@ async def upsert_saml_user(email: str) -> User:
async with get_user_db_context(session) as user_db:
async with get_user_manager_context(user_db) as user_manager:
try:
user = await user_manager.get_by_email(email)
# If user has a non-authenticated role, treat as non-existent
if user.role in NON_AUTHENTICATED_ROLES:
raise exceptions.UserNotExists()
return user
return await user_manager.get_by_email(email)
except exceptions.UserNotExists:
logger.info("Creating user from SAML login")
logger.notice("Creating user from SAML login")
user_count = await get_user_count()
role = UserRole.ADMIN if user_count == 0 else UserRole.BASIC
@@ -67,10 +59,11 @@ async def upsert_saml_user(email: str) -> User:
password = fastapi_users_pw_helper.generate()
hashed_pass = fastapi_users_pw_helper.hash(password)
user = await user_manager.create(
user: User = await user_manager.create(
UserCreate(
email=email,
password=hashed_pass,
is_verified=True,
role=role,
)
)

View File

@@ -1,45 +0,0 @@
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Response
from ee.onyx.auth.users import current_cloud_superuser
from ee.onyx.server.tenants.models import ImpersonateRequest
from ee.onyx.server.tenants.user_mapping import get_tenant_id_for_email
from onyx.auth.users import auth_backend
from onyx.auth.users import get_redis_strategy
from onyx.auth.users import User
from onyx.db.engine import get_session_with_tenant
from onyx.db.users import get_user_by_email
from onyx.utils.logger import setup_logger
logger = setup_logger()
router = APIRouter(prefix="/tenants")
@router.post("/impersonate")
async def impersonate_user(
impersonate_request: ImpersonateRequest,
_: User = Depends(current_cloud_superuser),
) -> Response:
"""Allows a cloud superuser to impersonate another user by generating an impersonation JWT token"""
tenant_id = get_tenant_id_for_email(impersonate_request.email)
with get_session_with_tenant(tenant_id=tenant_id) as tenant_session:
user_to_impersonate = get_user_by_email(
impersonate_request.email, tenant_session
)
if user_to_impersonate is None:
raise HTTPException(status_code=404, detail="User not found")
token = await get_redis_strategy().write_token(user_to_impersonate)
response = await auth_backend.transport.get_login_response(token)
response.set_cookie(
key="fastapiusersauth",
value=token,
httponly=True,
secure=True,
samesite="lax",
)
return response

View File

@@ -1,98 +0,0 @@
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Response
from sqlalchemy.exc import IntegrityError
from ee.onyx.auth.users import generate_anonymous_user_jwt_token
from ee.onyx.configs.app_configs import ANONYMOUS_USER_COOKIE_NAME
from ee.onyx.server.tenants.anonymous_user_path import get_anonymous_user_path
from ee.onyx.server.tenants.anonymous_user_path import (
get_tenant_id_for_anonymous_user_path,
)
from ee.onyx.server.tenants.anonymous_user_path import modify_anonymous_user_path
from ee.onyx.server.tenants.anonymous_user_path import validate_anonymous_user_path
from ee.onyx.server.tenants.models import AnonymousUserPath
from onyx.auth.users import anonymous_user_enabled
from onyx.auth.users import current_admin_user
from onyx.auth.users import optional_user
from onyx.auth.users import User
from onyx.configs.constants import FASTAPI_USERS_AUTH_COOKIE_NAME
from onyx.db.engine import get_session_with_shared_schema
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
router = APIRouter(prefix="/tenants")
@router.get("/anonymous-user-path")
async def get_anonymous_user_path_api(
_: User | None = Depends(current_admin_user),
) -> AnonymousUserPath:
tenant_id = get_current_tenant_id()
if tenant_id is None:
raise HTTPException(status_code=404, detail="Tenant not found")
with get_session_with_shared_schema() as db_session:
current_path = get_anonymous_user_path(tenant_id, db_session)
return AnonymousUserPath(anonymous_user_path=current_path)
@router.post("/anonymous-user-path")
async def set_anonymous_user_path_api(
anonymous_user_path: str,
_: User | None = Depends(current_admin_user),
) -> None:
tenant_id = get_current_tenant_id()
try:
validate_anonymous_user_path(anonymous_user_path)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
with get_session_with_shared_schema() as db_session:
try:
modify_anonymous_user_path(tenant_id, anonymous_user_path, db_session)
except IntegrityError:
raise HTTPException(
status_code=409,
detail="The anonymous user path is already in use. Please choose a different path.",
)
except Exception as e:
logger.exception(f"Failed to modify anonymous user path: {str(e)}")
raise HTTPException(
status_code=500,
detail="An unexpected error occurred while modifying the anonymous user path",
)
@router.post("/anonymous-user")
async def login_as_anonymous_user(
anonymous_user_path: str,
_: User | None = Depends(optional_user),
) -> Response:
with get_session_with_shared_schema() as db_session:
tenant_id = get_tenant_id_for_anonymous_user_path(
anonymous_user_path, db_session
)
if not tenant_id:
raise HTTPException(status_code=404, detail="Tenant not found")
if not anonymous_user_enabled(tenant_id=tenant_id):
raise HTTPException(status_code=403, detail="Anonymous user is not enabled")
token = generate_anonymous_user_jwt_token(tenant_id)
response = Response()
response.delete_cookie(FASTAPI_USERS_AUTH_COOKIE_NAME)
response.set_cookie(
key=ANONYMOUS_USER_COOKIE_NAME,
value=token,
httponly=True,
secure=True,
samesite="strict",
)
return response

View File

@@ -1,24 +1,269 @@
import stripe
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Response
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session
from ee.onyx.server.tenants.admin_api import router as admin_router
from ee.onyx.server.tenants.anonymous_users_api import router as anonymous_users_router
from ee.onyx.server.tenants.billing_api import router as billing_router
from ee.onyx.server.tenants.team_membership_api import router as team_membership_router
from ee.onyx.server.tenants.tenant_management_api import (
router as tenant_management_router,
)
from ee.onyx.server.tenants.user_invitations_api import (
router as user_invitations_router,
from ee.onyx.auth.users import current_cloud_superuser
from ee.onyx.auth.users import generate_anonymous_user_jwt_token
from ee.onyx.configs.app_configs import ANONYMOUS_USER_COOKIE_NAME
from ee.onyx.configs.app_configs import STRIPE_SECRET_KEY
from ee.onyx.server.tenants.access import control_plane_dep
from ee.onyx.server.tenants.anonymous_user_path import get_anonymous_user_path
from ee.onyx.server.tenants.anonymous_user_path import (
get_tenant_id_for_anonymous_user_path,
)
from ee.onyx.server.tenants.anonymous_user_path import modify_anonymous_user_path
from ee.onyx.server.tenants.anonymous_user_path import validate_anonymous_user_path
from ee.onyx.server.tenants.billing import fetch_billing_information
from ee.onyx.server.tenants.billing import fetch_stripe_checkout_session
from ee.onyx.server.tenants.billing import fetch_tenant_stripe_information
from ee.onyx.server.tenants.models import AnonymousUserPath
from ee.onyx.server.tenants.models import BillingInformation
from ee.onyx.server.tenants.models import ImpersonateRequest
from ee.onyx.server.tenants.models import ProductGatingRequest
from ee.onyx.server.tenants.models import ProductGatingResponse
from ee.onyx.server.tenants.models import SubscriptionSessionResponse
from ee.onyx.server.tenants.models import SubscriptionStatusResponse
from ee.onyx.server.tenants.product_gating import store_product_gating
from ee.onyx.server.tenants.provisioning import delete_user_from_control_plane
from ee.onyx.server.tenants.user_mapping import get_tenant_id_for_email
from ee.onyx.server.tenants.user_mapping import remove_all_users_from_tenant
from ee.onyx.server.tenants.user_mapping import remove_users_from_tenant
from onyx.auth.users import anonymous_user_enabled
from onyx.auth.users import auth_backend
from onyx.auth.users import current_admin_user
from onyx.auth.users import get_redis_strategy
from onyx.auth.users import optional_user
from onyx.auth.users import User
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.configs.constants import FASTAPI_USERS_AUTH_COOKIE_NAME
from onyx.db.auth import get_user_count
from onyx.db.engine import get_session
from onyx.db.engine import get_session_with_shared_schema
from onyx.db.engine import get_session_with_tenant
from onyx.db.users import delete_user_from_db
from onyx.db.users import get_user_by_email
from onyx.server.manage.models import UserByEmail
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from shared_configs.contextvars import get_current_tenant_id
# Create a main router to include all sub-routers
# Note: We don't add a prefix here as each router already has the /tenants prefix
router = APIRouter()
stripe.api_key = STRIPE_SECRET_KEY
logger = setup_logger()
router = APIRouter(prefix="/tenants")
# Include all the individual routers
router.include_router(admin_router)
router.include_router(anonymous_users_router)
router.include_router(billing_router)
router.include_router(team_membership_router)
router.include_router(tenant_management_router)
router.include_router(user_invitations_router)
@router.get("/anonymous-user-path")
async def get_anonymous_user_path_api(
_: User | None = Depends(current_admin_user),
) -> AnonymousUserPath:
tenant_id = get_current_tenant_id()
if tenant_id is None:
raise HTTPException(status_code=404, detail="Tenant not found")
with get_session_with_shared_schema() as db_session:
current_path = get_anonymous_user_path(tenant_id, db_session)
return AnonymousUserPath(anonymous_user_path=current_path)
@router.post("/anonymous-user-path")
async def set_anonymous_user_path_api(
anonymous_user_path: str,
_: User | None = Depends(current_admin_user),
) -> None:
tenant_id = get_current_tenant_id()
try:
validate_anonymous_user_path(anonymous_user_path)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
with get_session_with_shared_schema() as db_session:
try:
modify_anonymous_user_path(tenant_id, anonymous_user_path, db_session)
except IntegrityError:
raise HTTPException(
status_code=409,
detail="The anonymous user path is already in use. Please choose a different path.",
)
except Exception as e:
logger.exception(f"Failed to modify anonymous user path: {str(e)}")
raise HTTPException(
status_code=500,
detail="An unexpected error occurred while modifying the anonymous user path",
)
@router.post("/anonymous-user")
async def login_as_anonymous_user(
anonymous_user_path: str,
_: User | None = Depends(optional_user),
) -> Response:
with get_session_with_shared_schema() as db_session:
tenant_id = get_tenant_id_for_anonymous_user_path(
anonymous_user_path, db_session
)
if not tenant_id:
raise HTTPException(status_code=404, detail="Tenant not found")
if not anonymous_user_enabled(tenant_id=tenant_id):
raise HTTPException(status_code=403, detail="Anonymous user is not enabled")
token = generate_anonymous_user_jwt_token(tenant_id)
response = Response()
response.delete_cookie(FASTAPI_USERS_AUTH_COOKIE_NAME)
response.set_cookie(
key=ANONYMOUS_USER_COOKIE_NAME,
value=token,
httponly=True,
secure=True,
samesite="strict",
)
return response
@router.post("/product-gating")
def gate_product(
product_gating_request: ProductGatingRequest, _: None = Depends(control_plane_dep)
) -> ProductGatingResponse:
"""
Gating the product means that the product is not available to the tenant.
They will be directed to the billing page.
We gate the product when their subscription has ended.
"""
try:
store_product_gating(
product_gating_request.tenant_id, product_gating_request.application_status
)
return ProductGatingResponse(updated=True, error=None)
except Exception as e:
logger.exception("Failed to gate product")
return ProductGatingResponse(updated=False, error=str(e))
@router.get("/billing-information")
async def billing_information(
_: User = Depends(current_admin_user),
) -> BillingInformation | SubscriptionStatusResponse:
logger.info("Fetching billing information")
tenant_id = get_current_tenant_id()
return fetch_billing_information(tenant_id)
@router.post("/create-customer-portal-session")
async def create_customer_portal_session(
_: User = Depends(current_admin_user),
) -> dict:
tenant_id = get_current_tenant_id()
try:
stripe_info = fetch_tenant_stripe_information(tenant_id)
stripe_customer_id = stripe_info.get("stripe_customer_id")
if not stripe_customer_id:
raise HTTPException(status_code=400, detail="Stripe customer ID not found")
logger.info(stripe_customer_id)
portal_session = stripe.billing_portal.Session.create(
customer=stripe_customer_id,
return_url=f"{WEB_DOMAIN}/admin/billing",
)
logger.info(portal_session)
return {"url": portal_session.url}
except Exception as e:
logger.exception("Failed to create customer portal session")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/create-subscription-session")
async def create_subscription_session(
_: User = Depends(current_admin_user),
) -> SubscriptionSessionResponse:
try:
tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
if not tenant_id:
raise HTTPException(status_code=400, detail="Tenant ID not found")
session_id = fetch_stripe_checkout_session(tenant_id)
return SubscriptionSessionResponse(sessionId=session_id)
except Exception as e:
logger.exception("Failed to create resubscription session")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/impersonate")
async def impersonate_user(
impersonate_request: ImpersonateRequest,
_: User = Depends(current_cloud_superuser),
) -> Response:
"""Allows a cloud superuser to impersonate another user by generating an impersonation JWT token"""
tenant_id = get_tenant_id_for_email(impersonate_request.email)
with get_session_with_tenant(tenant_id=tenant_id) as tenant_session:
user_to_impersonate = get_user_by_email(
impersonate_request.email, tenant_session
)
if user_to_impersonate is None:
raise HTTPException(status_code=404, detail="User not found")
token = await get_redis_strategy().write_token(user_to_impersonate)
response = await auth_backend.transport.get_login_response(token)
response.set_cookie(
key="fastapiusersauth",
value=token,
httponly=True,
secure=True,
samesite="lax",
)
return response
@router.post("/leave-organization")
async def leave_organization(
user_email: UserByEmail,
current_user: User | None = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> None:
tenant_id = get_current_tenant_id()
if current_user is None or current_user.email != user_email.user_email:
raise HTTPException(
status_code=403, detail="You can only leave the organization as yourself"
)
user_to_delete = get_user_by_email(user_email.user_email, db_session)
if user_to_delete is None:
raise HTTPException(status_code=404, detail="User not found")
num_admin_users = await get_user_count(only_admin_users=True)
should_delete_tenant = num_admin_users == 1
if should_delete_tenant:
logger.info(
"Last admin user is leaving the organization. Deleting tenant from control plane."
)
try:
await delete_user_from_control_plane(tenant_id, user_to_delete.email)
logger.debug("User deleted from control plane")
except Exception as e:
logger.exception(
f"Failed to delete user from control plane for tenant {tenant_id}: {e}"
)
raise HTTPException(
status_code=500,
detail=f"Failed to remove user from control plane: {str(e)}",
)
db_session.expunge(user_to_delete)
delete_user_from_db(user_to_delete, db_session)
if should_delete_tenant:
remove_all_users_from_tenant(tenant_id)
else:
remove_users_from_tenant([user_to_delete.email], tenant_id)

View File

@@ -1,96 +0,0 @@
import stripe
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from ee.onyx.auth.users import current_admin_user
from ee.onyx.configs.app_configs import STRIPE_SECRET_KEY
from ee.onyx.server.tenants.access import control_plane_dep
from ee.onyx.server.tenants.billing import fetch_billing_information
from ee.onyx.server.tenants.billing import fetch_stripe_checkout_session
from ee.onyx.server.tenants.billing import fetch_tenant_stripe_information
from ee.onyx.server.tenants.models import BillingInformation
from ee.onyx.server.tenants.models import ProductGatingRequest
from ee.onyx.server.tenants.models import ProductGatingResponse
from ee.onyx.server.tenants.models import SubscriptionSessionResponse
from ee.onyx.server.tenants.models import SubscriptionStatusResponse
from ee.onyx.server.tenants.product_gating import store_product_gating
from onyx.auth.users import User
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from shared_configs.contextvars import get_current_tenant_id
stripe.api_key = STRIPE_SECRET_KEY
logger = setup_logger()
router = APIRouter(prefix="/tenants")
@router.post("/product-gating")
def gate_product(
product_gating_request: ProductGatingRequest, _: None = Depends(control_plane_dep)
) -> ProductGatingResponse:
"""
Gating the product means that the product is not available to the tenant.
They will be directed to the billing page.
We gate the product when their subscription has ended.
"""
try:
store_product_gating(
product_gating_request.tenant_id, product_gating_request.application_status
)
return ProductGatingResponse(updated=True, error=None)
except Exception as e:
logger.exception("Failed to gate product")
return ProductGatingResponse(updated=False, error=str(e))
@router.get("/billing-information")
async def billing_information(
_: User = Depends(current_admin_user),
) -> BillingInformation | SubscriptionStatusResponse:
logger.info("Fetching billing information")
tenant_id = get_current_tenant_id()
return fetch_billing_information(tenant_id)
@router.post("/create-customer-portal-session")
async def create_customer_portal_session(
_: User = Depends(current_admin_user),
) -> dict:
tenant_id = get_current_tenant_id()
try:
stripe_info = fetch_tenant_stripe_information(tenant_id)
stripe_customer_id = stripe_info.get("stripe_customer_id")
if not stripe_customer_id:
raise HTTPException(status_code=400, detail="Stripe customer ID not found")
logger.info(stripe_customer_id)
portal_session = stripe.billing_portal.Session.create(
customer=stripe_customer_id,
return_url=f"{WEB_DOMAIN}/admin/billing",
)
logger.info(portal_session)
return {"url": portal_session.url}
except Exception as e:
logger.exception("Failed to create customer portal session")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/create-subscription-session")
async def create_subscription_session(
_: User = Depends(current_admin_user),
) -> SubscriptionSessionResponse:
try:
tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
if not tenant_id:
raise HTTPException(status_code=400, detail="Tenant ID not found")
session_id = fetch_stripe_checkout_session(tenant_id)
return SubscriptionSessionResponse(sessionId=session_id)
except Exception as e:
logger.exception("Failed to create resubscription session")
raise HTTPException(status_code=500, detail=str(e))

View File

@@ -67,30 +67,3 @@ class ProductGatingResponse(BaseModel):
class SubscriptionSessionResponse(BaseModel):
sessionId: str
class TenantByDomainResponse(BaseModel):
tenant_id: str
number_of_users: int
creator_email: str
class TenantByDomainRequest(BaseModel):
email: str
class RequestInviteRequest(BaseModel):
tenant_id: str
class RequestInviteResponse(BaseModel):
success: bool
message: str
class PendingUserSnapshot(BaseModel):
email: str
class ApproveUserRequest(BaseModel):
email: str

View File

@@ -4,7 +4,6 @@ import uuid
import aiohttp # Async HTTP client
import httpx
import requests
from fastapi import HTTPException
from fastapi import Request
from sqlalchemy import select
@@ -15,7 +14,6 @@ from ee.onyx.configs.app_configs import COHERE_DEFAULT_API_KEY
from ee.onyx.configs.app_configs import HUBSPOT_TRACKING_URL
from ee.onyx.configs.app_configs import OPENAI_DEFAULT_API_KEY
from ee.onyx.server.tenants.access import generate_data_plane_token
from ee.onyx.server.tenants.models import TenantByDomainResponse
from ee.onyx.server.tenants.models import TenantCreationPayload
from ee.onyx.server.tenants.models import TenantDeletionPayload
from ee.onyx.server.tenants.schema_management import create_schema_if_not_exists
@@ -28,12 +26,11 @@ from onyx.auth.users import exceptions
from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
from onyx.configs.app_configs import DEV_MODE
from onyx.configs.constants import MilestoneRecordType
from onyx.db.engine import get_session_with_shared_schema
from onyx.db.engine import get_session_with_tenant
from onyx.db.engine import get_sqlalchemy_engine
from onyx.db.llm import update_default_provider
from onyx.db.llm import upsert_cloud_embedding_provider
from onyx.db.llm import upsert_llm_provider
from onyx.db.models import AvailableTenant
from onyx.db.models import IndexModelStatus
from onyx.db.models import SearchSettings
from onyx.db.models import UserTenantMapping
@@ -63,71 +60,42 @@ async def get_or_provision_tenant(
This function should only be called after we have verified we want this user's tenant to exist.
It returns the tenant ID associated with the email, creating a new tenant if necessary.
"""
# Early return for non-multi-tenant mode
if not MULTI_TENANT:
return POSTGRES_DEFAULT_SCHEMA
if referral_source and request:
await submit_to_hubspot(email, referral_source, request)
# First, check if the user already has a tenant
tenant_id: str | None = None
try:
tenant_id = get_tenant_id_for_email(email)
return tenant_id
except exceptions.UserNotExists:
# User doesn't exist, so we need to create a new tenant or assign an existing one
pass
try:
# Try to get a pre-provisioned tenant
tenant_id = await get_available_tenant()
if tenant_id:
# If we have a pre-provisioned tenant, assign it to the user
await assign_tenant_to_user(tenant_id, email, referral_source)
logger.info(f"Assigned pre-provisioned tenant {tenant_id} to user {email}")
else:
# If no pre-provisioned tenant is available, create a new one on-demand
# If the tenant does not exist and we are in multi-tenant mode, provision a new tenant
try:
tenant_id = await create_tenant(email, referral_source)
except Exception as e:
logger.error(f"Tenant provisioning failed: {e}")
raise HTTPException(status_code=500, detail="Failed to provision tenant.")
# Notify control plane if we have created / assigned a new tenant
if not DEV_MODE:
await notify_control_plane(tenant_id, email, referral_source)
return tenant_id
except Exception as e:
# If we've encountered an error, log and raise an exception
error_msg = "Failed to provision tenant"
logger.error(error_msg, exc_info=e)
if not tenant_id:
raise HTTPException(
status_code=500,
detail="Failed to provision tenant. Please try again later.",
status_code=401, detail="User does not belong to an organization"
)
return tenant_id
async def create_tenant(email: str, referral_source: str | None = None) -> str:
"""
Create a new tenant on-demand when no pre-provisioned tenants are available.
This is the fallback method when we can't use a pre-provisioned tenant.
"""
tenant_id = TENANT_ID_PREFIX + str(uuid.uuid4())
logger.info(f"Creating new tenant {tenant_id} for user {email}")
try:
# Provision tenant on data plane
await provision_tenant(tenant_id, email)
# Notify control plane
if not DEV_MODE:
await notify_control_plane(tenant_id, email, referral_source)
except Exception as e:
logger.exception(f"Tenant provisioning failed: {str(e)}")
# Attempt to rollback the tenant provisioning
try:
await rollback_tenant_provisioning(tenant_id)
except Exception:
logger.exception(f"Failed to rollback tenant provisioning for {tenant_id}")
logger.error(f"Tenant provisioning failed: {e}")
await rollback_tenant_provisioning(tenant_id)
raise HTTPException(status_code=500, detail="Failed to provision tenant.")
return tenant_id
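As a rough illustration of the on-demand provisioning path above, a hedged usage sketch; the email, referral source, and surrounding wrapper function are hypothetical.
# Illustrative only: the email and referral source below are made up.
async def example_provision() -> None:
    tenant_id = await create_tenant("founder@example.com", referral_source="website")
    # Tenant ids are TENANT_ID_PREFIX plus a uuid4; on failure, create_tenant attempts a
    # rollback of the partially provisioned tenant and raises an HTTPException.
    print(f"Provisioned tenant: {tenant_id}")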
@@ -141,25 +109,54 @@ async def provision_tenant(tenant_id: str, email: str) -> None:
)
logger.debug(f"Provisioning tenant {tenant_id} for user {email}")
token = None
try:
# Create the schema for the tenant
if not create_schema_if_not_exists(tenant_id):
logger.debug(f"Created schema for tenant {tenant_id}")
else:
logger.debug(f"Schema already exists for tenant {tenant_id}")
# Set up the tenant with all necessary configurations
await setup_tenant(tenant_id)
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
# Assign the tenant to the user
await assign_tenant_to_user(tenant_id, email)
# Await the Alembic migrations
await asyncio.to_thread(run_alembic_migrations, tenant_id)
with get_session_with_tenant(tenant_id=tenant_id) as db_session:
configure_default_api_keys(db_session)
current_search_settings = (
db_session.query(SearchSettings)
.filter_by(status=IndexModelStatus.FUTURE)
.first()
)
cohere_enabled = (
current_search_settings is not None
and current_search_settings.provider_type == EmbeddingProvider.COHERE
)
setup_onyx(db_session, tenant_id, cohere_enabled=cohere_enabled)
add_users_to_tenant([email], tenant_id)
with get_session_with_tenant(tenant_id=tenant_id) as db_session:
create_milestone_and_report(
user=None,
distinct_id=tenant_id,
event_type=MilestoneRecordType.TENANT_CREATED,
properties={
"email": email,
},
db_session=db_session,
)
except Exception as e:
logger.exception(f"Failed to create tenant {tenant_id}")
raise HTTPException(
status_code=500, detail=f"Failed to create tenant: {str(e)}"
)
finally:
if token is not None:
CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
async def notify_control_plane(
@@ -190,74 +187,20 @@ async def notify_control_plane(
async def rollback_tenant_provisioning(tenant_id: str) -> None:
"""
Roll back tenant provisioning on the data plane.
Each step is handled independently to maximize cleanup even if some steps fail.
"""
# Logic to rollback tenant provisioning on data plane
logger.info(f"Rolling back tenant provisioning for tenant_id: {tenant_id}")
# Track if any part of the rollback fails
rollback_errors = []
# 1. Try to drop the tenant's schema
try:
# Drop the tenant's schema to rollback provisioning
drop_schema(tenant_id)
logger.info(f"Successfully dropped schema for tenant {tenant_id}")
# Remove tenant mapping
with Session(get_sqlalchemy_engine()) as db_session:
db_session.query(UserTenantMapping).filter(
UserTenantMapping.tenant_id == tenant_id
).delete()
db_session.commit()
except Exception as e:
error_msg = f"Failed to drop schema for tenant {tenant_id}: {str(e)}"
logger.error(error_msg)
rollback_errors.append(error_msg)
# 2. Try to remove tenant mapping
try:
with get_session_with_shared_schema() as db_session:
db_session.begin()
try:
db_session.query(UserTenantMapping).filter(
UserTenantMapping.tenant_id == tenant_id
).delete()
db_session.commit()
logger.info(
f"Successfully removed user mappings for tenant {tenant_id}"
)
except Exception as e:
db_session.rollback()
raise e
except Exception as e:
error_msg = f"Failed to remove user mappings for tenant {tenant_id}: {str(e)}"
logger.error(error_msg)
rollback_errors.append(error_msg)
# 3. If this tenant was in the available tenants table, remove it
try:
with get_session_with_shared_schema() as db_session:
db_session.begin()
try:
available_tenant = (
db_session.query(AvailableTenant)
.filter(AvailableTenant.tenant_id == tenant_id)
.first()
)
if available_tenant:
db_session.delete(available_tenant)
db_session.commit()
logger.info(
f"Removed tenant {tenant_id} from available tenants table"
)
except Exception as e:
db_session.rollback()
raise e
except Exception as e:
error_msg = f"Failed to remove tenant {tenant_id} from available tenants table: {str(e)}"
logger.error(error_msg)
rollback_errors.append(error_msg)
# Log summary of rollback operation
if rollback_errors:
logger.error(f"Tenant rollback completed with {len(rollback_errors)} errors")
else:
logger.info(f"Tenant rollback completed successfully for tenant {tenant_id}")
logger.error(f"Failed to rollback tenant provisioning: {e}")
def configure_default_api_keys(db_session: Session) -> None:
@@ -270,7 +213,6 @@ def configure_default_api_keys(db_session: Session) -> None:
fast_default_model_name="claude-3-5-sonnet-20241022",
model_names=ANTHROPIC_MODEL_NAMES,
display_model_names=["claude-3-5-sonnet-20241022"],
api_key_changed=True,
)
try:
full_provider = upsert_llm_provider(anthropic_provider, db_session)
@@ -283,7 +225,7 @@ def configure_default_api_keys(db_session: Session) -> None:
)
if OPENAI_DEFAULT_API_KEY:
openai_provider = LLMProviderUpsertRequest(
open_provider = LLMProviderUpsertRequest(
name="OpenAI",
provider=OPENAI_PROVIDER_NAME,
api_key=OPENAI_DEFAULT_API_KEY,
@@ -291,10 +233,9 @@ def configure_default_api_keys(db_session: Session) -> None:
fast_default_model_name="gpt-4o-mini",
model_names=OPEN_AI_MODEL_NAMES,
display_model_names=["o1", "o3-mini", "gpt-4o", "gpt-4o-mini"],
api_key_changed=True,
)
try:
full_provider = upsert_llm_provider(openai_provider, db_session)
full_provider = upsert_llm_provider(open_provider, db_session)
update_default_provider(full_provider.id, db_session)
except Exception as e:
logger.error(f"Failed to configure OpenAI provider: {e}")
@@ -412,151 +353,3 @@ async def delete_user_from_control_plane(tenant_id: str, email: str) -> None:
raise Exception(
f"Failed to delete tenant on control plane: {error_text}"
)
def get_tenant_by_domain_from_control_plane(
domain: str,
tenant_id: str,
) -> TenantByDomainResponse | None:
"""
Fetches tenant information from the control plane based on the email domain.
Args:
domain: The email domain to search for (e.g., "example.com")
tenant_id: The current tenant ID, passed along to the control plane
Returns:
A TenantByDomainResponse with the tenant's information if found, None otherwise
"""
token = generate_data_plane_token()
headers = {
"Authorization": f"Bearer {token}",
"Content-Type": "application/json",
}
try:
response = requests.get(
f"{CONTROL_PLANE_API_BASE_URL}/tenant-by-domain",
headers=headers,
json={"domain": domain, "tenant_id": tenant_id},
)
if response.status_code != 200:
logger.error(f"Control plane tenant lookup failed: {response.text}")
return None
response_data = response.json()
if not response_data:
return None
return TenantByDomainResponse(
tenant_id=response_data.get("tenant_id"),
number_of_users=response_data.get("number_of_users"),
creator_email=response_data.get("creator_email"),
)
except Exception as e:
logger.error(f"Error fetching tenant by domain: {str(e)}")
return None
async def get_available_tenant() -> str | None:
"""
Get an available pre-provisioned tenant from the NewAvailableTenant table.
Returns the tenant_id if one is available, None otherwise.
Uses row-level locking to prevent race conditions when multiple processes
try to get an available tenant simultaneously.
"""
if not MULTI_TENANT:
return None
with get_session_with_shared_schema() as db_session:
try:
db_session.begin()
# Get the oldest available tenant with FOR UPDATE lock to prevent race conditions
available_tenant = (
db_session.query(AvailableTenant)
.order_by(AvailableTenant.date_created)
.with_for_update(skip_locked=True) # Skip locked rows to avoid blocking
.first()
)
if available_tenant:
tenant_id = available_tenant.tenant_id
# Remove the tenant from the available tenants table
db_session.delete(available_tenant)
db_session.commit()
logger.info(f"Using pre-provisioned tenant {tenant_id}")
return tenant_id
else:
db_session.rollback()
return None
except Exception:
logger.exception("Error getting available tenant")
db_session.rollback()
return None
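For context, a sketch of the SQL that the locked read above roughly corresponds to; the table and column names are assumed from the ORM model, and the exact statement SQLAlchemy emits may differ.
# Roughly equivalent SQL for the with_for_update(skip_locked=True) read above
# (illustrative only; table/column names assumed from the AvailableTenant model):
#
#   SELECT * FROM available_tenant
#   ORDER BY date_created
#   LIMIT 1
#   FOR UPDATE SKIP LOCKED;
#
# SKIP LOCKED lets concurrent workers each claim a different pre-provisioned tenant
# instead of blocking on the same locked row.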
async def setup_tenant(tenant_id: str) -> None:
"""
Set up a tenant with all necessary configurations.
This is a centralized function that handles all tenant setup logic.
"""
token = None
try:
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
# Run Alembic migrations
await asyncio.to_thread(run_alembic_migrations, tenant_id)
# Configure the tenant with default settings
with get_session_with_tenant(tenant_id=tenant_id) as db_session:
# Configure default API keys
configure_default_api_keys(db_session)
# Set up Onyx with appropriate settings
current_search_settings = (
db_session.query(SearchSettings)
.filter_by(status=IndexModelStatus.FUTURE)
.first()
)
cohere_enabled = (
current_search_settings is not None
and current_search_settings.provider_type == EmbeddingProvider.COHERE
)
setup_onyx(db_session, tenant_id, cohere_enabled=cohere_enabled)
except Exception as e:
logger.exception(f"Failed to set up tenant {tenant_id}")
raise e
finally:
if token is not None:
CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
async def assign_tenant_to_user(
tenant_id: str, email: str, referral_source: str | None = None
) -> None:
"""
Assign a tenant to a user and perform necessary operations.
Uses transaction handling to ensure atomicity and includes retry logic
for control plane notifications.
"""
# First, add the user to the tenant in a transaction
try:
add_users_to_tenant([email], tenant_id)
# Create milestone record in the same transaction context as the tenant assignment
with get_session_with_tenant(tenant_id=tenant_id) as db_session:
create_milestone_and_report(
user=None,
distinct_id=tenant_id,
event_type=MilestoneRecordType.TENANT_CREATED,
properties={
"email": email,
},
db_session=db_session,
)
except Exception:
logger.exception(f"Failed to assign tenant {tenant_id} to user {email}")
raise Exception("Failed to assign tenant to user")

View File

@@ -74,21 +74,3 @@ def drop_schema(tenant_id: str) -> None:
text("DROP SCHEMA IF EXISTS %(schema_name)s CASCADE"),
{"schema_name": tenant_id},
)
def get_current_alembic_version(tenant_id: str) -> str:
"""Get the current Alembic version for a tenant."""
from alembic.runtime.migration import MigrationContext
from sqlalchemy import text
engine = get_sqlalchemy_engine()
# Set the search path to the tenant's schema
with engine.connect() as connection:
connection.execute(text(f'SET search_path TO "{tenant_id}"'))
# Get the current version from the alembic_version table
context = MigrationContext.configure(connection)
current_rev = context.get_current_revision()
return current_rev or "head"
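A hypothetical call, for illustration only; the tenant id is made up.
# Illustrative usage; the tenant id below is fabricated.
rev = get_current_alembic_version("tenant_00000000-0000-0000-0000-000000000000")
print(rev)  # a revision hash, or "head" when no revision is recorded yet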

View File

@@ -1,67 +0,0 @@
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from sqlalchemy.orm import Session
from ee.onyx.server.tenants.provisioning import delete_user_from_control_plane
from ee.onyx.server.tenants.user_mapping import remove_all_users_from_tenant
from ee.onyx.server.tenants.user_mapping import remove_users_from_tenant
from onyx.auth.users import current_admin_user
from onyx.auth.users import User
from onyx.db.auth import get_user_count
from onyx.db.engine import get_session
from onyx.db.users import delete_user_from_db
from onyx.db.users import get_user_by_email
from onyx.server.manage.models import UserByEmail
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
router = APIRouter(prefix="/tenants")
@router.post("/leave-team")
async def leave_organization(
user_email: UserByEmail,
current_user: User | None = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> None:
tenant_id = get_current_tenant_id()
if current_user is None or current_user.email != user_email.user_email:
raise HTTPException(
status_code=403, detail="You can only leave the organization as yourself"
)
user_to_delete = get_user_by_email(user_email.user_email, db_session)
if user_to_delete is None:
raise HTTPException(status_code=404, detail="User not found")
num_admin_users = await get_user_count(only_admin_users=True)
should_delete_tenant = num_admin_users == 1
if should_delete_tenant:
logger.info(
"Last admin user is leaving the organization. Deleting tenant from control plane."
)
try:
await delete_user_from_control_plane(tenant_id, user_to_delete.email)
logger.debug("User deleted from control plane")
except Exception as e:
logger.exception(
f"Failed to delete user from control plane for tenant {tenant_id}: {e}"
)
raise HTTPException(
status_code=500,
detail=f"Failed to remove user from control plane: {str(e)}",
)
db_session.expunge(user_to_delete)
delete_user_from_db(user_to_delete, db_session)
if should_delete_tenant:
remove_all_users_from_tenant(tenant_id)
else:
remove_users_from_tenant([user_to_delete.email], tenant_id)

View File

@@ -1,39 +0,0 @@
from fastapi import APIRouter
from fastapi import Depends
from ee.onyx.server.tenants.models import TenantByDomainResponse
from ee.onyx.server.tenants.provisioning import get_tenant_by_domain_from_control_plane
from onyx.auth.users import current_user
from onyx.auth.users import User
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
router = APIRouter(prefix="/tenants")
FORBIDDEN_COMMON_EMAIL_SUBSTRINGS = [
"gmail",
"outlook",
"yahoo",
"hotmail",
"icloud",
"msn",
"hotmail",
"hotmail.co.uk",
]
@router.get("/existing-team-by-domain")
def get_existing_tenant_by_domain(
user: User | None = Depends(current_user),
) -> TenantByDomainResponse | None:
if not user:
return None
domain = user.email.split("@")[1]
if any(substring in domain for substring in FORBIDDEN_COMMON_EMAIL_SUBSTRINGS):
return None
tenant_id = get_current_tenant_id()
return get_tenant_by_domain_from_control_plane(domain, tenant_id)

View File

@@ -1,90 +0,0 @@
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from ee.onyx.server.tenants.models import ApproveUserRequest
from ee.onyx.server.tenants.models import PendingUserSnapshot
from ee.onyx.server.tenants.models import RequestInviteRequest
from ee.onyx.server.tenants.user_mapping import accept_user_invite
from ee.onyx.server.tenants.user_mapping import approve_user_invite
from ee.onyx.server.tenants.user_mapping import deny_user_invite
from ee.onyx.server.tenants.user_mapping import invite_self_to_tenant
from onyx.auth.invited_users import get_pending_users
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_user
from onyx.auth.users import User
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
router = APIRouter(prefix="/tenants")
@router.post("/users/invite/request")
async def request_invite(
invite_request: RequestInviteRequest,
user: User | None = Depends(current_admin_user),
) -> None:
if user is None:
raise HTTPException(status_code=401, detail="User not authenticated")
try:
invite_self_to_tenant(user.email, invite_request.tenant_id)
except Exception as e:
logger.exception(
f"Failed to invite self to tenant {invite_request.tenant_id}: {e}"
)
raise HTTPException(status_code=500, detail=str(e))
@router.get("/users/pending")
def list_pending_users(
_: User | None = Depends(current_admin_user),
) -> list[PendingUserSnapshot]:
pending_emails = get_pending_users()
return [PendingUserSnapshot(email=email) for email in pending_emails]
@router.post("/users/invite/approve")
async def approve_user(
approve_user_request: ApproveUserRequest,
_: User | None = Depends(current_admin_user),
) -> None:
tenant_id = get_current_tenant_id()
approve_user_invite(approve_user_request.email, tenant_id)
@router.post("/users/invite/accept")
async def accept_invite(
invite_request: RequestInviteRequest,
user: User | None = Depends(current_user),
) -> None:
"""
Accept an invitation to join a tenant.
"""
if not user:
raise HTTPException(status_code=401, detail="Not authenticated")
try:
accept_user_invite(user.email, invite_request.tenant_id)
except Exception as e:
logger.exception(f"Failed to accept invite: {str(e)}")
raise HTTPException(status_code=500, detail="Failed to accept invitation")
@router.post("/users/invite/deny")
async def deny_invite(
invite_request: RequestInviteRequest,
user: User | None = Depends(current_user),
) -> None:
"""
Deny an invitation to join a tenant.
"""
if not user:
raise HTTPException(status_code=401, detail="Not authenticated")
try:
deny_user_invite(user.email, invite_request.tenant_id)
except Exception as e:
logger.exception(f"Failed to deny invite: {str(e)}")
raise HTTPException(status_code=500, detail="Failed to deny invitation")

View File

@@ -1,56 +1,27 @@
import logging
from fastapi_users import exceptions
from sqlalchemy import select
from sqlalchemy.orm import Session
from onyx.auth.invited_users import get_invited_users
from onyx.auth.invited_users import get_pending_users
from onyx.auth.invited_users import write_invited_users
from onyx.auth.invited_users import write_pending_users
from onyx.db.engine import get_session_with_shared_schema
from onyx.db.engine import get_session_with_tenant
from onyx.db.engine import get_sqlalchemy_engine
from onyx.db.models import UserTenantMapping
from onyx.server.manage.models import TenantSnapshot
from onyx.setup import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
logger = setup_logger()
logger = logging.getLogger(__name__)
def get_tenant_id_for_email(email: str) -> str:
if not MULTI_TENANT:
return POSTGRES_DEFAULT_SCHEMA
# Implement logic to get tenant_id from the mapping table
try:
with get_session_with_shared_schema() as db_session:
# First try to get an active tenant
result = db_session.execute(
select(UserTenantMapping).where(
UserTenantMapping.email == email,
UserTenantMapping.active == True, # noqa: E712
)
)
mapping = result.scalar_one_or_none()
tenant_id = mapping.tenant_id if mapping else None
# If no active tenant found, try to get the first inactive one
if tenant_id is None:
result = db_session.execute(
select(UserTenantMapping).where(
UserTenantMapping.email == email,
UserTenantMapping.active == False, # noqa: E712
)
)
mapping = result.scalar_one_or_none()
if mapping:
# Mark this mapping as active
mapping.active = True
db_session.commit()
tenant_id = mapping.tenant_id
except Exception as e:
logger.exception(f"Error getting tenant id for email {email}: {e}")
raise exceptions.UserNotExists()
with Session(get_sqlalchemy_engine()) as db_session:
result = db_session.execute(
select(UserTenantMapping.tenant_id).where(UserTenantMapping.email == email)
)
tenant_id = result.scalar_one_or_none()
if tenant_id is None:
raise exceptions.UserNotExists()
return tenant_id
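A minimal usage sketch of the lookup above; the email is hypothetical.
# Illustrative only: resolve the tenant/schema for a login email.
# Raises exceptions.UserNotExists when the email has no tenant mapping.
try:
    tenant_id = get_tenant_id_for_email("alice@example.com")
except exceptions.UserNotExists:
    tenant_id = None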
@@ -67,56 +38,13 @@ def user_owns_a_tenant(email: str) -> bool:
def add_users_to_tenant(emails: list[str], tenant_id: str) -> None:
"""
Add users to a tenant with proper transaction handling.
Checks if users already have a tenant mapping to avoid duplicates.
If a user already has an active mapping to any tenant, the new mapping will be added as inactive.
"""
with get_session_with_tenant(tenant_id=POSTGRES_DEFAULT_SCHEMA) as db_session:
try:
# Start a transaction
db_session.begin()
for email in emails:
# Check if the user already has a mapping to this tenant
existing_mapping = (
db_session.query(UserTenantMapping)
.filter(
UserTenantMapping.email == email,
UserTenantMapping.tenant_id == tenant_id,
)
.with_for_update()
.first()
)
# If user already has an active mapping, add this one as inactive
if not existing_mapping:
# Check if the user already has an active mapping to any tenant
has_active_mapping = (
db_session.query(UserTenantMapping)
.filter(
UserTenantMapping.email == email,
UserTenantMapping.active == True, # noqa: E712
)
.first()
)
db_session.add(
UserTenantMapping(
email=email,
tenant_id=tenant_id,
active=False if has_active_mapping else True,
)
)
# Commit the transaction
db_session.commit()
logger.info(f"Successfully added users {emails} to tenant {tenant_id}")
db_session.add(UserTenantMapping(email=email, tenant_id=tenant_id))
except Exception:
logger.exception(f"Failed to add users to tenant {tenant_id}")
db_session.rollback()
raise
db_session.commit()
def remove_users_from_tenant(emails: list[str], tenant_id: str) -> None:
@@ -148,187 +76,3 @@ def remove_all_users_from_tenant(tenant_id: str) -> None:
UserTenantMapping.tenant_id == tenant_id
).delete()
db_session.commit()
def invite_self_to_tenant(email: str, tenant_id: str) -> None:
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
try:
pending_users = get_pending_users()
if email in pending_users:
return
write_pending_users(pending_users + [email])
finally:
CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
def approve_user_invite(email: str, tenant_id: str) -> None:
"""
Approve a user invite to a tenant.
This will delete all existing records for this email and create a new mapping entry for the user in this tenant.
"""
with get_session_with_shared_schema() as db_session:
# Delete all existing records for this email
db_session.query(UserTenantMapping).filter(
UserTenantMapping.email == email
).delete()
# Create a new mapping entry for the user in this tenant
new_mapping = UserTenantMapping(email=email, tenant_id=tenant_id, active=True)
db_session.add(new_mapping)
db_session.commit()
# Remove the user from the pending users list
pending_users = get_pending_users()
if email in pending_users:
pending_users.remove(email)
write_pending_users(pending_users)
# Add to invited users
invited_users = get_invited_users()
if email not in invited_users:
invited_users.append(email)
write_invited_users(invited_users)
def accept_user_invite(email: str, tenant_id: str) -> None:
"""
Accept an invitation to join a tenant.
This activates the user's mapping to the tenant.
"""
with get_session_with_shared_schema() as db_session:
try:
# First check if there's an active mapping for this user and tenant
active_mapping = (
db_session.query(UserTenantMapping)
.filter(
UserTenantMapping.email == email,
UserTenantMapping.active == True, # noqa: E712
)
.first()
)
# If an active mapping exists, delete it
if active_mapping:
db_session.delete(active_mapping)
logger.info(
f"Deleted existing active mapping for user {email} in tenant {tenant_id}"
)
# Find the inactive mapping for this user and tenant
mapping = (
db_session.query(UserTenantMapping)
.filter(
UserTenantMapping.email == email,
UserTenantMapping.tenant_id == tenant_id,
UserTenantMapping.active == False, # noqa: E712
)
.first()
)
if mapping:
# Set all other mappings for this user to inactive
db_session.query(UserTenantMapping).filter(
UserTenantMapping.email == email,
UserTenantMapping.active == True, # noqa: E712
).update({"active": False})
# Activate this mapping
mapping.active = True
db_session.commit()
logger.info(f"User {email} accepted invitation to tenant {tenant_id}")
else:
logger.warning(
f"No invitation found for user {email} in tenant {tenant_id}"
)
except Exception as e:
db_session.rollback()
logger.exception(
f"Failed to accept invitation for user {email} to tenant {tenant_id}: {str(e)}"
)
raise
def deny_user_invite(email: str, tenant_id: str) -> None:
"""
Deny an invitation to join a tenant.
This removes the user's mapping to the tenant.
"""
with get_session_with_shared_schema() as db_session:
# Delete the mapping for this user and tenant
result = (
db_session.query(UserTenantMapping)
.filter(
UserTenantMapping.email == email,
UserTenantMapping.tenant_id == tenant_id,
UserTenantMapping.active == False, # noqa: E712
)
.delete()
)
db_session.commit()
if result:
logger.info(f"User {email} denied invitation to tenant {tenant_id}")
else:
logger.warning(
f"No invitation found for user {email} in tenant {tenant_id}"
)
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
try:
pending_users = get_invited_users()
if email in pending_users:
pending_users.remove(email)
write_invited_users(pending_users)
finally:
CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
def get_tenant_count(tenant_id: str) -> int:
"""
Get the number of active users for this tenant
"""
with get_session_with_shared_schema() as db_session:
# Count the number of active users for this tenant
user_count = (
db_session.query(UserTenantMapping)
.filter(
UserTenantMapping.tenant_id == tenant_id,
UserTenantMapping.active == True, # noqa: E712
)
.count()
)
return user_count
def get_tenant_invitation(email: str) -> TenantSnapshot | None:
"""
Get the first tenant invitation for this user
"""
with get_session_with_shared_schema() as db_session:
# Get the first tenant invitation for this user
invitation = (
db_session.query(UserTenantMapping)
.filter(
UserTenantMapping.email == email,
UserTenantMapping.active == False, # noqa: E712
)
.first()
)
if invitation:
# Get the user count for this tenant
user_count = (
db_session.query(UserTenantMapping)
.filter(
UserTenantMapping.tenant_id == invitation.tenant_id,
UserTenantMapping.active == True, # noqa: E712
)
.count()
)
return TenantSnapshot(
tenant_id=invitation.tenant_id, number_of_users=user_count
)
return None

View File

@@ -3,7 +3,6 @@ from shared_configs.enums import EmbedTextType
MODEL_WARM_UP_STRING = "hi " * 512
INFORMATION_CONTENT_MODEL_WARM_UP_STRING = "hi " * 16
DEFAULT_OPENAI_MODEL = "text-embedding-3-small"
DEFAULT_COHERE_MODEL = "embed-english-light-v3.0"
DEFAULT_VOYAGE_MODEL = "voyage-large-2-instruct"

View File

@@ -1,14 +1,11 @@
import numpy as np
import torch
import torch.nn.functional as F
from fastapi import APIRouter
from huggingface_hub import snapshot_download # type: ignore
from setfit import SetFitModel # type: ignore[import]
from transformers import AutoTokenizer # type: ignore
from transformers import BatchEncoding # type: ignore
from transformers import PreTrainedTokenizer # type: ignore
from model_server.constants import INFORMATION_CONTENT_MODEL_WARM_UP_STRING
from model_server.constants import MODEL_WARM_UP_STRING
from model_server.onyx_torch_model import ConnectorClassifier
from model_server.onyx_torch_model import HybridClassifier
@@ -16,22 +13,11 @@ from model_server.utils import simple_log_function_time
from onyx.utils.logger import setup_logger
from shared_configs.configs import CONNECTOR_CLASSIFIER_MODEL_REPO
from shared_configs.configs import CONNECTOR_CLASSIFIER_MODEL_TAG
from shared_configs.configs import (
INDEXING_INFORMATION_CONTENT_CLASSIFICATION_CUTOFF_LENGTH,
)
from shared_configs.configs import INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX
from shared_configs.configs import INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN
from shared_configs.configs import (
INDEXING_INFORMATION_CONTENT_CLASSIFICATION_TEMPERATURE,
)
from shared_configs.configs import INDEXING_ONLY
from shared_configs.configs import INFORMATION_CONTENT_MODEL_TAG
from shared_configs.configs import INFORMATION_CONTENT_MODEL_VERSION
from shared_configs.configs import INTENT_MODEL_TAG
from shared_configs.configs import INTENT_MODEL_VERSION
from shared_configs.model_server_models import ConnectorClassificationRequest
from shared_configs.model_server_models import ConnectorClassificationResponse
from shared_configs.model_server_models import ContentClassificationPrediction
from shared_configs.model_server_models import IntentRequest
from shared_configs.model_server_models import IntentResponse
@@ -45,10 +31,6 @@ _CONNECTOR_CLASSIFIER_MODEL: ConnectorClassifier | None = None
_INTENT_TOKENIZER: AutoTokenizer | None = None
_INTENT_MODEL: HybridClassifier | None = None
_INFORMATION_CONTENT_MODEL: SetFitModel | None = None
_INFORMATION_CONTENT_MODEL_PROMPT_PREFIX: str = ""  # specific to the model version!
def get_connector_classifier_tokenizer() -> AutoTokenizer:
global _CONNECTOR_CLASSIFIER_TOKENIZER
@@ -103,7 +85,7 @@ def get_intent_model_tokenizer() -> AutoTokenizer:
def get_local_intent_model(
model_name_or_path: str = INTENT_MODEL_VERSION,
tag: str | None = INTENT_MODEL_TAG,
tag: str = INTENT_MODEL_TAG,
) -> HybridClassifier:
global _INTENT_MODEL
if _INTENT_MODEL is None:
@@ -120,9 +102,7 @@ def get_local_intent_model(
try:
# Attempt to download the model snapshot
logger.notice(f"Downloading model snapshot for {model_name_or_path}")
local_path = snapshot_download(
repo_id=model_name_or_path, revision=tag, local_files_only=False
)
local_path = snapshot_download(repo_id=model_name_or_path, revision=tag)
_INTENT_MODEL = HybridClassifier.from_pretrained(local_path)
except Exception as e:
logger.error(
@@ -132,44 +112,6 @@ def get_local_intent_model(
return _INTENT_MODEL
def get_local_information_content_model(
model_name_or_path: str = INFORMATION_CONTENT_MODEL_VERSION,
tag: str | None = INFORMATION_CONTENT_MODEL_TAG,
) -> SetFitModel:
global _INFORMATION_CONTENT_MODEL
if _INFORMATION_CONTENT_MODEL is None:
try:
# Calculate where the cache should be, then load from local if available
logger.notice(
f"Loading content information model from local cache: {model_name_or_path}"
)
local_path = snapshot_download(
repo_id=model_name_or_path, revision=tag, local_files_only=True
)
_INFORMATION_CONTENT_MODEL = SetFitModel.from_pretrained(local_path)
logger.notice(
f"Loaded content information model from local cache: {local_path}"
)
except Exception as e:
logger.warning(f"Failed to load content information model directly: {e}")
try:
# Attempt to download the model snapshot
logger.notice(
f"Downloading content information model snapshot for {model_name_or_path}"
)
local_path = snapshot_download(
repo_id=model_name_or_path, revision=tag, local_files_only=False
)
_INFORMATION_CONTENT_MODEL = SetFitModel.from_pretrained(local_path)
except Exception as e:
logger.error(
f"Failed to load content information model even after attempted snapshot download: {e}"
)
raise
return _INFORMATION_CONTENT_MODEL
def tokenize_connector_classification_query(
connectors: list[str],
query: str,
@@ -253,13 +195,6 @@ def warm_up_intent_model() -> None:
)
def warm_up_information_content_model() -> None:
logger.notice("Warming up Content Model") # TODO: add version if needed
information_content_model = get_local_information_content_model()
information_content_model(INFORMATION_CONTENT_MODEL_WARM_UP_STRING)
@simple_log_function_time()
def run_inference(tokens: BatchEncoding) -> tuple[list[float], list[float]]:
intent_model = get_local_intent_model()
@@ -283,117 +218,6 @@ def run_inference(tokens: BatchEncoding) -> tuple[list[float], list[float]]:
return intent_probabilities.tolist(), token_positive_probs
@simple_log_function_time()
def run_content_classification_inference(
text_inputs: list[str],
) -> list[ContentClassificationPrediction]:
"""
Assign an information-content score to the segments in question. The model stored in
get_local_information_content_model() produces a 'model score' based on its training, and
those scores are then converted to a 0.0-1.0 scale. Outside of the model/inference model
servers, that score is converted into the actual boost factor.
"""
def _prob_to_score(prob: float) -> float:
"""
Conversion of base score to 0.0 - 1.0 score. Note that the min/max values depend on the model!
"""
_MIN_BASE_SCORE = 0.25
_MAX_BASE_SCORE = 0.75
if prob < _MIN_BASE_SCORE:
raw_score = 0.0
elif prob < _MAX_BASE_SCORE:
raw_score = (prob - _MIN_BASE_SCORE) / (_MAX_BASE_SCORE - _MIN_BASE_SCORE)
else:
raw_score = 1.0
return (
INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN
+ (
INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX
- INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN
)
* raw_score
)
_BATCH_SIZE = 32
content_model = get_local_information_content_model()
# Process inputs in batches
all_output_classes: list[int] = []
all_base_output_probabilities: list[float] = []
for i in range(0, len(text_inputs), _BATCH_SIZE):
batch = text_inputs[i : i + _BATCH_SIZE]
batch_with_prefix = []
batch_indices = []
# Pre-allocate results for this batch
batch_output_classes: list[np.ndarray] = [np.array(1)] * len(batch)
batch_probabilities: list[np.ndarray] = [np.array(1.0)] * len(batch)
# Pre-process batch to handle long input exceptions
for j, text in enumerate(batch):
if len(text) == 0:
# if no input, treat as non-informative from the model's perspective
batch_output_classes[j] = np.array(0)
batch_probabilities[j] = np.array(0.0)
logger.warning("Input for Content Information Model is empty")
elif (
len(text.split())
<= INDEXING_INFORMATION_CONTENT_CLASSIFICATION_CUTOFF_LENGTH
):
# if input is short, use the model
batch_with_prefix.append(
_INFORMATION_CONTENT_MODEL_PROMPT_PREFIX + text
)
batch_indices.append(j)
else:
# if longer than cutoff, treat as informative (stay with default), but issue warning
logger.warning("Input for Content Information Model too long")
if batch_with_prefix: # Only run model if we have valid inputs
# Get predictions for the batch
model_output_classes = content_model(batch_with_prefix)
model_output_probabilities = content_model.predict_proba(batch_with_prefix)
# Place results in the correct positions
for idx, batch_idx in enumerate(batch_indices):
batch_output_classes[batch_idx] = model_output_classes[idx].numpy()
batch_probabilities[batch_idx] = model_output_probabilities[idx][
1
].numpy() # x[1] is prob of the positive class
all_output_classes.extend([int(x) for x in batch_output_classes])
all_base_output_probabilities.extend([float(x) for x in batch_probabilities])
logits = [
np.log(p / (1 - p)) if p != 0.0 and p != 1.0 else (100 if p == 1.0 else -100)
for p in all_base_output_probabilities
]
scaled_logits = [
logit / INDEXING_INFORMATION_CONTENT_CLASSIFICATION_TEMPERATURE
for logit in logits
]
output_probabilities_with_temp = [
np.exp(scaled_logit) / (1 + np.exp(scaled_logit))
for scaled_logit in scaled_logits
]
prediction_scores = [
_prob_to_score(p_temp) for p_temp in output_probabilities_with_temp
]
content_classification_predictions = [
ContentClassificationPrediction(
predicted_label=predicted_label, content_boost_factor=output_score
)
for predicted_label, output_score in zip(all_output_classes, prediction_scores)
]
return content_classification_predictions
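For intuition, a hedged, self-contained sketch of the conversion implemented by _prob_to_score plus the temperature scaling above; the base-score cutoffs, boost bounds, and temperature here are assumed example values, not the configured ones.
import numpy as np

# Standalone, illustrative version of the prob -> logit -> temperature -> boost mapping.
# Every constant below is an assumed example value, not the configured one.
_EXAMPLE_MIN_BASE_SCORE, _EXAMPLE_MAX_BASE_SCORE = 0.25, 0.75
_EXAMPLE_BOOST_MIN, _EXAMPLE_BOOST_MAX = 0.75, 1.25
_EXAMPLE_TEMPERATURE = 4.0

def example_prob_to_boost(p: float) -> float:
    # Temperature-scale the logit, then squash back into a probability
    logit = np.log(p / (1 - p)) if 0.0 < p < 1.0 else (100 if p == 1.0 else -100)
    p_temp = 1.0 / (1.0 + np.exp(-logit / _EXAMPLE_TEMPERATURE))
    # Clamp between the model-specific base-score bounds, then rescale into the boost range
    raw = min(max((p_temp - _EXAMPLE_MIN_BASE_SCORE) / (_EXAMPLE_MAX_BASE_SCORE - _EXAMPLE_MIN_BASE_SCORE), 0.0), 1.0)
    return float(_EXAMPLE_BOOST_MIN + (_EXAMPLE_BOOST_MAX - _EXAMPLE_BOOST_MIN) * raw)

print(round(example_prob_to_boost(0.9), 3))  # a confident "informative" prediction lands near the upper bound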
def map_keywords(
input_ids: torch.Tensor, tokenizer: AutoTokenizer, is_keyword: list[bool]
) -> list[str]:
@@ -538,10 +362,3 @@ async def process_analysis_request(
is_keyword, keywords = run_analysis(intent_request)
return IntentResponse(is_keyword=is_keyword, keywords=keywords)
@router.post("/content-classification")
async def process_content_classification_request(
content_classification_requests: list[str],
) -> list[ContentClassificationPrediction]:
return run_content_classification_inference(content_classification_requests)

View File

@@ -62,60 +62,6 @@ _OPENAI_MAX_INPUT_LEN = 2048
# Cohere allows up to 96 embeddings in a single embedding calling
_COHERE_MAX_INPUT_LEN = 96
# Authentication error string constants
_AUTH_ERROR_401 = "401"
_AUTH_ERROR_UNAUTHORIZED = "unauthorized"
_AUTH_ERROR_INVALID_API_KEY = "invalid api key"
_AUTH_ERROR_PERMISSION = "permission"
def is_authentication_error(error: Exception) -> bool:
"""Check if an exception is related to authentication issues.
Args:
error: The exception to check
Returns:
bool: True if the error appears to be authentication-related
"""
error_str = str(error).lower()
return (
_AUTH_ERROR_401 in error_str
or _AUTH_ERROR_UNAUTHORIZED in error_str
or _AUTH_ERROR_INVALID_API_KEY in error_str
or _AUTH_ERROR_PERMISSION in error_str
)
def format_embedding_error(
error: Exception,
service_name: str,
model: str | None,
provider: EmbeddingProvider,
status_code: int | None = None,
) -> str:
"""
Format a standardized error string for embedding errors.
"""
detail = f"Status {status_code}" if status_code else f"{type(error)}"
return (
f"{'HTTP error' if status_code else 'Exception'} embedding text with {service_name} - {detail}: "
f"Model: {model} "
f"Provider: {provider} "
f"Exception: {error}"
)
# Custom exception for authentication errors
class AuthenticationError(Exception):
"""Raised when authentication fails with a provider."""
def __init__(self, provider: str, message: str = "API key is invalid or expired"):
self.provider = provider
self.message = message
super().__init__(f"{provider} authentication failed: {message}")
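A short sketch of how the two helpers above fit together; the error message and provider name are fabricated for illustration.
# Illustrative only: the raised message and provider name are made up.
try:
    raise RuntimeError("401 Unauthorized: invalid api key")
except Exception as err:
    if is_authentication_error(err):
        auth_err = AuthenticationError(provider="ExampleProvider")
        print(auth_err)  # "ExampleProvider authentication failed: API key is invalid or expired"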
class CloudEmbedding:
def __init__(
@@ -146,17 +92,31 @@ class CloudEmbedding:
)
final_embeddings: list[Embedding] = []
try:
for text_batch in batch_list(texts, _OPENAI_MAX_INPUT_LEN):
response = await client.embeddings.create(
input=text_batch,
model=model,
dimensions=reduced_dimension or openai.NOT_GIVEN,
)
final_embeddings.extend(
[embedding.embedding for embedding in response.data]
)
return final_embeddings
except Exception as e:
error_string = (
f"Exception embedding text with OpenAI - {type(e)}: "
f"Model: {model} "
f"Provider: {self.provider} "
f"Exception: {e}"
)
logger.error(error_string)
for text_batch in batch_list(texts, _OPENAI_MAX_INPUT_LEN):
response = await client.embeddings.create(
input=text_batch,
model=model,
dimensions=reduced_dimension or openai.NOT_GIVEN,
)
final_embeddings.extend(
[embedding.embedding for embedding in response.data]
)
return final_embeddings
# only log text when it's not an authentication error.
if not isinstance(e, openai.AuthenticationError):
logger.debug(f"Exception texts: {texts}")
raise RuntimeError(error_string)
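For reference, a sketch of the chunking behavior that batch_list is assumed to provide; the helper itself lives elsewhere in the codebase, so this is an illustrative stand-in.
# Assumed behavior of batch_list, shown standalone for illustration only.
def example_batch_list(items: list[str], batch_size: int) -> list[list[str]]:
    return [items[i : i + batch_size] for i in range(0, len(items), batch_size)]

print(example_batch_list(["a", "b", "c"], 2))  # [['a', 'b'], ['c']]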
async def _embed_cohere(
self, texts: list[str], model: str | None, embedding_type: str
@@ -195,6 +155,7 @@ class CloudEmbedding:
input_type=embedding_type,
truncation=True,
)
return response.embeddings
async def _embed_azure(
@@ -278,51 +239,22 @@ class CloudEmbedding:
deployment_name: str | None = None,
reduced_dimension: int | None = None,
) -> list[Embedding]:
try:
if self.provider == EmbeddingProvider.OPENAI:
return await self._embed_openai(texts, model_name, reduced_dimension)
elif self.provider == EmbeddingProvider.AZURE:
return await self._embed_azure(texts, f"azure/{deployment_name}")
elif self.provider == EmbeddingProvider.LITELLM:
return await self._embed_litellm_proxy(texts, model_name)
if self.provider == EmbeddingProvider.OPENAI:
return await self._embed_openai(texts, model_name, reduced_dimension)
elif self.provider == EmbeddingProvider.AZURE:
return await self._embed_azure(texts, f"azure/{deployment_name}")
elif self.provider == EmbeddingProvider.LITELLM:
return await self._embed_litellm_proxy(texts, model_name)
embedding_type = EmbeddingModelTextType.get_type(self.provider, text_type)
if self.provider == EmbeddingProvider.COHERE:
return await self._embed_cohere(texts, model_name, embedding_type)
elif self.provider == EmbeddingProvider.VOYAGE:
return await self._embed_voyage(texts, model_name, embedding_type)
elif self.provider == EmbeddingProvider.GOOGLE:
return await self._embed_vertex(texts, model_name, embedding_type)
else:
raise ValueError(f"Unsupported provider: {self.provider}")
except openai.AuthenticationError:
raise AuthenticationError(provider="OpenAI")
except httpx.HTTPStatusError as e:
if e.response.status_code == 401:
raise AuthenticationError(provider=str(self.provider))
error_string = format_embedding_error(
e,
str(self.provider),
model_name or deployment_name,
self.provider,
status_code=e.response.status_code,
)
logger.error(error_string)
logger.debug(f"Exception texts: {texts}")
raise RuntimeError(error_string)
except Exception as e:
if is_authentication_error(e):
raise AuthenticationError(provider=str(self.provider))
error_string = format_embedding_error(
e, str(self.provider), model_name or deployment_name, self.provider
)
logger.error(error_string)
logger.debug(f"Exception texts: {texts}")
raise RuntimeError(error_string)
embedding_type = EmbeddingModelTextType.get_type(self.provider, text_type)
if self.provider == EmbeddingProvider.COHERE:
return await self._embed_cohere(texts, model_name, embedding_type)
elif self.provider == EmbeddingProvider.VOYAGE:
return await self._embed_voyage(texts, model_name, embedding_type)
elif self.provider == EmbeddingProvider.GOOGLE:
return await self._embed_vertex(texts, model_name, embedding_type)
else:
raise ValueError(f"Unsupported provider: {self.provider}")
@staticmethod
def create(
@@ -637,13 +569,6 @@ async def process_embed_request(
gpu_type=gpu_type,
)
return EmbedResponse(embeddings=embeddings)
except AuthenticationError as e:
# Handle authentication errors consistently
logger.error(f"Authentication error: {e.provider}")
raise HTTPException(
status_code=401,
detail=f"Authentication failed: {e.message}",
)
except RateLimitError as e:
raise HTTPException(
status_code=429,

View File

@@ -13,7 +13,6 @@ from sentry_sdk.integrations.starlette import StarletteIntegration
from transformers import logging as transformer_logging # type:ignore
from model_server.custom_models import router as custom_models_router
from model_server.custom_models import warm_up_information_content_model
from model_server.custom_models import warm_up_intent_model
from model_server.encoders import router as encoders_router
from model_server.management_endpoints import router as management_router
@@ -65,31 +64,19 @@ async def lifespan(app: FastAPI) -> AsyncGenerator:
app.state.gpu_type = gpu_type
try:
if TEMP_HF_CACHE_PATH.is_dir():
logger.notice("Moving contents of temp_huggingface to huggingface cache.")
_move_files_recursively(TEMP_HF_CACHE_PATH, HF_CACHE_PATH)
shutil.rmtree(TEMP_HF_CACHE_PATH, ignore_errors=True)
logger.notice("Moved contents of temp_huggingface to huggingface cache.")
except Exception as e:
logger.warning(
f"Error moving contents of temp_huggingface to huggingface cache: {e}. "
"This is not a critical error and the model server will continue to run."
)
if TEMP_HF_CACHE_PATH.is_dir():
logger.notice("Moving contents of temp_huggingface to huggingface cache.")
_move_files_recursively(TEMP_HF_CACHE_PATH, HF_CACHE_PATH)
shutil.rmtree(TEMP_HF_CACHE_PATH, ignore_errors=True)
logger.notice("Moved contents of temp_huggingface to huggingface cache.")
torch.set_num_threads(max(MIN_THREADS_ML_MODELS, torch.get_num_threads()))
logger.notice(f"Torch Threads: {torch.get_num_threads()}")
if not INDEXING_ONLY:
logger.notice(
"The intent model should run on the model server. The information content model should not run here."
)
warm_up_intent_model()
else:
logger.notice(
"The content information model should run on the indexing model server. The intent model should not run here."
)
warm_up_information_content_model()
logger.notice("This model server should only run document indexing.")
yield

View File

@@ -18,7 +18,7 @@ def _get_access_for_document(
document_id=document_id,
)
doc_access = DocumentAccess.build(
return DocumentAccess.build(
user_emails=info[1] if info and info[1] else [],
user_groups=[],
external_user_emails=[],
@@ -26,8 +26,6 @@ def _get_access_for_document(
is_public=info[2] if info else False,
)
return doc_access
def get_access_for_document(
document_id: str,
@@ -40,12 +38,12 @@ def get_access_for_document(
def get_null_document_access() -> DocumentAccess:
return DocumentAccess.build(
user_emails=[],
user_groups=[],
return DocumentAccess(
user_emails=set(),
user_groups=set(),
is_public=False,
external_user_emails=[],
external_user_group_ids=[],
external_user_emails=set(),
external_user_group_ids=set(),
)
@@ -58,18 +56,18 @@ def _get_access_for_documents(
document_ids=document_ids,
)
doc_access = {
document_id: DocumentAccess.build(
user_emails=[email for email in user_emails if email],
document_id: DocumentAccess(
user_emails=set([email for email in user_emails if email]),
# MIT version will wipe all groups and external groups on update
user_groups=[],
user_groups=set(),
is_public=is_public,
external_user_emails=[],
external_user_group_ids=[],
external_user_emails=set(),
external_user_group_ids=set(),
)
for document_id, user_emails, is_public in document_access_info
}
# Sometimes the document has not been indexed by the indexing job yet, in those cases
# Sometimes the document has not be indexed by the indexing job yet, in those cases
# the document does not exist and so we use least permissive. Specifically the EE version
# checks the MIT version permissions and creates a superset. This ensures that this flow
# does not fail even if the Document has not yet been indexed.

View File

@@ -20,7 +20,7 @@ class ExternalAccess:
class DocExternalAccess:
"""
This is just a class to wrap the external access and the document ID
together. It's used for syncing document permissions to Vespa.
together. It's used for syncing document permissions to Redis.
"""
external_access: ExternalAccess
@@ -56,45 +56,33 @@ class DocExternalAccess:
)
@dataclass(frozen=True, init=False)
@dataclass(frozen=True)
class DocumentAccess(ExternalAccess):
# User emails for Onyx users, None indicates admin
user_emails: set[str | None]
# Names of user groups associated with this document
user_groups: set[str]
external_user_emails: set[str]
external_user_group_ids: set[str]
is_public: bool
def __init__(self) -> None:
raise TypeError(
"Use `DocumentAccess.build(...)` instead of creating an instance directly."
)
def to_acl(self) -> set[str]:
# the acl's emitted by this function are prefixed by type
# to get the native objects, access the member variables directly
acl_set: set[str] = set()
for user_email in self.user_emails:
if user_email:
acl_set.add(prefix_user_email(user_email))
for group_name in self.user_groups:
acl_set.add(prefix_user_group(group_name))
for external_user_email in self.external_user_emails:
acl_set.add(prefix_user_email(external_user_email))
for external_group_id in self.external_user_group_ids:
acl_set.add(prefix_external_group(external_group_id))
if self.is_public:
acl_set.add(PUBLIC_DOC_PAT)
return acl_set
return set(
[
prefix_user_email(user_email)
for user_email in self.user_emails
if user_email
]
+ [prefix_user_group(group_name) for group_name in self.user_groups]
+ [
prefix_user_email(user_email)
for user_email in self.external_user_emails
]
+ [
# The group names are already prefixed by the source type
# This adds an additional prefix of "external_group:"
prefix_external_group(group_name)
for group_name in self.external_user_group_ids
]
+ ([PUBLIC_DOC_PAT] if self.is_public else [])
)
@classmethod
def build(
@@ -105,32 +93,29 @@ class DocumentAccess(ExternalAccess):
external_user_group_ids: list[str],
is_public: bool,
) -> "DocumentAccess":
"""Don't prefix incoming data wth acl type, prefix on read from to_acl!"""
obj = object.__new__(cls)
object.__setattr__(
obj, "user_emails", {user_email for user_email in user_emails if user_email}
return cls(
external_user_emails={
prefix_user_email(external_email)
for external_email in external_user_emails
},
external_user_group_ids={
prefix_external_group(external_group_id)
for external_group_id in external_user_group_ids
},
user_emails={
prefix_user_email(user_email)
for user_email in user_emails
if user_email
},
user_groups=set(user_groups),
is_public=is_public,
)
object.__setattr__(obj, "user_groups", set(user_groups))
object.__setattr__(
obj,
"external_user_emails",
{external_email for external_email in external_user_emails},
)
object.__setattr__(
obj,
"external_user_group_ids",
{external_group_id for external_group_id in external_user_group_ids},
)
object.__setattr__(obj, "is_public", is_public)
return obj
default_public_access = DocumentAccess.build(
external_user_emails=[],
external_user_group_ids=[],
user_emails=[],
user_groups=[],
default_public_access = DocumentAccess(
external_user_emails=set(),
external_user_group_ids=set(),
user_emails=set(),
user_groups=set(),
is_public=True,
)
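A minimal sketch of the ACL-string pattern the diff above is reshaping: typed prefixes are applied on read and a public marker is appended when the document is public. The prefix helpers and PUBLIC_DOC_PAT here are hypothetical stand-ins; the real ones live in the Onyx codebase and may format differently.

# Hypothetical stand-ins for the real prefix helpers, shown only to
# illustrate how typed ACL entries are assembled before indexing.
PUBLIC_DOC_PAT = "PUBLIC"

def prefix_user_email(email: str) -> str:
    return f"user_email:{email}"

def prefix_user_group(group: str) -> str:
    return f"group:{group}"

def prefix_external_group(group_id: str) -> str:
    return f"external_group:{group_id}"

def build_acl(
    user_emails: set[str],
    user_groups: set[str],
    external_user_emails: set[str],
    external_user_group_ids: set[str],
    is_public: bool,
) -> set[str]:
    # Each entry carries its type prefix so the index can distinguish
    # internal users, internal groups, and externally-synced principals.
    acl: set[str] = set()
    acl.update(prefix_user_email(e) for e in user_emails if e)
    acl.update(prefix_user_group(g) for g in user_groups)
    acl.update(prefix_user_email(e) for e in external_user_emails)
    acl.update(prefix_external_group(g) for g in external_user_group_ids)
    if is_public:
        acl.add(PUBLIC_DOC_PAT)
    return acl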

View File

@@ -7,6 +7,7 @@ from langgraph.types import StreamWriter
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import LlmDoc
from onyx.chat.models import OnyxContext
from onyx.chat.stream_processing.answer_response_handler import AnswerResponseHandler
from onyx.chat.stream_processing.answer_response_handler import CitationResponseHandler
from onyx.chat.stream_processing.answer_response_handler import (
@@ -23,7 +24,7 @@ def process_llm_stream(
should_stream_answer: bool,
writer: StreamWriter,
final_search_results: list[LlmDoc] | None = None,
displayed_search_results: list[LlmDoc] | None = None,
displayed_search_results: list[OnyxContext] | list[LlmDoc] | None = None,
) -> AIMessageChunk:
tool_call_chunk = AIMessageChunk(content="")

View File

@@ -156,6 +156,7 @@ def generate_initial_answer(
for tool_response in yield_search_responses(
query=question,
get_retrieved_sections=lambda: answer_generation_documents.context_documents,
get_reranked_sections=lambda: answer_generation_documents.streaming_documents,
get_final_context_sections=lambda: answer_generation_documents.context_documents,
search_query_info=query_info,
get_section_relevance=lambda: relevance_list,

View File

@@ -183,6 +183,7 @@ def generate_validate_refined_answer(
for tool_response in yield_search_responses(
query=question,
get_retrieved_sections=lambda: answer_generation_documents.context_documents,
get_reranked_sections=lambda: answer_generation_documents.streaming_documents,
get_final_context_sections=lambda: answer_generation_documents.context_documents,
search_query_info=query_info,
get_section_relevance=lambda: relevance_list,

View File

@@ -57,6 +57,7 @@ def format_results(
for tool_response in yield_search_responses(
query=state.question,
get_retrieved_sections=lambda: reranked_documents,
get_reranked_sections=lambda: state.retrieved_documents,
get_final_context_sections=lambda: reranked_documents,
search_query_info=query_info,
get_section_relevance=lambda: relevance_list,

View File

@@ -13,7 +13,9 @@ from onyx.tools.tool_implementations.search.search_tool import (
SEARCH_RESPONSE_SUMMARY_ID,
)
from onyx.tools.tool_implementations.search.search_tool import SearchResponseSummary
from onyx.tools.tool_implementations.search.search_utils import section_to_llm_doc
from onyx.tools.tool_implementations.search.search_utils import (
context_from_inference_section,
)
from onyx.tools.tool_implementations.search_like_tool_utils import (
FINAL_CONTEXT_DOCUMENTS_ID,
)
@@ -57,7 +59,9 @@ def basic_use_tool_response(
search_response_summary = cast(SearchResponseSummary, yield_item.response)
for section in search_response_summary.top_sections:
if section.center_chunk.document_id not in initial_search_results:
initial_search_results.append(section_to_llm_doc(section))
initial_search_results.append(
context_from_inference_section(section)
)
new_tool_call_chunk = AIMessageChunk(content="")
if not agent_config.behavior.skip_gen_ai_answer_generation:

View File

@@ -1,6 +1,5 @@
import smtplib
from datetime import datetime
from email.mime.image import MIMEImage
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.utils import formatdate
@@ -14,13 +13,8 @@ from onyx.configs.app_configs import SMTP_SERVER
from onyx.configs.app_configs import SMTP_USER
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.configs.constants import AuthType
from onyx.configs.constants import ONYX_DEFAULT_APPLICATION_NAME
from onyx.configs.constants import ONYX_SLACK_URL
from onyx.configs.constants import TENANT_ID_COOKIE_NAME
from onyx.db.models import User
from onyx.server.runtime.onyx_runtime import OnyxRuntime
from onyx.utils.file import FileWithMimeType
from onyx.utils.url import add_url_params
from onyx.utils.variable_functionality import fetch_versioned_implementation
from shared_configs.configs import MULTI_TENANT
HTML_EMAIL_TEMPLATE = """\
@@ -62,11 +56,6 @@ HTML_EMAIL_TEMPLATE = """\
}}
.header img {{
max-width: 140px;
width: 140px;
height: auto;
filter: brightness(1.1) contrast(1.2);
border-radius: 8px;
padding: 5px;
}}
.body-content {{
padding: 20px 30px;
@@ -83,16 +72,12 @@ HTML_EMAIL_TEMPLATE = """\
}}
.cta-button {{
display: inline-block;
padding: 14px 24px;
background-color: #0055FF;
padding: 12px 20px;
background-color: #000000;
color: #ffffff !important;
text-decoration: none;
border-radius: 4px;
font-weight: 600;
font-size: 16px;
margin-top: 10px;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
text-align: center;
font-weight: 500;
}}
.footer {{
font-size: 13px;
@@ -112,8 +97,8 @@ HTML_EMAIL_TEMPLATE = """\
<td class="header">
<img
style="background-color: #ffffff; border-radius: 8px;"
src="cid:logo.png"
alt="{application_name} Logo"
src="https://www.onyx.app/logos/customer/onyx.png"
alt="Onyx Logo"
>
</td>
</tr>
@@ -128,8 +113,9 @@ HTML_EMAIL_TEMPLATE = """\
</tr>
<tr>
<td class="footer">
© {year} {application_name}. All rights reserved.
{slack_fragment}
© {year} Onyx. All rights reserved.
<br>
Have questions? Join our Slack community <a href="https://join.slack.com/t/onyx-dot-app/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA">here</a>.
</td>
</tr>
</table>
@@ -139,27 +125,17 @@ HTML_EMAIL_TEMPLATE = """\
def build_html_email(
application_name: str | None,
heading: str,
message: str,
cta_text: str | None = None,
cta_link: str | None = None,
heading: str, message: str, cta_text: str | None = None, cta_link: str | None = None
) -> str:
slack_fragment = ""
if application_name == ONYX_DEFAULT_APPLICATION_NAME:
slack_fragment = f'<br>Have questions? Join our Slack community <a href="{ONYX_SLACK_URL}">here</a>.'
if cta_text and cta_link:
cta_block = f'<a class="cta-button" href="{cta_link}">{cta_text}</a>'
else:
cta_block = ""
return HTML_EMAIL_TEMPLATE.format(
application_name=application_name,
title=heading,
heading=heading,
message=message,
cta_block=cta_block,
slack_fragment=slack_fragment,
year=datetime.now().year,
)
@@ -170,44 +146,22 @@ def send_email(
html_body: str,
text_body: str,
mail_from: str = EMAIL_FROM,
inline_png: tuple[str, bytes] | None = None,
) -> None:
if not EMAIL_CONFIGURED:
raise ValueError("Email is not configured.")
# Create a multipart/alternative message - this indicates these are alternative versions of the same content
msg = MIMEMultipart("alternative")
msg["Subject"] = subject
msg["To"] = user_email
if mail_from:
msg["From"] = mail_from
msg["From"] = mail_from
msg["Date"] = formatdate(localtime=True)
msg["Message-ID"] = make_msgid(domain="onyx.app")
# Add text part first (lowest priority)
text_part = MIMEText(text_body, "plain")
msg.attach(text_part)
part_text = MIMEText(text_body, "plain")
part_html = MIMEText(html_body, "html")
if inline_png:
# For HTML with images, create a multipart/related container
related = MIMEMultipart("related")
# Add the HTML part to the related container
html_part = MIMEText(html_body, "html")
related.attach(html_part)
# Add image with proper Content-ID to the related container
img = MIMEImage(inline_png[1], _subtype="png")
img.add_header("Content-ID", f"<{inline_png[0]}>")
img.add_header("Content-Disposition", "inline", filename=inline_png[0])
related.attach(img)
# Add the related part to the message (higher priority than text)
msg.attach(related)
else:
# No images, just add HTML directly (higher priority than text)
html_part = MIMEText(html_body, "html")
msg.attach(html_part)
msg.attach(part_text)
msg.attach(part_html)
try:
with smtplib.SMTP(SMTP_SERVER, SMTP_PORT) as s:
@@ -219,21 +173,8 @@ def send_email(
def send_subscription_cancellation_email(user_email: str) -> None:
"""This is templated but isn't meaningful for whitelabeling."""
# Example usage of the reusable HTML
try:
load_runtime_settings_fn = fetch_versioned_implementation(
"onyx.server.enterprise_settings.store", "load_runtime_settings"
)
settings = load_runtime_settings_fn()
application_name = settings.application_name
except ModuleNotFoundError:
application_name = ONYX_DEFAULT_APPLICATION_NAME
onyx_file = OnyxRuntime.get_emailable_logo()
subject = f"Your {application_name} Subscription Has Been Canceled"
subject = "Your Onyx Subscription Has Been Canceled"
heading = "Subscription Canceled"
message = (
"<p>We're sorry to see you go.</p>"
@@ -242,48 +183,23 @@ def send_subscription_cancellation_email(user_email: str) -> None:
)
cta_text = "Renew Subscription"
cta_link = "https://www.onyx.app/pricing"
html_content = build_html_email(
application_name,
heading,
message,
cta_text,
cta_link,
)
html_content = build_html_email(heading, message, cta_text, cta_link)
text_content = (
"We're sorry to see you go.\n"
"Your subscription has been canceled and will end on your next billing date.\n"
"If you change your mind, visit https://www.onyx.app/pricing"
)
send_email(
user_email,
subject,
html_content,
text_content,
inline_png=("logo.png", onyx_file.data),
)
send_email(user_email, subject, html_content, text_content)
def send_user_email_invite(
user_email: str, current_user: User, auth_type: AuthType
) -> None:
onyx_file: FileWithMimeType | None = None
try:
load_runtime_settings_fn = fetch_versioned_implementation(
"onyx.server.enterprise_settings.store", "load_runtime_settings"
)
settings = load_runtime_settings_fn()
application_name = settings.application_name
except ModuleNotFoundError:
application_name = ONYX_DEFAULT_APPLICATION_NAME
onyx_file = OnyxRuntime.get_emailable_logo()
subject = f"Invitation to Join {application_name} Organization"
subject = "Invitation to Join Onyx Organization"
heading = "You've Been Invited!"
# the exact action taken by the user, and thus the message, depends on the auth type
message = f"<p>You have been invited by {current_user.email} to join an organization on {application_name}.</p>"
message = f"<p>You have been invited by {current_user.email} to join an organization on Onyx.</p>"
if auth_type == AuthType.CLOUD:
message += (
"<p>To join the organization, please click the button below to set a password "
@@ -309,32 +225,19 @@ def send_user_email_invite(
cta_text = "Join Organization"
cta_link = f"{WEB_DOMAIN}/auth/signup?email={user_email}"
html_content = build_html_email(
application_name,
heading,
message,
cta_text,
cta_link,
)
html_content = build_html_email(heading, message, cta_text, cta_link)
# text content is the fallback for clients that don't support HTML;
# it's less critical, so we don't special-case each auth type
text_content = (
f"You have been invited by {current_user.email} to join an organization on {application_name}.\n"
f"You have been invited by {current_user.email} to join an organization on Onyx.\n"
"To join the organization, please visit the following link:\n"
f"{WEB_DOMAIN}/auth/signup?email={user_email}\n"
)
if auth_type == AuthType.CLOUD:
text_content += "You'll be asked to set a password or login with Google to complete your registration."
send_email(
user_email,
subject,
html_content,
text_content,
inline_png=("logo.png", onyx_file.data),
)
send_email(user_email, subject, html_content, text_content)
def send_forgot_password_email(
@@ -344,80 +247,27 @@ def send_forgot_password_email(
mail_from: str = EMAIL_FROM,
) -> None:
# Builds a forgot password email with or without fancy HTML
try:
load_runtime_settings_fn = fetch_versioned_implementation(
"onyx.server.enterprise_settings.store", "load_runtime_settings"
)
settings = load_runtime_settings_fn()
application_name = settings.application_name
except ModuleNotFoundError:
application_name = ONYX_DEFAULT_APPLICATION_NAME
onyx_file = OnyxRuntime.get_emailable_logo()
subject = f"Reset Your {application_name} Password"
heading = "Reset Your Password"
tenant_param = f"&tenant={tenant_id}" if tenant_id and MULTI_TENANT else ""
message = "<p>Please click the button below to reset your password. This link will expire in 24 hours.</p>"
cta_text = "Reset Password"
cta_link = f"{WEB_DOMAIN}/auth/reset-password?token={token}{tenant_param}"
html_content = build_html_email(
application_name,
heading,
message,
cta_text,
cta_link,
)
text_content = (
f"Please click the following link to reset your password. This link will expire in 24 hours.\n"
f"{WEB_DOMAIN}/auth/reset-password?token={token}{tenant_param}"
)
send_email(
user_email,
subject,
html_content,
text_content,
mail_from,
inline_png=("logo.png", onyx_file.data),
)
subject = "Onyx Forgot Password"
link = f"{WEB_DOMAIN}/auth/reset-password?token={token}"
if MULTI_TENANT:
link += f"&{TENANT_ID_COOKIE_NAME}={tenant_id}"
message = f"<p>Click the following link to reset your password:</p><p>{link}</p>"
html_content = build_html_email("Reset Your Password", message)
text_content = f"Click the following link to reset your password: {link}"
send_email(user_email, subject, html_content, text_content, mail_from)
def send_user_verification_email(
user_email: str,
token: str,
new_organization: bool = False,
mail_from: str = EMAIL_FROM,
) -> None:
# Builds a verification email
try:
load_runtime_settings_fn = fetch_versioned_implementation(
"onyx.server.enterprise_settings.store", "load_runtime_settings"
)
settings = load_runtime_settings_fn()
application_name = settings.application_name
except ModuleNotFoundError:
application_name = ONYX_DEFAULT_APPLICATION_NAME
onyx_file = OnyxRuntime.get_emailable_logo()
subject = f"{application_name} Email Verification"
subject = "Onyx Email Verification"
link = f"{WEB_DOMAIN}/auth/verify-email?token={token}"
if new_organization:
link = add_url_params(link, {"first_user": "true"})
message = (
f"<p>Click the following link to verify your email address:</p><p>{link}</p>"
)
html_content = build_html_email(
application_name,
"Verify Your Email",
message,
)
html_content = build_html_email("Verify Your Email", message)
text_content = f"Click the following link to verify your email address: {link}"
send_email(
user_email,
subject,
html_content,
text_content,
mail_from,
inline_png=("logo.png", onyx_file.data),
)
send_email(user_email, subject, html_content, text_content, mail_from)
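The send_email changes above hinge on standard-library MIME nesting: a multipart/alternative envelope holding the plain-text fallback and, when an inline logo is attached, a multipart/related container holding the HTML plus the image referenced via cid:. A self-contained sketch of that structure, assuming placeholder SMTP host, addresses, and logo bytes:

import smtplib
from email.mime.image import MIMEImage
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

def build_message_with_inline_logo(
    subject: str, to_addr: str, text_body: str, html_body: str, logo_png: bytes
) -> MIMEMultipart:
    # "alternative" tells the client: render the richest part you support.
    msg = MIMEMultipart("alternative")
    msg["Subject"] = subject
    msg["To"] = to_addr
    msg.attach(MIMEText(text_body, "plain"))  # lowest-priority fallback

    # "related" bundles the HTML with the resources it references via cid:
    related = MIMEMultipart("related")
    related.attach(MIMEText(html_body, "html"))
    img = MIMEImage(logo_png, _subtype="png")
    img.add_header("Content-ID", "<logo.png>")
    img.add_header("Content-Disposition", "inline", filename="logo.png")
    related.attach(img)
    msg.attach(related)
    return msg

# Usage sketch (placeholder server and content):
# msg = build_message_with_inline_logo(
#     "Hello", "user@example.com", "plain text",
#     '<img src="cid:logo.png">', open("logo.png", "rb").read(),
# )
# with smtplib.SMTP("localhost") as s:
#     s.send_message(msg)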

View File

@@ -1,6 +1,5 @@
from typing import cast
from onyx.configs.constants import KV_PENDING_USERS_KEY
from onyx.configs.constants import KV_USER_STORE_KEY
from onyx.key_value_store.factory import get_kv_store
from onyx.key_value_store.interface import KvKeyNotFoundError
@@ -19,17 +18,3 @@ def write_invited_users(emails: list[str]) -> int:
store = get_kv_store()
store.store(KV_USER_STORE_KEY, cast(JSON_ro, emails))
return len(emails)
def get_pending_users() -> list[str]:
try:
store = get_kv_store()
return cast(list, store.load(KV_PENDING_USERS_KEY))
except KvKeyNotFoundError:
return list()
def write_pending_users(emails: list[str]) -> int:
store = get_kv_store()
store.store(KV_PENDING_USERS_KEY, cast(JSON_ro, emails))
return len(emails)

View File

@@ -1,211 +0,0 @@
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import cast
from typing import Dict
from typing import List
from typing import Optional
import httpx
from fastapi_users.manager import BaseUserManager
from sqlalchemy.ext.asyncio import AsyncSession
from onyx.configs.app_configs import OAUTH_CLIENT_ID
from onyx.configs.app_configs import OAUTH_CLIENT_SECRET
from onyx.configs.app_configs import TRACK_EXTERNAL_IDP_EXPIRY
from onyx.db.models import OAuthAccount
from onyx.db.models import User
from onyx.utils.logger import setup_logger
logger = setup_logger()
# Standard OAuth refresh token endpoints
REFRESH_ENDPOINTS = {
"google": "https://oauth2.googleapis.com/token",
}
# NOTE: Keeping this as a utility function for potential future debugging,
# but not using it in production code
async def _test_expire_oauth_token(
user: User,
oauth_account: OAuthAccount,
db_session: AsyncSession,
user_manager: BaseUserManager[User, Any],
expire_in_seconds: int = 10,
) -> bool:
"""
Utility function for testing - Sets an OAuth token to expire in a short time
to facilitate testing of the refresh flow.
Not used in production code.
"""
try:
new_expires_at = int(
(datetime.now(timezone.utc).timestamp() + expire_in_seconds)
)
updated_data: Dict[str, Any] = {"expires_at": new_expires_at}
await user_manager.user_db.update_oauth_account(
user, cast(Any, oauth_account), updated_data
)
return True
except Exception as e:
logger.exception(f"Error setting artificial expiration: {str(e)}")
return False
async def refresh_oauth_token(
user: User,
oauth_account: OAuthAccount,
db_session: AsyncSession,
user_manager: BaseUserManager[User, Any],
) -> bool:
"""
Attempt to refresh an OAuth token that's about to expire or has expired.
Returns True if successful, False otherwise.
"""
if not oauth_account.refresh_token:
logger.warning(
f"No refresh token available for {user.email}'s {oauth_account.oauth_name} account"
)
return False
provider = oauth_account.oauth_name
if provider not in REFRESH_ENDPOINTS:
logger.warning(f"Refresh endpoint not configured for provider: {provider}")
return False
try:
logger.info(f"Refreshing OAuth token for {user.email}'s {provider} account")
async with httpx.AsyncClient() as client:
response = await client.post(
REFRESH_ENDPOINTS[provider],
data={
"client_id": OAUTH_CLIENT_ID,
"client_secret": OAUTH_CLIENT_SECRET,
"refresh_token": oauth_account.refresh_token,
"grant_type": "refresh_token",
},
headers={"Content-Type": "application/x-www-form-urlencoded"},
)
if response.status_code != 200:
logger.error(
f"Failed to refresh OAuth token: Status {response.status_code}"
)
return False
token_data = response.json()
new_access_token = token_data.get("access_token")
new_refresh_token = token_data.get(
"refresh_token", oauth_account.refresh_token
)
expires_in = token_data.get("expires_in")
# Calculate new expiry time if provided
new_expires_at: Optional[int] = None
if expires_in:
new_expires_at = int(
(datetime.now(timezone.utc).timestamp() + expires_in)
)
# Update the OAuth account
updated_data: Dict[str, Any] = {
"access_token": new_access_token,
"refresh_token": new_refresh_token,
}
if new_expires_at:
updated_data["expires_at"] = new_expires_at
# Update oidc_expiry in user model if we're tracking it
if TRACK_EXTERNAL_IDP_EXPIRY:
oidc_expiry = datetime.fromtimestamp(
new_expires_at, tz=timezone.utc
)
await user_manager.user_db.update(
user, {"oidc_expiry": oidc_expiry}
)
# Update the OAuth account
await user_manager.user_db.update_oauth_account(
user, cast(Any, oauth_account), updated_data
)
logger.info(f"Successfully refreshed OAuth token for {user.email}")
return True
except Exception as e:
logger.exception(f"Error refreshing OAuth token: {str(e)}")
return False
async def check_and_refresh_oauth_tokens(
user: User,
db_session: AsyncSession,
user_manager: BaseUserManager[User, Any],
) -> None:
"""
Check if any OAuth tokens are expired or about to expire and refresh them.
"""
if not hasattr(user, "oauth_accounts") or not user.oauth_accounts:
return
now_timestamp = datetime.now(timezone.utc).timestamp()
# Buffer time to refresh tokens before they expire (in seconds)
buffer_seconds = 300 # 5 minutes
for oauth_account in user.oauth_accounts:
# Skip accounts without refresh tokens
if not oauth_account.refresh_token:
continue
# If token is about to expire, refresh it
if (
oauth_account.expires_at
and oauth_account.expires_at - now_timestamp < buffer_seconds
):
logger.info(f"OAuth token for {user.email} is about to expire - refreshing")
success = await refresh_oauth_token(
user, oauth_account, db_session, user_manager
)
if not success:
logger.warning(
"Failed to refresh OAuth token. User may need to re-authenticate."
)
async def check_oauth_account_has_refresh_token(
user: User,
oauth_account: OAuthAccount,
) -> bool:
"""
Check if an OAuth account has a refresh token.
Returns True if a refresh token exists, False otherwise.
"""
return bool(oauth_account.refresh_token)
async def get_oauth_accounts_requiring_refresh_token(user: User) -> List[OAuthAccount]:
"""
Returns a list of OAuth accounts for a user that are missing refresh tokens.
These accounts will need re-authentication to get refresh tokens.
"""
if not hasattr(user, "oauth_accounts") or not user.oauth_accounts:
return []
accounts_needing_refresh = []
for oauth_account in user.oauth_accounts:
has_refresh_token = await check_oauth_account_has_refresh_token(
user, oauth_account
)
if not has_refresh_token:
accounts_needing_refresh.append(oauth_account)
return accounts_needing_refresh
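For reference, the refresh flow in the module above boils down to a standard OAuth2 refresh_token grant against the provider's token endpoint. A hedged sketch with httpx; the client id/secret and refresh token are placeholders, and only Google's endpoint is assumed, matching REFRESH_ENDPOINTS above:

import httpx

GOOGLE_TOKEN_ENDPOINT = "https://oauth2.googleapis.com/token"

async def refresh_google_token(
    client_id: str, client_secret: str, refresh_token: str
) -> dict | None:
    """Exchange a refresh token for a new access token; returns None on failure."""
    async with httpx.AsyncClient() as client:
        resp = await client.post(
            GOOGLE_TOKEN_ENDPOINT,
            data={
                "client_id": client_id,
                "client_secret": client_secret,
                "refresh_token": refresh_token,
                "grant_type": "refresh_token",
            },
            headers={"Content-Type": "application/x-www-form-urlencoded"},
        )
    if resp.status_code != 200:
        return None
    # Typical payload: access_token, expires_in, and sometimes a new refresh_token.
    return resp.json()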

View File

@@ -5,16 +5,12 @@ import string
import uuid
from collections.abc import AsyncGenerator
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import Any
from typing import cast
from typing import Dict
from typing import List
from typing import Optional
from typing import Protocol
from typing import Tuple
from typing import TypeVar
import jwt
from email_validator import EmailNotValidError
@@ -104,12 +100,10 @@ from onyx.utils.logger import setup_logger
from onyx.utils.telemetry import create_milestone_and_report
from onyx.utils.telemetry import optional_telemetry
from onyx.utils.telemetry import RecordType
from onyx.utils.url import add_url_params
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop
from onyx.utils.variable_functionality import fetch_versioned_implementation
from shared_configs.configs import async_return_default_schema
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from shared_configs.contextvars import get_current_tenant_id
@@ -585,10 +579,8 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
logger.notice(
f"Verification requested for user {user.id}. Verification token: {token}"
)
user_count = await get_user_count()
send_user_verification_email(
user.email, token, new_organization=user_count == 1
)
send_user_verification_email(user.email, token)
async def authenticate(
self, credentials: OAuth2PasswordRequestForm
@@ -600,7 +592,7 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
tenant_id = fetch_ee_implementation_or_noop(
"onyx.server.tenants.provisioning",
"get_tenant_id_for_email",
POSTGRES_DEFAULT_SCHEMA,
None,
)(
email=email,
)
@@ -694,20 +686,16 @@ cookie_transport = CookieTransport(
)
T = TypeVar("T", covariant=True)
ID = TypeVar("ID", contravariant=True)
def get_redis_strategy() -> RedisStrategy:
return TenantAwareRedisStrategy()
# Protocol for strategies that support token refreshing without inheritance.
class RefreshableStrategy(Protocol):
"""Protocol for authentication strategies that support token refreshing."""
async def refresh_token(self, token: Optional[str], user: Any) -> str:
"""
Refresh an existing token by extending its lifetime.
Returns either the same token with extended expiration or a new token.
"""
...
def get_database_strategy(
access_token_db: AccessTokenDatabase[AccessToken] = Depends(get_access_token_db),
) -> DatabaseStrategy:
return DatabaseStrategy(
access_token_db, lifetime_seconds=SESSION_EXPIRE_TIME_SECONDS
)
class TenantAwareRedisStrategy(RedisStrategy[User, uuid.UUID]):
@@ -766,75 +754,6 @@ class TenantAwareRedisStrategy(RedisStrategy[User, uuid.UUID]):
redis = await get_async_redis_connection()
await redis.delete(f"{self.key_prefix}{token}")
async def refresh_token(self, token: Optional[str], user: User) -> str:
"""Refresh a token by extending its expiration time in Redis."""
if token is None:
# If no token provided, create a new one
return await self.write_token(user)
redis = await get_async_redis_connection()
token_key = f"{self.key_prefix}{token}"
# Check if token exists
token_data_str = await redis.get(token_key)
if not token_data_str:
# Token not found, create new one
return await self.write_token(user)
# Token exists, extend its lifetime
token_data = json.loads(token_data_str)
await redis.set(
token_key,
json.dumps(token_data),
ex=self.lifetime_seconds,
)
return token
class RefreshableDatabaseStrategy(DatabaseStrategy[User, uuid.UUID, AccessToken]):
"""Database strategy with token refreshing capabilities."""
def __init__(
self,
access_token_db: AccessTokenDatabase[AccessToken],
lifetime_seconds: Optional[int] = None,
):
super().__init__(access_token_db, lifetime_seconds)
self._access_token_db = access_token_db
async def refresh_token(self, token: Optional[str], user: User) -> str:
"""Refresh a token by updating its expiration time in the database."""
if token is None:
return await self.write_token(user)
# Find the token in database
access_token = await self._access_token_db.get_by_token(token)
if access_token is None:
# Token not found, create new one
return await self.write_token(user)
# Update expiration time
new_expires = datetime.now(timezone.utc) + timedelta(
seconds=float(self.lifetime_seconds or SESSION_EXPIRE_TIME_SECONDS)
)
await self._access_token_db.update(access_token, {"expires": new_expires})
return token
def get_redis_strategy() -> TenantAwareRedisStrategy:
return TenantAwareRedisStrategy()
def get_database_strategy(
access_token_db: AccessTokenDatabase[AccessToken] = Depends(get_access_token_db),
) -> RefreshableDatabaseStrategy:
return RefreshableDatabaseStrategy(
access_token_db, lifetime_seconds=SESSION_EXPIRE_TIME_SECONDS
)
if AUTH_BACKEND == AuthBackend.REDIS:
auth_backend = AuthenticationBackend(
@@ -885,88 +804,6 @@ class FastAPIUserWithLogoutRouter(FastAPIUsers[models.UP, models.ID]):
return router
def get_refresh_router(
self,
backend: AuthenticationBackend,
requires_verification: bool = REQUIRE_EMAIL_VERIFICATION,
) -> APIRouter:
"""
Provide a router for session token refreshing.
"""
# Import the oauth_refresher here to avoid circular imports
from onyx.auth.oauth_refresher import check_and_refresh_oauth_tokens
router = APIRouter()
get_current_user_token = self.authenticator.current_user_token(
active=True, verified=requires_verification
)
refresh_responses: OpenAPIResponseType = {
**{
status.HTTP_401_UNAUTHORIZED: {
"description": "Missing token or inactive user."
}
},
**backend.transport.get_openapi_login_responses_success(),
}
@router.post(
"/refresh", name=f"auth:{backend.name}.refresh", responses=refresh_responses
)
async def refresh(
user_token: Tuple[models.UP, str] = Depends(get_current_user_token),
strategy: Strategy[models.UP, models.ID] = Depends(backend.get_strategy),
user_manager: BaseUserManager[models.UP, models.ID] = Depends(
get_user_manager
),
db_session: AsyncSession = Depends(get_async_session),
) -> Response:
try:
user, token = user_token
logger.info(f"Processing token refresh request for user {user.email}")
# Check if user has OAuth accounts that need refreshing
await check_and_refresh_oauth_tokens(
user=cast(User, user),
db_session=db_session,
user_manager=cast(Any, user_manager),
)
# Check if strategy supports refreshing
supports_refresh = hasattr(strategy, "refresh_token") and callable(
getattr(strategy, "refresh_token")
)
if supports_refresh:
try:
refresh_method = getattr(strategy, "refresh_token")
new_token = await refresh_method(token, user)
logger.info(
f"Successfully refreshed session token for user {user.email}"
)
return await backend.transport.get_login_response(new_token)
except Exception as e:
logger.error(f"Error refreshing session token: {str(e)}")
# Fallback to logout and login if refresh fails
await backend.logout(strategy, user, token)
return await backend.login(strategy, user)
# Fallback: logout and login again
logger.info(
"Strategy doesn't support refresh - using logout/login flow"
)
await backend.logout(strategy, user, token)
return await backend.login(strategy, user)
except Exception as e:
logger.error(f"Unexpected error in refresh endpoint: {str(e)}")
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Token refresh failed: {str(e)}",
)
return router
fastapi_users = FastAPIUserWithLogoutRouter[User, uuid.UUID](
get_user_manager, [auth_backend]
@@ -1057,7 +894,7 @@ async def current_limited_user(
return await double_check_user(user)
async def current_chat_accessible_user(
async def current_chat_accesssible_user(
user: User | None = Depends(optional_user),
) -> User | None:
tenant_id = get_current_tenant_id()
@@ -1200,20 +1037,12 @@ def get_oauth_router(
"referral_source": referral_source or "default_referral",
}
state = generate_state_token(state_data, state_secret)
# Get the basic authorization URL
authorization_url = await oauth_client.get_authorization_url(
authorize_redirect_url,
state,
scopes,
)
# For Google OAuth, add parameters to request refresh tokens
if oauth_client.name == "google":
authorization_url = add_url_params(
authorization_url, {"access_type": "offline", "prompt": "consent"}
)
return OAuth2AuthorizeResponse(authorization_url=authorization_url)
@router.get(
@@ -1266,12 +1095,6 @@ def get_oauth_router(
next_url = state_data.get("next_url", "/")
referral_source = state_data.get("referral_source", None)
try:
tenant_id = fetch_ee_implementation_or_noop(
"onyx.server.tenants.user_mapping", "get_tenant_id_for_email", None
)(account_email)
except exceptions.UserNotExists:
tenant_id = None
request.state.referral_source = referral_source
@@ -1303,14 +1126,9 @@ def get_oauth_router(
# Login user
response = await backend.login(strategy, user)
await user_manager.on_after_login(user, request, response)
# Prepare redirect response
if tenant_id is None:
# Use URL utility to add parameters
redirect_url = add_url_params(next_url, {"new_team": "true"})
redirect_response = RedirectResponse(redirect_url, status_code=302)
else:
# No parameters to add
redirect_response = RedirectResponse(next_url, status_code=302)
redirect_response = RedirectResponse(next_url, status_code=302)
# Copy headers and other attributes from 'response' to 'redirect_response'
for header_name, header_value in response.headers.items():
@@ -1322,7 +1140,6 @@ def get_oauth_router(
redirect_response.status_code = response.status_code
if hasattr(response, "media_type"):
redirect_response.media_type = response.media_type
return redirect_response
return router
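The TenantAwareRedisStrategy.refresh_token shown in the diff is essentially "rewrite the token's Redis key with a fresh TTL". A standalone sketch of that idea with redis.asyncio; the key prefix and lifetime are illustrative, not the exact values used by the strategy:

import redis.asyncio as aioredis

async def extend_session(
    redis: aioredis.Redis,
    token: str,
    lifetime_seconds: int,
    key_prefix: str = "fastapi_users_token:",  # placeholder prefix
) -> bool:
    """Re-set the session payload with a new TTL; False if the token is unknown."""
    key = f"{key_prefix}{token}"
    payload = await redis.get(key)
    if payload is None:
        return False
    # Same payload, fresh expiration.
    await redis.set(key, payload, ex=lifetime_seconds)
    return True

# Usage sketch:
# r = aioredis.from_url("redis://localhost:6379")
# ok = await extend_session(r, token, 86400)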

View File

@@ -34,6 +34,7 @@ from onyx.redis.redis_connector_ext_group_sync import RedisConnectorExternalGrou
from onyx.redis.redis_connector_prune import RedisConnectorPrune
from onyx.redis.redis_document_set import RedisDocumentSet
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_shared_redis_client
from onyx.redis.redis_usergroup import RedisUserGroup
from onyx.utils.logger import ColoredFormatter
from onyx.utils.logger import PlainFormatter
@@ -224,7 +225,7 @@ def wait_for_redis(sender: Any, **kwargs: Any) -> None:
Will raise WorkerShutdown to kill the celery worker if the timeout
is reached."""
r = get_redis_client(tenant_id=POSTGRES_DEFAULT_SCHEMA)
r = get_shared_redis_client()
WAIT_INTERVAL = 5
WAIT_LIMIT = 60
@@ -310,7 +311,7 @@ def on_secondary_worker_init(sender: Any, **kwargs: Any) -> None:
# Set up variables for waiting on primary worker
WAIT_INTERVAL = 5
WAIT_LIMIT = 60
r = get_redis_client(tenant_id=POSTGRES_DEFAULT_SCHEMA)
r = get_shared_redis_client()
time_start = time.monotonic()
logger.info("Waiting for primary worker to be ready...")

View File

@@ -1,5 +1,6 @@
from datetime import timedelta
from typing import Any
from typing import cast
from celery import Celery
from celery import signals
@@ -9,10 +10,12 @@ from celery.utils.log import get_task_logger
import onyx.background.celery.apps.app_base as app_base
from onyx.background.celery.tasks.beat_schedule import CLOUD_BEAT_MULTIPLIER_DEFAULT
from onyx.configs.constants import ONYX_CLOUD_REDIS_RUNTIME
from onyx.configs.constants import ONYX_CLOUD_TENANT_ID
from onyx.configs.constants import POSTGRES_CELERY_BEAT_APP_NAME
from onyx.db.engine import get_all_tenant_ids
from onyx.db.engine import SqlEngine
from onyx.server.runtime.onyx_runtime import OnyxRuntime
from onyx.redis.redis_pool import get_redis_replica_client
from onyx.utils.variable_functionality import fetch_versioned_implementation
from shared_configs.configs import IGNORED_SYNCING_TENANT_LIST
from shared_configs.configs import MULTI_TENANT
@@ -138,6 +141,8 @@ class DynamicTenantScheduler(PersistentScheduler):
"""Only updates the actual beat schedule on the celery app when it changes"""
do_update = False
r = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)
task_logger.debug("_try_updating_schedule starting")
tenant_ids = get_all_tenant_ids()
@@ -147,7 +152,16 @@ class DynamicTenantScheduler(PersistentScheduler):
current_schedule = self.schedule.items()
# get potential new state
beat_multiplier = OnyxRuntime.get_beat_multiplier()
beat_multiplier = CLOUD_BEAT_MULTIPLIER_DEFAULT
beat_multiplier_raw = r.get(f"{ONYX_CLOUD_REDIS_RUNTIME}:beat_multiplier")
if beat_multiplier_raw is not None:
try:
beat_multiplier_bytes = cast(bytes, beat_multiplier_raw)
beat_multiplier = float(beat_multiplier_bytes.decode())
except ValueError:
task_logger.error(
f"Invalid beat_multiplier value: {beat_multiplier_raw}"
)
new_schedule = self._generate_schedule(tenant_ids, beat_multiplier)
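The scheduler change above reads a runtime float from Redis and falls back to the default when the key is missing or malformed. An isolated sketch of that read; the key literal stands in for the ONYX_CLOUD_REDIS_RUNTIME prefix used in the diff:

import redis

CLOUD_BEAT_MULTIPLIER_DEFAULT = 8.0

def get_beat_multiplier(
    r: redis.Redis,
    key: str = "onyx_cloud_runtime:beat_multiplier",  # placeholder key name
) -> float:
    """Read a float override from Redis, falling back to the default on any problem."""
    raw = r.get(key)
    if raw is None:
        return CLOUD_BEAT_MULTIPLIER_DEFAULT
    try:
        return float(raw.decode() if isinstance(raw, bytes) else raw)
    except ValueError:
        return CLOUD_BEAT_MULTIPLIER_DEFAULT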

View File

@@ -112,6 +112,5 @@ celery_app.autodiscover_tasks(
"onyx.background.celery.tasks.connector_deletion",
"onyx.background.celery.tasks.doc_permission_syncing",
"onyx.background.celery.tasks.indexing",
"onyx.background.celery.tasks.tenant_provisioning",
]
)

View File

@@ -92,6 +92,5 @@ def on_setup_logging(
celery_app.autodiscover_tasks(
[
"onyx.background.celery.tasks.monitoring",
"onyx.background.celery.tasks.tenant_provisioning",
]
)

View File

@@ -38,11 +38,10 @@ from onyx.redis.redis_connector_index import RedisConnectorIndex
from onyx.redis.redis_connector_prune import RedisConnectorPrune
from onyx.redis.redis_connector_stop import RedisConnectorStop
from onyx.redis.redis_document_set import RedisDocumentSet
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_shared_redis_client
from onyx.redis.redis_usergroup import RedisUserGroup
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
logger = setup_logger()
@@ -103,7 +102,7 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None:
# This is singleton work that should be done on startup exactly once
# by the primary worker. This is unnecessary in the multi-tenant scenario
r = get_redis_client(tenant_id=POSTGRES_DEFAULT_SCHEMA)
r = get_shared_redis_client()
# Log the role and slave count - being connected to a slave or slave count > 0 could be problematic
info: dict[str, Any] = cast(dict, r.info("replication"))
@@ -236,7 +235,7 @@ class HubPeriodicTask(bootsteps.StartStopStep):
lock: RedisLock = worker.primary_worker_lock
r = get_redis_client(tenant_id=POSTGRES_DEFAULT_SCHEMA)
r = get_shared_redis_client()
if lock.owned():
task_logger.debug("Reacquiring primary worker lock.")

View File

@@ -5,53 +5,40 @@ from logging.handlers import RotatingFileHandler
import psutil
from onyx.utils.logger import is_running_in_container
from onyx.utils.logger import setup_logger
# Regular application logger
logger = setup_logger()
# Only set up memory monitoring in container environment
if is_running_in_container():
# Set up a dedicated memory monitoring logger
MEMORY_LOG_DIR = "/var/log/memory"
MEMORY_LOG_FILE = os.path.join(MEMORY_LOG_DIR, "memory_usage.log")
MEMORY_LOG_MAX_BYTES = 10 * 1024 * 1024 # 10MB
MEMORY_LOG_BACKUP_COUNT = 5 # Keep 5 backup files
# Set up a dedicated memory monitoring logger
MEMORY_LOG_DIR = "/var/log/persisted-logs/memory"
MEMORY_LOG_FILE = os.path.join(MEMORY_LOG_DIR, "memory_usage.log")
MEMORY_LOG_MAX_BYTES = 10 * 1024 * 1024 # 10MB
MEMORY_LOG_BACKUP_COUNT = 5 # Keep 5 backup files
# Ensure log directory exists
os.makedirs(MEMORY_LOG_DIR, exist_ok=True)
# Ensure log directory exists
os.makedirs(MEMORY_LOG_DIR, exist_ok=True)
# Create a dedicated logger for memory monitoring
memory_logger = logging.getLogger("memory_monitoring")
memory_logger.setLevel(logging.INFO)
# Create a dedicated logger for memory monitoring
memory_logger = logging.getLogger("memory_monitoring")
memory_logger.setLevel(logging.INFO)
# Create a rotating file handler
memory_handler = RotatingFileHandler(
MEMORY_LOG_FILE,
maxBytes=MEMORY_LOG_MAX_BYTES,
backupCount=MEMORY_LOG_BACKUP_COUNT,
)
# Create a rotating file handler
memory_handler = RotatingFileHandler(
MEMORY_LOG_FILE, maxBytes=MEMORY_LOG_MAX_BYTES, backupCount=MEMORY_LOG_BACKUP_COUNT
)
# Create a formatter that includes all relevant information
memory_formatter = logging.Formatter(
"%(asctime)s [%(levelname)s] %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
)
memory_handler.setFormatter(memory_formatter)
memory_logger.addHandler(memory_handler)
else:
# Create a null logger when not in container
memory_logger = logging.getLogger("memory_monitoring")
memory_logger.addHandler(logging.NullHandler())
# Create a formatter that includes all relevant information
memory_formatter = logging.Formatter(
"%(asctime)s [%(levelname)s] %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
)
memory_handler.setFormatter(memory_formatter)
memory_logger.addHandler(memory_handler)
def emit_process_memory(
pid: int, process_name: str, additional_metadata: dict[str, str | int]
) -> None:
# Skip memory monitoring if not in container
if not is_running_in_container():
return
try:
process = psutil.Process(pid)
memory_info = process.memory_info()

View File

@@ -21,7 +21,6 @@ BEAT_EXPIRES_DEFAULT = 15 * 60 # 15 minutes (in seconds)
# we have a better implementation (backpressure, etc)
# Note that DynamicTenantScheduler can adjust the runtime value for this via Redis
CLOUD_BEAT_MULTIPLIER_DEFAULT = 8.0
CLOUD_DOC_PERMISSION_SYNC_MULTIPLIER_DEFAULT = 1.0
# tasks that run in either self-hosted on cloud
beat_task_templates: list[dict] = []
@@ -168,16 +167,6 @@ beat_cloud_tasks: list[dict] = [
"expires": BEAT_EXPIRES_DEFAULT,
},
},
{
"name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check-available-tenants",
"task": OnyxCeleryTask.CHECK_AVAILABLE_TENANTS,
"schedule": timedelta(minutes=10),
"options": {
"queue": OnyxCeleryQueues.MONITORING,
"priority": OnyxCeleryPriority.HIGH,
"expires": BEAT_EXPIRES_DEFAULT,
},
},
]
# tasks that only run self hosted
@@ -195,16 +184,6 @@ if not MULTI_TENANT:
"queue": OnyxCeleryQueues.MONITORING,
},
},
{
"name": "monitor-process-memory",
"task": OnyxCeleryTask.MONITOR_PROCESS_MEMORY,
"schedule": timedelta(minutes=5),
"options": {
"priority": OnyxCeleryPriority.LOW,
"expires": BEAT_EXPIRES_DEFAULT,
"queue": OnyxCeleryQueues.MONITORING,
},
},
]
)

View File

@@ -30,9 +30,6 @@ from onyx.db.connector_credential_pair import (
)
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.connector_credential_pair import get_connector_credential_pairs
from onyx.db.document import (
delete_all_documents_by_connector_credential_pair__no_commit,
)
from onyx.db.document import get_document_ids_for_connector_credential_pair
from onyx.db.document_set import delete_document_set_cc_pair_relationship__no_commit
from onyx.db.engine import get_session_with_current_tenant
@@ -389,8 +386,6 @@ def monitor_connector_deletion_taskset(
db_session=db_session,
cc_pair_id=cc_pair_id,
)
credential_id_to_delete: int | None = None
connector_id_to_delete: int | None = None
if not cc_pair:
task_logger.warning(
f"Connector deletion - cc_pair not found: cc_pair={cc_pair_id}"
@@ -445,35 +440,16 @@ def monitor_connector_deletion_taskset(
db_session=db_session,
)
# Store IDs before potentially expiring cc_pair
connector_id_to_delete = cc_pair.connector_id
credential_id_to_delete = cc_pair.credential_id
# Explicitly delete document by connector credential pair records before deleting the connector
# This is needed because connector_id is a primary key in that table and cascading deletes won't work
delete_all_documents_by_connector_credential_pair__no_commit(
db_session=db_session,
connector_id=connector_id_to_delete,
credential_id=credential_id_to_delete,
)
# Flush to ensure document deletion happens before connector deletion
db_session.flush()
# Expire the cc_pair to ensure SQLAlchemy doesn't try to manage its state
# related to the deleted DocumentByConnectorCredentialPair during commit
db_session.expire(cc_pair)
# finally, delete the cc-pair
delete_connector_credential_pair__no_commit(
db_session=db_session,
connector_id=connector_id_to_delete,
credential_id=credential_id_to_delete,
connector_id=cc_pair.connector_id,
credential_id=cc_pair.credential_id,
)
# if there are no credentials left, delete the connector
connector = fetch_connector_by_id(
db_session=db_session,
connector_id=connector_id_to_delete,
connector_id=cc_pair.connector_id,
)
if not connector or not len(connector.credentials):
task_logger.info(
@@ -506,15 +482,15 @@ def monitor_connector_deletion_taskset(
task_logger.exception(
f"Connector deletion exceptioned: "
f"cc_pair={cc_pair_id} connector={connector_id_to_delete} credential={credential_id_to_delete}"
f"cc_pair={cc_pair_id} connector={cc_pair.connector_id} credential={cc_pair.credential_id}"
)
raise e
task_logger.info(
f"Connector deletion succeeded: "
f"cc_pair={cc_pair_id} "
f"connector={connector_id_to_delete} "
f"credential={credential_id_to_delete} "
f"connector={cc_pair.connector_id} "
f"credential={cc_pair.credential_id} "
f"docs_deleted={fence_data.num_tasks}"
)
@@ -564,7 +540,7 @@ def validate_connector_deletion_fences(
def validate_connector_deletion_fence(
tenant_id: str,
key_bytes: bytes,
queued_upsert_tasks: set[str],
queued_tasks: set[str],
r: Redis,
) -> None:
"""Checks for the error condition where an indexing fence is set but the associated celery tasks don't exist.
@@ -651,7 +627,7 @@ def validate_connector_deletion_fence(
member_bytes = cast(bytes, member)
member_str = member_bytes.decode("utf-8")
if member_str in queued_upsert_tasks:
if member_str in queued_tasks:
continue
tasks_not_in_celery += 1
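The reordered deletion above relies on a common SQLAlchemy pattern: delete the dependent rows first, flush so those DELETEs reach the database before the parent row goes away, and expire the parent object so the session does not try to reconcile stale relationship state at commit. A generic sketch under the assumption of placeholder Parent/Child models where Child rows reference the parent via parent_id:

from sqlalchemy.orm import Session

def delete_parent_with_children(db_session: Session, parent, child_model) -> None:
    # child_model is a placeholder declarative model with a parent_id column.
    db_session.query(child_model).filter(
        child_model.parent_id == parent.id
    ).delete(synchronize_session=False)
    db_session.flush()         # ensure child DELETEs are emitted first
    db_session.expire(parent)  # drop cached relationship state on the parent
    db_session.delete(parent)
    db_session.commit()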

View File

@@ -17,7 +17,6 @@ from redis.exceptions import LockError
from redis.lock import Lock as RedisLock
from sqlalchemy.orm import Session
from ee.onyx.configs.app_configs import DEFAULT_PERMISSION_DOC_SYNC_FREQUENCY
from ee.onyx.db.connector_credential_pair import get_all_auto_sync_cc_pairs
from ee.onyx.db.document import upsert_document_external_perms
from ee.onyx.external_permissions.sync_params import DOC_PERMISSION_SYNC_PERIODS
@@ -47,6 +46,7 @@ from onyx.configs.constants import OnyxRedisSignals
from onyx.connectors.factory import validate_ccpair_for_user
from onyx.db.connector import mark_cc_pair_as_permissions_synced
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.connector_credential_pair import update_connector_credential_pair
from onyx.db.document import upsert_document_by_connector_credential_pair
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.enums import AccessType
@@ -64,14 +64,11 @@ from onyx.redis.redis_connector_doc_perm_sync import RedisConnectorPermissionSyn
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_redis_replica_client
from onyx.redis.redis_pool import redis_lock_dump
from onyx.server.runtime.onyx_runtime import OnyxRuntime
from onyx.server.utils import make_short_id
from onyx.utils.logger import doc_permission_sync_ctx
from onyx.utils.logger import format_error_for_logging
from onyx.utils.logger import LoggerContextVars
from onyx.utils.logger import setup_logger
from onyx.utils.telemetry import optional_telemetry
from onyx.utils.telemetry import RecordType
logger = setup_logger()
@@ -108,10 +105,9 @@ def _is_external_doc_permissions_sync_due(cc_pair: ConnectorCredentialPair) -> b
source_sync_period = DOC_PERMISSION_SYNC_PERIODS.get(cc_pair.connector.source)
# If RESTRICTED_FETCH_PERIOD[source] is None, we always run the sync.
if not source_sync_period:
source_sync_period = DEFAULT_PERMISSION_DOC_SYNC_FREQUENCY
source_sync_period *= int(OnyxRuntime.get_doc_permission_sync_multiplier())
return True
# If the last sync is greater than the full fetch period, we run the sync
next_sync = last_perm_sync + timedelta(seconds=source_sync_period)
@@ -289,7 +285,7 @@ def try_creating_permissions_sync_task(
),
queue=OnyxCeleryQueues.CONNECTOR_DOC_PERMISSIONS_SYNC,
task_id=custom_task_id,
priority=OnyxCeleryPriority.MEDIUM,
priority=OnyxCeleryPriority.HIGH,
)
# fill in the celery task id
@@ -424,7 +420,12 @@ def connector_permission_sync_generator_task(
task_logger.exception(
f"validate_ccpair_permissions_sync exceptioned: cc_pair={cc_pair_id}"
)
# TODO: add some notification to the admins here
update_connector_credential_pair(
db_session=db_session,
connector_id=cc_pair.connector.id,
credential_id=cc_pair.credential.id,
status=ConnectorCredentialPairStatus.INVALID,
)
raise
source_type = cc_pair.connector.source
@@ -452,23 +453,23 @@ def connector_permission_sync_generator_task(
redis_connector.permissions.set_fence(new_payload)
callback = PermissionSyncCallback(redis_connector, lock, r)
document_external_accesses = doc_sync_func(cc_pair, callback)
document_external_accesses: list[DocExternalAccess] = doc_sync_func(
cc_pair, callback
)
task_logger.info(
f"RedisConnector.permissions.generate_tasks starting. cc_pair={cc_pair_id}"
)
tasks_generated = 0
for doc_external_access in document_external_accesses:
redis_connector.permissions.generate_tasks(
celery_app=self.app,
lock=lock,
new_permissions=[doc_external_access],
source_string=source_type,
connector_id=cc_pair.connector.id,
credential_id=cc_pair.credential.id,
)
tasks_generated += 1
tasks_generated = redis_connector.permissions.generate_tasks(
celery_app=self.app,
lock=lock,
new_permissions=document_external_accesses,
source_string=source_type,
connector_id=cc_pair.connector.id,
credential_id=cc_pair.credential.id,
)
if tasks_generated is None:
return None
task_logger.info(
f"RedisConnector.permissions.generate_tasks finished. "
@@ -880,21 +881,6 @@ def monitor_ccpair_permissions_taskset(
f"remaining={remaining} "
f"initial={initial}"
)
# Add telemetry for permission syncing progress
optional_telemetry(
record_type=RecordType.PERMISSION_SYNC_PROGRESS,
data={
"cc_pair_id": cc_pair_id,
"id": payload.id if payload else None,
"total_docs": initial if initial is not None else 0,
"remaining_docs": remaining,
"synced_docs": (initial - remaining) if initial is not None else 0,
"is_complete": remaining == 0,
},
tenant_id=tenant_id,
)
if remaining > 0:
return
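The "is a permission sync due" logic referenced near the top of this file is a straightforward last-run-plus-period comparison. A simplified sketch of that check; the handling of a missing period differs between the two variants in the diff, so only the time comparison is shown:

from datetime import datetime, timedelta, timezone

def is_sync_due(last_sync: datetime | None, period_seconds: int) -> bool:
    """True when the configured period has elapsed since the last sync."""
    if last_sync is None:
        return True  # never synced before
    return datetime.now(timezone.utc) >= last_sync + timedelta(seconds=period_seconds)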

View File

@@ -41,6 +41,7 @@ from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.factory import validate_ccpair_for_user
from onyx.db.connector import mark_cc_pair_as_external_group_synced
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.connector_credential_pair import update_connector_credential_pair
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
@@ -271,7 +272,7 @@ def try_creating_external_group_sync_task(
),
queue=OnyxCeleryQueues.CONNECTOR_EXTERNAL_GROUP_SYNC,
task_id=custom_task_id,
priority=OnyxCeleryPriority.MEDIUM,
priority=OnyxCeleryPriority.HIGH,
)
payload.celery_task_id = result.id
@@ -401,7 +402,12 @@ def connector_external_group_sync_generator_task(
task_logger.exception(
f"validate_ccpair_permissions_sync exceptioned: cc_pair={cc_pair_id}"
)
# TODO: add some notification to the admins here
update_connector_credential_pair(
db_session=db_session,
connector_id=cc_pair.connector.id,
credential_id=cc_pair.credential.id,
status=ConnectorCredentialPairStatus.INVALID,
)
raise
source_type = cc_pair.connector.source
@@ -419,9 +425,12 @@ def connector_external_group_sync_generator_task(
try:
external_user_groups = ext_group_sync_func(tenant_id, cc_pair)
except ConnectorValidationError as e:
# TODO: add some notification to the admins here
logger.exception(
f"Error syncing external groups for {source_type} for cc_pair: {cc_pair_id} {e}"
msg = f"Error syncing external groups for {source_type} for cc_pair: {cc_pair_id} {e}"
update_connector_credential_pair(
db_session=db_session,
connector_id=cc_pair.connector.id,
credential_id=cc_pair.credential.id,
status=ConnectorCredentialPairStatus.INVALID,
)
raise e

View File

@@ -72,7 +72,6 @@ from onyx.redis.redis_pool import get_redis_replica_client
from onyx.redis.redis_pool import redis_lock_dump
from onyx.redis.redis_pool import SCAN_ITER_COUNT_DEFAULT
from onyx.redis.redis_utils import is_fence
from onyx.server.runtime.onyx_runtime import OnyxRuntime
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import global_version
from shared_configs.configs import INDEXING_MODEL_SERVER_HOST
@@ -402,11 +401,7 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
logger.warning(f"Adding {key_bytes} to the lookup table.")
redis_client.sadd(OnyxRedisConstants.ACTIVE_FENCES, key_bytes)
redis_client.set(
OnyxRedisSignals.BLOCK_BUILD_FENCE_LOOKUP_TABLE,
1,
ex=OnyxRuntime.get_build_fence_lookup_table_interval(),
)
redis_client.set(OnyxRedisSignals.BLOCK_BUILD_FENCE_LOOKUP_TABLE, 1, ex=300)
# 1/3: KICKOFF

View File

@@ -6,7 +6,6 @@ from itertools import islice
from typing import Any
from typing import Literal
import psutil
from celery import shared_task
from celery import Task
from celery.exceptions import SoftTimeLimitExceeded
@@ -20,7 +19,6 @@ from sqlalchemy.orm import Session
from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.celery_redis import celery_get_queue_length
from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
from onyx.background.celery.memory_monitoring import emit_process_memory
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import ONYX_CLOUD_TENANT_ID
from onyx.configs.constants import OnyxCeleryQueues
@@ -41,10 +39,8 @@ from onyx.db.models import UserGroup
from onyx.db.search_settings import get_active_search_settings_list
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import redis_lock_dump
from onyx.utils.logger import is_running_in_container
from onyx.utils.telemetry import optional_telemetry
from onyx.utils.telemetry import RecordType
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
_MONITORING_SOFT_TIME_LIMIT = 60 * 5 # 5 minutes
@@ -908,93 +904,3 @@ def monitor_celery_queues_helper(
f"external_group_sync={n_external_group_sync} "
f"permissions_upsert={n_permissions_upsert} "
)
"""Memory monitoring"""
def _get_cmdline_for_process(process: psutil.Process) -> str | None:
try:
return " ".join(process.cmdline())
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
return None
@shared_task(
name=OnyxCeleryTask.MONITOR_PROCESS_MEMORY,
ignore_result=True,
soft_time_limit=_MONITORING_SOFT_TIME_LIMIT,
time_limit=_MONITORING_TIME_LIMIT,
queue=OnyxCeleryQueues.MONITORING,
bind=True,
)
def monitor_process_memory(self: Task, *, tenant_id: str) -> None:
"""
Task to monitor memory usage of supervisor-managed processes.
This periodically checks the memory usage of processes and logs information
in a standardized format.
The task looks for processes managed by supervisor and logs their
memory usage statistics. This is useful for monitoring memory consumption
over time and identifying potential memory leaks.
"""
# don't run this task in multi-tenant mode; we have other, better means of monitoring
if MULTI_TENANT:
return
# Skip memory monitoring if not in container
if not is_running_in_container():
return
try:
# Get all supervisor-managed processes
supervisor_processes: dict[int, str] = {}
# Map cmd line elements to more readable process names
process_type_mapping = {
"--hostname=primary": "primary",
"--hostname=light": "light",
"--hostname=heavy": "heavy",
"--hostname=indexing": "indexing",
"--hostname=monitoring": "monitoring",
"beat": "beat",
"slack/listener.py": "slack",
}
# Find all python processes that are likely celery workers
for proc in psutil.process_iter():
cmdline = _get_cmdline_for_process(proc)
if not cmdline:
continue
# Match supervisor-managed processes
for process_name, process_type in process_type_mapping.items():
if process_name in cmdline:
if process_type in supervisor_processes.values():
task_logger.error(
f"Duplicate process type for type {process_type} "
f"with cmd {cmdline} with pid={proc.pid}."
)
continue
supervisor_processes[proc.pid] = process_type
break
if len(supervisor_processes) != len(process_type_mapping):
task_logger.error(
"Missing processes: "
f"{set(process_type_mapping.keys()).symmetric_difference(supervisor_processes.values())}"
)
# Log memory usage for each process
for pid, process_type in supervisor_processes.items():
try:
emit_process_memory(pid, process_type, {})
except psutil.NoSuchProcess:
# Process may have terminated since we obtained the list
continue
except Exception as e:
task_logger.exception(f"Error monitoring process {pid}: {str(e)}")
except Exception:
task_logger.exception("Error in monitor_process_memory task")

View File

@@ -1,199 +0,0 @@
"""
Periodic tasks for tenant pre-provisioning.
"""
import asyncio
import datetime
import uuid
from celery import shared_task
from celery import Task
from redis.lock import Lock as RedisLock
from ee.onyx.server.tenants.provisioning import setup_tenant
from ee.onyx.server.tenants.schema_management import create_schema_if_not_exists
from ee.onyx.server.tenants.schema_management import get_current_alembic_version
from onyx.background.celery.apps.app_base import task_logger
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.app_configs import TARGET_AVAILABLE_TENANTS
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.engine import get_session_with_shared_schema
from onyx.db.models import AvailableTenant
from onyx.redis.redis_pool import get_redis_client
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import TENANT_ID_PREFIX
# Default number of pre-provisioned tenants to maintain
DEFAULT_TARGET_AVAILABLE_TENANTS = 5
# Soft time limit for tenant pre-provisioning tasks (in seconds)
_TENANT_PROVISIONING_SOFT_TIME_LIMIT = 60 * 5 # 5 minutes
# Hard time limit for tenant pre-provisioning tasks (in seconds)
_TENANT_PROVISIONING_TIME_LIMIT = 60 * 10 # 10 minutes
@shared_task(
name=OnyxCeleryTask.CHECK_AVAILABLE_TENANTS,
queue=OnyxCeleryQueues.MONITORING,
ignore_result=True,
soft_time_limit=JOB_TIMEOUT,
trail=False,
bind=True,
)
def check_available_tenants(self: Task) -> None:
"""
Check if we have enough pre-provisioned tenants available.
If not, trigger the pre-provisioning of new tenants.
"""
task_logger.info("STARTING CHECK_AVAILABLE_TENANTS")
if not MULTI_TENANT:
task_logger.info(
"Multi-tenancy is not enabled, skipping tenant pre-provisioning"
)
return
r = get_redis_client()
lock_check: RedisLock = r.lock(
OnyxRedisLocks.CHECK_AVAILABLE_TENANTS_LOCK,
timeout=_TENANT_PROVISIONING_SOFT_TIME_LIMIT,
)
# These tasks should never overlap
if not lock_check.acquire(blocking=False):
task_logger.info(
"Skipping check_available_tenants task because it is already running"
)
return
try:
# Get the current count of available tenants
with get_session_with_shared_schema() as db_session:
available_tenants_count = db_session.query(AvailableTenant).count()
# Get the target number of available tenants
target_available_tenants = getattr(
TARGET_AVAILABLE_TENANTS, "value", DEFAULT_TARGET_AVAILABLE_TENANTS
)
# Calculate how many new tenants we need to provision
tenants_to_provision = max(
0, target_available_tenants - available_tenants_count
)
task_logger.info(
f"Available tenants: {available_tenants_count}, "
f"Target: {target_available_tenants}, "
f"To provision: {tenants_to_provision}"
)
# Trigger pre-provisioning tasks for each tenant needed
for _ in range(tenants_to_provision):
from celery import current_app
current_app.send_task(
OnyxCeleryTask.PRE_PROVISION_TENANT,
priority=OnyxCeleryPriority.LOW,
)
except Exception:
task_logger.exception("Error in check_available_tenants task")
finally:
lock_check.release()
@shared_task(
name=OnyxCeleryTask.PRE_PROVISION_TENANT,
ignore_result=True,
soft_time_limit=_TENANT_PROVISIONING_SOFT_TIME_LIMIT,
time_limit=_TENANT_PROVISIONING_TIME_LIMIT,
queue=OnyxCeleryQueues.MONITORING,
bind=True,
)
def pre_provision_tenant(self: Task) -> None:
"""
Pre-provision a new tenant and store it in the NewAvailableTenant table.
This function fully sets up the tenant with all necessary configurations,
so it's ready to be assigned to a user immediately.
"""
# The MULTI_TENANT check is now done at the caller level (check_available_tenants)
# rather than inside this function
r = get_redis_client()
lock_provision: RedisLock = r.lock(
OnyxRedisLocks.PRE_PROVISION_TENANT_LOCK,
timeout=_TENANT_PROVISIONING_SOFT_TIME_LIMIT,
)
# Allow multiple pre-provisioning tasks to run, but ensure they don't overlap
if not lock_provision.acquire(blocking=False):
task_logger.debug(
"Skipping pre_provision_tenant task because it is already running"
)
return
tenant_id: str | None = None
try:
# Generate a new tenant ID
tenant_id = TENANT_ID_PREFIX + str(uuid.uuid4())
task_logger.info(f"Pre-provisioning tenant: {tenant_id}")
# Create the schema for the new tenant
schema_created = create_schema_if_not_exists(tenant_id)
if schema_created:
task_logger.debug(f"Created schema for tenant: {tenant_id}")
else:
task_logger.debug(f"Schema already exists for tenant: {tenant_id}")
# Set up the tenant with all necessary configurations
task_logger.debug(f"Setting up tenant configuration: {tenant_id}")
asyncio.run(setup_tenant(tenant_id))
task_logger.debug(f"Tenant configuration completed: {tenant_id}")
# Get the current Alembic version
alembic_version = get_current_alembic_version(tenant_id)
task_logger.debug(
f"Tenant {tenant_id} using Alembic version: {alembic_version}"
)
# Store the pre-provisioned tenant in the database
task_logger.debug(f"Storing pre-provisioned tenant in database: {tenant_id}")
with get_session_with_shared_schema() as db_session:
# Use a transaction to ensure atomicity
db_session.begin()
try:
new_tenant = AvailableTenant(
tenant_id=tenant_id,
alembic_version=alembic_version,
date_created=datetime.datetime.now(),
)
db_session.add(new_tenant)
db_session.commit()
task_logger.info(f"Successfully pre-provisioned tenant: {tenant_id}")
except Exception:
db_session.rollback()
task_logger.error(
f"Failed to store pre-provisioned tenant: {tenant_id}",
exc_info=True,
)
raise
except Exception:
task_logger.error("Error in pre_provision_tenant task", exc_info=True)
# If we have a tenant_id, attempt to rollback any partially completed provisioning
if tenant_id:
task_logger.info(
f"Rolling back failed tenant provisioning for: {tenant_id}"
)
try:
from ee.onyx.server.tenants.provisioning import (
rollback_tenant_provisioning,
)
asyncio.run(rollback_tenant_provisioning(tenant_id))
except Exception:
task_logger.exception(f"Error during rollback for tenant: {tenant_id}")
finally:
lock_provision.release()
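The docstring above says a pre-provisioned tenant is "ready to be assigned to a user immediately," but the assignment step itself is not part of this file. Below is a minimal sketch of what consuming a row from AvailableTenant could look like; the helper name and the exact claim semantics are assumptions for illustration, not part of this changeset.

# Hypothetical sketch: claim the oldest pre-provisioned tenant for a new signup.
from sqlalchemy.orm import Session

from onyx.db.models import AvailableTenant


def claim_available_tenant(db_session: Session) -> str | None:
    row = (
        db_session.query(AvailableTenant)
        .order_by(AvailableTenant.date_created)
        .with_for_update(skip_locked=True)  # avoid two signups claiming the same row
        .first()
    )
    if row is None:
        return None  # nothing pre-provisioned; fall back to on-demand provisioning
    tenant_id = row.tenant_id
    db_session.delete(row)  # the tenant is no longer "available"
    db_session.commit()
    return tenant_id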

View File

@@ -563,7 +563,6 @@ def vespa_metadata_sync_task(self: Task, document_id: str, *, tenant_id: str) ->
access=doc_access,
boost=doc.boost,
hidden=doc.hidden,
# aggregated_boost_factor=doc.aggregated_boost_factor,
)
# update Vespa. OK if doc doesn't exist. Raises exception otherwise.

View File

@@ -6,8 +6,6 @@ from sqlalchemy import and_
from sqlalchemy.orm import Session
from onyx.configs.constants import FileOrigin
from onyx.connectors.interfaces import BaseConnector
from onyx.connectors.interfaces import CheckpointConnector
from onyx.connectors.models import ConnectorCheckpoint
from onyx.db.engine import get_db_current_time
from onyx.db.index_attempt import get_index_attempt
@@ -18,6 +16,7 @@ from onyx.file_store.file_store import get_default_file_store
from onyx.utils.logger import setup_logger
from onyx.utils.object_size_check import deep_getsizeof
logger = setup_logger()
_NUM_RECENT_ATTEMPTS_TO_CONSIDER = 20
@@ -53,7 +52,7 @@ def save_checkpoint(
def load_checkpoint(
db_session: Session, index_attempt_id: int, connector: BaseConnector
db_session: Session, index_attempt_id: int
) -> ConnectorCheckpoint | None:
"""Load a checkpoint for a given index attempt from the file store"""
checkpoint_pointer = _build_checkpoint_pointer(index_attempt_id)
@@ -61,8 +60,6 @@ def load_checkpoint(
try:
checkpoint_io = file_store.read_file(checkpoint_pointer, mode="rb")
checkpoint_data = checkpoint_io.read().decode("utf-8")
if isinstance(connector, CheckpointConnector):
return connector.validate_checkpoint_json(checkpoint_data)
return ConnectorCheckpoint.model_validate_json(checkpoint_data)
except RuntimeError:
return None
@@ -74,7 +71,6 @@ def get_latest_valid_checkpoint(
search_settings_id: int,
window_start: datetime,
window_end: datetime,
connector: BaseConnector,
) -> ConnectorCheckpoint:
"""Get the latest valid checkpoint for a given connector credential pair"""
checkpoint_candidates = get_recent_completed_attempts_for_cc_pair(
@@ -109,7 +105,7 @@ def get_latest_valid_checkpoint(
f"for cc_pair={cc_pair_id}. Ignoring checkpoint to let the run start "
"from scratch."
)
return connector.build_dummy_checkpoint()
return ConnectorCheckpoint.build_dummy_checkpoint()
# assumes latest checkpoint is the furthest along. This only isn't true
# if something else has gone wrong.
@@ -117,13 +113,12 @@ def get_latest_valid_checkpoint(
checkpoint_candidates[0] if checkpoint_candidates else None
)
checkpoint = connector.build_dummy_checkpoint()
checkpoint = ConnectorCheckpoint.build_dummy_checkpoint()
if latest_valid_checkpoint_candidate:
try:
previous_checkpoint = load_checkpoint(
db_session=db_session,
index_attempt_id=latest_valid_checkpoint_candidate.id,
connector=connector,
)
except Exception:
logger.exception(
@@ -198,7 +193,7 @@ def cleanup_checkpoint(db_session: Session, index_attempt_id: int) -> None:
def check_checkpoint_size(checkpoint: ConnectorCheckpoint) -> None:
"""Check if the checkpoint content size exceeds the limit (200MB)"""
content_size = deep_getsizeof(checkpoint.model_dump())
content_size = deep_getsizeof(checkpoint.checkpoint_content)
if content_size > 200_000_000: # 200MB in bytes
raise ValueError(
f"Checkpoint content size ({content_size} bytes) exceeds 200MB limit"

View File

@@ -24,18 +24,15 @@ from onyx.connectors.connector_runner import ConnectorRunner
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.factory import instantiate_connector
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import Document
from onyx.connectors.models import IndexAttemptMetadata
from onyx.connectors.models import TextSection
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.connector_credential_pair import get_last_successful_attempt_time
from onyx.db.connector_credential_pair import update_connector_credential_pair
from onyx.db.constants import CONNECTOR_VALIDATION_ERROR_MESSAGE_PREFIX
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import IndexingStatus
from onyx.db.enums import IndexModelStatus
from onyx.db.index_attempt import create_index_attempt_error
from onyx.db.index_attempt import get_index_attempt
from onyx.db.index_attempt import get_index_attempt_errors_for_cc_pair
@@ -48,20 +45,16 @@ from onyx.db.index_attempt import transition_attempt_to_in_progress
from onyx.db.index_attempt import update_docs_indexed
from onyx.db.models import IndexAttempt
from onyx.db.models import IndexAttemptError
from onyx.db.models import IndexingStatus
from onyx.db.models import IndexModelStatus
from onyx.document_index.factory import get_default_document_index
from onyx.httpx.httpx_pool import HttpxPool
from onyx.indexing.embedder import DefaultIndexingEmbedder
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.indexing.indexing_pipeline import build_indexing_pipeline
from onyx.natural_language_processing.search_nlp_models import (
InformationContentClassificationModel,
)
from onyx.redis.redis_connector import RedisConnector
from onyx.utils.logger import setup_logger
from onyx.utils.logger import TaskAttemptSingleton
from onyx.utils.telemetry import create_milestone_and_report
from onyx.utils.telemetry import optional_telemetry
from onyx.utils.telemetry import RecordType
from onyx.utils.variable_functionality import global_version
from shared_configs.configs import MULTI_TENANT
@@ -161,12 +154,14 @@ def strip_null_characters(doc_batch: list[Document]) -> list[Document]:
)
for section in cleaned_doc.sections:
if section.link is not None:
if section.link and "\x00" in section.link:
logger.warning(
f"NUL characters found in document link for document: {cleaned_doc.id}"
)
section.link = section.link.replace("\x00", "")
# since text can be longer, just replace to avoid double scan
if isinstance(section, TextSection) and section.text is not None:
section.text = section.text.replace("\x00", "")
section.text = section.text.replace("\x00", "")
cleaned_batch.append(cleaned_doc)
@@ -354,8 +349,6 @@ def _run_indexing(
callback=callback,
)
information_content_classification_model = InformationContentClassificationModel()
document_index = get_default_document_index(
index_attempt_start.search_settings,
None,
@@ -364,7 +357,6 @@ def _run_indexing(
indexing_pipeline = build_indexing_pipeline(
embedder=embedding_model,
information_content_classification_model=information_content_classification_model,
document_index=document_index,
ignore_time_skip=(
ctx.from_beginning
@@ -390,7 +382,6 @@ def _run_indexing(
net_doc_change = 0
document_count = 0
chunk_count = 0
index_attempt: IndexAttempt | None = None
try:
with get_session_with_current_tenant() as db_session_temp:
index_attempt = get_index_attempt(db_session_temp, index_attempt_id)
@@ -409,7 +400,7 @@ def _run_indexing(
# the beginning in order to avoid weird interactions between
# checkpointing / failure handling.
if index_attempt.from_beginning:
checkpoint = connector_runner.connector.build_dummy_checkpoint()
checkpoint = ConnectorCheckpoint.build_dummy_checkpoint()
else:
checkpoint = get_latest_valid_checkpoint(
db_session=db_session_temp,
@@ -417,7 +408,6 @@ def _run_indexing(
search_settings_id=index_attempt.search_settings_id,
window_start=window_start,
window_end=window_end,
connector=connector_runner.connector,
)
unresolved_errors = get_index_attempt_errors_for_cc_pair(
@@ -438,7 +428,7 @@ def _run_indexing(
while checkpoint.has_more:
logger.info(
f"Running '{ctx.source.value}' connector with checkpoint: {checkpoint}"
f"Running '{ctx.source}' connector with checkpoint: {checkpoint}"
)
for document_batch, failure, next_checkpoint in connector_runner.run(
checkpoint
@@ -489,11 +479,7 @@ def _run_indexing(
doc_size = 0
for section in doc.sections:
if (
isinstance(section, TextSection)
and section.text is not None
):
doc_size += len(section.text)
doc_size += len(section.text)
if doc_size > INDEXING_SIZE_WARNING_THRESHOLD:
logger.warning(
@@ -573,22 +559,6 @@ def _run_indexing(
if callback:
callback.progress("_run_indexing", len(doc_batch_cleaned))
# Add telemetry for indexing progress
optional_telemetry(
record_type=RecordType.INDEXING_PROGRESS,
data={
"index_attempt_id": index_attempt_id,
"cc_pair_id": ctx.cc_pair_id,
"connector_id": ctx.connector_id,
"credential_id": ctx.credential_id,
"total_docs_indexed": document_count,
"total_chunks": chunk_count,
"batch_num": batch_num,
"source": ctx.source.value,
},
tenant_id=tenant_id,
)
memory_tracer.increment_and_maybe_trace()
# make sure the checkpoints aren't getting too large at some regular interval
@@ -604,30 +574,6 @@ def _run_indexing(
checkpoint=checkpoint,
)
# Add telemetry for completed indexing
redis_connector = RedisConnector(tenant_id, ctx.cc_pair_id)
redis_connector_index = redis_connector.new_index(
index_attempt_start.search_settings_id
)
final_progress = redis_connector_index.get_progress() or 0
optional_telemetry(
record_type=RecordType.INDEXING_COMPLETE,
data={
"index_attempt_id": index_attempt_id,
"cc_pair_id": ctx.cc_pair_id,
"connector_id": ctx.connector_id,
"credential_id": ctx.credential_id,
"total_docs_indexed": document_count,
"total_chunks": chunk_count,
"batch_count": batch_num,
"time_elapsed_seconds": time.monotonic() - start_time,
"source": ctx.source.value,
"redis_progress": final_progress,
},
tenant_id=tenant_id,
)
except Exception as e:
logger.exception(
"Connector run exceptioned after elapsed time: "
@@ -641,44 +587,16 @@ def _run_indexing(
mark_attempt_canceled(
index_attempt_id,
db_session_temp,
reason=f"{CONNECTOR_VALIDATION_ERROR_MESSAGE_PREFIX}{str(e)}",
reason=str(e),
)
if ctx.is_primary:
if not index_attempt:
# should always be set by now
raise RuntimeError("Should never happen.")
VALIDATION_ERROR_THRESHOLD = 5
recent_index_attempts = get_recent_completed_attempts_for_cc_pair(
cc_pair_id=ctx.cc_pair_id,
search_settings_id=index_attempt.search_settings_id,
limit=VALIDATION_ERROR_THRESHOLD,
update_connector_credential_pair(
db_session=db_session_temp,
connector_id=ctx.connector_id,
credential_id=ctx.credential_id,
status=ConnectorCredentialPairStatus.INVALID,
)
num_validation_errors = len(
[
index_attempt
for index_attempt in recent_index_attempts
if index_attempt.error_msg
and index_attempt.error_msg.startswith(
CONNECTOR_VALIDATION_ERROR_MESSAGE_PREFIX
)
]
)
if num_validation_errors >= VALIDATION_ERROR_THRESHOLD:
logger.warning(
f"Connector {ctx.connector_id} has {num_validation_errors} consecutive validation"
f" errors. Marking the CC Pair as invalid."
)
update_connector_credential_pair(
db_session=db_session_temp,
connector_id=ctx.connector_id,
credential_id=ctx.credential_id,
status=ConnectorCredentialPairStatus.INVALID,
)
memory_tracer.stop()
raise e

View File

@@ -30,7 +30,7 @@ from onyx.tools.tool import Tool
from onyx.tools.tool_implementations.search.search_tool import QUERY_FIELD
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.utils import explicit_tool_calling_supported
from onyx.utils.gpu_utils import fast_gpu_status_request
from onyx.utils.gpu_utils import gpu_status_request
from onyx.utils.logger import setup_logger
logger = setup_logger()
@@ -88,9 +88,7 @@ class Answer:
rerank_settings is not None
and rerank_settings.rerank_provider_type is not None
)
allow_agent_reranking = (
fast_gpu_status_request(indexing=False) or using_cloud_reranking
)
allow_agent_reranking = gpu_status_request() or using_cloud_reranking
# TODO: this is a hack to force the query to be used for the search tool
# this should be removed once we fully unify graph inputs (i.e.

View File

@@ -1,13 +1,10 @@
from collections import OrderedDict
from collections.abc import Callable
from collections.abc import Iterator
from collections.abc import Mapping
from datetime import datetime
from enum import Enum
from typing import Any
from typing import Literal
from typing import TYPE_CHECKING
from typing import Union
from pydantic import BaseModel
from pydantic import ConfigDict
@@ -47,44 +44,9 @@ class LlmDoc(BaseModel):
class SubQuestionIdentifier(BaseModel):
"""None represents references to objects in the original flow. To our understanding,
these will not be None in the packets returned from agent search.
"""
level: int | None = None
level_question_num: int | None = None
@staticmethod
def make_dict_by_level(
original_dict: Mapping[tuple[int, int], "SubQuestionIdentifier"]
) -> dict[int, list["SubQuestionIdentifier"]]:
"""returns a dict of level to object list (sorted by level_question_num)
Ordering is asc for readability.
"""
# organize by level, then sort ascending by question_index
level_dict: dict[int, list[SubQuestionIdentifier]] = {}
# group by level
for k, obj in original_dict.items():
level = k[0]
if level not in level_dict:
level_dict[level] = []
level_dict[level].append(obj)
# for each level, sort the group
for k2, value2 in level_dict.items():
# we need to handle the none case due to SubQuestionIdentifier typing
# level_question_num as int | None, even though it should never be None here.
level_dict[k2] = sorted(
value2,
key=lambda x: (x.level_question_num is None, x.level_question_num),
)
# sort by level
sorted_dict = OrderedDict(sorted(level_dict.items()))
return sorted_dict
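# Worked example of the grouping above (object names are placeholders):
#
#   original_dict = {
#       (0, 2): q02,   # level 0, question 2
#       (0, 1): q01,
#       (1, 1): q11,
#   }
#   SubQuestionIdentifier.make_dict_by_level(original_dict)
#   # -> OrderedDict({0: [q01, q02], 1: [q11]})
#
# i.e. entries are grouped by level, each group is sorted ascending by
# level_question_num (None values last), and the levels themselves come back
# in ascending order.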
# First chunk of info for streaming QA
class QADocsResponse(RetrievalDocs, SubQuestionIdentifier):
@@ -194,6 +156,17 @@ class StreamingError(BaseModel):
stack_trace: str | None = None
class OnyxContext(BaseModel):
content: str
document_id: str
semantic_identifier: str
blurb: str
class OnyxContexts(BaseModel):
contexts: list[OnyxContext]
class OnyxAnswer(BaseModel):
answer: str | None
@@ -259,6 +232,7 @@ class PersonaOverrideConfig(BaseModel):
AnswerQuestionPossibleReturn = (
OnyxAnswerPiece
| CitationInfo
| OnyxContexts
| FileChatDisplay
| CustomToolResponse
| StreamingError
@@ -362,8 +336,6 @@ class AgentAnswerPiece(SubQuestionIdentifier):
class SubQuestionPiece(SubQuestionIdentifier):
"""Refined sub questions generated from the initial user question."""
sub_question: str
@@ -375,13 +347,13 @@ class RefinedAnswerImprovement(BaseModel):
refined_answer_improvement: bool
AgentSearchPacket = Union[
AgentSearchPacket = (
SubQuestionPiece
| AgentAnswerPiece
| SubQueryPiece
| ExtendedToolResponse
| RefinedAnswerImprovement
]
)
AnswerPacket = (
AnswerQuestionPossibleReturn | AgentSearchPacket | ToolCallKickoff | ToolResponse

View File

@@ -29,6 +29,7 @@ from onyx.chat.models import LLMRelevanceFilterResponse
from onyx.chat.models import MessageResponseIDInfo
from onyx.chat.models import MessageSpecificCitations
from onyx.chat.models import OnyxAnswerPiece
from onyx.chat.models import OnyxContexts
from onyx.chat.models import PromptConfig
from onyx.chat.models import QADocsResponse
from onyx.chat.models import RefinedAnswerImprovement
@@ -72,7 +73,6 @@ from onyx.db.chat import get_or_create_root_message
from onyx.db.chat import reserve_message_id
from onyx.db.chat import translate_db_message_to_chat_message_detail
from onyx.db.chat import translate_db_search_doc_to_server_search_doc
from onyx.db.chat import update_chat_session_updated_at_timestamp
from onyx.db.engine import get_session_context_manager
from onyx.db.milestone import check_multi_assistant_milestone
from onyx.db.milestone import create_milestone_if_not_exists
@@ -130,6 +130,7 @@ from onyx.tools.tool_implementations.internet_search.internet_search_tool import
from onyx.tools.tool_implementations.search.search_tool import (
FINAL_CONTEXT_DOCUMENTS_ID,
)
from onyx.tools.tool_implementations.search.search_tool import SEARCH_DOC_CONTENT_ID
from onyx.tools.tool_implementations.search.search_tool import (
SEARCH_RESPONSE_SUMMARY_ID,
)
@@ -298,6 +299,7 @@ def _get_force_search_settings(
ChatPacket = (
StreamingError
| QADocsResponse
| OnyxContexts
| LLMRelevanceFilterResponse
| FinalUsedContextDocsResponse
| ChatMessageDetail
@@ -916,6 +918,8 @@ def stream_chat_message_objects(
response=custom_tool_response.tool_result,
tool_name=custom_tool_response.tool_name,
)
elif packet.id == SEARCH_DOC_CONTENT_ID and include_contexts:
yield cast(OnyxContexts, packet.response)
elif isinstance(packet, StreamStopInfo):
if packet.stop_reason == StreamStopReason.FINISHED:
@@ -1065,8 +1069,6 @@ def stream_chat_message_objects(
prev_message = next_answer_message
logger.debug("Committing messages")
# Explicitly update the timestamp on the chat session
update_chat_session_updated_at_timestamp(chat_session_id, db_session)
db_session.commit() # actually save user / assistant message
yield AgenticMessageResponseIDInfo(agentic_message_ids=agentic_message_ids)

View File

@@ -301,10 +301,6 @@ def prune_sections(
def _merge_doc_chunks(chunks: list[InferenceChunk]) -> InferenceSection:
assert (
len(set([chunk.document_id for chunk in chunks])) == 1
), "One distinct document must be passed into merge_doc_chunks"
# Assuming there are no duplicates by this point
sorted_chunks = sorted(chunks, key=lambda x: x.chunk_id)

View File

@@ -3,6 +3,7 @@ from collections.abc import Sequence
from pydantic import BaseModel
from onyx.chat.models import LlmDoc
from onyx.chat.models import OnyxContext
from onyx.context.search.models import InferenceChunk
@@ -11,7 +12,7 @@ class DocumentIdOrderMapping(BaseModel):
def map_document_id_order(
chunks: Sequence[InferenceChunk | LlmDoc], one_indexed: bool = True
chunks: Sequence[InferenceChunk | LlmDoc | OnyxContext], one_indexed: bool = True
) -> DocumentIdOrderMapping:
order_mapping = {}
current = 1 if one_indexed else 0

View File

@@ -1,8 +1,6 @@
import json
import os
import urllib.parse
from datetime import datetime
from datetime import timezone
from typing import cast
from onyx.auth.schemas import AuthBackend
@@ -10,9 +8,6 @@ from onyx.configs.constants import AuthType
from onyx.configs.constants import DocumentIndexType
from onyx.configs.constants import QueryHistoryType
from onyx.file_processing.enums import HtmlBasedConnectorTransformLinksStrategy
from onyx.prompts.image_analysis import DEFAULT_IMAGE_ANALYSIS_SYSTEM_PROMPT
from onyx.prompts.image_analysis import DEFAULT_IMAGE_SUMMARIZATION_SYSTEM_PROMPT
from onyx.prompts.image_analysis import DEFAULT_IMAGE_SUMMARIZATION_USER_PROMPT
#####
# App Configs
@@ -35,10 +30,6 @@ GENERATIVE_MODEL_ACCESS_CHECK_FREQ = int(
) # 1 day
DISABLE_GENERATIVE_AI = os.environ.get("DISABLE_GENERATIVE_AI", "").lower() == "true"
# Controls whether to allow admin query history reports with:
# 1. associated user emails
# 2. anonymized user emails
# 3. no queries
ONYX_QUERY_HISTORY_TYPE = QueryHistoryType(
(os.environ.get("ONYX_QUERY_HISTORY_TYPE") or QueryHistoryType.NORMAL.value).lower()
)
@@ -159,9 +150,10 @@ VESPA_CLOUD_CERT_PATH = os.environ.get("VESPA_CLOUD_CERT_PATH")
VESPA_CLOUD_KEY_PATH = os.environ.get("VESPA_CLOUD_KEY_PATH")
# Number of documents in a batch during indexing (further batching done by chunks before passing to bi-encoder)
INDEX_BATCH_SIZE = int(os.environ.get("INDEX_BATCH_SIZE") or 16)
MAX_DRIVE_WORKERS = int(os.environ.get("MAX_DRIVE_WORKERS", 4))
try:
INDEX_BATCH_SIZE = int(os.environ.get("INDEX_BATCH_SIZE", 16))
except ValueError:
INDEX_BATCH_SIZE = 16
# Below are intended to match the env variables names used by the official postgres docker image
# https://hub.docker.com/_/postgres
@@ -346,8 +338,8 @@ HTML_BASED_CONNECTOR_TRANSFORM_LINKS_STRATEGY = os.environ.get(
HtmlBasedConnectorTransformLinksStrategy.STRIP,
)
NOTION_CONNECTOR_DISABLE_RECURSIVE_PAGE_LOOKUP = (
os.environ.get("NOTION_CONNECTOR_DISABLE_RECURSIVE_PAGE_LOOKUP", "").lower()
NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP = (
os.environ.get("NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP", "").lower()
== "true"
)
@@ -385,27 +377,10 @@ CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD = int(
# https://community.developer.atlassian.com/t/confluence-cloud-time-zone-get-via-rest-api/35954/16
# https://jira.atlassian.com/browse/CONFCLOUD-69670
def get_current_tz_offset() -> int:
# datetime now() gets local time, datetime.now(timezone.utc) gets UTC time.
# remove tzinfo to compare non-timezone-aware objects.
time_diff = datetime.now() - datetime.now(timezone.utc).replace(tzinfo=None)
return round(time_diff.total_seconds() / 3600)
# enter as a floating point offset from UTC in hours (-24 < val < 24)
# this will be applied globally, so it probably makes sense to transition this to per
# connector at some point.
# For the default value, we assume that the user's local timezone is more likely to be
# correct (i.e. the configured user's timezone or the default server one) than UTC.
# https://developer.atlassian.com/cloud/confluence/cql-fields/#created
CONFLUENCE_TIMEZONE_OFFSET = float(
os.environ.get("CONFLUENCE_TIMEZONE_OFFSET", get_current_tz_offset())
)
GOOGLE_DRIVE_CONNECTOR_SIZE_THRESHOLD = int(
os.environ.get("GOOGLE_DRIVE_CONNECTOR_SIZE_THRESHOLD", 10 * 1024 * 1024)
)
CONFLUENCE_TIMEZONE_OFFSET = float(os.environ.get("CONFLUENCE_TIMEZONE_OFFSET", 0.0))
JIRA_CONNECTOR_LABELS_TO_SKIP = [
ignored_tag
@@ -436,9 +411,6 @@ EGNYTE_CLIENT_SECRET = os.getenv("EGNYTE_CLIENT_SECRET")
LINEAR_CLIENT_ID = os.getenv("LINEAR_CLIENT_ID")
LINEAR_CLIENT_SECRET = os.getenv("LINEAR_CLIENT_SECRET")
# Slack specific configs
SLACK_NUM_THREADS = int(os.getenv("SLACK_NUM_THREADS") or 2)
DASK_JOB_CLIENT_ENABLED = (
os.environ.get("DASK_JOB_CLIENT_ENABLED", "").lower() == "true"
)
@@ -671,28 +643,3 @@ MOCK_LLM_RESPONSE = (
DEFAULT_IMAGE_ANALYSIS_MAX_SIZE_MB = 20
# Number of pre-provisioned tenants to maintain
TARGET_AVAILABLE_TENANTS = int(os.environ.get("TARGET_AVAILABLE_TENANTS", "5"))
# Image summarization configuration
IMAGE_SUMMARIZATION_SYSTEM_PROMPT = os.environ.get(
"IMAGE_SUMMARIZATION_SYSTEM_PROMPT",
DEFAULT_IMAGE_SUMMARIZATION_SYSTEM_PROMPT,
)
# The user prompt for image summarization - the image filename will be automatically prepended
IMAGE_SUMMARIZATION_USER_PROMPT = os.environ.get(
"IMAGE_SUMMARIZATION_USER_PROMPT",
DEFAULT_IMAGE_SUMMARIZATION_USER_PROMPT,
)
IMAGE_ANALYSIS_SYSTEM_PROMPT = os.environ.get(
"IMAGE_ANALYSIS_SYSTEM_PROMPT",
DEFAULT_IMAGE_ANALYSIS_SYSTEM_PROMPT,
)
DISABLE_AUTO_AUTH_REFRESH = (
os.environ.get("DISABLE_AUTO_AUTH_REFRESH", "").lower() == "true"
)
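The get_current_tz_offset helper in this hunk derives a default CONFLUENCE_TIMEZONE_OFFSET by differencing local and UTC wall-clock time. A small deterministic check of that arithmetic, using a fixed instant and an assumed UTC-8 server timezone instead of the live clock:

from datetime import datetime, timedelta, timezone

# Assume a server running in UTC-8 (Pacific Standard Time) at a fixed instant.
utc_now = datetime(2025, 3, 7, 19, 0, tzinfo=timezone.utc)
local_now = utc_now.astimezone(timezone(timedelta(hours=-8))).replace(tzinfo=None)

# Same subtraction as get_current_tz_offset: naive local minus naive UTC.
time_diff = local_now - utc_now.replace(tzinfo=None)
assert round(time_diff.total_seconds() / 3600) == -8  # becomes the default offset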

View File

@@ -3,10 +3,6 @@ import socket
from enum import auto
from enum import Enum
ONYX_DEFAULT_APPLICATION_NAME = "Onyx"
ONYX_SLACK_URL = "https://join.slack.com/t/onyx-dot-app/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA"
ONYX_EMAILABLE_LOGO_MAX_DIM = 512
SOURCE_TYPE = "source_type"
# stored in the `metadata` of a chunk. Used to signify that this chunk should
# not be used for QA. For example, Google Drive file types which can't be parsed
@@ -44,7 +40,6 @@ DISABLED_GEN_AI_MSG = (
"You can still use Onyx as a search engine."
)
DEFAULT_PERSONA_ID = 0
DEFAULT_CC_PAIR_ID = 1
@@ -81,7 +76,6 @@ KV_REINDEX_KEY = "needs_reindexing"
KV_SEARCH_SETTINGS = "search_settings"
KV_UNSTRUCTURED_API_KEY = "unstructured_api_key"
KV_USER_STORE_KEY = "INVITED_USERS"
KV_PENDING_USERS_KEY = "PENDING_USERS"
KV_NO_AUTH_USER_PREFERENCES_KEY = "no_auth_user_preferences"
KV_CRED_KEY = "credential_id_{}"
KV_GMAIL_CRED_KEY = "gmail_app_credential"
@@ -179,7 +173,6 @@ class DocumentSource(str, Enum):
FIREFLIES = "fireflies"
EGNYTE = "egnyte"
AIRTABLE = "airtable"
HIGHSPOT = "highspot"
# Special case just for integration tests
MOCK_CONNECTOR = "mock_connector"
@@ -328,8 +321,6 @@ class OnyxRedisLocks:
"da_lock:check_connector_external_group_sync_beat"
)
MONITOR_BACKGROUND_PROCESSES_LOCK = "da_lock:monitor_background_processes"
CHECK_AVAILABLE_TENANTS_LOCK = "da_lock:check_available_tenants"
PRE_PROVISION_TENANT_LOCK = "da_lock:pre_provision_tenant"
CONNECTOR_DOC_PERMISSIONS_SYNC_LOCK_PREFIX = (
"da_lock:connector_doc_permissions_sync"
@@ -382,7 +373,6 @@ ONYX_CLOUD_TENANT_ID = "cloud"
# the redis namespace for runtime variables
ONYX_CLOUD_REDIS_RUNTIME = "runtime"
CLOUD_BUILD_FENCE_LOOKUP_TABLE_INTERVAL_DEFAULT = 600
class OnyxCeleryTask:
@@ -393,10 +383,6 @@ class OnyxCeleryTask:
CLOUD_MONITOR_CELERY_QUEUES = (
f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_monitor_celery_queues"
)
CHECK_AVAILABLE_TENANTS = f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check_available_tenants"
# Tenant pre-provisioning
PRE_PROVISION_TENANT = f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_pre_provision_tenant"
CHECK_FOR_CONNECTOR_DELETION = "check_for_connector_deletion_task"
CHECK_FOR_VESPA_SYNC_TASK = "check_for_vespa_sync_task"
@@ -412,7 +398,6 @@ class OnyxCeleryTask:
MONITOR_BACKGROUND_PROCESSES = "monitor_background_processes"
MONITOR_CELERY_QUEUES = "monitor_celery_queues"
MONITOR_PROCESS_MEMORY = "monitor_process_memory"
KOMBU_MESSAGE_CLEANUP_TASK = "kombu_message_cleanup_task"
CONNECTOR_PERMISSION_SYNC_GENERATOR_TASK = (

View File

@@ -132,10 +132,3 @@ if _LITELLM_EXTRA_BODY_RAW:
LITELLM_EXTRA_BODY = json.loads(_LITELLM_EXTRA_BODY_RAW)
except Exception:
pass
# Whether and how to lower scores for short chunks w/o relevant context
# Evaluated via custom ML model
USE_INFORMATION_CONTENT_CLASSIFICATION = (
os.environ.get("USE_INFORMATION_CONTENT_CLASSIFICATION", "false").lower() == "true"
)

View File

@@ -4,7 +4,6 @@ from concurrent.futures import Future
from concurrent.futures import ThreadPoolExecutor
from io import BytesIO
from typing import Any
from typing import cast
import requests
from pyairtable import Api as AirtableApi
@@ -17,8 +16,7 @@ from onyx.configs.constants import DocumentSource
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.models import Document
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TextSection
from onyx.connectors.models import Section
from onyx.file_processing.extract_file_text import extract_file_text
from onyx.file_processing.extract_file_text import get_file_ext
from onyx.utils.logger import setup_logger
@@ -269,7 +267,7 @@ class AirtableConnector(LoadConnector):
table_id: str,
view_id: str | None,
record_id: str,
) -> tuple[list[TextSection], dict[str, str | list[str]]]:
) -> tuple[list[Section], dict[str, str | list[str]]]:
"""
Process a single Airtable field and return sections or metadata.
@@ -307,7 +305,7 @@ class AirtableConnector(LoadConnector):
# Otherwise, create relevant sections
sections = [
TextSection(
Section(
link=link,
text=(
f"{field_name}:\n"
@@ -342,7 +340,7 @@ class AirtableConnector(LoadConnector):
table_name = table_schema.name
record_id = record["id"]
fields = record["fields"]
sections: list[TextSection] = []
sections: list[Section] = []
metadata: dict[str, str | list[str]] = {}
# Get primary field value if it exists
@@ -386,7 +384,7 @@ class AirtableConnector(LoadConnector):
return Document(
id=f"airtable__{record_id}",
sections=(cast(list[TextSection | ImageSection], sections)),
sections=sections,
source=DocumentSource.AIRTABLE,
semantic_identifier=semantic_id,
metadata=metadata,

View File

@@ -10,7 +10,7 @@ from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import Document
from onyx.connectors.models import TextSection
from onyx.connectors.models import Section
from onyx.utils.logger import setup_logger
logger = setup_logger()
@@ -82,7 +82,7 @@ class AsanaConnector(LoadConnector, PollConnector):
logger.debug(f"Converting Asana task {task.id} to Document")
return Document(
id=task.id,
sections=[TextSection(link=task.link, text=task.text)],
sections=[Section(link=task.link, text=task.text)],
doc_updated_at=task.last_modified,
source=DocumentSource.ASANA,
semantic_identifier=task.title,

View File

@@ -20,7 +20,7 @@ from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import TextSection
from onyx.connectors.models import Section
from onyx.file_processing.html_utils import parse_html_page_basic
from onyx.utils.logger import setup_logger
from onyx.utils.retry_wrapper import retry_builder
@@ -221,7 +221,7 @@ def _get_forums(
def _translate_forum_to_doc(af: AxeroForum) -> Document:
doc = Document(
id=af.doc_id,
sections=[TextSection(link=af.link, text=reply) for reply in af.responses],
sections=[Section(link=af.link, text=reply) for reply in af.responses],
source=DocumentSource.AXERO,
semantic_identifier=af.title,
doc_updated_at=af.last_update,
@@ -244,7 +244,7 @@ def _translate_content_to_doc(content: dict) -> Document:
doc = Document(
id="AXERO_" + str(content["ContentID"]),
sections=[TextSection(link=content["ContentURL"], text=page_text)],
sections=[Section(link=content["ContentURL"], text=page_text)],
source=DocumentSource.AXERO,
semantic_identifier=content["ContentTitle"],
doc_updated_at=time_str_to_utc(content["DateUpdated"]),

View File

@@ -25,7 +25,7 @@ from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import TextSection
from onyx.connectors.models import Section
from onyx.file_processing.extract_file_text import extract_file_text
from onyx.utils.logger import setup_logger
@@ -87,7 +87,7 @@ class BlobStorageConnector(LoadConnector, PollConnector):
credentials.get(key)
for key in ["aws_access_key_id", "aws_secret_access_key"]
):
raise ConnectorMissingCredentialError("Amazon S3")
raise ConnectorMissingCredentialError("Google Cloud Storage")
session = boto3.Session(
aws_access_key_id=credentials["aws_access_key_id"],
@@ -208,7 +208,7 @@ class BlobStorageConnector(LoadConnector, PollConnector):
batch.append(
Document(
id=f"{self.bucket_type}:{self.bucket_name}:{obj['Key']}",
sections=[TextSection(link=link, text=text)],
sections=[Section(link=link, text=text)],
source=DocumentSource(self.bucket_type.value),
semantic_identifier=name,
doc_updated_at=last_modified,
@@ -341,14 +341,7 @@ if __name__ == "__main__":
print("Sections:")
for section in doc.sections:
print(f" - Link: {section.link}")
if isinstance(section, TextSection) and section.text is not None:
print(f" - Text: {section.text[:100]}...")
elif (
hasattr(section, "image_file_name") and section.image_file_name
):
print(f" - Image: {section.image_file_name}")
else:
print("Error: Unknown section type")
print(f" - Text: {section.text[:100]}...")
print("---")
break

View File

@@ -18,7 +18,7 @@ from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import TextSection
from onyx.connectors.models import Section
from onyx.file_processing.html_utils import parse_html_page_basic
@@ -81,7 +81,7 @@ class BookstackConnector(LoadConnector, PollConnector):
)
return Document(
id="book__" + str(book.get("id")),
sections=[TextSection(link=url, text=text)],
sections=[Section(link=url, text=text)],
source=DocumentSource.BOOKSTACK,
semantic_identifier="Book: " + title,
title=title,
@@ -110,7 +110,7 @@ class BookstackConnector(LoadConnector, PollConnector):
)
return Document(
id="chapter__" + str(chapter.get("id")),
sections=[TextSection(link=url, text=text)],
sections=[Section(link=url, text=text)],
source=DocumentSource.BOOKSTACK,
semantic_identifier="Chapter: " + title,
title=title,
@@ -134,7 +134,7 @@ class BookstackConnector(LoadConnector, PollConnector):
)
return Document(
id="shelf:" + str(shelf.get("id")),
sections=[TextSection(link=url, text=text)],
sections=[Section(link=url, text=text)],
source=DocumentSource.BOOKSTACK,
semantic_identifier="Shelf: " + title,
title=title,
@@ -167,7 +167,7 @@ class BookstackConnector(LoadConnector, PollConnector):
time.sleep(0.1)
return Document(
id="page:" + page_id,
sections=[TextSection(link=url, text=text)],
sections=[Section(link=url, text=text)],
source=DocumentSource.BOOKSTACK,
semantic_identifier="Page: " + str(title),
title=str(title),

View File

@@ -17,7 +17,7 @@ from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import TextSection
from onyx.connectors.models import Section
from onyx.utils.retry_wrapper import retry_builder
@@ -62,11 +62,11 @@ class ClickupConnector(LoadConnector, PollConnector):
return response.json()
def _get_task_comments(self, task_id: str) -> list[TextSection]:
def _get_task_comments(self, task_id: str) -> list[Section]:
url_endpoint = f"/task/{task_id}/comment"
response = self._make_request(url_endpoint)
comments = [
TextSection(
Section(
link=f'https://app.clickup.com/t/{task_id}?comment={comment_dict["id"]}',
text=comment_dict["comment_text"],
)
@@ -133,7 +133,7 @@ class ClickupConnector(LoadConnector, PollConnector):
],
title=task["name"],
sections=[
TextSection(
Section(
link=task["url"],
text=(
task["markdown_description"]

View File

@@ -33,9 +33,9 @@ from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import ImageSection
from onyx.connectors.models import Section
from onyx.connectors.models import SlimDocument
from onyx.connectors.models import TextSection
from onyx.connectors.vision_enabled_connector import VisionEnabledConnector
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger
@@ -65,7 +65,19 @@ _RESTRICTIONS_EXPANSION_FIELDS = [
_SLIM_DOC_BATCH_SIZE = 5000
ONE_HOUR = 3600
_ATTACHMENT_EXTENSIONS_TO_FILTER_OUT = [
"gif",
"mp4",
"mov",
"mp3",
"wav",
]
_FULL_EXTENSION_FILTER_STRING = "".join(
[
f" and title!~'*.{extension}'"
for extension in _ATTACHMENT_EXTENSIONS_TO_FILTER_OUT
]
)
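# For reference, with the extension list above this join produces a CQL
# fragment along the lines of (illustrative rendering only):
#
#   " and title!~'*.gif' and title!~'*.mp4' and title!~'*.mov'"
#   " and title!~'*.mp3' and title!~'*.wav'"
#
# which _construct_attachment_query appends so that Confluence filters out
# these media attachments in the query itself, before anything is downloaded.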
class ConfluenceConnector(
@@ -73,6 +85,7 @@ class ConfluenceConnector(
PollConnector,
SlimConnector,
CredentialsConnector,
VisionEnabledConnector,
):
def __init__(
self,
@@ -102,7 +115,9 @@ class ConfluenceConnector(
self.timezone_offset = timezone_offset
self._confluence_client: OnyxConfluence | None = None
self._fetched_titles: set[str] = set()
self.allow_images = False
# Initialize vision LLM using the mixin
self.initialize_vision_llm()
# Remove trailing slash from wiki_base if present
self.wiki_base = wiki_base.rstrip("/")
@@ -147,9 +162,6 @@ class ConfluenceConnector(
"max_backoff_seconds": 60,
}
def set_allow_images(self, value: bool) -> None:
self.allow_images = value
@property
def confluence_client(self) -> OnyxConfluence:
if self._confluence_client is None:
@@ -195,6 +207,7 @@ class ConfluenceConnector(
def _construct_attachment_query(self, confluence_page_id: str) -> str:
attachment_query = f"type=attachment and container='{confluence_page_id}'"
attachment_query += self.cql_label_filter
attachment_query += _FULL_EXTENSION_FILTER_STRING
return attachment_query
def _get_comment_string_for_page_id(self, page_id: str) -> str:
@@ -224,9 +237,7 @@ class ConfluenceConnector(
# Extract basic page information
page_id = page["id"]
page_title = page["title"]
page_url = build_confluence_document_id(
self.wiki_base, page["_links"]["webui"], self.is_cloud
)
page_url = f"{self.wiki_base}{page['_links']['webui']}"
# Get the page content
page_content = extract_text_from_confluence_html(
@@ -234,16 +245,12 @@ class ConfluenceConnector(
)
# Create the main section for the page content
sections: list[TextSection | ImageSection] = [
TextSection(text=page_content, link=page_url)
]
sections = [Section(text=page_content, link=page_url)]
# Process comments if available
comment_text = self._get_comment_string_for_page_id(page_id)
if comment_text:
sections.append(
TextSection(text=comment_text, link=f"{page_url}#comments")
)
sections.append(Section(text=comment_text, link=f"{page_url}#comments"))
# Process attachments
if "children" in page and "attachment" in page["children"]:
@@ -256,28 +263,21 @@ class ConfluenceConnector(
result = process_attachment(
self.confluence_client,
attachment,
page_id,
self.allow_images,
page_title,
self.image_analysis_llm,
)
if result and result.text:
if result.text:
# Create a section for the attachment text
attachment_section = TextSection(
attachment_section = Section(
text=result.text,
link=f"{page_url}#attachment-{attachment['id']}",
)
sections.append(attachment_section)
elif result and result.file_name:
# Create an ImageSection for image attachments
image_section = ImageSection(
link=f"{page_url}#attachment-{attachment['id']}",
image_file_name=result.file_name,
)
sections.append(image_section)
else:
sections.append(attachment_section)
elif result.error:
logger.warning(
f"Error processing attachment '{attachment.get('title')}':",
f"{result.error if result else 'Unknown error'}",
f"Error processing attachment '{attachment.get('title')}': {result.error}"
)
# Extract metadata
@@ -298,14 +298,13 @@ class ConfluenceConnector(
if "version" in page and "by" in page["version"]:
author = page["version"]["by"]
display_name = author.get("displayName", "Unknown")
email = author.get("email", "unknown@domain.invalid")
primary_owners.append(
BasicExpertInfo(display_name=display_name, email=email)
)
primary_owners.append(BasicExpertInfo(display_name=display_name))
# Create the document
return Document(
id=page_url,
id=build_confluence_document_id(
self.wiki_base, page["_links"]["webui"], self.is_cloud
),
sections=sections,
source=DocumentSource.CONFLUENCE,
semantic_identifier=page_title,
@@ -349,7 +348,7 @@ class ConfluenceConnector(
# Now get attachments for that page:
attachment_query = self._construct_attachment_query(page["id"])
# We'll use the page's XML to provide context if we summarize an image
page.get("body", {}).get("storage", {}).get("value", "")
confluence_xml = page.get("body", {}).get("storage", {}).get("value", "")
for attachment in self.confluence_client.paginated_cql_retrieval(
cql=attachment_query,
@@ -357,39 +356,33 @@ class ConfluenceConnector(
):
attachment["metadata"].get("mediaType", "")
if not validate_attachment_filetype(
attachment,
attachment, self.image_analysis_llm
):
logger.info(f"Skipping attachment: {attachment['title']}")
continue
logger.info(f"Processing attachment: {attachment['title']}")
# Attempt to get textual content or image summarization:
try:
logger.info(f"Processing attachment: {attachment['title']}")
response = convert_attachment_to_content(
confluence_client=self.confluence_client,
attachment=attachment,
page_id=page["id"],
allow_images=self.allow_images,
page_context=confluence_xml,
llm=self.image_analysis_llm,
)
if response is None:
continue
content_text, file_storage_name = response
object_url = build_confluence_document_id(
self.wiki_base, attachment["_links"]["webui"], self.is_cloud
)
if content_text:
doc.sections.append(
TextSection(
Section(
text=content_text,
link=object_url,
)
)
elif file_storage_name:
doc.sections.append(
ImageSection(
link=object_url,
image_file_name=file_storage_name,
)
)
@@ -418,17 +411,7 @@ class ConfluenceConnector(
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
) -> GenerateDocumentsOutput:
try:
return self._fetch_document_batches(start, end)
except Exception as e:
if "field 'updated' is invalid" in str(e) and start is not None:
logger.warning(
"Confluence says we provided an invalid 'updated' field. This may indicate"
"a real issue, but can also appear during edge cases like daylight"
f"savings time changes. Retrying with a 1 hour offset. Error: {e}"
)
return self._fetch_document_batches(start - ONE_HOUR, end)
raise
return self._fetch_document_batches(start, end)
def retrieve_all_slim_documents(
self,
@@ -479,7 +462,7 @@ class ConfluenceConnector(
# If you skip images, you'll skip them in the permission sync
attachment["metadata"].get("mediaType", "")
if not validate_attachment_filetype(
attachment,
attachment, self.image_analysis_llm
):
continue

View File

@@ -1,3 +1,4 @@
import io
import json
import time
from collections.abc import Callable
@@ -18,11 +19,17 @@ from requests import HTTPError
from ee.onyx.configs.app_configs import OAUTH_CONFLUENCE_CLOUD_CLIENT_ID
from ee.onyx.configs.app_configs import OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET
from onyx.configs.app_configs import (
CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD,
)
from onyx.configs.app_configs import CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD
from onyx.connectors.confluence.utils import _handle_http_error
from onyx.connectors.confluence.utils import confluence_refresh_tokens
from onyx.connectors.confluence.utils import get_start_param_from_url
from onyx.connectors.confluence.utils import update_param_in_path
from onyx.connectors.confluence.utils import validate_attachment_filetype
from onyx.connectors.interfaces import CredentialsProviderInterface
from onyx.file_processing.extract_file_text import extract_file_text
from onyx.file_processing.html_utils import format_document_soup
from onyx.redis.redis_pool import get_redis_client
from onyx.utils.logger import setup_logger
@@ -498,12 +505,10 @@ class OnyxConfluence:
new_start = get_start_param_from_url(url_suffix)
previous_start = get_start_param_from_url(old_url_suffix)
if new_start - previous_start > len(results):
logger.debug(
logger.warning(
f"Start was updated by more than the amount of results "
f"retrieved for `{url_suffix}`. This is a bug with Confluence, "
"but we have logic to work around it - don't worry this isn't"
f" causing an issue. Start: {new_start}, Previous Start: "
f"{previous_start}, Len Results: {len(results)}."
f"retrieved. This is a bug with Confluence. Start: {new_start}, "
f"Previous Start: {previous_start}, Len Results: {len(results)}."
)
# Update the url_suffix to use the adjusted start
@@ -803,6 +808,65 @@ def _get_user(confluence_client: OnyxConfluence, user_id: str) -> str:
return _USER_ID_TO_DISPLAY_NAME_CACHE.get(user_id) or _USER_NOT_FOUND
def attachment_to_content(
confluence_client: OnyxConfluence,
attachment: dict[str, Any],
parent_content_id: str | None = None,
) -> str | None:
"""If it returns None, assume that we should skip this attachment."""
if not validate_attachment_filetype(attachment):
return None
if "api.atlassian.com" in confluence_client.url:
# https://developer.atlassian.com/cloud/confluence/rest/v1/api-group-content---attachments/#api-wiki-rest-api-content-id-child-attachment-attachmentid-download-get
if not parent_content_id:
logger.warning(
"parent_content_id is required to download attachments from Confluence Cloud!"
)
return None
download_link = (
confluence_client.url
+ f"/rest/api/content/{parent_content_id}/child/attachment/{attachment['id']}/download"
)
else:
download_link = confluence_client.url + attachment["_links"]["download"]
attachment_size = attachment["extensions"]["fileSize"]
if attachment_size > CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD:
logger.warning(
f"Skipping {download_link} due to size. "
f"size={attachment_size} "
f"threshold={CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD}"
)
return None
logger.info(f"_attachment_to_content - _session.get: link={download_link}")
# why are we using session.get here? we probably won't retry these ... is that ok?
response = confluence_client._session.get(download_link)
if response.status_code != 200:
logger.warning(
f"Failed to fetch {download_link} with invalid status code {response.status_code}"
)
return None
extracted_text = extract_file_text(
io.BytesIO(response.content),
file_name=attachment["title"],
break_on_unprocessable=False,
)
if len(extracted_text) > CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD:
logger.warning(
f"Skipping {download_link} due to char count. "
f"char count={len(extracted_text)} "
f"threshold={CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD}"
)
return None
return extracted_text
def extract_text_from_confluence_html(
confluence_client: OnyxConfluence,
confluence_object: dict[str, Any],

View File

@@ -22,7 +22,6 @@ from sqlalchemy.orm import Session
from onyx.configs.app_configs import (
CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD,
)
from onyx.configs.app_configs import CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD
from onyx.configs.constants import FileOrigin
if TYPE_CHECKING:
@@ -36,6 +35,7 @@ from onyx.db.pg_file_store import upsert_pgfilestore
from onyx.file_processing.extract_file_text import extract_file_text
from onyx.file_processing.file_validation import is_valid_image_type
from onyx.file_processing.image_utils import store_image_and_create_section
from onyx.llm.interfaces import LLM
from onyx.utils.logger import setup_logger
logger = setup_logger()
@@ -53,16 +53,17 @@ class TokenResponse(BaseModel):
def validate_attachment_filetype(
attachment: dict[str, Any],
attachment: dict[str, Any], llm: LLM | None = None
) -> bool:
"""
Validates if the attachment is a supported file type.
If LLM is provided, also checks if it's an image that can be processed.
"""
attachment.get("metadata", {})
media_type = attachment.get("metadata", {}).get("mediaType", "")
if media_type.startswith("image/"):
return is_valid_image_type(media_type)
return llm is not None and is_valid_image_type(media_type)
# For non-image files, check if we support the extension
title = attachment.get("title", "")
@@ -83,111 +84,55 @@ class AttachmentProcessingResult(BaseModel):
error: str | None = None
def _make_attachment_link(
confluence_client: "OnyxConfluence",
attachment: dict[str, Any],
parent_content_id: str | None = None,
) -> str | None:
download_link = ""
if "api.atlassian.com" in confluence_client.url:
# https://developer.atlassian.com/cloud/confluence/rest/v1/api-group-content---attachments/#api-wiki-rest-api-content-id-child-attachment-attachmentid-download-get
if not parent_content_id:
logger.warning(
"parent_content_id is required to download attachments from Confluence Cloud!"
)
return None
download_link = (
confluence_client.url
+ f"/rest/api/content/{parent_content_id}/child/attachment/{attachment['id']}/download"
def _download_attachment(
confluence_client: "OnyxConfluence", attachment: dict[str, Any]
) -> bytes | None:
"""
Retrieves the raw bytes of an attachment from Confluence. Returns None on error.
"""
download_link = confluence_client.url + attachment["_links"]["download"]
resp = confluence_client._session.get(download_link)
if resp.status_code != 200:
logger.warning(
f"Failed to fetch {download_link} with status code {resp.status_code}"
)
else:
download_link = confluence_client.url + attachment["_links"]["download"]
return download_link
return None
return resp.content
def process_attachment(
confluence_client: "OnyxConfluence",
attachment: dict[str, Any],
parent_content_id: str | None,
allow_images: bool,
page_context: str,
llm: LLM | None,
) -> AttachmentProcessingResult:
"""
Processes a Confluence attachment. If it's a document, extracts text,
or if it's an image, stores it for later analysis. Returns a structured result.
or if it's an image and an LLM is available, summarizes it. Returns a structured result.
"""
try:
# Get the media type from the attachment metadata
media_type: str = attachment.get("metadata", {}).get("mediaType", "")
media_type = attachment.get("metadata", {}).get("mediaType", "")
# Validate the attachment type
if not validate_attachment_filetype(attachment):
if not validate_attachment_filetype(attachment, llm):
return AttachmentProcessingResult(
text=None,
file_name=None,
error=f"Unsupported file type: {media_type}",
)
attachment_link = _make_attachment_link(
confluence_client, attachment, parent_content_id
)
if not attachment_link:
return AttachmentProcessingResult(
text=None, file_name=None, error="Failed to make attachment link"
)
attachment_size = attachment["extensions"]["fileSize"]
if media_type.startswith("image/"):
if not allow_images:
return AttachmentProcessingResult(
text=None,
file_name=None,
error="Image downloading is not enabled",
)
else:
if attachment_size > CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD:
logger.warning(
f"Skipping {attachment_link} due to size. "
f"size={attachment_size} "
f"threshold={CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD}"
)
return AttachmentProcessingResult(
text=None,
file_name=None,
error=f"Attachment text too long: {attachment_size} chars",
)
logger.info(
f"Downloading attachment: "
f"title={attachment['title']} "
f"length={attachment_size} "
f"link={attachment_link}"
)
# Download the attachment
resp: requests.Response = confluence_client._session.get(attachment_link)
if resp.status_code != 200:
logger.warning(
f"Failed to fetch {attachment_link} with status code {resp.status_code}"
)
raw_bytes = _download_attachment(confluence_client, attachment)
if raw_bytes is None:
return AttachmentProcessingResult(
text=None,
file_name=None,
error=f"Attachment download status code is {resp.status_code}",
text=None, file_name=None, error="Failed to download attachment"
)
raw_bytes = resp.content
if not raw_bytes:
return AttachmentProcessingResult(
text=None, file_name=None, error="attachment.content is None"
)
# Process image attachments
if media_type.startswith("image/"):
# Process image attachments with LLM if available
if media_type.startswith("image/") and llm:
return _process_image_attachment(
confluence_client, attachment, raw_bytes, media_type
confluence_client, attachment, page_context, llm, raw_bytes, media_type
)
# Process document attachments
@@ -220,10 +165,12 @@ def process_attachment(
def _process_image_attachment(
confluence_client: "OnyxConfluence",
attachment: dict[str, Any],
page_context: str,
llm: LLM,
raw_bytes: bytes,
media_type: str,
) -> AttachmentProcessingResult:
"""Process an image attachment by saving it without generating a summary."""
"""Process an image attachment by saving it and generating a summary."""
try:
# Use the standardized image storage and section creation
with get_session_with_current_tenant() as db_session:
@@ -233,14 +180,15 @@ def _process_image_attachment(
file_name=Path(attachment["id"]).name,
display_name=attachment["title"],
media_type=media_type,
llm=llm,
file_origin=FileOrigin.CONNECTOR,
)
logger.info(f"Stored image attachment with file name: {file_name}")
# Return empty text but include the file_name for later processing
return AttachmentProcessingResult(text="", file_name=file_name, error=None)
return AttachmentProcessingResult(
text=section.text, file_name=file_name, error=None
)
except Exception as e:
msg = f"Image storage failed for {attachment['title']}: {e}"
msg = f"Image summarization failed for {attachment['title']}: {e}"
logger.error(msg, exc_info=e)
return AttachmentProcessingResult(text=None, file_name=None, error=msg)
@@ -301,16 +249,16 @@ def _process_text_attachment(
def convert_attachment_to_content(
confluence_client: "OnyxConfluence",
attachment: dict[str, Any],
page_id: str,
allow_images: bool,
page_context: str,
llm: LLM | None,
) -> tuple[str | None, str | None] | None:
"""
Facade function which:
1. Validates attachment type
2. Extracts content or stores image for later processing
2. Extracts or summarizes content
3. Returns (content_text, stored_file_name) or None if we should skip it
"""
media_type = attachment.get("metadata", {}).get("mediaType", "")
media_type = attachment["metadata"]["mediaType"]
# Quick check for unsupported types:
if media_type.startswith("video/") or media_type == "application/gliffy+json":
logger.warning(
@@ -318,7 +266,7 @@ def convert_attachment_to_content(
)
return None
result = process_attachment(confluence_client, attachment, page_id, allow_images)
result = process_attachment(confluence_client, attachment, page_context, llm)
if result.error is not None:
logger.warning(
f"Attachment {attachment['title']} encountered error: {result.error}"
@@ -531,10 +479,6 @@ def attachment_to_file_record(
download_link, absolute=True, not_json_response=True
)
file_type = attachment.get("metadata", {}).get(
"mediaType", "application/octet-stream"
)
# Save image to file store
file_name = f"confluence_attachment_{attachment['id']}"
lobj_oid = create_populate_lobj(BytesIO(image_data), db_session)
@@ -542,7 +486,7 @@ def attachment_to_file_record(
file_name=file_name,
display_name=attachment["title"],
file_origin=FileOrigin.OTHER,
file_type=file_type,
file_type=attachment["metadata"]["mediaType"],
lobj_oid=lobj_oid,
db_session=db_session,
commit=True,

View File

@@ -2,8 +2,6 @@ import sys
import time
from collections.abc import Generator
from datetime import datetime
from typing import Generic
from typing import TypeVar
from onyx.connectors.interfaces import BaseConnector
from onyx.connectors.interfaces import CheckpointConnector
@@ -21,10 +19,8 @@ logger = setup_logger()
TimeRange = tuple[datetime, datetime]
CT = TypeVar("CT", bound=ConnectorCheckpoint)
class CheckpointOutputWrapper(Generic[CT]):
class CheckpointOutputWrapper:
"""
Wraps a CheckpointOutput generator to give things back in a more digestible format.
The connector format is easier for the connector implementor (e.g. it enforces exactly
@@ -33,20 +29,20 @@ class CheckpointOutputWrapper(Generic[CT]):
"""
def __init__(self) -> None:
self.next_checkpoint: CT | None = None
self.next_checkpoint: ConnectorCheckpoint | None = None
def __call__(
self,
checkpoint_connector_generator: CheckpointOutput[CT],
checkpoint_connector_generator: CheckpointOutput,
) -> Generator[
tuple[Document | None, ConnectorFailure | None, CT | None],
tuple[Document | None, ConnectorFailure | None, ConnectorCheckpoint | None],
None,
None,
]:
# grabs the final return value and stores it in the `next_checkpoint` variable
def _inner_wrapper(
checkpoint_connector_generator: CheckpointOutput[CT],
) -> CheckpointOutput[CT]:
checkpoint_connector_generator: CheckpointOutput,
) -> CheckpointOutput:
self.next_checkpoint = yield from checkpoint_connector_generator
return self.next_checkpoint # not used
@@ -68,7 +64,7 @@ class CheckpointOutputWrapper(Generic[CT]):
yield None, None, self.next_checkpoint
class ConnectorRunner(Generic[CT]):
class ConnectorRunner:
"""
Handles:
- Batching
@@ -89,9 +85,11 @@ class ConnectorRunner(Generic[CT]):
self.doc_batch: list[Document] = []
def run(
self, checkpoint: CT
self, checkpoint: ConnectorCheckpoint
) -> Generator[
tuple[list[Document] | None, ConnectorFailure | None, CT | None],
tuple[
list[Document] | None, ConnectorFailure | None, ConnectorCheckpoint | None
],
None,
None,
]:
@@ -107,9 +105,9 @@ class ConnectorRunner(Generic[CT]):
end=self.time_range[1].timestamp(),
checkpoint=checkpoint,
)
next_checkpoint: CT | None = None
next_checkpoint: ConnectorCheckpoint | None = None
# this is guaranteed to always run at least once with next_checkpoint being non-None
for document, failure, next_checkpoint in CheckpointOutputWrapper[CT]()(
for document, failure, next_checkpoint in CheckpointOutputWrapper()(
checkpoint_connector_generator
):
if document is not None:
@@ -134,7 +132,7 @@ class ConnectorRunner(Generic[CT]):
)
else:
finished_checkpoint = self.connector.build_dummy_checkpoint()
finished_checkpoint = ConnectorCheckpoint.build_dummy_checkpoint()
finished_checkpoint.has_more = False
if isinstance(self.connector, PollConnector):
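
The wrapper change above hinges on the yield-from pattern: the checkpoint generator yields documents and then returns the next checkpoint, which the wrapper captures from the generator's return value. Here is a self-contained sketch of that mechanism, assuming simplified stand-in types rather than the onyx models:

from collections.abc import Generator
from dataclasses import dataclass


@dataclass
class Checkpoint:
    has_more: bool


def connector_generator() -> Generator[str, None, Checkpoint]:
    yield "doc-1"
    yield "doc-2"
    # The next checkpoint travels back via the generator's return value.
    return Checkpoint(has_more=False)


class OutputWrapper:
    def __init__(self) -> None:
        self.next_checkpoint: Checkpoint | None = None

    def __call__(
        self, gen: Generator[str, None, Checkpoint]
    ) -> Generator[tuple[str | None, Checkpoint | None], None, None]:
        def _inner() -> Generator[str, None, Checkpoint]:
            # yield from re-yields each document and hands us the return value.
            self.next_checkpoint = yield from gen
            return self.next_checkpoint

        for doc in _inner():
            yield doc, None
        # Emit the captured checkpoint once the documents are exhausted.
        yield None, self.next_checkpoint


for doc, checkpoint in OutputWrapper()(connector_generator()):
    print(doc, checkpoint)

The generic parameter in CheckpointOutputWrapper[CT] only changes the static typing of this flow; the runtime capture works the same either way.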

View File

@@ -4,7 +4,6 @@ from collections.abc import Iterable
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import cast
from discord import Client
from discord.channel import TextChannel
@@ -21,8 +20,7 @@ from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TextSection
from onyx.connectors.models import Section
from onyx.utils.logger import setup_logger
logger = setup_logger()
@@ -34,7 +32,7 @@ _SNIPPET_LENGTH = 30
def _convert_message_to_document(
message: DiscordMessage,
sections: list[TextSection],
sections: list[Section],
) -> Document:
"""
Convert a discord message to a document
@@ -80,7 +78,7 @@ def _convert_message_to_document(
semantic_identifier=semantic_identifier,
doc_updated_at=message.edited_at,
title=title,
sections=(cast(list[TextSection | ImageSection], sections)),
sections=sections,
metadata=metadata,
)
@@ -125,8 +123,8 @@ async def _fetch_documents_from_channel(
if channel_message.type != MessageType.default:
continue
sections: list[TextSection] = [
TextSection(
sections: list[Section] = [
Section(
text=channel_message.content,
link=channel_message.jump_url,
)
@@ -144,7 +142,7 @@ async def _fetch_documents_from_channel(
continue
sections = [
TextSection(
Section(
text=thread_message.content,
link=thread_message.jump_url,
)
@@ -162,7 +160,7 @@ async def _fetch_documents_from_channel(
continue
sections = [
TextSection(
Section(
text=thread_message.content,
link=thread_message.jump_url,
)
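
Several hunks in this comparison add or drop cast(list[TextSection | ImageSection], sections) around a list built from text sections only. The cast exists because lists are invariant under static type checkers: list[TextSection] is not accepted where list[TextSection | ImageSection] is expected, even though every element fits. A small sketch with simplified stand-in classes, not the onyx models:

from dataclasses import dataclass
from typing import cast


@dataclass
class TextSection:
    link: str
    text: str


@dataclass
class ImageSection:
    link: str
    image_file_name: str


def build_document(sections: list[TextSection | ImageSection]) -> int:
    # Stand-in for Document(...): just count the sections it was given.
    return len(sections)


text_only: list[TextSection] = [
    TextSection(link="https://example.invalid", text="hello")
]

# Without the cast, a type checker flags the call even though it is safe here;
# the cast records that a text-only list is deliberately passed where mixed
# sections are allowed.
print(build_document(cast(list[TextSection | ImageSection], text_only)))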

View File

@@ -3,7 +3,6 @@ import urllib.parse
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import cast
import requests
from pydantic import BaseModel
@@ -21,8 +20,7 @@ from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TextSection
from onyx.connectors.models import Section
from onyx.file_processing.html_utils import parse_html_page_basic
from onyx.utils.logger import setup_logger
from onyx.utils.retry_wrapper import retry_builder
@@ -114,7 +112,7 @@ class DiscourseConnector(PollConnector):
responders.append(BasicExpertInfo(display_name=responder_name))
sections.append(
TextSection(link=topic_url, text=parse_html_page_basic(post["cooked"]))
Section(link=topic_url, text=parse_html_page_basic(post["cooked"]))
)
category_name = self.category_id_map.get(topic["category_id"])
@@ -131,7 +129,7 @@ class DiscourseConnector(PollConnector):
doc = Document(
id="_".join([DocumentSource.DISCOURSE.value, str(topic["id"])]),
sections=cast(list[TextSection | ImageSection], sections),
sections=sections,
source=DocumentSource.DISCOURSE,
semantic_identifier=topic["title"],
doc_updated_at=time_str_to_utc(topic["last_posted_at"]),

View File

@@ -19,7 +19,7 @@ from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import TextSection
from onyx.connectors.models import Section
from onyx.file_processing.html_utils import parse_html_page_basic
from onyx.utils.retry_wrapper import retry_builder
@@ -158,7 +158,7 @@ class Document360Connector(LoadConnector, PollConnector):
document = Document(
id=article_details["id"],
sections=[TextSection(link=doc_link, text=doc_text)],
sections=[Section(link=doc_link, text=doc_text)],
source=DocumentSource.DOCUMENT360,
semantic_identifier=article_details["title"],
doc_updated_at=updated_at,

View File

@@ -19,7 +19,7 @@ from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import TextSection
from onyx.connectors.models import Section
from onyx.file_processing.extract_file_text import extract_file_text
from onyx.utils.logger import setup_logger
@@ -108,7 +108,7 @@ class DropboxConnector(LoadConnector, PollConnector):
batch.append(
Document(
id=f"doc:{entry.id}",
sections=[TextSection(link=link, text=text)],
sections=[Section(link=link, text=text)],
source=DocumentSource.DROPBOX,
semantic_identifier=entry.name,
doc_updated_at=modified_time,

View File

@@ -24,13 +24,12 @@ from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import TextSection
from onyx.connectors.models import Section
from onyx.file_processing.extract_file_text import detect_encoding
from onyx.file_processing.extract_file_text import extract_file_text
from onyx.file_processing.extract_file_text import get_file_ext
from onyx.file_processing.extract_file_text import is_accepted_file_ext
from onyx.file_processing.extract_file_text import is_text_file_extension
from onyx.file_processing.extract_file_text import OnyxExtensionType
from onyx.file_processing.extract_file_text import is_valid_file_ext
from onyx.file_processing.extract_file_text import read_text_file
from onyx.utils.logger import setup_logger
from onyx.utils.retry_wrapper import request_with_retries
@@ -70,9 +69,7 @@ def _process_egnyte_file(
file_name = file_metadata["name"]
extension = get_file_ext(file_name)
if not is_accepted_file_ext(
extension, OnyxExtensionType.Plain | OnyxExtensionType.Document
):
if not is_valid_file_ext(extension):
logger.warning(f"Skipping file '{file_name}' with extension '{extension}'")
return None
@@ -114,7 +111,7 @@ def _process_egnyte_file(
# Create the document
return Document(
id=f"egnyte-{file_metadata['entry_id']}",
sections=[TextSection(text=file_content_raw.strip(), link=web_url)],
sections=[Section(text=file_content_raw.strip(), link=web_url)],
source=DocumentSource.EGNYTE,
semantic_identifier=file_name,
metadata=metadata,
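
The Egnyte hunk above, and the file connector further down, switch between a single is_valid_file_ext(extension) check and a flag-based is_accepted_file_ext(extension, OnyxExtensionType.Plain | OnyxExtensionType.Document). The following is a hedged sketch of how such a flag filter can work; the flag members and the extension mapping are assumptions for illustration, not the actual onyx definitions:

from enum import Flag, auto


class OnyxExtensionType(Flag):
    Plain = auto()
    Document = auto()
    Multimedia = auto()
    All = Plain | Document | Multimedia


# Hypothetical mapping; the real helpers keep their own extension lists.
_EXTENSION_TYPES = {
    ".txt": OnyxExtensionType.Plain,
    ".md": OnyxExtensionType.Plain,
    ".pdf": OnyxExtensionType.Document,
    ".docx": OnyxExtensionType.Document,
    ".png": OnyxExtensionType.Multimedia,
    ".jpg": OnyxExtensionType.Multimedia,
}


def is_accepted_file_ext(extension: str, accepted: OnyxExtensionType) -> bool:
    ext_type = _EXTENSION_TYPES.get(extension.lower())
    return ext_type is not None and bool(ext_type & accepted)


assert is_accepted_file_ext(".pdf", OnyxExtensionType.Plain | OnyxExtensionType.Document)
assert not is_accepted_file_ext(".png", OnyxExtensionType.Plain | OnyxExtensionType.Document)
assert is_accepted_file_ext(".png", OnyxExtensionType.All)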

View File

@@ -5,7 +5,6 @@ from sqlalchemy.orm import Session
from onyx.configs.app_configs import INTEGRATION_TESTS_MODE
from onyx.configs.constants import DocumentSource
from onyx.configs.llm_configs import get_image_extraction_and_analysis_enabled
from onyx.connectors.airtable.airtable_connector import AirtableConnector
from onyx.connectors.asana.connector import AsanaConnector
from onyx.connectors.axero.connector import AxeroConnector
@@ -31,7 +30,6 @@ from onyx.connectors.gong.connector import GongConnector
from onyx.connectors.google_drive.connector import GoogleDriveConnector
from onyx.connectors.google_site.connector import GoogleSitesConnector
from onyx.connectors.guru.connector import GuruConnector
from onyx.connectors.highspot.connector import HighspotConnector
from onyx.connectors.hubspot.connector import HubSpotConnector
from onyx.connectors.interfaces import BaseConnector
from onyx.connectors.interfaces import CheckpointConnector
@@ -119,7 +117,6 @@ def identify_connector_class(
DocumentSource.FIREFLIES: FirefliesConnector,
DocumentSource.EGNYTE: EgnyteConnector,
DocumentSource.AIRTABLE: AirtableConnector,
DocumentSource.HIGHSPOT: HighspotConnector,
# just for integration tests
DocumentSource.MOCK_CONNECTOR: MockConnector,
}
@@ -185,8 +182,6 @@ def instantiate_connector(
if new_credentials is not None:
backend_update_credential_json(credential, new_credentials, db_session)
connector.set_allow_images(get_image_extraction_and_analysis_enabled())
return connector
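
The factory hunk above keeps a plain DocumentSource-to-connector-class mapping that identify_connector_class reads before the chosen class is instantiated with connector-specific arguments. A minimal sketch of that registry-dispatch pattern, with stand-in enum members and classes rather than the full onyx factory:

from enum import Enum


class DocumentSource(str, Enum):
    GITHUB = "github"
    DROPBOX = "dropbox"


class GithubConnector:
    def __init__(self, repo_owner: str) -> None:
        self.repo_owner = repo_owner


class DropboxConnector:
    def __init__(self) -> None:
        pass


_CONNECTOR_MAP: dict[DocumentSource, type] = {
    DocumentSource.GITHUB: GithubConnector,
    DocumentSource.DROPBOX: DropboxConnector,
}


def identify_connector_class(source: DocumentSource) -> type:
    try:
        return _CONNECTOR_MAP[source]
    except KeyError:
        raise ValueError(f"No connector registered for source {source}")


connector_cls = identify_connector_class(DocumentSource.GITHUB)
connector = connector_cls(repo_owner="onyx-dot-app")
print(type(connector).__name__)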

View File

@@ -16,17 +16,17 @@ from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import Document
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TextSection
from onyx.connectors.models import Section
from onyx.connectors.vision_enabled_connector import VisionEnabledConnector
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.pg_file_store import get_pgfilestore_by_file_name
from onyx.file_processing.extract_file_text import extract_text_and_images
from onyx.file_processing.extract_file_text import get_file_ext
from onyx.file_processing.extract_file_text import is_accepted_file_ext
from onyx.file_processing.extract_file_text import is_valid_file_ext
from onyx.file_processing.extract_file_text import load_files_from_zip
from onyx.file_processing.extract_file_text import OnyxExtensionType
from onyx.file_processing.image_utils import store_image_and_create_section
from onyx.file_store.file_store import get_default_file_store
from onyx.llm.interfaces import LLM
from onyx.utils.logger import setup_logger
logger = setup_logger()
@@ -52,51 +52,39 @@ def _read_files_and_metadata(
file_content, ignore_dirs=True
):
yield os.path.join(directory_path, file_info.filename), subfile, metadata
elif is_accepted_file_ext(extension, OnyxExtensionType.All):
elif is_valid_file_ext(extension):
yield file_name, file_content, metadata
else:
logger.warning(f"Skipping file '{file_name}' with extension '{extension}'")
def _create_image_section(
llm: LLM | None,
image_data: bytes,
db_session: Session,
parent_file_name: str,
display_name: str,
link: str | None = None,
idx: int = 0,
) -> tuple[ImageSection, str | None]:
) -> tuple[Section, str | None]:
"""
Creates an ImageSection for an image file or embedded image.
Stores the image in PGFileStore but does not generate a summary.
Args:
image_data: Raw image bytes
db_session: Database session
parent_file_name: Name of the parent file (for embedded images)
display_name: Display name for the image
idx: Index for embedded images
Create a Section object for a single image and store the image in PGFileStore.
If summarization is enabled and we have an LLM, summarize the image.
Returns:
Tuple of (ImageSection, stored_file_name or None)
tuple: (Section object, file_name in PGFileStore or None if storage failed)
"""
# Create a unique identifier for the image
file_name = f"{parent_file_name}_embedded_{idx}" if idx > 0 else parent_file_name
# Create a unique file name for the embedded image
file_name = f"{parent_file_name}_embedded_{idx}"
# Store the image and create a section
try:
section, stored_file_name = store_image_and_create_section(
db_session=db_session,
image_data=image_data,
file_name=file_name,
display_name=display_name,
link=link,
file_origin=FileOrigin.CONNECTOR,
)
return section, stored_file_name
except Exception as e:
logger.error(f"Failed to store image {display_name}: {e}")
raise e
# Use the standardized utility to store the image and create a section
return store_image_and_create_section(
db_session=db_session,
image_data=image_data,
file_name=file_name,
display_name=display_name,
llm=llm,
file_origin=FileOrigin.OTHER,
)
def _process_file(
@@ -105,16 +93,12 @@ def _process_file(
metadata: dict[str, Any] | None,
pdf_pass: str | None,
db_session: Session,
llm: LLM | None,
) -> list[Document]:
"""
Process a file and return a list of Documents.
For images, creates ImageSection objects without summarization.
For documents with embedded images, extracts and stores the images.
Processes a single file, returning a list of Documents (typically one).
Also handles embedded images if 'EMBEDDED_IMAGE_EXTRACTION_ENABLED' is true.
"""
if metadata is None:
metadata = {}
# Get file extension and determine file type
extension = get_file_ext(file_name)
# Fetch the DB record so we know the ID for internal URL
@@ -123,13 +107,15 @@ def _process_file(
logger.warning(f"No file record found for '{file_name}' in PG; skipping.")
return []
if not is_accepted_file_ext(extension, OnyxExtensionType.All):
if not is_valid_file_ext(extension):
logger.warning(
f"Skipping file '{file_name}' with unrecognized extension '{extension}'"
)
return []
# Prepare doc metadata
if metadata is None:
metadata = {}
file_display_name = metadata.get("file_display_name") or os.path.basename(file_name)
# Timestamps
@@ -172,7 +158,6 @@ def _process_file(
"title",
"connector_type",
"pdf_password",
"mime_type",
]
}
@@ -185,85 +170,58 @@ def _process_file(
title = metadata.get("title") or file_display_name
# 1) If the file itself is an image, handle that scenario quickly
if extension in LoadConnector.IMAGE_EXTENSIONS:
# Read the image data
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".webp"}
if extension in IMAGE_EXTENSIONS:
# Summarize or produce empty doc
image_data = file.read()
if not image_data:
logger.warning(f"Empty image file: {file_name}")
return []
# Create an ImageSection for the image
try:
section, _ = _create_image_section(
image_data=image_data,
db_session=db_session,
parent_file_name=pg_record.file_name,
display_name=title,
image_section, _ = _create_image_section(
llm, image_data, db_session, pg_record.file_name, title
)
return [
Document(
id=doc_id,
sections=[image_section],
source=source_type,
semantic_identifier=file_display_name,
title=title,
doc_updated_at=final_time_updated,
primary_owners=p_owners,
secondary_owners=s_owners,
metadata=metadata_tags,
)
]
return [
Document(
id=doc_id,
sections=[section],
source=source_type,
semantic_identifier=file_display_name,
title=title,
doc_updated_at=final_time_updated,
primary_owners=p_owners,
secondary_owners=s_owners,
metadata=metadata_tags,
)
]
except Exception as e:
logger.error(f"Failed to process image file {file_name}: {e}")
return []
# 2) Otherwise: text-based approach. Possibly with embedded images.
# 2) Otherwise: text-based approach. Possibly with embedded images if enabled.
# (For example .docx with inline images).
file.seek(0)
text_content = ""
embedded_images: list[tuple[bytes, str]] = []
# Extract text and images from the file
extraction_result = extract_text_and_images(
text_content, embedded_images = extract_text_and_images(
file=file,
file_name=file_name,
pdf_pass=pdf_pass,
)
# Merge file-specific metadata (from file content) with provided metadata
if extraction_result.metadata:
logger.debug(
f"Found file-specific metadata for {file_name}: {extraction_result.metadata}"
)
metadata.update(extraction_result.metadata)
# Build sections: first the text as a single Section
sections: list[TextSection | ImageSection] = []
sections = []
link_in_meta = metadata.get("link")
if extraction_result.text_content.strip():
logger.debug(f"Creating TextSection for {file_name} with link: {link_in_meta}")
sections.append(
TextSection(link=link_in_meta, text=extraction_result.text_content.strip())
)
if text_content.strip():
sections.append(Section(link=link_in_meta, text=text_content.strip()))
# Then any extracted images from docx, etc.
for idx, (img_data, img_name) in enumerate(
extraction_result.embedded_images, start=1
):
for idx, (img_data, img_name) in enumerate(embedded_images, start=1):
# Store each embedded image as a separate file in PGFileStore
# and create a section with the image reference
try:
image_section, _ = _create_image_section(
image_data=img_data,
db_session=db_session,
parent_file_name=pg_record.file_name,
display_name=f"{title} - image {idx}",
idx=idx,
)
sections.append(image_section)
except Exception as e:
logger.warning(
f"Failed to process embedded image {idx} in {file_name}: {e}"
)
# and create a section with the image summary
image_section, _ = _create_image_section(
llm,
img_data,
db_session,
pg_record.file_name,
f"{title} - image {idx}",
idx,
)
sections.append(image_section)
return [
Document(
id=doc_id,
@@ -279,10 +237,10 @@ def _process_file(
]
class LocalFileConnector(LoadConnector):
class LocalFileConnector(LoadConnector, VisionEnabledConnector):
"""
Connector that reads files from Postgres and yields Documents, including
embedded image extraction without summarization.
optional embedded image extraction.
"""
def __init__(
@@ -294,6 +252,9 @@ class LocalFileConnector(LoadConnector):
self.batch_size = batch_size
self.pdf_pass: str | None = None
# Initialize vision LLM using the mixin
self.initialize_vision_llm()
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
self.pdf_pass = credentials.get("pdf_password")
@@ -325,6 +286,7 @@ class LocalFileConnector(LoadConnector):
metadata=metadata,
pdf_pass=self.pdf_pass,
db_session=db_session,
llm=self.image_analysis_llm,
)
documents.extend(new_docs)
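
The _process_file hunks above branch on the extension: image files become a single image section, while other files get one text section plus a section per embedded image extracted from the document. Below is a condensed, illustration-only sketch of that branching; the helpers and data are simplified stand-ins for the onyx utilities:

IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".webp"}


def fake_extract_text_and_images(data: bytes) -> tuple[str, list[bytes]]:
    # Stand-in for the real extraction helper: pretend every document has
    # some text plus one embedded image.
    return data.decode(errors="ignore"), [b"fake-image-bytes"]


def process_file(file_name: str, data: bytes) -> list[str]:
    extension = "." + file_name.rsplit(".", 1)[-1].lower()
    sections: list[str] = []

    # 1) The file itself is an image: store it and emit one image section.
    if extension in IMAGE_EXTENSIONS:
        if not data:
            return []  # empty image file, skip
        return [f"image-section:{file_name}"]

    # 2) Text-based path: one text section, then one section per embedded image.
    text_content, embedded_images = fake_extract_text_and_images(data)
    if text_content.strip():
        sections.append(f"text-section:{len(text_content)} chars")
    for idx, _img_data in enumerate(embedded_images, start=1):
        sections.append(f"image-section:{file_name}_embedded_{idx}")
    return sections


print(process_file("report.docx", b"hello world"))
print(process_file("photo.png", b"\x89PNG..."))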

View File

@@ -1,7 +1,6 @@
from collections.abc import Iterator
from datetime import datetime
from datetime import timezone
from typing import cast
from typing import List
import requests
@@ -15,8 +14,7 @@ from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TextSection
from onyx.connectors.models import Section
from onyx.utils.logger import setup_logger
logger = setup_logger()
@@ -47,7 +45,7 @@ _FIREFLIES_API_QUERY = """
def _create_doc_from_transcript(transcript: dict) -> Document | None:
sections: List[TextSection] = []
sections: List[Section] = []
current_speaker_name = None
current_link = ""
current_text = ""
@@ -59,7 +57,7 @@ def _create_doc_from_transcript(transcript: dict) -> Document | None:
if sentence["speaker_name"] != current_speaker_name:
if current_speaker_name is not None:
sections.append(
TextSection(
Section(
link=current_link,
text=current_text.strip(),
)
@@ -73,7 +71,7 @@ def _create_doc_from_transcript(transcript: dict) -> Document | None:
# Sometimes these links (links with a timestamp) do not work; this is a bug on the Fireflies side.
sections.append(
TextSection(
Section(
link=current_link,
text=current_text.strip(),
)
@@ -96,7 +94,7 @@ def _create_doc_from_transcript(transcript: dict) -> Document | None:
return Document(
id=fireflies_id,
sections=cast(list[TextSection | ImageSection], sections),
sections=sections,
source=DocumentSource.FIREFLIES,
semantic_identifier=meeting_title,
metadata={},
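
The Fireflies hunk above groups transcript sentences by speaker: text accumulates while the speaker stays the same, and a new section is emitted whenever the speaker changes, plus one final flush at the end. An illustration-only sketch of that grouping with made-up sentence data:

sentences = [
    {"speaker_name": "Ana", "text": "Hi everyone."},
    {"speaker_name": "Ana", "text": "Let's start."},
    {"speaker_name": "Ben", "text": "Sounds good."},
]

sections: list[dict[str, str]] = []
current_speaker = None
current_text = ""

for sentence in sentences:
    if sentence["speaker_name"] != current_speaker:
        if current_speaker is not None:
            sections.append({"speaker": current_speaker, "text": current_text.strip()})
        current_speaker = sentence["speaker_name"]
        current_text = ""
    current_text += " " + sentence["text"]

# Flush the final speaker's accumulated text.
if current_speaker is not None:
    sections.append({"speaker": current_speaker, "text": current_text.strip()})

print(sections)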

View File

@@ -14,7 +14,7 @@ from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import TextSection
from onyx.connectors.models import Section
from onyx.file_processing.html_utils import parse_html_page_basic
from onyx.utils.logger import setup_logger
@@ -133,7 +133,7 @@ def _create_doc_from_ticket(ticket: dict, domain: str) -> Document:
return Document(
id=_FRESHDESK_ID_PREFIX + link,
sections=[
TextSection(
Section(
link=link,
text=text,
)

View File

@@ -13,7 +13,7 @@ from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import TextSection
from onyx.connectors.models import Section
from onyx.utils.logger import setup_logger
@@ -183,7 +183,7 @@ def _convert_page_to_document(
return Document(
id=f"gitbook-{space_id}-{page_id}",
sections=[
TextSection(
Section(
link=page.get("urls", {}).get("app", ""),
text=_extract_text_from_document(page_content),
)
@@ -228,15 +228,10 @@ class GitbookConnector(LoadConnector, PollConnector):
raise ConnectorMissingCredentialError("GitBook")
try:
content = self.client.get(f"/spaces/{self.space_id}/content/pages")
content = self.client.get(f"/spaces/{self.space_id}/content")
pages: list[dict[str, Any]] = content.get("pages", [])
current_batch: list[Document] = []
logger.info(f"Found {len(pages)} root pages.")
logger.info(
f"First 20 Page Ids: {[page.get('id', 'Unknown') for page in pages[:20]]}"
)
while pages:
page = pages.pop(0)

View File

@@ -1,10 +1,8 @@
import copy
import time
from collections.abc import Generator
from collections.abc import Iterator
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from enum import Enum
from typing import Any
from typing import cast
@@ -15,30 +13,26 @@ from github.GithubException import GithubException
from github.Issue import Issue
from github.PaginatedList import PaginatedList
from github.PullRequest import PullRequest
from github.Requester import Requester
from pydantic import BaseModel
from typing_extensions import override
from onyx.configs.app_configs import GITHUB_CONNECTOR_BASE_URL
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.interfaces import CheckpointConnector
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import ConnectorCheckpoint
from onyx.connectors.interfaces import ConnectorFailure
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentFailure
from onyx.connectors.models import TextSection
from onyx.connectors.models import Section
from onyx.utils.batching import batch_generator
from onyx.utils.logger import setup_logger
logger = setup_logger()
ITEMS_PER_PAGE = 100
_MAX_NUM_RATE_LIMIT_RETRIES = 5
@@ -54,7 +48,7 @@ def _sleep_after_rate_limit_exception(github_client: Github) -> None:
def _get_batch_rate_limited(
git_objs: PaginatedList, page_num: int, github_client: Github, attempt_num: int = 0
) -> list[PullRequest | Issue]:
) -> list[Any]:
if attempt_num > _MAX_NUM_RATE_LIMIT_RETRIES:
raise RuntimeError(
"Re-tried fetching batch too many times. Something is going wrong with fetching objects from Github"
@@ -75,20 +69,31 @@ def _get_batch_rate_limited(
)
def _batch_github_objects(
git_objs: PaginatedList, github_client: Github, batch_size: int
) -> Iterator[list[Any]]:
page_num = 0
while True:
batch = _get_batch_rate_limited(git_objs, page_num, github_client)
page_num += 1
if not batch:
break
for mini_batch in batch_generator(batch, batch_size=batch_size):
yield mini_batch
def _convert_pr_to_document(pull_request: PullRequest) -> Document:
return Document(
id=pull_request.html_url,
sections=[
TextSection(link=pull_request.html_url, text=pull_request.body or "")
],
sections=[Section(link=pull_request.html_url, text=pull_request.body or "")],
source=DocumentSource.GITHUB,
semantic_identifier=pull_request.title,
# updated_at is UTC time but is timezone unaware, explicitly add UTC
# as there is logic in indexing to prevent wrong timestamped docs
# due to local time discrepancies with UTC
doc_updated_at=pull_request.updated_at.replace(tzinfo=timezone.utc)
if pull_request.updated_at
else None,
doc_updated_at=pull_request.updated_at.replace(tzinfo=timezone.utc),
metadata={
"merged": str(pull_request.merged),
"state": pull_request.state,
@@ -104,7 +109,7 @@ def _fetch_issue_comments(issue: Issue) -> str:
def _convert_issue_to_document(issue: Issue) -> Document:
return Document(
id=issue.html_url,
sections=[TextSection(link=issue.html_url, text=issue.body or "")],
sections=[Section(link=issue.html_url, text=issue.body or "")],
source=DocumentSource.GITHUB,
semantic_identifier=issue.title,
# updated_at is UTC time but is timezone unaware
@@ -115,58 +120,31 @@ def _convert_issue_to_document(issue: Issue) -> Document:
)
class SerializedRepository(BaseModel):
# id is part of the raw_data as well, just pulled out for convenience
id: int
headers: dict[str, str | int]
raw_data: dict[str, Any]
def to_Repository(self, requester: Requester) -> Repository.Repository:
return Repository.Repository(
requester, self.headers, self.raw_data, completed=True
)
class GithubConnectorStage(Enum):
START = "start"
PRS = "prs"
ISSUES = "issues"
class GithubConnectorCheckpoint(ConnectorCheckpoint):
stage: GithubConnectorStage
curr_page: int
cached_repo_ids: list[int] | None = None
cached_repo: SerializedRepository | None = None
class GithubConnector(CheckpointConnector[GithubConnectorCheckpoint]):
class GithubConnector(LoadConnector, PollConnector):
def __init__(
self,
repo_owner: str,
repositories: str | None = None,
batch_size: int = INDEX_BATCH_SIZE,
state_filter: str = "all",
include_prs: bool = True,
include_issues: bool = False,
) -> None:
self.repo_owner = repo_owner
self.repositories = repositories
self.batch_size = batch_size
self.state_filter = state_filter
self.include_prs = include_prs
self.include_issues = include_issues
self.github_client: Github | None = None
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
# defaults to 30 items per page, can be set to as high as 100
self.github_client = (
Github(
credentials["github_access_token"],
base_url=GITHUB_CONNECTOR_BASE_URL,
per_page=ITEMS_PER_PAGE,
credentials["github_access_token"], base_url=GITHUB_CONNECTOR_BASE_URL
)
if GITHUB_CONNECTOR_BASE_URL
else Github(credentials["github_access_token"], per_page=ITEMS_PER_PAGE)
else Github(credentials["github_access_token"])
)
return None
@@ -237,193 +215,85 @@ class GithubConnector(CheckpointConnector[GithubConnectorCheckpoint]):
return self._get_all_repos(github_client, attempt_num + 1)
def _fetch_from_github(
self,
checkpoint: GithubConnectorCheckpoint,
start: datetime | None = None,
end: datetime | None = None,
) -> Generator[Document | ConnectorFailure, None, GithubConnectorCheckpoint]:
self, start: datetime | None = None, end: datetime | None = None
) -> GenerateDocumentsOutput:
if self.github_client is None:
raise ConnectorMissingCredentialError("GitHub")
checkpoint = copy.deepcopy(checkpoint)
# First run of the connector, fetch all repos and store in checkpoint
if checkpoint.cached_repo_ids is None:
repos = []
if self.repositories:
if "," in self.repositories:
# Multiple repositories specified
repos = self._get_github_repos(self.github_client)
else:
# Single repository (backward compatibility)
repos = [self._get_github_repo(self.github_client)]
repos = []
if self.repositories:
if "," in self.repositories:
# Multiple repositories specified
repos = self._get_github_repos(self.github_client)
else:
# All repositories
repos = self._get_all_repos(self.github_client)
if not repos:
checkpoint.has_more = False
return checkpoint
# Single repository (backward compatibility)
repos = [self._get_github_repo(self.github_client)]
else:
# All repositories
repos = self._get_all_repos(self.github_client)
checkpoint.cached_repo_ids = sorted([repo.id for repo in repos])
checkpoint.cached_repo = SerializedRepository(
id=checkpoint.cached_repo_ids[0],
headers=repos[0].raw_headers,
raw_data=repos[0].raw_data,
)
checkpoint.stage = GithubConnectorStage.PRS
checkpoint.curr_page = 0
# save checkpoint with repo ids retrieved
return checkpoint
for repo in repos:
if self.include_prs:
logger.info(f"Fetching PRs for repo: {repo.name}")
pull_requests = repo.get_pulls(
state=self.state_filter, sort="updated", direction="desc"
)
assert checkpoint.cached_repo is not None, "No repo saved in checkpoint"
repo = checkpoint.cached_repo.to_Repository(self.github_client.requester)
if self.include_prs and checkpoint.stage == GithubConnectorStage.PRS:
logger.info(f"Fetching PRs for repo: {repo.name}")
pull_requests = repo.get_pulls(
state=self.state_filter, sort="updated", direction="desc"
)
doc_batch: list[Document] = []
pr_batch = _get_batch_rate_limited(
pull_requests, checkpoint.curr_page, self.github_client
)
checkpoint.curr_page += 1
done_with_prs = False
for pr in pr_batch:
# we iterate backwards in time, so at this point we stop processing prs
if (
start is not None
and pr.updated_at
and pr.updated_at.replace(tzinfo=timezone.utc) < start
for pr_batch in _batch_github_objects(
pull_requests, self.github_client, self.batch_size
):
yield from doc_batch
done_with_prs = True
break
# Skip PRs updated after the end date
if (
end is not None
and pr.updated_at
and pr.updated_at.replace(tzinfo=timezone.utc) > end
doc_batch: list[Document] = []
for pr in pr_batch:
if start is not None and pr.updated_at < start:
yield doc_batch
break
if end is not None and pr.updated_at > end:
continue
doc_batch.append(_convert_pr_to_document(cast(PullRequest, pr)))
yield doc_batch
if self.include_issues:
logger.info(f"Fetching issues for repo: {repo.name}")
issues = repo.get_issues(
state=self.state_filter, sort="updated", direction="desc"
)
for issue_batch in _batch_github_objects(
issues, self.github_client, self.batch_size
):
continue
try:
doc_batch.append(_convert_pr_to_document(cast(PullRequest, pr)))
except Exception as e:
error_msg = f"Error converting PR to document: {e}"
logger.exception(error_msg)
yield ConnectorFailure(
failed_document=DocumentFailure(
document_id=str(pr.id), document_link=pr.html_url
),
failure_message=error_msg,
exception=e,
)
continue
doc_batch = []
for issue in issue_batch:
issue = cast(Issue, issue)
if start is not None and issue.updated_at < start:
yield doc_batch
break
if end is not None and issue.updated_at > end:
continue
if issue.pull_request is not None:
# PRs are handled separately
continue
doc_batch.append(_convert_issue_to_document(issue))
yield doc_batch
# if we found any PRs on the page, yield any associated documents and return the checkpoint
if not done_with_prs and len(pr_batch) > 0:
yield from doc_batch
return checkpoint
def load_from_state(self) -> GenerateDocumentsOutput:
return self._fetch_from_github()
# if we went past the start date during the loop or there are no more
# prs to get, we move on to issues
checkpoint.stage = GithubConnectorStage.ISSUES
checkpoint.curr_page = 0
checkpoint.stage = GithubConnectorStage.ISSUES
if self.include_issues and checkpoint.stage == GithubConnectorStage.ISSUES:
logger.info(f"Fetching issues for repo: {repo.name}")
issues = repo.get_issues(
state=self.state_filter, sort="updated", direction="desc"
)
doc_batch = []
issue_batch = _get_batch_rate_limited(
issues, checkpoint.curr_page, self.github_client
)
checkpoint.curr_page += 1
done_with_issues = False
for issue in cast(list[Issue], issue_batch):
# we iterate backwards in time, so at this point we stop processing issues
if (
start is not None
and issue.updated_at.replace(tzinfo=timezone.utc) < start
):
yield from doc_batch
done_with_issues = True
break
# Skip issues updated after the end date
if (
end is not None
and issue.updated_at.replace(tzinfo=timezone.utc) > end
):
continue
if issue.pull_request is not None:
# PRs are handled separately
continue
try:
doc_batch.append(_convert_issue_to_document(issue))
except Exception as e:
error_msg = f"Error converting issue to document: {e}"
logger.exception(error_msg)
yield ConnectorFailure(
failed_document=DocumentFailure(
document_id=str(issue.id),
document_link=issue.html_url,
),
failure_message=error_msg,
exception=e,
)
continue
# if we found any issues on the page, yield them and return the checkpoint
if not done_with_issues and len(issue_batch) > 0:
yield from doc_batch
return checkpoint
# if we went past the start date during the loop or there are no more
# issues to get, we move on to the next repo
checkpoint.stage = GithubConnectorStage.PRS
checkpoint.curr_page = 0
checkpoint.has_more = len(checkpoint.cached_repo_ids) > 1
if checkpoint.cached_repo_ids:
next_id = checkpoint.cached_repo_ids.pop()
next_repo = self.github_client.get_repo(next_id)
checkpoint.cached_repo = SerializedRepository(
id=next_id,
headers=next_repo.raw_headers,
raw_data=next_repo.raw_data,
)
return checkpoint
@override
def load_from_checkpoint(
self,
start: SecondsSinceUnixEpoch,
end: SecondsSinceUnixEpoch,
checkpoint: GithubConnectorCheckpoint,
) -> CheckpointOutput[GithubConnectorCheckpoint]:
start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)
end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)
def poll_source(
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
) -> GenerateDocumentsOutput:
start_datetime = datetime.utcfromtimestamp(start)
end_datetime = datetime.utcfromtimestamp(end)
# Move start time back by 3 hours, since some Issues/PRs are getting dropped
# Could be due to delayed processing on GitHub side
# Issues not updated since the last poll will be shortcut and not re-embedded
adjusted_start_datetime = start_datetime - timedelta(hours=3)
epoch = datetime.fromtimestamp(0, tz=timezone.utc)
epoch = datetime.utcfromtimestamp(0)
if adjusted_start_datetime < epoch:
adjusted_start_datetime = epoch
return self._fetch_from_github(
checkpoint, start=adjusted_start_datetime, end=end_datetime
)
return self._fetch_from_github(adjusted_start_datetime, end_datetime)
def validate_connector_settings(self) -> None:
if self.github_client is None:
@@ -525,16 +395,6 @@ class GithubConnector(CheckpointConnector[GithubConnectorCheckpoint]):
f"Unexpected error during GitHub settings validation: {exc}"
)
def validate_checkpoint_json(
self, checkpoint_json: str
) -> GithubConnectorCheckpoint:
return GithubConnectorCheckpoint.model_validate_json(checkpoint_json)
def build_dummy_checkpoint(self) -> GithubConnectorCheckpoint:
return GithubConnectorCheckpoint(
stage=GithubConnectorStage.PRS, curr_page=0, has_more=True
)
if __name__ == "__main__":
import os
@@ -544,9 +404,7 @@ if __name__ == "__main__":
repositories=os.environ["REPOSITORIES"],
)
connector.load_credentials(
{"github_access_token": os.environ["ACCESS_TOKEN_GITHUB"]}
)
document_batches = connector.load_from_checkpoint(
0, time.time(), connector.build_dummy_checkpoint()
{"github_access_token": os.environ["GITHUB_ACCESS_TOKEN"]}
)
document_batches = connector.load_from_state()
print(next(document_batches))
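
The GitHub hunks above contrast a checkpoint-driven connector (stage, page counter, and cached repo ids carried inside a GithubConnectorCheckpoint) with a plain generator-based load/poll connector. Here is a self-contained sketch of how a checkpoint-style connector is typically driven from the outside, assuming simplified stand-in classes rather than the onyx interfaces:

from collections.abc import Generator
from dataclasses import dataclass, field


@dataclass
class DemoCheckpoint:
    has_more: bool = True
    curr_page: int = 0
    remaining_repo_ids: list[int] = field(default_factory=list)


class DemoConnector:
    def build_dummy_checkpoint(self) -> DemoCheckpoint:
        return DemoCheckpoint(has_more=True, remaining_repo_ids=[1, 2])

    def load_from_checkpoint(
        self, checkpoint: DemoCheckpoint
    ) -> Generator[str, None, DemoCheckpoint]:
        # Each run processes one "page" for the current repo, then returns an
        # updated checkpoint so work can resume after an interruption.
        repo_id = checkpoint.remaining_repo_ids[0]
        yield f"repo-{repo_id}-page-{checkpoint.curr_page}"
        checkpoint.curr_page += 1
        if checkpoint.curr_page >= 2:  # pretend each repo has two pages
            checkpoint.remaining_repo_ids.pop(0)
            checkpoint.curr_page = 0
        checkpoint.has_more = bool(checkpoint.remaining_repo_ids)
        return checkpoint


connector = DemoConnector()
checkpoint = connector.build_dummy_checkpoint()
while checkpoint.has_more:
    gen = connector.load_from_checkpoint(checkpoint)
    while True:
        try:
            print(next(gen))
        except StopIteration as stop:
            checkpoint = stop.value  # the generator's return value
            break

The same file also trades naive datetime.utcfromtimestamp(...) for datetime.fromtimestamp(..., tz=timezone.utc), which yields timezone-aware datetimes and avoids the naive-UTC helper that newer Python versions deprecate.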

View File

@@ -21,7 +21,7 @@ from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import TextSection
from onyx.connectors.models import Section
from onyx.utils.logger import setup_logger
@@ -56,7 +56,7 @@ def get_author(author: Any) -> BasicExpertInfo:
def _convert_merge_request_to_document(mr: Any) -> Document:
doc = Document(
id=mr.web_url,
sections=[TextSection(link=mr.web_url, text=mr.description or "")],
sections=[Section(link=mr.web_url, text=mr.description or "")],
source=DocumentSource.GITLAB,
semantic_identifier=mr.title,
# updated_at is UTC time but is timezone unaware, explicitly add UTC
@@ -72,7 +72,7 @@ def _convert_merge_request_to_document(mr: Any) -> Document:
def _convert_issue_to_document(issue: Any) -> Document:
doc = Document(
id=issue.web_url,
sections=[TextSection(link=issue.web_url, text=issue.description or "")],
sections=[Section(link=issue.web_url, text=issue.description or "")],
source=DocumentSource.GITLAB,
semantic_identifier=issue.title,
# updated_at is UTC time but is timezone unaware, explicitly add UTC
@@ -99,7 +99,7 @@ def _convert_code_to_document(
file_url = f"{url}/{projectOwner}/{projectName}/-/blob/master/{file['path']}" # Construct the file URL
doc = Document(
id=file["id"],
sections=[TextSection(link=file_url, text=file_content)],
sections=[Section(link=file_url, text=file_content)],
source=DocumentSource.GITLAB,
semantic_identifier=file["name"],
doc_updated_at=datetime.now().replace(

View File

@@ -1,6 +1,5 @@
from base64 import urlsafe_b64decode
from typing import Any
from typing import cast
from typing import Dict
from google.oauth2.credentials import Credentials as OAuthCredentials # type: ignore
@@ -29,9 +28,8 @@ from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import Document
from onyx.connectors.models import ImageSection
from onyx.connectors.models import Section
from onyx.connectors.models import SlimDocument
from onyx.connectors.models import TextSection
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger
from onyx.utils.retry_wrapper import retry_builder
@@ -117,7 +115,7 @@ def _get_message_body(payload: dict[str, Any]) -> str:
return message_body
def message_to_section(message: Dict[str, Any]) -> tuple[TextSection, dict[str, str]]:
def message_to_section(message: Dict[str, Any]) -> tuple[Section, dict[str, str]]:
link = f"https://mail.google.com/mail/u/0/#inbox/{message['id']}"
payload = message.get("payload", {})
@@ -144,7 +142,7 @@ def message_to_section(message: Dict[str, Any]) -> tuple[TextSection, dict[str,
message_body_text: str = _get_message_body(payload)
return TextSection(link=link, text=message_body_text + message_data), metadata
return Section(link=link, text=message_body_text + message_data), metadata
def thread_to_document(full_thread: Dict[str, Any]) -> Document | None:
@@ -194,7 +192,7 @@ def thread_to_document(full_thread: Dict[str, Any]) -> Document | None:
return Document(
id=id,
semantic_identifier=semantic_identifier,
sections=cast(list[TextSection | ImageSection], sections),
sections=sections,
source=DocumentSource.GMAIL,
# This is used to perform permission sync
primary_owners=primary_owners,

View File

@@ -18,7 +18,7 @@ from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import TextSection
from onyx.connectors.models import Section
from onyx.utils.logger import setup_logger
@@ -243,7 +243,7 @@ class GongConnector(LoadConnector, PollConnector):
Document(
id=call_id,
sections=[
TextSection(link=call_metadata["url"], text=transcript_text)
Section(link=call_metadata["url"], text=transcript_text)
],
source=DocumentSource.GONG,
# Should not ever be Untitled as a call cannot be made without a Title

Some files were not shown because too many files have changed in this diff.