.

Some fixes
.
2026-03-12 03:02:43 +00:00 · 2026-03-11 17:52:55 -07:00 · 2026-03-11 17:44:49 -07:00 · 2026-03-11 17:32:30 -07:00 · 2026-03-11 17:19:09 -07:00
14 changed files with 611 additions and 945 deletions
--- a/backend/ee/onyx/external_permissions/jira/group_sync.py
+++ b/backend/ee/onyx/external_permissions/jira/group_sync.py
@@ -1,8 +1,6 @@
 from collections.abc import Generator
-from typing import Any

 from jira import JIRA
-from jira.exceptions import JIRAError

 from ee.onyx.db.external_perm import ExternalUserGroup
 from onyx.connectors.jira.utils import build_jira_client
@@ -11,102 +9,107 @@ from onyx.utils.logger import setup_logger

 logger = setup_logger()

-_ATLASSIAN_ACCOUNT_TYPE = "atlassian"
-_GROUP_MEMBER_PAGE_SIZE = 50

-# The GET /group/member endpoint was introduced in Jira 6.0.
-# Jira versions older than 6.0 do not have group management REST APIs at all.
-_MIN_JIRA_VERSION_FOR_GROUP_MEMBER = "6.0"
-
-
-def _fetch_group_member_page(
+def _get_jira_group_members_email(
    jira_client: JIRA,
    group_name: str,
-    start_at: int,
-) -> dict[str, Any]:
-    """Fetch a single page from the non-deprecated GET /group/member endpoint.
+) -> list[str]:
+    """Get all member emails for a Jira group.

-    The old GET /group endpoint (used by jira_client.group_members()) is deprecated
-    and decommissioned in Jira Server 10.3+. This uses the replacement endpoint
-    directly via the library's internal _get_json helper, following the same pattern
-    as enhanced_search_ids / bulk_fetch_issues in connector.py.
-
-    There is an open PR to the library to switch to this endpoint since last year:
-    https://github.com/pycontribs/jira/pull/2356
-    so once it is merged and released, we can switch to using the library function.
+    Filters out app accounts (bots, integrations) and only returns real user emails.
    """
+    emails: list[str] = []
+
    try:
-        return jira_client._get_json(
-            "group/member",
-            params={
-                "groupname": group_name,
-                "includeInactiveUsers": "false",
-                "startAt": start_at,
-                "maxResults": _GROUP_MEMBER_PAGE_SIZE,
-            },
-        )
-    except JIRAError as e:
-        if e.status_code == 404:
-            raise RuntimeError(
-                f"GET /group/member returned 404 for group '{group_name}'. "
-                f"This endpoint requires Jira {_MIN_JIRA_VERSION_FOR_GROUP_MEMBER}+. "
-                f"If you are running a self-hosted Jira instance, please upgrade "
-                f"to at least Jira {_MIN_JIRA_VERSION_FOR_GROUP_MEMBER}."
-            ) from e
-        raise
+        # group_members returns an OrderedDict of account_id -> member_info
+        members = jira_client.group_members(group=group_name)

+        if not members:
+            logger.warning(f"No members found for group {group_name}")
+            return emails

-def _get_group_member_emails(
-    jira_client: JIRA,
-    group_name: str,
-) -> set[str]:
-    """Get all member emails for a single Jira group.
+        for account_id, member_info in members.items():
+            # member_info is a dict with keys like 'fullname', 'email', 'active'
+            email = member_info.get("email")

-    Uses the non-deprecated GET /group/member endpoint which returns full user
-    objects including accountType, so we can filter out app/customer accounts
-    without making separate user() calls.
-    """
-    emails: set[str] = set()
-    start_at = 0
-
-    while True:
-        try:
-            page = _fetch_group_member_page(jira_client, group_name, start_at)
-        except Exception as e:
-            logger.error(f"Error fetching members for group {group_name}: {e}")
-            raise
-
-        members: list[dict[str, Any]] = page.get("values", [])
-        for member in members:
-            account_type = member.get("accountType")
-            # On Jira DC < 9.0, accountType is absent; include those users.
-            # On Cloud / DC 9.0+, filter to real user accounts only.
-            if account_type is not None and account_type != _ATLASSIAN_ACCOUNT_TYPE:
-                continue
-
-            email = member.get("emailAddress")
-            if email:
-                emails.add(email)
+            # "hidden" or missing emails (typically app accounts) fall through to a per-user lookup
+            if email and email != "hidden":
+                emails.append(email)
            else:
-                logger.warning(
-                    f"Atlassian user {member.get('accountId', 'unknown')} "
-                    f"in group {group_name} has no visible email address"
-                )
+                # For cloud, we might need to fetch user details separately
+                try:
+                    user = jira_client.user(id=account_id)

-        if page.get("isLast", True) or not members:
-            break
-        start_at += len(members)
+                    # Skip app accounts (bots, integrations, etc.)
+                    if hasattr(user, "accountType") and user.accountType == "app":
+                        logger.info(
+                            f"Skipping app account {account_id} for group {group_name}"
+                        )
+                        continue
+
+                    if hasattr(user, "emailAddress") and user.emailAddress:
+                        emails.append(user.emailAddress)
+                    else:
+                        logger.warning(f"User {account_id} has no email address")
+                except Exception as e:
+                    logger.warning(
+                        f"Could not fetch email for user {account_id} in group {group_name}: {e}"
+                    )
+
+    except Exception as e:
+        logger.error(f"Error fetching members for group {group_name}: {e}")

    return emails


+def _build_group_member_email_map(
+    jira_client: JIRA,
+) -> dict[str, set[str]]:
+    """Build a map of group names to member emails."""
+    group_member_emails: dict[str, set[str]] = {}
+
+    try:
+        # Get all groups from Jira - returns a list of group name strings
+        group_names = jira_client.groups()
+
+        if not group_names:
+            logger.warning("No groups found in Jira")
+            return group_member_emails
+
+        logger.info(f"Found {len(group_names)} groups in Jira")
+
+        for group_name in group_names:
+            if not group_name:
+                continue
+
+            member_emails = _get_jira_group_members_email(
+                jira_client=jira_client,
+                group_name=group_name,
+            )
+
+            if member_emails:
+                group_member_emails[group_name] = set(member_emails)
+                logger.debug(
+                    f"Found {len(member_emails)} members for group {group_name}"
+                )
+            else:
+                logger.debug(f"No members found for group {group_name}")
+
+    except Exception as e:
+        logger.error(f"Error building group member email map: {e}")
+
+    return group_member_emails
+
+
 def jira_group_sync(
    tenant_id: str,  # noqa: ARG001
    cc_pair: ConnectorCredentialPair,
 ) -> Generator[ExternalUserGroup, None, None]:
-    """Sync Jira groups and their members, yielding one group at a time.
+    """
+    Sync Jira groups and their members.

-    Streams group-by-group rather than accumulating all groups in memory.
+    This function fetches all groups from Jira and yields ExternalUserGroup
+    objects containing the group ID and member emails.
    """
    jira_base_url = cc_pair.connector.connector_specific_config.get("jira_base_url", "")
    scoped_token = cc_pair.connector.connector_specific_config.get(
@@ -127,26 +130,12 @@ def jira_group_sync(
        scoped_token=scoped_token,
    )

-    group_names = jira_client.groups()
-    if not group_names:
-        raise ValueError(f"No groups found for cc_pair_id={cc_pair.id}")
+    group_member_email_map = _build_group_member_email_map(jira_client=jira_client)
+    if not group_member_email_map:
+        raise ValueError(f"No groups with members found for cc_pair_id={cc_pair.id}")

-    logger.info(f"Found {len(group_names)} groups in Jira")
-
-    for group_name in group_names:
-        if not group_name:
-            continue
-
-        member_emails = _get_group_member_emails(
-            jira_client=jira_client,
-            group_name=group_name,
-        )
-        if not member_emails:
-            logger.debug(f"No members found for group {group_name}")
-            continue
-
-        logger.debug(f"Found {len(member_emails)} members for group {group_name}")
+    for group_id, group_member_emails in group_member_email_map.items():
        yield ExternalUserGroup(
-            id=group_name,
-            user_emails=list(member_emails),
+            id=group_id,
+            user_emails=list(group_member_emails),
        )
--- a/backend/onyx/background/celery/tasks/opensearch_migration/constants.py
+++ b/backend/onyx/background/celery/tasks/opensearch_migration/constants.py
@@ -11,9 +11,6 @@
 # lock after its cleanup which happens at most after its soft timeout.

 # Constants corresponding to migrate_documents_from_vespa_to_opensearch_task.
-from onyx.configs.app_configs import OPENSEARCH_MIGRATION_GET_VESPA_CHUNKS_PAGE_SIZE
-
-
 MIGRATION_TASK_SOFT_TIME_LIMIT_S = 60 * 5  # 5 minutes.
 MIGRATION_TASK_TIME_LIMIT_S = 60 * 6  # 6 minutes.
 # The maximum time the lock can be held for. Will automatically be released
@@ -47,7 +44,7 @@ TOTAL_ALLOWABLE_DOC_MIGRATION_ATTEMPTS_BEFORE_PERMANENT_FAILURE = 15

 # WARNING: Do not change these values without knowing what changes also need to
 # be made to OpenSearchTenantMigrationRecord.
-GET_VESPA_CHUNKS_PAGE_SIZE = OPENSEARCH_MIGRATION_GET_VESPA_CHUNKS_PAGE_SIZE
+GET_VESPA_CHUNKS_PAGE_SIZE = 500
 GET_VESPA_CHUNKS_SLICE_COUNT = 4

 # String used to indicate in the vespa_visit_continuation_token mapping that the
--- a/backend/onyx/configs/app_configs.py
+++ b/backend/onyx/configs/app_configs.py
@@ -311,12 +311,6 @@ VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT = (
    os.environ.get("VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT", "true").lower()
    == "true"
 )
-OPENSEARCH_MIGRATION_GET_VESPA_CHUNKS_PAGE_SIZE = int(
-    os.environ.get("OPENSEARCH_MIGRATION_GET_VESPA_CHUNKS_PAGE_SIZE") or 500
-)
-OPENSEARCH_OVERRIDE_DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES = int(
-    os.environ.get("OPENSEARCH_DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES") or 0
-)

 VESPA_HOST = os.environ.get("VESPA_HOST") or "localhost"
 # NOTE: this is used if and only if the vespa config server is accessible via a
--- a/backend/onyx/document_index/opensearch/constants.py
+++ b/backend/onyx/document_index/opensearch/constants.py
@@ -1,10 +1,5 @@
 # Default value for the maximum number of tokens a chunk can hold, if none is
 # specified when creating an index.
-from onyx.configs.app_configs import (
-    OPENSEARCH_OVERRIDE_DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES,
-)
-
-
 DEFAULT_MAX_CHUNK_SIZE = 512

 # Size of the dynamic list used to consider elements during kNN graph creation.
@@ -15,43 +10,27 @@ EF_CONSTRUCTION = 256
 # quality but increase memory footprint. Values typically range between 12 - 48.
 M = 32  # Set relatively high for better accuracy.

-# When performing hybrid search, we need to consider more candidates than the
-# number of results to be returned. This is because the scoring is hybrid and
-# the results are reordered due to the hybrid scoring. Higher = more candidates
-# for hybrid fusion = better retrieval accuracy, but results in more computation
-# per query. Imagine a simple case with a single keyword query and a single
-# vector query and we want 10 final docs. If we only fetch 10 candidates from
-# each of keyword and vector, they would have to have perfect overlap to get a
-# good hybrid ranking for the 10 results. If we fetch 1000 candidates from each,
-# we have a much higher chance of all 10 of the final desired docs showing up
-# and getting scored. In worse situations, the final 10 docs don't even show up
-# as the final 10 (worse than just a miss at the reranking step).
-DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES = (
-    OPENSEARCH_OVERRIDE_DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES
-    if OPENSEARCH_OVERRIDE_DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES > 0
-    else 750
-)
+# When performing hybrid search, we need to consider more candidates than the number of results to be returned.
+# This is because the scoring is hybrid and the results are reordered due to the hybrid scoring.
+# Higher = more candidates for hybrid fusion = better retrieval accuracy, but results in more computation per query.
+# Imagine a simple case with a single keyword query and a single vector query and we want 10 final docs.
+# If we only fetch 10 candidates from each of keyword and vector, they would have to have perfect overlap to get a good hybrid
+# ranking for the 10 results. If we fetch 1000 candidates from each, we have a much higher chance of all 10 of the final desired
+# docs showing up and getting scored. In worse situations, the final 10 docs don't even show up as the final 10 (worse than just
+# a miss at the reranking step).
+DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES = 750

-# Number of vectors to examine to decide the top k neighbors for the HNSW
-# method.
-# NOTE: "When creating a search query, you must specify k. If you provide both k
-# and ef_search, then the larger value is passed to the engine. If ef_search is
-# larger than k, you can provide the size parameter to limit the final number of
-# results to k." from
-# https://docs.opensearch.org/latest/query-dsl/specialized/k-nn/index/#ef_search
+# Number of vectors to examine for top k neighbors for the HNSW method.
 EF_SEARCH = DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES

-# Since the titles are included in the contents, the embedding matches are
-# heavily downweighted as they act as a boost rather than an independent scoring
-# component.
+# Since the titles are included in the contents, they are heavily downweighted as they act as a boost
+# rather than an independent scoring component.
 SEARCH_TITLE_VECTOR_WEIGHT = 0.1
 SEARCH_CONTENT_VECTOR_WEIGHT = 0.45
-# Single keyword weight for both title and content (merged from former title
-# keyword + content keyword).
+# Single keyword weight for both title and content (merged from former title keyword + content keyword).
 SEARCH_KEYWORD_WEIGHT = 0.45

-# NOTE: It is critical that the order of these weights matches the order of the
-# sub-queries in the hybrid search.
+# NOTE: it is critical that the order of these weights matches the order of the sub-queries in the hybrid search.
 HYBRID_SEARCH_NORMALIZATION_WEIGHTS = [
    SEARCH_TITLE_VECTOR_WEIGHT,
    SEARCH_CONTENT_VECTOR_WEIGHT,
--- a/backend/onyx/document_index/opensearch/search.py
+++ b/backend/onyx/document_index/opensearch/search.py
@@ -285,16 +285,13 @@ class DocumentQuery:
        hybrid_search_query: dict[str, Any] = {
            "hybrid": {
                "queries": hybrid_search_subqueries,
-                # Max results per subquery per shard before aggregation. Ensures
-                # keyword and vector subqueries contribute equally to the
-                # candidate pool for hybrid fusion.
+                # Max results per subquery per shard before aggregation. Ensures keyword and vector
+                # subqueries contribute equally to the candidate pool for hybrid fusion.
                # Sources:
                # https://docs.opensearch.org/latest/vector-search/ai-search/hybrid-search/pagination/
                # https://opensearch.org/blog/navigating-pagination-in-hybrid-queries-with-the-pagination_depth-parameter/
                "pagination_depth": DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES,
-                # Applied to all the sub-queries independently (this avoids
-                # subqueries having a lot of results thrown out during
-                # aggregation).
+                # Applied to all the sub-queries independently (this avoids subqueries having a lot of results thrown out).
                # Sources:
                # https://docs.opensearch.org/latest/query-dsl/compound/hybrid/
                # https://opensearch.org/blog/introducing-common-filter-support-for-hybrid-search-queries
@@ -377,10 +374,9 @@ class DocumentQuery:
    def _get_hybrid_search_subqueries(
        query_text: str,
        query_vector: list[float],
-        # The default number of neighbors to consider for knn vector similarity
-        # search. This is higher than the number of results because the scoring
-        # is hybrid. For a detailed breakdown, see where the default value is
-        # set.
+        # The default number of neighbors to consider for knn vector similarity search.
+        # This is higher than the number of results because the scoring is hybrid.
+        # For a detailed breakdown, see where the default value is set.
        vector_candidates: int = DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES,
    ) -> list[dict[str, Any]]:
        """Returns subqueries for hybrid search.
@@ -404,27 +400,20 @@ class DocumentQuery:
        in a single hybrid query. Source:
        https://docs.opensearch.org/latest/query-dsl/compound/hybrid/

-        NOTE: Each query is independent during the search phase, there is no
-        backfilling of scores for missing query components. What this means is
-        that if a document was a good vector match but did not show up for
-        keyword, it gets a score of 0 for the keyword component of the hybrid
-        scoring. This is not as bad as just disregarding a score though as there
-        is normalization applied after. So really it is "increasing" the missing
-        score compared to if it was included and the range was renormalized.
-        This does however mean that between docs that have high scores for say
-        the vector field, the keyword scores between them are completely ignored
-        unless they also showed up in the keyword query as a reasonably high
-        match. TLDR, this is a bit of unique funky behavior but it seems ok.
+        NOTE: Each query is independent during the search phase, there is no backfilling of scores for missing query components.
+        What this means is that if a document was a good vector match but did not show up for keyword, it gets a score of 0 for
+        the keyword component of the hybrid scoring. This is not as bad as just disregarding a score though as there is
+        normalization applied after. So really it is "increasing" the missing score compared to if it was included and the range
+        was renormalized. This does however mean that between docs that have high scores for say the vector field, the keyword
+        scores between them are completely ignored unless they also showed up in the keyword query as a reasonably high match.
+        TLDR, this is a bit of unique funky behavior but it seems ok.

        NOTE: Options considered and rejected:
-        - minimum_should_match: Since it's hybrid search and users often provide
-          semantic queries, there is often a lot of terms, and very low number
-          of meaningful keywords (and a low ratio of keywords).
-        - fuzziness AUTO: Typo tolerance (0/1/2 edit distance by term length).
-          It's mostly for typos as the analyzer ("english" by default) already
-          does some stemming and tokenization. In testing datasets, this makes
-          recall slightly worse. It also is less performant so not really any
-          reason to do it.
+        - minimum_should_match: Since it's hybrid search and users often provide semantic queries, there is often a lot of terms,
+          and very low number of meaningful keywords (and a low ratio of keywords).
+        - fuzziness AUTO: typo tolerance (0/1/2 edit distance by term length). It's mostly for typos as the analyzer ("english" by
+          default) already does some stemming and tokenization. In testing datasets, this makes recall slightly worse. It also is
+          less performant so not really any reason to do it.

        Args:
            query_text: The text of the query to search for.
--- a/backend/onyx/file_processing/extract_file_text.py
+++ b/backend/onyx/file_processing/extract_file_text.py
@@ -1,3 +1,4 @@
+import csv
 import gc
 import io
 import json
@@ -19,6 +20,7 @@ from zipfile import BadZipFile

 import chardet
 import openpyxl
+from openpyxl.worksheet.worksheet import Worksheet
 from PIL import Image

 from onyx.configs.constants import ONYX_METADATA_FILENAME
@@ -352,6 +354,65 @@ def pptx_to_text(file: IO[Any], file_name: str = "") -> str:
    return presentation.markdown


+def _worksheet_to_matrix(
+    worksheet: Worksheet,
+) -> list[list[str]]:
+    """
+    Converts a single worksheet to a matrix of cell values rendered as strings (empty cells become "")
+    """
+    rows: list[list[str]] = []
+    for worksheet_row in worksheet.iter_rows(min_row=1, values_only=True):
+        row = ["" if cell is None else str(cell) for cell in worksheet_row]
+        rows.append(row)
+
+    return rows
+
+
+def _clean_worksheet_matrix(matrix: list[list[str]]) -> list[list[str]]:
+    """
+    Cleans a worksheet matrix by capping each run of consecutive empty rows at
+    MAX_EMPTY_ROWS and each run of consecutive empty columns at MAX_EMPTY_COLS
+    """
+    MAX_EMPTY_ROWS = 2  # Runs longer than this are capped to max_empty; shorter runs are preserved as-is
+    MAX_EMPTY_COLS = 2
+
+    # Row cleanup
+    matrix = _remove_empty_runs(matrix, max_empty=MAX_EMPTY_ROWS)
+
+    # Column cleanup (transpose, clean, transpose back)
+    transposed = list(map(list, zip(*matrix))) if matrix else []
+    transposed = _remove_empty_runs(transposed, max_empty=MAX_EMPTY_COLS)
+    matrix = list(map(list, zip(*transposed))) if transposed else []
+
+    return matrix
+
+
+def _remove_empty_runs(
+    rows: list[list[str]],
+    max_empty: int,
+) -> list[list[str]]:
+    """Caps every run of consecutive empty rows at max_empty rows.
+
+    Leading and interior runs are truncated to at most max_empty rows; trailing
+    empty rows are always dropped, since no non-empty row follows to flush them.
+    """
+    result: list[list[str]] = []
+    empty_buffer: list[list[str]] = []
+
+    for row in rows:
+        # Check if empty
+        if not any(row):
+            empty_buffer.append(row)
+        else:
+            # Add up to max_empty buffered empty rows onto the result - that's what we allow
+            result.extend(empty_buffer[:max_empty])
+            # Add the new non-empty row
+            result.append(row)
+            empty_buffer = []
+
+    return result
+
+
 def xlsx_to_text(file: IO[Any], file_name: str = "") -> str:
    # TODO: switch back to this approach in a few months when markitdown
    # fixes their handling of excel files
@@ -390,30 +451,15 @@ def xlsx_to_text(file: IO[Any], file_name: str = "") -> str:
                f"Failed to extract text from {file_name or 'xlsx file'}. This happens due to a bug in openpyxl. {e}"
            )
            return ""
-        raise e
+        raise

    text_content = []
    for sheet in workbook.worksheets:
-        rows = []
-        num_empty_consecutive_rows = 0
-        for row in sheet.iter_rows(min_row=1, values_only=True):
-            row_str = ",".join(str(cell or "") for cell in row)
-
-            # Only add the row if there are any values in the cells
-            if len(row_str) >= len(row):
-                rows.append(row_str)
-                num_empty_consecutive_rows = 0
-            else:
-                num_empty_consecutive_rows += 1
-
-            if num_empty_consecutive_rows > 100:
-                # handle massive excel sheets with mostly empty cells
-                logger.warning(
-                    f"Found {num_empty_consecutive_rows} empty rows in {file_name}, skipping rest of file"
-                )
-                break
-        sheet_str = "\n".join(rows)
-        text_content.append(sheet_str)
+        sheet_matrix = _clean_worksheet_matrix(_worksheet_to_matrix(sheet))
+        buf = io.StringIO()
+        writer = csv.writer(buf, lineterminator="\n")
+        writer.writerows(sheet_matrix)
+        text_content.append(buf.getvalue().rstrip("\n"))
    return TEXT_SECTION_SEPARATOR.join(text_content)


--- a/backend/tests/daily/conftest.py
+++ b/backend/tests/daily/conftest.py
@@ -19,7 +19,7 @@ from fastapi.testclient import TestClient
 from onyx.auth.users import current_admin_user
 from onyx.db.engine.sql_engine import get_session
 from onyx.db.models import UserRole
-from onyx.main import get_application
+from onyx.main import fetch_versioned_implementation
 from onyx.utils.logger import setup_logger

 logger = setup_logger()
@@ -51,8 +51,11 @@ def client() -> Generator[TestClient, None, None]:
    # Patch out prometheus metrics setup to avoid "Duplicated timeseries in
    # CollectorRegistry" errors when multiple tests each create a new app
    # (prometheus registers metrics globally and rejects duplicate names).
+    get_app = fetch_versioned_implementation(
+        module="onyx.main", attribute="get_application"
+    )
    with patch("onyx.main.setup_prometheus_metrics"):
-        app: FastAPI = get_application(lifespan_override=test_lifespan)
+        app: FastAPI = get_app(lifespan_override=test_lifespan)

    # Override the database session dependency with a mock
    # (these tests don't actually need DB access)
--- a/backend/tests/external_dependency_unit/opensearch_migration/test_opensearch_migration_tasks.py
+++ b/backend/tests/external_dependency_unit/opensearch_migration/test_opensearch_migration_tasks.py
@@ -17,9 +17,6 @@ from unittest.mock import patch
 import pytest
 from sqlalchemy.orm import Session

-from onyx.background.celery.tasks.opensearch_migration.constants import (
-    GET_VESPA_CHUNKS_SLICE_COUNT,
-)
 from onyx.background.celery.tasks.opensearch_migration.tasks import (
    is_continuation_token_done_for_all_slices,
 )
@@ -323,15 +320,9 @@ def test_embedding_dimension(db_session: Session) -> Generator[int, None, None]:
@pytest.fixture(scope="function")
 def patch_get_vespa_chunks_page_size() -> Generator[int, None, None]:
    test_page_size = 5
-    with (
-        patch(
-            "onyx.background.celery.tasks.opensearch_migration.tasks.GET_VESPA_CHUNKS_PAGE_SIZE",
-            test_page_size,
-        ),
-        patch(
-            "onyx.background.celery.tasks.opensearch_migration.constants.GET_VESPA_CHUNKS_PAGE_SIZE",
-            test_page_size,
-        ),
+    with patch(
+        "onyx.background.celery.tasks.opensearch_migration.tasks.GET_VESPA_CHUNKS_PAGE_SIZE",
+        test_page_size,
    ):
        yield test_page_size  # Test runs here.

@@ -591,175 +582,6 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
                    document_chunks[document.id][opensearch_chunk.chunk_index],
                )

-    def test_chunk_migration_visits_all_chunks_even_when_batch_size_varies(
-        self,
-        db_session: Session,
-        test_documents: list[Document],
-        vespa_document_index: VespaDocumentIndex,
-        opensearch_client: OpenSearchIndexClient,
-        test_embedding_dimension: int,
-        clean_migration_tables: None,  # noqa: ARG002
-        enable_opensearch_indexing_for_onyx: None,  # noqa: ARG002
-    ) -> None:
-        """
-        Tests that chunk migration works correctly even when the batch size
-        changes halfway through a migration.
-
-        Simulates task time running out my mocking the locking behavior.
-        """
-        # Precondition.
-        # Index chunks into Vespa.
-        document_chunks: dict[str, list[dict[str, Any]]] = {
-            document.id: [
-                _create_raw_document_chunk(
-                    document_id=document.id,
-                    chunk_index=i,
-                    content=f"Test content {i} for {document.id}",
-                    embedding=_generate_test_vector(test_embedding_dimension),
-                    now=datetime.now(),
-                    title=f"Test title {document.id}",
-                    title_embedding=_generate_test_vector(test_embedding_dimension),
-                )
-                for i in range(CHUNK_COUNT)
-            ]
-            for document in test_documents
-        }
-        all_chunks: list[dict[str, Any]] = []
-        for chunks in document_chunks.values():
-            all_chunks.extend(chunks)
-        vespa_document_index.index_raw_chunks(all_chunks)
-
-        # Run the initial batch. To simulate partial progress we will mock the
-        # redis lock to return True for the first invocation of .owned() and
-        # False subsequently.
-        # NOTE: The batch size is currently set to 5 in
-        # patch_get_vespa_chunks_page_size.
-        mock_redis_client = Mock()
-        mock_lock = Mock()
-        mock_lock.owned.side_effect = [True, False, False]
-        mock_lock.acquire.return_value = True
-        mock_redis_client.lock.return_value = mock_lock
-        with patch(
-            "onyx.background.celery.tasks.opensearch_migration.tasks.get_redis_client",
-            return_value=mock_redis_client,
-        ):
-            result_1 = migrate_chunks_from_vespa_to_opensearch_task(
-                tenant_id=get_current_tenant_id()
-            )
-
-        assert result_1 is True
-        # Expire the session cache to see the committed changes from the task.
-        db_session.expire_all()
-
-        # Verify partial progress was saved.
-        tenant_record = db_session.query(OpenSearchTenantMigrationRecord).first()
-        assert tenant_record is not None
-        partial_chunks_migrated = tenant_record.total_chunks_migrated
-        assert partial_chunks_migrated > 0
-        # page_size applies per slice, so one iteration can fetch up to
-        # page_size * GET_VESPA_CHUNKS_SLICE_COUNT chunks total.
-        assert partial_chunks_migrated <= 5 * GET_VESPA_CHUNKS_SLICE_COUNT
-        assert tenant_record.vespa_visit_continuation_token is not None
-        # Slices are not necessarily evenly distributed across all document
-        # chunks so we can't test that every token is non-None, but certainly at
-        # least one must be.
-        assert any(json.loads(tenant_record.vespa_visit_continuation_token).values())
-        assert tenant_record.migration_completed_at is None
-        assert tenant_record.approx_chunk_count_in_vespa is not None
-
-        # Under test.
-        # Now patch the batch size to be some other number, like 2.
-        mock_redis_client = Mock()
-        mock_lock = Mock()
-        mock_lock.owned.side_effect = [True, False, False]
-        mock_lock.acquire.return_value = True
-        mock_redis_client.lock.return_value = mock_lock
-        with (
-            patch(
-                "onyx.background.celery.tasks.opensearch_migration.tasks.GET_VESPA_CHUNKS_PAGE_SIZE",
-                2,
-            ),
-            patch(
-                "onyx.background.celery.tasks.opensearch_migration.constants.GET_VESPA_CHUNKS_PAGE_SIZE",
-                2,
-            ),
-            patch(
-                "onyx.background.celery.tasks.opensearch_migration.tasks.get_redis_client",
-                return_value=mock_redis_client,
-            ),
-        ):
-            result_2 = migrate_chunks_from_vespa_to_opensearch_task(
-                tenant_id=get_current_tenant_id()
-            )
-
-        # Postcondition.
-        assert result_2 is True
-        # Expire the session cache to see the committed changes from the task.
-        db_session.expire_all()
-
-        # Verify next partial progress was saved.
-        tenant_record = db_session.query(OpenSearchTenantMigrationRecord).first()
-        assert tenant_record is not None
-        new_partial_chunks_migrated = tenant_record.total_chunks_migrated
-        assert new_partial_chunks_migrated > partial_chunks_migrated
-        # page_size applies per slice, so one iteration can fetch up to
-        # page_size * GET_VESPA_CHUNKS_SLICE_COUNT chunks total.
-        assert new_partial_chunks_migrated <= (5 + 2) * GET_VESPA_CHUNKS_SLICE_COUNT
-        assert tenant_record.vespa_visit_continuation_token is not None
-        # Slices are not necessarily evenly distributed across all document
-        # chunks so we can't test that every token is non-None, but certainly at
-        # least one must be.
-        assert any(json.loads(tenant_record.vespa_visit_continuation_token).values())
-        assert tenant_record.migration_completed_at is None
-        assert tenant_record.approx_chunk_count_in_vespa is not None
-
-        # Under test.
-        # Run the remainder of the migration.
-        with (
-            patch(
-                "onyx.background.celery.tasks.opensearch_migration.tasks.GET_VESPA_CHUNKS_PAGE_SIZE",
-                2,
-            ),
-            patch(
-                "onyx.background.celery.tasks.opensearch_migration.constants.GET_VESPA_CHUNKS_PAGE_SIZE",
-                2,
-            ),
-        ):
-            result_3 = migrate_chunks_from_vespa_to_opensearch_task(
-                tenant_id=get_current_tenant_id()
-            )
-
-        # Postcondition.
-        assert result_3 is True
-        # Expire the session cache to see the committed changes from the task.
-        db_session.expire_all()
-
-        # Verify completion.
-        tenant_record = db_session.query(OpenSearchTenantMigrationRecord).first()
-        assert tenant_record is not None
-        assert tenant_record.total_chunks_migrated > new_partial_chunks_migrated
-        assert tenant_record.total_chunks_migrated == len(all_chunks)
-        # Visit is complete so continuation token should be None.
-        assert tenant_record.vespa_visit_continuation_token is not None
-        assert is_continuation_token_done_for_all_slices(
-            json.loads(tenant_record.vespa_visit_continuation_token)
-        )
-        assert tenant_record.migration_completed_at is not None
-        assert tenant_record.approx_chunk_count_in_vespa == len(all_chunks)
-
-        # Verify chunks were indexed in OpenSearch.
-        for document in test_documents:
-            opensearch_chunks = _get_document_chunks_from_opensearch(
-                opensearch_client, document.id, get_current_tenant_id()
-            )
-            assert len(opensearch_chunks) == CHUNK_COUNT
-            opensearch_chunks.sort(key=lambda x: x.chunk_index)
-            for opensearch_chunk in opensearch_chunks:
-                _assert_chunk_matches_vespa_chunk(
-                    opensearch_chunk,
-                    document_chunks[document.id][opensearch_chunk.chunk_index],
-                )
-
    def test_chunk_migration_empty_vespa(
        self,
        db_session: Session,
--- a/backend/tests/unit/onyx/file_processing/init.py
+++ b/backend/tests/unit/onyx/file_processing/init.py
--- a/backend/tests/unit/onyx/file_processing/test_xlsx_to_text.py
+++ b/backend/tests/unit/onyx/file_processing/test_xlsx_to_text.py
@@ -0,0 +1,196 @@
+import io
+
+import openpyxl
+
+from onyx.file_processing.extract_file_text import xlsx_to_text
+
+
+def _make_xlsx(sheets: dict[str, list[list[str]]]) -> io.BytesIO:
+    """Return an in-memory xlsx file built from a sheet name -> rows mapping."""
+    workbook = openpyxl.Workbook()
+    if workbook.active is not None:
+        workbook.remove(workbook.active)
+    for title, matrix in sheets.items():
+        worksheet = workbook.create_sheet(title=title)
+        for cells in matrix:
+            worksheet.append(cells)
+    stream = io.BytesIO()
+    workbook.save(stream)
+    stream.seek(0)
+    return stream
+
+
+class TestXlsxToText:
+    """Exercise xlsx_to_text against small in-memory workbooks."""
+
+    def test_single_sheet_basic(self) -> None:
+        """Three rows in one sheet come back as three lines, in order."""
+        workbook = _make_xlsx(
+            {
+                "Sheet1": [
+                    ["Name", "Age"],
+                    ["Alice", "30"],
+                    ["Bob", "25"],
+                ]
+            }
+        )
+        text = xlsx_to_text(workbook)
+        rows = [ln for ln in text.strip().split("\n") if ln.strip()]
+        assert len(rows) == 3
+        assert "Name" in rows[0] and "Age" in rows[0]
+        assert "Alice" in rows[1] and "30" in rows[1]
+        assert "Bob" in rows[2]
+
+    def test_multiple_sheets_separated(self) -> None:
+        """Two sheets yield output containing a blank-line separator."""
+        workbook = _make_xlsx(
+            {
+                "Sheet1": [["a", "b"]],
+                "Sheet2": [["c", "d"]],
+            }
+        )
+        text = xlsx_to_text(workbook)
+        # TEXT_SECTION_SEPARATOR is "\n\n"
+        assert "\n\n" in text
+        sections = text.split("\n\n")
+        assert any("a" in s for s in sections) and any("c" in s for s in sections)
+
+    def test_empty_cells(self) -> None:
+        """Blank cells inside populated rows do not change the row count."""
+        workbook = _make_xlsx(
+            {
+                "Sheet1": [
+                    ["a", "", "b"],
+                    ["", "c", ""],
+                ]
+            }
+        )
+        text = xlsx_to_text(workbook)
+        assert len([ln for ln in text.strip().split("\n") if ln.strip()]) == 2
+
+    def test_commas_in_cells_are_quoted(self) -> None:
+        """A cell that contains a comma is emitted as a quoted CSV field."""
+        workbook = _make_xlsx(
+            {
+                "Sheet1": [
+                    ["hello, world", "normal"],
+                ]
+            }
+        )
+        assert '"hello, world"' in xlsx_to_text(workbook)
+
+    def test_empty_workbook(self) -> None:
+        """A sheet with no rows produces only whitespace."""
+        text = xlsx_to_text(_make_xlsx({"Sheet1": []}))
+        assert text.strip() == ""
+
+    def test_long_empty_row_run_capped(self) -> None:
+        """More than two consecutive empty rows are reduced to two."""
+        workbook = _make_xlsx(
+            {
+                "Sheet1": [
+                    ["header"],
+                    [""],
+                    [""],
+                    [""],
+                    [""],
+                    ["data"],
+                ]
+            }
+        )
+        text = xlsx_to_text(workbook)
+        rows = [ln for ln in text.strip().split("\n") if ln.strip()]
+        # Expect header + 2 surviving empty rows + data (4 empty rows capped to 2)
+        assert len(rows) == 4
+        assert "header" in rows[0]
+        assert "data" in rows[-1]
+
+    def test_long_empty_col_run_capped(self) -> None:
+        """More than two consecutive empty columns are reduced to two."""
+        workbook = _make_xlsx(
+            {
+                "Sheet1": [
+                    ["a", "", "", "", "b"],
+                    ["c", "", "", "", "d"],
+                ]
+            }
+        )
+        text = xlsx_to_text(workbook)
+        rows = [ln for ln in text.strip().split("\n") if ln.strip()]
+        assert len(rows) == 2
+        # Three empty columns shrink to two, leaving 4 fields (a, "", "", b)
+        # instead of 5, i.e. "a,,,b" with exactly 3 commas.
+        leading_row = rows[0].strip()
+        # The comma count stands in for the number of remaining columns
+        assert leading_row.count(",") == 3
+
+    def test_short_empty_runs_kept(self) -> None:
+        """Runs of at most two empty rows/cols are left untouched."""
+        workbook = _make_xlsx(
+            {
+                "Sheet1": [
+                    ["a", "b"],
+                    ["", ""],
+                    ["", ""],
+                    ["c", "d"],
+                ]
+            }
+        )
+        text = xlsx_to_text(workbook)
+        rows = [ln for ln in text.strip().split("\n") if ln.strip()]
+        # Two empty rows are within the threshold, so all 4 rows remain
+        assert len(rows) == 4
+
+    def test_bad_zip_file_returns_empty(self) -> None:
+        """Bytes that are not a zip archive yield an empty string."""
+        text = xlsx_to_text(io.BytesIO(b"not a zip file"), file_name="test.xlsx")
+        assert text == ""
+
+    def test_bad_zip_tilde_file_returns_empty(self) -> None:
+        """Invalid bytes named like a ~$ temp file also yield an empty string."""
+        text = xlsx_to_text(io.BytesIO(b"not a zip file"), file_name="~$temp.xlsx")
+        assert text == ""
+
+    def test_large_sparse_sheet(self) -> None:
+        """Ten empty rows between two data rows are reduced to two."""
+        matrix: list[list[str]] = [["row1_data"]]
+        matrix.extend([[""] for _ in range(10)])
+        matrix.append(["row2_data"])
+        workbook = _make_xlsx({"Sheet1": matrix})
+        text = xlsx_to_text(workbook)
+        rows = [ln for ln in text.strip().split("\n") if ln.strip()]
+        # row1_data + 2 surviving empty rows + row2_data = 4
+        assert len(rows) == 4
+        assert "row1_data" in rows[0]
+        assert "row2_data" in rows[-1]
+
+    def test_quotes_in_cells(self) -> None:
+        """Double quotes inside a cell are escaped in the output."""
+        workbook = _make_xlsx(
+            {
+                "Sheet1": [
+                    ['say "hello"', "normal"],
+                ]
+            }
+        )
+        text = xlsx_to_text(workbook)
+        # csv.writer doubles embedded quote characters
+        assert '""hello""' in text
+
+    def test_each_row_is_separate_line(self) -> None:
+        """Every row lands on its own line (regression for writerow vs writerows)."""
+        workbook = _make_xlsx(
+            {
+                "Sheet1": [
+                    ["r1c1", "r1c2"],
+                    ["r2c1", "r2c2"],
+                    ["r3c1", "r3c2"],
+                ]
+            }
+        )
+        text = xlsx_to_text(workbook)
+        rows = [ln for ln in text.strip().split("\n") if ln.strip()]
+        assert len(rows) == 3
+        assert "r1c1" in rows[0] and "r1c2" in rows[0]
+        assert "r2c1" in rows[1] and "r2c2" in rows[1]
+        assert "r3c1" in rows[2] and "r3c2" in rows[2]
--- a/deployment/docker_compose/docker-compose.onyx-lite.yml
+++ b/deployment/docker_compose/docker-compose.onyx-lite.yml
@@ -15,9 +15,8 @@
 #                  -f docker-compose.dev.yml up -d --wait
 #
 # This overlay:
-#   - Moves Vespa (index), both model servers, OpenSearch, MinIO,
-#     Redis (cache), and the background worker to profiles so they do
-#     not start by default
+#   - Moves Vespa (index), both model servers, code-interpreter, Redis (cache),
+#     and the background worker to profiles so they do not start by default
 #   - Makes depends_on references to removed services optional
 #   - Sets DISABLE_VECTOR_DB=true on the api_server
 #   - Uses PostgreSQL for caching and auth instead of Redis
@@ -28,8 +27,7 @@
 #   --profile inference         Inference model server
 #   --profile background        Background worker (Celery) — also needs redis
 #   --profile redis             Redis cache
-#   --profile opensearch        OpenSearch
-#   --profile s3-filestore      MinIO (S3-compatible file store)
+#   --profile code-interpreter  Code interpreter
 # =============================================================================

 name: onyx
@@ -40,9 +38,6 @@ services:
      index:
        condition: service_started
        required: false
-      opensearch:
-        condition: service_started
-        required: false
      cache:
        condition: service_started
        required: false
@@ -89,10 +84,4 @@ services:
  inference_model_server:
    profiles: ["inference"]

-  # OpenSearch is not needed in lite mode (no indexing).
-  opensearch:
-    profiles: ["opensearch"]
-
-  # MinIO is not needed in lite mode (Postgres handles file storage).
-  minio:
-    profiles: ["s3-filestore"]
+  code-interpreter: {}  # NOTE(review): header says this service is moved to a profile, but no `profiles` key is set here — confirm intent
--- a/deployment/docker_compose/install.sh
+++ b/deployment/docker_compose/install.sh
@@ -1,8 +1,8 @@
 #!/bin/bash

-set -euo pipefail
+set -e

-# Expected resource requirements (overridden below if --lite)
+# Expected resource requirements
 EXPECTED_DOCKER_RAM_GB=10
 EXPECTED_DISK_GB=32

@@ -10,11 +10,6 @@ EXPECTED_DISK_GB=32
 SHUTDOWN_MODE=false
 DELETE_DATA_MODE=false
 INCLUDE_CRAFT=false  # Disabled by default, use --include-craft to enable
-LITE_MODE=false       # Disabled by default, use --lite to enable
-USE_LOCAL_FILES=false # Disabled by default, use --local to skip downloading config files
-NO_PROMPT=false
-DRY_RUN=false
-VERBOSE=false

 while [[ $# -gt 0 ]]; do
    case $1 in
@@ -30,26 +25,6 @@ while [[ $# -gt 0 ]]; do
            INCLUDE_CRAFT=true
            shift
            ;;
-        --lite)
-            LITE_MODE=true
-            shift
-            ;;
-        --local)
-            USE_LOCAL_FILES=true
-            shift
-            ;;
-        --no-prompt)
-            NO_PROMPT=true
-            shift
-            ;;
-        --dry-run)
-            DRY_RUN=true
-            shift
-            ;;
-        --verbose)
-            VERBOSE=true
-            shift
-            ;;
        --help|-h)
            echo "Onyx Installation Script"
            echo ""
@@ -57,23 +32,15 @@ while [[ $# -gt 0 ]]; do
            echo ""
            echo "Options:"
            echo "  --include-craft  Enable Onyx Craft (AI-powered web app building)"
-            echo "  --lite           Deploy Onyx Lite (no Vespa, Redis, or model servers)"
-            echo "  --local          Use existing config files instead of downloading from GitHub"
            echo "  --shutdown       Stop (pause) Onyx containers"
            echo "  --delete-data    Remove all Onyx data (containers, volumes, and files)"
-            echo "  --no-prompt      Run non-interactively with defaults (for CI/automation)"
-            echo "  --dry-run        Show what would be done without making changes"
-            echo "  --verbose        Show detailed output for debugging"
            echo "  --help, -h       Show this help message"
            echo ""
            echo "Examples:"
            echo "  $0                    # Install Onyx"
-            echo "  $0 --lite             # Install Onyx Lite (minimal deployment)"
            echo "  $0 --include-craft    # Install Onyx with Craft enabled"
            echo "  $0 --shutdown         # Pause Onyx services"
            echo "  $0 --delete-data      # Completely remove Onyx and all data"
-            echo "  $0 --local            # Re-run using existing config files on disk"
-            echo "  $0 --no-prompt        # Non-interactive install with defaults"
            exit 0
            ;;
        *)
@@ -84,129 +51,8 @@ while [[ $# -gt 0 ]]; do
    esac
 done

-if [[ "$VERBOSE" = true ]]; then
-    set -x
-fi
-
-if [[ "$LITE_MODE" = true ]] && [[ "$INCLUDE_CRAFT" = true ]]; then
-    echo "ERROR: --lite and --include-craft cannot be used together."
-    echo "Craft requires services (Vespa, Redis, background workers) that lite mode disables."
-    exit 1
-fi
-
-# When --lite is passed as a flag, lower resource thresholds early (before the
-# resource check). When lite is chosen interactively, the thresholds are adjusted
-# inside the new-deployment flow, after the resource check has already passed
-# with the standard thresholds — which is the safer direction.
-if [[ "$LITE_MODE" = true ]]; then
-    EXPECTED_DOCKER_RAM_GB=4
-    EXPECTED_DISK_GB=16
-fi
-
 INSTALL_ROOT="${INSTALL_PREFIX:-onyx_data}"

-LITE_COMPOSE_FILE="docker-compose.onyx-lite.yml"
-
-# Build the -f flags for docker compose.
-# Pass "true" as $1 to auto-detect a previously-downloaded lite overlay
-# (used by shutdown/delete-data so users don't need to remember --lite).
-# Without the argument, the lite overlay is only included when --lite was
-# explicitly passed — preventing install/start from silently staying in
-# lite mode just because the file exists on disk from a prior run.
-compose_file_args() {
-    local auto_detect="${1:-false}"
-    local args="-f docker-compose.yml"
-    if [[ "$LITE_MODE" = true ]] || { [[ "$auto_detect" = true ]] && [[ -f "${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}" ]]; }; then
-        args="$args -f ${LITE_COMPOSE_FILE}"
-    fi
-    echo "$args"
-}
-
-# --- Downloader detection (curl with wget fallback) ---
-DOWNLOADER=""
-detect_downloader() {
-    if command -v curl &> /dev/null; then
-        DOWNLOADER="curl"
-        return 0
-    fi
-    if command -v wget &> /dev/null; then
-        DOWNLOADER="wget"
-        return 0
-    fi
-    echo "ERROR: Neither curl nor wget found. Please install one and retry."
-    exit 1
-}
-detect_downloader
-
-download_file() {
-    local url="$1"
-    local output="$2"
-    if [[ "$DOWNLOADER" == "curl" ]]; then
-        curl -fsSL --retry 3 --retry-delay 2 --retry-connrefused -o "$output" "$url"
-    else
-        wget -q --tries=3 --timeout=20 -O "$output" "$url"
-    fi
-}
-
-# Ensures a required file is present. With --local, verifies the file exists on
-# disk. Otherwise, downloads it from the given URL. Returns 0 on success, 1 on
-# failure (caller should handle the exit).
-ensure_file() {
-    local path="$1"
-    local url="$2"
-    local desc="$3"
-
-    if [[ "$USE_LOCAL_FILES" = true ]]; then
-        if [[ -f "$path" ]]; then
-            print_success "Using existing ${desc}"
-            return 0
-        fi
-        print_error "Required file missing: ${desc} (${path})"
-        return 1
-    fi
-
-    print_info "Downloading ${desc}..."
-    if download_file "$url" "$path" 2>/dev/null; then
-        print_success "${desc} downloaded"
-        return 0
-    fi
-    print_error "Failed to download ${desc}"
-    print_info "Please ensure you have internet connection and try again"
-    return 1
-}
-
-# --- Interactive prompt helpers ---
-is_interactive() {
-    [[ "$NO_PROMPT" = false ]] && [[ -t 0 ]]
-}
-
-prompt_or_default() {
-    local prompt_text="$1"
-    local default_value="$2"
-    if is_interactive; then
-        read -p "$prompt_text" -r REPLY
-        if [[ -z "$REPLY" ]]; then
-            REPLY="$default_value"
-        fi
-    else
-        REPLY="$default_value"
-    fi
-}
-
-prompt_yn_or_default() {
-    local prompt_text="$1"
-    local default_value="$2"
-    if is_interactive; then
-        read -p "$prompt_text" -n 1 -r
-        echo ""
-        if [[ -z "$REPLY" ]]; then
-            REPLY="$default_value"
-        fi
-    else
-        REPLY="$default_value"
-    fi
-}
-
 # Colors for output
 RED='\033[0;31m'
 GREEN='\033[0;32m'
@@ -265,7 +111,7 @@ if [ "$SHUTDOWN_MODE" = true ]; then
            fi

            # Stop containers (without removing them)
-            (cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args true) stop)
+            (cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml stop)
            if [ $? -eq 0 ]; then
                print_success "Onyx containers stopped (paused)"
            else
@@ -294,17 +140,12 @@ if [ "$DELETE_DATA_MODE" = true ]; then
    echo "  • All downloaded files and configurations"
    echo "  • All user data and documents"
    echo ""
-    if is_interactive; then
-        read -p "Are you sure you want to continue? Type 'DELETE' to confirm: " -r
-        echo ""
-        if [ "$REPLY" != "DELETE" ]; then
-            print_info "Operation cancelled."
-            exit 0
-        fi
-    else
-        print_error "Cannot confirm destructive operation in non-interactive mode."
-        print_info "Run interactively or remove the ${INSTALL_ROOT} directory manually."
-        exit 1
+    read -p "Are you sure you want to continue? Type 'DELETE' to confirm: " -r
+    echo ""
+
+    if [ "$REPLY" != "DELETE" ]; then
+        print_info "Operation cancelled."
+        exit 0
    fi

    print_info "Removing Onyx containers and volumes..."
@@ -323,7 +164,7 @@ if [ "$DELETE_DATA_MODE" = true ]; then
            fi

            # Stop and remove containers with volumes
-            (cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args true) down -v)
+            (cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml down -v)
            if [ $? -eq 0 ]; then
                print_success "Onyx containers and volumes removed"
            else
@@ -345,117 +186,6 @@ if [ "$DELETE_DATA_MODE" = true ]; then
    exit 0
 fi

-# --- Auto-install Docker (Linux only) ---
-# Runs before the banner so a group-based re-exec doesn't repeat it.
-install_docker_linux() {
-    local distro_id=""
-    if [[ -f /etc/os-release ]]; then
-        distro_id="$(. /etc/os-release && echo "${ID:-}")"
-    fi
-
-    case "$distro_id" in
-        amzn)
-            print_info "Detected Amazon Linux — installing Docker via package manager..."
-            if command -v dnf &> /dev/null; then
-                sudo dnf install -y docker
-            else
-                sudo yum install -y docker
-            fi
-            ;;
-        *)
-            print_info "Installing Docker via get.docker.com..."
-            download_file "https://get.docker.com" /tmp/get-docker.sh
-            sudo sh /tmp/get-docker.sh
-            rm -f /tmp/get-docker.sh
-            ;;
-    esac
-
-    sudo systemctl start docker 2>/dev/null || sudo service docker start 2>/dev/null || true
-    sudo systemctl enable docker 2>/dev/null || true
-}
-
-# Detect OS (including WSL)
-IS_WSL=false
-if [[ -n "${WSL_DISTRO_NAME:-}" ]] || grep -qi microsoft /proc/version 2>/dev/null; then
-    IS_WSL=true
-fi
-
-# Dry-run: show plan and exit
-if [[ "$DRY_RUN" = true ]]; then
-    print_info "Dry run mode — showing what would happen:"
-    echo "  • Install root: ${INSTALL_ROOT}"
-    echo "  • Lite mode: ${LITE_MODE}"
-    echo "  • Include Craft: ${INCLUDE_CRAFT}"
-    echo "  • OS type: ${OSTYPE:-unknown} (WSL: ${IS_WSL})"
-    echo "  • Downloader: ${DOWNLOADER}"
-    echo ""
-    print_success "Dry run complete (no changes made)"
-    exit 0
-fi
-
-if ! command -v docker &> /dev/null; then
-    if [[ "$OSTYPE" == "linux-gnu"* ]] || [[ -n "${WSL_DISTRO_NAME:-}" ]]; then
-        install_docker_linux
-        if ! command -v docker &> /dev/null; then
-            print_error "Docker installation failed."
-            echo "  Visit: https://docs.docker.com/get-docker/"
-            exit 1
-        fi
-        print_success "Docker installed successfully"
-    fi
-fi
-
-# --- Auto-install Docker Compose plugin (Linux only) ---
-if command -v docker &> /dev/null \
-    && ! docker compose version &> /dev/null \
-    && ! command -v docker-compose &> /dev/null \
-    && { [[ "$OSTYPE" == "linux-gnu"* ]] || [[ -n "${WSL_DISTRO_NAME:-}" ]]; }; then
-
-    print_info "Docker Compose not found — installing plugin..."
-    COMPOSE_ARCH="$(uname -m)"
-    COMPOSE_URL="https://github.com/docker/compose/releases/latest/download/docker-compose-linux-${COMPOSE_ARCH}"
-    COMPOSE_DIR="/usr/local/lib/docker/cli-plugins"
-    COMPOSE_TMP="$(mktemp)"
-    sudo mkdir -p "$COMPOSE_DIR"
-    if download_file "$COMPOSE_URL" "$COMPOSE_TMP"; then
-        sudo mv "$COMPOSE_TMP" "$COMPOSE_DIR/docker-compose"
-        sudo chmod +x "$COMPOSE_DIR/docker-compose"
-        if docker compose version &> /dev/null; then
-            print_success "Docker Compose plugin installed"
-        else
-            print_error "Docker Compose plugin installed but not detected."
-            echo "  Visit: https://docs.docker.com/compose/install/"
-            exit 1
-        fi
-    else
-        rm -f "$COMPOSE_TMP"
-        print_error "Failed to download Docker Compose plugin."
-        echo "  Visit: https://docs.docker.com/compose/install/"
-        exit 1
-    fi
-fi
-
-# On Linux, ensure the current user can talk to the Docker daemon without
-# sudo.  If necessary, add them to the "docker" group and re-exec the
-# script under that group so the rest of the install proceeds normally.
-if command -v docker &> /dev/null \
-    && { [[ "$OSTYPE" == "linux-gnu"* ]] || [[ -n "${WSL_DISTRO_NAME:-}" ]]; } \
-    && [[ "$(id -u)" -ne 0 ]] \
-    && ! docker info &> /dev/null; then
-    if [[ "${_ONYX_REEXEC:-}" = "1" ]]; then
-        print_error "Cannot connect to Docker after group re-exec."
-        print_info "Log out and back in, then run the script again."
-        exit 1
-    fi
-    if ! getent group docker &> /dev/null; then
-        sudo groupadd docker
-    fi
-    print_info "Adding $USER to the docker group..."
-    sudo usermod -aG docker "$USER"
-    print_info "Re-launching with docker group active..."
-    exec sg docker -c "_ONYX_REEXEC=1 bash $(printf '%q ' "$0" "$@")"
-fi
-
 # ASCII Art Banner
 echo ""
 echo -e "${BLUE}${BOLD}"
@@ -479,7 +209,8 @@ echo "2. Check your system resources (Docker, memory, disk space)"
 echo "3. Guide you through deployment options (version, authentication)"
 echo ""

-if is_interactive; then
+# Only prompt for acknowledgment if running interactively
+if [ -t 0 ]; then
    echo -e "${YELLOW}${BOLD}Please acknowledge and press Enter to continue...${NC}"
    read -r
    echo ""
@@ -529,35 +260,41 @@ else
    exit 1
 fi

-# Returns 0 if $1 <= $2, 1 if $1 > $2
-# Handles missing or non-numeric parts gracefully (treats them as 0)
+# Compare two dotted version numbers (major.minor.patch)
 version_compare() {
-    local version1="${1:-0.0.0}"
-    local version2="${2:-0.0.0}"
+    # Returns 0 if $1 <= $2, 1 if $1 > $2
+    local version1="${1:-0.0.0}"
+    local version2="${2:-0.0.0}"

-    local v1_major v1_minor v1_patch v2_major v2_minor v2_patch
-    v1_major=$(echo "$version1" | cut -d. -f1)
-    v1_minor=$(echo "$version1" | cut -d. -f2)
-    v1_patch=$(echo "$version1" | cut -d. -f3)
-    v2_major=$(echo "$version2" | cut -d. -f1)
-    v2_minor=$(echo "$version2" | cut -d. -f2)
-    v2_patch=$(echo "$version2" | cut -d. -f3)
+    # Split on "." into components; anything after the third field is discarded
+    local v1_major v1_minor v1_patch v2_major v2_minor v2_patch part
+    IFS=. read -r v1_major v1_minor v1_patch _ <<< "$version1"
+    IFS=. read -r v2_major v2_minor v2_patch _ <<< "$version2"

-    # Default non-numeric or empty parts to 0
-    [[ "$v1_major" =~ ^[0-9]+$ ]] || v1_major=0
-    [[ "$v1_minor" =~ ^[0-9]+$ ]] || v1_minor=0
-    [[ "$v1_patch" =~ ^[0-9]+$ ]] || v1_patch=0
-    [[ "$v2_major" =~ ^[0-9]+$ ]] || v2_major=0
-    [[ "$v2_minor" =~ ^[0-9]+$ ]] || v2_minor=0
-    [[ "$v2_patch" =~ ^[0-9]+$ ]] || v2_patch=0
+    for part in v1_major v1_minor v1_patch v2_major v2_minor v2_patch; do
+        [[ "${!part}" =~ ^[0-9]+$ ]] || printf -v "$part" '%s' 0  # empty/non-numeric -> 0
+    done

-    if [ "$v1_major" -lt "$v2_major" ]; then return 0
-    elif [ "$v1_major" -gt "$v2_major" ]; then return 1; fi
+    # Compare major version
+    if [ "$v1_major" -lt "$v2_major" ]; then
+        return 0
+    elif [ "$v1_major" -gt "$v2_major" ]; then
+        return 1
+    fi

-    if [ "$v1_minor" -lt "$v2_minor" ]; then return 0
-    elif [ "$v1_minor" -gt "$v2_minor" ]; then return 1; fi
+    # Compare minor version
+    if [ "$v1_minor" -lt "$v2_minor" ]; then
+        return 0
+    elif [ "$v1_minor" -gt "$v2_minor" ]; then
+        return 1
+    fi

-    [ "$v1_patch" -le "$v2_patch" ]
+    # Compare patch version
+    if [ "$v1_patch" -le "$v2_patch" ]; then
+        return 0
+    else
+        return 1
+    fi
 }

 # Check Docker daemon
@@ -599,20 +336,10 @@ fi

 # Convert to GB for display
 if [ "$MEMORY_MB" -gt 0 ]; then
-    MEMORY_GB=$(awk "BEGIN {printf \"%.1f\", $MEMORY_MB / 1024}")
-    if [ "$(awk "BEGIN {print ($MEMORY_MB >= 1024)}")" = "1" ]; then
-        MEMORY_DISPLAY="~${MEMORY_GB}GB"
-    else
-        MEMORY_DISPLAY="${MEMORY_MB}MB"
-    fi
-    if [[ "$OSTYPE" == "darwin"* ]]; then
-        print_info "Docker memory allocation: ${MEMORY_DISPLAY}"
-    else
-        print_info "System memory: ${MEMORY_DISPLAY} (Docker uses host memory directly)"
-    fi
+    MEMORY_GB=$((MEMORY_MB / 1024))
+    print_info "Docker memory allocation: ~${MEMORY_GB}GB"
 else
-    print_warning "Could not determine memory allocation"
-    MEMORY_DISPLAY="unknown"
+    print_warning "Could not determine Docker memory allocation"
    MEMORY_MB=0
 fi

@@ -631,7 +358,7 @@ RESOURCE_WARNING=false
 EXPECTED_RAM_MB=$((EXPECTED_DOCKER_RAM_GB * 1024))

 if [ "$MEMORY_MB" -gt 0 ] && [ "$MEMORY_MB" -lt "$EXPECTED_RAM_MB" ]; then
-    print_warning "Less than ${EXPECTED_DOCKER_RAM_GB}GB RAM available (found: ${MEMORY_DISPLAY})"
+    print_warning "Docker has less than ${EXPECTED_DOCKER_RAM_GB}GB RAM allocated (found: ~${MEMORY_GB}GB)"
    RESOURCE_WARNING=true
 fi

@@ -642,10 +369,10 @@ fi

 if [ "$RESOURCE_WARNING" = true ]; then
    echo ""
-    print_warning "Onyx recommends at least ${EXPECTED_DOCKER_RAM_GB}GB RAM and ${EXPECTED_DISK_GB}GB disk space for optimal performance in standard mode."
-    print_warning "Lite mode requires less resources (1-4GB RAM, 8-16GB disk depending on usage), but does not include a vector database."
+    print_warning "Onyx recommends at least ${EXPECTED_DOCKER_RAM_GB}GB RAM and ${EXPECTED_DISK_GB}GB disk space for optimal performance."
+    echo ""
+    read -p "Do you want to continue anyway? (y/N): " -n 1 -r
    echo ""
-    prompt_yn_or_default "Do you want to continue anyway? (Y/n): " "y"
    if [[ ! $REPLY =~ ^[Yy]$ ]]; then
        print_info "Installation cancelled. Please allocate more resources and try again."
        exit 1
@@ -658,89 +385,117 @@ print_step "Creating directory structure"
 if [ -d "${INSTALL_ROOT}" ]; then
    print_info "Directory structure already exists"
    print_success "Using existing ${INSTALL_ROOT} directory"
+    mkdir -p "${INSTALL_ROOT}/deployment" "${INSTALL_ROOT}/data/nginx/local"  # restore any missing subdirs
+else
+    mkdir -p "${INSTALL_ROOT}/deployment" "${INSTALL_ROOT}/data/nginx/local"
+    print_success "Directory structure created"
 fi
-mkdir -p "${INSTALL_ROOT}/deployment"
-mkdir -p "${INSTALL_ROOT}/data/nginx/local"
-print_success "Directory structure created"

-# Ensure all required configuration files are present
+# Download all required files
+print_step "Downloading Onyx configuration files"
+print_info "This step downloads all necessary configuration files from GitHub..."
+echo ""
+print_info "Downloading the following files:"
+echo "  • docker-compose.yml - Main Docker Compose configuration"
+echo "  • env.template - Environment variables template"
+echo "  • nginx/app.conf.template - Nginx web server configuration"
+echo "  • nginx/run-nginx.sh - Nginx startup script"
+echo "  • README.md - Documentation and setup instructions"
+echo ""
+
+# Download Docker Compose file
+COMPOSE_FILE="${INSTALL_ROOT}/deployment/docker-compose.yml"
+print_info "Downloading docker-compose.yml..."
+if curl -fsSL -o "$COMPOSE_FILE" "${GITHUB_RAW_URL}/docker-compose.yml" 2>/dev/null; then
+    print_success "Docker Compose file downloaded successfully"
+
+    # Warn only when Docker Compose is strictly older than 2.24.0. version_compare
+    # returns 0 for "<=", so the operands are swapped and negated; dev builds are skipped.
+    if [ "$COMPOSE_VERSION" != "dev" ] && ! version_compare "2.24.0" "$COMPOSE_VERSION"; then
+        print_warning "Docker Compose version $COMPOSE_VERSION is older than 2.24.0"
+        echo ""
+        print_warning "The docker-compose.yml file uses the newer env_file format that requires Docker Compose 2.24.0 or later."
+        echo ""
+        print_info "To use this configuration with your current Docker Compose version, you have two options:"
+        echo ""
+        echo "1. Upgrade Docker Compose to version 2.24.0 or later (recommended)"
+        echo "   Visit: https://docs.docker.com/compose/install/"
+        echo ""
+        echo "2. Manually replace all env_file sections in docker-compose.yml"
+        echo "   Change from:"
+        echo "     env_file:"
+        echo "       - path: .env"
+        echo "         required: false"
+        echo "   To:"
+        echo "     env_file: .env"
+        echo ""
+        print_warning "The installation will continue, but may fail if Docker Compose cannot parse the file."
+        echo ""
+        read -p "Do you want to continue anyway? (y/N): " -n 1 -r
+        echo ""
+        if [[ ! $REPLY =~ ^[Yy]$ ]]; then
+            print_info "Installation cancelled. Please upgrade Docker Compose or manually edit the docker-compose.yml file."
+            exit 1
+        fi
+        print_info "Proceeding with installation despite Docker Compose version compatibility issues..."
+    fi
+else
+    print_error "Failed to download Docker Compose file"
+    print_info "Please ensure you have internet connection and try again"
+    exit 1
+fi
+
+# Download env.template file
+ENV_TEMPLATE="${INSTALL_ROOT}/deployment/env.template"
+print_info "Downloading env.template..."
+if curl -fsSL -o "$ENV_TEMPLATE" "${GITHUB_RAW_URL}/env.template" 2>/dev/null; then
+    print_success "Environment template downloaded successfully"
+else
+    print_error "Failed to download env.template"
+    print_info "Please ensure you have internet connection and try again"
+    exit 1
+fi
+
+# Download nginx config files
 NGINX_BASE_URL="https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/data/nginx"

-if [[ "$USE_LOCAL_FILES" = true ]]; then
-    print_step "Verifying existing configuration files"
+# Download app.conf.template
+NGINX_CONFIG="${INSTALL_ROOT}/data/nginx/app.conf.template"
+print_info "Downloading nginx configuration template..."
+if curl -fsSL -o "$NGINX_CONFIG" "$NGINX_BASE_URL/app.conf.template" 2>/dev/null; then
+    print_success "Nginx configuration template downloaded"
 else
-    print_step "Downloading Onyx configuration files"
-    print_info "This step downloads all necessary configuration files from GitHub..."
+    print_error "Failed to download nginx configuration template"
+    print_info "Please ensure you have internet connection and try again"
+    exit 1
 fi

-ensure_file "${INSTALL_ROOT}/deployment/docker-compose.yml" \
-    "${GITHUB_RAW_URL}/docker-compose.yml" "docker-compose.yml" || exit 1
-
-# Check Docker Compose version compatibility after obtaining docker-compose.yml
-if [ "$COMPOSE_VERSION" != "dev" ] && version_compare "$COMPOSE_VERSION" "2.24.0"; then
-    print_warning "Docker Compose version $COMPOSE_VERSION is older than 2.24.0"
-    echo ""
-    print_warning "The docker-compose.yml file uses the newer env_file format that requires Docker Compose 2.24.0 or later."
-    echo ""
-    print_info "To use this configuration with your current Docker Compose version, you have two options:"
-    echo ""
-    echo "1. Upgrade Docker Compose to version 2.24.0 or later (recommended)"
-    echo "   Visit: https://docs.docker.com/compose/install/"
-    echo ""
-    echo "2. Manually replace all env_file sections in docker-compose.yml"
-    echo "   Change from:"
-    echo "     env_file:"
-    echo "       - path: .env"
-    echo "         required: false"
-    echo "   To:"
-    echo "     env_file: .env"
-    echo ""
-    print_warning "The installation will continue, but may fail if Docker Compose cannot parse the file."
-    echo ""
-    prompt_yn_or_default "Do you want to continue anyway? (Y/n): " "y"
-    if [[ ! $REPLY =~ ^[Yy]$ ]]; then
-        print_info "Installation cancelled. Please upgrade Docker Compose or manually edit the docker-compose.yml file."
-        exit 1
-    fi
-    print_info "Proceeding with installation despite Docker Compose version compatibility issues..."
+# Download run-nginx.sh script
+NGINX_RUN_SCRIPT="${INSTALL_ROOT}/data/nginx/run-nginx.sh"
+print_info "Downloading nginx startup script..."
+if curl -fsSL -o "$NGINX_RUN_SCRIPT" "$NGINX_BASE_URL/run-nginx.sh" 2>/dev/null; then
+    chmod +x "$NGINX_RUN_SCRIPT"
+    print_success "Nginx startup script downloaded and made executable"
+else
+    print_error "Failed to download nginx startup script"
+    print_info "Please ensure you have internet connection and try again"
+    exit 1
 fi

-# Handle lite overlay: ensure it if --lite, clean up stale copies otherwise
-if [[ "$LITE_MODE" = true ]]; then
-    ensure_file "${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}" \
-        "${GITHUB_RAW_URL}/${LITE_COMPOSE_FILE}" "${LITE_COMPOSE_FILE}" || exit 1
-elif [[ -f "${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}" ]]; then
-    if [[ -f "${INSTALL_ROOT}/deployment/.env" ]]; then
-        print_warning "Existing lite overlay found but --lite was not passed."
-        prompt_yn_or_default "Remove lite overlay and switch to standard mode? (y/N): " "n"
-        if [[ ! $REPLY =~ ^[Yy]$ ]]; then
-            print_info "Keeping existing lite overlay. Pass --lite to keep using lite mode."
-            LITE_MODE=true
-        else
-            rm -f "${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}"
-            print_info "Removed lite overlay (switching to standard mode)"
-        fi
-    else
-        rm -f "${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}"
-        print_info "Removed previous lite overlay (switching to standard mode)"
-    fi
+# Download README file
+README_FILE="${INSTALL_ROOT}/README.md"
+print_info "Downloading README.md..."
+if curl -fsSL -o "$README_FILE" "${GITHUB_RAW_URL}/README.md" 2>/dev/null; then
+    print_success "README.md downloaded successfully"
+else
+    print_error "Failed to download README.md"
+    print_info "Please ensure you have internet connection and try again"
+    exit 1
 fi

-ensure_file "${INSTALL_ROOT}/deployment/env.template" \
-    "${GITHUB_RAW_URL}/env.template" "env.template" || exit 1
-
-ensure_file "${INSTALL_ROOT}/data/nginx/app.conf.template" \
-    "$NGINX_BASE_URL/app.conf.template" "nginx/app.conf.template" || exit 1
-
-ensure_file "${INSTALL_ROOT}/data/nginx/run-nginx.sh" \
-    "$NGINX_BASE_URL/run-nginx.sh" "nginx/run-nginx.sh" || exit 1
-chmod +x "${INSTALL_ROOT}/data/nginx/run-nginx.sh"
-
-ensure_file "${INSTALL_ROOT}/README.md" \
-    "${GITHUB_RAW_URL}/README.md" "README.md" || exit 1
-
+# Create empty local directory marker (if needed)
 touch "${INSTALL_ROOT}/data/nginx/local/.gitkeep"
-print_success "All configuration files ready"
+print_success "All configuration files downloaded successfully"

 # Set up deployment configuration
 print_step "Setting up deployment configs"
@@ -758,7 +513,7 @@ if [ -d "${INSTALL_ROOT}/deployment" ] && [ -f "${INSTALL_ROOT}/deployment/docke

    if [ -n "$COMPOSE_CMD" ]; then
        # Check if any containers are running
-        RUNNING_CONTAINERS=$(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args true) ps -q 2>/dev/null | wc -l)
+        RUNNING_CONTAINERS=$(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml ps -q 2>/dev/null | wc -l)
        if [ "$RUNNING_CONTAINERS" -gt 0 ]; then
            print_error "Onyx services are currently running!"
            echo ""
@@ -779,7 +534,7 @@ if [ -f "$ENV_FILE" ]; then
    echo "• Press Enter to restart with current configuration"
    echo "• Type 'update' to update to a newer version"
    echo ""
-    prompt_or_default "Choose an option [default: restart]: " ""
+    read -p "Choose an option [default: restart]: " -r
    echo ""

    if [ "$REPLY" = "update" ]; then
@@ -788,30 +543,26 @@ if [ -f "$ENV_FILE" ]; then
        echo "• Press Enter for latest (recommended)"
        echo "• Type a specific tag (e.g., v0.1.0)"
        echo ""
+        # If --include-craft was passed, default to craft-latest
        if [ "$INCLUDE_CRAFT" = true ]; then
-            prompt_or_default "Enter tag [default: craft-latest]: " "craft-latest"
-            VERSION="$REPLY"
+            read -p "Enter tag [default: craft-latest]: " -r VERSION
        else
-            prompt_or_default "Enter tag [default: latest]: " "latest"
-            VERSION="$REPLY"
+            read -p "Enter tag [default: latest]: " -r VERSION
        fi
        echo ""

-        if [ "$INCLUDE_CRAFT" = true ] && [ "$VERSION" = "craft-latest" ]; then
-            print_info "Selected: craft-latest (Craft enabled)"
-        elif [ "$VERSION" = "latest" ]; then
-            print_info "Selected: Latest version"
+        if [ -z "$VERSION" ]; then
+            if [ "$INCLUDE_CRAFT" = true ]; then
+                VERSION="craft-latest"
+                print_info "Selected: craft-latest (Craft enabled)"
+            else
+                VERSION="latest"
+                print_info "Selected: Latest version"
+            fi
        else
            print_info "Selected: $VERSION"
        fi

-        # Reject craft image tags when running in lite mode
-        if [[ "$LITE_MODE" = true ]] && [[ "${VERSION:-}" == craft-* ]]; then
-            print_error "Cannot use a craft image tag (${VERSION}) with --lite."
-            print_info "Craft requires services (Vespa, Redis, background workers) that lite mode disables."
-            exit 1
-        fi
-
        # Update .env file with new version
        print_info "Updating configuration for version $VERSION..."
        if grep -q "^IMAGE_TAG=" "$ENV_FILE"; then
@@ -830,67 +581,13 @@ if [ -f "$ENV_FILE" ]; then
        fi
        print_success "Configuration updated for upgrade"
    else
-        # Reject restarting a craft deployment in lite mode
-        EXISTING_TAG=$(grep "^IMAGE_TAG=" "$ENV_FILE" | head -1 | cut -d'=' -f2 | tr -d ' "'"'"'')
-        if [[ "$LITE_MODE" = true ]] && [[ "${EXISTING_TAG:-}" == craft-* ]]; then
-            print_error "Cannot restart a craft deployment (${EXISTING_TAG}) with --lite."
-            print_info "Craft requires services (Vespa, Redis, background workers) that lite mode disables."
-            exit 1
-        fi
-
        print_info "Keeping existing configuration..."
        print_success "Will restart with current settings"
    fi
-
-    # Ensure COMPOSE_PROFILES is cleared when running in lite mode on an
-    # existing .env (the template ships with s3-filestore enabled).
-    if [[ "$LITE_MODE" = true ]] && grep -q "^COMPOSE_PROFILES=.*s3-filestore" "$ENV_FILE" 2>/dev/null; then
-        sed -i.bak 's/^COMPOSE_PROFILES=.*/COMPOSE_PROFILES=/' "$ENV_FILE" 2>/dev/null || true
-        print_success "Cleared COMPOSE_PROFILES for lite mode"
-    fi
 else
    print_info "No existing .env file found. Setting up new deployment..."
    echo ""

-    # Ask for deployment mode (standard vs lite) unless already set via --lite flag
-    if [[ "$LITE_MODE" = false ]]; then
-        print_info "Which deployment mode would you like?"
-        echo ""
-        echo "  1) Standard  - Full deployment with search, connectors, and RAG"
-        echo "  2) Lite      - Minimal deployment (no Vespa, Redis, or model servers)"
-        echo "                  LLM chat, tools, file uploads, and Projects still work"
-        echo ""
-        prompt_or_default "Choose a mode (1 or 2) [default: 1]: " "1"
-        echo ""
-
-        case "$REPLY" in
-            2)
-                LITE_MODE=true
-                print_info "Selected: Lite mode"
-                ensure_file "${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}" \
-                    "${GITHUB_RAW_URL}/${LITE_COMPOSE_FILE}" "${LITE_COMPOSE_FILE}" || exit 1
-                ;;
-            *)
-                print_info "Selected: Standard mode"
-                ;;
-        esac
-    else
-        print_info "Deployment mode: Lite (set via --lite flag)"
-    fi
-
-    # Validate lite + craft combination (could now be set interactively)
-    if [[ "$LITE_MODE" = true ]] && [[ "$INCLUDE_CRAFT" = true ]]; then
-        print_error "--include-craft cannot be used with Lite mode."
-        print_info "Craft requires services (Vespa, Redis, background workers) that lite mode disables."
-        exit 1
-    fi
-
-    # Adjust resource expectations for lite mode
-    if [[ "$LITE_MODE" = true ]]; then
-        EXPECTED_DOCKER_RAM_GB=4
-        EXPECTED_DISK_GB=16
-    fi
-
    # Ask for version
    print_info "Which tag would you like to deploy?"
    echo ""
@@ -898,21 +595,23 @@ else
        echo "• Press Enter for craft-latest (recommended for Craft)"
        echo "• Type a specific tag (e.g., craft-v1.0.0)"
        echo ""
-        prompt_or_default "Enter tag [default: craft-latest]: " "craft-latest"
-        VERSION="$REPLY"
+        read -p "Enter tag [default: craft-latest]: " -r VERSION
    else
        echo "• Press Enter for latest (recommended)"
        echo "• Type a specific tag (e.g., v0.1.0)"
        echo ""
-        prompt_or_default "Enter tag [default: latest]: " "latest"
-        VERSION="$REPLY"
+        read -p "Enter tag [default: latest]: " -r VERSION
    fi
    echo ""

-    if [ "$INCLUDE_CRAFT" = true ] && [ "$VERSION" = "craft-latest" ]; then
-        print_info "Selected: craft-latest (Craft enabled)"
-    elif [ "$VERSION" = "latest" ]; then
-        print_info "Selected: Latest tag"
+    if [ -z "$VERSION" ]; then
+        if [ "$INCLUDE_CRAFT" = true ]; then
+            VERSION="craft-latest"
+            print_info "Selected: craft-latest (Craft enabled)"
+        else
+            VERSION="latest"
+            print_info "Selected: Latest tag"
+        fi
    else
        print_info "Selected: $VERSION"
    fi
@@ -946,13 +645,6 @@ else
    # Use basic auth by default
    AUTH_SCHEMA="basic"

-    # Reject craft image tags when running in lite mode (must check before writing .env)
-    if [[ "$LITE_MODE" = true ]] && [[ "${VERSION:-}" == craft-* ]]; then
-        print_error "Cannot use a craft image tag (${VERSION}) with --lite."
-        print_info "Craft requires services (Vespa, Redis, background workers) that lite mode disables."
-        exit 1
-    fi
-
    # Create .env file from template
    print_info "Creating .env file with your selections..."
    cp "$ENV_TEMPLATE" "$ENV_FILE"
@@ -962,13 +654,6 @@ else
    sed -i.bak "s/^IMAGE_TAG=.*/IMAGE_TAG=$VERSION/" "$ENV_FILE"
    print_success "IMAGE_TAG set to $VERSION"

-    # In lite mode, clear COMPOSE_PROFILES so profiled services (MinIO, etc.)
-    # stay disabled — the template ships with s3-filestore enabled by default.
-    if [[ "$LITE_MODE" = true ]]; then
-        sed -i.bak 's/^COMPOSE_PROFILES=.*/COMPOSE_PROFILES=/' "$ENV_FILE" 2>/dev/null || true
-        print_success "Cleared COMPOSE_PROFILES for lite mode"
-    fi
-
    # Configure basic authentication (default)
    sed -i.bak 's/^AUTH_TYPE=.*/AUTH_TYPE=basic/' "$ENV_FILE" 2>/dev/null || true
    print_success "Basic authentication enabled in configuration"
@@ -1089,7 +774,7 @@ print_step "Pulling Docker images"
 print_info "This may take several minutes depending on your internet connection..."
 echo ""
 print_info "Downloading Docker images (this may take a while)..."
-(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) pull --quiet)
+(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml pull --quiet)
 if [ $? -eq 0 ]; then
    print_success "Docker images downloaded successfully"
 else
@@ -1103,9 +788,9 @@ print_info "Launching containers..."
 echo ""
 if [ "$USE_LATEST" = true ]; then
    print_info "Force pulling latest images and recreating containers..."
-    (cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) up -d --pull always --force-recreate)
+    (cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml up -d --pull always --force-recreate)
 else
-    (cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) up -d)
+    (cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml up -d)
 fi
 if [ $? -ne 0 ]; then
    print_error "Failed to start Onyx services"
@@ -1127,7 +812,7 @@ echo ""
 # Check for restart loops
 print_info "Checking container health status..."
 RESTART_ISSUES=false
-CONTAINERS=$(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) ps -q 2>/dev/null)
+CONTAINERS=$(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml ps -q 2>/dev/null)

 for CONTAINER in $CONTAINERS; do
    PROJECT_NAME="$(basename "$INSTALL_ROOT")_deployment_"
@@ -1156,7 +841,7 @@ if [ "$RESTART_ISSUES" = true ]; then
    print_error "Some containers are experiencing issues!"
    echo ""
    print_info "Please check the logs for more information:"
-    echo "  (cd \"${INSTALL_ROOT}/deployment\" && $COMPOSE_CMD $(compose_file_args) logs)"
+    echo "  (cd \"${INSTALL_ROOT}/deployment\" && $COMPOSE_CMD -f docker-compose.yml logs)"

    echo ""
    print_info "If the issue persists, please contact: founders@onyx.app"
@@ -1175,12 +860,8 @@ check_onyx_health() {
    echo ""

    while [ $attempt -le $max_attempts ]; do
-        local http_code=""
-        if [[ "$DOWNLOADER" == "curl" ]]; then
-            http_code=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:$port" 2>/dev/null || echo "000")
-        else
-            http_code=$(wget -q --spider -S "http://localhost:$port" 2>&1 | grep "HTTP/" | tail -1 | awk '{print $2}' || echo "000")
-        fi
+        # Check for successful HTTP responses (200, 301, 302, etc.)
+        local http_code=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:$port")
        if echo "$http_code" | grep -qE "^(200|301|302|303|307|308)$"; then
            return 0
        fi
@@ -1236,18 +917,6 @@ print_info "If authentication is enabled, you can create your admin account here
 echo "   • Visit http://localhost:${HOST_PORT}/auth/signup to create your admin account"
 echo "   • The first user created will automatically have admin privileges"
 echo ""
-if [[ "$LITE_MODE" = true ]]; then
-    echo ""
-    print_info "Running in Lite mode — the following services are NOT started:"
-    echo "  • Vespa (vector database)"
-    echo "  • Redis (cache)"
-    echo "  • Model servers (embedding/inference)"
-    echo "  • Background workers (Celery)"
-    echo ""
-    print_info "Connectors and RAG search are disabled. LLM chat, tools, user file"
-    print_info "uploads, Projects, Agent knowledge, and code interpreter still work."
-fi
-echo ""
 print_info "Refer to the README in the ${INSTALL_ROOT} directory for more information."
 echo ""
 print_info "For help or issues, contact: founders@onyx.app"
--- a/web/src/app/app/message/FileDisplay.tsx
+++ b/web/src/app/app/message/FileDisplay.tsx
@@ -1,7 +1,6 @@
 "use client";

-import { ReactNode, useState } from "react";
-import { cn } from "@/lib/utils";
+import { useState } from "react";
 import { ChatFileType, FileDescriptor } from "@/app/app/interfaces";
 import Attachment from "@/refresh-components/Attachment";
 import { InMessageImage } from "@/app/app/components/files/images/InMessageImage";
@@ -10,27 +9,10 @@ import PreviewModal from "@/sections/modals/PreviewModal";
 import { MinimalOnyxDocument } from "@/lib/search/interfaces";
 import ExpandableContentWrapper from "@/components/tools/ExpandableContentWrapper";

-interface FileContainerProps {
-  children: ReactNode;
-  className?: string;
-  id?: string;
-}
-
 interface FileDisplayProps {
  files: FileDescriptor[];
 }

-function FileContainer({ children, className, id }: FileContainerProps) {
-  return (
-    <div
-      id={id}
-      className={cn("flex w-full flex-col items-end gap-2 py-2", className)}
-    >
-      {children}
-    </div>
-  );
-}
-
 export default function FileDisplay({ files }: FileDisplayProps) {
  const [close, setClose] = useState(true);
  const [previewingFile, setPreviewingFile] = useState<FileDescriptor | null>(
@@ -59,7 +41,7 @@ export default function FileDisplay({ files }: FileDisplayProps) {
      )}

      {textFiles.length > 0 && (
-        <FileContainer id="onyx-file">
+        <div id="onyx-file" className="flex flex-col items-end gap-2 py-2">
          {textFiles.map((file) => (
            <Attachment
              key={file.id}
@@ -67,36 +49,40 @@ export default function FileDisplay({ files }: FileDisplayProps) {
              open={() => setPreviewingFile(file)}
            />
          ))}
-        </FileContainer>
+        </div>
      )}

      {imageFiles.length > 0 && (
-        <FileContainer id="onyx-image">
+        <div id="onyx-image" className="flex flex-col items-end gap-2 py-2">
          {imageFiles.map((file) => (
            <InMessageImage key={file.id} fileId={file.id} />
          ))}
-        </FileContainer>
+        </div>
      )}

      {csvFiles.length > 0 && (
-        <FileContainer className="overflow-auto">
-          {csvFiles.map((file) =>
-            close ? (
-              <ExpandableContentWrapper
-                key={file.id}
-                fileDescriptor={file}
-                close={() => setClose(false)}
-                ContentComponent={CsvContent}
-              />
-            ) : (
-              <Attachment
-                key={file.id}
-                open={() => setClose(true)}
-                fileName={file.name || file.id}
-              />
-            )
-          )}
-        </FileContainer>
+        <div className="flex flex-col items-end gap-2 py-2">
+          {csvFiles.map((file) => {
+            return (
+              <div key={file.id} className="w-fit">
+                {close ? (
+                  <>
+                    <ExpandableContentWrapper
+                      fileDescriptor={file}
+                      close={() => setClose(false)}
+                      ContentComponent={CsvContent}
+                    />
+                  </>
+                ) : (
+                  <Attachment
+                    open={() => setClose(true)}
+                    fileName={file.name || file.id}
+                  />
+                )}
+              </div>
+            );
+          })}
+        </div>
      )}
    </>
  );
--- a/web/src/components/tools/ExpandableContentWrapper.tsx
+++ b/web/src/components/tools/ExpandableContentWrapper.tsx
@@ -40,7 +40,12 @@ export default function ExpandableContentWrapper({
  };

  const Content = (
-    <div className="w-message-default max-w-full !rounded-lg overflow-y-hidden h-full">
+    <div
+      className={cn(
+        !expanded ? "w-message-default" : "w-full",
+        "!rounded !rounded-lg overflow-y-hidden h-full"
+      )}
+    >
      <CardHeader className="w-full bg-background-tint-02 top-0 p-3">
        <div className="flex justify-between items-center">
          <Text className="text-ellipsis line-clamp-1" text03 mainUiAction>
@@ -78,10 +83,12 @@ export default function ExpandableContentWrapper({
        )}
      >
        <CardContent className="p-0">
-          <ContentComponent
-            fileDescriptor={fileDescriptor}
-            expanded={expanded}
-          />
+          {!expanded && (
+            <ContentComponent
+              fileDescriptor={fileDescriptor}
+              expanded={expanded}
+            />
+          )}
        </CardContent>
      </Card>
    </div>
Author	SHA1	Message	Date
Dane Urban	83558ae04c	.	2026-03-11 17:52:55 -07:00
Dane Urban	005009602c	Some fixes	2026-03-11 17:44:49 -07:00
Dane Urban	b93875353b	.	2026-03-11 17:32:30 -07:00
Dane Urban	2290141b53	xlsx to text	2026-03-11 17:19:09 -07:00