Mirror of https://github.com/onyx-dot-app/onyx.git, synced 2026-02-17 15:55:45 +00:00

Compare commits: batch_proc ... feature/he

7 commits:
f6225fa8fc
1feb812724
dcaa634b7f
63bd1253e7
64f9178746
01a0ce3ced
092728271c
@@ -6,8 +6,7 @@ Create Date: 2025-02-26 13:07:56.217791
 """
 from alembic import op
-import time
 from sqlalchemy import text


 # revision identifiers, used by Alembic.
 revision = "3bd4c84fe72f"
@@ -28,357 +27,45 @@ depends_on = None
 # 4. Adds indexes to both chat_message and chat_session tables for comprehensive search


-def upgrade():
-    # --- PART 1: chat_message table ---
-    # Step 1: Add nullable column (quick, minimal locking)
-    # op.execute("ALTER TABLE chat_message DROP COLUMN IF EXISTS message_tsv")
-    # op.execute("DROP TRIGGER IF EXISTS chat_message_tsv_trigger ON chat_message")
-    # op.execute("DROP FUNCTION IF EXISTS update_chat_message_tsv()")
-    # op.execute("ALTER TABLE chat_message DROP COLUMN IF EXISTS message_tsv")
-    # # Drop chat_session tsv trigger if it exists
-    # op.execute("DROP TRIGGER IF EXISTS chat_session_tsv_trigger ON chat_session")
-    # op.execute("DROP FUNCTION IF EXISTS update_chat_session_tsv()")
-    # op.execute("ALTER TABLE chat_session DROP COLUMN IF EXISTS title_tsv")
-    # raise Exception("Stop here")
-    time.time()
-    op.execute("ALTER TABLE chat_message ADD COLUMN IF NOT EXISTS message_tsv tsvector")
-
-    # Step 2: Create function and trigger for new/updated rows
-    op.execute(
-        """
-        CREATE OR REPLACE FUNCTION update_chat_message_tsv()
-        RETURNS TRIGGER AS $$
-        BEGIN
-            NEW.message_tsv = to_tsvector('english', NEW.message);
-            RETURN NEW;
-        END;
-        $$ LANGUAGE plpgsql
-        """
-    )
-
-    # Create trigger in a separate execute call
-    op.execute(
-        """
-        CREATE TRIGGER chat_message_tsv_trigger
-        BEFORE INSERT OR UPDATE ON chat_message
-        FOR EACH ROW EXECUTE FUNCTION update_chat_message_tsv()
-        """
-    )
-
-    # Step 3: Update existing rows in batches using Python
-    time.time()
-
-    # Get connection and count total rows
-    connection = op.get_bind()
-    total_count_result = connection.execute(
-        text("SELECT COUNT(*) FROM chat_message")
-    ).scalar()
-    total_count = total_count_result if total_count_result is not None else 0
-    batch_size = 5000
-    batches = 0
-
-    # Calculate total batches needed
-    total_batches = (
-        (total_count + batch_size - 1) // batch_size if total_count > 0 else 0
-    )
-
-    # Process in batches - properly handling UUIDs by using OFFSET/LIMIT approach
-    for batch_num in range(total_batches):
-        offset = batch_num * batch_size
-
-        # Execute update for this batch using OFFSET/LIMIT which works with UUIDs
-        connection.execute(
-            text(
-                """
-                UPDATE chat_message
-                SET message_tsv = to_tsvector('english', message)
-                WHERE id IN (
-                    SELECT id FROM chat_message
-                    WHERE message_tsv IS NULL
-                    ORDER BY id
-                    LIMIT :batch_size OFFSET :offset
-                )
-                """
-            ).bindparams(batch_size=batch_size, offset=offset)
-        )
-
-        # Commit each batch
-        connection.execute(text("COMMIT"))
-        # Start a new transaction
-        connection.execute(text("BEGIN"))
-
-        batches += 1
-
-    # Final check for any remaining NULL values
-    connection.execute(
-        text(
-            """
-            UPDATE chat_message SET message_tsv = to_tsvector('english', message)
-            WHERE message_tsv IS NULL
-            """
-        )
-    )
-
-    # Create GIN index concurrently
-    connection.execute(text("COMMIT"))
-
-    time.time()
-
-    connection.execute(
-        text(
-            """
-            CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_message_tsv
-            ON chat_message USING GIN (message_tsv)
-            """
-        )
-    )
-
-    # First drop the trigger as it won't be needed anymore
-    connection.execute(
-        text(
-            """
-            DROP TRIGGER IF EXISTS chat_message_tsv_trigger ON chat_message;
-            """
-        )
-    )
-
-    connection.execute(
-        text(
-            """
-            DROP FUNCTION IF EXISTS update_chat_message_tsv();
-            """
-        )
-    )
-
-    # Add new generated column
-    time.time()
-    connection.execute(
-        text(
-            """
-            ALTER TABLE chat_message
-            ADD COLUMN message_tsv_gen tsvector
-            GENERATED ALWAYS AS (to_tsvector('english', message)) STORED;
-            """
-        )
-    )
-
-    connection.execute(text("COMMIT"))
-
-    time.time()
-
-    connection.execute(
-        text(
-            """
-            CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_message_tsv_gen
-            ON chat_message USING GIN (message_tsv_gen)
-            """
-        )
-    )
-
-    # Drop old index and column
-    connection.execute(text("COMMIT"))
-
-    connection.execute(
-        text(
-            """
-            DROP INDEX CONCURRENTLY IF EXISTS idx_chat_message_tsv;
-            """
-        )
-    )
-    connection.execute(text("COMMIT"))
-    connection.execute(
-        text(
-            """
-            ALTER TABLE chat_message DROP COLUMN message_tsv;
-            """
-        )
-    )
-
-    # Rename new column to old name
-    connection.execute(
-        text(
-            """
-            ALTER TABLE chat_message RENAME COLUMN message_tsv_gen TO message_tsv;
-            """
-        )
-    )
-
-    # --- PART 2: chat_session table ---
-
-    # Step 1: Add nullable column (quick, minimal locking)
-    time.time()
-    connection.execute(
-        text(
-            "ALTER TABLE chat_session ADD COLUMN IF NOT EXISTS description_tsv tsvector"
-        )
-    )
-
-    # Step 2: Create function and trigger for new/updated rows - SPLIT INTO SEPARATE CALLS
-    connection.execute(
-        text(
-            """
-            CREATE OR REPLACE FUNCTION update_chat_session_tsv()
-            RETURNS TRIGGER AS $$
-            BEGIN
-                NEW.description_tsv = to_tsvector('english', COALESCE(NEW.description, ''));
-                RETURN NEW;
-            END;
-            $$ LANGUAGE plpgsql
-            """
-        )
-    )
-
-    # Create trigger in a separate execute call
-    connection.execute(
-        text(
-            """
-            CREATE TRIGGER chat_session_tsv_trigger
-            BEFORE INSERT OR UPDATE ON chat_session
-            FOR EACH ROW EXECUTE FUNCTION update_chat_session_tsv()
-            """
-        )
-    )
-
-    # Step 3: Update existing rows in batches using Python
-    time.time()
-
-    # Get the maximum ID to determine batch count
-    # Cast id to text for MAX function since it's a UUID
-    max_id_result = connection.execute(
-        text("SELECT COALESCE(MAX(id::text), '0') FROM chat_session")
-    ).scalar()
-    max_id_result if max_id_result is not None else "0"
-    batch_size = 5000
-    batches = 0
-
-    # Get all IDs ordered to process in batches
-    rows = connection.execute(
-        text("SELECT id FROM chat_session ORDER BY id")
-    ).fetchall()
-    total_rows = len(rows)
-
-    # Process in batches
-    for batch_num, batch_start in enumerate(range(0, total_rows, batch_size)):
-        batch_end = min(batch_start + batch_size, total_rows)
-        batch_ids = [row[0] for row in rows[batch_start:batch_end]]
-
-        if not batch_ids:
-            continue
-
-        # Use IN clause instead of BETWEEN for UUIDs
-        placeholders = ", ".join([f":id{i}" for i in range(len(batch_ids))])
-        params = {f"id{i}": id_val for i, id_val in enumerate(batch_ids)}
-
-        # Execute update for this batch
-        connection.execute(
-            text(
-                f"""
-                UPDATE chat_session
-                SET description_tsv = to_tsvector('english', COALESCE(description, ''))
-                WHERE id IN ({placeholders})
-                AND description_tsv IS NULL
-                """
-            ).bindparams(**params)
-        )
-
-        # Commit each batch
-        connection.execute(text("COMMIT"))
-        # Start a new transaction
-        connection.execute(text("BEGIN"))
-
-        batches += 1
-
-    # Final check for any remaining NULL values
-    connection.execute(
-        text(
-            """
-            UPDATE chat_session SET description_tsv = to_tsvector('english', COALESCE(description, ''))
-            WHERE description_tsv IS NULL
-            """
-        )
-    )
-
-    # Create GIN index concurrently
-    connection.execute(text("COMMIT"))
-
-    time.time()
-    connection.execute(
-        text(
-            """
-            CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_session_desc_tsv
-            ON chat_session USING GIN (description_tsv)
-            """
-        )
-    )
-
-    # After Final check for chat_session
-    # First drop the trigger as it won't be needed anymore
-    connection.execute(
-        text(
-            """
-            DROP TRIGGER IF EXISTS chat_session_tsv_trigger ON chat_session;
-            """
-        )
-    )
-
-    connection.execute(
-        text(
-            """
-            DROP FUNCTION IF EXISTS update_chat_session_tsv();
-            """
-        )
-    )
-    # Add new generated column
-    time.time()
-    connection.execute(
-        text(
-            """
-            ALTER TABLE chat_session
-            ADD COLUMN description_tsv_gen tsvector
-            GENERATED ALWAYS AS (to_tsvector('english', COALESCE(description, ''))) STORED;
-            """
-        )
-    )
-
-    # Create new index on generated column
-    connection.execute(text("COMMIT"))
-
-    time.time()
-    connection.execute(
-        text(
-            """
-            CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_session_desc_tsv_gen
-            ON chat_session USING GIN (description_tsv_gen)
-            """
-        )
-    )
-
-    # Drop old index and column
-    connection.execute(text("COMMIT"))
-
-    connection.execute(
-        text(
-            """
-            DROP INDEX CONCURRENTLY IF EXISTS idx_chat_session_desc_tsv;
-            """
-        )
-    )
-    connection.execute(text("COMMIT"))
-    connection.execute(
-        text(
-            """
-            ALTER TABLE chat_session DROP COLUMN description_tsv;
-            """
-        )
-    )
-
-    # Rename new column to old name
-    connection.execute(
-        text(
-            """
-            ALTER TABLE chat_session RENAME COLUMN description_tsv_gen TO description_tsv;
-            """
-        )
-    )
+def upgrade() -> None:
+    # Create a GIN index for full-text search on chat_message.message
+    op.execute(
+        """
+        ALTER TABLE chat_message
+        ADD COLUMN message_tsv tsvector
+        GENERATED ALWAYS AS (to_tsvector('english', message)) STORED;
+        """
+    )
+
+    # Commit the current transaction before creating concurrent indexes
+    op.execute("COMMIT")
+
+    op.execute(
+        """
+        CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_message_tsv
+        ON chat_message
+        USING GIN (message_tsv)
+        """
+    )
+
+    # Also add a stored tsvector column for chat_session.description
+    op.execute(
+        """
+        ALTER TABLE chat_session
+        ADD COLUMN description_tsv tsvector
+        GENERATED ALWAYS AS (to_tsvector('english', coalesce(description, ''))) STORED;
+        """
+    )
+
+    # Commit again before creating the second concurrent index
+    op.execute("COMMIT")
+
+    op.execute(
+        """
+        CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_session_desc_tsv
+        ON chat_session
+        USING GIN (description_tsv)
+        """
+    )
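For context on what the new generated columns buy: once the GIN indexes exist, chat history can be searched with ordinary Postgres full-text operators. A minimal sketch, assuming the migration above has run; the connection URL and query string are placeholders, and `plainto_tsquery` must use the same 'english' config as the generated columns:

```python
from sqlalchemy import create_engine, text

engine = create_engine("postgresql://user:pass@localhost/onyx")  # placeholder URL
with engine.connect() as conn:
    rows = conn.execute(
        text(
            """
            SELECT id, message
            FROM chat_message
            WHERE message_tsv @@ plainto_tsquery('english', :q)
            LIMIT 20
            """
        ),
        {"q": "quarterly report"},
    )
    # The GIN index on message_tsv lets Postgres answer this without a seq scan.
    for row in rows:
        print(row.id)
```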
@@ -134,9 +134,7 @@ def fetch_chat_sessions_eagerly_by_time(
     limit: int | None = 500,
     initial_time: datetime | None = None,
 ) -> list[ChatSession]:
-    """Sorted by oldest to newest, then by message id"""
-
-    asc_time_order: UnaryExpression = asc(ChatSession.time_created)
+    time_order: UnaryExpression = desc(ChatSession.time_created)
     message_order: UnaryExpression = asc(ChatMessage.id)

     filters: list[ColumnElement | BinaryExpression] = [
@@ -149,7 +147,8 @@ def fetch_chat_sessions_eagerly_by_time(
     subquery = (
         db_session.query(ChatSession.id, ChatSession.time_created)
         .filter(*filters)
-        .order_by(asc_time_order)
+        .order_by(ChatSession.id, time_order)
+        .distinct(ChatSession.id)
         .limit(limit)
         .subquery()
     )
@@ -165,7 +164,7 @@ def fetch_chat_sessions_eagerly_by_time(
                 ChatMessage.chat_message_feedbacks
             ),
         )
-        .order_by(asc_time_order, message_order)
+        .order_by(time_order, message_order)
     )

     chat_sessions = query.all()
@@ -16,18 +16,13 @@ from onyx.db.models import UsageReport
 from onyx.file_store.file_store import get_default_file_store


-# Gets skeletons of all messages in the given range
+# Gets skeletons of all message
 def get_empty_chat_messages_entries__paginated(
     db_session: Session,
     period: tuple[datetime, datetime],
     limit: int | None = 500,
     initial_time: datetime | None = None,
 ) -> tuple[Optional[datetime], list[ChatMessageSkeleton]]:
     """Returns a tuple where:
     first element is the most recent timestamp out of the sessions iterated
       - this timestamp can be used to paginate forward in time
     second element is a list of messages belonging to all the sessions iterated
     """
     chat_sessions = fetch_chat_sessions_eagerly_by_time(
         start=period[0],
         end=period[1],
@@ -57,17 +52,18 @@ def get_empty_chat_messages_entries__paginated(
     if len(chat_sessions) == 0:
         return None, []

-    return chat_sessions[-1].time_created, message_skeletons
+    return chat_sessions[0].time_created, message_skeletons


 def get_all_empty_chat_message_entries(
     db_session: Session,
     period: tuple[datetime, datetime],
 ) -> Generator[list[ChatMessageSkeleton], None, None]:
     """period is the range of time over which to fetch messages."""
     initial_time: Optional[datetime] = period[0]
     ind = 0
     while True:
         # iterate from oldest to newest
         ind += 1

         time_created, message_skeletons = get_empty_chat_messages_entries__paginated(
             db_session,
             period,
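The generator above is cut off mid-call in this view, but the pagination contract is visible: the paginated helper returns the newest session timestamp it visited, and the caller feeds that back in as the next starting point. A minimal sketch of that loop, assuming the last-seen timestamp is passed via the `initial_time` parameter shown in the signature above:

```python
from datetime import datetime
from typing import Generator, Optional

def iter_message_pages(
    db_session, period: tuple[datetime, datetime]
) -> Generator[list, None, None]:
    # Hypothetical driver mirroring get_all_empty_chat_message_entries above.
    initial_time: Optional[datetime] = period[0]
    while True:
        time_created, message_skeletons = get_empty_chat_messages_entries__paginated(
            db_session, period, initial_time=initial_time
        )
        if time_created is None:
            break  # helper returns (None, []) once no sessions remain in the window
        yield message_skeletons
        initial_time = time_created  # paginate forward from the newest session seen
```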
@@ -642,4 +642,14 @@ MOCK_LLM_RESPONSE = (
 )


-DEFAULT_IMAGE_ANALYSIS_MAX_SIZE_MB = 20
+# Image processing configurations
+ENABLE_IMAGE_EXTRACTION = (
+    os.environ.get("ENABLE_IMAGE_EXTRACTION", "true").lower() == "true"
+)
+ENABLE_INDEXING_TIME_IMAGE_ANALYSIS = not (
+    os.environ.get("DISABLE_INDEXING_TIME_IMAGE_ANALYSIS", "false").lower() == "true"
+)
+ENABLE_SEARCH_TIME_IMAGE_ANALYSIS = not (
+    os.environ.get("DISABLE_SEARCH_TIME_IMAGE_ANALYSIS", "false").lower() == "true"
+)
+IMAGE_ANALYSIS_MAX_SIZE_MB = int(os.environ.get("IMAGE_ANALYSIS_MAX_SIZE_MB", "20"))
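One detail worth noting in the added block: the DISABLE_*-style variables are double negatives, so both analysis features default to enabled. A standalone sketch of how that expression parses (a plain dict stands in for os.environ to keep the example side-effect free):

```python
# Unset or "false" leaves the feature enabled; any casing of "true" disables it.
for raw in (None, "false", "true", "TRUE"):
    env = {} if raw is None else {"DISABLE_SEARCH_TIME_IMAGE_ANALYSIS": raw}
    enabled = not (
        env.get("DISABLE_SEARCH_TIME_IMAGE_ANALYSIS", "false").lower() == "true"
    )
    print(raw, "->", enabled)  # None -> True, false -> True, true/TRUE -> False
```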
@@ -1,38 +0,0 @@
-from onyx.configs.app_configs import DEFAULT_IMAGE_ANALYSIS_MAX_SIZE_MB
-from onyx.server.settings.store import load_settings
-
-
-def get_image_extraction_and_analysis_enabled() -> bool:
-    """Get image extraction and analysis enabled setting from workspace settings or fallback to False"""
-    try:
-        settings = load_settings()
-        if settings.image_extraction_and_analysis_enabled is not None:
-            return settings.image_extraction_and_analysis_enabled
-    except Exception:
-        pass
-
-    return False
-
-
-def get_search_time_image_analysis_enabled() -> bool:
-    """Get search time image analysis enabled setting from workspace settings or fallback to False"""
-    try:
-        settings = load_settings()
-        if settings.search_time_image_analysis_enabled is not None:
-            return settings.search_time_image_analysis_enabled
-    except Exception:
-        pass
-
-    return False
-
-
-def get_image_analysis_max_size_mb() -> int:
-    """Get image analysis max size MB setting from workspace settings or fallback to environment variable"""
-    try:
-        settings = load_settings()
-        if settings.image_analysis_max_size_mb is not None:
-            return settings.image_analysis_max_size_mb
-    except Exception:
-        pass
-
-    return DEFAULT_IMAGE_ANALYSIS_MAX_SIZE_MB
@@ -144,12 +144,6 @@ class OnyxConfluence:
             self.static_credentials = credential_json
             return credential_json, False

-        if not OAUTH_CONFLUENCE_CLOUD_CLIENT_ID:
-            raise RuntimeError("OAUTH_CONFLUENCE_CLOUD_CLIENT_ID must be set!")
-
-        if not OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET:
-            raise RuntimeError("OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET must be set!")
-
         # check if we should refresh tokens. we're deciding to refresh halfway
         # to expiration
         now = datetime.now(timezone.utc)
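The halfway-to-expiration rule in the comment above can be made concrete. A minimal sketch; the `issued_at`/`expires_in_seconds` names are assumptions for illustration, since the hunk does not show the actual token fields:

```python
from datetime import datetime, timedelta, timezone

def should_refresh(issued_at: datetime, expires_in_seconds: float) -> bool:
    """Hypothetical helper: refresh once past half of the token's lifetime."""
    now = datetime.now(timezone.utc)
    halfway = issued_at + timedelta(seconds=expires_in_seconds / 2)
    return now >= halfway

# A 1-hour token issued 45 minutes ago is past halfway, so it should refresh.
print(should_refresh(datetime.now(timezone.utc) - timedelta(minutes=45), 3600))
```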
@@ -1,7 +1,7 @@
 """
 Mixin for connectors that need vision capabilities.
 """
-from onyx.configs.llm_configs import get_image_extraction_and_analysis_enabled
+from onyx.configs.app_configs import ENABLE_INDEXING_TIME_IMAGE_ANALYSIS
 from onyx.llm.factory import get_default_llm_with_vision
 from onyx.llm.interfaces import LLM
 from onyx.utils.logger import setup_logger

@@ -30,7 +30,7 @@ class VisionEnabledConnector:
         Sets self.image_analysis_llm to the LLM instance or None if disabled.
         """
         self.image_analysis_llm: LLM | None = None
-        if get_image_extraction_and_analysis_enabled():
+        if ENABLE_INDEXING_TIME_IMAGE_ANALYSIS:
             try:
                 self.image_analysis_llm = get_default_llm_with_vision()
                 if self.image_analysis_llm is None:
@@ -10,8 +10,8 @@ from langchain_core.messages import SystemMessage

 from onyx.chat.models import SectionRelevancePiece
 from onyx.configs.app_configs import BLURB_SIZE
+from onyx.configs.app_configs import ENABLE_SEARCH_TIME_IMAGE_ANALYSIS
 from onyx.configs.constants import RETURN_SEPARATOR
-from onyx.configs.llm_configs import get_search_time_image_analysis_enabled
 from onyx.configs.model_configs import CROSS_ENCODER_RANGE_MAX
 from onyx.configs.model_configs import CROSS_ENCODER_RANGE_MIN
 from onyx.context.search.enums import LLMEvaluationType

@@ -413,7 +413,7 @@ def search_postprocessing(
     # NOTE: if we don't rerank, we can return the chunks immediately
     # since we know this is the final order.
     # This way the user experience isn't delayed by the LLM step
-    if get_search_time_image_analysis_enabled():
+    if ENABLE_SEARCH_TIME_IMAGE_ANALYSIS:
         update_image_sections_with_query(
             retrieved_sections, search_query.query, llm
         )

@@ -456,7 +456,7 @@ def search_postprocessing(
     _log_top_section_links(search_query.search_type.value, reranked_sections)

     # Add the image processing step here
-    if get_search_time_image_analysis_enabled():
+    if ENABLE_SEARCH_TIME_IMAGE_ANALYSIS:
         update_image_sections_with_query(
             reranked_sections, search_query.query, llm
         )
@@ -1,53 +0,0 @@
-import random
-from datetime import datetime
-from datetime import timedelta
-
-from onyx.configs.constants import MessageType
-from onyx.db.chat import create_chat_session
-from onyx.db.chat import create_new_chat_message
-from onyx.db.chat import get_or_create_root_message
-from onyx.db.engine import get_session_with_current_tenant
-from onyx.db.models import ChatSession
-
-
-def seed_chat_history(num_sessions: int, num_messages: int, days: int) -> None:
-    """Utility function to seed chat history for testing.
-
-    num_sessions: the number of sessions to seed
-    num_messages: the number of messages to seed per sessions
-    days: the number of days looking backwards from the current time over which to randomize
-      the times.
-    """
-    with get_session_with_current_tenant() as db_session:
-        for y in range(0, num_sessions):
-            create_chat_session(db_session, f"pytest_session_{y}", None, None)
-
-        # randomize all session times
-        rows = db_session.query(ChatSession).all()
-        for row in rows:
-            row.time_created = datetime.utcnow() - timedelta(
-                days=random.randint(0, days)
-            )
-            row.time_updated = row.time_created + timedelta(
-                minutes=random.randint(0, 10)
-            )
-
-            root_message = get_or_create_root_message(row.id, db_session)
-
-            for x in range(0, num_messages):
-                chat_message = create_new_chat_message(
-                    row.id,
-                    root_message,
-                    f"pytest_message_{x}",
-                    None,
-                    0,
-                    MessageType.USER,
-                    db_session,
-                )
-
-                chat_message.time_sent = row.time_created + timedelta(
-                    minutes=random.randint(0, 10)
-                )
-                db_session.commit()
-
-        db_session.commit()
@@ -53,11 +53,6 @@ class Settings(BaseModel):
     auto_scroll: bool | None = False
     query_history_type: QueryHistoryType | None = None

-    # Image processing settings
-    image_extraction_and_analysis_enabled: bool | None = False
-    search_time_image_analysis_enabled: bool | None = False
-    image_analysis_max_size_mb: int | None = 20
-

 class UserSettings(Settings):
     notifications: list[Notification]
@@ -47,7 +47,6 @@ def load_settings() -> Settings:

     settings.anonymous_user_enabled = anonymous_user_enabled
     settings.query_history_type = ONYX_QUERY_HISTORY_TYPE

     return settings
@@ -1,45 +0,0 @@
-import argparse
-import logging
-from logging import getLogger
-
-from onyx.db.seeding.chat_history_seeding import seed_chat_history
-
-# Configure the logger
-logging.basicConfig(
-    level=logging.INFO,  # Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
-    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",  # Log format
-    handlers=[logging.StreamHandler()],  # Output logs to console
-)
-
-logger = getLogger(__name__)
-
-
-def go_main(num_sessions: int, num_messages: int, num_days: int) -> None:
-    seed_chat_history(num_sessions, num_messages, num_days)
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Seed chat history")
-    parser.add_argument(
-        "--sessions",
-        type=int,
-        default=2048,
-        help="Number of chat sessions to seed",
-    )
-
-    parser.add_argument(
-        "--messages",
-        type=int,
-        default=4,
-        help="Number of chat messages to seed per session",
-    )
-
-    parser.add_argument(
-        "--days",
-        type=int,
-        default=90,
-        help="Number of days looking backwards over which to seed the timestamps with",
-    )
-
-    args = parser.parse_args()
-    go_main(args.sessions, args.messages, args.days)
@@ -1,46 +0,0 @@
-from datetime import datetime
-from datetime import timedelta
-from datetime import timezone
-
-from ee.onyx.db.usage_export import get_all_empty_chat_message_entries
-from onyx.db.engine import get_session_with_current_tenant
-from onyx.db.seeding.chat_history_seeding import seed_chat_history
-
-
-def test_usage_reports(reset: None) -> None:
-    EXPECTED_SESSIONS = 2048
-    MESSAGES_PER_SESSION = 4
-    EXPECTED_MESSAGES = EXPECTED_SESSIONS * MESSAGES_PER_SESSION
-
-    seed_chat_history(EXPECTED_SESSIONS, MESSAGES_PER_SESSION, 90)
-
-    with get_session_with_current_tenant() as db_session:
-        # count of all entries should be exact
-        period = (
-            datetime.fromtimestamp(0, tz=timezone.utc),
-            datetime.now(tz=timezone.utc),
-        )
-
-        count = 0
-        for entry_batch in get_all_empty_chat_message_entries(db_session, period):
-            for entry in entry_batch:
-                count += 1
-
-        assert count == EXPECTED_MESSAGES
-
-        # count in a one month time range should be within a certain range statistically
-        # this can be improved if we seed the chat history data deterministically
-        period = (
-            datetime.now(tz=timezone.utc) - timedelta(days=30),
-            datetime.now(tz=timezone.utc),
-        )
-
-        count = 0
-        for entry_batch in get_all_empty_chat_message_entries(db_session, period):
-            for entry in entry_batch:
-                count += 1
-
-        lower = EXPECTED_MESSAGES // 3 - (EXPECTED_MESSAGES // (3 * 3))
-        upper = EXPECTED_MESSAGES // 3 + (EXPECTED_MESSAGES // (3 * 3))
-        assert count > lower
-        assert count < upper
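On the deleted test's statistical bounds: seeding spreads timestamps uniformly over 90 days, so a 30-day window should catch roughly a third of the 8192 messages, and the assertions allow a one-third relative margin around that expectation. The arithmetic, spelled out:

```python
EXPECTED_MESSAGES = 2048 * 4                  # sessions * messages per session = 8192
expected_in_window = EXPECTED_MESSAGES // 3   # ~30 of the 90 seeded days
margin = EXPECTED_MESSAGES // (3 * 3)         # one third of the expectation
lower, upper = expected_in_window - margin, expected_in_window + margin
print(lower, upper)  # 1820 3640
```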
@@ -80,3 +80,13 @@ prod cluster**
 - `kubectl delete -f .`
 - To not delete the persistent volumes (Document indexes and Users), specify the specific `.yaml` files instead of
   `.` without specifying delete on persistent-volumes.yaml.
+
+### Using Helm to deploy to an existing cluster
+
+Onyx has a Helm chart that makes it convenient to install all services onto an existing Kubernetes cluster. To install:
+
+* Currently the Helm chart is not published, so to install, clone the repo.
+* Configure access to the cluster via kubectl. Ensure the kubectl context is set to the cluster that you want to use.
+* The default secrets, environment variables, and other service-level configuration are stored in `deployment/helm/charts/onyx/values.yml`. You may create another `override.yaml`.
+* `cd deployment/helm/charts/onyx` and run `helm install onyx -n onyx -f override.yaml .`. This installs Onyx on the cluster under the `onyx` namespace.
+* Check the status of the deploy using `kubectl get pods -n onyx`.
deployment/helm/charts/onyx/templates/ingress-api.yaml (new file, 27 lines)
@@ -0,0 +1,27 @@
+{{- if .Values.ingress.enabled -}}
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: {{ include "onyx-stack.fullname" . }}-ingress-api
+  annotations:
+    kubernetes.io/ingress.class: nginx
+    nginx.ingress.kubernetes.io/rewrite-target: /$2
+    nginx.ingress.kubernetes.io/use-regex: "true"
+    cert-manager.io/cluster-issuer: {{ include "onyx-stack.fullname" . }}-letsencrypt
+spec:
+  rules:
+    - host: {{ .Values.ingress.api.host }}
+      http:
+        paths:
+          - path: /api(/|$)(.*)
+            pathType: Prefix
+            backend:
+              service:
+                name: {{ include "onyx-stack.fullname" . }}-api-service
+                port:
+                  number: {{ .Values.api.service.servicePort }}
+  tls:
+    - hosts:
+        - {{ .Values.ingress.api.host }}
+      secretName: {{ include "onyx-stack.fullname" . }}-ingress-api-tls
+{{- end }}
deployment/helm/charts/onyx/templates/ingress-webserver.yaml (new file, 26 lines)
@@ -0,0 +1,26 @@
+{{- if .Values.ingress.enabled -}}
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: {{ include "onyx-stack.fullname" . }}-ingress-webserver
+  annotations:
+    kubernetes.io/ingress.class: nginx
+    cert-manager.io/cluster-issuer: {{ include "onyx-stack.fullname" . }}-letsencrypt
+    kubernetes.io/tls-acme: "true"
+spec:
+  rules:
+    - host: {{ .Values.ingress.webserver.host }}
+      http:
+        paths:
+          - path: /
+            pathType: Prefix
+            backend:
+              service:
+                name: {{ include "onyx-stack.fullname" . }}-webserver
+                port:
+                  number: {{ .Values.webserver.service.servicePort }}
+  tls:
+    - hosts:
+        - {{ .Values.ingress.webserver.host }}
+      secretName: {{ include "onyx-stack.fullname" . }}-ingress-webserver-tls
+{{- end }}
deployment/helm/charts/onyx/templates/lets-encrypt.yaml (new file, 20 lines)
@@ -0,0 +1,20 @@
+{{- if .Values.letsencrypt.enabled -}}
+apiVersion: cert-manager.io/v1
+kind: ClusterIssuer
+metadata:
+  name: {{ include "onyx-stack.fullname" . }}-letsencrypt
+spec:
+  acme:
+    # The ACME server URL
+    server: https://acme-v02.api.letsencrypt.org/directory
+    # Email address used for ACME registration
+    email: {{ .Values.letsencrypt.email }}
+    # Name of a secret used to store the ACME account private key
+    privateKeySecretRef:
+      name: {{ include "onyx-stack.fullname" . }}-letsencrypt
+    # Enable the HTTP-01 challenge provider
+    solvers:
+      - http01:
+          ingress:
+            class: nginx
+{{- end }}
@@ -376,22 +376,17 @@ redis:
   existingSecret: onyx-secrets
   existingSecretPasswordKey: redis_password

-# ingress:
-#   enabled: false
-#   className: ""
-#   annotations: {}
-#     # kubernetes.io/ingress.class: nginx
-#     # kubernetes.io/tls-acme: "true"
-#   hosts:
-#     - host: chart-example.local
-#       paths:
-#         - path: /
-#           pathType: ImplementationSpecific
-#   tls: []
-#   #  - secretName: chart-example-tls
-#   #    hosts:
-#   #      - chart-example.local
-
+ingress:
+  enabled: false
+  className: ""
+  api:
+    host: onyx.local
+  webserver:
+    host: onyx.local
+
+letsencrypt:
+  enabled: false
+  email: "abc@abc.com"

 auth:
   # existingSecret onyx-secret for storing smtp, oauth, slack, and other secrets
@@ -26,7 +26,7 @@ export function Checkbox({
   onChange: (e: React.ChangeEvent<HTMLInputElement>) => void;
 }) {
   return (
-    <label className="flex text-xs cursor-pointer">
+    <label className="flex text-sm cursor-pointer">
       <input
         checked={checked}
         onChange={onChange}

@@ -34,7 +34,7 @@ export function Checkbox({
         className="mr-2 w-3.5 h-3.5 my-auto"
       />
       <div>
-        <Label small>{label}</Label>
+        <Label>{label}</Label>
        {sublabel && <SubLabel>{sublabel}</SubLabel>}
       </div>
     </label>
@@ -208,7 +208,7 @@ export function SettingsForm() {
   }

   return (
-    <div className="flex flex-col pb-8">
+    <div>
       {popup}
       <Title className="mb-4">Workspace Settings</Title>
       <Checkbox

@@ -307,51 +307,6 @@ export function SettingsForm() {
         </Button>
       </>
     )}

-      {/* Image Processing Settings */}
-      <Title className="mt-8 mb-4">Image Processing</Title>
-
-      <div className="flex flex-col gap-2">
-        <Checkbox
-          label="Enable Image Extraction and Analysis"
-          sublabel="Extract and analyze images from documents during indexing. This allows the system to process images and create searchable descriptions of them."
-          checked={settings.image_extraction_and_analysis_enabled ?? false}
-          onChange={(e) =>
-            handleToggleSettingsField(
-              "image_extraction_and_analysis_enabled",
-              e.target.checked
-            )
-          }
-        />
-
-        <Checkbox
-          label="Enable Search-time Image Analysis"
-          sublabel="Analyze images at search time when a user asks about images. This provides more detailed and query-specific image analysis but may increase search-time latency."
-          checked={settings.search_time_image_analysis_enabled ?? false}
-          onChange={(e) =>
-            handleToggleSettingsField(
-              "search_time_image_analysis_enabled",
-              e.target.checked
-            )
-          }
-        />
-
-        <IntegerInput
-          label="Maximum Image Size for Analysis (MB)"
-          sublabel="Images larger than this size will not be analyzed to prevent excessive resource usage."
-          value={settings.image_analysis_max_size_mb ?? null}
-          onChange={(e) => {
-            const value = e.target.value ? parseInt(e.target.value) : null;
-            if (value !== null && !isNaN(value) && value > 0) {
-              updateSettingField([
-                { fieldName: "image_analysis_max_size_mb", newValue: value },
-              ]);
-            }
-          }}
-          id="image-analysis-max-size"
-          placeholder="Enter maximum size in MB"
-        />
-      </div>
     </div>
   );
 }
@@ -21,11 +21,6 @@ export interface Settings {
   auto_scroll: boolean;
   temperature_override_enabled: boolean;
   query_history_type: QueryHistoryType;
-
-  // Image processing settings
-  image_extraction_and_analysis_enabled?: boolean;
-  search_time_image_analysis_enabled?: boolean;
-  image_analysis_max_size_mb?: number;
 }

 export enum NotificationType {