Mirror of https://github.com/onyx-dot-app/onyx.git
Synced 2026-02-17 15:55:45 +00:00

Compare commits: additional ... github_lis

17 Commits

| SHA1 |
|---|
| 9087320a06 |
| b0af1458c0 |
| bb67a7a122 |
| e239dc31c1 |
| 027128502c |
| a7a374dc81 |
| facc8cc2fa |
| 2c0af0a0ca |
| bfbc1cd954 |
| 626da583aa |
| 92faca139d |
| cec05c5ee9 |
| eaf054ef06 |
| a7a1a24658 |
| 687122911d |
| 40953bd4fe |
| a7acc07e79 |
@@ -0,0 +1,125 @@
"""Update GitHub connector repo_name to repositories

Revision ID: 3934b1bc7b62
Revises: b7c2b63c4a03
Create Date: 2025-03-05 10:50:30.516962

"""
from alembic import op
import sqlalchemy as sa
import json
import logging

# revision identifiers, used by Alembic.
revision = "3934b1bc7b62"
down_revision = "b7c2b63c4a03"
branch_labels = None
depends_on = None

logger = logging.getLogger("alembic.runtime.migration")


def upgrade() -> None:
    # Get all GitHub connectors
    conn = op.get_bind()

    # First get all GitHub connectors
    github_connectors = conn.execute(
        sa.text(
            """
            SELECT id, connector_specific_config
            FROM connector
            WHERE source = 'GITHUB'
            """
        )
    ).fetchall()

    # Update each connector's config
    updated_count = 0
    for connector_id, config in github_connectors:
        try:
            if not config:
                logger.warning(f"Connector {connector_id} has no config, skipping")
                continue

            # Parse the config if it's a string
            if isinstance(config, str):
                config = json.loads(config)

            if "repo_name" not in config:
                continue

            # Create new config with repositories instead of repo_name
            new_config = dict(config)
            repo_name_value = new_config.pop("repo_name")
            new_config["repositories"] = repo_name_value

            # Update the connector with the new config
            conn.execute(
                sa.text(
                    """
                    UPDATE connector
                    SET connector_specific_config = :new_config
                    WHERE id = :connector_id
                    """
                ),
                {"connector_id": connector_id, "new_config": json.dumps(new_config)},
            )
            updated_count += 1
        except Exception as e:
            logger.error(f"Error updating connector {connector_id}: {str(e)}")


def downgrade() -> None:
    # Get all GitHub connectors
    conn = op.get_bind()

    logger.debug(
        "Starting rollback of GitHub connectors from repositories to repo_name"
    )

    github_connectors = conn.execute(
        sa.text(
            """
            SELECT id, connector_specific_config
            FROM connector
            WHERE source = 'GITHUB'
            """
        )
    ).fetchall()

    logger.debug(f"Found {len(github_connectors)} GitHub connectors to rollback")

    # Revert each GitHub connector to use repo_name instead of repositories
    reverted_count = 0
    for connector_id, config in github_connectors:
        try:
            if not config:
                continue

            # Parse the config if it's a string
            if isinstance(config, str):
                config = json.loads(config)

            if "repositories" not in config:
                continue

            # Create new config with repo_name instead of repositories
            new_config = dict(config)
            repositories_value = new_config.pop("repositories")
            new_config["repo_name"] = repositories_value

            # Update the connector with the new config
            conn.execute(
                sa.text(
                    """
                    UPDATE connector
                    SET connector_specific_config = :new_config
                    WHERE id = :connector_id
                    """
                ),
                {"new_config": json.dumps(new_config), "connector_id": connector_id},
            )
            reverted_count += 1
        except Exception as e:
            logger.error(f"Error reverting connector {connector_id}: {str(e)}")
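For illustration, a minimal standalone sketch of the key rename the upgrade performs on one stored config; the sample values are hypothetical, not taken from a real connector row:

import json

# Hypothetical connector_specific_config as stored before the migration
old_config = {"repo_owner": "onyx-dot-app", "repo_name": "onyx", "include_prs": True}

# Same rename the upgrade applies: drop repo_name, carry its value over to repositories
new_config = dict(old_config)
new_config["repositories"] = new_config.pop("repo_name")

print(json.dumps(new_config))
# {"repo_owner": "onyx-dot-app", "include_prs": true, "repositories": "onyx"}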
@@ -134,7 +134,9 @@ def fetch_chat_sessions_eagerly_by_time(
    limit: int | None = 500,
    initial_time: datetime | None = None,
) -> list[ChatSession]:
    time_order: UnaryExpression = desc(ChatSession.time_created)
    """Sorted by oldest to newest, then by message id"""

    asc_time_order: UnaryExpression = asc(ChatSession.time_created)
    message_order: UnaryExpression = asc(ChatMessage.id)

    filters: list[ColumnElement | BinaryExpression] = [
@@ -147,8 +149,7 @@ def fetch_chat_sessions_eagerly_by_time(
    subquery = (
        db_session.query(ChatSession.id, ChatSession.time_created)
        .filter(*filters)
        .order_by(ChatSession.id, time_order)
        .distinct(ChatSession.id)
        .order_by(asc_time_order)
        .limit(limit)
        .subquery()
    )
@@ -164,7 +165,7 @@ def fetch_chat_sessions_eagerly_by_time(
                ChatMessage.chat_message_feedbacks
            ),
        )
        .order_by(time_order, message_order)
        .order_by(asc_time_order, message_order)
    )

    chat_sessions = query.all()
@@ -16,13 +16,18 @@ from onyx.db.models import UsageReport
from onyx.file_store.file_store import get_default_file_store


# Gets skeletons of all message
# Gets skeletons of all messages in the given range
def get_empty_chat_messages_entries__paginated(
    db_session: Session,
    period: tuple[datetime, datetime],
    limit: int | None = 500,
    initial_time: datetime | None = None,
) -> tuple[Optional[datetime], list[ChatMessageSkeleton]]:
    """Returns a tuple where:
    first element is the most recent timestamp out of the sessions iterated
    - this timestamp can be used to paginate forward in time
    second element is a list of messages belonging to all the sessions iterated
    """
    chat_sessions = fetch_chat_sessions_eagerly_by_time(
        start=period[0],
        end=period[1],
@@ -52,18 +57,17 @@ def get_empty_chat_messages_entries__paginated(
    if len(chat_sessions) == 0:
        return None, []

    return chat_sessions[0].time_created, message_skeletons
    return chat_sessions[-1].time_created, message_skeletons


def get_all_empty_chat_message_entries(
    db_session: Session,
    period: tuple[datetime, datetime],
) -> Generator[list[ChatMessageSkeleton], None, None]:
    """period is the range of time over which to fetch messages."""
    initial_time: Optional[datetime] = period[0]
    ind = 0
    while True:
        ind += 1

        # iterate from oldest to newest
        time_created, message_skeletons = get_empty_chat_messages_entries__paginated(
            db_session,
            period,
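The switch to ascending order and to returning chat_sessions[-1].time_created is what lets callers walk forward through time. A minimal sketch of that pattern with an in-memory stand-in for the paginated query; the names and data below are hypothetical, not the real API:

from datetime import datetime, timedelta

session_times = [datetime(2025, 1, 1) + timedelta(days=i) for i in range(10)]

def fetch_page(after: datetime | None, limit: int = 4) -> tuple[datetime | None, list[datetime]]:
    # oldest-to-newest, resuming strictly after the cursor
    batch = [t for t in sorted(session_times) if after is None or t > after][:limit]
    if not batch:
        return None, []
    return batch[-1], batch  # newest timestamp in the batch becomes the next cursor

cursor: datetime | None = None
while True:
    cursor, batch = fetch_page(cursor)
    if not batch:
        break
    print(f"fetched {len(batch)} sessions up to {cursor}")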
@@ -15,7 +15,7 @@ from ee.onyx.server.enterprise_settings.api import (
)
from ee.onyx.server.manage.standard_answer import router as standard_answer_router
from ee.onyx.server.middleware.tenant_tracking import add_tenant_id_middleware
from ee.onyx.server.oauth.api import router as oauth_router
from ee.onyx.server.oauth.api import router as ee_oauth_router
from ee.onyx.server.query_and_chat.chat_backend import (
    router as chat_router,
)
@@ -128,7 +128,7 @@ def get_application() -> FastAPI:
    include_router_with_global_prefix_prepended(application, query_router)
    include_router_with_global_prefix_prepended(application, chat_router)
    include_router_with_global_prefix_prepended(application, standard_answer_router)
    include_router_with_global_prefix_prepended(application, oauth_router)
    include_router_with_global_prefix_prepended(application, ee_oauth_router)

    # Enterprise-only global settings
    include_router_with_global_prefix_prepended(
@@ -80,6 +80,7 @@ class ConfluenceCloudOAuth:
        "search:confluence%20"
        # granular scope
        "read:attachment:confluence%20"  # possibly unneeded unless calling v2 attachments api
        "read:content-details:confluence%20"  # for permission sync
        "offline_access"
    )
@@ -48,4 +48,5 @@ def store_product_gating(tenant_id: str, application_status: ApplicationStatus)

def get_gated_tenants() -> set[str]:
    redis_client = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)
    return cast(set[str], redis_client.smembers(GATED_TENANTS_KEY))
    gated_tenants_bytes = cast(set[bytes], redis_client.smembers(GATED_TENANTS_KEY))
    return {tenant_id.decode("utf-8") for tenant_id in gated_tenants_bytes}
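A quick illustration of why the decode step matters: a binary-safe redis-py client returns SMEMBERS results as bytes, which never compare equal to tenant-ID strings. The set literal below stands in for the Redis reply; no real connection is involved:

# Stand-in for redis_client.smembers(GATED_TENANTS_KEY) without decode_responses=True
gated_tenants_bytes = {b"tenant_a", b"tenant_b"}

gated_tenants = {tenant_id.decode("utf-8") for tenant_id in gated_tenants_bytes}
print("tenant_a" in gated_tenants_bytes)  # False: bytes members do not match str
print("tenant_a" in gated_tenants)        # True after decoding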
@@ -55,7 +55,11 @@ logger = logging.getLogger(__name__)
async def get_or_provision_tenant(
    email: str, referral_source: str | None = None, request: Request | None = None
) -> str:
    """Get existing tenant ID for an email or create a new tenant if none exists."""
    """
    Get existing tenant ID for an email or create a new tenant if none exists.
    This function should only be called after we have verified we want this user's tenant to exist.
    It returns the tenant ID associated with the email, creating a new tenant if necessary.
    """
    if not MULTI_TENANT:
        return POSTGRES_DEFAULT_SCHEMA
@@ -587,14 +587,20 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
    ) -> Optional[User]:
        email = credentials.username

        # Get tenant_id from mapping table
        tenant_id = await fetch_ee_implementation_or_noop(
            "onyx.server.tenants.provisioning",
            "get_or_provision_tenant",
            async_return_default_schema,
        )(
            email=email,
        )
        tenant_id: str | None = None
        try:
            tenant_id = fetch_ee_implementation_or_noop(
                "onyx.server.tenants.provisioning",
                "get_tenant_id_for_email",
                None,
            )(
                email=email,
            )
        except Exception as e:
            logger.warning(
                f"User attempted to login with invalid credentials: {str(e)}"
            )

        if not tenant_id:
            # User not found in mapping
            self.password_helper.hash(credentials.password)
@@ -240,7 +240,7 @@ class ConfluenceConnector(
        # Extract basic page information
        page_id = page["id"]
        page_title = page["title"]
        page_url = f"{self.wiki_base}/wiki{page['_links']['webui']}"
        page_url = f"{self.wiki_base}{page['_links']['webui']}"

        # Get the page content
        page_content = extract_text_from_confluence_html(
@@ -144,6 +144,12 @@ class OnyxConfluence:
            self.static_credentials = credential_json
            return credential_json, False

        if not OAUTH_CONFLUENCE_CLOUD_CLIENT_ID:
            raise RuntimeError("OAUTH_CONFLUENCE_CLOUD_CLIENT_ID must be set!")

        if not OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET:
            raise RuntimeError("OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET must be set!")

        # check if we should refresh tokens. we're deciding to refresh halfway
        # to expiration
        now = datetime.now(timezone.utc)
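The "refresh halfway to expiration" comment boils down to a simple time comparison. A sketch with hypothetical token metadata; the actual credential fields are not shown in this hunk:

from datetime import datetime, timedelta, timezone

# Hypothetical values; the real credential_json layout is not part of this diff
issued_at = datetime.now(timezone.utc) - timedelta(minutes=40)
lifetime = timedelta(hours=1)

now = datetime.now(timezone.utc)
should_refresh = now >= issued_at + lifetime / 2  # refresh once half the lifetime has elapsed
print(should_refresh)  # True: 40 of 60 minutes have passed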
@@ -124,14 +124,14 @@ class GithubConnector(LoadConnector, PollConnector):
    def __init__(
        self,
        repo_owner: str,
        repo_name: str | None = None,
        repositories: str | None = None,
        batch_size: int = INDEX_BATCH_SIZE,
        state_filter: str = "all",
        include_prs: bool = True,
        include_issues: bool = False,
    ) -> None:
        self.repo_owner = repo_owner
        self.repo_name = repo_name
        self.repositories = repositories
        self.batch_size = batch_size
        self.state_filter = state_filter
        self.include_prs = include_prs
@@ -157,11 +157,42 @@ class GithubConnector(LoadConnector, PollConnector):
            )

        try:
            return github_client.get_repo(f"{self.repo_owner}/{self.repo_name}")
            return github_client.get_repo(f"{self.repo_owner}/{self.repositories}")
        except RateLimitExceededException:
            _sleep_after_rate_limit_exception(github_client)
            return self._get_github_repo(github_client, attempt_num + 1)

    def _get_github_repos(
        self, github_client: Github, attempt_num: int = 0
    ) -> list[Repository.Repository]:
        """Get specific repositories based on comma-separated repo_name string."""
        if attempt_num > _MAX_NUM_RATE_LIMIT_RETRIES:
            raise RuntimeError(
                "Re-tried fetching repos too many times. Something is going wrong with fetching objects from Github"
            )

        try:
            repos = []
            # Split repo_name by comma and strip whitespace
            repo_names = [
                name.strip() for name in (cast(str, self.repositories)).split(",")
            ]

            for repo_name in repo_names:
                if repo_name:  # Skip empty strings
                    try:
                        repo = github_client.get_repo(f"{self.repo_owner}/{repo_name}")
                        repos.append(repo)
                    except GithubException as e:
                        logger.warning(
                            f"Could not fetch repo {self.repo_owner}/{repo_name}: {e}"
                        )

            return repos
        except RateLimitExceededException:
            _sleep_after_rate_limit_exception(github_client)
            return self._get_github_repos(github_client, attempt_num + 1)

    def _get_all_repos(
        self, github_client: Github, attempt_num: int = 0
    ) -> list[Repository.Repository]:
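The comma-splitting in _get_github_repos is easy to sanity-check on its own; a standalone sketch of the same parsing, using a made-up setting value:

repositories = "onyx, docs ,,internal-tools"  # hypothetical connector setting

repo_names = [name.strip() for name in repositories.split(",")]
# empty entries (here from the double comma) are skipped, mirroring the `if repo_name:` guard
repo_names = [name for name in repo_names if name]
print(repo_names)  # ['onyx', 'docs', 'internal-tools']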
@@ -189,11 +220,17 @@ class GithubConnector(LoadConnector, PollConnector):
        if self.github_client is None:
            raise ConnectorMissingCredentialError("GitHub")

        repos = (
            [self._get_github_repo(self.github_client)]
            if self.repo_name
            else self._get_all_repos(self.github_client)
        )
        repos = []
        if self.repositories:
            if "," in self.repositories:
                # Multiple repositories specified
                repos = self._get_github_repos(self.github_client)
            else:
                # Single repository (backward compatibility)
                repos = [self._get_github_repo(self.github_client)]
        else:
            # All repositories
            repos = self._get_all_repos(self.github_client)

        for repo in repos:
            if self.include_prs:
@@ -268,11 +305,48 @@ class GithubConnector(LoadConnector, PollConnector):
            )

        try:
            if self.repo_name:
                test_repo = self.github_client.get_repo(
                    f"{self.repo_owner}/{self.repo_name}"
                )
                test_repo.get_contents("")
            if self.repositories:
                if "," in self.repositories:
                    # Multiple repositories specified
                    repo_names = [name.strip() for name in self.repositories.split(",")]
                    if not repo_names:
                        raise ConnectorValidationError(
                            "Invalid connector settings: No valid repository names provided."
                        )

                    # Validate at least one repository exists and is accessible
                    valid_repos = False
                    validation_errors = []

                    for repo_name in repo_names:
                        if not repo_name:
                            continue

                        try:
                            test_repo = self.github_client.get_repo(
                                f"{self.repo_owner}/{repo_name}"
                            )
                            test_repo.get_contents("")
                            valid_repos = True
                            # If at least one repo is valid, we can proceed
                            break
                        except GithubException as e:
                            validation_errors.append(
                                f"Repository '{repo_name}': {e.data.get('message', str(e))}"
                            )

                    if not valid_repos:
                        error_msg = (
                            "None of the specified repositories could be accessed: "
                        )
                        error_msg += ", ".join(validation_errors)
                        raise ConnectorValidationError(error_msg)
                else:
                    # Single repository (backward compatibility)
                    test_repo = self.github_client.get_repo(
                        f"{self.repo_owner}/{self.repositories}"
                    )
                    test_repo.get_contents("")
            else:
                # Try to get organization first
                try:
@@ -298,10 +372,15 @@ class GithubConnector(LoadConnector, PollConnector):
                    "Your GitHub token does not have sufficient permissions for this repository (HTTP 403)."
                )
            elif e.status == 404:
                if self.repo_name:
                    raise ConnectorValidationError(
                        f"GitHub repository not found with name: {self.repo_owner}/{self.repo_name}"
                    )
                if self.repositories:
                    if "," in self.repositories:
                        raise ConnectorValidationError(
                            f"None of the specified GitHub repositories could be found for owner: {self.repo_owner}"
                        )
                    else:
                        raise ConnectorValidationError(
                            f"GitHub repository not found with name: {self.repo_owner}/{self.repositories}"
                        )
                else:
                    raise ConnectorValidationError(
                        f"GitHub user or organization not found: {self.repo_owner}"
@@ -310,6 +389,7 @@ class GithubConnector(LoadConnector, PollConnector):
                raise ConnectorValidationError(
                    f"Unexpected GitHub error (status={e.status}): {e.data}"
                )

        except Exception as exc:
            raise Exception(
                f"Unexpected error during GitHub settings validation: {exc}"
@@ -321,7 +401,7 @@ if __name__ == "__main__":

    connector = GithubConnector(
        repo_owner=os.environ["REPO_OWNER"],
        repo_name=os.environ["REPO_NAME"],
        repositories=os.environ["REPOSITORIES"],
    )
    connector.load_credentials(
        {"github_access_token": os.environ["GITHUB_ACCESS_TOKEN"]}
backend/onyx/db/seeding/chat_history_seeding.py (new file, 53 lines)
@@ -0,0 +1,53 @@
import random
from datetime import datetime
from datetime import timedelta

from onyx.configs.constants import MessageType
from onyx.db.chat import create_chat_session
from onyx.db.chat import create_new_chat_message
from onyx.db.chat import get_or_create_root_message
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.models import ChatSession


def seed_chat_history(num_sessions: int, num_messages: int, days: int) -> None:
    """Utility function to seed chat history for testing.

    num_sessions: the number of sessions to seed
    num_messages: the number of messages to seed per sessions
    days: the number of days looking backwards from the current time over which to randomize
        the times.
    """
    with get_session_with_current_tenant() as db_session:
        for y in range(0, num_sessions):
            create_chat_session(db_session, f"pytest_session_{y}", None, None)

        # randomize all session times
        rows = db_session.query(ChatSession).all()
        for row in rows:
            row.time_created = datetime.utcnow() - timedelta(
                days=random.randint(0, days)
            )
            row.time_updated = row.time_created + timedelta(
                minutes=random.randint(0, 10)
            )

            root_message = get_or_create_root_message(row.id, db_session)

            for x in range(0, num_messages):
                chat_message = create_new_chat_message(
                    row.id,
                    root_message,
                    f"pytest_message_{x}",
                    None,
                    0,
                    MessageType.USER,
                    db_session,
                )

                chat_message.time_sent = row.time_created + timedelta(
                    minutes=random.randint(0, 10)
                )
                db_session.commit()

        db_session.commit()
@@ -464,12 +464,29 @@ def index_doc_batch(
        ),
    )

    successful_doc_ids = {record.document_id for record in insertion_records}
    if successful_doc_ids != set(updatable_ids):
    all_returned_doc_ids = (
        {record.document_id for record in insertion_records}
        .union(
            {
                record.failed_document.document_id
                for record in vector_db_write_failures
                if record.failed_document
            }
        )
        .union(
            {
                record.failed_document.document_id
                for record in embedding_failures
                if record.failed_document
            }
        )
    )
    if all_returned_doc_ids != set(updatable_ids):
        raise RuntimeError(
            f"Some documents were not successfully indexed. "
            f"Updatable IDs: {updatable_ids}, "
            f"Successful IDs: {successful_doc_ids}"
            f"Returned IDs: {all_returned_doc_ids}. "
            "This should never happen."
        )

    last_modified_ids = []
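The reworked check treats a document as accounted for when it appears either among the insertion records or among the embedding / vector-DB failures; only documents that appear in none of those sets now trigger the RuntimeError. A small sketch of the set logic with made-up IDs:

updatable_ids = {"doc-1", "doc-2", "doc-3"}

successful_doc_ids = {"doc-1"}       # stand-in for insertion_records
failed_doc_ids = {"doc-2", "doc-3"}  # stand-in for embedding and vector-DB failures

all_returned_doc_ids = successful_doc_ids | failed_doc_ids
if all_returned_doc_ids != updatable_ids:
    raise RuntimeError("Some documents were not successfully indexed.")
print("every updatable document was either indexed or reported as a failure")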
@@ -51,6 +51,7 @@ from onyx.server.documents.cc_pair import router as cc_pair_router
from onyx.server.documents.connector import router as connector_router
from onyx.server.documents.credential import router as credential_router
from onyx.server.documents.document import router as document_router
from onyx.server.documents.standard_oauth import router as standard_oauth_router
from onyx.server.features.document_set.api import router as document_set_router
from onyx.server.features.folder.api import router as folder_router
from onyx.server.features.input_prompt.api import (
@@ -322,6 +323,7 @@ def get_application() -> FastAPI:
    )
    include_router_with_global_prefix_prepended(application, long_term_logs_router)
    include_router_with_global_prefix_prepended(application, api_key_router)
    include_router_with_global_prefix_prepended(application, standard_oauth_router)

    if AUTH_TYPE == AuthType.DISABLED:
        # Server logs this during auth setup verification step
backend/scripts/chat_history_seeding.py (new file, 45 lines)
@@ -0,0 +1,45 @@
import argparse
import logging
from logging import getLogger

from onyx.db.seeding.chat_history_seeding import seed_chat_history

# Configure the logger
logging.basicConfig(
    level=logging.INFO,  # Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",  # Log format
    handlers=[logging.StreamHandler()],  # Output logs to console
)

logger = getLogger(__name__)


def go_main(num_sessions: int, num_messages: int, num_days: int) -> None:
    seed_chat_history(num_sessions, num_messages, num_days)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Seed chat history")
    parser.add_argument(
        "--sessions",
        type=int,
        default=2048,
        help="Number of chat sessions to seed",
    )

    parser.add_argument(
        "--messages",
        type=int,
        default=4,
        help="Number of chat messages to seed per session",
    )

    parser.add_argument(
        "--days",
        type=int,
        default=90,
        help="Number of days looking backwards over which to seed the timestamps with",
    )

    args = parser.parse_args()
    go_main(args.sessions, args.messages, args.days)
@@ -45,7 +45,7 @@ def test_confluence_connector_basic(
    with pytest.raises(StopIteration):
        next(doc_batch_generator)

    assert len(doc_batch) == 3
    assert len(doc_batch) == 2

    page_within_a_page_doc: Document | None = None
    page_doc: Document | None = None
@@ -0,0 +1,46 @@
from datetime import datetime
from datetime import timedelta
from datetime import timezone

from ee.onyx.db.usage_export import get_all_empty_chat_message_entries
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.seeding.chat_history_seeding import seed_chat_history


def test_usage_reports(reset: None) -> None:
    EXPECTED_SESSIONS = 2048
    MESSAGES_PER_SESSION = 4
    EXPECTED_MESSAGES = EXPECTED_SESSIONS * MESSAGES_PER_SESSION

    seed_chat_history(EXPECTED_SESSIONS, MESSAGES_PER_SESSION, 90)

    with get_session_with_current_tenant() as db_session:
        # count of all entries should be exact
        period = (
            datetime.fromtimestamp(0, tz=timezone.utc),
            datetime.now(tz=timezone.utc),
        )

        count = 0
        for entry_batch in get_all_empty_chat_message_entries(db_session, period):
            for entry in entry_batch:
                count += 1

        assert count == EXPECTED_MESSAGES

        # count in a one month time range should be within a certain range statistically
        # this can be improved if we seed the chat history data deterministically
        period = (
            datetime.now(tz=timezone.utc) - timedelta(days=30),
            datetime.now(tz=timezone.utc),
        )

        count = 0
        for entry_batch in get_all_empty_chat_message_entries(db_session, period):
            for entry in entry_batch:
                count += 1

        lower = EXPECTED_MESSAGES // 3 - (EXPECTED_MESSAGES // (3 * 3))
        upper = EXPECTED_MESSAGES // 3 + (EXPECTED_MESSAGES // (3 * 3))
        assert count > lower
        assert count < upper
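With the seeded defaults the tolerance band in the one-month assertion works out as follows; a quick check of the arithmetic:

EXPECTED_MESSAGES = 2048 * 4  # 8192 messages spread roughly uniformly over ~90 days

expected_in_month = EXPECTED_MESSAGES // 3      # about 30 of the 90 days
tolerance = EXPECTED_MESSAGES // (3 * 3)        # one third of that expectation
print(expected_in_month - tolerance, expected_in_month + tolerance)  # 1820 3640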
@@ -80,3 +80,13 @@ prod cluster**
- `kubectl delete -f .`
- To keep the persistent volumes (Document indexes and Users), list the specific `.yaml` files to delete instead of
  `.`, leaving out persistent-volumes.yaml.

### Using Helm to deploy to an existing cluster

Onyx ships a Helm chart that makes it convenient to install all services into an existing Kubernetes cluster. To install:

* The Helm chart is not yet published, so clone the repo to install it.
* Configure access to the cluster via kubectl, and make sure the kubectl context points at the cluster you want to use.
* The default secrets, environment variables and other service-level configuration live in `deployment/helm/charts/onyx/values.yaml`. Put any changes in a separate `override.yaml`.
* `cd deployment/helm/charts/onyx` and run `helm install onyx -n onyx -f override.yaml .`. This installs Onyx on the cluster under the `onyx` namespace.
* Check the status of the deployment with `kubectl get pods -n onyx`.
deployment/helm/charts/onyx/templates/ingress-api.yaml (new file, 27 lines)
@@ -0,0 +1,27 @@
{{- if .Values.ingress.enabled -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: {{ include "onyx-stack.fullname" . }}-ingress-api
  annotations:
    kubernetes.io/ingress.class: nginx
    nginx.ingress.kubernetes.io/rewrite-target: /$2
    nginx.ingress.kubernetes.io/use-regex: "true"
    cert-manager.io/cluster-issuer: {{ include "onyx-stack.fullname" . }}-letsencrypt
spec:
  rules:
    - host: {{ .Values.ingress.api.host }}
      http:
        paths:
          - path: /api(/|$)(.*)
            pathType: Prefix
            backend:
              service:
                name: {{ include "onyx-stack.fullname" . }}-api-service
                port:
                  number: {{ .Values.api.service.servicePort }}
  tls:
    - hosts:
        - {{ .Values.ingress.api.host }}
      secretName: {{ include "onyx-stack.fullname" . }}-ingress-api-tls
{{- end }}
deployment/helm/charts/onyx/templates/ingress-webserver.yaml (new file, 26 lines)
@@ -0,0 +1,26 @@
{{- if .Values.ingress.enabled -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: {{ include "onyx-stack.fullname" . }}-ingress-webserver
  annotations:
    kubernetes.io/ingress.class: nginx
    cert-manager.io/cluster-issuer: {{ include "onyx-stack.fullname" . }}-letsencrypt
    kubernetes.io/tls-acme: "true"
spec:
  rules:
    - host: {{ .Values.ingress.webserver.host }}
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: {{ include "onyx-stack.fullname" . }}-webserver
                port:
                  number: {{ .Values.webserver.service.servicePort }}
  tls:
    - hosts:
        - {{ .Values.ingress.webserver.host }}
      secretName: {{ include "onyx-stack.fullname" . }}-ingress-webserver-tls
{{- end }}
deployment/helm/charts/onyx/templates/lets-encrypt.yaml (new file, 20 lines)
@@ -0,0 +1,20 @@
{{- if .Values.letsencrypt.enabled -}}
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: {{ include "onyx-stack.fullname" . }}-letsencrypt
spec:
  acme:
    # The ACME server URL
    server: https://acme-v02.api.letsencrypt.org/directory
    # Email address used for ACME registration
    email: {{ .Values.letsencrypt.email }}
    # Name of a secret used to store the ACME account private key
    privateKeySecretRef:
      name: {{ include "onyx-stack.fullname" . }}-letsencrypt
    # Enable the HTTP-01 challenge provider
    solvers:
      - http01:
          ingress:
            class: nginx
{{- end }}
@@ -376,22 +376,17 @@ redis:
    existingSecret: onyx-secrets
    existingSecretPasswordKey: redis_password

# ingress:
#   enabled: false
#   className: ""
#   annotations: {}
#     # kubernetes.io/ingress.class: nginx
#     # kubernetes.io/tls-acme: "true"
#   hosts:
#     - host: chart-example.local
#       paths:
#         - path: /
#           pathType: ImplementationSpecific
#   tls: []
#   #  - secretName: chart-example-tls
#   #    hosts:
#   #      - chart-example.local
ingress:
  enabled: false
  className: ""
  api:
    host: onyx.local
  webserver:
    host: onyx.local

letsencrypt:
  enabled: false
  email: "abc@abc.com"

auth:
  # existingSecret onyx-secret for storing smtp, oauth, slack, and other secrets
@@ -290,21 +290,24 @@ export function SettingsForm() {
            id="chatRetentionInput"
            placeholder="Infinite Retention"
          />
          <Button
            onClick={handleSetChatRetention}
            variant="submit"
            size="sm"
            className="mr-3"
          >
            Set Retention Limit
          </Button>
          <Button
            onClick={handleClearChatRetention}
            variant="default"
            size="sm"
          >
            Retain All
          </Button>
          <div className="mr-auto flex gap-2">
            <Button
              onClick={handleSetChatRetention}
              variant="submit"
              size="sm"
              className="mr-auto"
            >
              Set Retention Limit
            </Button>
            <Button
              onClick={handleClearChatRetention}
              variant="default"
              size="sm"
              className="mr-auto"
            >
              Retain All
            </Button>
          </div>
        </>
      )}
@@ -61,6 +61,7 @@ export function EmailPasswordForm({

      if (!response.ok) {
        setIsWorking(false);

        const errorDetail = (await response.json()).detail;
        let errorMsg = "Unknown error";
        if (typeof errorDetail === "object" && errorDetail.reason) {
@@ -96,12 +97,13 @@ export function EmailPasswordForm({
      } else {
        setIsWorking(false);
        const errorDetail = (await loginResponse.json()).detail;

        let errorMsg = "Unknown error";
        if (errorDetail === "LOGIN_BAD_CREDENTIALS") {
          errorMsg = "Invalid email or password";
        } else if (errorDetail === "NO_WEB_LOGIN_AND_HAS_NO_PASSWORD") {
          errorMsg = "Create an account to set a password";
        } else if (typeof errorDetail === "string") {
          errorMsg = errorDetail;
        }
        if (loginResponse.status === 429) {
          errorMsg = "Too many requests. Please try again later.";
@@ -191,6 +191,7 @@ export const FolderDropdown = forwardRef<HTMLDivElement, FolderDropdownProps>(
                onChange={(e) => setNewFolderName(e.target.value)}
                className="text-sm font-medium bg-transparent outline-none w-full pb-1 border-b border-background-500 transition-colors duration-200"
                onKeyDown={(e) => {
                  e.stopPropagation();
                  if (e.key === "Enter") {
                    handleEdit();
                  }
@@ -303,7 +303,6 @@ const FolderItem = ({
                  key={chatSession.id}
                  chatSession={chatSession}
                  isSelected={chatSession.id === currentChatId}
                  skipGradient={isDragOver}
                  showShareModal={showShareModal}
                  showDeleteModal={showDeleteModal}
                />
@@ -32,21 +32,17 @@ export function ChatSessionDisplay({
  chatSession,
  search,
  isSelected,
  skipGradient,
  closeSidebar,
  showShareModal,
  showDeleteModal,
  foldersExisting,
  isDragging,
}: {
  chatSession: ChatSession;
  isSelected: boolean;
  search?: boolean;
  skipGradient?: boolean;
  closeSidebar?: () => void;
  showShareModal?: (chatSession: ChatSession) => void;
  showDeleteModal?: (chatSession: ChatSession) => void;
  foldersExisting?: boolean;
  isDragging?: boolean;
}) {
  const router = useRouter();
@@ -238,8 +234,12 @@ export function ChatSessionDisplay({
                    e.preventDefault();
                    e.stopPropagation();
                  }}
                  onChange={(e) => setChatName(e.target.value)}
                  onChange={(e) => {
                    setChatName(e.target.value);
                  }}
                  onKeyDown={(event) => {
                    event.stopPropagation();

                    if (event.key === "Enter") {
                      onRename();
                      event.preventDefault();
@@ -264,7 +264,6 @@ export function PagesTab({
                        >
                          <ChatSessionDisplay
                            chatSession={chat}
                            foldersExisting={foldersExisting}
                            isSelected={currentChatId === chat.id}
                            showShareModal={showShareModal}
                            showDeleteModal={showDeleteModal}
@@ -40,8 +40,12 @@ export const ConnectorTitle = ({
    const typedConnector = connector as Connector<GithubConfig>;
    additionalMetadata.set(
      "Repo",
      typedConnector.connector_specific_config.repo_name
        ? `${typedConnector.connector_specific_config.repo_owner}/${typedConnector.connector_specific_config.repo_name}`
      typedConnector.connector_specific_config.repositories
        ? `${typedConnector.connector_specific_config.repo_owner}/${
            typedConnector.connector_specific_config.repositories.includes(",")
              ? "multiple repos"
              : typedConnector.connector_specific_config.repositories
          }`
        : `${typedConnector.connector_specific_config.repo_owner}/*`
    );
  } else if (connector.source === "gitlab") {
@@ -190,10 +190,12 @@ export const connectorConfigs: Record<
    fields: [
      {
        type: "text",
        query: "Enter the repository name:",
        label: "Repository Name",
        name: "repo_name",
        query: "Enter the repository name(s):",
        label: "Repository Name(s)",
        name: "repositories",
        optional: false,
        description:
          "For multiple repositories, enter comma-separated names (e.g., repo1,repo2,repo3)",
      },
    ],
  },
@@ -1358,7 +1360,7 @@ export interface WebConfig {

export interface GithubConfig {
  repo_owner: string;
  repo_name: string;
  repositories: string; // Comma-separated list of repository names
  include_prs: boolean;
  include_issues: boolean;
}