feat(metrics): add connector_name label to push-based connector metrics (#10237)

fix(model-server): add missing onyx/configs to Dockerfile for sentry support (#10236)
fix(deletion): Commit Session in per-doc cleanup (#10193)
2026-04-16 06:56:51 +00:00 · 2026-04-15 22:58:49 +00:00 · 2026-04-15 22:42:00 +00:00 · 2026-04-15 22:37:00 +00:00
16 changed files with 59 additions and 74 deletions
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -12,7 +12,7 @@ founders@onyx.app for more information. Please visit https://github.com/onyx-dot
 ARG ENABLE_CRAFT=false

 # DO_NOT_TRACK is used to disable telemetry for Unstructured
-ENV ONYX_RUNNING_IN_DOCKER="true" \
+ENV DANSWER_RUNNING_IN_DOCKER="true" \
    DO_NOT_TRACK="true" \
    PLAYWRIGHT_BROWSERS_PATH="/app/.cache/ms-playwright"

--- a/backend/Dockerfile.model_server
+++ b/backend/Dockerfile.model_server
@@ -1,7 +1,7 @@
 # Base stage with dependencies
 FROM python:3.11-slim-bookworm@sha256:9c6f90801e6b68e772b7c0ca74260cbf7af9f320acec894e26fccdaccfbe3b47 AS base

-ENV ONYX_RUNNING_IN_DOCKER="true" \
+ENV DANSWER_RUNNING_IN_DOCKER="true" \
    HF_HOME=/app/.cache/huggingface

 COPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/
@@ -50,6 +50,10 @@ COPY ./onyx/utils/logger.py /app/onyx/utils/logger.py
 COPY ./onyx/utils/middleware.py /app/onyx/utils/middleware.py
 COPY ./onyx/utils/tenant.py /app/onyx/utils/tenant.py

+# Sentry configuration (used when SENTRY_DSN is set)
+COPY ./onyx/configs/__init__.py /app/onyx/configs/__init__.py
+COPY ./onyx/configs/sentry.py /app/onyx/configs/sentry.py
+
 # Place to fetch version information
 COPY ./onyx/__init__.py /app/onyx/__init__.py

--- a/backend/onyx/background/celery/memory_monitoring.py
+++ b/backend/onyx/background/celery/memory_monitoring.py
@@ -5,8 +5,8 @@ from logging.handlers import RotatingFileHandler

 import psutil

+from onyx.utils.logger import is_running_in_container
 from onyx.utils.logger import setup_logger
-from onyx.utils.platform import is_running_in_container

 # Regular application logger
 logger = setup_logger()
--- a/backend/onyx/background/celery/tasks/docprocessing/tasks.py
+++ b/backend/onyx/background/celery/tasks/docprocessing/tasks.py
@@ -537,10 +537,12 @@ def check_indexing_completion(
            )

        source = cc_pair.connector.source.value
+        connector_name = cc_pair.connector.name or f"cc_pair_{cc_pair.id}"
        on_index_attempt_status_change(
            tenant_id=tenant_id,
            source=source,
            cc_pair_id=cc_pair.id,
+            connector_name=connector_name,
            status=attempt.status.value,
        )

@@ -568,6 +570,7 @@ def check_indexing_completion(
                tenant_id=tenant_id,
                source=source,
                cc_pair_id=cc_pair.id,
+                connector_name=connector_name,
                docs_indexed=attempt.new_docs_indexed or 0,
                success_timestamp=attempt.time_updated.timestamp(),
            )
@@ -595,6 +598,7 @@ def check_indexing_completion(
                    tenant_id=tenant_id,
                    source=source,
                    cc_pair_id=cc_pair.id,
+                    connector_name=connector_name,
                    in_error=False,
                )

@@ -920,10 +924,14 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
                        cc_pair_id=cc_pair_id,
                        in_repeated_error_state=True,
                    )
+                    error_connector_name = (
+                        cc_pair.connector.name or f"cc_pair_{cc_pair.id}"
+                    )
                    on_connector_error_state_change(
                        tenant_id=tenant_id,
                        source=cc_pair.connector.source.value,
                        cc_pair_id=cc_pair_id,
+                        connector_name=error_connector_name,
                        in_error=True,
                    )

--- a/backend/onyx/background/celery/tasks/monitoring/tasks.py
+++ b/backend/onyx/background/celery/tasks/monitoring/tasks.py
@@ -42,7 +42,7 @@ from onyx.db.models import UserGroup
 from onyx.db.search_settings import get_active_search_settings_list
 from onyx.redis.redis_pool import get_redis_client
 from onyx.redis.redis_pool import redis_lock_dump
-from onyx.utils.platform import is_running_in_container
+from onyx.utils.logger import is_running_in_container
 from onyx.utils.telemetry import optional_telemetry
 from onyx.utils.telemetry import RecordType
 from shared_configs.configs import MULTI_TENANT
--- a/backend/onyx/background/celery/tasks/shared/tasks.py
+++ b/backend/onyx/background/celery/tasks/shared/tasks.py
@@ -248,6 +248,7 @@ def document_by_cc_pair_cleanup_task(
                        ),
                    )
                    mark_document_as_modified(document_id, db_session)
+                    db_session.commit()
                completion_status = (
                    OnyxCeleryTaskCompletionStatus.NON_RETRYABLE_EXCEPTION
                )
--- a/backend/onyx/background/indexing/run_docfetching.py
+++ b/backend/onyx/background/indexing/run_docfetching.py
@@ -273,6 +273,7 @@ def run_docfetching_entrypoint(
            tenant_id=tenant_id,
            source=attempt.connector_credential_pair.connector.source.value,
            cc_pair_id=connector_credential_pair_id,
+            connector_name=connector_name or f"cc_pair_{connector_credential_pair_id}",
            status="in_progress",
        )

--- a/backend/onyx/server/metrics/connector_health_metrics.py
+++ b/backend/onyx/server/metrics/connector_health_metrics.py
@@ -7,7 +7,12 @@ to avoid disrupting the caller's business logic.
 Gauge metrics (error state, last success timestamp) are per-process.
 With multiple worker pods, use max() aggregation in PromQL to get the
 correct value across instances, e.g.:
-    max by (cc_pair_id) (onyx_connector_in_error_state)
+    max by (cc_pair_id, connector_name) (onyx_connector_in_error_state)
+
+Unlike the per-task counters in indexing_task_metrics.py, these metrics
+include connector_name because their cardinality is bounded by the number
+of connectors (one series per connector), not by the number of task
+executions.
 """

 from prometheus_client import Counter
@@ -17,12 +22,14 @@ from onyx.utils.logger import setup_logger

 logger = setup_logger()

+_CONNECTOR_LABELS = ["tenant_id", "source", "cc_pair_id", "connector_name"]
+
 # --- Index attempt lifecycle ---

 INDEX_ATTEMPT_STATUS = Counter(
    "onyx_index_attempt_transitions_total",
    "Index attempt status transitions",
-    ["tenant_id", "source", "cc_pair_id", "status"],
+    [*_CONNECTOR_LABELS, "status"],
 )

 # --- Connector health ---
@@ -30,25 +37,25 @@ INDEX_ATTEMPT_STATUS = Counter(
 CONNECTOR_IN_ERROR_STATE = Gauge(
    "onyx_connector_in_error_state",
    "Whether the connector is in a repeated error state (1=yes, 0=no)",
-    ["tenant_id", "source", "cc_pair_id"],
+    _CONNECTOR_LABELS,
 )

 CONNECTOR_LAST_SUCCESS_TIMESTAMP = Gauge(
    "onyx_connector_last_success_timestamp_seconds",
    "Unix timestamp of last successful indexing for this connector",
-    ["tenant_id", "source", "cc_pair_id"],
+    _CONNECTOR_LABELS,
 )

 CONNECTOR_DOCS_INDEXED = Counter(
    "onyx_connector_docs_indexed_total",
    "Total documents indexed per connector (monotonic)",
-    ["tenant_id", "source", "cc_pair_id"],
+    _CONNECTOR_LABELS,
 )

 CONNECTOR_INDEXING_ERRORS = Counter(
    "onyx_connector_indexing_errors_total",
    "Total failed index attempts per connector (monotonic)",
-    ["tenant_id", "source", "cc_pair_id"],
+    _CONNECTOR_LABELS,
 )


@@ -56,6 +63,7 @@ def on_index_attempt_status_change(
    tenant_id: str,
    source: str,
    cc_pair_id: int,
+    connector_name: str,
    status: str,
 ) -> None:
    """Called on any index attempt status transition."""
@@ -64,6 +72,7 @@ def on_index_attempt_status_change(
            "tenant_id": tenant_id,
            "source": source,
            "cc_pair_id": str(cc_pair_id),
+            "connector_name": connector_name,
        }
        INDEX_ATTEMPT_STATUS.labels(**labels, status=status).inc()
        if status == "failed":
@@ -76,6 +85,7 @@ def on_connector_error_state_change(
    tenant_id: str,
    source: str,
    cc_pair_id: int,
+    connector_name: str,
    in_error: bool,
 ) -> None:
    """Called when a connector's in_repeated_error_state changes."""
@@ -84,6 +94,7 @@ def on_connector_error_state_change(
            tenant_id=tenant_id,
            source=source,
            cc_pair_id=str(cc_pair_id),
+            connector_name=connector_name,
        ).set(1.0 if in_error else 0.0)
    except Exception:
        logger.debug("Failed to record connector error state metric", exc_info=True)
@@ -93,6 +104,7 @@ def on_connector_indexing_success(
    tenant_id: str,
    source: str,
    cc_pair_id: int,
+    connector_name: str,
    docs_indexed: int,
    success_timestamp: float,
 ) -> None:
@@ -102,6 +114,7 @@ def on_connector_indexing_success(
            "tenant_id": tenant_id,
            "source": source,
            "cc_pair_id": str(cc_pair_id),
+            "connector_name": connector_name,
        }
        CONNECTOR_LAST_SUCCESS_TIMESTAMP.labels(**labels).set(success_timestamp)
        if docs_indexed > 0:
--- a/backend/onyx/server/settings/api.py
+++ b/backend/onyx/server/settings/api.py
@@ -34,7 +34,6 @@ from onyx.server.settings.models import UserSettings
 from onyx.server.settings.store import load_settings
 from onyx.server.settings.store import store_settings
 from onyx.utils.logger import setup_logger
-from onyx.utils.platform import is_running_in_container
 from onyx.utils.variable_functionality import (
    fetch_versioned_implementation_with_fallback,
 )
@@ -112,7 +111,6 @@ def fetch_settings(
            if DISABLE_VECTOR_DB
            else DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB
        ),
-        is_containerized=is_running_in_container(),
    )


--- a/backend/onyx/server/settings/models.py
+++ b/backend/onyx/server/settings/models.py
@@ -133,7 +133,3 @@ class UserSettings(Settings):
            else DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB
        )
    )
-    # True when the backend is running inside a container (Docker/Podman).
-    # The frontend uses this to default local-service URLs (e.g. Ollama,
-    # LM Studio) to host.docker.internal instead of localhost.
-    is_containerized: bool = False
--- a/backend/onyx/utils/logger.py
+++ b/backend/onyx/utils/logger.py
@@ -5,7 +5,6 @@ from collections.abc import MutableMapping
 from logging.handlers import RotatingFileHandler
 from typing import Any

-from onyx.utils.platform import is_running_in_container
 from onyx.utils.tenant import get_tenant_id_short_string
 from shared_configs.configs import DEV_LOGGING_ENABLED
 from shared_configs.configs import LOG_FILE_NAME
@@ -170,6 +169,13 @@ def get_standard_formatter() -> ColoredFormatter:
    )


+DANSWER_DOCKER_ENV_STR = "DANSWER_RUNNING_IN_DOCKER"
+
+
+def is_running_in_container() -> bool:
+    return os.getenv(DANSWER_DOCKER_ENV_STR) == "true"
+
+
 def setup_logger(
    name: str = __name__,
    log_level: int = get_log_level_from_str(),
--- a/backend/onyx/utils/platform.py
+++ b/backend/onyx/utils/platform.py
@@ -1,32 +0,0 @@
-import logging
-import os
-
-logger = logging.getLogger(__name__)
-
-_ONYX_DOCKER_ENV_STR = "ONYX_RUNNING_IN_DOCKER"
-_DANSWER_DOCKER_ENV_STR = "DANSWER_RUNNING_IN_DOCKER"
-
-
-def _resolve_container_flag() -> bool:
-    onyx_val = os.getenv(_ONYX_DOCKER_ENV_STR)
-    if onyx_val is not None:
-        return onyx_val.lower() == "true"
-
-    danswer_val = os.getenv(_DANSWER_DOCKER_ENV_STR)
-    if danswer_val is not None:
-        logger.warning(
-            "%s is deprecated and will be ignored in a future release. "
-            "Use %s instead.",
-            _DANSWER_DOCKER_ENV_STR,
-            _ONYX_DOCKER_ENV_STR,
-        )
-        return danswer_val.lower() == "true"
-
-    return False
-
-
-_IS_RUNNING_IN_CONTAINER: bool = _resolve_container_flag()
-
-
-def is_running_in_container() -> bool:
-    return _IS_RUNNING_IN_CONTAINER
--- a/docs/METRICS.md
+++ b/docs/METRICS.md
@@ -217,19 +217,19 @@ Enriches docfetching and docprocessing tasks with connector-level labels. Silent
 | `onyx_indexing_task_completed_total`  | Counter   | `task_name`, `source`, `tenant_id`, `cc_pair_id`, `outcome` | Indexing tasks completed per connector   |
 | `onyx_indexing_task_duration_seconds` | Histogram | `task_name`, `source`, `tenant_id`                          | Indexing task duration by connector type |

-`connector_name` is intentionally excluded from these push-based counters to avoid unbounded cardinality (it's a free-form user string).
+`connector_name` is intentionally excluded from these per-task counters to avoid unbounded cardinality (it's a free-form user string).

 ### Connector Health Metrics (`onyx.server.metrics.connector_health_metrics`)

-Push-based metrics emitted by docfetching and docprocessing workers at the point where connector state changes occur. Scales to any number of tenants (no schema iteration).
+Push-based metrics emitted by docfetching and docprocessing workers at the point where connector state changes occur. Scales to any number of tenants (no schema iteration). Unlike the per-task counters above, these include `connector_name` because their cardinality is bounded by the number of connectors (one series per connector), not by the number of task executions.

-| Metric                                          | Type    | Labels                                        | Description                                                   |
-| ----------------------------------------------- | ------- | --------------------------------------------- | ------------------------------------------------------------- |
-| `onyx_index_attempt_transitions_total`          | Counter | `tenant_id`, `source`, `cc_pair_id`, `status` | Index attempt status transitions (in_progress, success, etc.) |
-| `onyx_connector_in_error_state`                 | Gauge   | `tenant_id`, `source`, `cc_pair_id`           | Whether connector is in repeated error state (1=yes, 0=no)    |
-| `onyx_connector_last_success_timestamp_seconds` | Gauge   | `tenant_id`, `source`, `cc_pair_id`           | Unix timestamp of last successful indexing                    |
-| `onyx_connector_docs_indexed_total`             | Counter | `tenant_id`, `source`, `cc_pair_id`           | Total documents indexed per connector (monotonic)             |
-| `onyx_connector_indexing_errors_total`          | Counter | `tenant_id`, `source`, `cc_pair_id`           | Total failed index attempts per connector (monotonic)         |
+| Metric                                          | Type    | Labels                                                          | Description                                                   |
+| ----------------------------------------------- | ------- | --------------------------------------------------------------- | ------------------------------------------------------------- |
+| `onyx_index_attempt_transitions_total`          | Counter | `tenant_id`, `source`, `cc_pair_id`, `connector_name`, `status` | Index attempt status transitions (in_progress, success, etc.) |
+| `onyx_connector_in_error_state`                 | Gauge   | `tenant_id`, `source`, `cc_pair_id`, `connector_name`           | Whether connector is in repeated error state (1=yes, 0=no)    |
+| `onyx_connector_last_success_timestamp_seconds` | Gauge   | `tenant_id`, `source`, `cc_pair_id`, `connector_name`           | Unix timestamp of last successful indexing                    |
+| `onyx_connector_docs_indexed_total`             | Counter | `tenant_id`, `source`, `cc_pair_id`, `connector_name`           | Total documents indexed per connector (monotonic)             |
+| `onyx_connector_indexing_errors_total`          | Counter | `tenant_id`, `source`, `cc_pair_id`, `connector_name`           | Total failed index attempts per connector (monotonic)         |

 ### Pull-Based Collectors (`onyx.server.metrics.indexing_pipeline`)

--- a/web/src/interfaces/settings.ts
+++ b/web/src/interfaces/settings.ts
@@ -76,10 +76,6 @@ export interface Settings {
  // Factory defaults for the restore button.
  default_user_file_max_upload_size_mb?: number;
  default_file_token_count_threshold_k?: number;
-
-  // True when the backend runs inside a container (Docker/Podman).
-  // Used to default local-service URLs to host.docker.internal.
-  is_containerized?: boolean;
 }

 export enum NotificationType {
--- a/web/src/sections/modals/llmConfig/LMStudioModal.tsx
+++ b/web/src/sections/modals/llmConfig/LMStudioModal.tsx
@@ -27,7 +27,8 @@ import {
 import { fetchModels } from "@/lib/llmConfig/svc";
 import { toast } from "@/hooks/useToast";
 import { refreshLlmProviderCaches } from "@/lib/llmConfig/cache";
-import { useSettingsContext } from "@/providers/SettingsProvider";
+
+const DEFAULT_API_BASE = "http://localhost:1234";

 interface LMStudioModalValues extends BaseLLMModalValues {
  api_base: string;
@@ -115,10 +116,6 @@ export default function LMStudioModal({
 }: LLMProviderFormProps) {
  const isOnboarding = variant === "onboarding";
  const { mutate } = useSWRConfig();
-  const { settings } = useSettingsContext();
-  const defaultApiBase = settings.is_containerized
-    ? "http://host.docker.internal:1234"
-    : "http://localhost:1234";

  const onClose = () => onOpenChange?.(false);

@@ -128,7 +125,7 @@ export default function LMStudioModal({
      LLMProviderName.LM_STUDIO,
      existingLlmProvider
    ),
-    api_base: existingLlmProvider?.api_base ?? defaultApiBase,
+    api_base: existingLlmProvider?.api_base ?? DEFAULT_API_BASE,
    custom_config: {
      LM_STUDIO_API_KEY: existingLlmProvider?.custom_config?.LM_STUDIO_API_KEY,
    },
--- a/web/src/sections/modals/llmConfig/OllamaModal.tsx
+++ b/web/src/sections/modals/llmConfig/OllamaModal.tsx
@@ -31,7 +31,8 @@ import { Card } from "@opal/components";
 import { toast } from "@/hooks/useToast";
 import { refreshLlmProviderCaches } from "@/lib/llmConfig/cache";
 import InputTypeInField from "@/refresh-components/form/InputTypeInField";
-import { useSettingsContext } from "@/providers/SettingsProvider";
+
+const DEFAULT_API_BASE = "http://127.0.0.1:11434";
 const CLOUD_API_BASE = "https://ollama.com";

 enum Tab {
@@ -162,10 +163,6 @@ export default function OllamaModal({
 }: LLMProviderFormProps) {
  const isOnboarding = variant === "onboarding";
  const { mutate } = useSWRConfig();
-  const { settings } = useSettingsContext();
-  const defaultApiBase = settings.is_containerized
-    ? "http://host.docker.internal:11434"
-    : "http://127.0.0.1:11434";
  const apiKey = existingLlmProvider?.custom_config?.OLLAMA_API_KEY;
  const defaultTab =
    existingLlmProvider && !!apiKey ? Tab.TAB_CLOUD : Tab.TAB_SELF_HOSTED;
@@ -179,7 +176,7 @@ export default function OllamaModal({
      LLMProviderName.OLLAMA_CHAT,
      existingLlmProvider
    ),
-    api_base: existingLlmProvider?.api_base ?? defaultApiBase,
+    api_base: existingLlmProvider?.api_base ?? DEFAULT_API_BASE,
    custom_config: {
      OLLAMA_API_KEY: apiKey,
    },
Author	SHA1	Message	Date
Nikolas Garza	546da624a1	feat(metrics): add connector_name label to push-based connector metrics (#10237)	2026-04-15 22:58:49 +00:00
Nikolas Garza	1a88dea760	fix(model-server): add missing onyx/configs to Dockerfile for sentry support (#10236)	2026-04-15 22:42:00 +00:00
Justin Tahara	53d2d647c5	fix(deletion): Commit Session in per-doc cleanup (#10193)	2026-04-15 22:37:00 +00:00