Compare commits

...

3 Commits

Author SHA1 Message Date
Yuhong Sun
2ef72df67c mypy 2026-04-07 09:45:29 -07:00
Yuhong Sun
67456412c8 checkpoint 2026-04-04 17:09:38 -07:00
Yuhong Sun
c83a107390 Checkpoint 2026-04-02 00:51:16 -07:00
6 changed files with 227 additions and 44 deletions

View File

@@ -59,6 +59,9 @@ from onyx.db.permission_sync_attempt import (
from onyx.redis.redis_connector import RedisConnector
from onyx.redis.redis_connector_utils import get_deletion_attempt_snapshot
from onyx.redis.redis_pool import get_redis_client
from onyx.server.documents.mock_connector_data import get_mock_cc_pair_full_info
from onyx.server.documents.mock_connector_data import get_mock_index_attempts
from onyx.server.documents.mock_connector_data import load_mock_data
from onyx.server.documents.models import CCPairFullInfo
from onyx.server.documents.models import CCPropertyUpdateRequest
from onyx.server.documents.models import CCStatusUpdateRequest
@@ -85,6 +88,18 @@ def get_cc_pair_index_attempts(
user: User = Depends(current_curator_or_admin_user),
db_session: Session = Depends(get_session),
) -> PaginatedReturn[IndexAttemptSnapshot]:
mock_data = load_mock_data()
if mock_data is not None:
mock_attempts = get_mock_index_attempts(mock_data, cc_pair_id)
if mock_attempts is not None:
all_items = [IndexAttemptSnapshot(**a) for a in mock_attempts]
start = page_num * page_size
page_items = all_items[start : start + page_size]
return PaginatedReturn(
items=page_items,
total_items=len(all_items),
)
if user:
user_has_access = verify_user_has_access_to_cc_pair(
cc_pair_id, db_session, user, get_editable=False
@@ -157,6 +172,12 @@ def get_cc_pair_full_info(
user: User = Depends(current_curator_or_admin_user),
db_session: Session = Depends(get_session),
) -> CCPairFullInfo:
mock_data = load_mock_data()
if mock_data is not None:
mock_info = get_mock_cc_pair_full_info(mock_data, cc_pair_id)
if mock_info is not None:
return CCPairFullInfo(**mock_info)
tenant_id = get_current_tenant_id()
cc_pair = get_connector_credential_pair_from_id_for_user(

View File

@@ -32,7 +32,6 @@ from onyx.background.celery.tasks.pruning.tasks import (
from onyx.background.celery.versioned_apps.client import app as client_app
from onyx.configs.app_configs import EMAIL_CONFIGURED
from onyx.configs.app_configs import ENABLED_CONNECTOR_TYPES
from onyx.configs.app_configs import MOCK_CONNECTOR_FILE_PATH
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import FileOrigin
from onyx.configs.constants import MilestoneRecordType
@@ -125,6 +124,8 @@ from onyx.file_store.file_store import FileStore
from onyx.file_store.file_store import get_default_file_store
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.redis.redis_pool import get_redis_client
from onyx.server.documents.mock_connector_data import get_mock_indexing_statuses
from onyx.server.documents.mock_connector_data import load_mock_data
from onyx.server.documents.models import AuthStatus
from onyx.server.documents.models import AuthUrl
from onyx.server.documents.models import ConnectorBase
@@ -1115,28 +1116,40 @@ def get_connector_indexing_status(
# sqlalchemy-method-connection-for-bind-is-already-in-progress
# for why we can't pass in the current db_session to these functions
if MOCK_CONNECTOR_FILE_PATH:
import json
mock_data = load_mock_data()
if mock_data is not None:
mock_statuses = get_mock_indexing_statuses(mock_data)
if mock_statuses is not None:
# Group statuses by source, mirroring the real code path.
source_to_statuses: dict[
DocumentSource, list[ConnectorIndexingStatusLite]
] = {}
for raw in mock_statuses:
status = ConnectorIndexingStatusLite(**raw)
source_to_statuses.setdefault(status.source, []).append(status)
with open(MOCK_CONNECTOR_FILE_PATH, "r") as f:
raw_data = json.load(f)
connector_indexing_statuses = [
ConnectorIndexingStatusLite(**status) for status in raw_data
]
return [
ConnectorIndexingStatusLiteResponse(
source=DocumentSource.FILE,
summary=SourceSummary(
total_connectors=100,
active_connectors=100,
public_connectors=100,
total_docs_indexed=100000,
),
current_page=1,
total_pages=1,
indexing_statuses=connector_indexing_statuses,
)
]
response_list: list[ConnectorIndexingStatusLiteResponse] = []
for source in sorted(source_to_statuses):
statuses = source_to_statuses[source]
total_docs = sum(s.docs_indexed for s in statuses)
public_count = sum(
1 for s in statuses if s.access_type == AccessType.PUBLIC
)
response_list.append(
ConnectorIndexingStatusLiteResponse(
source=source,
summary=SourceSummary(
total_connectors=len(statuses),
active_connectors=len(statuses),
public_connectors=public_count,
total_docs_indexed=total_docs,
),
current_page=1,
total_pages=1,
indexing_statuses=statuses,
)
)
return response_list
parallel_functions: list[tuple[CallableProtocol, tuple[Any, ...]]] = [
# Get editable connector/credential pairs
@@ -1266,16 +1279,16 @@ def get_connector_indexing_status(
# Process editable cc_pairs
editable_statuses: list[ConnectorIndexingStatusLite] = []
for cc_pair in editable_cc_pairs:
status = build_connector_indexing_status(cc_pair, True)
if status:
editable_statuses.append(status)
editable_status = build_connector_indexing_status(cc_pair, True)
if editable_status:
editable_statuses.append(editable_status)
# Process non-editable cc_pairs
non_editable_statuses: list[ConnectorIndexingStatusLite] = []
for cc_pair in non_editable_cc_pairs:
status = build_connector_indexing_status(cc_pair, False)
if status:
non_editable_statuses.append(status)
non_editable_status = build_connector_indexing_status(cc_pair, False)
if non_editable_status:
non_editable_statuses.append(non_editable_status)
# Process federated connectors
federated_statuses: list[FederatedConnectorStatus] = []
@@ -1328,13 +1341,14 @@ def get_connector_indexing_status(
editable_statuses + non_editable_statuses + federated_statuses
):
if isinstance(connector_status, FederatedConnectorStatus):
source = connector_status.source.to_non_federated_source()
maybe_source = connector_status.source.to_non_federated_source()
else:
source = connector_status.source
maybe_source = connector_status.source
# Skip if source is None (federated connectors without mapping)
if source is None:
if maybe_source is None:
continue
source = maybe_source
if source not in source_to_summary:
source_to_summary[source] = SourceSummary(
@@ -1369,26 +1383,27 @@ def get_connector_indexing_status(
editable_statuses + non_editable_statuses + federated_statuses
):
if isinstance(connector_status, FederatedConnectorStatus):
source = connector_status.source.to_non_federated_source()
maybe_source = connector_status.source.to_non_federated_source()
else:
source = connector_status.source
maybe_source = connector_status.source
# Skip if source is None (federated connectors without mapping)
if source is None:
if maybe_source is None:
continue
source = maybe_source
if source not in source_to_all_statuses:
source_to_all_statuses[source] = []
source_to_all_statuses[source].append(connector_status)
# Create paginated response objects by source
response_list: list[ConnectorIndexingStatusLiteResponse] = []
paginated_responses: list[ConnectorIndexingStatusLiteResponse] = []
source_list = list(source_to_all_statuses.keys())
source_list.sort()
for source in source_list:
statuses = source_to_all_statuses[source]
all_statuses = source_to_all_statuses[source]
# Get current page for this source (default to page 1, 1-indexed)
current_page = request.source_to_page.get(source, 1)
@@ -1397,24 +1412,26 @@ def get_connector_indexing_status(
end_idx = start_idx + _INDEXING_STATUS_PAGE_SIZE
if request.get_all_connectors:
page_statuses = statuses
page_statuses = all_statuses
else:
# Get the page slice for this source
page_statuses = statuses[start_idx:end_idx]
page_statuses = all_statuses[start_idx:end_idx]
# Create response object for this source
if page_statuses: # Only include sources that have data on this page
response_list.append(
paginated_responses.append(
ConnectorIndexingStatusLiteResponse(
source=source,
summary=source_to_summary[source],
current_page=current_page,
total_pages=math.ceil(len(statuses) / _INDEXING_STATUS_PAGE_SIZE),
total_pages=math.ceil(
len(all_statuses) / _INDEXING_STATUS_PAGE_SIZE
),
indexing_statuses=page_statuses,
)
)
return response_list
return paginated_responses
def _get_connector_indexing_status_lite(

View File

@@ -0,0 +1,133 @@
"""Utilities for loading mock connector data from a JSON file.
When MOCK_CONNECTOR_FILE_PATH is set, the backend serves connector listing,
detail, and index-attempt endpoints from a static JSON file instead of hitting
the database. This is useful for frontend development and demos.
Time-offset support
-------------------
Any datetime string field in the JSON can be replaced with an *offset string*
of the form ``"<offset_seconds>"``, e.g. ``"-3600"`` means "1 hour ago" and
``"-86400"`` means "24 hours ago". Positive values point to the future.
The offset is resolved to an absolute ISO-8601 datetime at load time, so
each request gets a fresh "now".
"""
import json
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import Any
from onyx.configs.app_configs import MOCK_CONNECTOR_FILE_PATH
from onyx.utils.logger import setup_logger
logger = setup_logger()  # NOTE(review): not referenced anywhere in this module — confirm before removing

# ---- JSON schema top-level keys ------------------------------------------------
# Top-level keys expected in the mock JSON file. A legacy bare-list file is
# mapped onto _KEY_INDEXING_STATUSES by load_mock_data().
_KEY_INDEXING_STATUSES = "indexing_statuses"
_KEY_CC_PAIR_FULL_INFO = "cc_pair_full_info"
_KEY_INDEX_ATTEMPTS = "index_attempts"

# Fields across the relevant Pydantic models that hold datetimes.
# String values under these keys may be numeric "offset seconds" strings and
# are resolved to absolute ISO datetimes at load time. Some names repeat
# across the model groups below ("time_created", "time_updated"); duplicates
# are harmless in a set literal.
_DATETIME_FIELDS: set[str] = {
    # ConnectorIndexingStatusLite
    "last_success",
    # CCPairFullInfo
    "last_indexed",
    "last_pruned",
    "last_full_permission_sync",
    "last_permission_sync_attempt_finished",
    # ConnectorSnapshot / CredentialSnapshot
    "time_created",
    "time_updated",
    "indexing_start",
    # IndexAttemptSnapshot
    "time_started",
    "time_updated",
    "poll_range_start",
    "poll_range_end",
    # IndexAttemptErrorPydantic
    "failed_time_range_start",
    "failed_time_range_end",
    "time_created",
}
def _resolve_time_offsets(obj: Any, now: datetime | None = None) -> Any:
    """Walk a JSON-like structure and resolve offset strings to ISO datetimes.

    An offset string is a string that is parseable as an integer or float.
    It represents seconds relative to *now*.

    `now` defaults to the current UTC time and is threaded through the whole
    traversal so that every resolved datetime in a single payload shares the
    same reference instant. (Previously, items nested inside lists re-sampled
    the clock via a fresh top-level call, producing slightly inconsistent
    timestamps within one load.)
    """
    if now is None:
        now = datetime.now(tz=timezone.utc)
    if isinstance(obj, dict):
        return {k: _resolve_value(k, v, now) for k, v in obj.items()}
    if isinstance(obj, list):
        return [_resolve_time_offsets(item, now) for item in obj]
    return obj


def _resolve_value(key: str, value: Any, now: datetime) -> Any:
    """Resolve one dict entry, recursing into nested containers.

    Only string values whose key is listed in _DATETIME_FIELDS are candidates
    for offset resolution; everything else is returned unchanged.
    """
    if isinstance(value, dict):
        return {k: _resolve_value(k, v, now) for k, v in value.items()}
    if isinstance(value, list):
        return [_resolve_time_offsets(item, now) for item in value]
    if key in _DATETIME_FIELDS and isinstance(value, str):
        try:
            offset_seconds = float(value)
        except ValueError:
            # Not a numeric string; leave it as-is (already an ISO datetime).
            return value
        try:
            return (now + timedelta(seconds=offset_seconds)).isoformat()
        except (OverflowError, ValueError):
            # "inf"/"nan" parse as floats but cannot form a timedelta.
            return value
    return value
def _load_raw() -> dict[str, Any] | None:
    """Parse and return the JSON at MOCK_CONNECTOR_FILE_PATH, or None if unset."""
    path = MOCK_CONNECTOR_FILE_PATH
    if not path:
        return None
    with open(path) as handle:
        return json.load(handle)
def load_mock_data() -> dict[str, Any] | None:
    """Return the mock connector data with time offsets resolved.

    Gives back None when mocking is disabled (no file path configured). A bare
    JSON list — the old file format — is wrapped under the indexing-statuses
    key so callers always see the dict-shaped schema.
    """
    raw = _load_raw()
    if raw is None:
        return None
    data = {_KEY_INDEXING_STATUSES: raw} if isinstance(raw, list) else raw
    return _resolve_time_offsets(data)
def get_mock_indexing_statuses(
    data: dict[str, Any],
) -> list[dict[str, Any]] | None:
    """Return the raw indexing-status dicts from *data*, or None if absent."""
    statuses = data.get(_KEY_INDEXING_STATUSES)
    return statuses
def get_mock_cc_pair_full_info(
    data: dict[str, Any],
    cc_pair_id: int,
) -> dict[str, Any] | None:
    """Look up the mock CCPairFullInfo payload for *cc_pair_id*, if present.

    The JSON maps stringified cc_pair ids to payloads, so the id is converted
    before the lookup. Returns None when the section or the id is missing.
    """
    table = data.get(_KEY_CC_PAIR_FULL_INFO)
    return table.get(str(cc_pair_id)) if table else None
def get_mock_index_attempts(
    data: dict[str, Any],
    cc_pair_id: int,
) -> list[dict[str, Any]] | None:
    """Look up the mock index attempts for *cc_pair_id*, if present.

    The JSON maps stringified cc_pair ids to attempt lists, so the id is
    converted before the lookup. Returns None when the section or the id is
    missing.
    """
    table = data.get(_KEY_INDEX_ATTEMPTS)
    return table.get(str(cc_pair_id)) if table else None

View File

@@ -112,6 +112,7 @@ interface UseChatControllerProps {
searchParams: ReadonlyURLSearchParams;
resetInputBar: () => void;
setSelectedAgentFromId: (agentId: number | null) => void;
totalAvailableSources?: number;
}
async function stopChatSession(chatSessionId: string): Promise<void> {
@@ -136,6 +137,7 @@ export default function useChatController({
selectedDocuments,
resetInputBar,
setSelectedAgentFromId,
totalAvailableSources,
}: UseChatControllerProps) {
const pathname = usePathname();
const router = useRouter();
@@ -708,7 +710,8 @@ export default function useChatController({
filterManager.selectedSources,
filterManager.selectedDocumentSets,
filterManager.timeRange,
filterManager.selectedTags
filterManager.selectedTags,
totalAvailableSources
),
modelProvider:
modelOverride?.name || llmManager.currentLlm.name || undefined,

View File

@@ -11,11 +11,19 @@ export const buildFilters = (
sources: SourceMetadata[],
documentSets: string[],
timeRange: DateRangePickerValue | null,
tags: Tag[]
tags: Tag[],
totalAvailableSources?: number
): Filters => {
// If all available sources are selected, treat as no filter (null)
const allSourcesSelected =
totalAvailableSources !== undefined &&
sources.length >= totalAvailableSources;
const filters = {
source_type:
sources.length > 0 ? sources.map((source) => source.internalName) : null,
sources.length > 0 && !allSourcesSelected
? sources.map((source) => source.internalName)
: null,
document_set: documentSets.length > 0 ? documentSets : null,
time_cutoff: timeRange?.from ? timeRange.from : null,
tags: tags,

View File

@@ -383,6 +383,7 @@ export default function AppPage({ firstMessage }: ChatPageProps) {
searchParams,
resetInputBar,
setSelectedAgentFromId,
totalAvailableSources: sources.length,
});
const {