Compare commits

39 Commits

Author SHA1 Message Date
Justin Tahara
b61109a747 fix(image): Cap Uploaded File Image Count (#10298) 2026-04-16 21:36:45 -07:00
Jamison Lahman
78459fb3e7 Revert "chore(deps): bump litellm from 1.81.6 to 1.83.0 (#9898) to release v3.0" (#9909) 2026-04-03 18:32:06 -07:00
Jamison Lahman
e243d7955b chore(deps): bump litellm from 1.81.6 to 1.83.0 (#9898) to release v3.0 (#9903)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-04-03 16:02:05 -07:00
Wenxi
77f5411bf7 fix(ci): tag web-server and model-server with craft-latest (#9661)
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-26 11:00:49 -07:00
Wenxi
c45caf1f1d refactor: use ods latest-stable-tag to tag images in Docker Hub (#9281)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-03-26 11:00:49 -07:00
Wenxi
4f534249d6 refactor: sync craft latest builds with latest stable (#9279) 2026-03-26 11:00:49 -07:00
Wenxi
eb87d88b89 feat(ods): use release-tag to print highest stable semver that should receive the latest tag (#9278)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-03-26 11:00:49 -07:00
github-actions[bot]
4fd6786ce2 fix(chat): dont clear input message after errors submitting (#9624) to release v3.0 (#9626)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-25 12:04:20 -07:00
github-actions[bot]
6919afe022 fix(ux): disable MCP Tools toggle if needs authenticated (#9607) to release v3.0 (#9608)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-24 15:54:52 -07:00
Justin Tahara
c4ac0fd286 fix(ui): Text focused paste from PowerPoint (#9603) 2026-03-24 14:31:19 -07:00
github-actions[bot]
d2f8e38e67 chore(playwright): mask date switcher in screenshots (#9584) to release v3.0 (#9585)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-23 18:45:09 -07:00
github-actions[bot]
bbd57c5904 fix(ux): display invalid agent fields on load (#9582) to release v3.0 (#9583)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-23 17:29:22 -07:00
github-actions[bot]
546d5cd384 fix(ux): give a tooltip with reason agent edit cannot save (#9571) to release v3.0 (#9572)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-23 13:54:01 -07:00
Evan Lohn
f902f49483 fix: last index time consistency (#9546) 2026-03-23 10:35:20 -07:00
Justin Tahara
ed3630e248 feat(backend): Adding procps (#9509) 2026-03-19 16:34:58 -07:00
Jamison Lahman
598e605dd2 chore(hotfix): cherry-pick 3 commits to release v3.0 (#9510)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-03-19 16:15:10 -07:00
Evan Lohn
aee02f6501 fix: drive rate limit retry (#9498) 2026-03-19 14:40:13 -07:00
Justin Tahara
2959470114 fix(code interpreter): Caching files (#9484) 2026-03-19 14:14:21 -07:00
github-actions[bot]
7d9a339e0b fix(fe): fix memories immediately losing focus on click (#9493) to release v3.0 (#9496)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-19 13:24:23 -07:00
Justin Tahara
a2742fcabf fix(agents): Agents are Private by Default (#9465) 2026-03-18 17:10:49 -07:00
Justin Tahara
ba4b4f0930 fix(logging): extract LiteLLM error details in image summarization failures (#9458) 2026-03-18 16:58:11 -07:00
Justin Tahara
74a4d620ad fix(celery): add dedup guardrails to user file delete queue (#9454)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-18 16:49:37 -07:00
Justin Tahara
51f46bd8f0 fix(celery): add task expiry to upload API send_task call (#9456)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-18 16:25:21 -07:00
Justin Tahara
e6cfe77a6d fix(image): stop dumping base64 image data into error logs (#9457) 2026-03-18 16:04:22 -07:00
github-actions[bot]
cc3719f356 fix(file upload): Allow zip file upload via query param (#9432) to release v3.0 (#9443)
Co-authored-by: Danelegend <43459662+Danelegend@users.noreply.github.com>
2026-03-18 09:25:05 -07:00
github-actions[bot]
b658ad8985 chore: bump next to 16.1.7 (#9423) to release v3.0 (#9448)
Co-authored-by: Nikolas Garza <90273783+nmgarza5@users.noreply.github.com>
2026-03-18 09:24:54 -07:00
Nikolas Garza
b1632044ed fix(vespa): use weightedSet for ACL filters to prevent query failures (#9403) to release v3.0 (#9409) 2026-03-17 11:19:04 -07:00
github-actions[bot]
9fa8265f00 chore(tests): fix flaky test_run_with_timeout_raises_on_timeout (#9377) to release v3.0 (#9379)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-16 12:10:56 -07:00
github-actions[bot]
ce53e123dc fix(fe): bump flatted to patch CVE-2026-32141 (#9350) to release v3.0 (#9353)
Co-authored-by: Nikolas Garza <90273783+nmgarza5@users.noreply.github.com>
2026-03-14 00:11:18 -07:00
github-actions[bot]
5606ae5e81 fix(litellm): filter embedding models (#9347) to release v3.0 (#9348)
Co-authored-by: Danelegend <43459662+Danelegend@users.noreply.github.com>
2026-03-13 23:43:50 -07:00
Evan Lohn
923e0691aa fix: sharepoint pages 400 list expand (#9321) 2026-03-13 11:43:08 -07:00
Evan Lohn
b232e2a771 fix: skip classic site pages (#9318) 2026-03-12 22:08:49 -07:00
Evan Lohn
c3ebfeda2f chore: sharepoint error logs (#9309) 2026-03-12 21:37:50 -07:00
github-actions[bot]
6a28dfedb1 fix(fe): prevent clicking InputSelect from selecting text (#9292) to release v3.0 (#9306)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-12 09:36:25 -07:00
github-actions[bot]
a123ec083d chore(devtools): upgrade ods: 0.6.3->0.7.0 (#9297) to release v3.0 (#9298)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-11 20:37:22 -07:00
Nikolas Garza
f448f1274d fix(slackbot): resolve channel references and filter search by channel tags (#9256) to release v3.0 (#9294) 2026-03-11 20:23:28 -07:00
Jamison Lahman
d12f8b94aa fix(fe): InputComboBox resets filter value on open (#9287) to release v3.0 (#9291) 2026-03-11 18:36:36 -07:00
Bo-Onyx
355fe2ff2c fix(api memory): replace glibc with jemalloc for memory allocating (#9196) to release v3.0 (#9282)
Co-authored-by: Justin Tahara <105671973+justin-tahara@users.noreply.github.com>
2026-03-11 14:58:43 -07:00
Nikolas Garza
8ec5423a0c fix(tests): remove deprecated o1-preview and o1-mini model tests (#9280) 2026-03-11 14:37:03 -07:00
94 changed files with 3541 additions and 756 deletions

View File

@@ -29,20 +29,32 @@ jobs:
build-backend-craft: ${{ steps.check.outputs.build-backend-craft }}
build-model-server: ${{ steps.check.outputs.build-model-server }}
is-cloud-tag: ${{ steps.check.outputs.is-cloud-tag }}
is-stable: ${{ steps.check.outputs.is-stable }}
is-beta: ${{ steps.check.outputs.is-beta }}
is-stable-standalone: ${{ steps.check.outputs.is-stable-standalone }}
is-beta-standalone: ${{ steps.check.outputs.is-beta-standalone }}
is-craft-latest: ${{ steps.check.outputs.is-craft-latest }}
is-latest: ${{ steps.check.outputs.is-latest }}
is-test-run: ${{ steps.check.outputs.is-test-run }}
sanitized-tag: ${{ steps.check.outputs.sanitized-tag }}
short-sha: ${{ steps.check.outputs.short-sha }}
steps:
- name: Checkout (for git tags)
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
fetch-depth: 0
fetch-tags: true
- name: Setup uv
uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # ratchet:astral-sh/setup-uv@v7
with:
version: "0.9.9"
enable-cache: false
- name: Check which components to build and version info
id: check
env:
EVENT_NAME: ${{ github.event_name }}
run: |
set -eo pipefail
TAG="${GITHUB_REF_NAME}"
# Sanitize tag name by replacing slashes with hyphens (for Docker tag compatibility)
SANITIZED_TAG=$(echo "$TAG" | tr '/' '-')
@@ -54,9 +66,8 @@ jobs:
IS_VERSION_TAG=false
IS_STABLE=false
IS_BETA=false
IS_STABLE_STANDALONE=false
IS_BETA_STANDALONE=false
IS_CRAFT_LATEST=false
IS_LATEST=false
IS_PROD_TAG=false
IS_TEST_RUN=false
BUILD_DESKTOP=false
@@ -67,9 +78,6 @@ jobs:
BUILD_MODEL_SERVER=true
# Determine tag type based on pattern matching (do regex checks once)
if [[ "$TAG" == craft-* ]]; then
IS_CRAFT_LATEST=true
fi
if [[ "$TAG" == *cloud* ]]; then
IS_CLOUD=true
fi
@@ -97,20 +105,28 @@ jobs:
fi
fi
# Craft-latest builds backend with Craft enabled
if [[ "$IS_CRAFT_LATEST" == "true" ]]; then
BUILD_BACKEND_CRAFT=true
BUILD_BACKEND=false
fi
# Standalone version checks (for backend/model-server - version excluding cloud tags)
if [[ "$IS_STABLE" == "true" ]] && [[ "$IS_CLOUD" != "true" ]]; then
IS_STABLE_STANDALONE=true
fi
if [[ "$IS_BETA" == "true" ]] && [[ "$IS_CLOUD" != "true" ]]; then
IS_BETA_STANDALONE=true
fi
# Determine if this tag should get the "latest" Docker tag.
# Only the highest semver stable tag (vX.Y.Z exactly) gets "latest".
if [[ "$IS_STABLE" == "true" ]]; then
HIGHEST_STABLE=$(uv run --no-sync --with onyx-devtools ods latest-stable-tag) || {
echo "::error::Failed to determine highest stable tag via 'ods latest-stable-tag'"
exit 1
}
if [[ "$TAG" == "$HIGHEST_STABLE" ]]; then
IS_LATEST=true
fi
fi
# Build craft-latest backend alongside the regular latest.
if [[ "$IS_LATEST" == "true" ]]; then
BUILD_BACKEND_CRAFT=true
fi
# Determine if this is a production tag
# Production tags are: version tags (v1.2.3*) or nightly tags
if [[ "$IS_VERSION_TAG" == "true" ]] || [[ "$IS_NIGHTLY" == "true" ]]; then
@@ -129,11 +145,9 @@ jobs:
echo "build-backend-craft=$BUILD_BACKEND_CRAFT"
echo "build-model-server=$BUILD_MODEL_SERVER"
echo "is-cloud-tag=$IS_CLOUD"
echo "is-stable=$IS_STABLE"
echo "is-beta=$IS_BETA"
echo "is-stable-standalone=$IS_STABLE_STANDALONE"
echo "is-beta-standalone=$IS_BETA_STANDALONE"
echo "is-craft-latest=$IS_CRAFT_LATEST"
echo "is-latest=$IS_LATEST"
echo "is-test-run=$IS_TEST_RUN"
echo "sanitized-tag=$SANITIZED_TAG"
echo "short-sha=$SHORT_SHA"
@@ -600,7 +614,8 @@ jobs:
latest=false
tags: |
type=raw,value=${{ needs.determine-builds.outputs.is-test-run == 'true' && format('web-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-stable == 'true' && 'latest' || '' }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'latest' || '' }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'craft-latest' || '' }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-beta == 'true' && 'beta' || '' }}
@@ -1037,7 +1052,7 @@ jobs:
latest=false
tags: |
type=raw,value=${{ needs.determine-builds.outputs.is-test-run == 'true' && format('backend-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-stable-standalone == 'true' && 'latest' || '' }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'latest' || '' }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-beta-standalone == 'true' && 'beta' || '' }}
@@ -1249,8 +1264,6 @@ jobs:
latest=false
tags: |
type=raw,value=craft-latest
# TODO: Consider aligning craft-latest tags with regular backend builds (e.g., latest, edge, beta)
# to keep tagging strategy consistent across all backend images
- name: Create and push manifest
env:
@@ -1473,7 +1486,8 @@ jobs:
latest=false
tags: |
type=raw,value=${{ needs.determine-builds.outputs.is-test-run == 'true' && format('model-server-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-stable-standalone == 'true' && 'latest' || '' }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'latest' || '' }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'craft-latest' || '' }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-beta-standalone == 'true' && 'beta' || '' }}
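
The "latest" decision above delegates to the ods devtool. As a rough sketch of what `ods latest-stable-tag` conceptually computes (the real implementation may differ), the highest stable tag is the maximum vX.Y.Z tag with no pre-release suffix:

import re
import subprocess

def latest_stable_tag() -> str | None:
    # All v* tags are available because the checkout uses fetch-depth: 0
    # and fetch-tags: true (see the workflow step above).
    tags = subprocess.run(
        ["git", "tag", "--list", "v*"],
        capture_output=True, text=True, check=True,
    ).stdout.split()
    stable = re.compile(r"^v(\d+)\.(\d+)\.(\d+)$")  # vX.Y.Z exactly, no suffix
    versions = [
        (tuple(int(g) for g in m.groups()), tag)
        for tag in tags
        if (m := stable.match(tag))
    ]
    return max(versions)[1] if versions else None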

View File

@@ -46,7 +46,11 @@ RUN apt-get update && \
pkg-config \
gcc \
nano \
vim && \
vim \
# Install procps so Kubernetes exec sessions can use ps aux for debugging
procps \
libjemalloc2 \
&& \
rm -rf /var/lib/apt/lists/* && \
apt-get clean
@@ -164,6 +168,13 @@ ENV PYTHONPATH=/app
ARG ONYX_VERSION=0.0.0-dev
ENV ONYX_VERSION=${ONYX_VERSION}
# Use jemalloc instead of glibc malloc to reduce memory fragmentation
# in long-running Python processes (API server, Celery workers).
# The soname is architecture-independent; the dynamic linker resolves
# the correct path from standard library directories.
# Placed after all RUN steps so build-time processes are unaffected.
ENV LD_PRELOAD=libjemalloc.so.2
# Default command which does nothing
# This container is used by api server and background which specify their own CMD
CMD ["tail", "-f", "/dev/null"]

View File

@@ -24,6 +24,7 @@ from onyx.configs.app_configs import MANAGED_VESPA
from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_USER_FILE_DELETE_TASK_EXPIRES
from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
from onyx.configs.constants import CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT
@@ -33,6 +34,7 @@ from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisLocks
from onyx.configs.constants import USER_FILE_DELETE_MAX_QUEUE_DEPTH
from onyx.configs.constants import USER_FILE_PROCESSING_MAX_QUEUE_DEPTH
from onyx.configs.constants import USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH
from onyx.connectors.file.connector import LocalFileConnector
@@ -88,6 +90,17 @@ def _user_file_delete_lock_key(user_file_id: str | UUID) -> str:
return f"{OnyxRedisLocks.USER_FILE_DELETE_LOCK_PREFIX}:{user_file_id}"
def _user_file_delete_queued_key(user_file_id: str | UUID) -> str:
"""Key that exists while a delete_single_user_file task is sitting in the queue.
The beat generator sets this with a TTL equal to CELERY_USER_FILE_DELETE_TASK_EXPIRES
before enqueuing and the worker deletes it as its first action. This prevents
the beat from adding duplicate tasks for files that already have a live task
in flight.
"""
return f"{OnyxRedisLocks.USER_FILE_DELETE_QUEUED_PREFIX}:{user_file_id}"
def get_user_file_project_sync_queue_depth(celery_app: Celery) -> int:
redis_celery: Redis = celery_app.broker_connection().channel().client # type: ignore
return celery_get_queue_length(
@@ -546,7 +559,23 @@ def process_single_user_file(
ignore_result=True,
)
def check_for_user_file_delete(self: Task, *, tenant_id: str) -> None:
"""Scan for user files with DELETING status and enqueue per-file tasks."""
"""Scan for user files with DELETING status and enqueue per-file tasks.
Three mechanisms prevent queue runaway (mirrors check_user_file_processing):
1. **Queue depth backpressure**: if the broker queue already has more than
USER_FILE_DELETE_MAX_QUEUE_DEPTH items, we skip this beat cycle entirely.
2. **Per-file queued guard**: before enqueuing a task, we set a short-lived
Redis key (TTL = CELERY_USER_FILE_DELETE_TASK_EXPIRES). If that key
already exists, the file already has a live task in the queue, so we skip
it. The worker deletes the key the moment it picks up the task so the
next beat cycle can re-enqueue if the file is still DELETING.
3. **Task expiry**: every enqueued task carries an `expires` value equal to
CELERY_USER_FILE_DELETE_TASK_EXPIRES. If a task is still sitting in
the queue after that deadline, Celery discards it without touching the DB.
"""
task_logger.info("check_for_user_file_delete - Starting")
redis_client = get_redis_client(tenant_id=tenant_id)
lock: RedisLock = redis_client.lock(
@@ -555,8 +584,23 @@ def check_for_user_file_delete(self: Task, *, tenant_id: str) -> None:
)
if not lock.acquire(blocking=False):
return None
enqueued = 0
skipped_guard = 0
try:
# --- Protection 1: queue depth backpressure ---
# NOTE: must use the broker's Redis client (not redis_client) because
# Celery queues live on a separate Redis DB with CELERY_SEPARATOR keys.
r_celery: Redis = self.app.broker_connection().channel().client # type: ignore
queue_len = celery_get_queue_length(OnyxCeleryQueues.USER_FILE_DELETE, r_celery)
if queue_len > USER_FILE_DELETE_MAX_QUEUE_DEPTH:
task_logger.warning(
f"check_for_user_file_delete - Queue depth {queue_len} exceeds "
f"{USER_FILE_DELETE_MAX_QUEUE_DEPTH}, skipping enqueue for "
f"tenant={tenant_id}"
)
return None
with get_session_with_current_tenant() as db_session:
user_file_ids = (
db_session.execute(
@@ -568,23 +612,40 @@ def check_for_user_file_delete(self: Task, *, tenant_id: str) -> None:
.all()
)
for user_file_id in user_file_ids:
self.app.send_task(
OnyxCeleryTask.DELETE_SINGLE_USER_FILE,
kwargs={"user_file_id": str(user_file_id), "tenant_id": tenant_id},
queue=OnyxCeleryQueues.USER_FILE_DELETE,
priority=OnyxCeleryPriority.HIGH,
# --- Protection 2: per-file queued guard ---
queued_key = _user_file_delete_queued_key(user_file_id)
guard_set = redis_client.set(
queued_key,
1,
ex=CELERY_USER_FILE_DELETE_TASK_EXPIRES,
nx=True,
)
if not guard_set:
skipped_guard += 1
continue
# --- Protection 3: task expiry ---
try:
self.app.send_task(
OnyxCeleryTask.DELETE_SINGLE_USER_FILE,
kwargs={
"user_file_id": str(user_file_id),
"tenant_id": tenant_id,
},
queue=OnyxCeleryQueues.USER_FILE_DELETE,
priority=OnyxCeleryPriority.HIGH,
expires=CELERY_USER_FILE_DELETE_TASK_EXPIRES,
)
except Exception:
redis_client.delete(queued_key)
raise
enqueued += 1
except Exception as e:
task_logger.exception(
f"check_for_user_file_delete - Error enqueuing deletes - {e.__class__.__name__}"
)
return None
finally:
if lock.owned():
lock.release()
task_logger.info(
f"check_for_user_file_delete - Enqueued {enqueued} tasks for tenant={tenant_id}"
f"check_for_user_file_delete - Enqueued {enqueued} tasks, skipped_guard={skipped_guard} for tenant={tenant_id}"
)
return None
@@ -602,6 +663,9 @@ def delete_user_file_impl(
file_lock: RedisLock | None = None
if redis_locking:
redis_client = get_redis_client(tenant_id=tenant_id)
# Clear the queued guard so the beat can re-enqueue if deletion fails
# and the file remains in DELETING status.
redis_client.delete(_user_file_delete_queued_key(user_file_id))
file_lock = redis_client.lock(
_user_file_delete_lock_key(user_file_id),
timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
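
The queued guard (protection 2) boils down to an atomic SET NX EX handshake between the beat and the worker. A standalone sketch of the pattern; the key prefix and TTL mirror the diff, while the client setup is an assumption:

import redis

r = redis.Redis()
TTL_SECONDS = 60  # CELERY_USER_FILE_DELETE_TASK_EXPIRES

def try_mark_queued(user_file_id: str) -> bool:
    # SET ... NX EX is atomic: only one beat cycle wins the right to enqueue,
    # and the key self-expires in lockstep with the Celery task's `expires`.
    key = f"da_lock:user_file_delete_queued:{user_file_id}"
    return bool(r.set(key, 1, ex=TTL_SECONDS, nx=True))

def on_worker_pickup(user_file_id: str) -> None:
    # The worker clears the guard first, so the next beat cycle can
    # re-enqueue if the file is still in DELETING status.
    r.delete(f"da_lock:user_file_delete_queued:{user_file_id}")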

View File

@@ -788,6 +788,29 @@ MAX_FILE_SIZE_BYTES = int(
os.environ.get("MAX_FILE_SIZE_BYTES") or 2 * 1024 * 1024 * 1024
) # 2GB in bytes
# Maximum embedded images allowed in a single file. PDFs (and other formats)
# with thousands of embedded images can OOM the user-file-processing worker
# because every image is decoded with PIL and then sent to the vision LLM.
# Enforced both at upload time (rejects the file) and during extraction
# (defense-in-depth: caps the number of images materialized).
#
# Clamped to >= 0; a negative env value would turn upload validation into
# always-fail and extraction into always-stop, which is never desired. 0
# disables image extraction entirely, which is a valid (if aggressive) setting.
MAX_EMBEDDED_IMAGES_PER_FILE = max(
0, int(os.environ.get("MAX_EMBEDDED_IMAGES_PER_FILE") or 500)
)
# Maximum embedded images allowed across all files in a single upload batch.
# Protects against the scenario where a user uploads many files that each
# fall under MAX_EMBEDDED_IMAGES_PER_FILE but aggregate to enough work
# (serial-ish celery fan-out plus per-image vision-LLM calls) to OOM the
# worker under concurrency or run up surprise latency/cost. Also clamped
# to >= 0.
MAX_EMBEDDED_IMAGES_PER_UPLOAD = max(
0, int(os.environ.get("MAX_EMBEDDED_IMAGES_PER_UPLOAD") or 1000)
)
# Use document summary for contextual rag
USE_DOCUMENT_SUMMARY = os.environ.get("USE_DOCUMENT_SUMMARY", "true").lower() == "true"
# Use chunk summary for contextual rag
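
Worked example of the clamping above: a negative override degrades to 0 (image extraction disabled) rather than turning upload validation into an always-fail check.

import os

os.environ["MAX_EMBEDDED_IMAGES_PER_FILE"] = "-5"
cap = max(0, int(os.environ.get("MAX_EMBEDDED_IMAGES_PER_FILE") or 500))
assert cap == 0  # clamped: extraction is simply disabled, never always-fail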

View File

@@ -177,6 +177,14 @@ USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH = 500
CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT = 5 * 60 # 5 minutes (in seconds)
# How long a queued user-file-delete task is valid before workers discard it.
# Mirrors the processing task expiry to prevent indefinite queue growth when
# files are stuck in DELETING status and the beat keeps re-enqueuing them.
CELERY_USER_FILE_DELETE_TASK_EXPIRES = 60 # 1 minute (in seconds)
# Max queue depth before the delete beat stops enqueuing more delete tasks.
USER_FILE_DELETE_MAX_QUEUE_DEPTH = 500
CELERY_SANDBOX_FILE_SYNC_LOCK_TIMEOUT = 5 * 60 # 5 minutes (in seconds)
DANSWER_REDIS_FUNCTION_LOCK_PREFIX = "da_function_lock:"
@@ -469,6 +477,9 @@ class OnyxRedisLocks:
USER_FILE_PROJECT_SYNC_QUEUED_PREFIX = "da_lock:user_file_project_sync_queued"
USER_FILE_DELETE_BEAT_LOCK = "da_lock:check_user_file_delete_beat"
USER_FILE_DELETE_LOCK_PREFIX = "da_lock:user_file_delete"
# Short-lived key set when a delete task is enqueued; cleared when the worker picks it up.
# Prevents the beat from re-enqueuing the same file while a delete task is already queued.
USER_FILE_DELETE_QUEUED_PREFIX = "da_lock:user_file_delete_queued"
# Release notes
RELEASE_NOTES_FETCH_LOCK = "da_lock:release_notes_fetch"

View File

@@ -157,9 +157,7 @@ def _execute_single_retrieval(
logger.error(f"Error executing request: {e}")
raise e
elif _is_rate_limit_error(e):
results = _execute_with_retry(
lambda: retrieval_function(**request_kwargs).execute()
)
results = _execute_with_retry(retrieval_function(**request_kwargs))
elif e.resp.status == 404 or e.resp.status == 403:
if continue_on_404_or_403:
logger.debug(f"Error executing request: {e}")

View File

@@ -33,6 +33,7 @@ from office365.runtime.queries.client_query import ClientQuery # type: ignore[i
from office365.sharepoint.client_context import ClientContext # type: ignore[import-untyped]
from pydantic import BaseModel
from pydantic import Field
from requests.exceptions import HTTPError
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.app_configs import REQUEST_TIMEOUT_SECONDS
@@ -268,6 +269,32 @@ class SizeCapExceeded(Exception):
"""Exception raised when the size cap is exceeded."""
def _log_and_raise_for_status(response: requests.Response) -> None:
"""Log the response text and raise for status."""
try:
response.raise_for_status()
except Exception:
logger.error(f"HTTP request failed: {response.text}")
raise
GRAPH_INVALID_REQUEST_CODE = "invalidRequest"
def _is_graph_invalid_request(response: requests.Response) -> bool:
"""Return True if the response body is the generic Graph API
``{"error": {"code": "invalidRequest", "message": "Invalid request"}}``
shape. This particular error has no actionable inner error code and is
returned by the site-pages endpoint when a page has a corrupt canvas layout
(e.g. duplicate web-part IDs — see SharePoint/sp-dev-docs#8822)."""
try:
body = response.json()
except Exception:
return False
error = body.get("error", {})
return error.get("code") == GRAPH_INVALID_REQUEST_CODE
def load_certificate_from_pfx(pfx_data: bytes, password: str) -> CertificateData | None:
"""Load certificate from .pfx file for MSAL authentication"""
try:
@@ -344,7 +371,7 @@ def _probe_remote_size(url: str, timeout: int) -> int | None:
"""Determine remote size using HEAD or a range GET probe. Returns None if unknown."""
try:
head_resp = requests.head(url, timeout=timeout, allow_redirects=True)
head_resp.raise_for_status()
_log_and_raise_for_status(head_resp)
cl = head_resp.headers.get("Content-Length")
if cl and cl.isdigit():
return int(cl)
@@ -359,7 +386,7 @@ def _probe_remote_size(url: str, timeout: int) -> int | None:
timeout=timeout,
stream=True,
) as range_resp:
range_resp.raise_for_status()
_log_and_raise_for_status(range_resp)
cr = range_resp.headers.get("Content-Range") # e.g., "bytes 0-0/12345"
if cr and "/" in cr:
total = cr.split("/")[-1]
@@ -384,7 +411,7 @@ def _download_with_cap(url: str, timeout: int, cap: int) -> bytes:
- Returns the full bytes if the content fits within `cap`.
"""
with requests.get(url, stream=True, timeout=timeout) as resp:
resp.raise_for_status()
_log_and_raise_for_status(resp)
# If the server provides Content-Length, prefer an early decision.
cl_header = resp.headers.get("Content-Length")
@@ -428,7 +455,7 @@ def _download_via_graph_api(
with requests.get(
url, headers=headers, stream=True, timeout=REQUEST_TIMEOUT_SECONDS
) as resp:
resp.raise_for_status()
_log_and_raise_for_status(resp)
buf = io.BytesIO()
for chunk in resp.iter_content(64 * 1024):
if not chunk:
@@ -1238,26 +1265,135 @@ class SharepointConnector(
site.execute_query()
site_id = site.id
page_url: str | None = (
f"{self.graph_api_base}/sites/{site_id}" f"/pages/microsoft.graph.sitePage"
site_pages_base = (
f"{self.graph_api_base}/sites/{site_id}/pages/microsoft.graph.sitePage"
)
page_url: str | None = site_pages_base
params: dict[str, str] | None = {"$expand": "canvasLayout"}
total_yielded = 0
yielded_ids: set[str] = set()
while page_url:
data = self._graph_api_get_json(page_url, params)
try:
data = self._graph_api_get_json(page_url, params)
except HTTPError as e:
if e.response is not None and e.response.status_code == 404:
logger.warning(f"Site page not found: {page_url}")
break
if (
e.response is not None
and e.response.status_code == 400
and _is_graph_invalid_request(e.response)
):
logger.warning(
f"$expand=canvasLayout on the LIST endpoint returned 400 "
f"for site {site_descriptor.url}. Falling back to "
f"per-page expansion."
)
yield from self._fetch_site_pages_individually(
site_pages_base, start, end, skip_ids=yielded_ids
)
return
raise
params = None # nextLink already embeds query params
for page in data.get("value", []):
if not _site_page_in_time_window(page, start, end):
continue
total_yielded += 1
page_id = page.get("id")
if page_id:
yielded_ids.add(page_id)
yield page
page_url = data.get("@odata.nextLink")
logger.debug(f"Yielded {total_yielded} site pages for {site_descriptor.url}")
def _fetch_site_pages_individually(
self,
site_pages_base: str,
start: datetime | None = None,
end: datetime | None = None,
skip_ids: set[str] | None = None,
) -> Generator[dict[str, Any], None, None]:
"""Fallback for _fetch_site_pages: list pages without $expand, then
expand canvasLayout on each page individually.
The Graph API's LIST endpoint can return 400 when $expand=canvasLayout
is used and *any* page in the site has a corrupt canvas layout (e.g.
duplicate web part IDs — see SharePoint/sp-dev-docs#8822). Since the
LIST expansion is all-or-nothing, a single bad page poisons the entire
response. This method works around it by fetching metadata first, then
expanding each page individually so only the broken page loses its
canvas content.
``skip_ids`` contains page IDs already yielded by the caller before the
fallback was triggered, preventing duplicates.
"""
page_url: str | None = site_pages_base
total_yielded = 0
_skip_ids = skip_ids or set()
while page_url:
try:
data = self._graph_api_get_json(page_url)
except HTTPError as e:
if e.response is not None and e.response.status_code == 404:
break
raise
for page in data.get("value", []):
if not _site_page_in_time_window(page, start, end):
continue
page_id = page.get("id")
if page_id and page_id in _skip_ids:
continue
if not page_id:
total_yielded += 1
yield page
continue
expanded = self._try_expand_single_page(site_pages_base, page_id, page)
total_yielded += 1
yield expanded
page_url = data.get("@odata.nextLink")
logger.debug(
f"Yielded {total_yielded} site pages (per-page expansion fallback)"
)
def _try_expand_single_page(
self,
site_pages_base: str,
page_id: str,
fallback_page: dict[str, Any],
) -> dict[str, Any]:
"""Try to GET a single page with $expand=canvasLayout. On 400, return
the metadata-only fallback so the page is still indexed (without canvas
content)."""
pages_collection = site_pages_base.removesuffix("/microsoft.graph.sitePage")
single_url = f"{pages_collection}/{page_id}/microsoft.graph.sitePage"
try:
return self._graph_api_get_json(single_url, {"$expand": "canvasLayout"})
except HTTPError as e:
if (
e.response is not None
and e.response.status_code == 400
and _is_graph_invalid_request(e.response)
):
page_name = fallback_page.get("name", page_id)
logger.warning(
f"$expand=canvasLayout failed for page '{page_name}' "
f"({page_id}). Indexing metadata only."
)
return fallback_page
raise
def _acquire_token(self) -> dict[str, Any]:
"""
Acquire token via MSAL
@@ -1309,7 +1445,7 @@ class SharepointConnector(
access_token = self._get_graph_access_token()
headers = {"Authorization": f"Bearer {access_token}"}
continue
response.raise_for_status()
_log_and_raise_for_status(response)
return response.json()
except (requests.ConnectionError, requests.Timeout):
if attempt < GRAPH_API_MAX_RETRIES:
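
Since `_is_graph_invalid_request` above only touches `response.json()`, the fallback trigger is easy to exercise with a duck-typed stub. An illustrative test sketch, not repo code:

class _StubResponse:
    def __init__(self, body: dict) -> None:
        self._body = body

    def json(self) -> dict:
        return self._body

bad = _StubResponse({"error": {"code": "invalidRequest", "message": "Invalid request"}})
ok = _StubResponse({"error": {"code": "itemNotFound", "message": "not found"}})

assert _is_graph_invalid_request(bad) is True   # generic 400 shape: per-page fallback
assert _is_graph_invalid_request(ok) is False   # actionable error: re-raise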

View File

@@ -583,6 +583,67 @@ def get_latest_index_attempt_for_cc_pair_id(
return db_session.execute(stmt).scalar_one_or_none()
def get_latest_successful_index_attempt_for_cc_pair_id(
db_session: Session,
connector_credential_pair_id: int,
secondary_index: bool = False,
) -> IndexAttempt | None:
"""Returns the most recent successful index attempt for the given cc pair,
filtered to the current (or future) search settings.
Uses MAX(id) semantics to match get_latest_index_attempts_by_status."""
status = IndexModelStatus.FUTURE if secondary_index else IndexModelStatus.PRESENT
stmt = (
select(IndexAttempt)
.where(
IndexAttempt.connector_credential_pair_id == connector_credential_pair_id,
IndexAttempt.status.in_(
[IndexingStatus.SUCCESS, IndexingStatus.COMPLETED_WITH_ERRORS]
),
)
.join(SearchSettings)
.where(SearchSettings.status == status)
.order_by(desc(IndexAttempt.id))
.limit(1)
)
return db_session.execute(stmt).scalar_one_or_none()
def get_latest_successful_index_attempts_parallel(
secondary_index: bool = False,
) -> Sequence[IndexAttempt]:
"""Batch version: returns the latest successful index attempt per cc pair.
Covers both SUCCESS and COMPLETED_WITH_ERRORS (matching is_successful())."""
model_status = (
IndexModelStatus.FUTURE if secondary_index else IndexModelStatus.PRESENT
)
with get_session_with_current_tenant() as db_session:
latest_ids = (
select(
IndexAttempt.connector_credential_pair_id,
func.max(IndexAttempt.id).label("max_id"),
)
.join(SearchSettings, IndexAttempt.search_settings_id == SearchSettings.id)
.where(
SearchSettings.status == model_status,
IndexAttempt.status.in_(
[IndexingStatus.SUCCESS, IndexingStatus.COMPLETED_WITH_ERRORS]
),
)
.group_by(IndexAttempt.connector_credential_pair_id)
.subquery()
)
stmt = select(IndexAttempt).join(
latest_ids,
(
IndexAttempt.connector_credential_pair_id
== latest_ids.c.connector_credential_pair_id
)
& (IndexAttempt.id == latest_ids.c.max_id),
)
return db_session.execute(stmt).scalars().all()
def count_index_attempts_for_cc_pair(
db_session: Session,
cc_pair_id: int,
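
The batch query above is the standard greatest-id-per-group pattern: group by cc pair, take MAX(id), then join back for the full rows. A toy SQLAlchemy sketch of the same shape (illustrative table, not Onyx's schema):

from sqlalchemy import Integer, String, func, select
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column

class Base(DeclarativeBase):
    pass

class Attempt(Base):
    __tablename__ = "attempt"
    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    group_id: Mapped[int] = mapped_column(Integer)
    status: Mapped[str] = mapped_column(String)

# Subquery: the winning id per group, restricted to successful rows.
latest = (
    select(Attempt.group_id, func.max(Attempt.id).label("max_id"))
    .where(Attempt.status.in_(["success", "completed_with_errors"]))
    .group_by(Attempt.group_id)
    .subquery()
)
# Join back to materialize the full latest row for each group.
stmt = select(Attempt).join(latest, Attempt.id == latest.c.max_id)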

View File

@@ -12,6 +12,7 @@ from sqlalchemy.orm import Session
from starlette.background import BackgroundTasks
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
from onyx.configs.constants import FileOrigin
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
@@ -144,6 +145,7 @@ def upload_files_to_user_files_with_indexing(
kwargs={"user_file_id": user_file.id, "tenant_id": tenant_id},
queue=OnyxCeleryQueues.USER_FILE_PROCESSING,
priority=OnyxCeleryPriority.HIGH,
expires=CELERY_USER_FILE_PROCESSING_TASK_EXPIRES,
)
logger.info(
f"Triggered indexing for user_file_id={user_file.id} "

View File

@@ -503,20 +503,31 @@ def query_vespa(
response = http_client.post(SEARCH_ENDPOINT, json=params)
response.raise_for_status()
except httpx.HTTPError as e:
error_base = "Failed to query Vespa"
logger.error(
f"{error_base}:\n"
f"Request URL: {e.request.url}\n"
f"Request Headers: {e.request.headers}\n"
f"Request Payload: {params}\n"
f"Exception: {str(e)}"
+ (
f"\nResponse: {e.response.text}"
if isinstance(e, httpx.HTTPStatusError)
else ""
)
response_text = (
e.response.text if isinstance(e, httpx.HTTPStatusError) else None
)
raise httpx.HTTPError(error_base) from e
status_code = (
e.response.status_code if isinstance(e, httpx.HTTPStatusError) else None
)
yql_value = params.get("yql", "")
yql_length = len(str(yql_value))
# Log each detail on its own line so log collectors capture them
# as separate entries rather than truncating a single multiline msg
logger.error(
f"Failed to query Vespa | "
f"status={status_code} | "
f"yql_length={yql_length} | "
f"exception={str(e)}"
)
if response_text:
logger.error(f"Vespa error response: {response_text[:1000]}")
logger.error(f"Vespa request URL: {e.request.url}")
# Re-raise with diagnostics so callers see what actually went wrong
raise httpx.HTTPError(
f"Failed to query Vespa (status={status_code}, " f"yql_length={yql_length})"
) from e
response_json: dict[str, Any] = response.json()

View File

@@ -43,6 +43,22 @@ def build_vespa_filters(
return ""
return f"({' or '.join(eq_elems)})"
def _build_weighted_set_filter(key: str, vals: list[str] | None) -> str:
"""Build a Vespa weightedSet filter for large value lists.
Uses Vespa's native weightedSet() operator instead of OR-chained
'contains' clauses. This is critical for fields like
access_control_list where a single user may have tens of thousands
of ACL entries — OR clauses at that scale cause Vespa to reject
the query with HTTP 400."""
if not key or not vals:
return ""
filtered = [val for val in vals if val]
if not filtered:
return ""
items = ", ".join(f'"{val}":1' for val in filtered)
return f"weightedSet({key}, {{{items}}})"
def _build_int_or_filters(key: str, vals: list[int] | None) -> str:
"""For an integer field filter.
Returns a bare clause or ""."""
@@ -157,11 +173,16 @@ def build_vespa_filters(
if filters.tenant_id and MULTI_TENANT:
filter_parts.append(build_tenant_id_filter(filters.tenant_id))
# ACL filters
# ACL filters — use weightedSet for efficient matching against the
# access_control_list weightedset<string> field. OR-chaining thousands
# of 'contains' clauses causes Vespa to reject the query (HTTP 400)
# for users with large numbers of external permission groups.
if filters.access_control_list is not None:
_append(
filter_parts,
_build_or_filters(ACCESS_CONTROL_LIST, filters.access_control_list),
_build_weighted_set_filter(
ACCESS_CONTROL_LIST, filters.access_control_list
),
)
# Source type filters
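
For a concrete sense of the generated YQL, the helper above turns a small ACL list into a single weightedSet clause (illustrative values):

vals = ["user:alice@example.com", "group:engineering"]
items = ", ".join(f'"{v}":1' for v in vals)
clause = f"weightedSet(access_control_list, {{{items}}})"
# weightedSet(access_control_list, {"user:alice@example.com":1, "group:engineering":1})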

View File

@@ -21,6 +21,7 @@ import chardet
import openpyxl
from PIL import Image
from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_FILE
from onyx.configs.constants import ONYX_METADATA_FILENAME
from onyx.configs.llm_configs import get_image_extraction_and_analysis_enabled
from onyx.file_processing.file_types import OnyxFileExtensions
@@ -176,6 +177,56 @@ def read_text_file(
return file_content_raw, metadata
def count_pdf_embedded_images(file: IO[Any], cap: int) -> int:
"""Return the number of embedded images in a PDF, short-circuiting at cap+1.
Used to reject PDFs whose image count would OOM the user-file-processing
worker during indexing. Returns a value > cap as a sentinel once the count
exceeds the cap, so callers do not iterate thousands of image objects just
to report a number. Returns 0 if the PDF cannot be parsed.
Owner-password-only PDFs (permission restrictions but no open password) are
counted normally — they decrypt with an empty string. Truly password-locked
PDFs are skipped (return 0) since we can't inspect them; the caller should
ensure the password-protected check runs first.
Always restores the file pointer to its original position before returning.
"""
from pypdf import PdfReader
try:
start_pos = file.tell()
except Exception:
start_pos = None
try:
if start_pos is not None:
file.seek(0)
reader = PdfReader(file)
if reader.is_encrypted:
# Try empty password first (owner-password-only PDFs); give up if that fails.
try:
if reader.decrypt("") == 0:
return 0
except Exception:
return 0
count = 0
for page in reader.pages:
for _ in page.images:
count += 1
if count > cap:
return count
return count
except Exception:
logger.warning("Failed to count embedded images in PDF", exc_info=True)
return 0
finally:
if start_pos is not None:
try:
file.seek(start_pos)
except Exception:
pass
def pdf_to_text(file: IO[Any], pdf_pass: str | None = None) -> str:
"""
Extract text from a PDF. For embedded images, a more complex approach is needed.
@@ -231,8 +282,27 @@ def read_pdf_file(
)
if extract_images:
image_cap = MAX_EMBEDDED_IMAGES_PER_FILE
images_processed = 0
cap_reached = False
for page_num, page in enumerate(pdf_reader.pages):
if cap_reached:
break
for image_file_object in page.images:
if images_processed >= image_cap:
# Defense-in-depth backstop. Upload-time validation
# should have rejected files exceeding the cap, but
# we also break here so a single oversized file can
# never pin a worker.
logger.warning(
"PDF embedded image cap reached (%d). "
"Skipping remaining images on page %d and beyond.",
image_cap,
page_num + 1,
)
cap_reached = True
break
image = Image.open(io.BytesIO(image_file_object.data))
img_byte_arr = io.BytesIO()
image.save(img_byte_arr, format=image.format)
@@ -245,6 +315,7 @@ def read_pdf_file(
image_callback(img_bytes, image_name)
else:
extracted_images.append((img_bytes, image_name))
images_processed += 1
return text, metadata, extracted_images
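
Illustrative use of the counter at upload-validation time: because the function short-circuits and returns a sentinel value greater than the cap, the caller only needs a single comparison. The file name and cap value here are assumptions:

from io import BytesIO

CAP = 500  # MAX_EMBEDDED_IMAGES_PER_FILE

with open("report.pdf", "rb") as f:  # hypothetical upload
    data = f.read()

count = count_pdf_embedded_images(BytesIO(data), CAP)
if count > CAP:
    # Counting stopped at CAP + 1, so this check is cheap even for huge PDFs.
    raise ValueError(f"PDF exceeds the {CAP} embedded-image cap")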

View File

@@ -19,12 +19,16 @@ class OnyxMimeTypes:
PLAIN_TEXT_MIME_TYPE,
"text/markdown",
"text/x-markdown",
"text/x-log",
"text/x-config",
"text/tab-separated-values",
"application/json",
"application/xml",
"text/xml",
"application/x-yaml",
"application/yaml",
"text/yaml",
"text/x-yaml",
}
DOCUMENT_MIME_TYPES = {
PDF_MIME_TYPE,

View File

@@ -88,9 +88,13 @@ def summarize_image_with_error_handling(
try:
return summarize_image_pipeline(llm, image_data, user_prompt, system_prompt)
except UnsupportedImageFormatError:
magic_hex = image_data[:8].hex() if image_data else "empty"
logger.info(
"Skipping image summarization due to unsupported MIME type for %s",
"Skipping image summarization due to unsupported MIME type "
"for %s (magic_bytes=%s, size=%d bytes)",
context_name,
magic_hex,
len(image_data),
)
return None
@@ -134,9 +138,23 @@ def _summarize_image(
return summary
except Exception as e:
error_msg = f"Summarization failed. Messages: {messages}"
error_msg = error_msg[:1024]
raise ValueError(error_msg) from e
# Extract structured details from LiteLLM exceptions when available,
# rather than dumping the full messages payload (which contains base64
# image data and produces enormous, unreadable error logs).
str_e = str(e)
if len(str_e) > 512:
str_e = str_e[:512] + "... (truncated)"
parts = [f"Summarization failed: {type(e).__name__}: {str_e}"]
status_code = getattr(e, "status_code", None)
llm_provider = getattr(e, "llm_provider", None)
model = getattr(e, "model", None)
if status_code is not None:
parts.append(f"status_code={status_code}")
if llm_provider is not None:
parts.append(f"llm_provider={llm_provider}")
if model is not None:
parts.append(f"model={model}")
raise ValueError(" | ".join(parts)) from e
def _encode_image_for_llm_prompt(image_data: bytes) -> str:

View File

@@ -1,5 +1,9 @@
import re
from enum import Enum
# Matches Slack channel references like <#C097NBWMY8Y> or <#C097NBWMY8Y|channel-name>
SLACK_CHANNEL_REF_PATTERN = re.compile(r"<#([A-Z0-9]+)(?:\|([^>]+))?>")
LIKE_BLOCK_ACTION_ID = "feedback-like"
DISLIKE_BLOCK_ACTION_ID = "feedback-dislike"
SHOW_EVERYONE_ACTION_ID = "show-everyone"
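
The regex captures both reference forms Slack emits, with and without the trailing channel name. A quick demonstration (sample IDs are made up):

import re

SLACK_CHANNEL_REF_PATTERN = re.compile(r"<#([A-Z0-9]+)(?:\|([^>]+))?>")

msg = "compare <#C097NBWMY8Y|general> with <#C123ABC>"
print(SLACK_CHANNEL_REF_PATTERN.findall(msg))
# [('C097NBWMY8Y', 'general'), ('C123ABC', '')] (name group empty for bare refs)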

View File

@@ -18,15 +18,18 @@ from onyx.configs.onyxbot_configs import ONYX_BOT_DISPLAY_ERROR_MSGS
from onyx.configs.onyxbot_configs import ONYX_BOT_NUM_RETRIES
from onyx.configs.onyxbot_configs import ONYX_BOT_REACT_EMOJI
from onyx.context.search.models import BaseFilters
from onyx.context.search.models import Tag
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import SlackChannelConfig
from onyx.db.models import User
from onyx.db.persona import get_persona_by_id
from onyx.db.users import get_user_by_email
from onyx.onyxbot.slack.blocks import build_slack_response_blocks
from onyx.onyxbot.slack.constants import SLACK_CHANNEL_REF_PATTERN
from onyx.onyxbot.slack.handlers.utils import send_team_member_message
from onyx.onyxbot.slack.models import SlackMessageInfo
from onyx.onyxbot.slack.models import ThreadMessage
from onyx.onyxbot.slack.utils import get_channel_from_id
from onyx.onyxbot.slack.utils import get_channel_name_from_id
from onyx.onyxbot.slack.utils import respond_in_thread_or_channel
from onyx.onyxbot.slack.utils import SlackRateLimiter
@@ -41,6 +44,51 @@ srl = SlackRateLimiter()
RT = TypeVar("RT") # return type
def resolve_channel_references(
message: str,
client: WebClient,
logger: OnyxLoggingAdapter,
) -> tuple[str, list[Tag]]:
"""Parse Slack channel references from a message, resolve IDs to names,
replace the raw markup with readable #channel-name, and return channel tags
for search filtering."""
tags: list[Tag] = []
channel_matches = SLACK_CHANNEL_REF_PATTERN.findall(message)
seen_channel_ids: set[str] = set()
for channel_id, channel_name_from_markup in channel_matches:
if channel_id in seen_channel_ids:
continue
seen_channel_ids.add(channel_id)
channel_name = channel_name_from_markup or None
if not channel_name:
try:
channel_info = get_channel_from_id(client=client, channel_id=channel_id)
channel_name = channel_info.get("name") or None
except Exception:
logger.warning(f"Failed to resolve channel name for ID: {channel_id}")
if not channel_name:
continue
# Replace raw Slack markup with readable channel name
if channel_name_from_markup:
message = message.replace(
f"<#{channel_id}|{channel_name_from_markup}>",
f"#{channel_name}",
)
else:
message = message.replace(
f"<#{channel_id}>",
f"#{channel_name}",
)
tags.append(Tag(tag_key="Channel", tag_value=channel_name))
return message, tags
def rate_limits(
client: WebClient, channel: str, thread_ts: Optional[str]
) -> Callable[[Callable[..., RT]], Callable[..., RT]]:
@@ -157,6 +205,20 @@ def handle_regular_answer(
user_message = messages[-1]
history_messages = messages[:-1]
# Resolve any <#CHANNEL_ID> references in the user message to readable
# channel names and extract channel tags for search filtering
resolved_message, channel_tags = resolve_channel_references(
message=user_message.message,
client=client,
logger=logger,
)
user_message = ThreadMessage(
message=resolved_message,
sender=user_message.sender,
role=user_message.role,
)
channel_name, _ = get_channel_name_from_id(
client=client,
channel_id=channel,
@@ -207,6 +269,7 @@ def handle_regular_answer(
source_type=None,
document_set=document_set_names,
time_cutoff=None,
tags=channel_tags if channel_tags else None,
)
new_message_request = SendMessageRequest(
@@ -231,6 +294,16 @@ def handle_regular_answer(
slack_context_str=slack_context_str,
)
# If a channel filter was applied but no results were found, override
# the LLM response to avoid hallucinated answers about unindexed channels
if channel_tags and not answer.citation_info and not answer.top_documents:
channel_names = ", ".join(f"#{tag.tag_value}" for tag in channel_tags)
answer.answer = (
f"No indexed data found for {channel_names}. "
"This channel may not be indexed, or there may be no messages "
"matching your query within it."
)
except Exception as e:
logger.exception(
f"Unable to process message - did not successfully answer "
@@ -285,6 +358,7 @@ def handle_regular_answer(
only_respond_if_citations
and not answer.citation_info
and not message_info.bypass_filters
and not channel_tags
):
logger.error(
f"Unable to find citations to answer: '{answer.answer}' - not answering!"

View File

@@ -43,6 +43,9 @@ from onyx.db.index_attempt import count_index_attempt_errors_for_cc_pair
from onyx.db.index_attempt import count_index_attempts_for_cc_pair
from onyx.db.index_attempt import get_index_attempt_errors_for_cc_pair
from onyx.db.index_attempt import get_latest_index_attempt_for_cc_pair_id
from onyx.db.index_attempt import (
get_latest_successful_index_attempt_for_cc_pair_id,
)
from onyx.db.index_attempt import get_paginated_index_attempts_for_cc_pair_id
from onyx.db.indexing_coordination import IndexingCoordination
from onyx.db.models import IndexAttempt
@@ -190,6 +193,11 @@ def get_cc_pair_full_info(
only_finished=False,
)
latest_successful_attempt = get_latest_successful_index_attempt_for_cc_pair_id(
db_session=db_session,
connector_credential_pair_id=cc_pair_id,
)
# Get latest permission sync attempt for status
latest_permission_sync_attempt = None
if cc_pair.access_type == AccessType.SYNC:
@@ -207,6 +215,11 @@ def get_cc_pair_full_info(
cc_pair_id=cc_pair_id,
),
last_index_attempt=latest_attempt,
last_successful_index_time=(
latest_successful_attempt.time_started
if latest_successful_attempt
else None
),
latest_deletion_attempt=get_deletion_attempt_snapshot(
connector_id=cc_pair.connector_id,
credential_id=cc_pair.credential_id,

View File

@@ -3,6 +3,7 @@ import math
import mimetypes
import os
import zipfile
from datetime import datetime
from io import BytesIO
from typing import Any
from typing import cast
@@ -109,6 +110,9 @@ from onyx.db.federated import fetch_all_federated_connectors_parallel
from onyx.db.index_attempt import get_index_attempts_for_cc_pair
from onyx.db.index_attempt import get_latest_index_attempts_by_status
from onyx.db.index_attempt import get_latest_index_attempts_parallel
from onyx.db.index_attempt import (
get_latest_successful_index_attempts_parallel,
)
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import FederatedConnector
from onyx.db.models import IndexAttempt
@@ -479,7 +483,9 @@ def is_zip_file(file: UploadFile) -> bool:
def upload_files(
files: list[UploadFile], file_origin: FileOrigin = FileOrigin.CONNECTOR
files: list[UploadFile],
file_origin: FileOrigin = FileOrigin.CONNECTOR,
unzip: bool = True,
) -> FileUploadResponse:
# Skip directories and known macOS metadata entries
@@ -502,31 +508,46 @@ def upload_files(
if seen_zip:
raise HTTPException(status_code=400, detail=SEEN_ZIP_DETAIL)
seen_zip = True
# Validate the zip by opening it (catches corrupt/non-zip files)
with zipfile.ZipFile(file.file, "r") as zf:
zip_metadata_file_id = save_zip_metadata_to_file_store(
zf, file_store
)
for file_info in zf.namelist():
if zf.getinfo(file_info).is_dir():
continue
if not should_process_file(file_info):
continue
sub_file_bytes = zf.read(file_info)
mime_type, __ = mimetypes.guess_type(file_info)
if mime_type is None:
mime_type = "application/octet-stream"
file_id = file_store.save_file(
content=BytesIO(sub_file_bytes),
display_name=os.path.basename(file_info),
file_origin=file_origin,
file_type=mime_type,
if unzip:
zip_metadata_file_id = save_zip_metadata_to_file_store(
zf, file_store
)
deduped_file_paths.append(file_id)
deduped_file_names.append(os.path.basename(file_info))
for file_info in zf.namelist():
if zf.getinfo(file_info).is_dir():
continue
if not should_process_file(file_info):
continue
sub_file_bytes = zf.read(file_info)
mime_type, __ = mimetypes.guess_type(file_info)
if mime_type is None:
mime_type = "application/octet-stream"
file_id = file_store.save_file(
content=BytesIO(sub_file_bytes),
display_name=os.path.basename(file_info),
file_origin=file_origin,
file_type=mime_type,
)
deduped_file_paths.append(file_id)
deduped_file_names.append(os.path.basename(file_info))
continue
# Store the zip as-is (unzip=False)
file.file.seek(0)
file_id = file_store.save_file(
content=file.file,
display_name=file.filename,
file_origin=file_origin,
file_type=file.content_type or "application/zip",
)
deduped_file_paths.append(file_id)
deduped_file_names.append(file.filename)
continue
# Since we can't render docx files in the UI,
@@ -613,9 +634,10 @@ def _fetch_and_check_file_connector_cc_pair_permissions(
@router.post("/admin/connector/file/upload", tags=PUBLIC_API_TAGS)
def upload_files_api(
files: list[UploadFile],
unzip: bool = True,
_: User = Depends(current_curator_or_admin_user),
) -> FileUploadResponse:
return upload_files(files, FileOrigin.OTHER)
return upload_files(files, FileOrigin.OTHER, unzip=unzip)
@router.get("/admin/connector/{connector_id}/files", tags=PUBLIC_API_TAGS)
@@ -1140,21 +1162,26 @@ def get_connector_indexing_status(
),
(),
),
# Get most recent successful index attempts
(
lambda: get_latest_successful_index_attempts_parallel(
request.secondary_index,
),
(),
),
]
if user and user.role == UserRole.ADMIN:
# For Admin users, we already got all the cc pair in editable_cc_pairs
# its not needed to get them again
(
editable_cc_pairs,
federated_connectors,
latest_index_attempts,
latest_finished_index_attempts,
latest_successful_index_attempts,
) = run_functions_tuples_in_parallel(parallel_functions)
non_editable_cc_pairs = []
else:
parallel_functions.append(
# Get non-editable connector/credential pairs
(
lambda: get_connector_credential_pairs_for_user_parallel(
user, False, None, True, True, False, True, request.source
@@ -1168,6 +1195,7 @@ def get_connector_indexing_status(
federated_connectors,
latest_index_attempts,
latest_finished_index_attempts,
latest_successful_index_attempts,
non_editable_cc_pairs,
) = run_functions_tuples_in_parallel(parallel_functions)
@@ -1179,6 +1207,9 @@ def get_connector_indexing_status(
latest_finished_index_attempts = cast(
list[IndexAttempt], latest_finished_index_attempts
)
latest_successful_index_attempts = cast(
list[IndexAttempt], latest_successful_index_attempts
)
document_count_info = get_document_counts_for_all_cc_pairs(db_session)
@@ -1188,42 +1219,48 @@ def get_connector_indexing_status(
for connector_id, credential_id, cnt in document_count_info
}
cc_pair_to_latest_index_attempt: dict[tuple[int, int], IndexAttempt] = {
(
attempt.connector_credential_pair.connector_id,
attempt.connector_credential_pair.credential_id,
): attempt
for attempt in latest_index_attempts
}
def _attempt_lookup(
attempts: list[IndexAttempt],
) -> dict[int, IndexAttempt]:
return {attempt.connector_credential_pair_id: attempt for attempt in attempts}
cc_pair_to_latest_finished_index_attempt: dict[tuple[int, int], IndexAttempt] = {
(
attempt.connector_credential_pair.connector_id,
attempt.connector_credential_pair.credential_id,
): attempt
for attempt in latest_finished_index_attempts
}
cc_pair_to_latest_index_attempt = _attempt_lookup(latest_index_attempts)
cc_pair_to_latest_finished_index_attempt = _attempt_lookup(
latest_finished_index_attempts
)
cc_pair_to_latest_successful_index_attempt = _attempt_lookup(
latest_successful_index_attempts
)
def build_connector_indexing_status(
cc_pair: ConnectorCredentialPair,
is_editable: bool,
) -> ConnectorIndexingStatusLite | None:
# TODO remove this to enable ingestion API
if cc_pair.name == "DefaultCCPair":
return None
latest_attempt = cc_pair_to_latest_index_attempt.get(
(cc_pair.connector_id, cc_pair.credential_id)
)
latest_attempt = cc_pair_to_latest_index_attempt.get(cc_pair.id)
latest_finished_attempt = cc_pair_to_latest_finished_index_attempt.get(
(cc_pair.connector_id, cc_pair.credential_id)
cc_pair.id
)
latest_successful_attempt = cc_pair_to_latest_successful_index_attempt.get(
cc_pair.id
)
doc_count = cc_pair_to_document_cnt.get(
(cc_pair.connector_id, cc_pair.credential_id), 0
)
return _get_connector_indexing_status_lite(
cc_pair, latest_attempt, latest_finished_attempt, is_editable, doc_count
cc_pair,
latest_attempt,
latest_finished_attempt,
(
latest_successful_attempt.time_started
if latest_successful_attempt
else None
),
is_editable,
doc_count,
)
# Process editable cc_pairs
@@ -1384,6 +1421,7 @@ def _get_connector_indexing_status_lite(
cc_pair: ConnectorCredentialPair,
latest_index_attempt: IndexAttempt | None,
latest_finished_index_attempt: IndexAttempt | None,
last_successful_index_time: datetime | None,
is_editable: bool,
document_cnt: int,
) -> ConnectorIndexingStatusLite | None:
@@ -1417,7 +1455,7 @@ def _get_connector_indexing_status_lite(
else None
),
last_status=latest_index_attempt.status if latest_index_attempt else None,
last_success=cc_pair.last_successful_index_time,
last_success=last_successful_index_time,
docs_indexed=document_cnt,
latest_index_attempt_docs_indexed=(
latest_index_attempt.total_docs_indexed if latest_index_attempt else None
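
With the `unzip` query parameter added to upload_files_api above, callers can ask the admin upload endpoint to store an archive verbatim instead of expanding it. A hedged client sketch: the host, port, route prefix, and auth are assumptions; only the route suffix and the `unzip` param come from the diff:

import requests

with open("docs.zip", "rb") as fh:
    resp = requests.post(
        "http://localhost:8080/api/admin/connector/file/upload",  # prefix assumed
        params={"unzip": "false"},  # store the zip as-is; default true expands it
        files={"files": ("docs.zip", fh, "application/zip")},
    )
resp.raise_for_status()
print(resp.json())  # FileUploadResponse with the stored file id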

View File

@@ -330,6 +330,7 @@ class CCPairFullInfo(BaseModel):
num_docs_indexed: int, # not ideal, but this must be computed separately
is_editable_for_current_user: bool,
indexing: bool,
last_successful_index_time: datetime | None = None,
last_permission_sync_attempt_status: PermissionSyncStatus | None = None,
permission_syncing: bool = False,
last_permission_sync_attempt_finished: datetime | None = None,
@@ -382,9 +383,7 @@ class CCPairFullInfo(BaseModel):
creator_email=(
cc_pair_model.creator.email if cc_pair_model.creator else None
),
last_indexed=(
last_index_attempt.time_started if last_index_attempt else None
),
last_indexed=last_successful_index_time,
last_pruned=cc_pair_model.last_pruned,
last_full_permission_sync=cls._get_last_full_permission_sync(cc_pair_model),
overall_indexing_speed=overall_indexing_speed,

View File

@@ -40,6 +40,8 @@ from sqlalchemy.orm import Session
from onyx.auth.users import current_user
from onyx.background.celery.versioned_apps.client import app as celery_app
from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_FILE
from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_UPLOAD
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
@@ -50,6 +52,9 @@ from onyx.db.engine.sql_engine import get_session
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.models import User
from onyx.document_index.interfaces import DocumentMetadata
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.file_processing.extract_file_text import count_pdf_embedded_images
from onyx.server.features.build.configs import USER_LIBRARY_MAX_FILE_SIZE_BYTES
from onyx.server.features.build.configs import USER_LIBRARY_MAX_FILES_PER_UPLOAD
from onyx.server.features.build.configs import USER_LIBRARY_MAX_TOTAL_SIZE_BYTES
@@ -127,6 +132,49 @@ class DeleteFileResponse(BaseModel):
# =============================================================================
def _looks_like_pdf(filename: str, content_type: str | None) -> bool:
"""True if either the filename or the content-type indicates a PDF.
Client-supplied ``content_type`` can be spoofed (e.g. a PDF uploaded with
``Content-Type: application/octet-stream``), so we also fall back to
extension-based detection via ``mimetypes.guess_type`` on the filename.
"""
if content_type == "application/pdf":
return True
guessed, _ = mimetypes.guess_type(filename)
return guessed == "application/pdf"
def _check_pdf_image_caps(
filename: str, content: bytes, content_type: str | None, batch_total: int
) -> int:
"""Enforce per-file and per-batch embedded-image caps for PDFs.
Returns the number of embedded images in this file (0 for non-PDFs) so
callers can update their running batch total. Raises OnyxError(INVALID_INPUT)
if either cap is exceeded.
"""
if not _looks_like_pdf(filename, content_type):
return 0
file_cap = MAX_EMBEDDED_IMAGES_PER_FILE
batch_cap = MAX_EMBEDDED_IMAGES_PER_UPLOAD
# Short-circuit at the larger cap so we get a useful count for both checks.
count = count_pdf_embedded_images(BytesIO(content), max(file_cap, batch_cap))
if count > file_cap:
raise OnyxError(
OnyxErrorCode.INVALID_INPUT,
f"PDF '{filename}' contains too many embedded images "
f"(more than {file_cap}). Try splitting the document into smaller files.",
)
if batch_total + count > batch_cap:
raise OnyxError(
OnyxErrorCode.INVALID_INPUT,
f"Upload would exceed the {batch_cap}-image limit across all "
f"files in this batch. Try uploading fewer image-heavy files at once.",
)
return count
def _sanitize_path(path: str) -> str:
"""Sanitize a file path, removing traversal attempts and normalizing.
@@ -356,6 +404,7 @@ async def upload_files(
uploaded_entries: list[LibraryEntryResponse] = []
total_size = 0
batch_image_total = 0
now = datetime.now(timezone.utc)
# Sanitize the base path
@@ -375,6 +424,14 @@ async def upload_files(
detail=f"File '{file.filename}' exceeds maximum size of {USER_LIBRARY_MAX_FILE_SIZE_BYTES // (1024*1024)}MB",
)
# Reject PDFs with an unreasonable per-file or per-batch image count
batch_image_total += _check_pdf_image_caps(
filename=file.filename or "unnamed",
content=content,
content_type=file.content_type,
batch_total=batch_image_total,
)
# Validate cumulative storage (existing + this upload batch)
total_size += file_size
if existing_usage + total_size > USER_LIBRARY_MAX_TOTAL_SIZE_BYTES:
@@ -473,6 +530,7 @@ async def upload_zip(
uploaded_entries: list[LibraryEntryResponse] = []
total_size = 0
batch_image_total = 0
# Extract zip contents into a subfolder named after the zip file
zip_name = api_sanitize_filename(file.filename or "upload")
@@ -511,6 +569,36 @@ async def upload_zip(
logger.warning(f"Skipping '{zip_info.filename}' - exceeds max size")
continue
# Skip PDFs that would trip the per-file or per-batch image
# cap (would OOM the user-file-processing worker). Matches
# /upload behavior but uses skip-and-warn to stay consistent
# with the zip path's handling of oversized files.
zip_file_name = zip_info.filename.split("/")[-1]
zip_content_type, _ = mimetypes.guess_type(zip_file_name)
if zip_content_type == "application/pdf":
image_count = count_pdf_embedded_images(
BytesIO(file_content),
max(
MAX_EMBEDDED_IMAGES_PER_FILE,
MAX_EMBEDDED_IMAGES_PER_UPLOAD,
),
)
if image_count > MAX_EMBEDDED_IMAGES_PER_FILE:
logger.warning(
"Skipping '%s' - exceeds %d per-file embedded-image cap",
zip_info.filename,
MAX_EMBEDDED_IMAGES_PER_FILE,
)
continue
if batch_image_total + image_count > MAX_EMBEDDED_IMAGES_PER_UPLOAD:
logger.warning(
"Skipping '%s' - would exceed %d per-batch embedded-image cap",
zip_info.filename,
MAX_EMBEDDED_IMAGES_PER_UPLOAD,
)
continue
batch_image_total += image_count
total_size += file_size
# Validate cumulative storage
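Both upload paths lean on count_pdf_embedded_images short-circuiting at the larger of the two caps. A minimal sketch of such a counter, assuming a pypdf-style reader; the real implementation in onyx.file_processing.extract_file_text may differ:

```python
from io import BytesIO

from pypdf import PdfReader  # assumption: a pypdf-style reader is available

def count_pdf_embedded_images_sketch(stream: BytesIO, cap: int) -> int:
    pos = stream.tell()
    try:
        count = 0
        for page in PdfReader(stream).pages:
            count += sum(1 for _ in page.images)  # per-page XObject images
            if count > cap:
                return count  # short-circuit: callers only need "over cap"
        return count
    finally:
        stream.seek(pos)  # restore the position, as the docstrings promise
```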

View File

@@ -10,7 +10,10 @@ from pydantic import Field
from sqlalchemy.orm import Session
from onyx.configs.app_configs import FILE_TOKEN_COUNT_THRESHOLD
from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_FILE
from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_UPLOAD
from onyx.db.llm import fetch_default_llm_model
from onyx.file_processing.extract_file_text import count_pdf_embedded_images
from onyx.file_processing.extract_file_text import extract_file_text
from onyx.file_processing.extract_file_text import get_file_ext
from onyx.file_processing.file_types import OnyxFileExtensions
@@ -156,6 +159,11 @@ def categorize_uploaded_files(
except RuntimeError as e:
logger.warning(f"Failed to get current tenant ID: {str(e)}")
# Running total of embedded images across PDFs in this batch. Once the
# aggregate cap is reached, subsequent PDFs in the same upload are
# rejected even if they'd individually fit under MAX_EMBEDDED_IMAGES_PER_FILE.
batch_image_total = 0
for upload in files:
try:
filename = get_safe_filename(upload)
@@ -204,6 +212,47 @@ def categorize_uploaded_files(
)
continue
# Reject PDFs with an unreasonable number of embedded images
# (either per-file or accumulated across this upload batch).
# A PDF with thousands of embedded images can OOM the
# user-file-processing celery worker because every image is
# decoded with PIL and then sent to the vision LLM.
if extension == ".pdf":
file_cap = MAX_EMBEDDED_IMAGES_PER_FILE
batch_cap = MAX_EMBEDDED_IMAGES_PER_UPLOAD
# Use the larger of the two caps as the short-circuit
# threshold so we get a useful count for both checks.
# count_pdf_embedded_images restores the stream position.
count = count_pdf_embedded_images(
upload.file, max(file_cap, batch_cap)
)
if count > file_cap:
results.rejected.append(
RejectedFile(
filename=filename,
reason=(
f"PDF contains too many embedded images "
f"(more than {file_cap}). Try splitting "
f"the document into smaller files."
),
)
)
continue
if batch_image_total + count > batch_cap:
results.rejected.append(
RejectedFile(
filename=filename,
reason=(
f"Upload would exceed the "
f"{batch_cap}-image limit across all "
f"files in this batch. Try uploading "
f"fewer image-heavy files at once."
),
)
)
continue
batch_image_total += count
text_content = extract_file_text(
file=upload.file,
file_name=filename,

View File

@@ -81,6 +81,7 @@ from onyx.server.manage.llm.models import VisionProviderResponse
from onyx.server.manage.llm.utils import generate_bedrock_display_name
from onyx.server.manage.llm.utils import generate_ollama_display_name
from onyx.server.manage.llm.utils import infer_vision_support
from onyx.server.manage.llm.utils import is_embedding_model
from onyx.server.manage.llm.utils import is_reasoning_model
from onyx.server.manage.llm.utils import is_valid_bedrock_model
from onyx.server.manage.llm.utils import ModelMetadata
@@ -1376,6 +1377,10 @@ def get_litellm_available_models(
try:
model_details = LitellmModelDetails.model_validate(model)
# Skip embedding models
if is_embedding_model(model_details.id):
continue
results.append(
LitellmFinalModelResponse(
provider_name=model_details.owned_by,

View File

@@ -366,3 +366,18 @@ def extract_vendor_from_model_name(model_name: str, provider: str) -> str | None
return None
return None
def is_embedding_model(model_name: str) -> bool:
"""Checks for if a model is an embedding model"""
from litellm import get_model_info
try:
# get_model_info raises on unknown models; default those to False
model_info = get_model_info(model_name)
except Exception:
return False
return model_info.get("mode") == "embedding"
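For context, litellm.get_model_info returns a metadata dict whose "mode" field distinguishes model families (values below are illustrative; litellm's model DB changes between releases):

```python
# get_model_info("text-embedding-3-small")  -> {"mode": "embedding", ...}
# get_model_info("gpt-4o")                  -> {"mode": "chat", ...}
# get_model_info("my-custom-local-model")   -> raises (not in the model DB)
```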

View File

@@ -1,3 +1,4 @@
import hashlib
import mimetypes
from io import BytesIO
from typing import Any
@@ -85,6 +86,14 @@ class PythonTool(Tool[PythonToolOverrideKwargs]):
def __init__(self, tool_id: int, emitter: Emitter) -> None:
super().__init__(emitter=emitter)
self._id = tool_id
# Cache of (filename, content_hash) -> ci_file_id to avoid re-uploading
# the same file on every tool call iteration within the same agent session.
# Filename is included in the key so two files with identical bytes but
# different names each get their own upload slot.
# TTL assumption: code-interpreter file TTLs (typically hours) greatly
# exceed the lifetime of a single agent session (at most MAX_LLM_CYCLES
# iterations, typically a few minutes), so stale-ID eviction is not needed.
self._uploaded_file_cache: dict[tuple[str, str], str] = {}
@property
def id(self) -> int:
@@ -184,8 +193,13 @@ class PythonTool(Tool[PythonToolOverrideKwargs]):
for ind, chat_file in enumerate(chat_files):
file_name = chat_file.filename or f"file_{ind}"
try:
# Upload to Code Interpreter
ci_file_id = client.upload_file(chat_file.content, file_name)
content_hash = hashlib.sha256(chat_file.content).hexdigest()
cache_key = (file_name, content_hash)
ci_file_id = self._uploaded_file_cache.get(cache_key)
if ci_file_id is None:
# Upload to Code Interpreter
ci_file_id = client.upload_file(chat_file.content, file_name)
self._uploaded_file_cache[cache_key] = ci_file_id
# Stage for execution
files_to_stage.append({"path": file_name, "file_id": ci_file_id})
@@ -303,15 +317,10 @@ class PythonTool(Tool[PythonToolOverrideKwargs]):
f"file {ci_file_id}: {e}"
)
# Cleanup staged input files
for file_mapping in files_to_stage:
try:
client.delete_file(file_mapping["file_id"])
except Exception as e:
logger.error(
f"Failed to delete Code Interpreter staged "
f"file {file_mapping['file_id']}: {e}"
)
# Note: staged input files are intentionally not deleted here because
# _uploaded_file_cache reuses their file_ids across iterations. They are
# orphaned when the session ends, but the code interpreter cleans up
# stale files via its own TTL.
# Emit file_ids once files are processed
if generated_file_ids:

View File

@@ -263,7 +263,7 @@ oauthlib==3.2.2
# via
# kubernetes
# requests-oauthlib
onyx-devtools==0.6.3
onyx-devtools==0.7.0
# via onyx
openai==2.14.0
# via

View File

@@ -45,6 +45,21 @@ npx playwright test <TEST_NAME>
Shared fixtures live in `backend/tests/conftest.py`. Test subdirectories can define
their own `conftest.py` for directory-scoped fixtures.
## Running Tests Repeatedly (`pytest-repeat`)
Use `pytest-repeat` to catch flaky tests by running them multiple times:
```bash
# Run a specific test 50 times
pytest --count=50 backend/tests/unit/path/to/test.py::test_name
# Stop on first failure with -x
pytest --count=50 -x backend/tests/unit/path/to/test.py::test_name
# Repeat an entire test file
pytest --count=10 backend/tests/unit/path/to/test_file.py
```
## Best Practices
### Use `enable_ee` fixture instead of inlining

View File

@@ -0,0 +1,274 @@
"""
External dependency unit tests for user file delete queue protections.
Verifies that the three mechanisms added to check_for_user_file_delete work
correctly:
1. Queue depth backpressure: when the broker queue exceeds
USER_FILE_DELETE_MAX_QUEUE_DEPTH, no new tasks are enqueued.
2. Per-file Redis guard key: if the guard key for a file already exists in
Redis, that file is skipped even though it is still in DELETING status.
3. Task expiry: every send_task call carries expires=
CELERY_USER_FILE_DELETE_TASK_EXPIRES so that stale queued tasks are
discarded by workers automatically.
Also verifies that delete_user_file_impl clears the guard key the moment
the task is picked up by a worker.
Uses real Redis (DB 0 via get_redis_client) and real PostgreSQL for UserFile
rows. The Celery app is provided as a MagicMock injected via a PropertyMock
on the task class so no real broker is needed.
"""
from collections.abc import Generator
from contextlib import contextmanager
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch
from unittest.mock import PropertyMock
from uuid import uuid4
from sqlalchemy.orm import Session
from onyx.background.celery.tasks.user_file_processing.tasks import (
_user_file_delete_lock_key,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
_user_file_delete_queued_key,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
check_for_user_file_delete,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
process_single_user_file_delete,
)
from onyx.configs.constants import CELERY_USER_FILE_DELETE_TASK_EXPIRES
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import USER_FILE_DELETE_MAX_QUEUE_DEPTH
from onyx.db.enums import UserFileStatus
from onyx.db.models import UserFile
from onyx.redis.redis_pool import get_redis_client
from tests.external_dependency_unit.conftest import create_test_user
from tests.external_dependency_unit.constants import TEST_TENANT_ID
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
_PATCH_QUEUE_LEN = (
"onyx.background.celery.tasks.user_file_processing.tasks.celery_get_queue_length"
)
def _create_deleting_user_file(db_session: Session, user_id: object) -> UserFile:
"""Insert a UserFile in DELETING status and return it."""
uf = UserFile(
id=uuid4(),
user_id=user_id,
file_id=f"test_file_{uuid4().hex[:8]}",
name=f"test_{uuid4().hex[:8]}.txt",
file_type="text/plain",
status=UserFileStatus.DELETING,
)
db_session.add(uf)
db_session.commit()
db_session.refresh(uf)
return uf
@contextmanager
def _patch_task_app(task: Any, mock_app: MagicMock) -> Generator[None, None, None]:
"""Patch the ``app`` property on *task*'s class so that ``self.app``
inside the task function returns *mock_app*.
With ``bind=True``, ``task.run`` is a bound method whose ``__self__`` is
the actual task instance. We patch ``app`` on that instance's class
(a unique Celery-generated Task subclass) so the mock is scoped to this
task only.
"""
task_instance = task.run.__self__
with patch.object(
type(task_instance), "app", new_callable=PropertyMock, return_value=mock_app
):
yield
# ---------------------------------------------------------------------------
# Test classes
# ---------------------------------------------------------------------------
class TestDeleteQueueDepthBackpressure:
"""Protection 1: skip all enqueuing when the broker queue is too deep."""
def test_no_tasks_enqueued_when_queue_over_limit(
self,
db_session: Session,
tenant_context: None, # noqa: ARG002
) -> None:
"""When the queue depth exceeds the limit the beat cycle is skipped."""
user = create_test_user(db_session, "del_bp_user")
_create_deleting_user_file(db_session, user.id)
mock_app = MagicMock()
with (
_patch_task_app(check_for_user_file_delete, mock_app),
patch(_PATCH_QUEUE_LEN, return_value=USER_FILE_DELETE_MAX_QUEUE_DEPTH + 1),
):
check_for_user_file_delete.run(tenant_id=TEST_TENANT_ID)
mock_app.send_task.assert_not_called()
class TestDeletePerFileGuardKey:
"""Protection 2: per-file Redis guard key prevents duplicate enqueue."""
def test_guarded_file_not_re_enqueued(
self,
db_session: Session,
tenant_context: None, # noqa: ARG002
) -> None:
"""A file whose guard key is already set in Redis is skipped."""
user = create_test_user(db_session, "del_guard_user")
uf = _create_deleting_user_file(db_session, user.id)
redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)
guard_key = _user_file_delete_queued_key(uf.id)
redis_client.setex(guard_key, CELERY_USER_FILE_DELETE_TASK_EXPIRES, 1)
mock_app = MagicMock()
try:
with (
_patch_task_app(check_for_user_file_delete, mock_app),
patch(_PATCH_QUEUE_LEN, return_value=0),
):
check_for_user_file_delete.run(tenant_id=TEST_TENANT_ID)
# send_task must not have been called with this specific file's ID
for call in mock_app.send_task.call_args_list:
kwargs = call.kwargs.get("kwargs", {})
assert kwargs.get("user_file_id") != str(
uf.id
), f"File {uf.id} should have been skipped because its guard key exists"
finally:
redis_client.delete(guard_key)
def test_guard_key_exists_in_redis_after_enqueue(
self,
db_session: Session,
tenant_context: None, # noqa: ARG002
) -> None:
"""After a file is enqueued its guard key is present in Redis with a TTL."""
user = create_test_user(db_session, "del_guard_set_user")
uf = _create_deleting_user_file(db_session, user.id)
redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)
guard_key = _user_file_delete_queued_key(uf.id)
redis_client.delete(guard_key) # clean slate
mock_app = MagicMock()
try:
with (
_patch_task_app(check_for_user_file_delete, mock_app),
patch(_PATCH_QUEUE_LEN, return_value=0),
):
check_for_user_file_delete.run(tenant_id=TEST_TENANT_ID)
assert redis_client.exists(
guard_key
), "Guard key should be set in Redis after enqueue"
ttl = int(redis_client.ttl(guard_key)) # type: ignore[arg-type]
assert (
0 < ttl <= CELERY_USER_FILE_DELETE_TASK_EXPIRES
), f"Guard key TTL {ttl}s is outside the expected range (0, {CELERY_USER_FILE_DELETE_TASK_EXPIRES}]"
finally:
redis_client.delete(guard_key)
class TestDeleteTaskExpiry:
"""Protection 3: every send_task call includes an expires value."""
def test_send_task_called_with_expires(
self,
db_session: Session,
tenant_context: None, # noqa: ARG002
) -> None:
"""send_task is called with the correct queue, task name, and expires."""
user = create_test_user(db_session, "del_expires_user")
uf = _create_deleting_user_file(db_session, user.id)
redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)
guard_key = _user_file_delete_queued_key(uf.id)
redis_client.delete(guard_key)
mock_app = MagicMock()
try:
with (
_patch_task_app(check_for_user_file_delete, mock_app),
patch(_PATCH_QUEUE_LEN, return_value=0),
):
check_for_user_file_delete.run(tenant_id=TEST_TENANT_ID)
# At least one task should have been submitted (for our file)
assert (
mock_app.send_task.call_count >= 1
), "Expected at least one task to be submitted"
# Every submitted task must carry expires
for call in mock_app.send_task.call_args_list:
assert call.args[0] == OnyxCeleryTask.DELETE_SINGLE_USER_FILE
assert call.kwargs.get("queue") == OnyxCeleryQueues.USER_FILE_DELETE
assert (
call.kwargs.get("expires") == CELERY_USER_FILE_DELETE_TASK_EXPIRES
), "Task must be submitted with the correct expires value to prevent stale task accumulation"
finally:
redis_client.delete(guard_key)
class TestDeleteWorkerClearsGuardKey:
"""process_single_user_file_delete removes the guard key when it picks up a task."""
def test_guard_key_deleted_on_pickup(
self,
tenant_context: None, # noqa: ARG002
) -> None:
"""The guard key is deleted before the worker does any real work.
We simulate an already-locked file so delete_user_file_impl returns
early, but crucially only after the guard key has been deleted.
"""
user_file_id = str(uuid4())
redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)
guard_key = _user_file_delete_queued_key(user_file_id)
# Simulate the guard key set when the beat enqueued the task
redis_client.setex(guard_key, CELERY_USER_FILE_DELETE_TASK_EXPIRES, 1)
assert redis_client.exists(guard_key), "Guard key must exist before pickup"
# Hold the per-file delete lock so the worker exits early without
# touching the database or file store.
lock_key = _user_file_delete_lock_key(user_file_id)
delete_lock = redis_client.lock(lock_key, timeout=10)
acquired = delete_lock.acquire(blocking=False)
assert acquired, "Should be able to acquire the delete lock for this test"
try:
process_single_user_file_delete.run(
user_file_id=user_file_id,
tenant_id=TEST_TENANT_ID,
)
finally:
if delete_lock.owned():
delete_lock.release()
assert not redis_client.exists(
guard_key
), "Guard key should be deleted when the worker picks up the task"

View File

@@ -1218,15 +1218,16 @@ def test_code_interpreter_receives_chat_files(
finally:
ci_mod.CodeInterpreterClient.__init__.__defaults__ = original_defaults
# Verify: file uploaded, code executed via streaming, staged file cleaned up
# Verify: file uploaded and code executed via streaming.
assert len(mock_ci_server.get_requests(method="POST", path="/v1/files")) == 1
assert (
len(mock_ci_server.get_requests(method="POST", path="/v1/execute/stream")) == 1
)
delete_requests = mock_ci_server.get_requests(method="DELETE")
assert len(delete_requests) == 1
assert delete_requests[0].path.startswith("/v1/files/")
# Staged input files are intentionally NOT deleted — PythonTool caches their
# file IDs across agent-loop iterations to avoid re-uploading on every call.
# The code interpreter cleans them up via its own TTL.
assert len(mock_ci_server.get_requests(method="DELETE")) == 0
execute_body = mock_ci_server.get_requests(
method="POST", path="/v1/execute/stream"

View File

@@ -0,0 +1,237 @@
"""
Integration tests for the "Last Indexed" time displayed on both the
per-connector detail page and the all-connectors listing page.
Expected behavior: "Last Indexed" = time_started of the most recent
successful index attempt for the cc pair, regardless of pagination.
Edge cases:
1. First page of index attempts is entirely errors — last_indexed should
still reflect the older successful attempt beyond page 1.
2. Credential swap — successful attempts, then failures after a
"credential change"; last_indexed should reflect the most recent
successful attempt.
3. Mix of statuses — only the most recent successful attempt matters.
4. COMPLETED_WITH_ERRORS counts as a success for last_indexed purposes.
"""
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from onyx.db.models import IndexingStatus
from onyx.server.documents.models import CCPairFullInfo
from onyx.server.documents.models import ConnectorIndexingStatusLite
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.connector import ConnectorManager
from tests.integration.common_utils.managers.credential import CredentialManager
from tests.integration.common_utils.managers.index_attempt import IndexAttemptManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestCCPair
from tests.integration.common_utils.test_models import DATestUser
def _wait_for_real_success(
cc_pair: DATestCCPair,
admin: DATestUser,
) -> None:
"""Wait for the initial index attempt to complete successfully."""
CCPairManager.wait_for_indexing_completion(
cc_pair,
after=datetime(2000, 1, 1, tzinfo=timezone.utc),
user_performing_action=admin,
timeout=120,
)
def _get_detail(cc_pair_id: int, admin: DATestUser) -> CCPairFullInfo:
result = CCPairManager.get_single(cc_pair_id, admin)
assert result is not None
return result
def _get_listing(cc_pair_id: int, admin: DATestUser) -> ConnectorIndexingStatusLite:
result = CCPairManager.get_indexing_status_by_id(cc_pair_id, admin)
assert result is not None
return result
def test_last_indexed_first_page_all_errors(reset: None) -> None: # noqa: ARG001
"""When the first page of index attempts is entirely errors but an
older successful attempt exists, both the detail page and the listing
page should still show the time of that successful attempt.
The detail page UI uses page size 8. We insert 10 failed attempts
more recent than the initial success to push the success off page 1.
"""
admin = UserManager.create(name="admin_first_page_errors")
cc_pair = CCPairManager.create_from_scratch(user_performing_action=admin)
_wait_for_real_success(cc_pair, admin)
# Baseline: last_success should be set from the initial successful run
listing_before = _get_listing(cc_pair.id, admin)
assert listing_before.last_success is not None
# 10 recent failures push the success off page 1
IndexAttemptManager.create_test_index_attempts(
num_attempts=10,
cc_pair_id=cc_pair.id,
status=IndexingStatus.FAILED,
error_msg="simulated failure",
base_time=datetime.now(tz=timezone.utc),
)
detail = _get_detail(cc_pair.id, admin)
listing = _get_listing(cc_pair.id, admin)
assert (
detail.last_indexed is not None
), "Detail page last_indexed is None even though a successful attempt exists"
assert (
listing.last_success is not None
), "Listing page last_success is None even though a successful attempt exists"
# Both surfaces must agree
assert detail.last_indexed == listing.last_success, (
f"Detail last_indexed={detail.last_indexed} != "
f"listing last_success={listing.last_success}"
)
def test_last_indexed_credential_swap_scenario(reset: None) -> None: # noqa: ARG001
"""Perform an actual credential swap: create connector + cred1 (cc_pair_1),
wait for success, then associate a new cred2 with the same connector
(cc_pair_2), wait for that to succeed, and inject failures on cc_pair_2.
cc_pair_2's last_indexed must reflect cc_pair_2's own success, not
cc_pair_1's older one. Both the detail page and listing page must agree.
"""
admin = UserManager.create(name="admin_cred_swap")
connector = ConnectorManager.create(user_performing_action=admin)
cred1 = CredentialManager.create(user_performing_action=admin)
cc_pair_1 = CCPairManager.create(
connector_id=connector.id,
credential_id=cred1.id,
user_performing_action=admin,
)
_wait_for_real_success(cc_pair_1, admin)
cred2 = CredentialManager.create(user_performing_action=admin, name="swapped-cred")
cc_pair_2 = CCPairManager.create(
connector_id=connector.id,
credential_id=cred2.id,
user_performing_action=admin,
)
_wait_for_real_success(cc_pair_2, admin)
listing_after_swap = _get_listing(cc_pair_2.id, admin)
assert listing_after_swap.last_success is not None
IndexAttemptManager.create_test_index_attempts(
num_attempts=10,
cc_pair_id=cc_pair_2.id,
status=IndexingStatus.FAILED,
error_msg="credential expired",
base_time=datetime.now(tz=timezone.utc),
)
detail = _get_detail(cc_pair_2.id, admin)
listing = _get_listing(cc_pair_2.id, admin)
assert detail.last_indexed is not None
assert listing.last_success is not None
assert detail.last_indexed == listing.last_success, (
f"Detail last_indexed={detail.last_indexed} != "
f"listing last_success={listing.last_success}"
)
def test_last_indexed_mixed_statuses(reset: None) -> None: # noqa: ARG001
"""Mix of in_progress, failed, and successful attempts. Only the most
recent successful attempt's time matters."""
admin = UserManager.create(name="admin_mixed")
cc_pair = CCPairManager.create_from_scratch(user_performing_action=admin)
_wait_for_real_success(cc_pair, admin)
now = datetime.now(tz=timezone.utc)
# Success 5 hours ago
IndexAttemptManager.create_test_index_attempts(
num_attempts=1,
cc_pair_id=cc_pair.id,
status=IndexingStatus.SUCCESS,
base_time=now - timedelta(hours=5),
)
# Failures 3 hours ago
IndexAttemptManager.create_test_index_attempts(
num_attempts=3,
cc_pair_id=cc_pair.id,
status=IndexingStatus.FAILED,
error_msg="transient failure",
base_time=now - timedelta(hours=3),
)
# In-progress 1 hour ago
IndexAttemptManager.create_test_index_attempts(
num_attempts=1,
cc_pair_id=cc_pair.id,
status=IndexingStatus.IN_PROGRESS,
base_time=now - timedelta(hours=1),
)
detail = _get_detail(cc_pair.id, admin)
listing = _get_listing(cc_pair.id, admin)
assert detail.last_indexed is not None
assert listing.last_success is not None
assert detail.last_indexed == listing.last_success, (
f"Detail last_indexed={detail.last_indexed} != "
f"listing last_success={listing.last_success}"
)
def test_last_indexed_completed_with_errors(reset: None) -> None: # noqa: ARG001
"""COMPLETED_WITH_ERRORS is treated as a successful attempt (matching
IndexingStatus.is_successful()). When it is the most recent "success"
and later attempts all failed, both surfaces should reflect its time."""
admin = UserManager.create(name="admin_completed_errors")
cc_pair = CCPairManager.create_from_scratch(user_performing_action=admin)
_wait_for_real_success(cc_pair, admin)
now = datetime.now(tz=timezone.utc)
# COMPLETED_WITH_ERRORS 2 hours ago
IndexAttemptManager.create_test_index_attempts(
num_attempts=1,
cc_pair_id=cc_pair.id,
status=IndexingStatus.COMPLETED_WITH_ERRORS,
base_time=now - timedelta(hours=2),
)
# 10 failures after — push everything else off page 1
IndexAttemptManager.create_test_index_attempts(
num_attempts=10,
cc_pair_id=cc_pair.id,
status=IndexingStatus.FAILED,
error_msg="post-partial failure",
base_time=now,
)
detail = _get_detail(cc_pair.id, admin)
listing = _get_listing(cc_pair.id, admin)
assert (
detail.last_indexed is not None
), "COMPLETED_WITH_ERRORS should count as a success for last_indexed"
assert (
listing.last_success is not None
), "COMPLETED_WITH_ERRORS should count as a success for last_success"
assert detail.last_indexed == listing.last_success, (
f"Detail last_indexed={detail.last_indexed} != "
f"listing last_success={listing.last_success}"
)

View File

@@ -0,0 +1,325 @@
"""Unit tests for SharepointConnector._fetch_site_pages error handling.
Covers 404 handling (classic sites / no modern pages) and 400
canvasLayout fallback (corrupt pages causing $expand=canvasLayout to
fail on the LIST endpoint).
"""
from __future__ import annotations
import json
from typing import Any
import pytest
from requests import Response
from requests.exceptions import HTTPError
from onyx.connectors.sharepoint.connector import GRAPH_INVALID_REQUEST_CODE
from onyx.connectors.sharepoint.connector import SharepointConnector
from onyx.connectors.sharepoint.connector import SiteDescriptor
SITE_URL = "https://tenant.sharepoint.com/sites/ClassicSite"
FAKE_SITE_ID = "tenant.sharepoint.com,abc123,def456"
PAGES_COLLECTION = f"https://graph.microsoft.com/v1.0/sites/{FAKE_SITE_ID}/pages"
SITE_PAGES_BASE = f"{PAGES_COLLECTION}/microsoft.graph.sitePage"
def _site_descriptor() -> SiteDescriptor:
return SiteDescriptor(url=SITE_URL, drive_name=None, folder_path=None)
def _make_http_error(
status_code: int,
error_code: str = "itemNotFound",
message: str = "Item not found",
) -> HTTPError:
body = {"error": {"code": error_code, "message": message}}
response = Response()
response.status_code = status_code
response._content = json.dumps(body).encode()
response.headers["Content-Type"] = "application/json"
return HTTPError(response=response)
def _setup_connector(
monkeypatch: pytest.MonkeyPatch, # noqa: ARG001
) -> SharepointConnector:
"""Create a connector with the graph client and site resolution mocked."""
connector = SharepointConnector(sites=[SITE_URL])
connector.graph_api_base = "https://graph.microsoft.com/v1.0"
mock_sites = type(
"FakeSites",
(),
{
"get_by_url": staticmethod(
lambda url: type( # noqa: ARG005
"Q",
(),
{
"execute_query": lambda self: None, # noqa: ARG005
"id": FAKE_SITE_ID,
},
)()
),
},
)()
connector._graph_client = type("FakeGraphClient", (), {"sites": mock_sites})()
return connector
def _patch_graph_api_get_json(
monkeypatch: pytest.MonkeyPatch,
fake_fn: Any,
) -> None:
monkeypatch.setattr(SharepointConnector, "_graph_api_get_json", fake_fn)
class TestFetchSitePages404:
def test_404_yields_no_pages(self, monkeypatch: pytest.MonkeyPatch) -> None:
"""A 404 from the Pages API should result in zero yielded pages."""
connector = _setup_connector(monkeypatch)
def fake_get_json(
self: SharepointConnector, # noqa: ARG001
url: str, # noqa: ARG001
params: dict[str, str] | None = None, # noqa: ARG001
) -> dict[str, Any]:
raise _make_http_error(404)
_patch_graph_api_get_json(monkeypatch, fake_get_json)
pages = list(connector._fetch_site_pages(_site_descriptor()))
assert pages == []
def test_404_does_not_raise(self, monkeypatch: pytest.MonkeyPatch) -> None:
"""A 404 must not propagate as an exception."""
connector = _setup_connector(monkeypatch)
def fake_get_json(
self: SharepointConnector, # noqa: ARG001
url: str, # noqa: ARG001
params: dict[str, str] | None = None, # noqa: ARG001
) -> dict[str, Any]:
raise _make_http_error(404)
_patch_graph_api_get_json(monkeypatch, fake_get_json)
for _ in connector._fetch_site_pages(_site_descriptor()):
pass
def test_non_404_http_error_still_raises(
self, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Non-404 HTTP errors (e.g. 403) must still propagate."""
connector = _setup_connector(monkeypatch)
def fake_get_json(
self: SharepointConnector, # noqa: ARG001
url: str, # noqa: ARG001
params: dict[str, str] | None = None, # noqa: ARG001
) -> dict[str, Any]:
raise _make_http_error(403)
_patch_graph_api_get_json(monkeypatch, fake_get_json)
with pytest.raises(HTTPError):
list(connector._fetch_site_pages(_site_descriptor()))
def test_successful_fetch_yields_pages(
self, monkeypatch: pytest.MonkeyPatch
) -> None:
"""When the API succeeds, pages should be yielded normally."""
connector = _setup_connector(monkeypatch)
fake_page = {
"id": "page-1",
"title": "Hello World",
"webUrl": f"{SITE_URL}/SitePages/Hello.aspx",
"lastModifiedDateTime": "2025-06-01T00:00:00Z",
}
def fake_get_json(
self: SharepointConnector, # noqa: ARG001
url: str, # noqa: ARG001
params: dict[str, str] | None = None, # noqa: ARG001
) -> dict[str, Any]:
return {"value": [fake_page]}
_patch_graph_api_get_json(monkeypatch, fake_get_json)
pages = list(connector._fetch_site_pages(_site_descriptor()))
assert len(pages) == 1
assert pages[0]["id"] == "page-1"
def test_404_on_second_page_stops_pagination(
self, monkeypatch: pytest.MonkeyPatch
) -> None:
"""If the first API page succeeds but a nextLink returns 404,
already-yielded pages are kept and iteration stops cleanly."""
connector = _setup_connector(monkeypatch)
call_count = 0
first_page = {
"id": "page-1",
"title": "First",
"webUrl": f"{SITE_URL}/SitePages/First.aspx",
"lastModifiedDateTime": "2025-06-01T00:00:00Z",
}
def fake_get_json(
self: SharepointConnector, # noqa: ARG001
url: str, # noqa: ARG001
params: dict[str, str] | None = None, # noqa: ARG001
) -> dict[str, Any]:
nonlocal call_count
call_count += 1
if call_count == 1:
return {
"value": [first_page],
"@odata.nextLink": "https://graph.microsoft.com/next",
}
raise _make_http_error(404)
_patch_graph_api_get_json(monkeypatch, fake_get_json)
pages = list(connector._fetch_site_pages(_site_descriptor()))
assert len(pages) == 1
assert pages[0]["id"] == "page-1"
class TestFetchSitePages400Fallback:
"""When $expand=canvasLayout on the LIST endpoint returns 400
invalidRequest, _fetch_site_pages should fall back to listing
without expansion, then expanding each page individually."""
GOOD_PAGE: dict[str, Any] = {
"id": "good-1",
"name": "Good.aspx",
"title": "Good Page",
"lastModifiedDateTime": "2025-06-01T00:00:00Z",
}
BAD_PAGE: dict[str, Any] = {
"id": "bad-1",
"name": "Bad.aspx",
"title": "Bad Page",
"lastModifiedDateTime": "2025-06-01T00:00:00Z",
}
GOOD_PAGE_EXPANDED: dict[str, Any] = {
**GOOD_PAGE,
"canvasLayout": {"horizontalSections": []},
}
def test_fallback_expands_good_pages_individually(
self, monkeypatch: pytest.MonkeyPatch
) -> None:
"""On 400 from the LIST expand, the connector should list without
expand, then GET each page individually with $expand=canvasLayout."""
connector = _setup_connector(monkeypatch)
good_page = self.GOOD_PAGE
bad_page = self.BAD_PAGE
good_page_expanded = self.GOOD_PAGE_EXPANDED
def fake_get_json(
self: SharepointConnector, # noqa: ARG001
url: str,
params: dict[str, str] | None = None,
) -> dict[str, Any]:
if url == SITE_PAGES_BASE and params == {"$expand": "canvasLayout"}:
raise _make_http_error(
400, GRAPH_INVALID_REQUEST_CODE, "Invalid request"
)
if url == SITE_PAGES_BASE and params is None:
return {"value": [good_page, bad_page]}
expand_params = {"$expand": "canvasLayout"}
if url == f"{PAGES_COLLECTION}/good-1/microsoft.graph.sitePage":
assert params == expand_params, f"Expected $expand params, got {params}"
return good_page_expanded
if url == f"{PAGES_COLLECTION}/bad-1/microsoft.graph.sitePage":
assert params == expand_params, f"Expected $expand params, got {params}"
raise _make_http_error(
400, GRAPH_INVALID_REQUEST_CODE, "Invalid request"
)
raise AssertionError(f"Unexpected call: {url} {params}")
_patch_graph_api_get_json(monkeypatch, fake_get_json)
pages = list(connector._fetch_site_pages(_site_descriptor()))
assert len(pages) == 2
assert pages[0].get("canvasLayout") is not None
assert pages[1].get("canvasLayout") is None
assert pages[1]["id"] == "bad-1"
def test_mid_pagination_400_does_not_duplicate(
self, monkeypatch: pytest.MonkeyPatch
) -> None:
"""If the first paginated batch succeeds but a later nextLink
returns 400, pages from the first batch must not be re-yielded
by the fallback."""
connector = _setup_connector(monkeypatch)
good_page = self.GOOD_PAGE
good_page_expanded = self.GOOD_PAGE_EXPANDED
bad_page = self.BAD_PAGE
second_page = {
"id": "page-2",
"name": "Second.aspx",
"title": "Second Page",
"lastModifiedDateTime": "2025-06-01T00:00:00Z",
}
next_link = "https://graph.microsoft.com/v1.0/next-page-link"
def fake_get_json(
self: SharepointConnector, # noqa: ARG001
url: str,
params: dict[str, str] | None = None,
) -> dict[str, Any]:
if url == SITE_PAGES_BASE and params == {"$expand": "canvasLayout"}:
return {
"value": [good_page],
"@odata.nextLink": next_link,
}
if url == next_link:
raise _make_http_error(
400, GRAPH_INVALID_REQUEST_CODE, "Invalid request"
)
if url == SITE_PAGES_BASE and params is None:
return {"value": [good_page, bad_page, second_page]}
expand_params = {"$expand": "canvasLayout"}
if url == f"{PAGES_COLLECTION}/good-1/microsoft.graph.sitePage":
assert params == expand_params, f"Expected $expand params, got {params}"
return good_page_expanded
if url == f"{PAGES_COLLECTION}/bad-1/microsoft.graph.sitePage":
assert params == expand_params, f"Expected $expand params, got {params}"
raise _make_http_error(
400, GRAPH_INVALID_REQUEST_CODE, "Invalid request"
)
if url == f"{PAGES_COLLECTION}/page-2/microsoft.graph.sitePage":
assert params == expand_params, f"Expected $expand params, got {params}"
return {**second_page, "canvasLayout": {"horizontalSections": []}}
raise AssertionError(f"Unexpected call: {url} {params}")
_patch_graph_api_get_json(monkeypatch, fake_get_json)
pages = list(connector._fetch_site_pages(_site_descriptor()))
ids = [p["id"] for p in pages]
assert ids == ["good-1", "bad-1", "page-2"]
def test_non_invalid_request_400_still_raises(
self, monkeypatch: pytest.MonkeyPatch
) -> None:
"""A 400 with a different error code (not invalidRequest) should
propagate, not trigger the fallback."""
connector = _setup_connector(monkeypatch)
def fake_get_json(
self: SharepointConnector, # noqa: ARG001
url: str, # noqa: ARG001
params: dict[str, str] | None = None, # noqa: ARG001
) -> dict[str, Any]:
raise _make_http_error(400, "badRequest", "Something else went wrong")
_patch_graph_api_get_json(monkeypatch, fake_get_json)
with pytest.raises(HTTPError):
list(connector._fetch_site_pages(_site_descriptor()))
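Read together, these tests pin down a specific control flow. A hedged reconstruction follows, reusing the imports at the top of this file; get_json, pages_url, and item_url are illustrative stand-ins, and the real logic lives in SharepointConnector._fetch_site_pages:

```python
from collections.abc import Callable, Iterator

def _is_invalid_request_sketch(e: HTTPError) -> bool:
    # hypothetical helper: compare the Graph error "code" field against
    # GRAPH_INVALID_REQUEST_CODE (matches the body _make_http_error builds)
    return e.response.json()["error"]["code"] == GRAPH_INVALID_REQUEST_CODE

def fetch_site_pages_sketch(
    get_json: Callable[..., dict],
    pages_url: str,
    item_url: Callable[[str], str],
) -> Iterator[dict]:
    expand = {"$expand": "canvasLayout"}
    yielded: set[str] = set()
    url, params = pages_url, expand
    while url:
        try:
            data = get_json(url, params)
        except HTTPError as e:
            status = e.response.status_code
            if status == 404:
                return  # classic site / no modern pages: stop cleanly
            if status == 400 and _is_invalid_request_sketch(e):
                break  # fall through to the no-expand fallback below
            raise  # 403s and other 400 codes still propagate
        for page in data["value"]:
            yielded.add(page["id"])
            yield page
        url, params = data.get("@odata.nextLink"), None
    else:
        return  # pagination finished without a 400; no fallback needed
    # Fallback: list without $expand, then expand each page individually.
    for page in get_json(pages_url, None)["value"]:
        if page["id"] in yielded:
            continue  # never re-yield pages from the successful batches
        try:
            yield get_json(item_url(page["id"]), expand)
        except HTTPError:
            yield page  # corrupt page: keep it without canvasLayout
```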

View File

@@ -0,0 +1,63 @@
"""
Unit test verifying that the upload API path sends tasks with expires=.
The upload_files_to_user_files_with_indexing function must include expires=
on every send_task call to prevent phantom task accumulation if the worker
is down or slow.
"""
from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4
from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.models import UserFile
from onyx.db.projects import upload_files_to_user_files_with_indexing
def _make_mock_user_file() -> MagicMock:
uf = MagicMock(spec=UserFile)
uf.id = str(uuid4())
return uf
@patch("onyx.db.projects.get_current_tenant_id", return_value="test_tenant")
@patch("onyx.db.projects.create_user_files")
@patch(
"onyx.background.celery.versioned_apps.client.app",
new_callable=MagicMock,
)
def test_send_task_includes_expires(
mock_client_app: MagicMock,
mock_create: MagicMock,
mock_tenant: MagicMock, # noqa: ARG001
) -> None:
"""Every send_task call from the upload path must include expires=."""
user_files = [_make_mock_user_file(), _make_mock_user_file()]
mock_create.return_value = MagicMock(
user_files=user_files,
rejected_files=[],
id_to_temp_id={},
)
mock_user = MagicMock()
mock_db_session = MagicMock()
upload_files_to_user_files_with_indexing(
files=[],
project_id=None,
user=mock_user,
temp_id_map=None,
db_session=mock_db_session,
)
assert mock_client_app.send_task.call_count == len(user_files)
for call in mock_client_app.send_task.call_args_list:
assert call.args[0] == OnyxCeleryTask.PROCESS_SINGLE_USER_FILE
assert call.kwargs.get("queue") == OnyxCeleryQueues.USER_FILE_PROCESSING
assert (
call.kwargs.get("expires") == CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
), "send_task must include expires= to prevent phantom task accumulation"

View File

@@ -0,0 +1,89 @@
"""
Unit tests for image summarization error handling.
Verifies that:
1. LLM errors produce actionable error messages (not base64 dumps)
2. Unsupported MIME type logs include the magic bytes and size
3. The ValueError raised on LLM failure preserves the original exception
"""
from unittest.mock import MagicMock
from unittest.mock import patch
import pytest
from onyx.file_processing.image_summarization import _summarize_image
from onyx.file_processing.image_summarization import summarize_image_with_error_handling
from onyx.file_processing.image_summarization import UnsupportedImageFormatError
class TestSummarizeImageErrorMessage:
"""_summarize_image must not dump base64 image data into error messages."""
def test_error_message_contains_exception_type_not_base64(self) -> None:
"""The ValueError should contain the original exception info, not message payloads."""
mock_llm = MagicMock()
mock_llm.invoke.side_effect = RuntimeError("Connection timeout")
# A fake base64-encoded image string (should NOT appear in the error)
fake_encoded = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUg..."
with pytest.raises(ValueError, match="RuntimeError: Connection timeout"):
_summarize_image(fake_encoded, mock_llm, query="test")
def test_error_message_does_not_contain_base64(self) -> None:
"""Ensure base64 data is never included in the error message."""
mock_llm = MagicMock()
mock_llm.invoke.side_effect = RuntimeError("API error")
fake_encoded = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA"
with pytest.raises(ValueError) as exc_info:
_summarize_image(fake_encoded, mock_llm)
error_str = str(exc_info.value)
assert "base64" not in error_str
assert "iVBOR" not in error_str
def test_original_exception_is_chained(self) -> None:
"""The ValueError should chain the original exception via __cause__."""
mock_llm = MagicMock()
original = RuntimeError("upstream failure")
mock_llm.invoke.side_effect = original
with pytest.raises(ValueError) as exc_info:
_summarize_image("data:image/png;base64,abc", mock_llm)
assert exc_info.value.__cause__ is original
class TestUnsupportedMimeTypeLogging:
"""summarize_image_with_error_handling should log useful info for unsupported formats."""
@patch(
"onyx.file_processing.image_summarization.summarize_image_pipeline",
side_effect=UnsupportedImageFormatError("unsupported"),
)
def test_logs_magic_bytes_and_size(
self, mock_pipeline: MagicMock # noqa: ARG002
) -> None:
"""The info log should include magic bytes hex and image size."""
mock_llm = MagicMock()
# TIFF magic bytes (not in the supported list)
image_data = b"\x49\x49\x2a\x00" + b"\x00" * 100
with patch("onyx.file_processing.image_summarization.logger") as mock_logger:
result = summarize_image_with_error_handling(
llm=mock_llm,
image_data=image_data,
context_name="test_image.tiff",
)
assert result is None
mock_logger.info.assert_called_once()
log_args = mock_logger.info.call_args
# Check the format string args contain magic bytes and size
assert "49492a00" in str(log_args)
assert "104" in str(log_args) # 4 + 100 bytes

View File

@@ -0,0 +1,141 @@
"""
Unit tests verifying that LiteLLM error details are extracted and surfaced
in image summarization error messages.
When the LLM call fails, the error handler should include the status_code,
llm_provider, and model from LiteLLM exceptions so operators can diagnose
the root cause (rate limit, content filter, unsupported vision, etc.)
without needing to dig through LiteLLM internals.
"""
from unittest.mock import MagicMock
import pytest
from onyx.file_processing.image_summarization import _summarize_image
def _make_litellm_style_error(
*,
message: str = "API error",
status_code: int | None = None,
llm_provider: str | None = None,
model: str | None = None,
) -> RuntimeError:
"""Create an exception with LiteLLM-style attributes."""
exc = RuntimeError(message)
if status_code is not None:
exc.status_code = status_code # type: ignore[attr-defined]
if llm_provider is not None:
exc.llm_provider = llm_provider # type: ignore[attr-defined]
if model is not None:
exc.model = model # type: ignore[attr-defined]
return exc
class TestLiteLLMErrorExtraction:
"""Verify that LiteLLM error attributes are included in the ValueError."""
def test_status_code_included(self) -> None:
mock_llm = MagicMock()
mock_llm.invoke.side_effect = _make_litellm_style_error(
message="Content filter triggered",
status_code=400,
llm_provider="azure",
model="gpt-4o",
)
with pytest.raises(ValueError, match="status_code=400"):
_summarize_image("data:image/png;base64,abc", mock_llm)
def test_llm_provider_included(self) -> None:
mock_llm = MagicMock()
mock_llm.invoke.side_effect = _make_litellm_style_error(
message="Bad request",
status_code=400,
llm_provider="azure",
)
with pytest.raises(ValueError, match="llm_provider=azure"):
_summarize_image("data:image/png;base64,abc", mock_llm)
def test_model_included(self) -> None:
mock_llm = MagicMock()
mock_llm.invoke.side_effect = _make_litellm_style_error(
message="Bad request",
model="gpt-4o",
)
with pytest.raises(ValueError, match="model=gpt-4o"):
_summarize_image("data:image/png;base64,abc", mock_llm)
def test_all_fields_in_single_message(self) -> None:
mock_llm = MagicMock()
mock_llm.invoke.side_effect = _make_litellm_style_error(
message="Rate limit exceeded",
status_code=429,
llm_provider="azure",
model="gpt-4o",
)
with pytest.raises(ValueError) as exc_info:
_summarize_image("data:image/png;base64,abc", mock_llm)
msg = str(exc_info.value)
assert "status_code=429" in msg
assert "llm_provider=azure" in msg
assert "model=gpt-4o" in msg
assert "Rate limit exceeded" in msg
def test_plain_exception_without_litellm_attrs(self) -> None:
"""Non-LiteLLM exceptions should still produce a useful message."""
mock_llm = MagicMock()
mock_llm.invoke.side_effect = ConnectionError("Connection refused")
with pytest.raises(ValueError) as exc_info:
_summarize_image("data:image/png;base64,abc", mock_llm)
msg = str(exc_info.value)
assert "ConnectionError" in msg
assert "Connection refused" in msg
# Should not contain status_code/llm_provider/model
assert "status_code" not in msg
assert "llm_provider" not in msg
def test_no_base64_in_error(self) -> None:
"""Error messages must not contain the full base64 image payload.
Some LiteLLM exceptions echo the request body (including base64 images)
in their message. The truncation guard ensures the bulk of such a
payload is stripped from the re-raised ValueError.
"""
mock_llm = MagicMock()
# Build a long base64-like payload that exceeds the 512-char truncation
fake_b64_payload = "iVBORw0KGgo" * 100 # ~1100 chars
fake_b64 = f"data:image/png;base64,{fake_b64_payload}"
mock_llm.invoke.side_effect = RuntimeError(
f"Request failed for payload: {fake_b64}"
)
with pytest.raises(ValueError) as exc_info:
_summarize_image(fake_b64, mock_llm)
msg = str(exc_info.value)
# The full payload must not appear (truncation should have kicked in)
assert fake_b64_payload not in msg
assert "truncated" in msg
def test_long_error_message_truncated(self) -> None:
"""Exception messages longer than 512 chars are truncated."""
mock_llm = MagicMock()
long_msg = "x" * 1000
mock_llm.invoke.side_effect = RuntimeError(long_msg)
with pytest.raises(ValueError) as exc_info:
_summarize_image("data:image/png;base64,abc", mock_llm)
msg = str(exc_info.value)
assert "truncated" in msg
# The full 1000-char string should not appear
assert long_msg not in msg
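Combined, these assertions imply roughly the following formatting helper; a hedged sketch, not the module's actual code:

```python
_MAX_ERROR_DETAIL_LEN = 512  # assumption: the truncation threshold the tests probe

def format_llm_error_sketch(exc: Exception) -> str:
    parts = [f"{type(exc).__name__}: {exc}"]
    # LiteLLM exceptions carry these extra attributes; plain exceptions don't.
    for attr in ("status_code", "llm_provider", "model"):
        value = getattr(exc, attr, None)
        if value is not None:
            parts.append(f"{attr}={value}")
    detail = " ".join(parts)
    if len(detail) > _MAX_ERROR_DETAIL_LEN:
        detail = detail[:_MAX_ERROR_DETAIL_LEN] + " ...(truncated)"
    return detail
```

The re-raise would then look something like raise ValueError(format_llm_error_sketch(e)) from e, which also satisfies the __cause__-chaining test in the earlier file.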

View File

@@ -26,14 +26,6 @@ class TestIsTrueOpenAIModel:
"""Test that real OpenAI GPT-4o-mini model is correctly identified."""
assert is_true_openai_model(LlmProviderNames.OPENAI, "gpt-4o-mini") is True
def test_real_openai_o1_preview(self) -> None:
"""Test that real OpenAI o1-preview reasoning model is correctly identified."""
assert is_true_openai_model(LlmProviderNames.OPENAI, "o1-preview") is True
def test_real_openai_o1_mini(self) -> None:
"""Test that real OpenAI o1-mini reasoning model is correctly identified."""
assert is_true_openai_model(LlmProviderNames.OPENAI, "o1-mini") is True
def test_openai_with_provider_prefix(self) -> None:
"""Test that OpenAI model with provider prefix is correctly identified."""
assert is_true_openai_model(LlmProviderNames.OPENAI, "openai/gpt-4") is False

View File

@@ -0,0 +1,204 @@
"""Tests for Slack channel reference resolution and tag filtering
in handle_regular_answer.py."""
from unittest.mock import MagicMock
from slack_sdk.errors import SlackApiError
from onyx.context.search.models import Tag
from onyx.onyxbot.slack.constants import SLACK_CHANNEL_REF_PATTERN
from onyx.onyxbot.slack.handlers.handle_regular_answer import resolve_channel_references
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _mock_client_with_channels(
channel_map: dict[str, str],
) -> MagicMock:
"""Return a mock WebClient where conversations_info resolves IDs to names."""
client = MagicMock()
def _conversations_info(channel: str) -> MagicMock:
if channel in channel_map:
resp = MagicMock()
resp.validate = MagicMock()
resp.__getitem__ = lambda _self, key: {
"channel": {
"name": channel_map[channel],
"is_im": False,
"is_mpim": False,
}
}[key]
return resp
raise SlackApiError("channel_not_found", response=MagicMock())
client.conversations_info = _conversations_info
return client
def _mock_logger() -> MagicMock:
return MagicMock()
# ---------------------------------------------------------------------------
# SLACK_CHANNEL_REF_PATTERN regex tests
# ---------------------------------------------------------------------------
class TestSlackChannelRefPattern:
def test_matches_bare_channel_id(self) -> None:
matches = SLACK_CHANNEL_REF_PATTERN.findall("<#C097NBWMY8Y>")
assert matches == [("C097NBWMY8Y", "")]
def test_matches_channel_id_with_name(self) -> None:
matches = SLACK_CHANNEL_REF_PATTERN.findall("<#C097NBWMY8Y|eng-infra>")
assert matches == [("C097NBWMY8Y", "eng-infra")]
def test_matches_multiple_channels(self) -> None:
msg = "compare <#C111AAA> and <#C222BBB|general>"
matches = SLACK_CHANNEL_REF_PATTERN.findall(msg)
assert len(matches) == 2
assert ("C111AAA", "") in matches
assert ("C222BBB", "general") in matches
def test_no_match_on_plain_text(self) -> None:
matches = SLACK_CHANNEL_REF_PATTERN.findall("no channels here")
assert matches == []
def test_no_match_on_user_mention(self) -> None:
matches = SLACK_CHANNEL_REF_PATTERN.findall("<@U12345>")
assert matches == []
# ---------------------------------------------------------------------------
# resolve_channel_references tests
# ---------------------------------------------------------------------------
class TestResolveChannelReferences:
def test_resolves_bare_channel_id_via_api(self) -> None:
client = _mock_client_with_channels({"C097NBWMY8Y": "eng-infra"})
logger = _mock_logger()
message, tags = resolve_channel_references(
message="summary of <#C097NBWMY8Y> this week",
client=client,
logger=logger,
)
assert message == "summary of #eng-infra this week"
assert len(tags) == 1
assert tags[0] == Tag(tag_key="Channel", tag_value="eng-infra")
def test_uses_name_from_pipe_format_without_api_call(self) -> None:
client = MagicMock()
logger = _mock_logger()
message, tags = resolve_channel_references(
message="check <#C097NBWMY8Y|eng-infra> for updates",
client=client,
logger=logger,
)
assert message == "check #eng-infra for updates"
assert tags == [Tag(tag_key="Channel", tag_value="eng-infra")]
# Should NOT have called the API since name was in the markup
client.conversations_info.assert_not_called()
def test_multiple_channels(self) -> None:
client = _mock_client_with_channels(
{
"C111AAA": "eng-infra",
"C222BBB": "eng-general",
}
)
logger = _mock_logger()
message, tags = resolve_channel_references(
message="compare <#C111AAA> and <#C222BBB>",
client=client,
logger=logger,
)
assert "#eng-infra" in message
assert "#eng-general" in message
assert "<#" not in message
assert len(tags) == 2
tag_values = {t.tag_value for t in tags}
assert tag_values == {"eng-infra", "eng-general"}
def test_no_channel_references_returns_unchanged(self) -> None:
client = MagicMock()
logger = _mock_logger()
message, tags = resolve_channel_references(
message="just a normal message with no channels",
client=client,
logger=logger,
)
assert message == "just a normal message with no channels"
assert tags == []
def test_api_failure_skips_channel_gracefully(self) -> None:
# Client that fails for all channel lookups
client = _mock_client_with_channels({})
logger = _mock_logger()
message, tags = resolve_channel_references(
message="check <#CBADID123>",
client=client,
logger=logger,
)
# Message should remain unchanged for the failed channel
assert "<#CBADID123>" in message
assert tags == []
logger.warning.assert_called_once()
def test_partial_failure_resolves_what_it_can(self) -> None:
# Only one of two channels resolves
client = _mock_client_with_channels({"C111AAA": "eng-infra"})
logger = _mock_logger()
message, tags = resolve_channel_references(
message="compare <#C111AAA> and <#CBADID123>",
client=client,
logger=logger,
)
assert "#eng-infra" in message
assert "<#CBADID123>" in message # failed one stays raw
assert len(tags) == 1
assert tags[0].tag_value == "eng-infra"
def test_duplicate_channel_produces_single_tag(self) -> None:
client = _mock_client_with_channels({"C111AAA": "eng-infra"})
logger = _mock_logger()
message, tags = resolve_channel_references(
message="summarize <#C111AAA> and compare with <#C111AAA>",
client=client,
logger=logger,
)
assert message == "summarize #eng-infra and compare with #eng-infra"
assert len(tags) == 1
assert tags[0].tag_value == "eng-infra"
def test_mixed_pipe_and_bare_formats(self) -> None:
client = _mock_client_with_channels({"C222BBB": "random"})
logger = _mock_logger()
message, tags = resolve_channel_references(
message="see <#C111AAA|eng-infra> and <#C222BBB>",
client=client,
logger=logger,
)
assert "#eng-infra" in message
assert "#random" in message
assert len(tags) == 2
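Taken together, the tests pin down the resolution algorithm. A hedged sketch, assuming SLACK_CHANNEL_REF_PATTERN captures the channel ID and optional name as its two groups (the real code lives in handle_regular_answer.py):

```python
import re
from typing import Any

def resolve_channel_references_sketch(
    message: str, client: Any, logger: Any
) -> tuple[str, list[Tag]]:
    tags: list[Tag] = []
    seen: set[str] = set()

    def _replace(match: re.Match) -> str:
        channel_id, name = match.group(1), match.group(2)
        if not name:  # bare <#C123...>: resolve the name via the Slack API
            try:
                resp = client.conversations_info(channel=channel_id)
                name = resp["channel"]["name"]
            except SlackApiError:
                logger.warning(f"Could not resolve channel {channel_id}")
                return match.group(0)  # keep the raw reference untouched
        if name not in seen:  # duplicate references produce a single tag
            seen.add(name)
            tags.append(Tag(tag_key="Channel", tag_value=name))
        return f"#{name}"

    return SLACK_CHANNEL_REF_PATTERN.sub(_replace, message), tags
```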

View File

@@ -3,6 +3,7 @@
from onyx.server.manage.llm.utils import generate_bedrock_display_name
from onyx.server.manage.llm.utils import generate_ollama_display_name
from onyx.server.manage.llm.utils import infer_vision_support
from onyx.server.manage.llm.utils import is_embedding_model
from onyx.server.manage.llm.utils import is_reasoning_model
from onyx.server.manage.llm.utils import is_valid_bedrock_model
from onyx.server.manage.llm.utils import strip_openrouter_vendor_prefix
@@ -209,3 +210,35 @@ class TestIsReasoningModel:
is_reasoning_model("anthropic/claude-3-5-sonnet", "Claude 3.5 Sonnet")
is False
)
class TestIsEmbeddingModel:
"""Tests for embedding model detection."""
def test_openai_embedding_ada(self) -> None:
assert is_embedding_model("text-embedding-ada-002") is True
def test_openai_embedding_3_small(self) -> None:
assert is_embedding_model("text-embedding-3-small") is True
def test_openai_embedding_3_large(self) -> None:
assert is_embedding_model("text-embedding-3-large") is True
def test_cohere_embed_model(self) -> None:
assert is_embedding_model("embed-english-v3.0") is True
def test_bedrock_titan_embed(self) -> None:
assert is_embedding_model("amazon.titan-embed-text-v1") is True
def test_gpt4o_not_embedding(self) -> None:
assert is_embedding_model("gpt-4o") is False
def test_gpt4_not_embedding(self) -> None:
assert is_embedding_model("gpt-4") is False
def test_dall_e_not_embedding(self) -> None:
assert is_embedding_model("dall-e-3") is False
def test_unknown_custom_model_not_embedding(self) -> None:
"""Custom/local models not in litellm's model DB should default to False."""
assert is_embedding_model("my-custom-local-model-v1") is False

View File

@@ -0,0 +1,109 @@
import io
import zipfile
from unittest.mock import MagicMock
from unittest.mock import patch
from zipfile import BadZipFile
import pytest
from fastapi import UploadFile
from starlette.datastructures import Headers
from onyx.configs.constants import FileOrigin
from onyx.server.documents.connector import upload_files
def _create_test_zip() -> bytes:
"""Create a simple in-memory zip file containing two text files."""
buf = io.BytesIO()
with zipfile.ZipFile(buf, "w") as zf:
zf.writestr("file1.txt", "hello")
zf.writestr("file2.txt", "world")
return buf.getvalue()
def _make_upload_file(content: bytes, filename: str, content_type: str) -> UploadFile:
return UploadFile(
file=io.BytesIO(content),
filename=filename,
headers=Headers({"content-type": content_type}),
)
@patch("onyx.server.documents.connector.get_default_file_store")
def test_upload_zip_with_unzip_true_extracts_files(
mock_get_store: MagicMock,
) -> None:
"""When unzip=True (default), a zip upload is extracted into individual files."""
mock_store = MagicMock()
mock_store.save_file.side_effect = lambda **kwargs: f"id-{kwargs['display_name']}"
mock_get_store.return_value = mock_store
zip_bytes = _create_test_zip()
upload = _make_upload_file(zip_bytes, "test.zip", "application/zip")
result = upload_files([upload], FileOrigin.CONNECTOR)
# Should have extracted the two individual files, not stored the zip itself
assert len(result.file_paths) == 2
assert "id-file1.txt" in result.file_paths
assert "id-file2.txt" in result.file_paths
assert "file1.txt" in result.file_names
assert "file2.txt" in result.file_names
@patch("onyx.server.documents.connector.get_default_file_store")
def test_upload_zip_with_unzip_false_stores_zip_as_is(
mock_get_store: MagicMock,
) -> None:
"""When unzip=False, the zip file is stored as-is without extraction."""
mock_store = MagicMock()
mock_store.save_file.return_value = "zip-file-id"
mock_get_store.return_value = mock_store
zip_bytes = _create_test_zip()
upload = _make_upload_file(zip_bytes, "site_export.zip", "application/zip")
result = upload_files([upload], FileOrigin.CONNECTOR, unzip=False)
# Should store exactly one file (the zip itself)
assert len(result.file_paths) == 1
assert result.file_paths[0] == "zip-file-id"
assert result.file_names == ["site_export.zip"]
# No zip metadata should be created
assert result.zip_metadata_file_id is None
# Verify the stored content is a valid zip
saved_content: io.BytesIO = mock_store.save_file.call_args[1]["content"]
saved_content.seek(0)
with zipfile.ZipFile(saved_content, "r") as zf:
assert set(zf.namelist()) == {"file1.txt", "file2.txt"}
@patch("onyx.server.documents.connector.get_default_file_store")
def test_upload_invalid_zip_with_unzip_false_raises(
mock_get_store: MagicMock,
) -> None:
"""An invalid zip is rejected even when unzip=False (validation still runs)."""
mock_get_store.return_value = MagicMock()
bad_zip = _make_upload_file(b"not a zip", "bad.zip", "application/zip")
with pytest.raises(BadZipFile):
upload_files([bad_zip], FileOrigin.CONNECTOR, unzip=False)
@patch("onyx.server.documents.connector.get_default_file_store")
def test_upload_multiple_zips_rejected_when_unzip_false(
mock_get_store: MagicMock,
) -> None:
"""The seen_zip guard rejects a second zip even when unzip=False."""
mock_store = MagicMock()
mock_store.save_file.return_value = "zip-id"
mock_get_store.return_value = mock_store
zip_bytes = _create_test_zip()
zip1 = _make_upload_file(zip_bytes, "a.zip", "application/zip")
zip2 = _make_upload_file(zip_bytes, "b.zip", "application/zip")
with pytest.raises(Exception, match="Only one zip file"):
upload_files([zip1, zip2], FileOrigin.CONNECTOR, unzip=False)
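
Taken together, these tests fix an ordering inside upload_files: zip validation and the single-zip guard always run, and only extraction is gated on the unzip flag. A rough sketch of that flow with hypothetical helper names; the real endpoint also handles zip metadata and non-zip uploads:

```python
import io
import zipfile

def handle_zip_upload(upload, file_store, unzip: bool) -> list[str]:
    # Validation is unconditional, so a corrupt archive raises BadZipFile
    # even when unzip=False.
    archive = zipfile.ZipFile(upload.file)
    if not unzip:
        upload.file.seek(0)
        file_id = file_store.save_file(
            content=upload.file, display_name=upload.filename
        )
        return [file_id]
    return [
        file_store.save_file(
            content=io.BytesIO(archive.read(name)), display_name=name
        )
        for name in archive.namelist()
    ]
```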

View File

@@ -0,0 +1,208 @@
"""Unit tests for PythonTool file-upload caching.
Verifies that PythonTool reuses code-interpreter file IDs across multiple
run() calls within the same session instead of re-uploading identical content
on every agent loop iteration.
"""
from unittest.mock import MagicMock
from unittest.mock import patch
from onyx.tools.models import ChatFile
from onyx.tools.models import PythonToolOverrideKwargs
from onyx.tools.tool_implementations.python.code_interpreter_client import (
StreamResultEvent,
)
from onyx.tools.tool_implementations.python.python_tool import PythonTool
TOOL_MODULE = "onyx.tools.tool_implementations.python.python_tool"
def _make_stream_result() -> StreamResultEvent:
return StreamResultEvent(
exit_code=0,
timed_out=False,
duration_ms=10,
files=[],
)
def _make_tool() -> PythonTool:
emitter = MagicMock()
return PythonTool(tool_id=1, emitter=emitter)
def _make_override(files: list[ChatFile]) -> PythonToolOverrideKwargs:
return PythonToolOverrideKwargs(chat_files=files)
def _run_tool(tool: PythonTool, mock_client: MagicMock, files: list[ChatFile]) -> None:
"""Call tool.run() with a mocked CodeInterpreterClient context manager."""
from onyx.server.query_and_chat.placement import Placement
mock_client.execute_streaming.return_value = iter([_make_stream_result()])
ctx = MagicMock()
ctx.__enter__ = MagicMock(return_value=mock_client)
ctx.__exit__ = MagicMock(return_value=False)
placement = Placement(turn_index=0, tab_index=0)
override = _make_override(files)
with patch(f"{TOOL_MODULE}.CodeInterpreterClient", return_value=ctx):
tool.run(placement=placement, override_kwargs=override, code="print('hi')")
# ---------------------------------------------------------------------------
# Cache hit: same content uploaded in a second call reuses the file_id
# ---------------------------------------------------------------------------
@patch(f"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL", "http://fake:8000")
def test_same_file_uploaded_only_once_across_two_runs() -> None:
tool = _make_tool()
client = MagicMock()
client.upload_file.return_value = "file-id-abc"
pptx_content = b"fake pptx bytes"
files = [ChatFile(filename="report.pptx", content=pptx_content)]
_run_tool(tool, client, files)
_run_tool(tool, client, files)
# upload_file should only have been called once across both runs
client.upload_file.assert_called_once_with(pptx_content, "report.pptx")
@patch(f"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL", "http://fake:8000")
def test_cached_file_id_is_staged_on_second_run() -> None:
tool = _make_tool()
client = MagicMock()
client.upload_file.return_value = "file-id-abc"
files = [ChatFile(filename="data.pptx", content=b"content")]
_run_tool(tool, client, files)
# On the second run, execute_streaming should still receive the file
client.execute_streaming.return_value = iter([_make_stream_result()])
ctx = MagicMock()
ctx.__enter__ = MagicMock(return_value=client)
ctx.__exit__ = MagicMock(return_value=False)
from onyx.server.query_and_chat.placement import Placement
placement = Placement(turn_index=1, tab_index=0)
with patch(f"{TOOL_MODULE}.CodeInterpreterClient", return_value=ctx):
tool.run(
placement=placement,
override_kwargs=_make_override(files),
code="print('hi')",
)
# The second execute_streaming call should include the file
_, kwargs = client.execute_streaming.call_args
staged_files = kwargs.get("files") or []
assert any(f["file_id"] == "file-id-abc" for f in staged_files)
# ---------------------------------------------------------------------------
# Cache miss: different content triggers a new upload
# ---------------------------------------------------------------------------
@patch(f"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL", "http://fake:8000")
def test_different_file_content_uploaded_separately() -> None:
tool = _make_tool()
client = MagicMock()
client.upload_file.side_effect = ["file-id-v1", "file-id-v2"]
file_v1 = ChatFile(filename="report.pptx", content=b"version 1")
file_v2 = ChatFile(filename="report.pptx", content=b"version 2")
_run_tool(tool, client, [file_v1])
_run_tool(tool, client, [file_v2])
assert client.upload_file.call_count == 2
@patch(f"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL", "http://fake:8000")
def test_multiple_distinct_files_each_uploaded_once() -> None:
tool = _make_tool()
client = MagicMock()
client.upload_file.side_effect = ["id-a", "id-b"]
files = [
ChatFile(filename="a.pptx", content=b"aaa"),
ChatFile(filename="b.xlsx", content=b"bbb"),
]
_run_tool(tool, client, files)
_run_tool(tool, client, files)
# Two distinct files — each uploaded exactly once
assert client.upload_file.call_count == 2
@patch(f"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL", "http://fake:8000")
def test_same_content_different_filename_uploaded_separately() -> None:
# Identical bytes but different names must each get their own upload slot
# so both files appear under their respective paths in the workspace.
tool = _make_tool()
client = MagicMock()
client.upload_file.side_effect = ["id-v1", "id-v2"]
same_bytes = b"shared content"
files = [
ChatFile(filename="report_v1.csv", content=same_bytes),
ChatFile(filename="report_v2.csv", content=same_bytes),
]
_run_tool(tool, client, files)
assert client.upload_file.call_count == 2
# ---------------------------------------------------------------------------
# No cross-instance sharing: a fresh PythonTool re-uploads everything
# ---------------------------------------------------------------------------
@patch(f"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL", "http://fake:8000")
def test_new_tool_instance_re_uploads_file() -> None:
client = MagicMock()
client.upload_file.side_effect = ["id-session-1", "id-session-2"]
files = [ChatFile(filename="deck.pptx", content=b"slide data")]
tool_session_1 = _make_tool()
_run_tool(tool_session_1, client, files)
tool_session_2 = _make_tool()
_run_tool(tool_session_2, client, files)
# Different instances — each uploads independently
assert client.upload_file.call_count == 2
# ---------------------------------------------------------------------------
# Upload failure: failed upload is not cached, retried next run
# ---------------------------------------------------------------------------
@patch(f"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL", "http://fake:8000")
def test_upload_failure_not_cached() -> None:
tool = _make_tool()
client = MagicMock()
# First call raises, second succeeds
client.upload_file.side_effect = [Exception("network error"), "file-id-ok"]
files = [ChatFile(filename="slides.pptx", content=b"data")]
# First run — upload fails, file is skipped but not cached
_run_tool(tool, client, files)
# Second run — should attempt upload again
_run_tool(tool, client, files)
assert client.upload_file.call_count == 2
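
The suite above implies a per-tool-instance cache keyed by filename plus a content hash, populated only after a successful upload. A minimal sketch under those assumptions; the names here are hypothetical, not PythonTool's actual internals:

```python
import hashlib

class UploadCache:
    """Per-instance cache of code-interpreter file IDs."""

    def __init__(self) -> None:
        self._ids: dict[tuple[str, str], str] = {}

    def upload(self, client, filename: str, content: bytes) -> str:
        key = (filename, hashlib.sha256(content).hexdigest())
        if key not in self._ids:
            # If upload_file raises, nothing is cached, so the next run
            # retries (see test_upload_failure_not_cached above).
            self._ids[key] = client.upload_file(content, filename)
        return self._ids[key]
```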

View File

@@ -32,15 +32,17 @@ def test_run_with_timeout_raises_on_timeout(slow: float, timeout: float) -> None
"""Test that a function that exceeds timeout raises TimeoutError"""
def slow_function() -> None:
time.sleep(slow) # Sleep for 2 seconds
time.sleep(slow)
start = time.monotonic()
with pytest.raises(TimeoutError) as exc_info:
start = time.time()
run_with_timeout(timeout, slow_function) # Set timeout to 0.1 seconds
end = time.time()
assert end - start >= timeout
assert end - start < (slow + timeout) / 2
run_with_timeout(timeout, slow_function)
elapsed = time.monotonic() - start
assert f"timed out after {timeout} seconds" in str(exc_info.value)
assert elapsed >= timeout
# Should return around the timeout duration, not the full sleep duration
assert elapsed == pytest.approx(timeout, abs=0.8)
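
The rewrite also swaps time.time() for time.monotonic(), the correct clock for measuring durations: it cannot jump if the wall clock is adjusted mid-test. The pattern in isolation:

```python
import time

start = time.monotonic()
time.sleep(0.05)  # stand-in for the operation under test
elapsed = time.monotonic() - start
assert elapsed >= 0.05  # sleep() suspends for at least the requested time
```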
@pytest.mark.filterwarnings("ignore::pytest.PytestUnhandledThreadExceptionWarning")

View File

@@ -44,13 +44,13 @@ class TestBuildVespaFilters:
assert result == f'({SOURCE_TYPE} contains "web") and '
def test_acl(self) -> None:
"""Test with acls."""
"""Test with acls — uses weightedSet operator for efficient matching."""
# Single ACL
filters = IndexFilters(access_control_list=["user1"])
result = build_vespa_filters(filters)
assert (
result
== f'!({HIDDEN}=true) and (access_control_list contains "user1") and '
== f'!({HIDDEN}=true) and weightedSet(access_control_list, {{"user1":1}}) and '
)
# Multiple ACLs
@@ -58,7 +58,7 @@ class TestBuildVespaFilters:
result = build_vespa_filters(filters)
assert (
result
== f'!({HIDDEN}=true) and (access_control_list contains "user2" or access_control_list contains "group2") and '
== f'!({HIDDEN}=true) and weightedSet(access_control_list, {{"user2":1, "group2":1}}) and '
)
def test_tenant_filter(self) -> None:
@@ -250,10 +250,7 @@ class TestBuildVespaFilters:
result = build_vespa_filters(filters)
expected = f"!({HIDDEN}=true) and "
expected += (
'(access_control_list contains "user1" or '
'access_control_list contains "group1") and '
)
expected += 'weightedSet(access_control_list, {"user1":1, "group1":1}) and '
expected += f'({SOURCE_TYPE} contains "web") and '
expected += f'({METADATA_LIST} contains "color{INDEX_SEPARATOR}red") and '
# Knowledge scope filters are OR'd together
@@ -298,6 +295,38 @@ class TestBuildVespaFilters:
)
assert expected == result
def test_acl_large_list_uses_weighted_set(self) -> None:
"""Verify that large ACL lists produce a weightedSet clause
instead of OR-chained contains — this is what prevents Vespa
HTTP 400 errors for users with thousands of permission groups."""
acl = [f"external_group:google_drive_{i}" for i in range(10_000)]
acl += ["user_email:user@example.com", "__PUBLIC__"]
filters = IndexFilters(access_control_list=acl)
result = build_vespa_filters(filters)
assert "weightedSet(access_control_list, {" in result
# Must NOT contain OR-chained contains clauses
assert "access_control_list contains" not in result
# All entries should be present
assert '"external_group:google_drive_0":1' in result
assert '"external_group:google_drive_9999":1' in result
assert '"user_email:user@example.com":1' in result
assert '"__PUBLIC__":1' in result
def test_acl_empty_strings_filtered(self) -> None:
"""Empty strings in the ACL list should be filtered out."""
filters = IndexFilters(access_control_list=["user1", "", "group1"])
result = build_vespa_filters(filters)
assert (
result
== f'!({HIDDEN}=true) and weightedSet(access_control_list, {{"user1":1, "group1":1}}) and '
)
# All empty
filters = IndexFilters(access_control_list=["", ""])
result = build_vespa_filters(filters)
assert result == f"!({HIDDEN}=true) and "
def test_empty_or_none_values(self) -> None:
"""Test with empty or None values in filter lists."""
# Empty strings in document set
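
The large-ACL test also documents the motivation for the new syntax: OR-chaining thousands of contains clauses can exceed Vespa's query size limits and fail with HTTP 400, while weightedSet passes the whole list as a single structured operator. A sketch of the clause construction the assertions describe (helper name hypothetical; the real logic lives in build_vespa_filters):

```python
def acl_weighted_set_clause(acl_entries: list[str]) -> str:
    # Drop empty strings, then emit one weightedSet operator instead of
    # an OR-chain of `contains` clauses.
    entries = ", ".join(f'"{entry}":1' for entry in acl_entries if entry)
    if not entries:
        return ""
    return f"weightedSet(access_control_list, {{{entries}}}) and "
```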

View File

@@ -143,7 +143,7 @@ dev = [
"matplotlib==3.10.8",
"mypy-extensions==1.0.0",
"mypy==1.13.0",
"onyx-devtools==0.6.3",
"onyx-devtools==0.7.0",
"openapi-generator-cli==7.17.0",
"pandas-stubs~=2.3.3",
"pre-commit==3.2.2",

View File

@@ -0,0 +1,35 @@
package cmd
import (
"fmt"
"github.com/jmelahman/tag/git"
"github.com/spf13/cobra"
)
// NewLatestStableTagCommand creates the latest-stable-tag command.
func NewLatestStableTagCommand() *cobra.Command {
cmd := &cobra.Command{
Use: "latest-stable-tag",
Short: "Print the git tag that should receive the 'latest' Docker tag",
Long: `Print the highest stable (non-pre-release) semver tag in the repository.
This is used during deployment to decide whether a given tag should
receive the "latest" tag on Docker Hub. Only the highest vX.Y.Z tag
qualifies. Tags with pre-release suffixes (e.g. v1.2.3-beta,
v1.2.3-cloud.1) are excluded.`,
Args: cobra.NoArgs,
RunE: func(c *cobra.Command, _ []string) error {
tag, err := git.GetLatestStableSemverTag("")
if err != nil {
return fmt.Errorf("get latest stable semver tag: %w", err)
}
if tag == "" {
return fmt.Errorf("no stable semver tag found in repository")
}
fmt.Println(tag)
return nil
},
}
return cmd
}
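
The rule in the help text is easy to state: keep only exact vX.Y.Z tags (no pre-release suffix) and return the numerically highest. The Python illustration below shows the rule only; the actual implementation is the Go git.GetLatestStableSemverTag:

```python
import re

_STABLE_TAG = re.compile(r"^v(\d+)\.(\d+)\.(\d+)$")  # rejects v1.2.3-beta etc.

def latest_stable(tags: list[str]) -> str | None:
    stable = [
        (tuple(int(part) for part in match.groups()), tag)
        for tag in tags
        if (match := _STABLE_TAG.match(tag))
    ]
    return max(stable)[1] if stable else None
```

For example, latest_stable(["v1.2.3", "v1.10.0", "v1.10.0-cloud.1"]) returns "v1.10.0": numeric comparison ranks 1.10.0 above 1.2.3, and the pre-release tag is filtered out before comparison.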

View File

@@ -52,6 +52,7 @@ func NewRootCommand() *cobra.Command {
cmd.AddCommand(NewScreenshotDiffCommand())
cmd.AddCommand(NewDesktopCommand())
cmd.AddCommand(NewWebCommand())
cmd.AddCommand(NewLatestStableTagCommand())
cmd.AddCommand(NewWhoisCommand())
return cmd

View File

@@ -3,12 +3,13 @@ module github.com/onyx-dot-app/onyx/tools/ods
go 1.26.0
require (
github.com/jmelahman/tag v0.5.2
github.com/sirupsen/logrus v1.9.3
github.com/spf13/cobra v1.10.1
github.com/spf13/pflag v1.0.9
github.com/spf13/cobra v1.10.2
github.com/spf13/pflag v1.0.10
)
require (
github.com/inconshreveable/mousetrap v1.1.0 // indirect
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 // indirect
golang.org/x/sys v0.39.0 // indirect
)

View File

@@ -4,20 +4,26 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/jmelahman/tag v0.5.2 h1:g6A/aHehu5tkA31mPoDsXBNr1FigZ9A82Y8WVgb/WsM=
github.com/jmelahman/tag v0.5.2/go.mod h1:qmuqk19B1BKkpcg3kn7l/Eey+UqucLxgOWkteUGiG4Q=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s=
github.com/spf13/cobra v1.10.1/go.mod h1:7SmJGaTHFVBY0jW4NXGluQoLvhqFQM+6XSKD+P4XaB0=
github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY=
github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU=
github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4=
github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk=
github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 h1:0A+M6Uqn+Eje4kHMK80dtF3JCXC4ykBgQG4Fe06QRhQ=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk=
golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=

uv.lock (generated; 17 lines changed)
View File

@@ -4443,7 +4443,7 @@ requires-dist = [
{ name = "numpy", marker = "extra == 'model-server'", specifier = "==2.4.1" },
{ name = "oauthlib", marker = "extra == 'backend'", specifier = "==3.2.2" },
{ name = "office365-rest-python-client", marker = "extra == 'backend'", specifier = "==2.6.2" },
{ name = "onyx-devtools", marker = "extra == 'dev'", specifier = "==0.6.3" },
{ name = "onyx-devtools", marker = "extra == 'dev'", specifier = "==0.7.0" },
{ name = "openai", specifier = "==2.14.0" },
{ name = "openapi-generator-cli", marker = "extra == 'dev'", specifier = "==7.17.0" },
{ name = "openinference-instrumentation", marker = "extra == 'backend'", specifier = "==0.1.42" },
@@ -4548,20 +4548,19 @@ requires-dist = [{ name = "onyx", extras = ["backend", "dev", "ee"], editable =
[[package]]
name = "onyx-devtools"
version = "0.6.3"
version = "0.7.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "fastapi" },
{ name = "openapi-generator-cli" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/84/e2/e7619722c3ccd18eb38100f776fb3dd6b4ae0fbbee09fca5af7c69a279b5/onyx_devtools-0.6.3-py3-none-any.whl", hash = "sha256:d3a5422945d9da12cafc185f64b39f6e727ee4cc92b37427deb7a38f9aad4966", size = 3945381, upload-time = "2026-03-05T20:39:25.896Z" },
{ url = "https://files.pythonhosted.org/packages/f2/09/513d2dabedc1e54ad4376830fc9b34a3d9c164bdbcdedfcdbb8b8154dc5a/onyx_devtools-0.6.3-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:efe300e9f3a2e7ae75f88a4f9e0a5c4c471478296cb1615b6a1f03d247582e13", size = 3978761, upload-time = "2026-03-05T20:39:28.822Z" },
{ url = "https://files.pythonhosted.org/packages/39/41/e757602a0de032d74ed01c7ee57f30e57728fb9cd4f922f50d2affda3889/onyx_devtools-0.6.3-py3-none-macosx_11_0_arm64.whl", hash = "sha256:594066eed3f917cfab5a8c7eac3d4a210df30259f2049f664787749709345e19", size = 3665378, upload-time = "2026-03-05T20:44:22.696Z" },
{ url = "https://files.pythonhosted.org/packages/33/1c/c93b65d0b32e202596a2647922a75c7011cb982f899ddfcfd171f792c58f/onyx_devtools-0.6.3-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:384ef66030b55c0fd68b3898782b5b4b868ff3de119569dfc8544e2ce534b98a", size = 3540890, upload-time = "2026-03-05T20:39:28.886Z" },
{ url = "https://files.pythonhosted.org/packages/f4/33/760eb656013f7f0cdff24570480d3dc4e52bbd8e6147ea1e8cf6fad7554f/onyx_devtools-0.6.3-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:82e218f3a49f64910c2c4c34d5dc12d1ea1520a27e0b0f6e4c0949ff9abaf0e1", size = 3945396, upload-time = "2026-03-05T20:39:34.323Z" },
{ url = "https://files.pythonhosted.org/packages/1a/eb/f54b3675c464df8a51194ff75afc97c2417659e3a209dc46948b47c28860/onyx_devtools-0.6.3-py3-none-win_amd64.whl", hash = "sha256:8af614ae7229290ef2417cb85270184a1e826ed9a3a34658da93851edb36df57", size = 4045936, upload-time = "2026-03-05T20:39:28.375Z" },
{ url = "https://files.pythonhosted.org/packages/04/b8/5bee38e748f3d4b8ec935766224db1bbc1214c91092e5822c080fccd9130/onyx_devtools-0.6.3-py3-none-win_arm64.whl", hash = "sha256:717589db4b42528d33ae96f8006ee6aad3555034dcfee724705b6576be6a6ec4", size = 3608268, upload-time = "2026-03-05T20:39:28.731Z" },
{ url = "https://files.pythonhosted.org/packages/22/9e/6957b11555da57d9e97092f4cd8ac09a86666264b0c9491838f4b27db5dc/onyx_devtools-0.7.0-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ad962a168d46ea11dcde9fa3b37e4f12ec520b4a4cb4d49d8732de110d46c4b6", size = 3998057, upload-time = "2026-03-12T03:09:11.585Z" },
{ url = "https://files.pythonhosted.org/packages/cd/90/c72f3d06ba677012d77c77de36195b6a32a15c755c79ba0282be74e3c366/onyx_devtools-0.7.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:e46d252e2b048ff053b03519c3a875998780738d7c334eaa1c9a32ff445e3e1a", size = 3687753, upload-time = "2026-03-12T03:09:11.742Z" },
{ url = "https://files.pythonhosted.org/packages/10/42/4e9fe36eccf9f76d67ba8f4ff6539196a09cd60351fb63f5865e1544cbfa/onyx_devtools-0.7.0-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:f280bc9320e1cc310e7d753a371009bfaab02cc0e0cfd78559663b15655b5a50", size = 3560144, upload-time = "2026-03-12T03:12:24.02Z" },
{ url = "https://files.pythonhosted.org/packages/76/40/36dc12d99760b358c7f39b27361cb18fa9681ffe194107f982d0e1a74016/onyx_devtools-0.7.0-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:e31df751c7540ae7e70a7fe8e1153c79c31c2254af6aa4c72c0dd54fa381d2ab", size = 3964387, upload-time = "2026-03-12T03:09:11.356Z" },
{ url = "https://files.pythonhosted.org/packages/34/18/74744230c3820a5a7687335507ca5f1dbebab2c5325805041c1cd5703e6a/onyx_devtools-0.7.0-py3-none-win_amd64.whl", hash = "sha256:541bfd347c2d5b11e7f63ab5001d2594df91d215ad9d07b1562f5e715700f7e6", size = 4068030, upload-time = "2026-03-12T03:09:12.98Z" },
{ url = "https://files.pythonhosted.org/packages/8c/78/1320436607d3ffcb321ba7b064556c020ea15843a7e7d903fbb7529a71f5/onyx_devtools-0.7.0-py3-none-win_arm64.whl", hash = "sha256:83016330a9d39712431916cc25b2fb2cfcaa0112a55cc4f919d545da3a8974f9", size = 3626409, upload-time = "2026-03-12T03:09:10.222Z" },
]
[[package]]

View File

@@ -53,6 +53,8 @@ const sharedConfig = {
// Testing & Mocking
"msw",
"until-async",
// Language Detection
"linguist-languages",
// Markdown & Syntax Highlighting
"react-markdown",
"remark-.*", // All remark packages

web/package-lock.json (generated; 102 lines changed)
View File

@@ -59,8 +59,9 @@
"lowlight": "^3.3.0",
"lucide-react": "^0.454.0",
"mdast-util-find-and-replace": "^3.0.1",
"mime": "^4.1.0",
"motion": "^12.29.0",
"next": "16.1.6",
"next": "16.1.7",
"next-themes": "^0.4.4",
"postcss": "^8.5.6",
"posthog-js": "^1.176.0",
@@ -2395,9 +2396,9 @@
}
},
"node_modules/@next/env": {
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/env/-/env-16.1.6.tgz",
"integrity": "sha512-N1ySLuZjnAtN3kFnwhAwPvZah8RJxKasD7x1f8shFqhncnWZn4JMfg37diLNuoHsLAlrDfM3g4mawVdtAG8XLQ==",
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/env/-/env-16.1.7.tgz",
"integrity": "sha512-rJJbIdJB/RQr2F1nylZr/PJzamvNNhfr3brdKP6s/GW850jbtR70QlSfFselvIBbcPUOlQwBakexjFzqLzF6pg==",
"license": "MIT"
},
"node_modules/@next/eslint-plugin-next": {
@@ -2441,9 +2442,9 @@
}
},
"node_modules/@next/swc-darwin-arm64": {
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.1.6.tgz",
"integrity": "sha512-wTzYulosJr/6nFnqGW7FrG3jfUUlEf8UjGA0/pyypJl42ExdVgC6xJgcXQ+V8QFn6niSG2Pb8+MIG1mZr2vczw==",
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.1.7.tgz",
"integrity": "sha512-b2wWIE8sABdyafc4IM8r5Y/dS6kD80JRtOGrUiKTsACFQfWWgUQ2NwoUX1yjFMXVsAwcQeNpnucF2ZrujsBBPg==",
"cpu": [
"arm64"
],
@@ -2457,9 +2458,9 @@
}
},
"node_modules/@next/swc-darwin-x64": {
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.1.6.tgz",
"integrity": "sha512-BLFPYPDO+MNJsiDWbeVzqvYd4NyuRrEYVB5k2N3JfWncuHAy2IVwMAOlVQDFjj+krkWzhY2apvmekMkfQR0CUQ==",
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.1.7.tgz",
"integrity": "sha512-zcnVaaZulS1WL0Ss38R5Q6D2gz7MtBu8GZLPfK+73D/hp4GFMrC2sudLky1QibfV7h6RJBJs/gOFvYP0X7UVlQ==",
"cpu": [
"x64"
],
@@ -2473,9 +2474,9 @@
}
},
"node_modules/@next/swc-linux-arm64-gnu": {
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.1.6.tgz",
"integrity": "sha512-OJYkCd5pj/QloBvoEcJ2XiMnlJkRv9idWA/j0ugSuA34gMT6f5b7vOiCQHVRpvStoZUknhl6/UxOXL4OwtdaBw==",
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.1.7.tgz",
"integrity": "sha512-2ant89Lux/Q3VyC8vNVg7uBaFVP9SwoK2jJOOR0L8TQnX8CAYnh4uctAScy2Hwj2dgjVHqHLORQZJ2wH6VxhSQ==",
"cpu": [
"arm64"
],
@@ -2489,9 +2490,9 @@
}
},
"node_modules/@next/swc-linux-arm64-musl": {
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.1.6.tgz",
"integrity": "sha512-S4J2v+8tT3NIO9u2q+S0G5KdvNDjXfAv06OhfOzNDaBn5rw84DGXWndOEB7d5/x852A20sW1M56vhC/tRVbccQ==",
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.1.7.tgz",
"integrity": "sha512-uufcze7LYv0FQg9GnNeZ3/whYfo+1Q3HnQpm16o6Uyi0OVzLlk2ZWoY7j07KADZFY8qwDbsmFnMQP3p3+Ftprw==",
"cpu": [
"arm64"
],
@@ -2505,9 +2506,9 @@
}
},
"node_modules/@next/swc-linux-x64-gnu": {
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.1.6.tgz",
"integrity": "sha512-2eEBDkFlMMNQnkTyPBhQOAyn2qMxyG2eE7GPH2WIDGEpEILcBPI/jdSv4t6xupSP+ot/jkfrCShLAa7+ZUPcJQ==",
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.1.7.tgz",
"integrity": "sha512-KWVf2gxYvHtvuT+c4MBOGxuse5TD7DsMFYSxVxRBnOzok/xryNeQSjXgxSv9QpIVlaGzEn/pIuI6Koosx8CGWA==",
"cpu": [
"x64"
],
@@ -2521,9 +2522,9 @@
}
},
"node_modules/@next/swc-linux-x64-musl": {
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.1.6.tgz",
"integrity": "sha512-oicJwRlyOoZXVlxmIMaTq7f8pN9QNbdes0q2FXfRsPhfCi8n8JmOZJm5oo1pwDaFbnnD421rVU409M3evFbIqg==",
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.1.7.tgz",
"integrity": "sha512-HguhaGwsGr1YAGs68uRKc4aGWxLET+NevJskOcCAwXbwj0fYX0RgZW2gsOCzr9S11CSQPIkxmoSbuVaBp4Z3dA==",
"cpu": [
"x64"
],
@@ -2537,9 +2538,9 @@
}
},
"node_modules/@next/swc-win32-arm64-msvc": {
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.1.6.tgz",
"integrity": "sha512-gQmm8izDTPgs+DCWH22kcDmuUp7NyiJgEl18bcr8irXA5N2m2O+JQIr6f3ct42GOs9c0h8QF3L5SzIxcYAAXXw==",
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.1.7.tgz",
"integrity": "sha512-S0n3KrDJokKTeFyM/vGGGR8+pCmXYrjNTk2ZozOL1C/JFdfUIL9O1ATaJOl5r2POe56iRChbsszrjMAdWSv7kQ==",
"cpu": [
"arm64"
],
@@ -2553,9 +2554,9 @@
}
},
"node_modules/@next/swc-win32-x64-msvc": {
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.1.6.tgz",
"integrity": "sha512-NRfO39AIrzBnixKbjuo2YiYhB6o9d8v/ymU9m/Xk8cyVk+k7XylniXkHwjs4s70wedVffc6bQNbufk5v0xEm0A==",
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.1.7.tgz",
"integrity": "sha512-mwgtg8CNZGYm06LeEd+bNnOUfwOyNem/rOiP14Lsz+AnUY92Zq/LXwtebtUiaeVkhbroRCQ0c8GlR4UT1U+0yg==",
"cpu": [
"x64"
],
@@ -9225,7 +9226,9 @@
}
},
"node_modules/flatted": {
"version": "3.3.3",
"version": "3.4.1",
"resolved": "https://registry.npmjs.org/flatted/-/flatted-3.4.1.tgz",
"integrity": "sha512-IxfVbRFVlV8V/yRaGzk0UVIcsKKHMSfYw66T/u4nTwlWteQePsxe//LjudR1AMX4tZW3WFCh3Zqa/sjlqpbURQ==",
"dev": true,
"license": "ISC"
},
@@ -12746,6 +12749,21 @@
"url": "https://github.com/sponsors/jonschlinkert"
}
},
"node_modules/mime": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/mime/-/mime-4.1.0.tgz",
"integrity": "sha512-X5ju04+cAzsojXKes0B/S4tcYtFAJ6tTMuSPBEn9CPGlrWr8Fiw7qYeLT0XyH80HSoAoqWCaz+MWKh22P7G1cw==",
"funding": [
"https://github.com/sponsors/broofa"
],
"license": "MIT",
"bin": {
"mime": "bin/cli.js"
},
"engines": {
"node": ">=16"
}
},
"node_modules/mime-db": {
"version": "1.52.0",
"license": "MIT",
@@ -12911,14 +12929,14 @@
"license": "MIT"
},
"node_modules/next": {
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/next/-/next-16.1.6.tgz",
"integrity": "sha512-hkyRkcu5x/41KoqnROkfTm2pZVbKxvbZRuNvKXLRXxs3VfyO0WhY50TQS40EuKO9SW3rBj/sF3WbVwDACeMZyw==",
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/next/-/next-16.1.7.tgz",
"integrity": "sha512-WM0L7WrSvKwoLegLYr6V+mz+RIofqQgVAfHhMp9a88ms0cFX8iX9ew+snpWlSBwpkURJOUdvCEt3uLl3NNzvWg==",
"license": "MIT",
"dependencies": {
"@next/env": "16.1.6",
"@next/env": "16.1.7",
"@swc/helpers": "0.5.15",
"baseline-browser-mapping": "^2.8.3",
"baseline-browser-mapping": "^2.9.19",
"caniuse-lite": "^1.0.30001579",
"postcss": "8.4.31",
"styled-jsx": "5.1.6"
@@ -12930,14 +12948,14 @@
"node": ">=20.9.0"
},
"optionalDependencies": {
"@next/swc-darwin-arm64": "16.1.6",
"@next/swc-darwin-x64": "16.1.6",
"@next/swc-linux-arm64-gnu": "16.1.6",
"@next/swc-linux-arm64-musl": "16.1.6",
"@next/swc-linux-x64-gnu": "16.1.6",
"@next/swc-linux-x64-musl": "16.1.6",
"@next/swc-win32-arm64-msvc": "16.1.6",
"@next/swc-win32-x64-msvc": "16.1.6",
"@next/swc-darwin-arm64": "16.1.7",
"@next/swc-darwin-x64": "16.1.7",
"@next/swc-linux-arm64-gnu": "16.1.7",
"@next/swc-linux-arm64-musl": "16.1.7",
"@next/swc-linux-x64-gnu": "16.1.7",
"@next/swc-linux-x64-musl": "16.1.7",
"@next/swc-win32-arm64-msvc": "16.1.7",
"@next/swc-win32-x64-msvc": "16.1.7",
"sharp": "^0.34.4"
},
"peerDependencies": {

View File

@@ -75,8 +75,9 @@
"lowlight": "^3.3.0",
"lucide-react": "^0.454.0",
"mdast-util-find-and-replace": "^3.0.1",
"mime": "^4.1.0",
"motion": "^12.29.0",
"next": "16.1.6",
"next": "16.1.7",
"next-themes": "^0.4.4",
"postcss": "^8.5.6",
"posthog-js": "^1.176.0",

View File

@@ -626,10 +626,7 @@ function Main({ ccPairId }: { ccPairId: number }) {
<div className="w-[200px]">
<div className="text-sm font-medium mb-1">Last Indexed</div>
<div className="text-sm text-text-default">
{timeAgo(
indexAttempts?.find((attempt) => attempt.status === "success")
?.time_started
) ?? "-"}
{timeAgo(ccPair?.last_indexed) ?? "-"}
</div>
</div>

View File

@@ -21,10 +21,13 @@ export const submitGoogleSite = async (
formData.append("files", file);
});
const response = await fetch("/api/manage/admin/connector/file/upload", {
method: "POST",
body: formData,
});
const response = await fetch(
"/api/manage/admin/connector/file/upload?unzip=false",
{
method: "POST",
body: formData,
}
);
const responseJson = await response.json();
if (!response.ok) {
toast.error(`Unable to upload files - ${responseJson.detail}`);

View File

@@ -6,7 +6,7 @@ import { ChatFileType, FileDescriptor } from "@/app/app/interfaces";
import Attachment from "@/refresh-components/Attachment";
import { InMessageImage } from "@/app/app/components/files/images/InMessageImage";
import CsvContent from "@/components/tools/CSVContent";
import TextViewModal from "@/sections/modals/TextViewModal";
import PreviewModal from "@/sections/modals/PreviewModal";
import { MinimalOnyxDocument } from "@/lib/search/interfaces";
import ExpandableContentWrapper from "@/components/tools/ExpandableContentWrapper";
@@ -52,7 +52,7 @@ export default function FileDisplay({ files }: FileDisplayProps) {
return (
<>
{previewingFile && (
<TextViewModal
<PreviewModal
presentingDocument={presentingDocument}
onClose={() => setPreviewingFile(null)}
/>

View File

@@ -1,7 +1,7 @@
/* Light mode syntax highlighting (Atom One Light) */
.hljs {
color: #383a42 !important;
background: #fafafa !important;
background: var(--background-code-01) !important;
}
.hljs-comment,
@@ -77,7 +77,7 @@
/* Dark mode syntax highlighting (Atom One Dark) */
.dark .hljs {
color: #e2e6eb !important;
background: #151617 !important;
background: var(--background-code-01) !important;
}
.dark .hljs-comment,

View File

@@ -11,7 +11,7 @@ import { Callout } from "@/components/ui/callout";
import OnyxInitializingLoader from "@/components/OnyxInitializingLoader";
import { Persona } from "@/app/admin/agents/interfaces";
import { MinimalOnyxDocument } from "@/lib/search/interfaces";
import TextViewModal from "@/sections/modals/TextViewModal";
import PreviewModal from "@/sections/modals/PreviewModal";
import { UNNAMED_CHAT } from "@/lib/constants";
import Text from "@/refresh-components/texts/Text";
import useOnMount from "@/hooks/useOnMount";
@@ -64,7 +64,7 @@ export default function SharedChatDisplay({
return (
<>
{presentingDocument && (
<TextViewModal
<PreviewModal
presentingDocument={presentingDocument}
onClose={() => setPresentingDocument(null)}
/>

View File

@@ -13,6 +13,7 @@ import {
type KeyboardEvent,
} from "react";
import { useRouter } from "next/navigation";
import { getPastedFilesIfNoText } from "@/lib/clipboard";
import { cn, isImageFile } from "@/lib/utils";
import { Disabled } from "@opal/core";
import {
@@ -230,21 +231,11 @@ const InputBar = memo(
const handlePaste = useCallback(
(event: ClipboardEvent) => {
const items = event.clipboardData?.items;
if (items) {
const pastedFiles: File[] = [];
for (let i = 0; i < items.length; i++) {
const item = items[i];
if (item && item.kind === "file") {
const file = item.getAsFile();
if (file) pastedFiles.push(file);
}
}
if (pastedFiles.length > 0) {
event.preventDefault();
// Context handles session binding internally
uploadFiles(pastedFiles);
}
const pastedFiles = getPastedFilesIfNoText(event.clipboardData);
if (pastedFiles.length > 0) {
event.preventDefault();
// Context handles session binding internally
uploadFiles(pastedFiles);
}
},
[uploadFiles]

View File

@@ -272,6 +272,22 @@ export default function UserLibraryModal({
</Disabled>
</Section>
{/* The exact cap is controlled by the backend env var
MAX_EMBEDDED_IMAGES_PER_FILE (default 500). This copy is
deliberately vague so it doesn't drift if the limit is
tuned per-deployment; the precise number is surfaced in
the rejection error the server returns. */}
<Section
flexDirection="row"
justifyContent="end"
padding={0.5}
height="fit"
>
<Text secondaryBody text03>
PDFs with many embedded images may be rejected.
</Text>
</Section>
{isLoading ? (
<Section padding={2} height="fit">
<Text secondaryBody text03>

View File

@@ -438,6 +438,9 @@
--action-text-link-05: var(--blue-50);
--action-text-danger-05: var(--red-50);
/* Background / Code */
--background-code-01: var(--grey-02);
/* Code */
--code-code: var(--alpha-grey-100-85);
--code-comment: var(--alpha-grey-100-35);
@@ -639,6 +642,9 @@
--action-text-link-05: var(--blue-45);
--action-text-danger-05: var(--red-45);
/* Background / Code */
--background-code-01: #151617;
/* Code */
--code-code: var(--alpha-grey-00-85);
--code-comment: var(--alpha-grey-00-45);

View File

@@ -40,7 +40,7 @@ import { SvgUser, SvgMenu, SvgAlertTriangle } from "@opal/icons";
import { useAppBackground } from "@/providers/AppBackgroundProvider";
import { MinimalOnyxDocument } from "@/lib/search/interfaces";
import DocumentsSidebar from "@/sections/document-sidebar/DocumentsSidebar";
import TextViewModal from "@/sections/modals/TextViewModal";
import PreviewModal from "@/sections/modals/PreviewModal";
import { personaIncludesRetrieval } from "@/app/app/services/lib";
import { useQueryController } from "@/providers/QueryControllerProvider";
import { eeGated } from "@/ce";
@@ -295,7 +295,6 @@ export default function NRFPage({ isSidePanel = false }: NRFPageProps) {
// If we already have messages (chat session started), always use chat mode
// (matches AppPage behavior where existing sessions bypass classification)
if (hasMessages) {
resetInputBar();
onSubmit({
message: submittedMessage,
currentMessageFiles: currentMessageFiles,
@@ -307,7 +306,6 @@ export default function NRFPage({ isSidePanel = false }: NRFPageProps) {
// Build an onChat closure that captures additionalContext for this submission
const onChat = (chatMessage: string) => {
resetInputBar();
onSubmit({
message: chatMessage,
currentMessageFiles: currentMessageFiles,
@@ -326,7 +324,6 @@ export default function NRFPage({ isSidePanel = false }: NRFPageProps) {
onSubmit,
currentMessageFiles,
deepResearchEnabled,
resetInputBar,
submitQuery,
tabReadingEnabled,
currentTabUrl,
@@ -537,7 +534,7 @@ export default function NRFPage({ isSidePanel = false }: NRFPageProps) {
{/* Text/document preview modal */}
{presentingDocument && (
<TextViewModal
<PreviewModal
presentingDocument={presentingDocument}
onClose={() => setPresentingDocument(null)}
/>

View File

@@ -4,21 +4,21 @@ import {
MemoizedLink,
MemoizedParagraph,
} from "@/app/app/message/MemoizedTextComponents";
import React, { useMemo, CSSProperties } from "react";
import { useMemo, CSSProperties } from "react";
import ReactMarkdown, { type Components } from "react-markdown";
import remarkGfm from "remark-gfm";
import rehypeHighlight from "rehype-highlight";
import remarkMath from "remark-math";
import rehypeKatex from "rehype-katex";
import "katex/dist/katex.min.css";
import { transformLinkUri } from "@/lib/utils";
import { cn, transformLinkUri } from "@/lib/utils";
type MinimalMarkdownComponentOverrides = Partial<Components>;
interface MinimalMarkdownProps {
content: string;
className?: string;
style?: CSSProperties;
showHeader?: boolean;
/**
* Override specific markdown renderers.
* Any renderer not provided will fall back to this component's defaults.
@@ -29,7 +29,7 @@ interface MinimalMarkdownProps {
export default function MinimalMarkdown({
content,
className = "",
style,
showHeader = true,
components,
}: MinimalMarkdownProps) {
const markdownComponents = useMemo(() => {
@@ -43,7 +43,11 @@ export default function MinimalMarkdown({
code: ({ node, inline, className, children, ...props }: any) => {
const codeText = extractCodeText(node, content, children);
return (
<CodeBlock className={className} codeText={codeText}>
<CodeBlock
className={className}
codeText={codeText}
showHeader={showHeader}
>
{children}
</CodeBlock>
);
@@ -54,22 +58,20 @@ export default function MinimalMarkdown({
...defaults,
...(components ?? {}),
} satisfies Components;
}, [content, components]);
}, [content, components, showHeader]);
return (
<div style={style || {}} className={`${className}`}>
<ReactMarkdown
className="prose dark:prose-invert max-w-full text-sm break-words"
components={markdownComponents}
rehypePlugins={[rehypeHighlight, rehypeKatex]}
remarkPlugins={[
remarkGfm,
[remarkMath, { singleDollarTextMath: false }],
]}
urlTransform={transformLinkUri}
>
{content}
</ReactMarkdown>
</div>
<ReactMarkdown
className={cn(
"prose dark:prose-invert max-w-full text-sm break-words",
className
)}
components={markdownComponents}
rehypePlugins={[rehypeHighlight, rehypeKatex]}
remarkPlugins={[remarkGfm, [remarkMath, { singleDollarTextMath: false }]]}
urlTransform={transformLinkUri}
>
{content}
</ReactMarkdown>
);
}

View File

@@ -52,6 +52,7 @@ export const AdminDateRangeSelector = memo(function AdminDateRangeSelector({
<Popover.Trigger asChild>
{/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
<Button
data-testid="admin-date-range-selector-button"
secondary
className={cn("justify-start", !value && "text-muted-foreground")}
leftIcon={SvgCalendar}

View File

@@ -6,7 +6,7 @@ import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import { FileDescriptor } from "@/app/app/interfaces";
import { cn } from "@/lib/utils";
import TextViewModal from "@/sections/modals/TextViewModal";
import PreviewModal from "@/sections/modals/PreviewModal";
import { MinimalOnyxDocument } from "@/lib/search/interfaces";
export interface ExpandableContentWrapperProps {
@@ -95,7 +95,7 @@ export default function ExpandableContentWrapper({
return (
<>
{expanded && (
<TextViewModal
<PreviewModal
presentingDocument={presentingDocument}
onClose={() => setExpanded(false)}
/>

View File

@@ -0,0 +1,89 @@
import { getPastedFilesIfNoText } from "./clipboard";
type MockClipboardData = Parameters<typeof getPastedFilesIfNoText>[0];
function makeClipboardData({
textPlain = "",
text = "",
files = [],
}: {
textPlain?: string;
text?: string;
files?: File[];
}): MockClipboardData {
return {
items: files.map((file) => ({
kind: "file",
getAsFile: () => file,
})),
getData: (format: string) => {
if (format === "text/plain") {
return textPlain;
}
if (format === "text") {
return text;
}
return "";
},
};
}
describe("getPastedFilesIfNoText", () => {
it("prefers plain text over pasted files when both are present", () => {
const imageFile = new File(["slide preview"], "slide.png", {
type: "image/png",
});
expect(
getPastedFilesIfNoText(
makeClipboardData({
textPlain: "Welcome to PowerPoint for Mac",
files: [imageFile],
})
)
).toEqual([]);
});
it("falls back to text data when text/plain is empty", () => {
const imageFile = new File(["slide preview"], "slide.png", {
type: "image/png",
});
expect(
getPastedFilesIfNoText(
makeClipboardData({
text: "Welcome to PowerPoint for Mac",
files: [imageFile],
})
)
).toEqual([]);
});
it("still returns files for image-only pastes", () => {
const imageFile = new File(["slide preview"], "slide.png", {
type: "image/png",
});
expect(
getPastedFilesIfNoText(makeClipboardData({ files: [imageFile] }))
).toEqual([imageFile]);
});
it("ignores whitespace-only text and keeps file pastes working", () => {
const imageFile = new File(["slide preview"], "slide.png", {
type: "image/png",
});
expect(
getPastedFilesIfNoText(
makeClipboardData({
textPlain: " ",
text: "\n",
files: [imageFile],
})
)
).toEqual([imageFile]);
});
});

web/src/lib/clipboard.ts (new file; 52 lines)
View File

@@ -0,0 +1,52 @@
type ClipboardFileItem = {
kind: string;
getAsFile: () => File | null;
};
type ClipboardDataLike = {
items?: ArrayLike<ClipboardFileItem> | null;
getData: (format: string) => string;
};
function getClipboardText(
clipboardData: ClipboardDataLike,
format: "text/plain" | "text"
): string {
try {
return clipboardData.getData(format);
} catch {
return "";
}
}
export function getPastedFilesIfNoText(
clipboardData?: ClipboardDataLike | null
): File[] {
if (!clipboardData) {
return [];
}
const plainText = getClipboardText(clipboardData, "text/plain").trim();
const fallbackText = getClipboardText(clipboardData, "text").trim();
// Apps like PowerPoint on macOS can place both rendered image data and the
// original text on the clipboard. Prefer letting the textarea consume text.
if (plainText || fallbackText || !clipboardData.items) {
return [];
}
const pastedFiles: File[] = [];
for (let i = 0; i < clipboardData.items.length; i++) {
const item = clipboardData.items[i];
if (item?.kind !== "file") {
continue;
}
const file = item.getAsFile();
if (file) {
pastedFiles.push(file);
}
}
return pastedFiles;
}

View File

@@ -0,0 +1,102 @@
import {
getCodeLanguage,
getDataLanguage,
getLanguageByMime,
isMarkdownFile,
} from "./languages";
describe("getCodeLanguage", () => {
it.each([
["app.py", "python"],
["index.ts", "typescript"],
["main.go", "go"],
["style.css", "css"],
["page.html", "html"],
["App.vue", "vue"],
["lib.rs", "rust"],
["main.cpp", "c++"],
["util.c", "c"],
["script.js", "javascript"],
])("%s → %s", (filename, expected) => {
expect(getCodeLanguage(filename)).toBe(expected);
});
it.each([
[".h", "c"],
[".inc", "php"],
[".m", "objective-c"],
[".re", "reason"],
])("override: %s → %s", (ext, expected) => {
expect(getCodeLanguage(`file${ext}`)).toBe(expected);
});
it("resolves by exact filename when there is no extension", () => {
expect(getCodeLanguage("Dockerfile")).toBe("dockerfile");
expect(getCodeLanguage("Makefile")).toBe("makefile");
});
it("is case-insensitive for filenames", () => {
expect(getCodeLanguage("INDEX.JS")).toBe("javascript");
expect(getCodeLanguage("dockerfile")).toBe("dockerfile");
});
it("returns null for unknown extensions", () => {
expect(getCodeLanguage("file.xyz123")).toBeNull();
});
it("excludes markdown extensions", () => {
expect(getCodeLanguage("README.md")).toBeNull();
expect(getCodeLanguage("notes.markdown")).toBeNull();
});
});
describe("getDataLanguage", () => {
it.each([
["config.json", "json"],
["config.yaml", "yaml"],
["config.yml", "yaml"],
["config.toml", "toml"],
["data.xml", "xml"],
["data.csv", "csv"],
])("%s → %s", (filename, expected) => {
expect(getDataLanguage(filename)).toBe(expected);
});
it("returns null for code files", () => {
expect(getDataLanguage("app.py")).toBeNull();
expect(getDataLanguage("header.h")).toBeNull();
expect(getDataLanguage("view.m")).toBeNull();
expect(getDataLanguage("component.re")).toBeNull();
});
});
describe("isMarkdownFile", () => {
it("recognises markdown extensions", () => {
expect(isMarkdownFile("README.md")).toBe(true);
expect(isMarkdownFile("doc.markdown")).toBe(true);
});
it("is case-insensitive", () => {
expect(isMarkdownFile("NOTES.MD")).toBe(true);
});
it("rejects non-markdown files", () => {
expect(isMarkdownFile("app.py")).toBe(false);
expect(isMarkdownFile("data.json")).toBe(false);
});
});
describe("getLanguageByMime", () => {
it("resolves known MIME types", () => {
expect(getLanguageByMime("text/x-python")).toBe("python");
expect(getLanguageByMime("text/javascript")).toBe("javascript");
});
it("strips parameters before matching", () => {
expect(getLanguageByMime("text/x-python; charset=utf-8")).toBe("python");
});
it("returns null for unknown MIME types", () => {
expect(getLanguageByMime("application/x-unknown-thing")).toBeNull();
});
});

View File

@@ -7,6 +7,7 @@ interface LinguistLanguage {
type: string;
extensions?: string[];
filenames?: string[];
codemirrorMimeType?: string;
}
interface LanguageMaps {
@@ -14,7 +15,23 @@ interface LanguageMaps {
filenames: Map<string, string>;
}
const allLanguages = Object.values(languages) as LinguistLanguage[];
// Explicit winners for extensions claimed by multiple linguist-languages entries
// where the "most extensions" heuristic below picks the wrong language.
const EXTENSION_OVERRIDES: Record<string, string> = {
".h": "c",
".inc": "php",
".m": "objective-c",
".re": "reason",
".rs": "rust",
};
// Sort so that languages with more extensions (i.e. more general-purpose) win
// when multiple languages claim the same extension (e.g. Ecmarkup vs HTML both
// claim .html — HTML should win because it's the canonical language for that
// extension). Known mis-rankings are patched by EXTENSION_OVERRIDES above.
const allLanguages = (Object.values(languages) as LinguistLanguage[]).sort(
(a, b) => (b.extensions?.length ?? 0) - (a.extensions?.length ?? 0)
);
// Collect extensions that linguist-languages assigns to "Markdown" so we can
// exclude them from the code-language map
@@ -25,14 +42,22 @@ const markdownExtensions = new Set(
);
function buildLanguageMaps(
type: string,
types: string[],
excludedExtensions?: Set<string>
): LanguageMaps {
const typeSet = new Set(types);
const extensions = new Map<string, string>();
const filenames = new Map<string, string>();
if (typeSet.has("programming") || typeSet.has("markup")) {
for (const [ext, lang] of Object.entries(EXTENSION_OVERRIDES)) {
if (excludedExtensions?.has(ext.toLowerCase())) continue;
extensions.set(ext, lang);
}
}
for (const lang of allLanguages) {
if (lang.type !== type) continue;
if (!typeSet.has(lang.type)) continue;
const name = lang.name.toLowerCase();
for (const ext of lang.extensions ?? []) {
@@ -57,13 +82,17 @@ function lookupLanguage(name: string, maps: LanguageMaps): string | null {
return (ext && maps.extensions.get(ext)) ?? maps.filenames.get(lower) ?? null;
}
const codeMaps = buildLanguageMaps("programming", markdownExtensions);
const dataMaps = buildLanguageMaps("data");
const codeMaps = buildLanguageMaps(
["programming", "markup"],
markdownExtensions
);
const dataMaps = buildLanguageMaps(["data"]);
/**
* Returns the language name for a given file name, or null if it's not a
* recognised code file. Looks up by extension first, then by exact filename
* (e.g. "Dockerfile", "Makefile"). Runs in O(1).
* recognised code or markup file (programming + markup types from
* linguist-languages, e.g. Python, HTML, CSS, Vue). Looks up by extension
* first, then by exact filename (e.g. "Dockerfile", "Makefile"). Runs in O(1).
*/
export function getCodeLanguage(name: string): string | null {
return lookupLanguage(name, codeMaps);
@@ -86,3 +115,20 @@ export function isMarkdownFile(name: string): boolean {
const ext = name.toLowerCase().match(LANGUAGE_EXT_PATTERN)?.[0];
return !!ext && markdownExtensions.has(ext);
}
const mimeToLanguage = new Map<string, string>();
for (const lang of allLanguages) {
if (lang.codemirrorMimeType && !mimeToLanguage.has(lang.codemirrorMimeType)) {
mimeToLanguage.set(lang.codemirrorMimeType, lang.name.toLowerCase());
}
}
/**
* Returns the language name for a given MIME type using the codemirrorMimeType
* field from linguist-languages (~297 entries). Returns null if unrecognised.
*/
export function getLanguageByMime(mime: string): string | null {
const base = mime.split(";")[0];
if (!base) return null;
return mimeToLanguage.get(base.trim().toLowerCase()) ?? null;
}

View File

@@ -6,10 +6,42 @@ import { cn } from "@/lib/utils";
// Throttle interval for scroll events (~60fps)
const SCROLL_THROTTLE_MS = 16;
/**
* A scrollable container that shows gradient or shadow indicators when
* content overflows above or below the visible area.
*
* HEIGHT CONSTRAINT REQUIREMENT
*
* This component relies on its inner scroll container having a smaller
* clientHeight than its scrollHeight. For that to happen, the entire
* ancestor chain must constrain height via flex sizing (flex-1 min-h-0),
* NOT via percentage heights (h-full).
*
* Per the CSS spec, height: 100% resolves to "auto" when the containing
* block's height is determined by flex layout (flex-auto, flex-1) rather
* than an explicit height property. When that happens, the container
* grows to fit its content, scrollHeight === clientHeight, and the
* scroll indicators never appear.
*
* Correct pattern: every ancestor up to the nearest fixed-height boundary
* must form an unbroken flex column chain using "flex-1 min-h-0":
*
* fixed-height-ancestor (e.g. h-[500px])
* flex flex-col flex-1 min-h-0 <-- use flex-1, NOT h-full
* ScrollIndicatorDiv
* ...tall content...
*
* Common mistakes:
* - Using h-full instead of flex-1 min-h-0 anywhere in the chain.
* - Placing this inside a parent with overflow-y: auto (e.g. Modal.Body),
* which becomes the scroll container instead of this component's inner div.
*/
export interface ScrollIndicatorDivProps
extends React.HTMLAttributes<HTMLDivElement> {
// Mask/Shadow options
disableIndicators?: boolean;
disableTopIndicator?: boolean;
disableBottomIndicator?: boolean;
backgroundColor?: string;
indicatorHeight?: string;
@@ -22,6 +54,8 @@ export interface ScrollIndicatorDivProps
export default function ScrollIndicatorDiv({
disableIndicators = false,
disableTopIndicator = false,
disableBottomIndicator = false,
backgroundColor = "var(--background-tint-02)",
indicatorHeight = "3rem",
variant = "gradient",
@@ -77,13 +111,19 @@ export default function ScrollIndicatorDiv({
// Update on scroll (throttled)
container.addEventListener("scroll", handleScroll, { passive: true });
// Update on resize (in case content changes)
// Update when the container itself resizes
const resizeObserver = new ResizeObserver(updateScrollIndicators);
resizeObserver.observe(container);
// Update when descendants change (e.g. syntax highlighting mutates the
// DOM after initial render, which changes scrollHeight without firing
// resize or scroll events on the container).
const mutationObserver = new MutationObserver(handleScroll);
mutationObserver.observe(container, { childList: true, subtree: true });
return () => {
container.removeEventListener("scroll", handleScroll);
resizeObserver.disconnect();
mutationObserver.disconnect();
if (throttleTimeoutRef.current) {
clearTimeout(throttleTimeoutRef.current);
}
@@ -120,7 +160,7 @@ export default function ScrollIndicatorDiv({
return (
<div className="relative flex-1 min-h-0 overflow-y-hidden flex flex-col w-full">
{/* Top indicator */}
{!disableIndicators && showTopIndicator && (
{!disableIndicators && !disableTopIndicator && showTopIndicator && (
<div
className="absolute top-0 left-0 right-0 z-[20] pointer-events-none transition-opacity duration-200"
style={getIndicatorStyle("top")}
@@ -141,7 +181,7 @@ export default function ScrollIndicatorDiv({
</div>
{/* Bottom indicator */}
{!disableIndicators && showBottomIndicator && (
{!disableIndicators && !disableBottomIndicator && showBottomIndicator && (
<div
className="absolute bottom-0 left-0 right-0 z-[20] pointer-events-none transition-opacity duration-200"
style={getIndicatorStyle("bottom")}

View File

@@ -118,6 +118,21 @@ describe("InputComboBox", () => {
expect(screen.queryByRole("listbox")).not.toBeInTheDocument();
});
test("shows all options on focus when a value is already selected", () => {
render(
<InputComboBox
placeholder="Select"
value="apple"
options={mockOptions}
/>
);
const input = screen.getByDisplayValue("Apple");
fireEvent.focus(input);
const options = screen.getAllByRole("option");
expect(options.length).toBe(3);
});
test("closes dropdown on tab", async () => {
const user = setupUser();
render(

View File

@@ -322,24 +322,32 @@ const InputComboBox = ({
const handleFocus = useCallback(() => {
if (hasOptions) {
setInputValue("");
setIsOpen(true);
setHighlightedIndex(-1); // Start with no highlight on focus
setIsKeyboardNav(false); // Start with mouse mode
setHighlightedIndex(-1);
setIsKeyboardNav(false);
}
}, [hasOptions, setIsOpen, setHighlightedIndex, setIsKeyboardNav]);
}, [
hasOptions,
setInputValue,
setIsOpen,
setHighlightedIndex,
setIsKeyboardNav,
]);
const toggleDropdown = useCallback(() => {
if (!disabled && hasOptions) {
setIsOpen((prev) => {
const newOpen = !prev;
if (newOpen) {
setHighlightedIndex(-1); // Reset highlight when opening
setInputValue("");
setHighlightedIndex(-1);
}
return newOpen;
});
inputRef.current?.focus();
}
}, [disabled, hasOptions, setIsOpen, setHighlightedIndex]);
}, [disabled, hasOptions, setIsOpen, setInputValue, setHighlightedIndex]);
const autoId = useId();
const fieldId = fieldContext?.baseId || name || `combo-box-${autoId}`;

View File

@@ -20,21 +20,26 @@ export function useComboBoxState({ value, options }: UseComboBoxStateProps) {
const [highlightedIndex, setHighlightedIndex] = useState(-1);
const [isKeyboardNav, setIsKeyboardNav] = useState(false);
// State synchronization logic
// Only sync when the dropdown is closed or when value changes significantly
// Sync inputValue with the external value prop.
// When the dropdown is closed, always reflect the controlled value.
// When the dropdown is open, only sync if the *value prop itself* changes
// (e.g. parent programmatically updates it), not when inputValue changes
// (e.g. user clears the field on focus to browse all options).
useEffect(() => {
// If dropdown is closed, always sync with prop value
if (!isOpen) {
setInputValue(value);
} else {
// If dropdown is open, only sync if the new value is an exact match with an option
// This prevents interference when user is typing
}
}, [value, isOpen]);
useEffect(() => {
if (isOpen) {
const isExactOptionMatch = options.some((opt) => opt.value === value);
if (isExactOptionMatch && inputValue !== value) {
if (isExactOptionMatch) {
setInputValue(value);
}
}
}, [value, isOpen, options, inputValue]);
// Only react to value prop changes while open, not inputValue changes
}, [value]);
// Reset highlight and keyboard nav when closing dropdown
useEffect(() => {

View File

@@ -2,7 +2,7 @@
import * as React from "react";
import * as SelectPrimitive from "@radix-ui/react-select";
import { cn, noProp } from "@/lib/utils";
import { cn } from "@/lib/utils";
import LineItem, { LineItemProps } from "@/refresh-components/buttons/LineItem";
import Text from "@/refresh-components/texts/Text";
import type { IconProps } from "@opal/types";
@@ -298,7 +298,10 @@ function InputSelectContent({
)}
sideOffset={4}
position="popper"
onMouseDown={noProp()}
onMouseDown={(e) => {
e.stopPropagation();
e.preventDefault();
}}
{...props}
>
<SelectPrimitive.Viewport className="flex flex-col gap-1">

View File

@@ -112,9 +112,11 @@ function MemoryItem({
/>
</Disabled>
</Section>
{isFocused && (
<div
className={isFocused ? "visible" : "invisible h-0 overflow-hidden"}
>
<CharacterCount value={memory.content} limit={MAX_MEMORY_LENGTH} />
)}
</div>
</Section>
</div>
);

View File

@@ -661,7 +661,7 @@ export default function AgentEditorPage({
// Sharing
shared_user_ids: existingAgent?.users?.map((user) => user.id) ?? [],
shared_group_ids: existingAgent?.groups ?? [],
is_public: existingAgent?.is_public ?? true,
is_public: existingAgent?.is_public ?? false,
label_ids: existingAgent?.labels?.map((l) => l.id) ?? [],
featured: existingAgent?.featured ?? false,
};
@@ -967,6 +967,14 @@ export default function AgentEditorPage({
validateOnChange
validateOnBlur
validateOnMount
initialTouched={{
description:
initialValues.description.length >
MAX_CHARACTERS_AGENT_DESCRIPTION,
starter_messages: initialValues.starter_messages.map(
(msg) => msg.length > MAX_CHARACTERS_STARTER_MESSAGE
) as unknown as boolean,
}}
initialStatus={{ warnings: {} }}
>
{({ isSubmitting, isValid, dirty, values, setFieldValue }) => {
@@ -1201,18 +1209,33 @@ export default function AgentEditorPage({
>
Cancel
</OpalButton>
<Disabled
disabled={
isSubmitting ||
!isValid ||
!dirty ||
hasUploadingFiles
<SimpleTooltip
tooltip={
isSubmitting
? "Saving changes..."
: !isValid
? "Please fix the errors in the form before saving."
: !dirty
? "No changes have been made."
: hasUploadingFiles
? "Please wait for files to finish uploading."
: undefined
}
side="bottom"
>
<OpalButton type="submit">
{existingAgent ? "Save" : "Create"}
</OpalButton>
</Disabled>
<Disabled
disabled={
isSubmitting ||
!isValid ||
!dirty ||
hasUploadingFiles
}
>
<OpalButton type="submit">
{existingAgent ? "Save" : "Create"}
</OpalButton>
</Disabled>
</SimpleTooltip>
</div>
}
backButton
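Note the nesting above: the tooltip wraps the Disabled wrapper rather than the button itself. Disabled controls do not emit hover events, so a tooltip attached directly to the disabled button would never open; the wrapper stays hoverable and can explain why saving is blocked. Distilled, with a hypothetical blockedReason in place of the ternary chain:

<SimpleTooltip tooltip={blockedReason} side="bottom">
  {/* The wrapper receives hover events even while the inner control is
      disabled, so the user can still learn why the action is unavailable. */}
  <Disabled disabled={Boolean(blockedReason)}>
    <OpalButton type="submit">Save</OpalButton>
  </Disabled>
</SimpleTooltip>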

View File

@@ -465,7 +465,6 @@ export default function AppPage({ firstMessage }: ChatPageProps) {
const onChat = useCallback(
(message: string) => {
resetInputBar();
onSubmit({
message,
currentMessageFiles,
@@ -476,7 +475,6 @@ export default function AppPage({ firstMessage }: ChatPageProps) {
}
},
[
resetInputBar,
onSubmit,
currentMessageFiles,
deepResearchEnabledForCurrentWorkflow,
@@ -510,7 +508,6 @@ export default function AppPage({ firstMessage }: ChatPageProps) {
// If we're in an existing chat session, always use chat mode
// (appMode only applies to new sessions)
if (currentChatSessionId) {
resetInputBar();
onSubmit({
message,
currentMessageFiles,
@@ -523,7 +520,7 @@ export default function AppPage({ firstMessage }: ChatPageProps) {
}
// For new sessions, let the query controller handle routing.
// resetInputBar is called inside onChat for chat-routed queries.
// resetInputBar is called inside useChatController.onSubmit for chat-routed queries.
// For search-routed queries, the input bar is intentionally kept
// so the user can see and refine their search query.
await submitQuery(message, onChat);
@@ -532,7 +529,6 @@ export default function AppPage({ firstMessage }: ChatPageProps) {
currentChatSessionId,
submitQuery,
onChat,
resetInputBar,
onSubmit,
currentMessageFiles,
deepResearchEnabledForCurrentWorkflow,
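Removing resetInputBar from these call sites centralizes it in useChatController.onSubmit, per the comment above: chat-routed submissions clear the input exactly once, while search-routed submissions keep it so the query can be refined. Roughly, assuming the controller's internals (not shown in this diff):

// Inside useChatController (assumed placement):
const onSubmit = useCallback((args: SubmitArgs) => {
  resetInputBar(); // clear once, for every chat-routed submission
  // ...build and send the message...
}, [resetInputBar /* , ... */]);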

View File

@@ -114,6 +114,10 @@ function MCPServerCard({
const allToolIds = tools.map((t) => t.id);
const serverEnabled =
tools.length > 0 && tools.some((t) => isToolEnabled(t.id));
const needsAuth = !server.is_authenticated;
const authTooltip = needsAuth
? "Authenticate this MCP server before enabling its tools."
: undefined;
return (
<ExpandableCard.Root isFolded={isFolded} onFoldedChange={setIsFolded}>
@@ -122,10 +126,13 @@ function MCPServerCard({
description={server.description}
icon={getActionIcon(server.server_url, server.name)}
rightChildren={
<Switch
checked={serverEnabled}
onCheckedChange={(checked) => onToggleTools(allToolIds, checked)}
/>
<SimpleTooltip tooltip={authTooltip} side="top">
<Switch
checked={serverEnabled}
onCheckedChange={(checked) => onToggleTools(allToolIds, checked)}
disabled={needsAuth}
/>
</SimpleTooltip>
}
>
{tools.length > 0 && (
@@ -158,12 +165,15 @@ function MCPServerCard({
description={tool.description}
icon={tool.icon}
rightChildren={
<Switch
checked={isToolEnabled(tool.id)}
onCheckedChange={(checked) =>
onToggleTool(tool.id, checked)
}
/>
<SimpleTooltip tooltip={authTooltip} side="top">
<Switch
checked={isToolEnabled(tool.id)}
onCheckedChange={(checked) =>
onToggleTool(tool.id, checked)
}
disabled={needsAuth}
/>
</SimpleTooltip>
}
/>
))}

View File

@@ -21,6 +21,7 @@ import { ChatState } from "@/app/app/interfaces";
import { useForcedTools } from "@/lib/hooks/useForcedTools";
import { useAppMode } from "@/providers/AppModeProvider";
import useAppFocus from "@/hooks/useAppFocus";
import { getPastedFilesIfNoText } from "@/lib/clipboard";
import { cn, isImageFile } from "@/lib/utils";
import { Disabled } from "@opal/core";
import { useUser } from "@/providers/UserProvider";
@@ -233,20 +234,10 @@ const AppInputBar = React.memo(
}, [showFiles, currentMessageFiles]);
function handlePaste(event: React.ClipboardEvent) {
const items = event.clipboardData?.items;
if (items) {
const pastedFiles = [];
for (let i = 0; i < items.length; i++) {
const item = items[i];
if (item && item.kind === "file") {
const file = item.getAsFile();
if (file) pastedFiles.push(file);
}
}
if (pastedFiles.length > 0) {
event.preventDefault();
handleFileUpload(pastedFiles);
}
const pastedFiles = getPastedFilesIfNoText(event.clipboardData);
if (pastedFiles.length > 0) {
event.preventDefault();
handleFileUpload(pastedFiles);
}
}
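The helper's contract is what fixes the PowerPoint case: rich editors place both a text payload and a rendered bitmap on the clipboard, and the old loop grabbed the bitmap, hijacking ordinary text pastes. A plausible implementation of the imported helper (hypothetical; only its name and import path appear in the diff):

// Hypothetical sketch of "@/lib/clipboard": surface pasted files only when
// the clipboard carries no text payload, so rich-text pastes fall through
// to the default paste behavior.
export function getPastedFilesIfNoText(data: DataTransfer | null): File[] {
  if (!data || data.types.includes("text/plain")) return [];
  return Array.from(data.items)
    .filter((item) => item.kind === "file")
    .map((item) => item.getAsFile())
    .filter((file): file is File => file !== null);
}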

View File

@@ -7,21 +7,16 @@ import Text from "@/refresh-components/texts/Text";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import { cn } from "@/lib/utils";
import { Section } from "@/layouts/general-layouts";
import { getCodeLanguage, getDataLanguage } from "@/lib/languages";
import mime from "mime";
import {
getCodeLanguage,
getDataLanguage,
getLanguageByMime,
} from "@/lib/languages";
import { fetchChatFile } from "@/lib/chat/svc";
import { PreviewContext } from "@/sections/modals/PreviewModal/interfaces";
import { resolveVariant } from "@/sections/modals/PreviewModal/variants";
function resolveMimeType(mimeType: string, fileName: string): string {
if (mimeType !== "application/octet-stream") return mimeType;
const lower = fileName.toLowerCase();
if (lower.endsWith(".md") || lower.endsWith(".markdown"))
return "text/markdown";
if (lower.endsWith(".txt")) return "text/plain";
if (lower.endsWith(".csv")) return "text/csv";
return mimeType;
}
interface PreviewModalProps {
presentingDocument: MinimalOnyxDocument;
onClose: () => void;
@@ -47,9 +42,10 @@ export default function PreviewModal({
const language = useMemo(
() =>
getCodeLanguage(presentingDocument.semantic_identifier || "") ||
getLanguageByMime(mimeType) ||
getDataLanguage(presentingDocument.semantic_identifier || "") ||
"plaintext",
[presentingDocument.semantic_identifier]
[mimeType, presentingDocument.semantic_identifier]
);
const lineCount = useMemo(() => {
@@ -91,7 +87,10 @@ export default function PreviewModal({
const rawContentType =
response.headers.get("Content-Type") || "application/octet-stream";
const resolvedMime = resolveMimeType(rawContentType, originalFileName);
const resolvedMime =
rawContentType === "application/octet-stream"
? mime.getType(originalFileName) ?? rawContentType
: rawContentType;
setMimeType(resolvedMime);
const resolved = resolveVariant(
@@ -171,24 +170,24 @@ export default function PreviewModal({
onClose={onClose}
/>
{/* Body + floating footer wrapper */}
<Modal.Body padding={0} gap={0}>
<Section padding={0} gap={0}>
{isLoading ? (
<Section>
<SimpleLoader className="h-8 w-8" />
</Section>
) : loadError ? (
<Section padding={1}>
<Text text03 mainUiBody>
{loadError}
</Text>
</Section>
) : (
variant.renderContent(ctx)
)}
</Section>
</Modal.Body>
{/* Body — uses flex-1/min-h-0/overflow-hidden (not Modal.Body)
so that child ScrollIndicatorDivs become the actual scroll
container instead of the body stealing it via overflow-y-auto. */}
<div className="flex flex-col flex-1 min-h-0 overflow-hidden w-full bg-background-tint-01">
{isLoading ? (
<Section>
<SimpleLoader className="h-8 w-8" />
</Section>
) : loadError ? (
<Section padding={1}>
<Text text03 mainUiBody>
{loadError}
</Text>
</Section>
) : (
variant.renderContent(ctx)
)}
</div>
{/* Floating footer */}
{!isLoading && !loadError && (
@@ -199,8 +198,9 @@ export default function PreviewModal({
"p-4 pointer-events-none w-full"
)}
style={{
background:
"linear-gradient(to top, var(--background-tint-01) 40%, transparent)",
background: `linear-gradient(to top, var(--background-${
variant.codeBackground ? "code-01" : "tint-01"
}) 40%, transparent)`,
}}
>
{/* Left slot */}
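The inline octet-stream fallback earlier in this file replaces the hand-rolled extension map with the mime package, which covers far more file types. Expected lookups, assuming mime v2+ behavior (not taken from the diff):

import mime from "mime";

mime.getType("notes.md");   // "text/markdown"
mime.getType("report.csv"); // "text/csv"
mime.getType("dump.xyz");   // null -> keep the raw content type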

View File

@@ -19,6 +19,8 @@ export interface PreviewVariant
matches: (semanticIdentifier: string | null, mimeType: string) => boolean;
/** Whether the fetcher should read the blob as text. */
needsTextContent: boolean;
/** Whether the variant renders on a code-style background (bg-background-code-01). */
codeBackground: boolean;
/** String shown below the title in the modal header. */
headerDescription: (ctx: PreviewContext) => string;
/** Body content. */

View File

@@ -0,0 +1,37 @@
"use client";
import MinimalMarkdown from "@/components/chat/MinimalMarkdown";
import ScrollIndicatorDiv from "@/refresh-components/ScrollIndicatorDiv";
import { cn } from "@/lib/utils";
import "@/app/app/message/custom-code-styles.css";
interface CodePreviewProps {
content: string;
language?: string | null;
normalize?: boolean;
}
export function CodePreview({
content,
language,
normalize,
}: CodePreviewProps) {
// Wrap raw content in a fenced code block for syntax highlighting. Uses ~~~
// instead of ``` to avoid conflicts with backticks in the content. Any literal
// ~~~ sequences in the content are escaped so they don't accidentally close the fence.
const markdownContent = normalize
? `~~~${language || ""}\n${content.replace(/~~~/g, "\\~\\~\\~")}\n~~~`
: content;
return (
<ScrollIndicatorDiv
className={cn("p-4", normalize && "bg-background-code-01")}
backgroundColor={normalize ? "var(--background-code-01)" : undefined}
variant="shadow"
bottomSpacing="2rem"
disableBottomIndicator
>
<MinimalMarkdown content={markdownContent} showHeader={false} />
</ScrollIndicatorDiv>
);
}
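Concretely, for content that itself contains fence characters (sample content invented):

const content = 'grep "~~~" notes.txt\nconst s = `template`;';
// Backtick blocks pass through untouched; tilde runs are escaped so they
// cannot terminate the outer fence early.
const wrapped = `~~~ts\n${content.replace(/~~~/g, "\\~\\~\\~")}\n~~~`;
// ~~~ts
// grep "\~\~\~" notes.txt
// const s = `template`;
// ~~~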

View File

@@ -1,10 +1,8 @@
import MinimalMarkdown from "@/components/chat/MinimalMarkdown";
import Text from "@/refresh-components/texts/Text";
import { Section } from "@/layouts/general-layouts";
import { getCodeLanguage } from "@/lib/languages";
import { CodeBlock } from "@/app/app/message/CodeBlock";
import { extractCodeText } from "@/app/app/message/codeUtils";
import { PreviewVariant } from "@/sections/modals/PreviewModal/interfaces";
import { CodePreview } from "@/sections/modals/PreviewModal/variants/CodePreview";
import {
CopyButton,
DownloadButton,
@@ -15,6 +13,7 @@ export const codeVariant: PreviewVariant = {
width: "md",
height: "lg",
needsTextContent: true,
codeBackground: true,
headerDescription: (ctx) =>
ctx.fileContent
@@ -24,20 +23,7 @@ export const codeVariant: PreviewVariant = {
: "",
renderContent: (ctx) => (
<MinimalMarkdown
content={`\`\`\`${ctx.language}\n${ctx.fileContent}\n\n\`\`\``}
className="w-full break-words h-full"
components={{
code: ({ node, children }: any) => {
const codeText = extractCodeText(node, ctx.fileContent, children);
return (
<CodeBlock className="" codeText={codeText}>
{children}
</CodeBlock>
);
},
}}
/>
<CodePreview normalize content={ctx.fileContent} language={ctx.language} />
),
renderFooterLeft: (ctx) => (

View File

@@ -34,6 +34,7 @@ export const csvVariant: PreviewVariant = {
width: "lg",
height: "full",
needsTextContent: true,
codeBackground: false,
headerDescription: (ctx) => {
if (!ctx.fileContent) return "";
const { rows } = parseCsv(ctx.fileContent);

View File

@@ -1,10 +1,8 @@
import MinimalMarkdown from "@/components/chat/MinimalMarkdown";
import Text from "@/refresh-components/texts/Text";
import { Section } from "@/layouts/general-layouts";
import { getDataLanguage } from "@/lib/languages";
import { CodeBlock } from "@/app/app/message/CodeBlock";
import { extractCodeText } from "@/app/app/message/codeUtils";
import { getDataLanguage, getLanguageByMime } from "@/lib/languages";
import { PreviewVariant } from "@/sections/modals/PreviewModal/interfaces";
import { CodePreview } from "@/sections/modals/PreviewModal/variants/CodePreview";
import {
CopyButton,
DownloadButton,
@@ -22,10 +20,12 @@ function formatContent(language: string, content: string): string {
}
export const dataVariant: PreviewVariant = {
matches: (name) => !!getDataLanguage(name || ""),
matches: (name, mime) =>
!!getDataLanguage(name || "") || !!getLanguageByMime(mime),
width: "md",
height: "lg",
needsTextContent: true,
codeBackground: true,
headerDescription: (ctx) =>
ctx.fileContent
@@ -37,20 +37,7 @@ export const dataVariant: PreviewVariant = {
renderContent: (ctx) => {
const formatted = formatContent(ctx.language, ctx.fileContent);
return (
<MinimalMarkdown
content={`\`\`\`${ctx.language}\n${formatted}\n\n\`\`\``}
className="w-full break-words h-full"
components={{
code: ({ node, children }: any) => {
const codeText = extractCodeText(node, formatted, children);
return (
<CodeBlock className="" codeText={codeText}>
{children}
</CodeBlock>
);
},
}}
/>
<CodePreview normalize content={formatted} language={ctx.language} />
);
},

View File

@@ -130,6 +130,7 @@ export const docxVariant: PreviewVariant = {
width: "lg",
height: "full",
needsTextContent: false,
codeBackground: false,
headerDescription: () => {
if (lastDocxResult) {
const count = lastDocxResult.wordCount;

View File

@@ -11,6 +11,7 @@ export const imageVariant: PreviewVariant = {
width: "lg",
height: "full",
needsTextContent: false,
codeBackground: false,
headerDescription: () => "",
renderContent: (ctx) => (

View File

@@ -5,6 +5,7 @@ import { pdfVariant } from "@/sections/modals/PreviewModal/variants/pdfVariant";
import { csvVariant } from "@/sections/modals/PreviewModal/variants/csvVariant";
import { markdownVariant } from "@/sections/modals/PreviewModal/variants/markdownVariant";
import { dataVariant } from "@/sections/modals/PreviewModal/variants/dataVariant";
import { textVariant } from "@/sections/modals/PreviewModal/variants/textVariant";
import { unsupportedVariant } from "@/sections/modals/PreviewModal/variants/unsupportedVariant";
import { docxVariant } from "@/sections/modals/PreviewModal/variants/docxVariant";
@@ -14,9 +15,10 @@ const PREVIEW_VARIANTS: PreviewVariant[] = [
imageVariant,
pdfVariant,
csvVariant,
dataVariant,
markdownVariant,
docxVariant,
textVariant,
dataVariant,
];
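The reordering is load-bearing if resolveVariant returns the first variant whose matches() passes (an assumption; its body is truncated below): dataVariant, now broadened to also match by MIME type, moves to the end so the more specific markdown, docx, and text variants win first.

// Assumed shape of first-match resolution:
function resolveVariant(name: string | null, mimeType: string): PreviewVariant {
  return (
    PREVIEW_VARIANTS.find((v) => v.matches(name, mimeType)) ?? unsupportedVariant
  );
}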
export function resolveVariant(

View File

@@ -1,8 +1,7 @@
import MinimalMarkdown from "@/components/chat/MinimalMarkdown";
import ScrollIndicatorDiv from "@/refresh-components/ScrollIndicatorDiv";
import { Section } from "@/layouts/general-layouts";
import { isMarkdownFile } from "@/lib/languages";
import { PreviewVariant } from "@/sections/modals/PreviewModal/interfaces";
import { CodePreview } from "@/sections/modals/PreviewModal/variants/CodePreview";
import {
CopyButton,
DownloadButton,
@@ -11,7 +10,6 @@ import {
const MARKDOWN_MIMES = [
"text/markdown",
"text/x-markdown",
"text/plain",
"text/x-rst",
"text/x-org",
];
@@ -24,15 +22,11 @@ export const markdownVariant: PreviewVariant = {
width: "lg",
height: "full",
needsTextContent: true,
codeBackground: false,
headerDescription: () => "",
renderContent: (ctx) => (
<ScrollIndicatorDiv className="flex-1 min-h-0 p-4" variant="shadow">
<MinimalMarkdown
content={ctx.fileContent}
className="w-full pb-4 text-lg break-words"
/>
</ScrollIndicatorDiv>
<CodePreview content={ctx.fileContent} language={ctx.language} />
),
renderFooterLeft: () => null,

View File

@@ -7,6 +7,7 @@ export const pdfVariant: PreviewVariant = {
width: "lg",
height: "full",
needsTextContent: false,
codeBackground: false,
headerDescription: () => "",
renderContent: (ctx) => (

View File

@@ -0,0 +1,55 @@
import Text from "@/refresh-components/texts/Text";
import { Section } from "@/layouts/general-layouts";
import { PreviewVariant } from "@/sections/modals/PreviewModal/interfaces";
import { CodePreview } from "@/sections/modals/PreviewModal/variants/CodePreview";
import {
CopyButton,
DownloadButton,
} from "@/sections/modals/PreviewModal/variants/shared";
const TEXT_MIMES = [
"text/plain",
"text/x-log",
"text/x-config",
"text/tab-separated-values",
];
const TEXT_EXTENSIONS = [".txt", ".log", ".conf", ".tsv"];
export const textVariant: PreviewVariant = {
matches: (name, mime) => {
if (TEXT_MIMES.some((supportedMime) => mime.startsWith(supportedMime))) {
return true;
}
const lowerName = (name || "").toLowerCase();
return TEXT_EXTENSIONS.some((extension) => lowerName.endsWith(extension));
},
width: "md",
height: "lg",
needsTextContent: true,
codeBackground: true,
headerDescription: (ctx) =>
ctx.fileContent
? `${ctx.lineCount} ${ctx.lineCount === 1 ? "line" : "lines"} · ${
ctx.fileSize
}`
: "",
renderContent: (ctx) => (
<CodePreview normalize content={ctx.fileContent} language={ctx.language} />
),
renderFooterLeft: (ctx) => (
<Text text03 mainUiBody className="select-none">
{ctx.lineCount} {ctx.lineCount === 1 ? "line" : "lines"}
</Text>
),
renderFooterRight: (ctx) => (
<Section flexDirection="row" width="fit">
<CopyButton getText={() => ctx.fileContent} />
<DownloadButton fileUrl={ctx.fileUrl} fileName={ctx.fileName} />
</Section>
),
};
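Illustrative calls for the matcher above (inputs invented):

textVariant.matches("server.log", "application/octet-stream"); // true  (extension fallback)
textVariant.matches("README", "text/plain; charset=utf-8");    // true  (MIME prefix)
textVariant.matches("photo.png", "image/png");                 // false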

View File

@@ -5,13 +5,14 @@ import { DownloadButton } from "@/sections/modals/PreviewModal/variants/shared";
export const unsupportedVariant: PreviewVariant = {
matches: () => true,
width: "lg",
width: "md",
height: "full",
needsTextContent: false,
codeBackground: false,
headerDescription: () => "",
renderContent: (ctx) => (
<div className="flex flex-col items-center justify-center flex-1 min-h-0 gap-4 p-6">
<div className="flex flex-col items-center justify-center flex-1 w-full min-h-0 gap-4 p-6">
<Text as="p" text03 mainUiBody>
This file format is not supported for preview.
</Text>

View File

@@ -0,0 +1,83 @@
import React, { useEffect } from "react";
import { render, screen, waitFor } from "@tests/setup/test-utils";
import ShareAgentModal, { ShareAgentModalProps } from "./ShareAgentModal";
import { useCreateModal } from "@/refresh-components/contexts/ModalContext";
jest.mock("@/hooks/useShareableUsers", () => ({
__esModule: true,
default: jest.fn(() => ({ data: [] })),
}));
jest.mock("@/hooks/useShareableGroups", () => ({
__esModule: true,
default: jest.fn(() => ({ data: [] })),
}));
jest.mock("@/hooks/useAgents", () => ({
useAgent: jest.fn(() => ({ agent: null })),
}));
jest.mock("@/lib/hooks", () => ({
useLabels: jest.fn(() => ({
labels: [],
createLabel: jest.fn(),
})),
}));
function ModalHarness(props: ShareAgentModalProps) {
const modal = useCreateModal();
useEffect(() => {
modal.toggle(true);
}, [modal]);
return (
<modal.Provider>
<ShareAgentModal {...props} />
</modal.Provider>
);
}
function renderShareAgentModal(overrides: Partial<ShareAgentModalProps> = {}) {
const props: ShareAgentModalProps = {
userIds: [],
groupIds: [],
isPublic: false,
isFeatured: false,
labelIds: [],
...overrides,
};
return render(<ModalHarness {...props} />);
}
describe("ShareAgentModal", () => {
it("defaults to Users & Groups when the agent is private", async () => {
renderShareAgentModal({ isPublic: false });
await waitFor(() =>
expect(
screen.getByRole("tab", { name: "Users & Groups" })
).toHaveAttribute("data-state", "active")
);
expect(
screen.getByRole("tab", { name: "Your Organization" })
).toHaveAttribute("data-state", "inactive");
});
it("defaults to Your Organization when the agent is public", async () => {
renderShareAgentModal({ isPublic: true });
await waitFor(() =>
expect(
screen.getByRole("tab", { name: "Your Organization" })
).toHaveAttribute("data-state", "active")
);
expect(screen.getByRole("tab", { name: "Users & Groups" })).toHaveAttribute(
"data-state",
"inactive"
);
});
});

View File

@@ -1,333 +0,0 @@
"use client";
import { useState, useEffect, useCallback, useMemo } from "react";
import {
Table,
TableBody,
TableCell,
TableHead,
TableHeader,
TableRow,
} from "@/components/ui/table";
import { MinimalOnyxDocument } from "@/lib/search/interfaces";
import MinimalMarkdown from "@/components/chat/MinimalMarkdown";
import { Button } from "@opal/components";
import Modal, { BasicModalFooter } from "@/refresh-components/Modal";
import Text from "@/refresh-components/texts/Text";
import {
SvgDownloadCloud,
SvgFileText,
SvgZoomIn,
SvgZoomOut,
} from "@opal/icons";
import PreviewImage from "@/refresh-components/PreviewImage";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import ScrollIndicatorDiv from "@/refresh-components/ScrollIndicatorDiv";
import { cn } from "@/lib/utils";
import { Section } from "@/layouts/general-layouts";
export interface TextViewProps {
presentingDocument: MinimalOnyxDocument;
onClose: () => void;
}
export default function TextViewModal({
presentingDocument,
onClose,
}: TextViewProps) {
const [zoom, setZoom] = useState(100);
const [fileContent, setFileContent] = useState("");
const [fileUrl, setFileUrl] = useState("");
const [fileName, setFileName] = useState("");
const [isLoading, setIsLoading] = useState(true);
const [loadError, setLoadError] = useState<string | null>(null);
const [fileType, setFileType] = useState("application/octet-stream");
const csvData = useMemo(() => {
if (!fileType.startsWith("text/csv")) {
return null;
}
const lines = fileContent.split(/\r?\n/).filter((l) => l.length > 0);
const headers = lines.length > 0 ? lines[0]?.split(",") ?? [] : [];
const rows = lines.slice(1).map((line) => line.split(","));
return { headers, rows } as { headers: string[]; rows: string[][] };
}, [fileContent, fileType]);
// Detect if a given MIME type is one of the recognized markdown formats
const isMarkdownFormat = (mimeType: string): boolean => {
const markdownFormats = [
"text/markdown",
"text/x-markdown",
"text/plain",
"text/csv",
"text/x-rst",
"text/x-org",
"txt",
];
return markdownFormats.some((format) => mimeType.startsWith(format));
};
const isImageFormat = (mimeType: string) => {
const imageFormats = [
"image/png",
"image/jpeg",
"image/gif",
"image/svg+xml",
];
return imageFormats.some((format) => mimeType.startsWith(format));
};
// Detect if a given MIME type can be rendered in an <iframe>
const isSupportedIframeFormat = (mimeType: string): boolean => {
const supportedFormats = [
"application/pdf",
"image/png",
"image/jpeg",
"image/gif",
"image/svg+xml",
];
return supportedFormats.some((format) => mimeType.startsWith(format));
};
const fetchFile = useCallback(
async (signal?: AbortSignal) => {
setIsLoading(true);
setLoadError(null);
setFileContent("");
const fileIdLocal =
presentingDocument.document_id.split("__")[1] ||
presentingDocument.document_id;
try {
const response = await fetch(
`/api/chat/file/${encodeURIComponent(fileIdLocal)}`,
{
method: "GET",
signal,
cache: "force-cache",
}
);
if (!response.ok) {
setLoadError("Failed to load document.");
return;
}
const blob = await response.blob();
const url = window.URL.createObjectURL(blob);
setFileUrl((prev) => {
if (prev) {
window.URL.revokeObjectURL(prev);
}
return url;
});
const originalFileName =
presentingDocument.semantic_identifier || "document";
setFileName(originalFileName);
let contentType =
response.headers.get("Content-Type") || "application/octet-stream";
// If it's octet-stream but file name suggests a text-based extension, override accordingly
if (contentType === "application/octet-stream") {
const lowerName = originalFileName.toLowerCase();
if (lowerName.endsWith(".md") || lowerName.endsWith(".markdown")) {
contentType = "text/markdown";
} else if (lowerName.endsWith(".txt")) {
contentType = "text/plain";
} else if (lowerName.endsWith(".csv")) {
contentType = "text/csv";
}
}
setFileType(contentType);
// If the final content type looks like markdown, read its text
if (isMarkdownFormat(contentType)) {
const text = await blob.text();
setFileContent(text);
}
} catch (error) {
// Abort is expected on unmount / doc change
if (signal?.aborted) {
return;
}
setLoadError("Failed to load document.");
} finally {
// Prevent stale/aborted requests from clobbering the loading state.
// This is especially important in React StrictMode where effects can run twice.
if (!signal?.aborted) {
setIsLoading(false);
}
}
},
[presentingDocument]
);
useEffect(() => {
const controller = new AbortController();
fetchFile(controller.signal);
return () => {
controller.abort();
};
}, [fetchFile]);
useEffect(() => {
return () => {
if (fileUrl) {
window.URL.revokeObjectURL(fileUrl);
}
};
}, [fileUrl]);
const handleDownload = () => {
const link = document.createElement("a");
link.href = fileUrl;
link.download = fileName || presentingDocument.document_id;
document.body.appendChild(link);
link.click();
document.body.removeChild(link);
};
const handleZoomIn = () => setZoom((prev) => Math.min(prev + 25, 200));
const handleZoomOut = () => setZoom((prev) => Math.max(prev - 25, 100));
return (
<Modal
open
onOpenChange={(open) => {
if (!open) {
onClose();
}
}}
>
<Modal.Content
width="lg"
height="full"
preventAccidentalClose={false}
onOpenAutoFocus={(e) => e.preventDefault()}
>
<Modal.Header
icon={SvgFileText}
title={fileName || "Document"}
onClose={onClose}
>
<Section flexDirection="row" justifyContent="start" gap={0.25}>
<Button
prominence="tertiary"
onClick={handleZoomOut}
icon={SvgZoomOut}
tooltip="Zoom Out"
/>
<Text mainUiBody>{zoom}%</Text>
<Button
prominence="tertiary"
onClick={handleZoomIn}
icon={SvgZoomIn}
tooltip="Zoom In"
/>
<Button
prominence="tertiary"
onClick={handleDownload}
icon={SvgDownloadCloud}
tooltip="Download"
/>
</Section>
</Modal.Header>
<Modal.Body>
<Section>
{isLoading ? (
<SimpleLoader className="h-8 w-8" />
) : loadError ? (
<Text text03 mainUiBody>
{loadError}
</Text>
) : (
<div
className="flex flex-col flex-1 min-h-0 min-w-0 w-full transform origin-center transition-transform duration-300 ease-in-out"
style={{ transform: `scale(${zoom / 100})` }}
>
{isImageFormat(fileType) ? (
<PreviewImage
src={fileUrl}
alt={fileName}
className="w-full flex-1 min-h-0"
/>
) : isSupportedIframeFormat(fileType) ? (
<iframe
src={`${fileUrl}#toolbar=0`}
className="w-full h-full flex-1 min-h-0 border-none"
title="File Viewer"
/>
) : isMarkdownFormat(fileType) ? (
<ScrollIndicatorDiv
className="flex-1 min-h-0 p-4"
variant="shadow"
>
{csvData ? (
<Table>
<TableHeader className="sticky top-0 z-sticky">
<TableRow className="bg-background-tint-02">
{csvData.headers.map((h, i) => (
<TableHead key={i}>
<Text
as="p"
className="line-clamp-2 font-medium"
text03
mainUiBody
>
{h}
</Text>
</TableHead>
))}
</TableRow>
</TableHeader>
<TableBody>
{csvData.rows.map((row, rIdx) => (
<TableRow key={rIdx}>
{csvData.headers.map((_, cIdx) => (
<TableCell
key={cIdx}
className={cn(
cIdx === 0 &&
"sticky left-0 bg-background-tint-01",
"py-0 px-4 whitespace-normal break-words"
)}
>
{row?.[cIdx] ?? ""}
</TableCell>
))}
</TableRow>
))}
</TableBody>
</Table>
) : (
<MinimalMarkdown
content={fileContent}
className="w-full pb-4 h-full text-lg break-words"
/>
)}
</ScrollIndicatorDiv>
) : (
<div className="flex flex-col items-center justify-center flex-1 min-h-0 p-6 gap-4">
<Text as="p" text03 mainUiBody>
This file format is not supported for preview.
</Text>
<Button onClick={handleDownload}>Download File</Button>
</div>
)}
</div>
)}
</Section>
</Modal.Body>
<Modal.Footer>
<BasicModalFooter
submit={<Button onClick={handleDownload}>Download File</Button>}
/>
</Modal.Footer>
</Modal.Content>
</Modal>
);
}

View File

@@ -260,6 +260,7 @@ module.exports = {
"code-string": "var(--code-string)",
"code-number": "var(--code-number)",
"code-definition": "var(--code-definition)",
"background-code-01": "var(--background-code-01)",
// Shimmer colors for loading animations
"shimmer-base": "var(--shimmer-base)",

View File

@@ -187,7 +187,10 @@ for (const theme of THEMES) {
/\//g,
"-"
)}`;
await expectScreenshot(page, { name: screenshotName });
await expectScreenshot(page, {
name: screenshotName,
mask: ['[data-testid="admin-date-range-selector-button"]'],
});
});
}
});
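Masking keeps the date switcher's ever-changing label from failing the visual diff. The project's expectScreenshot helper presumably adapts something like Playwright's native assertion, whose mask option takes locators rather than selector strings:

// Native Playwright equivalent (assumed; the wrapper's signature differs):
await expect(page).toHaveScreenshot(screenshotName, {
  mask: [page.locator('[data-testid="admin-date-range-selector-button"]')],
});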

View File

@@ -1860,6 +1860,9 @@ test.describe("MCP OAuth flows", () => {
toolName: TOOL_NAMES.admin,
logStep,
});
const createdAgent = await adminApiClient.getAssistant(agentId);
expect(createdAgent.is_public).toBe(false);
logStep("Verified newly created agent is private by default");
const adminToolId = await fetchMcpToolIdByName(
page,
serverId,
@@ -1899,6 +1902,13 @@ test.describe("MCP OAuth flows", () => {
).toBeVisible({ timeout: 15000 });
logStep("Verified MCP server card is still visible on actions page");
await adminApiClient.updateAgentSharing(agentId, {
isPublic: true,
userIds: createdAgent.users.map((user) => user.id),
groupIds: createdAgent.groups,
});
logStep("Published agent explicitly for end-user MCP flow");
adminArtifacts = {
serverId,
serverName,

View File

@@ -681,6 +681,9 @@ export class OnyxApiClient {
async getAssistant(agentId: number): Promise<{
id: number;
is_public: boolean;
users: Array<{ id: string }>;
groups: number[];
tools: Array<{ id: number; mcp_server_id?: number | null }>;
}> {
const response = await this.get(`/persona/${agentId}`);
@@ -690,6 +693,37 @@ export class OnyxApiClient {
);
}
async updateAgentSharing(
agentId: number,
options: {
userIds?: string[];
groupIds?: number[];
isPublic?: boolean;
labelIds?: number[];
}
): Promise<void> {
const response = await this.request.patch(
`${this.baseUrl}/persona/${agentId}/share`,
{
data: {
user_ids: options.userIds,
group_ids: options.groupIds,
is_public: options.isPublic,
label_ids: options.labelIds,
},
}
);
await this.handleResponse(
response,
`Failed to update sharing for assistant ${agentId}`
);
this.log(
`Updated assistant sharing: ${agentId} (is_public=${String(
options.isPublic
)})`
);
}
async listMcpServers(): Promise<any[]> {
const response = await this.get(`/admin/mcp/servers`);
const data = await this.handleResponse<{ mcp_servers: any[] }>(