Compare commits

...

31 Commits

Author SHA1 Message Date
Yuhong Sun
2db102a745 k 2025-09-10 14:41:42 -07:00
SubashMohan
56e4174a10 launch template changes 2025-09-10 23:34:26 +05:30
SubashMohan
d7dd3a23a8 Implement last accessed timestamp update for user files in chat and enhance file upload handling in ProjectContextPanel. Refactor FilesList to support file removal and improve UI for file uploads. 2025-09-10 23:04:07 +05:30
SubashMohan
8446c6956e onpickrecent implementation for projectcontextpanel 2025-09-10 16:22:39 +05:30
SubashMohan
420002309e fix context length issue and file removal from chat and project context panel 2025-09-10 15:32:24 +05:30
SubashMohan
e40300cef4 add failed status handling and fix upload flickering 2025-09-09 13:24:06 +05:30
SubashMohan
712a86ae82 type fixes 2025-09-08 16:09:26 +05:30
SubashMohan
bb34971149 Refactor user file and project handling to remove folder references, update models and relationships to support user projects, and enhance database migrations. Adjust API responses and frontend components to reflect the new project-based structure. 2025-09-08 11:43:23 +05:30
SubashMohan
8b0fd5eb6a remove mydocuments code from backend 2025-09-08 11:43:23 +05:30
SubashMohan
b692c97812 ui improvements and moving chatbar near project context panel 2025-09-08 11:43:23 +05:30
SubashMohan
8dc0c1d25d remove migration file 2025-09-08 11:43:23 +05:30
SubashMohan
f4754c865f remove mydocument code from froentend 2025-09-08 11:43:23 +05:30
SubashMohan
bf66840e85 add userfiles feature in assistant and context token handling across features 2025-09-08 11:43:23 +05:30
SubashMohan
d0a338a761 add instructions in search pipeline and assistant my documents reference removal 2025-09-08 11:43:21 +05:30
SubashMohan
1cb18b4c2a my documents removal phase 2 2025-09-08 11:42:50 +05:30
SubashMohan
0ad817f339 remove few my documents reference 2025-09-08 11:42:50 +05:30
SubashMohan
9aa01daf05 Refactor user file handling to support project-based access and improve integration with chat sessions. Update models and APIs to utilize UUIDs for user file IDs, remove folder-related features, and enhance file retrieval logic 2025-09-08 11:42:50 +05:30
SubashMohan
5333a135b7 remove folder feature 2025-09-08 11:42:50 +05:30
SubashMohan
b86137131d Add project management API and models for user files and projects 2025-09-08 11:42:50 +05:30
SubashMohan
68f9149570 Add project management features to chat sessions 2025-09-08 11:42:50 +05:30
SubashMohan
fa7fdb5034 Modify ingestion and indexing tasks to use request_id instead of index_attempt_metadata 2025-09-08 11:42:50 +05:30
SubashMohan
15d90bcc7a Dropzone fix 2025-09-08 11:42:47 +05:30
SubashMohan
e0794d9aa1 Add prompt_id to UserFolder model and implement project instructions management 2025-09-08 11:41:44 +05:30
SubashMohan
d669e20d07 Refactor user file processing and project management features 2025-09-08 11:41:44 +05:30
SubashMohan
3de811a61f WIP:Implement project management features with recent files retrieval and user file integration 2025-09-08 11:41:44 +05:30
SubashMohan
f619579bc9 new file picker setup 2025-09-08 11:41:44 +05:30
SubashMohan
7e6e3b4c1d add project__userfile table and endpoints 2025-09-08 11:41:44 +05:30
SubashMohan
de51aabb8e model changes 2025-09-08 11:41:44 +05:30
SubashMohan
90357a2705 Enhance user file processing with indexing pipeline integration and error handling 2025-09-08 11:41:44 +05:30
SubashMohan
6369c2f3d8 implement user file processing tasks and status management draft 2025-09-08 11:41:44 +05:30
SubashMohan
138c3db5ac initial indexing abstraction setup 2025-09-08 11:41:44 +05:30
158 changed files with 7682 additions and 12044 deletions

View File

@@ -1,469 +1,496 @@
/* Copy this file into '.vscode/launch.json' or merge its contents into your existing configurations. */
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"compounds": [
{
// Dummy entry used to label the group
"name": "--- Compound ---",
"configurations": ["--- Individual ---"],
"presentation": {
"group": "1"
}
},
{
"name": "Run All Onyx Services",
"configurations": [
"Web Server",
"Model Server",
"API Server",
"Slack Bot",
"Celery primary",
"Celery light",
"Celery heavy",
"Celery docfetching",
"Celery docprocessing",
"Celery beat",
"Celery monitoring"
],
"presentation": {
"group": "1"
},
"stopAll": true
},
{
"name": "Web / Model / API",
"configurations": ["Web Server", "Model Server", "API Server"],
"presentation": {
"group": "1"
},
"stopAll": true
},
{
"name": "Celery (all)",
"configurations": [
"Celery primary",
"Celery light",
"Celery heavy",
"Celery docfetching",
"Celery docprocessing",
"Celery beat",
"Celery monitoring"
],
"presentation": {
"group": "1"
},
"stopAll": true
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"compounds": [
{
// Dummy entry used to label the group
"name": "--- Compound ---",
"configurations": ["--- Individual ---"],
"presentation": {
"group": "1"
}
],
"configurations": [
{
// Dummy entry used to label the group
"name": "--- Individual ---",
"type": "node",
"request": "launch",
"presentation": {
"group": "2",
"order": 0
}
},
{
"name": "Web Server",
"type": "node",
"request": "launch",
"cwd": "${workspaceRoot}/web",
"runtimeExecutable": "npm",
"envFile": "${workspaceFolder}/.vscode/.env",
"runtimeArgs": ["run", "dev"],
"presentation": {
"group": "2"
},
"console": "integratedTerminal",
"consoleTitle": "Web Server Console"
},
{
"name": "Model Server",
"consoleName": "Model Server",
"type": "debugpy",
"request": "launch",
"module": "uvicorn",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1"
},
"args": ["model_server.main:app", "--reload", "--port", "9000"],
"presentation": {
"group": "2"
},
"consoleTitle": "Model Server Console"
},
{
"name": "API Server",
"consoleName": "API Server",
"type": "debugpy",
"request": "launch",
"module": "uvicorn",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_DANSWER_MODEL_INTERACTIONS": "True",
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1"
},
"args": ["onyx.main:app", "--reload", "--port", "8080"],
"presentation": {
"group": "2"
},
"consoleTitle": "API Server Console"
},
// For the listener to access the Slack API,
// DANSWER_BOT_SLACK_APP_TOKEN & DANSWER_BOT_SLACK_BOT_TOKEN need to be set in .env file located in the root of the project
{
"name": "Slack Bot",
"consoleName": "Slack Bot",
"type": "debugpy",
"request": "launch",
"program": "onyx/onyxbot/slack/listener.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"presentation": {
"group": "2"
},
"consoleTitle": "Slack Bot Console"
},
{
"name": "Celery primary",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "INFO",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-A",
"onyx.background.celery.versioned_apps.primary",
"worker",
"--pool=threads",
"--concurrency=4",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=primary@%n",
"-Q",
"celery"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery primary Console"
},
{
"name": "Celery light",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "INFO",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-A",
"onyx.background.celery.versioned_apps.light",
"worker",
"--pool=threads",
"--concurrency=64",
"--prefetch-multiplier=8",
"--loglevel=INFO",
"--hostname=light@%n",
"-Q",
"vespa_metadata_sync,connector_deletion,doc_permissions_upsert,index_attempt_cleanup"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery light Console"
},
{
"name": "Celery heavy",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "INFO",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-A",
"onyx.background.celery.versioned_apps.heavy",
"worker",
"--pool=threads",
"--concurrency=4",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=heavy@%n",
"-Q",
"connector_pruning,connector_doc_permissions_sync,connector_external_group_sync"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery heavy Console"
},
{
"name": "Celery docfetching",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-A",
"onyx.background.celery.versioned_apps.docfetching",
"worker",
"--pool=threads",
"--concurrency=1",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=docfetching@%n",
"-Q",
"connector_doc_fetching,user_files_indexing"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery docfetching Console",
"justMyCode": false
},
{
"name": "Celery docprocessing",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"ENABLE_MULTIPASS_INDEXING": "false",
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-A",
"onyx.background.celery.versioned_apps.docprocessing",
"worker",
"--pool=threads",
"--concurrency=6",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=docprocessing@%n",
"-Q",
"docprocessing"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery docprocessing Console",
"justMyCode": false
"name": "Run All Onyx Services",
"configurations": [
"Web Server",
"Model Server",
"API Server",
"Slack Bot",
"Celery primary",
"Celery light",
"Celery heavy",
"Celery docfetching",
"Celery docprocessing",
"Celery beat",
"Celery monitoring",
"Celery user file processing"
],
"presentation": {
"group": "1"
}
},
{
"name": "Celery monitoring",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {},
"args": [
{
"name": "Web / Model / API",
"configurations": ["Web Server", "Model Server", "API Server"],
"presentation": {
"group": "1"
}
},
{
"name": "Celery (all)",
"configurations": [
"Celery primary",
"Celery light",
"Celery heavy",
"Celery docfetching",
"Celery docprocessing",
"Celery beat",
"Celery monitoring",
"Celery user file processing"
],
"presentation": {
"group": "1"
},
"stopAll": true
}
],
"configurations": [
{
// Dummy entry used to label the group
"name": "--- Individual ---",
"type": "node",
"request": "launch",
"presentation": {
"group": "2",
"order": 0
}
},
{
"name": "Web Server",
"type": "node",
"request": "launch",
"cwd": "${workspaceRoot}/web",
"runtimeExecutable": "npm",
"envFile": "${workspaceFolder}/.vscode/.env",
"runtimeArgs": ["run", "dev"],
"presentation": {
"group": "2"
},
"console": "integratedTerminal",
"consoleTitle": "Web Server Console"
},
{
"name": "Model Server",
"consoleName": "Model Server",
"type": "debugpy",
"request": "launch",
"module": "uvicorn",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1"
},
"args": ["model_server.main:app", "--reload", "--port", "9000"],
"presentation": {
"group": "2"
},
"consoleTitle": "Model Server Console"
},
{
"name": "API Server",
"consoleName": "API Server",
"type": "debugpy",
"request": "launch",
"module": "uvicorn",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_DANSWER_MODEL_INTERACTIONS": "True",
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1"
},
"args": ["onyx.main:app", "--reload", "--port", "8080"],
"presentation": {
"group": "2"
},
"consoleTitle": "API Server Console"
},
// For the listener to access the Slack API,
// DANSWER_BOT_SLACK_APP_TOKEN & DANSWER_BOT_SLACK_BOT_TOKEN need to be set in .env file located in the root of the project
{
"name": "Slack Bot",
"consoleName": "Slack Bot",
"type": "debugpy",
"request": "launch",
"program": "onyx/onyxbot/slack/listener.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"presentation": {
"group": "2"
},
"consoleTitle": "Slack Bot Console"
},
{
"name": "Celery primary",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "INFO",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-A",
"onyx.background.celery.versioned_apps.primary",
"worker",
"--pool=threads",
"--concurrency=4",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=primary@%n",
"-Q",
"celery"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery primary Console"
},
{
"name": "Celery light",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "INFO",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-A",
"onyx.background.celery.versioned_apps.light",
"worker",
"--pool=threads",
"--concurrency=64",
"--prefetch-multiplier=8",
"--loglevel=INFO",
"--hostname=light@%n",
"-Q",
"vespa_metadata_sync,connector_deletion,doc_permissions_upsert,index_attempt_cleanup"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery light Console"
},
{
"name": "Celery heavy",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "INFO",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-A",
"onyx.background.celery.versioned_apps.heavy",
"worker",
"--pool=threads",
"--concurrency=4",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=heavy@%n",
"-Q",
"connector_pruning,connector_doc_permissions_sync,connector_external_group_sync"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery heavy Console"
},
{
"name": "Celery docfetching",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-A",
"onyx.background.celery.versioned_apps.monitoring",
"onyx.background.celery.versioned_apps.docfetching",
"worker",
"--pool=solo",
"--pool=threads",
"--concurrency=1",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=monitoring@%n",
"--hostname=docfetching@%n",
"-Q",
"monitoring"
],
"presentation": {
"connector_doc_fetching,user_files_indexing"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery monitoring Console"
},
{
"name": "Celery beat",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"consoleTitle": "Celery docfetching Console",
"justMyCode": false
},
{
"name": "Celery docprocessing",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"ENABLE_MULTIPASS_INDEXING": "false",
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
},
"args": [
"-A",
"onyx.background.celery.versioned_apps.beat",
"beat",
"--loglevel=INFO"
],
"presentation": {
"onyx.background.celery.versioned_apps.docprocessing",
"worker",
"--pool=threads",
"--concurrency=6",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=docprocessing@%n",
"-Q",
"docprocessing"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery beat Console"
},
{
"name": "Pytest",
"consoleName": "Pytest",
"type": "debugpy",
"request": "launch",
"module": "pytest",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-v"
// Specify a specific module/test to run or provide nothing to run all tests
//"tests/unit/onyx/llm/answering/test_prune_and_merge.py"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Pytest Console"
},
{
// Dummy entry used to label the group
"name": "--- Tasks ---",
"type": "node",
"request": "launch",
"presentation": {
"group": "3",
"order": 0
}
},
{
"name": "Clear and Restart External Volumes and Containers",
"type": "node",
"request": "launch",
"runtimeExecutable": "bash",
"runtimeArgs": [
"${workspaceFolder}/backend/scripts/restart_containers.sh"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"stopOnEntry": true,
"presentation": {
"group": "3"
}
},
{
// Celery jobs launched through a single background script (legacy)
// Recommend using the "Celery (all)" compound launch instead.
"name": "Background Jobs",
"consoleName": "Background Jobs",
"type": "debugpy",
"request": "launch",
"program": "scripts/dev_run_background_jobs.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_DANSWER_MODEL_INTERACTIONS": "True",
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
}
},
{
"name": "Install Python Requirements",
"type": "node",
"request": "launch",
"runtimeExecutable": "bash",
"runtimeArgs": [
"-c",
"pip install -r backend/requirements/default.txt && pip install -r backend/requirements/dev.txt && pip install -r backend/requirements/ee.txt && pip install -r backend/requirements/model_server.txt"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "3"
}
},
"consoleTitle": "Celery docprocessing Console",
"justMyCode": false
},
{
// script to generate the openapi schema
"name": "Onyx OpenAPI Schema Generator",
"name": "Celery monitoring",
"type": "debugpy",
"request": "launch",
"program": "scripts/onyx_openapi_schema.py",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.env",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {},
"args": [
"-A",
"onyx.background.celery.versioned_apps.monitoring",
"worker",
"--pool=solo",
"--concurrency=1",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=monitoring@%n",
"-Q",
"monitoring"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery monitoring Console"
},
{
"name": "Celery beat",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"--filename",
"generated/openapi.json"
]
"-A",
"onyx.background.celery.versioned_apps.beat",
"beat",
"--loglevel=INFO"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery beat Console"
},
{
// script to debug multi tenant db issues
"name": "Onyx DB Manager (Top Chunks)",
"name": "Celery user file processing",
"type": "debugpy",
"request": "launch",
"program": "scripts/debugging/onyx_db.py",
"module": "celery",
"args": [
"-A",
"onyx.background.celery.versioned_apps.user_file_processing",
"worker",
"--loglevel=INFO",
"--hostname=user_file_processing@%n",
"--pool=threads",
"-Q",
"user_file_processing"
],
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.env",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"presentation": {
"group": "2"
},
"consoleTitle": "Celery user file processing Console"
},
{
"name": "Pytest",
"consoleName": "Pytest",
"type": "debugpy",
"request": "launch",
"module": "pytest",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"--password",
"your_password_here",
"--port",
"5433",
"--report",
"top-chunks",
"--filename",
"generated/tenants_by_num_docs.csv"
]
"-v"
// Specify a specific module/test to run or provide nothing to run all tests
//"tests/unit/onyx/llm/answering/test_prune_and_merge.py"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Pytest Console"
},
{
"name": "Debug React Web App in Chrome",
"type": "chrome",
"request": "launch",
"url": "http://localhost:3000",
"webRoot": "${workspaceFolder}/web"
{
// Dummy entry used to label the group
"name": "--- Tasks ---",
"type": "node",
"request": "launch",
"presentation": {
"group": "3",
"order": 0
}
},
{
"name": "Clear and Restart External Volumes and Containers",
"type": "node",
"request": "launch",
"runtimeExecutable": "bash",
"runtimeArgs": [
"${workspaceFolder}/backend/scripts/restart_containers.sh"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"stopOnEntry": true,
"presentation": {
"group": "3"
}
},
{
// Celery jobs launched through a single background script (legacy)
// Recommend using the "Celery (all)" compound launch instead.
"name": "Background Jobs",
"consoleName": "Background Jobs",
"type": "debugpy",
"request": "launch",
"program": "scripts/dev_run_background_jobs.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_DANSWER_MODEL_INTERACTIONS": "True",
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
}
},
{
"name": "Install Python Requirements",
"type": "node",
"request": "launch",
"runtimeExecutable": "bash",
"runtimeArgs": [
"-c",
"pip install -r backend/requirements/default.txt && pip install -r backend/requirements/dev.txt && pip install -r backend/requirements/ee.txt && pip install -r backend/requirements/model_server.txt"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "3"
}
},
{
// script to generate the openapi schema
"name": "Onyx OpenAPI Schema Generator",
"type": "debugpy",
"request": "launch",
"program": "scripts/onyx_openapi_schema.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.env",
"env": {
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"--filename",
"generated/openapi.json"
]
}
},
{
// script to debug multi tenant db issues
"name": "Onyx DB Manager (Top Chunks)",
"type": "debugpy",
"request": "launch",
"program": "scripts/debugging/onyx_db.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.env",
"env": {
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"--password",
"your_password_here",
"--port",
"5433",
"--report",
"top-chunks",
"--filename",
"generated/tenants_by_num_docs.csv"
]
},
{
"name": "Debug React Web App in Chrome",
"type": "chrome",
"request": "launch",
"url": "http://localhost:3000",
"webRoot": "${workspaceFolder}/web"
}
]
}

View File

@@ -23,12 +23,27 @@ RUN mkdir -p /app && \
chmod 755 /var/log/onyx && \
chown onyx:onyx /var/log/onyx
# Install build tools needed for compiling Rust packages like fastuuid
RUN apt-get update && apt-get install -y \
build-essential \
curl \
&& rm -rf /var/lib/apt/lists/*
# Install Rust (needed for fastuuid compilation)
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
ENV PATH="/root/.cargo/bin:${PATH}"
COPY ./requirements/model_server.txt /tmp/requirements.txt
RUN pip install --no-cache-dir --upgrade \
--retries 5 \
--timeout 30 \
-r /tmp/requirements.txt
# Clean up build tools to reduce image size
RUN apt-get remove -y build-essential curl && \
apt-get autoremove -y && \
rm -rf /root/.cargo /root/.rustup
RUN apt-get remove -y --allow-remove-essential perl-base && \
apt-get autoremove -y

View File

@@ -0,0 +1,380 @@
"""add project__userfile table and userfile column changes
Revision ID: 085d844e3953
Revises: 8818cf73fa1a
Create Date: 2025-09-05 14:24:50.026940
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql as psql
# revision identifiers, used by Alembic.
revision = "085d844e3953"
down_revision = "8818cf73fa1a"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Migrate user_file from an integer PK to a UUID PK, drop the legacy
    folder/chat_folder features, rename user_folder -> user_project, and add
    the project__user_file association plus new user_file/chat_session columns.

    The ordering below matters: the UUID column is introduced alongside the
    old integer PK (with a transitional UNIQUE constraint), FKs are rebound to
    it, and only then is the integer PK dropped and the UUID promoted to PK.
    """
    # 0) Ensure UUID generator exists (gen_random_uuid comes from pgcrypto)
    op.execute("CREATE EXTENSION IF NOT EXISTS pgcrypto")
    # Drop persona__user_folder table
    try:
        op.drop_table("persona__user_folder")
    except Exception:
        # Table might not exist, that's okay
        pass
    # Drop folder related tables and columns
    # First try to drop the foreign key constraint if it exists
    try:
        # TODO(subash): do proper deletion on constraints
        op.drop_constraint(
            "chat_session_folder_id_fkey", "chat_session", type_="foreignkey"
        )
    except Exception:
        # Constraint might not exist, that's okay
        pass
    # Then drop the folder_id column if it exists
    try:
        op.drop_column("chat_session", "folder_id")
    except Exception:
        # Column might not exist, that's okay
        pass
    # Finally drop the chat_folder table if it exists
    try:
        op.drop_table("chat_folder")
    except Exception:
        # Table might not exist, that's okay
        pass
    # 1) Add transitional UUID column on user_file + UNIQUE so FKs can reference it
    #    (server_default backfills every existing row with a fresh UUID)
    op.add_column(
        "user_file",
        sa.Column(
            "new_id",
            psql.UUID(as_uuid=True),
            nullable=False,
            server_default=sa.text("gen_random_uuid()"),
        ),
    )
    op.create_unique_constraint("uq_user_file_new_id", "user_file", ["new_id"])
    # 2) Move FK users to the transitional UUID
    # ---- persona__user_file.user_file_id (INT) -> UUID ----
    op.add_column(
        "persona__user_file",
        sa.Column("user_file_id_uuid", psql.UUID(as_uuid=True), nullable=True),
    )
    # Backfill the UUID FK values by joining on the old integer id
    op.execute(
        """
        UPDATE persona__user_file p
        SET user_file_id_uuid = uf.new_id
        FROM user_file uf
        WHERE p.user_file_id = uf.id
        """
    )
    # swap FK to reference user_file.new_id (the transitional UNIQUE)
    op.drop_constraint(
        "persona__user_file_user_file_id_fkey",
        "persona__user_file",
        type_="foreignkey",
    )
    op.alter_column("persona__user_file", "user_file_id_uuid", nullable=False)
    op.create_foreign_key(
        "persona__user_file_user_file_id_fkey",
        "persona__user_file",
        "user_file",
        local_cols=["user_file_id_uuid"],
        remote_cols=["new_id"],
    )
    op.drop_column("persona__user_file", "user_file_id")
    op.alter_column(
        "persona__user_file",
        "user_file_id_uuid",
        new_column_name="user_file_id",
        existing_type=psql.UUID(as_uuid=True),
        nullable=False,
    )
    # ---- end persona__user_file ----
    # (Repeat 2) for any other FK tables that point to user_file.id)
    # 3) Swap PK on user_file from int -> uuid
    op.drop_constraint("user_file_pkey", "user_file", type_="primary")
    op.drop_column("user_file", "id")
    op.alter_column(
        "user_file",
        "new_id",
        new_column_name="id",
        existing_type=psql.UUID(as_uuid=True),
        nullable=False,
    )
    op.create_primary_key("user_file_pkey", "user_file", ["id"])
    # 4) Now **force** FKs to bind to the PK:
    # (a) drop FK(s)
    op.drop_constraint(
        "persona__user_file_user_file_id_fkey",
        "persona__user_file",
        type_="foreignkey",
    )
    # (b) drop the transitional UNIQUE so it cannot be chosen
    op.drop_constraint("uq_user_file_new_id", "user_file", type_="unique")
    # (c) recreate FK(s) to user_file(id) - only PK remains, so it will bind there
    op.create_foreign_key(
        "persona__user_file_user_file_id_fkey",
        "persona__user_file",
        "user_file",
        local_cols=["user_file_id"],
        remote_cols=["id"],
    )
    # 5) Rename user_folder -> user_project and update dependent FKs/columns
    try:
        op.rename_table("user_folder", "user_project")
    except Exception:
        # Table might already be renamed
        pass
    # Drop user_file.folder_id if it exists (we don't keep one-to-many link)
    try:
        op.drop_column("user_file", "folder_id")
    except Exception:
        pass
    # 6) Safe to create new tables referencing the UUID PK
    op.create_table(
        "project__user_file",
        sa.Column("project_id", sa.Integer(), nullable=False),
        sa.Column("user_file_id", psql.UUID(as_uuid=True), nullable=False),
        sa.ForeignKeyConstraint(["project_id"], ["user_project.id"]),
        sa.ForeignKeyConstraint(["user_file_id"], ["user_file.id"]),
        sa.PrimaryKeyConstraint("project_id", "user_file_id"),
    )
    # 7) Remove CCPair relationship (was mis-numbered as a second "6)")
    # Drop the foreign key constraint first
    op.drop_constraint(
        "user_file_cc_pair_id_fkey",
        "user_file",
        type_="foreignkey",
    )
    # Drop the unique constraint
    op.drop_constraint(
        "user_file_cc_pair_id_key",
        "user_file",
        type_="unique",
    )
    # Drop the column
    op.drop_column("user_file", "cc_pair_id")
    # 8) Add extra columns
    # status uses a non-native enum (CHECK constraint) so no PG type is created
    op.add_column(
        "user_file",
        sa.Column(
            "status",
            sa.Enum(
                "processing",
                "completed",
                "failed",
                "canceled",
                name="userfilestatus",
                native_enum=False,
            ),
            nullable=False,
            server_default="processing",
        ),
    )
    op.add_column("user_file", sa.Column("chunk_count", sa.Integer(), nullable=True))
    # Drop deprecated document_id column if present
    try:
        op.drop_column("user_file", "document_id")
    except Exception:
        pass
    op.add_column(
        "user_file",
        sa.Column("last_accessed_at", sa.DateTime(timezone=True), nullable=True),
    )
    # Project-level instructions are stored as a prompt row referenced by FK
    op.add_column(
        "user_project",
        sa.Column("prompt_id", sa.Integer(), nullable=True),
    )
    op.create_foreign_key(
        "user_project_prompt_id_fkey",
        "user_project",
        "prompt",
        ["prompt_id"],
        ["id"],
    )
    # Chat sessions can now belong to a project (nullable: not all do)
    op.add_column(
        "chat_session",
        sa.Column("project_id", sa.Integer(), nullable=True),
    )
    op.create_foreign_key(
        "chat_session_project_id_fkey",
        "chat_session",
        "user_project",
        ["project_id"],
        ["id"],
    )
    # Add index on project_id for better query performance
    op.create_index(
        "ix_chat_session_project_id",
        "chat_session",
        ["project_id"],
    )
def downgrade() -> None:
    """Best-effort reversal of the UUID/project migration.

    Restores the folder tables, swaps user_file back to an integer PK
    (original integer id values are NOT recoverable - rows get fresh
    sequence values), re-adds the cc_pair relationship, and renames
    user_project back to user_folder.
    """
    # Recreate persona__user_folder table
    # NOTE(review): at this point the folder table is still named
    # "user_project" (the rename back to "user_folder" only happens at the
    # end of this function), so the FK to "user_folder.id" below may fail -
    # confirm ordering before relying on this downgrade.
    op.create_table(
        "persona__user_folder",
        sa.Column("persona_id", sa.Integer(), nullable=False),
        sa.Column("user_folder_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(["persona_id"], ["persona.id"]),
        sa.ForeignKeyConstraint(["user_folder_id"], ["user_folder.id"]),
        sa.PrimaryKeyConstraint("persona_id", "user_folder_id"),
    )
    # Recreate folder related tables and columns
    # First create the chat_folder table
    op.create_table(
        "chat_folder",
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column("user_id", psql.UUID(as_uuid=True), nullable=True),
        sa.Column("name", sa.String(), nullable=True),
        sa.Column("display_priority", sa.Integer(), nullable=True, default=0),
    )
    # Add foreign key for user_id after table creation
    op.create_foreign_key(
        "chat_folder_user_id_fkey",
        "chat_folder",
        "user",
        ["user_id"],
        ["id"],
    )
    # Add folder_id column to chat_session
    op.add_column(
        "chat_session",
        sa.Column("folder_id", sa.Integer(), nullable=True),
    )
    # Create foreign key constraint after both tables exist
    op.create_foreign_key(
        "chat_session_folder_id_fkey",
        "chat_session",
        "chat_folder",
        ["folder_id"],
        ["id"],
    )
    # Drop extra columns
    op.drop_column("user_file", "last_accessed_at")
    # Recreate document_id on downgrade
    # NOTE(review): adding a NOT NULL column with no default will fail on a
    # non-empty user_file table; the except clause silently hides that.
    try:
        op.add_column(
            "user_file", sa.Column("document_id", sa.String(), nullable=False)
        )
    except Exception:
        pass
    op.drop_column("user_file", "chunk_count")
    op.drop_column("user_file", "status")
    # No-op unless a native enum type was ever created (upgrade uses
    # native_enum=False); kept for safety.
    op.execute("DROP TYPE IF EXISTS userfilestatus")
    # Drop association table
    op.drop_table("project__user_file")
    # Drop index before dropping the column
    op.drop_index("ix_chat_session_project_id", table_name="chat_session")
    op.drop_column("chat_session", "project_id")
    # Recreate an integer PK (best-effort; original values aren't retained)
    op.drop_constraint(
        "persona__user_file_user_file_id_fkey", "persona__user_file", type_="foreignkey"
    )
    op.drop_constraint("user_file_pkey", "user_file", type_="primary")
    # NOTE(review): NOT NULL with the default only attached afterwards - on a
    # non-empty table this add_column may fail before the sequence exists.
    op.add_column(
        "user_file",
        sa.Column("id_int_tmp", sa.Integer(), autoincrement=True, nullable=False),
    )
    op.execute(
        "CREATE SEQUENCE IF NOT EXISTS user_file_id_seq OWNED BY user_file.id_int_tmp"
    )
    op.execute(
        "ALTER TABLE user_file ALTER COLUMN id_int_tmp SET DEFAULT nextval('user_file_id_seq')"
    )
    op.create_primary_key("user_file_pkey", "user_file", ["id_int_tmp"])
    # Rebind persona__user_file to the new integer PK; the UUID->int mapping
    # is not backfilled here, so associations are effectively dropped.
    op.add_column(
        "persona__user_file",
        sa.Column("user_file_id_int_tmp", sa.Integer(), nullable=True),
    )
    op.create_foreign_key(
        "persona__user_file_user_file_id_fkey",
        "persona__user_file",
        "user_file",
        ["user_file_id_int_tmp"],
        ["id_int_tmp"],
    )
    # Remove UUID id and rename int back to id
    op.drop_column("user_file", "id")
    op.alter_column(
        "user_file",
        "id_int_tmp",
        new_column_name="id",
        existing_type=sa.Integer(),
        nullable=False,
    )
    op.drop_column("persona__user_file", "user_file_id")
    op.alter_column(
        "persona__user_file",
        "user_file_id_int_tmp",
        new_column_name="user_file_id",
        existing_type=sa.Integer(),
    )
    # Restore CCPair relationship
    op.add_column(
        "user_file",
        sa.Column("cc_pair_id", sa.Integer(), nullable=True),
    )
    op.create_unique_constraint(
        "user_file_cc_pair_id_key",
        "user_file",
        ["cc_pair_id"],
    )
    op.create_foreign_key(
        "user_file_cc_pair_id_fkey",
        "user_file",
        "connector_credential_pair",
        ["cc_pair_id"],
        ["id"],
    )
    # Rename user_project back to user_folder and revert related changes
    try:
        op.drop_constraint(
            "user_project_prompt_id_fkey", "user_project", type_="foreignkey"
        )
    except Exception:
        pass
    try:
        op.drop_column("user_project", "prompt_id")
    except Exception:
        pass
    # Recreate user_file.folder_id (nullable) since we dropped it on upgrade
    try:
        op.add_column("user_file", sa.Column("folder_id", sa.Integer(), nullable=True))
    except Exception:
        pass
    try:
        op.rename_table("user_project", "user_folder")
    except Exception:
        pass

View File

@@ -182,7 +182,6 @@ def admin_get_chat_sessions(
time_created=chat.time_created.isoformat(),
time_updated=chat.time_updated.isoformat(),
shared_status=chat.shared_status,
folder_id=chat.folder_id,
current_alternate_model=chat.current_alternate_model,
)
for chat in chat_sessions

View File

@@ -10,6 +10,7 @@ from onyx.configs.constants import PUBLIC_DOC_PAT
from onyx.db.document import get_access_info_for_document
from onyx.db.document import get_access_info_for_documents
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop
from onyx.utils.variable_functionality import fetch_versioned_implementation
@@ -124,3 +125,20 @@ def source_should_fetch_permissions_during_indexing(source: DocumentSource) -> b
),
)
return _source_should_fetch_permissions_during_indexing_func(source)
def get_access_for_user_files(
    user_file_ids: list[str],
    db_session: Session,
) -> dict[str, DocumentAccess]:
    """Build per-file access info for a set of user files.

    A user file is visible only to its owning user: access is restricted to
    the owner's email, with no groups, no public access, and no external
    sharing.

    Args:
        user_file_ids: IDs of the UserFile rows to look up.
        db_session: Active SQLAlchemy session.

    Returns:
        Mapping of stringified user file ID to its DocumentAccess. IDs that
        match no UserFile row are simply absent from the result.
    """
    matching_files = (
        db_session.query(UserFile).filter(UserFile.id.in_(user_file_ids)).all()
    )

    access_by_file_id: dict[str, DocumentAccess] = {}
    for file_row in matching_files:
        access_by_file_id[str(file_row.id)] = DocumentAccess.build(
            user_emails=[file_row.user.email],
            user_groups=[],
            is_public=False,
            external_user_emails=[],
            external_user_group_ids=[],
        )
    return access_by_file_id

View File

@@ -50,6 +50,7 @@ from onyx.kg.utils.extraction_utils import get_relationship_types_str
from onyx.llm.utils import check_number_of_tokens
from onyx.llm.utils import get_max_input_tokens
from onyx.natural_language_processing.utils import get_tokenizer
from onyx.prompts.chat_prompts import PROJECT_INSTRUCTIONS_SEPARATOR
from onyx.prompts.dr_prompts import ANSWER_PROMPT_WO_TOOL_CALLING
from onyx.prompts.dr_prompts import DECISION_PROMPT_W_TOOL_CALLING
from onyx.prompts.dr_prompts import DECISION_PROMPT_WO_TOOL_CALLING
@@ -416,6 +417,13 @@ def clarifier(
assistant_system_prompt = DEFAULT_DR_SYSTEM_PROMPT + "\n\n"
assistant_task_prompt = ""
if graph_config.inputs.project_instructions:
assistant_system_prompt = (
assistant_system_prompt
+ PROJECT_INSTRUCTIONS_SEPARATOR
+ graph_config.inputs.project_instructions
)
chat_history_string = (
get_chat_history_string(
graph_config.inputs.prompt_builder.message_history,

View File

@@ -41,6 +41,7 @@ from onyx.db.models import ResearchAgentIteration
from onyx.db.models import ResearchAgentIterationSubStep
from onyx.db.models import SearchDoc as DbSearchDoc
from onyx.llm.utils import check_number_of_tokens
from onyx.prompts.chat_prompts import PROJECT_INSTRUCTIONS_SEPARATOR
from onyx.prompts.dr_prompts import FINAL_ANSWER_PROMPT_W_SUB_ANSWERS
from onyx.prompts.dr_prompts import FINAL_ANSWER_PROMPT_WITHOUT_SUB_ANSWERS
from onyx.prompts.dr_prompts import TEST_INFO_COMPLETE_PROMPT
@@ -346,6 +347,13 @@ def closer(
uploaded_context=uploaded_context,
)
if graph_config.inputs.project_instructions:
assistant_system_prompt = (
assistant_system_prompt
+ PROJECT_INSTRUCTIONS_SEPARATOR
+ graph_config.inputs.project_instructions
)
all_context_llmdocs = [
llm_doc_from_inference_section(inference_section)
for inference_section in all_cited_documents

View File

@@ -1,6 +1,7 @@
import re
from datetime import datetime
from typing import cast
from uuid import UUID
from langchain_core.runnables import RunnableConfig
from langgraph.types import StreamWriter
@@ -71,6 +72,7 @@ def basic_search(
search_tool_info = state.available_tools[state.tools_used[-1]]
search_tool = cast(SearchTool, search_tool_info.tool_object)
force_use_tool = graph_config.tooling.force_use_tool
# sanity check
if search_tool != graph_config.tooling.search_tool:
@@ -139,6 +141,15 @@ def basic_search(
retrieved_docs: list[InferenceSection] = []
callback_container: list[list[InferenceSection]] = []
user_file_ids: list[UUID] | None = None
project_id: int | None = None
if force_use_tool.override_kwargs and isinstance(
force_use_tool.override_kwargs, SearchToolOverrideKwargs
):
override_kwargs = force_use_tool.override_kwargs
user_file_ids = override_kwargs.user_file_ids
project_id = override_kwargs.project_id
# new db session to avoid concurrency issues
with get_session_with_current_tenant() as search_db_session:
for tool_response in search_tool.run(
@@ -150,6 +161,8 @@ def basic_search(
alternate_db_session=search_db_session,
retrieved_sections_callback=callback_container.append,
skip_query_analysis=True,
user_file_ids=user_file_ids,
project_id=project_id,
),
):
# get retrieved docs to send to the rest of the graph

View File

@@ -24,6 +24,7 @@ class GraphInputs(BaseModel):
prompt_builder: AnswerPromptBuilder
files: list[InMemoryChatFile] | None = None
structured_response_format: dict | None = None
project_instructions: str | None = None
class Config:
arbitrary_types_allowed = True

View File

@@ -115,7 +115,6 @@ celery_app.autodiscover_tasks(
"onyx.background.celery.tasks.vespa",
"onyx.background.celery.tasks.connector_deletion",
"onyx.background.celery.tasks.doc_permission_syncing",
"onyx.background.celery.tasks.user_file_folder_sync",
"onyx.background.celery.tasks.docprocessing",
]
)

View File

@@ -321,7 +321,6 @@ celery_app.autodiscover_tasks(
"onyx.background.celery.tasks.shared",
"onyx.background.celery.tasks.vespa",
"onyx.background.celery.tasks.llm_model_update",
"onyx.background.celery.tasks.user_file_folder_sync",
"onyx.background.celery.tasks.kg_processing",
]
)

View File

@@ -0,0 +1,113 @@
"""Celery app for the dedicated user-file-processing worker.

Wires the standard Onyx worker lifecycle hooks (signals) to the shared
implementations in ``app_base`` and discovers the user-file-processing tasks.
"""
from typing import Any
from typing import cast

from celery import Celery
from celery import signals
from celery import Task
from celery.apps.worker import Worker
from celery.signals import celeryd_init
from celery.signals import worker_init
from celery.signals import worker_process_init
from celery.signals import worker_ready
from celery.signals import worker_shutdown

import onyx.background.celery.apps.app_base as app_base
from onyx.configs.constants import POSTGRES_CELERY_WORKER_USER_FILE_PROCESSING_APP_NAME
from onyx.db.engine.sql_engine import SqlEngine
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT

logger = setup_logger()

celery_app = Celery(__name__)
celery_app.config_from_object("onyx.background.celery.configs.user_file_processing")
celery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]


@signals.task_prerun.connect
def on_task_prerun(
    sender: Any | None = None,
    task_id: str | None = None,
    task: Task | None = None,
    args: tuple | None = None,
    kwargs: dict | None = None,
    **kwds: Any,
) -> None:
    """Delegate pre-run bookkeeping to the shared app_base handler."""
    app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)


@signals.task_postrun.connect
def on_task_postrun(
    sender: Any | None = None,
    task_id: str | None = None,
    task: Task | None = None,
    args: tuple | None = None,
    kwargs: dict | None = None,
    retval: Any | None = None,
    state: str | None = None,
    **kwds: Any,
) -> None:
    """Delegate post-run bookkeeping to the shared app_base handler."""
    app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)


@celeryd_init.connect
def on_celeryd_init(sender: str, conf: Any = None, **kwargs: Any) -> None:
    """Delegate daemon initialization to the shared app_base handler."""
    app_base.on_celeryd_init(sender, conf, **kwargs)


@worker_init.connect
def on_worker_init(sender: Worker, **kwargs: Any) -> None:
    """Initialize DB engine and wait for backing services before serving."""
    logger.info("worker_init signal received.")

    SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_USER_FILE_PROCESSING_APP_NAME)

    # rkuo: Transient errors keep happening in the indexing watchdog threads.
    # "SSL connection has been closed unexpectedly"
    # actually setting the spawn method in the cloud fixes 95% of these.
    # setting pre ping might help even more, but not worrying about that yet
    pool_size = cast(int, sender.concurrency)  # type: ignore
    SqlEngine.init_engine(pool_size=pool_size, max_overflow=8)

    app_base.wait_for_redis(sender, **kwargs)
    app_base.wait_for_db(sender, **kwargs)
    app_base.wait_for_vespa_or_shutdown(sender, **kwargs)

    # Less startup checks in multi-tenant case
    if MULTI_TENANT:
        return

    app_base.on_secondary_worker_init(sender, **kwargs)


@worker_ready.connect
def on_worker_ready(sender: Any, **kwargs: Any) -> None:
    """Delegate ready-state handling to the shared app_base handler."""
    app_base.on_worker_ready(sender, **kwargs)


@worker_shutdown.connect
def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
    """Delegate shutdown handling to the shared app_base handler."""
    app_base.on_worker_shutdown(sender, **kwargs)


@worker_process_init.connect
def init_worker(**kwargs: Any) -> None:
    """Reset the SQL engine in each forked worker process (no shared pools)."""
    SqlEngine.reset_engine()


@signals.setup_logging.connect
def on_setup_logging(
    loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any
) -> None:
    """Delegate logging setup to the shared app_base handler."""
    app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs)


base_bootsteps = app_base.get_bootsteps()
for bootstep in base_bootsteps:
    celery_app.steps["worker"].add(bootstep)

celery_app.autodiscover_tasks(
    [
        "onyx.background.celery.tasks.user_file_processing",
    ]
)

View File

@@ -0,0 +1,22 @@
# Celery configuration for the dedicated user-file-processing worker.
# Connection and task defaults are inherited from the shared base config so
# this worker behaves like every other Onyx Celery worker.
import onyx.background.celery.configs.base as shared_config
from onyx.configs.app_configs import CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY

# Broker (Redis) connection settings, shared across workers.
broker_url = shared_config.broker_url
broker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup
broker_pool_limit = shared_config.broker_pool_limit
broker_transport_options = shared_config.broker_transport_options

# Redis socket/health settings, shared across workers.
redis_socket_keepalive = shared_config.redis_socket_keepalive
redis_retry_on_timeout = shared_config.redis_retry_on_timeout
redis_backend_health_check_interval = shared_config.redis_backend_health_check_interval

# Result backend settings, shared across workers.
result_backend = shared_config.result_backend
result_expires = shared_config.result_expires  # 86400 seconds is the default

# Task defaults, shared across workers.
task_default_priority = shared_config.task_default_priority
task_acks_late = shared_config.task_acks_late

# User file processing worker configuration
worker_concurrency = CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY
# Thread pool — presumably chosen because the work is I/O bound; confirm
# against the task implementations before changing.
worker_pool = "threads"
worker_prefetch_multiplier = 1

View File

@@ -89,17 +89,6 @@ beat_task_templates: list[dict] = [
"expires": BEAT_EXPIRES_DEFAULT,
},
},
{
"name": "check-for-user-file-folder-sync",
"task": OnyxCeleryTask.CHECK_FOR_USER_FILE_FOLDER_SYNC,
"schedule": timedelta(
days=1
), # This should essentially always be triggered manually for user folder updates.
"options": {
"priority": OnyxCeleryPriority.MEDIUM,
"expires": BEAT_EXPIRES_DEFAULT,
},
},
{
"name": "check-for-pruning",
"task": OnyxCeleryTask.CHECK_FOR_PRUNING,

View File

@@ -28,9 +28,6 @@ from onyx.db.connector_credential_pair import add_deletion_failure_message
from onyx.db.connector_credential_pair import (
delete_connector_credential_pair__no_commit,
)
from onyx.db.connector_credential_pair import (
delete_userfiles_for_cc_pair__no_commit,
)
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.connector_credential_pair import get_connector_credential_pairs
from onyx.db.document import (
@@ -484,12 +481,6 @@ def monitor_connector_deletion_taskset(
# related to the deleted DocumentByConnectorCredentialPair during commit
db_session.expire(cc_pair)
# delete all userfiles for the cc_pair
delete_userfiles_for_cc_pair__no_commit(
db_session=db_session,
cc_pair_id=cc_pair_id,
)
# finally, delete the cc-pair
delete_connector_credential_pair__no_commit(
db_session=db_session,

View File

@@ -85,6 +85,9 @@ from onyx.document_index.factory import get_default_document_index
from onyx.file_store.document_batch_storage import DocumentBatchStorage
from onyx.file_store.document_batch_storage import get_document_batch_storage
from onyx.httpx.httpx_pool import HttpxPool
from onyx.indexing.adapters.document_indexing_adapter import (
DocumentIndexingBatchAdapter,
)
from onyx.indexing.embedder import DefaultIndexingEmbedder
from onyx.indexing.indexing_pipeline import run_indexing_pipeline
from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
@@ -1369,6 +1372,14 @@ def _docprocessing_task(
f"Processing {len(documents)} documents through indexing pipeline"
)
adapter = DocumentIndexingBatchAdapter(
db_session=db_session,
connector_id=index_attempt.connector_credential_pair.connector.id,
credential_id=index_attempt.connector_credential_pair.credential.id,
tenant_id=tenant_id,
index_attempt_metadata=index_attempt_metadata,
)
# real work happens here!
index_pipeline_result = run_indexing_pipeline(
embedder=embedding_model,
@@ -1378,7 +1389,8 @@ def _docprocessing_task(
db_session=db_session,
tenant_id=tenant_id,
document_batch=documents,
index_attempt_metadata=index_attempt_metadata,
request_id=index_attempt_metadata.request_id,
adapter=adapter,
)
# Update batch completion and document counts atomically using database coordination

View File

@@ -1,266 +0,0 @@
import time
from typing import List
from celery import shared_task
from celery import Task
from celery.exceptions import SoftTimeLimitExceeded
from redis.lock import Lock as RedisLock
from sqlalchemy.orm import Session
from tenacity import RetryError
from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex
from onyx.background.celery.tasks.shared.tasks import LIGHT_SOFT_TIME_LIMIT
from onyx.background.celery.tasks.shared.tasks import LIGHT_TIME_LIMIT
from onyx.background.celery.tasks.shared.tasks import OnyxCeleryTaskCompletionStatus
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.constants import CELERY_USER_FILE_FOLDER_SYNC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.connector_credential_pair import (
get_connector_credential_pairs_with_user_files,
)
from onyx.db.document import get_document
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Document
from onyx.db.models import DocumentByConnectorCredentialPair
from onyx.db.search_settings import get_active_search_settings
from onyx.db.user_documents import fetch_user_files_for_documents
from onyx.db.user_documents import fetch_user_folders_for_documents
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import VespaDocumentUserFields
from onyx.httpx.httpx_pool import HttpxPool
from onyx.redis.redis_pool import get_redis_client
from onyx.utils.logger import setup_logger
logger = setup_logger()
@shared_task(
    name=OnyxCeleryTask.CHECK_FOR_USER_FILE_FOLDER_SYNC,
    ignore_result=True,
    soft_time_limit=JOB_TIMEOUT,
    trail=False,
    bind=True,
)
def check_for_user_file_folder_sync(self: Task, *, tenant_id: str) -> bool | None:
    """Runs periodically to check for documents that need user file folder metadata updates.

    This task fetches all connector credential pairs with user files, gets the
    documents associated with them, and schedules per-document tasks that update
    the user file and folder metadata in Vespa.

    Args:
        tenant_id: Tenant whose documents should be synced.

    Returns:
        True on success, False if an error occurred, None if another run
        already holds the beat lock.
    """
    time_start = time.monotonic()

    r = get_redis_client()

    lock_beat: RedisLock = r.lock(
        OnyxRedisLocks.CHECK_USER_FILE_FOLDER_SYNC_BEAT_LOCK,
        timeout=CELERY_USER_FILE_FOLDER_SYNC_BEAT_LOCK_TIMEOUT,
    )

    # these tasks should never overlap
    if not lock_beat.acquire(blocking=False):
        return None

    try:
        with get_session_with_current_tenant() as db_session:
            # Get all connector credential pairs that have user files
            cc_pairs = get_connector_credential_pairs_with_user_files(db_session)

            if not cc_pairs:
                task_logger.info("No connector credential pairs with user files found")
                return True

            # Get all documents associated with these cc_pairs
            document_ids = get_documents_for_cc_pairs(cc_pairs, db_session)

            if not document_ids:
                task_logger.info(
                    "No documents found for connector credential pairs with user files"
                )
                return True

            # Fetch current user file and folder IDs for these documents
            doc_id_to_user_file_id = fetch_user_files_for_documents(
                document_ids=document_ids, db_session=db_session
            )
            doc_id_to_user_folder_id = fetch_user_folders_for_documents(
                document_ids=document_ids, db_session=db_session
            )

            # Update Vespa metadata for each document
            for doc_id in document_ids:
                user_file_id = doc_id_to_user_file_id.get(doc_id)
                user_folder_id = doc_id_to_user_folder_id.get(doc_id)

                if user_file_id is not None or user_folder_id is not None:
                    # Schedule a task to update the document metadata
                    update_user_file_folder_metadata.apply_async(
                        args=(doc_id,),  # Use tuple instead of list for args
                        kwargs={
                            "tenant_id": tenant_id,
                            "user_file_id": user_file_id,
                            "user_folder_id": user_folder_id,
                        },
                        queue="vespa_metadata_sync",
                    )

            task_logger.info(
                f"Scheduled metadata updates for {len(document_ids)} documents. "
                f"Elapsed time: {time.monotonic() - time_start:.2f}s"
            )
            return True
    except Exception as e:
        task_logger.exception(f"Error in check_for_user_file_folder_sync: {e}")
        return False
    finally:
        # BUGFIX: a bare release() raises redis.exceptions.LockError if the
        # lock already expired (e.g. the run outlived the lock timeout) or is
        # held by another owner. Guard with owned(), matching the pattern used
        # by the other user-file tasks in this codebase.
        if lock_beat.owned():
            lock_beat.release()
def get_documents_for_cc_pairs(
    cc_pairs: List[ConnectorCredentialPair], db_session: Session
) -> List[str]:
    """Get all document IDs associated with the given connector credential pairs.

    Args:
        cc_pairs: Connector credential pairs to resolve documents for.
        db_session: Active SQLAlchemy session.

    Returns:
        Flat list of matching document IDs (empty if no pairs are given).
    """
    if not cc_pairs:
        return []

    target_cc_pair_ids = [pair.id for pair in cc_pairs]

    # DocumentByConnectorCredentialPair keys on (connector_id, credential_id)
    # rather than cc_pair_id, so correlate via an EXISTS subquery against the
    # ConnectorCredentialPair table.
    cc_pair_match = (
        db_session.query(ConnectorCredentialPair)
        .filter(
            ConnectorCredentialPair.id.in_(target_cc_pair_ids),
            ConnectorCredentialPair.connector_id
            == DocumentByConnectorCredentialPair.connector_id,
            ConnectorCredentialPair.credential_id
            == DocumentByConnectorCredentialPair.credential_id,
        )
        .exists()
    )

    rows = (
        db_session.query(Document.id)
        .join(
            DocumentByConnectorCredentialPair,
            Document.id == DocumentByConnectorCredentialPair.id,
        )
        .filter(cc_pair_match)
        .all()
    )

    return [document_id for (document_id,) in rows]
@shared_task(
    name=OnyxCeleryTask.UPDATE_USER_FILE_FOLDER_METADATA,
    bind=True,
    soft_time_limit=LIGHT_SOFT_TIME_LIMIT,
    time_limit=LIGHT_TIME_LIMIT,
    max_retries=3,
)
def update_user_file_folder_metadata(
    self: Task,
    document_id: str,
    *,
    tenant_id: str,
    user_file_id: int | None,
    user_folder_id: int | None,
) -> bool:
    """Updates the user file and folder metadata for a document in Vespa.

    Args:
        document_id: ID of the document whose Vespa chunks are updated.
        tenant_id: Tenant the document belongs to.
        user_file_id: New user file ID to stamp on the chunks (or None).
        user_folder_id: New user folder ID to stamp on the chunks (or None).

    Returns:
        True if the update succeeded, False if the document was missing or
        the task hit its soft time limit.

    Raises:
        celery.exceptions.Retry: via ``self.retry`` on unexpected errors, with
        exponential backoff.
    """
    start = time.monotonic()
    completion_status = OnyxCeleryTaskCompletionStatus.UNDEFINED
    try:
        with get_session_with_current_tenant() as db_session:
            active_search_settings = get_active_search_settings(db_session)
            doc_index = get_default_document_index(
                search_settings=active_search_settings.primary,
                secondary_search_settings=active_search_settings.secondary,
                httpx_client=HttpxPool.get("vespa"),
            )

            retry_index = RetryDocumentIndex(doc_index)

            doc = get_document(document_id, db_session)
            if not doc:
                elapsed = time.monotonic() - start
                task_logger.info(
                    f"doc={document_id} "
                    f"action=no_operation "
                    f"elapsed={elapsed:.2f}"
                )
                completion_status = OnyxCeleryTaskCompletionStatus.SKIPPED
                return False

            # Create user fields object with file and folder IDs
            user_fields = VespaDocumentUserFields(
                user_file_id=str(user_file_id) if user_file_id is not None else None,
                user_folder_id=(
                    str(user_folder_id) if user_folder_id is not None else None
                ),
            )

            # Update Vespa. OK if doc doesn't exist. Raises exception otherwise.
            chunks_affected = retry_index.update_single(
                document_id,
                tenant_id=tenant_id,
                chunk_count=doc.chunk_count,
                fields=None,  # We're only updating user fields
                user_fields=user_fields,
            )

            elapsed = time.monotonic() - start
            task_logger.info(
                f"doc={document_id} "
                f"action=user_file_folder_sync "
                f"user_file_id={user_file_id} "
                f"user_folder_id={user_folder_id} "
                f"chunks={chunks_affected} "
                f"elapsed={elapsed:.2f}"
            )
            completion_status = OnyxCeleryTaskCompletionStatus.SUCCEEDED
            return True
    except SoftTimeLimitExceeded:
        task_logger.info(f"SoftTimeLimitExceeded exception. doc={document_id}")
        completion_status = OnyxCeleryTaskCompletionStatus.SOFT_TIME_LIMIT
    except Exception as ex:
        # Unwrap tenacity RetryError so celery retries with the root cause.
        e: Exception | None = None
        if isinstance(ex, RetryError):
            task_logger.warning(
                f"Tenacity retry failed: num_attempts={ex.last_attempt.attempt_number}"
            )
            # only set the inner exception if it is of type Exception
            e_temp = ex.last_attempt.exception()
            if isinstance(e_temp, Exception):
                e = e_temp
        else:
            e = ex

        task_logger.exception(
            f"update_user_file_folder_metadata exceptioned: doc={document_id}"
        )

        completion_status = OnyxCeleryTaskCompletionStatus.RETRYABLE_EXCEPTION
        if self.max_retries is not None and self.request.retries >= self.max_retries:
            completion_status = OnyxCeleryTaskCompletionStatus.NON_RETRYABLE_EXCEPTION

        # Exponential backoff from 2^4 to 2^6 ... i.e. 16, 32, 64
        countdown = 2 ** (self.request.retries + 4)
        self.retry(exc=e, countdown=countdown)  # this will raise a celery exception
    finally:
        # BUGFIX: this block previously ended with `return False`, which
        # swallows any in-flight exception — including the celery Retry raised
        # by self.retry() above — so the task could never actually retry. The
        # finally block now only logs; the fallback return lives below.
        task_logger.info(
            f"update_user_file_folder_metadata completed: status={completion_status.value} doc={document_id}"
        )

    # Reached only on the SoftTimeLimitExceeded path (success returns above,
    # other exceptions re-raise via self.retry).
    return False

View File

@@ -0,0 +1,248 @@
import time
from celery import shared_task
from celery import Task
from redis.lock import Lock as RedisLock
from sqlalchemy import select
from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.celery_utils import httpx_init_vespa_pool
from onyx.configs.app_configs import MANAGED_VESPA
from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisLocks
from onyx.connectors.file.connector import LocalFileConnector
from onyx.connectors.models import Document
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import UserFileStatus
from onyx.db.models import UserFile
from onyx.db.search_settings import get_active_search_settings_list
from onyx.document_index.factory import get_default_document_index
from onyx.httpx.httpx_pool import HttpxPool
from onyx.indexing.adapters.user_file_indexing_adapter import UserFileIndexingAdapter
from onyx.indexing.embedder import DefaultIndexingEmbedder
from onyx.indexing.indexing_pipeline import run_indexing_pipeline
from onyx.natural_language_processing.search_nlp_models import (
InformationContentClassificationModel,
)
from onyx.redis.redis_pool import get_redis_client
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
def _user_file_lock_key(user_file_id: int) -> str:
    """Redis lock key guarding processing of a single user file."""
    prefix = OnyxRedisLocks.USER_FILE_PROCESSING_LOCK_PREFIX
    return f"{prefix}:{user_file_id}"
@shared_task(
    name=OnyxCeleryTask.CHECK_FOR_USER_FILE_PROCESSING,
    soft_time_limit=300,
    bind=True,
    ignore_result=True,
)
def check_user_file_processing(self: Task, *, tenant_id: str) -> None:
    """Scan for user files with PROCESSING status and enqueue per-file tasks.

    Uses direct Redis locks to avoid overlapping runs.

    Args:
        tenant_id: Tenant whose user files should be scanned.
    """
    task_logger.info("check_user_file_processing - Starting")
    CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)

    redis_client = get_redis_client(tenant_id=tenant_id)
    beat_lock: RedisLock = redis_client.lock(
        OnyxRedisLocks.USER_FILE_PROCESSING_BEAT_LOCK,
        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
    )

    # Do not overlap generator runs
    if not beat_lock.acquire(blocking=False):
        return None

    enqueued = 0
    try:
        with get_session_with_current_tenant() as db_session:
            pending_query = select(UserFile.id).where(
                UserFile.status == UserFileStatus.PROCESSING
            )
            pending_file_ids = db_session.execute(pending_query).scalars().all()

        for pending_file_id in pending_file_ids:
            self.app.send_task(
                OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,
                kwargs={"user_file_id": pending_file_id, "tenant_id": tenant_id},
                queue=OnyxCeleryQueues.USER_FILE_PROCESSING,
                priority=OnyxCeleryPriority.HIGH,
            )
            enqueued += 1
    finally:
        if beat_lock.owned():
            beat_lock.release()

    task_logger.info(
        f"check_user_file_processing - Enqueued {enqueued} tasks for tenant={tenant_id}"
    )
    return None
@shared_task(
    name=OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,
    bind=True,
    ignore_result=True,
)
def process_single_user_file(self: Task, *, user_file_id: str, tenant_id: str) -> None:
    """Index one user file through the indexing pipeline.

    Loads the file via LocalFileConnector, runs the standard indexing pipeline
    with a UserFileIndexingAdapter, and marks the UserFile FAILED on any error.
    A per-file Redis lock prevents concurrent processing of the same file.

    Args:
        user_file_id: ID of the UserFile row to process.
        tenant_id: Tenant the file belongs to.
    """
    task_logger.info(f"process_single_user_file - Starting id={user_file_id}")
    start = time.monotonic()
    CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)

    redis_client = get_redis_client(tenant_id=tenant_id)
    file_lock: RedisLock = redis_client.lock(
        _user_file_lock_key(user_file_id), timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT
    )
    if not file_lock.acquire(blocking=False):
        task_logger.info(
            f"process_single_user_file - Lock held, skipping user_file_id={user_file_id}"
        )
        return None

    documents: list[Document] = []
    try:
        with get_session_with_current_tenant() as db_session:
            uf = db_session.get(UserFile, user_file_id)
            if not uf:
                task_logger.warning(
                    f"process_single_user_file - UserFile not found id={user_file_id}"
                )
                return None
            if uf.status != UserFileStatus.PROCESSING:
                task_logger.info(
                    f"process_single_user_file - Skipping id={user_file_id} status={uf.status}"
                )
                return None

            connector = LocalFileConnector(
                file_locations=[uf.file_id],
                file_names=[uf.name] if uf.name else None,
                zip_metadata={},
            )
            connector.load_credentials({})

            # 20 is the documented default for httpx max_keepalive_connections
            if MANAGED_VESPA:
                httpx_init_vespa_pool(
                    20, ssl_cert=VESPA_CLOUD_CERT_PATH, ssl_key=VESPA_CLOUD_KEY_PATH
                )
            else:
                httpx_init_vespa_pool(20)

            search_settings_list = get_active_search_settings_list(db_session)
            # BUGFIX: next() without a default raises StopIteration when no
            # current settings exist, which made the RuntimeError check below
            # unreachable. Supply None so the intended error is raised.
            current_search_settings = next(
                (
                    search_settings_instance
                    for search_settings_instance in search_settings_list
                    if search_settings_instance.status.is_current()
                ),
                None,
            )
            if not current_search_settings:
                raise RuntimeError(
                    f"process_single_user_file - No current search settings found for tenant={tenant_id}"
                )

            try:
                for batch in connector.load_from_state():
                    documents.extend(batch)

                adapter = UserFileIndexingAdapter(
                    tenant_id=tenant_id,
                    db_session=db_session,
                )

                # Set up indexing pipeline components
                embedding_model = DefaultIndexingEmbedder.from_db_search_settings(
                    search_settings=current_search_settings,
                )
                information_content_classification_model = (
                    InformationContentClassificationModel()
                )
                document_index = get_default_document_index(
                    current_search_settings,
                    None,
                    httpx_client=HttpxPool.get("vespa"),
                )

                task_logger.info(
                    f"process_single_user_file - Documents before indexing: {documents}"
                )

                # update the document id to userfile id in the documents
                for document in documents:
                    document.id = str(user_file_id)

                # real work happens here!
                index_pipeline_result = run_indexing_pipeline(
                    embedder=embedding_model,
                    information_content_classification_model=information_content_classification_model,
                    document_index=document_index,
                    ignore_time_skip=True,
                    db_session=db_session,
                    tenant_id=tenant_id,
                    document_batch=documents,
                    request_id=None,
                    adapter=adapter,
                )
                task_logger.info(
                    f"process_single_user_file - Indexing pipeline completed ={index_pipeline_result}"
                )

                if index_pipeline_result.failures:
                    task_logger.error(
                        f"process_single_user_file - Indexing pipeline failed id={user_file_id}"
                    )
                    uf.status = UserFileStatus.FAILED
                    db_session.add(uf)
                    db_session.commit()
                    return None
            except Exception as e:
                task_logger.exception(
                    f"process_single_user_file - Error id={user_file_id}: {e}"
                )
                uf.status = UserFileStatus.FAILED
                db_session.add(uf)
                db_session.commit()
                return None

            # NOTE(review): the success path never moves uf.status off
            # PROCESSING here — presumably the adapter or pipeline does that;
            # confirm, otherwise successfully-indexed files stay PROCESSING.
            elapsed = time.monotonic() - start
            task_logger.info(
                f"process_single_user_file - Finished id={user_file_id} docs={len(documents)} elapsed={elapsed:.2f}s"
            )
            return None
    except Exception as e:
        # Attempt to mark the file as failed
        with get_session_with_current_tenant() as db_session:
            uf = db_session.get(UserFile, user_file_id)
            if uf:
                uf.status = UserFileStatus.FAILED
                db_session.add(uf)
                db_session.commit()
        task_logger.exception(
            f"process_single_user_file - Error id={user_file_id}: {e}"
        )
        return None
    finally:
        if file_lock.owned():
            file_lock.release()

View File

@@ -0,0 +1,16 @@
"""Factory stub for running the user file processing Celery worker."""
from celery import Celery

from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable

set_is_ee_based_on_env_variable()


def get_app() -> Celery:
    """Return the user-file-processing Celery app.

    The app module is imported lazily — presumably so the EE flag set above
    is in effect before the app module loads; confirm before reordering.
    """
    from onyx.background.celery.apps.user_file_processing import celery_app

    return celery_app


app = get_app()

View File

@@ -578,7 +578,7 @@ def _run_indexing(
db_session=db_session,
tenant_id=tenant_id,
document_batch=doc_batch_cleaned,
index_attempt_metadata=index_attempt_md,
request_id=index_attempt_md.request_id,
)
batch_num += 1

View File

@@ -61,6 +61,7 @@ class Answer:
use_agentic_search: bool = False,
research_type: ResearchType | None = None,
research_plan: dict[str, Any] | None = None,
project_instructions: str | None = None,
) -> None:
self.is_connected: Callable[[], bool] | None = is_connected
self._processed_stream: list[AnswerStreamPart] | None = None
@@ -96,6 +97,7 @@ class Answer:
prompt_builder=prompt_builder,
files=latest_query_files,
structured_response_format=answer_style_config.structured_response_format,
project_instructions=project_instructions,
)
self.graph_tooling = GraphTooling(
primary_llm=llm,

View File

@@ -63,6 +63,7 @@ from onyx.db.models import SearchDoc as DbSearchDoc
from onyx.db.models import ToolCall
from onyx.db.models import User
from onyx.db.persona import get_persona_by_id
from onyx.db.projects import get_project_instructions
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.factory import get_default_document_index
from onyx.file_store.models import FileDescriptor
@@ -444,26 +445,27 @@ def stream_chat_message_objects(
files = load_all_chat_files(history_msgs, new_msg_req.file_descriptors)
req_file_ids = [f["id"] for f in new_msg_req.file_descriptors]
latest_query_files = [file for file in files if file.file_id in req_file_ids]
user_file_ids = new_msg_req.user_file_ids or []
user_folder_ids = new_msg_req.user_folder_ids or []
user_file_ids = []
if persona.user_files:
for file in persona.user_files:
user_file_ids.append(file.id)
if persona.user_folders:
for folder in persona.user_folders:
user_folder_ids.append(folder.id)
if new_msg_req.current_message_files:
for file in new_msg_req.current_message_files:
if file["user_file_id"]:
user_file_ids.append(file["user_file_id"])
# Load in user files into memory and create search tool override kwargs if needed
# if we have enough tokens and no folders, we don't need to use search
# if we have enough tokens, we don't need to use search
# we can just pass them into the prompt directly
(
in_memory_user_files,
user_file_models,
search_tool_override_kwargs_for_user_files,
) = parse_user_files(
user_file_ids=user_file_ids,
user_folder_ids=user_folder_ids,
user_file_ids=user_file_ids or [],
project_id=chat_session.project_id,
db_session=db_session,
persona=persona,
actual_user_input=message_text,
@@ -572,6 +574,15 @@ def stream_chat_message_objects(
final_msg.prompt or persona.prompts[0]
)
# Retrieve project-specific instructions if this chat session is associated with a project.
project_instructions: str | None = (
get_project_instructions(
db_session=db_session, project_id=chat_session.project_id
)
if persona.is_default_persona
else None
) # if the persona is not default, we don't want to use the project instructions
answer_style_config = AnswerStyleConfig(
citation_config=CitationConfig(
all_docs_useful=selected_db_search_docs is not None
@@ -683,6 +694,7 @@ def stream_chat_message_objects(
db_session=db_session,
use_agentic_search=new_msg_req.use_agentic_search,
skip_gen_ai_answer_generation=new_msg_req.skip_gen_ai_answer_generation,
project_instructions=project_instructions,
)
# Process streamed packets using the new packet processing module

View File

@@ -76,6 +76,7 @@ def default_build_user_message(
if prompt_config.task_prompt
else user_query
)
user_prompt = user_prompt.strip()
tag_handled_prompt = handle_onyx_date_awareness(user_prompt, prompt_config)
user_msg = HumanMessage(

View File

@@ -4,6 +4,8 @@ from sqlalchemy.orm import Session
from onyx.db.models import Persona
from onyx.db.models import UserFile
from onyx.db.projects import get_user_files_from_project
from onyx.db.user_file import update_last_accessed_at_for_user_files
from onyx.file_store.models import InMemoryChatFile
from onyx.file_store.utils import get_user_files_as_user
from onyx.file_store.utils import load_in_memory_chat_files
@@ -15,24 +17,24 @@ logger = setup_logger()
def parse_user_files(
user_file_ids: list[int],
user_folder_ids: list[int],
user_file_ids: list[UUID],
db_session: Session,
persona: Persona,
actual_user_input: str,
project_id: int,
# should only be None if auth is disabled
user_id: UUID | None,
) -> tuple[list[InMemoryChatFile], list[UserFile], SearchToolOverrideKwargs | None]:
"""
Parse user files and folders into in-memory chat files and create search tool override kwargs.
Only creates SearchToolOverrideKwargs if token overflow occurs or folders are present.
Parse user files and project into in-memory chat files and create search tool override kwargs.
Only creates SearchToolOverrideKwargs if token overflow occurs.
Args:
user_file_ids: List of user file IDs to load
user_folder_ids: List of user folder IDs to load
db_session: Database session
persona: Persona to calculate available tokens
actual_user_input: User's input message for token calculation
project_id: Project ID to validate file ownership
user_id: User ID to validate file ownership
Returns:
@@ -40,37 +42,51 @@ def parse_user_files(
loaded user files,
user file models,
search tool override kwargs if token
overflow or folders present
overflow
)
"""
# Return empty results if no files or folders specified
if not user_file_ids and not user_folder_ids:
# Return empty results if no files or project specified
if not user_file_ids and not project_id:
return [], [], None
project_user_file_ids = []
if project_id and user_id:
project_user_file_ids.extend(
[
file.id
for file in get_user_files_from_project(project_id, user_id, db_session)
]
)
# Load user files from the database into memory
user_files = load_in_memory_chat_files(
user_file_ids or [],
user_folder_ids or [],
user_file_ids + project_user_file_ids or [],
db_session,
)
user_file_models = get_user_files_as_user(
user_file_ids or [],
user_folder_ids or [],
user_file_ids + project_user_file_ids or [],
user_id,
db_session,
)
# Update last accessed at for the user files which are used in the chat
if user_file_ids or project_user_file_ids:
update_last_accessed_at_for_user_files(
user_file_ids + project_user_file_ids or [],
db_session,
)
# Calculate token count for the files, need to import here to avoid circular import
# TODO: fix this
from onyx.db.user_documents import calculate_user_files_token_count
from onyx.db.user_file import calculate_user_files_token_count
from onyx.chat.prompt_builder.citations_prompt import (
compute_max_document_tokens_for_persona,
)
total_tokens = calculate_user_files_token_count(
user_file_ids or [],
user_folder_ids or [],
user_file_ids + project_user_file_ids or [],
db_session,
)
@@ -87,20 +103,22 @@ def parse_user_files(
have_enough_tokens = total_tokens <= available_tokens
# If we have enough tokens and no folders, we don't need search
# If we have enough tokens, we don't need search
# we can just pass them into the prompt directly
if have_enough_tokens and not user_folder_ids:
if have_enough_tokens:
# No search tool override needed - files can be passed directly
return user_files, user_file_models, None
# Token overflow or folders present - need to use search tool
# Token overflow - need to use search tool
override_kwargs = SearchToolOverrideKwargs(
force_no_rerank=have_enough_tokens,
alternate_db_session=None,
retrieved_sections_callback=None,
skip_query_analysis=have_enough_tokens,
user_file_ids=user_file_ids,
user_folder_ids=user_folder_ids,
user_file_ids=user_file_ids or [],
project_id=(
project_id if persona.is_default_persona else None
), # if the persona is not default, we don't want to use the project files
)
return user_files, user_file_models, override_kwargs

View File

@@ -355,6 +355,19 @@ CELERY_WORKER_KG_PROCESSING_CONCURRENCY = int(
os.environ.get("CELERY_WORKER_KG_PROCESSING_CONCURRENCY") or 4
)
CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY_DEFAULT = 4
try:
CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY = int(
os.environ.get(
"CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY",
CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY_DEFAULT,
)
)
except ValueError:
CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY = (
CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY_DEFAULT
)
# The maximum number of tasks that can be queued up to sync to Vespa in a single pass
VESPA_SYNC_MAX_TASKS = 8192

View File

@@ -3,7 +3,6 @@ import os
INPUT_PROMPT_YAML = "./onyx/seeding/input_prompts.yaml"
PROMPTS_YAML = "./onyx/seeding/prompts.yaml"
PERSONAS_YAML = "./onyx/seeding/personas.yaml"
USER_FOLDERS_YAML = "./onyx/seeding/user_folders.yaml"
NUM_RETURNED_HITS = 50
# Used for LLM filtering and reranking
# We want this to be approximately the number of results we want to show on the first page

View File

@@ -76,6 +76,9 @@ POSTGRES_CELERY_WORKER_DOCFETCHING_APP_NAME = "celery_worker_docfetching"
POSTGRES_CELERY_WORKER_MONITORING_APP_NAME = "celery_worker_monitoring"
POSTGRES_CELERY_WORKER_INDEXING_CHILD_APP_NAME = "celery_worker_indexing_child"
POSTGRES_CELERY_WORKER_KG_PROCESSING_APP_NAME = "celery_worker_kg_processing"
POSTGRES_CELERY_WORKER_USER_FILE_PROCESSING_APP_NAME = (
"celery_worker_user_file_processing"
)
POSTGRES_PERMISSIONS_APP_NAME = "permissions"
POSTGRES_UNKNOWN_APP_NAME = "unknown"
@@ -112,7 +115,6 @@ CELERY_GENERIC_BEAT_LOCK_TIMEOUT = 120
CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT = 120
CELERY_USER_FILE_FOLDER_SYNC_BEAT_LOCK_TIMEOUT = 120
CELERY_PRIMARY_WORKER_LOCK_TIMEOUT = 120
@@ -342,6 +344,9 @@ class OnyxCeleryQueues:
# Indexing queue
USER_FILES_INDEXING = "user_files_indexing"
# User file processing queue
USER_FILE_PROCESSING = "user_file_processing"
# Document processing pipeline queue
DOCPROCESSING = "docprocessing"
CONNECTOR_DOC_FETCHING = "connector_doc_fetching"
@@ -367,7 +372,7 @@ class OnyxRedisLocks:
CHECK_CONNECTOR_EXTERNAL_GROUP_SYNC_BEAT_LOCK = (
"da_lock:check_connector_external_group_sync_beat"
)
CHECK_USER_FILE_FOLDER_SYNC_BEAT_LOCK = "da_lock:check_user_file_folder_sync_beat"
MONITOR_BACKGROUND_PROCESSES_LOCK = "da_lock:monitor_background_processes"
CHECK_AVAILABLE_TENANTS_LOCK = "da_lock:check_available_tenants"
CLOUD_PRE_PROVISION_TENANT_LOCK = "da_lock:pre_provision_tenant"
@@ -389,6 +394,10 @@ class OnyxRedisLocks:
# KG processing
KG_PROCESSING_LOCK = "da_lock:kg_processing"
# User file processing
USER_FILE_PROCESSING_BEAT_LOCK = "da_lock:check_user_file_processing_beat"
USER_FILE_PROCESSING_LOCK_PREFIX = "da_lock:user_file_processing"
class OnyxRedisSignals:
BLOCK_VALIDATE_INDEXING_FENCES = "signal:block_validate_indexing_fences"
@@ -447,8 +456,6 @@ class OnyxCeleryTask:
f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_monitor_celery_pidbox"
)
UPDATE_USER_FILE_FOLDER_METADATA = "update_user_file_folder_metadata"
CHECK_FOR_CONNECTOR_DELETION = "check_for_connector_deletion_task"
CHECK_FOR_VESPA_SYNC_TASK = "check_for_vespa_sync_task"
CHECK_FOR_INDEXING = "check_for_indexing"
@@ -456,8 +463,10 @@ class OnyxCeleryTask:
CHECK_FOR_DOC_PERMISSIONS_SYNC = "check_for_doc_permissions_sync"
CHECK_FOR_EXTERNAL_GROUP_SYNC = "check_for_external_group_sync"
CHECK_FOR_LLM_MODEL_UPDATE = "check_for_llm_model_update"
CHECK_FOR_USER_FILE_FOLDER_SYNC = "check_for_user_file_folder_sync"
# User file processing
CHECK_FOR_USER_FILE_PROCESSING = "check_for_user_file_processing"
PROCESS_SINGLE_USER_FILE = "process_single_user_file"
# Connector checkpoint cleanup
CHECK_FOR_CHECKPOINT_CLEANUP = "check_for_checkpoint_cleanup"
CLEANUP_CHECKPOINT = "cleanup_checkpoint"

View File

@@ -1,5 +1,6 @@
from datetime import datetime
from typing import Any
from uuid import UUID
from pydantic import BaseModel
from pydantic import ConfigDict
@@ -119,8 +120,8 @@ class BaseFilters(BaseModel):
class UserFileFilters(BaseModel):
user_file_ids: list[int] | None = None
user_folder_ids: list[int] | None = None
user_file_ids: list[UUID] | None = None
project_id: int | None = None
class IndexFilters(BaseFilters, UserFileFilters):

View File

@@ -166,9 +166,6 @@ def retrieval_preprocessing(
)
user_file_filters = search_request.user_file_filters
user_file_ids = (user_file_filters.user_file_ids or []) if user_file_filters else []
user_folder_ids = (
(user_file_filters.user_folder_ids or []) if user_file_filters else []
)
if persona and persona.user_files:
user_file_ids = list(
set(user_file_ids) | set([file.id for file in persona.user_files])
@@ -176,7 +173,7 @@ def retrieval_preprocessing(
final_filters = IndexFilters(
user_file_ids=user_file_ids,
user_folder_ids=user_folder_ids,
project_id=user_file_filters.project_id,
source_type=preset_filters.source_type or predicted_source_filters,
document_set=preset_filters.document_set,
time_cutoff=time_filter or predicted_time_cutoff,

View File

@@ -33,7 +33,6 @@ from onyx.agents.agent_search.utils import create_citation_format_list
from onyx.auth.schemas import UserRole
from onyx.chat.models import DocumentRelevance
from onyx.configs.chat_configs import HARD_DELETE_CHATS
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import MessageType
from onyx.context.search.models import InferenceSection
from onyx.context.search.models import RetrievalDocs
@@ -53,12 +52,10 @@ from onyx.db.models import SearchDoc
from onyx.db.models import SearchDoc as DBSearchDoc
from onyx.db.models import ToolCall
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.db.persona import get_best_persona_id_for_user
from onyx.db.tools import get_tool_by_id
from onyx.file_store.file_store import get_default_file_store
from onyx.file_store.models import FileDescriptor
from onyx.file_store.models import InMemoryChatFile
from onyx.llm.override_models import LLMOverride
from onyx.llm.override_models import PromptOverride
from onyx.server.query_and_chat.models import ChatMessageDetail
@@ -466,6 +463,8 @@ def get_chat_sessions_by_user(
db_session: Session,
include_onyxbot_flows: bool = False,
limit: int = 50,
project_id: UUID | None = None,
only_non_project_chats: bool = False,
) -> list[ChatSession]:
stmt = select(ChatSession).where(ChatSession.user_id == user_id)
@@ -480,6 +479,11 @@ def get_chat_sessions_by_user(
if limit:
stmt = stmt.limit(limit)
if project_id is not None:
stmt = stmt.where(ChatSession.project_id == project_id)
elif only_non_project_chats:
stmt = stmt.where(ChatSession.project_id.is_(None))
result = db_session.execute(stmt)
chat_sessions = result.scalars().all()
@@ -553,6 +557,7 @@ def create_chat_session(
prompt_override: PromptOverride | None = None,
onyxbot_flow: bool = False,
slack_thread_id: str | None = None,
project_id: int | None = None,
) -> ChatSession:
chat_session = ChatSession(
user_id=user_id,
@@ -562,6 +567,7 @@ def create_chat_session(
prompt_override=prompt_override,
onyxbot_flow=onyxbot_flow,
slack_thread_id=slack_thread_id,
project_id=project_id,
)
db_session.add(chat_session)
@@ -1184,90 +1190,6 @@ def get_db_search_doc_by_document_id(
return search_doc
def create_search_doc_from_user_file(
db_user_file: UserFile, associated_chat_file: InMemoryChatFile, db_session: Session
) -> SearchDoc:
"""Create a SearchDoc in the database from a UserFile and return it.
This ensures proper ID generation by SQLAlchemy and prevents duplicate key errors.
"""
blurb = ""
if associated_chat_file and associated_chat_file.content:
try:
# Try to decode as UTF-8, but handle errors gracefully
content_sample = associated_chat_file.content[:100]
# Remove null bytes which can cause SQL errors
content_sample = content_sample.replace(b"\x00", b"")
# NOTE(rkuo): this used to be "replace" instead of strict, but
# that would bypass the binary handling below
blurb = content_sample.decode("utf-8", errors="strict")
except Exception:
# If decoding fails completely, provide a generic description
blurb = f"[Binary file: {db_user_file.name}]"
db_search_doc = SearchDoc(
document_id=db_user_file.document_id,
chunk_ind=0, # Default to 0 for user files
semantic_id=db_user_file.name,
link=db_user_file.link_url,
blurb=blurb,
source_type=DocumentSource.FILE, # Assuming internal source for user files
boost=0, # Default boost
hidden=False, # Default visibility
doc_metadata={}, # Empty metadata
score=0.0, # Default score of 0.0 instead of None
is_relevant=None, # No relevance initially
relevance_explanation=None, # No explanation initially
match_highlights=[], # No highlights initially
updated_at=db_user_file.created_at, # Use created_at as updated_at
primary_owners=[], # Empty list instead of None
secondary_owners=[], # Empty list instead of None
is_internet=False, # Not from internet
)
db_session.add(db_search_doc)
db_session.flush() # Get the ID but don't commit yet
return db_search_doc
def translate_db_user_file_to_search_doc(
db_user_file: UserFile, associated_chat_file: InMemoryChatFile
) -> SearchDoc:
blurb = ""
if associated_chat_file and associated_chat_file.content:
try:
# Try to decode as UTF-8, but handle errors gracefully
content_sample = associated_chat_file.content[:100]
# Remove null bytes which can cause SQL errors
content_sample = content_sample.replace(b"\x00", b"")
blurb = content_sample.decode("utf-8", errors="replace")
except Exception:
# If decoding fails completely, provide a generic description
blurb = f"[Binary file: {db_user_file.name}]"
return SearchDoc(
# Don't set ID - let SQLAlchemy auto-generate it
document_id=db_user_file.document_id,
chunk_ind=0, # Default to 0 for user files
semantic_id=db_user_file.name,
link=db_user_file.link_url,
blurb=blurb,
source_type=DocumentSource.FILE, # Assuming internal source for user files
boost=0, # Default boost
hidden=False, # Default visibility
doc_metadata={}, # Empty metadata
score=0.0, # Default score of 0.0 instead of None
is_relevant=None, # No relevance initially
relevance_explanation=None, # No explanation initially
match_highlights=[], # No highlights initially
updated_at=db_user_file.created_at, # Use created_at as updated_at
primary_owners=[], # Empty list instead of None
secondary_owners=[], # Empty list instead of None
is_internet=False, # Not from internet
)
def translate_db_search_doc_to_server_search_doc(
db_search_doc: SearchDoc,
remove_doc_content: bool = False,

View File

@@ -34,7 +34,6 @@ from onyx.db.models import IndexingStatus
from onyx.db.models import SearchSettings
from onyx.db.models import User
from onyx.db.models import User__UserGroup
from onyx.db.models import UserFile
from onyx.db.models import UserGroup__ConnectorCredentialPair
from onyx.db.models import UserRole
from onyx.server.models import StatusResponse
@@ -805,31 +804,3 @@ def resync_cc_pair(
)
db_session.commit()
def get_connector_credential_pairs_with_user_files(
db_session: Session,
) -> list[ConnectorCredentialPair]:
"""
Get all connector credential pairs that have associated user files.
Args:
db_session: Database session
Returns:
List of ConnectorCredentialPair objects that have user files
"""
return (
db_session.query(ConnectorCredentialPair)
.join(UserFile, UserFile.cc_pair_id == ConnectorCredentialPair.id)
.distinct()
.all()
)
def delete_userfiles_for_cc_pair__no_commit(
db_session: Session,
cc_pair_id: int,
) -> None:
stmt = delete(UserFile).where(UserFile.cc_pair_id == cc_pair_id)
db_session.execute(stmt)

View File

@@ -131,3 +131,10 @@ class EmbeddingPrecision(str, PyEnum):
# good reason to specify anything else
BFLOAT16 = "bfloat16"
FLOAT = "float"
class UserFileStatus(str, PyEnum):
    """Lifecycle states for an uploaded user file.

    Files start in PROCESSING when created (see UserFile.status default)
    and move to a terminal state once background processing finishes.
    """

    # File has been uploaded and is queued / being indexed
    PROCESSING = "processing"
    # Background processing finished successfully
    COMPLETED = "completed"
    # Processing raised an error; file is not usable for search
    FAILED = "failed"
    # Processing was canceled before completion
    CANCELED = "canceled"

View File

@@ -1,132 +0,0 @@
from uuid import UUID
from sqlalchemy.orm import Session
from onyx.db.chat import delete_chat_session
from onyx.db.models import ChatFolder
from onyx.db.models import ChatSession
from onyx.utils.logger import setup_logger
logger = setup_logger()
def get_user_folders(
user_id: UUID | None,
db_session: Session,
) -> list[ChatFolder]:
return db_session.query(ChatFolder).filter(ChatFolder.user_id == user_id).all()
def update_folder_display_priority(
user_id: UUID | None,
display_priority_map: dict[int, int],
db_session: Session,
) -> None:
folders = get_user_folders(user_id=user_id, db_session=db_session)
folder_ids = {folder.id for folder in folders}
if folder_ids != set(display_priority_map.keys()):
raise ValueError("Invalid Folder IDs provided")
for folder in folders:
folder.display_priority = display_priority_map[folder.id]
db_session.commit()
def get_folder_by_id(
user_id: UUID | None,
folder_id: int,
db_session: Session,
) -> ChatFolder:
folder = (
db_session.query(ChatFolder).filter(ChatFolder.id == folder_id).one_or_none()
)
if not folder:
raise ValueError("Folder by specified id does not exist")
if folder.user_id != user_id:
raise PermissionError(f"Folder does not belong to user: {user_id}")
return folder
def create_folder(
user_id: UUID | None, folder_name: str | None, db_session: Session
) -> int:
new_folder = ChatFolder(
user_id=user_id,
name=folder_name,
)
db_session.add(new_folder)
db_session.commit()
return new_folder.id
def rename_folder(
user_id: UUID | None, folder_id: int, folder_name: str | None, db_session: Session
) -> None:
folder = get_folder_by_id(
user_id=user_id, folder_id=folder_id, db_session=db_session
)
folder.name = folder_name
db_session.commit()
def add_chat_to_folder(
user_id: UUID | None, folder_id: int, chat_session: ChatSession, db_session: Session
) -> None:
folder = get_folder_by_id(
user_id=user_id, folder_id=folder_id, db_session=db_session
)
chat_session.folder_id = folder.id
db_session.commit()
def remove_chat_from_folder(
user_id: UUID | None, folder_id: int, chat_session: ChatSession, db_session: Session
) -> None:
folder = get_folder_by_id(
user_id=user_id, folder_id=folder_id, db_session=db_session
)
if chat_session.folder_id != folder.id:
raise ValueError("The chat session is not in the specified folder.")
if folder.user_id != user_id:
raise ValueError(
f"Tried to remove a chat session from a folder that does not below to "
f"this user, user id: {user_id}"
)
chat_session.folder_id = None
if chat_session in folder.chat_sessions:
folder.chat_sessions.remove(chat_session)
db_session.commit()
def delete_folder(
user_id: UUID | None,
folder_id: int,
including_chats: bool,
db_session: Session,
) -> None:
folder = get_folder_by_id(
user_id=user_id, folder_id=folder_id, db_session=db_session
)
# Assuming there will not be a massive number of chats in any given folder
if including_chats:
for chat_session in folder.chat_sessions:
delete_chat_session(
user_id=user_id,
chat_session_id=chat_session.id,
db_session=db_session,
)
db_session.delete(folder)
db_session.commit()

View File

@@ -63,6 +63,7 @@ from onyx.db.enums import (
SyncType,
SyncStatus,
MCPAuthenticationType,
UserFileStatus,
)
from onyx.configs.constants import NotificationType
from onyx.configs.constants import SearchFeedbackType
@@ -208,9 +209,6 @@ class User(SQLAlchemyBaseUserTableUUID, Base):
chat_sessions: Mapped[list["ChatSession"]] = relationship(
"ChatSession", back_populates="user"
)
chat_folders: Mapped[list["ChatFolder"]] = relationship(
"ChatFolder", back_populates="user"
)
prompts: Mapped[list["Prompt"]] = relationship("Prompt", back_populates="user")
input_prompts: Mapped[list["InputPrompt"]] = relationship(
@@ -229,8 +227,8 @@ class User(SQLAlchemyBaseUserTableUUID, Base):
back_populates="creator",
primaryjoin="User.id == foreign(ConnectorCredentialPair.creator_id)",
)
folders: Mapped[list["UserFolder"]] = relationship(
"UserFolder", back_populates="user"
projects: Mapped[list["UserProject"]] = relationship(
"UserProject", back_populates="user"
)
files: Mapped[list["UserFile"]] = relationship("UserFile", back_populates="user")
# MCP servers accessible to this user
@@ -538,10 +536,6 @@ class ConnectorCredentialPair(Base):
primaryjoin="foreign(ConnectorCredentialPair.creator_id) == remote(User.id)",
)
user_file: Mapped["UserFile"] = relationship(
"UserFile", back_populates="cc_pair", uselist=False
)
background_errors: Mapped[list["BackgroundError"]] = relationship(
"BackgroundError", back_populates="cc_pair", cascade="all, delete-orphan"
)
@@ -2036,9 +2030,6 @@ class ChatSession(Base):
Enum(ChatSessionSharedStatus, native_enum=False),
default=ChatSessionSharedStatus.PRIVATE,
)
folder_id: Mapped[int | None] = mapped_column(
ForeignKey("chat_folder.id"), nullable=True
)
current_alternate_model: Mapped[str | None] = mapped_column(String, default=None)
@@ -2046,6 +2037,14 @@ class ChatSession(Base):
String, nullable=True, default=None
)
project_id: Mapped[int | None] = mapped_column(
ForeignKey("user_project.id"), nullable=True
)
project: Mapped["UserProject"] = relationship(
"UserProject", back_populates="chat_sessions", foreign_keys=[project_id]
)
# the latest "overrides" specified by the user. These take precedence over
# the attached persona. However, overrides specified directly in the
# `send-message` call will take precedence over these.
@@ -2071,9 +2070,6 @@ class ChatSession(Base):
DateTime(timezone=True), server_default=func.now()
)
user: Mapped[User] = relationship("User", back_populates="chat_sessions")
folder: Mapped["ChatFolder"] = relationship(
"ChatFolder", back_populates="chat_sessions"
)
messages: Mapped[list["ChatMessage"]] = relationship(
"ChatMessage", back_populates="chat_session", cascade="all, delete-orphan"
)
@@ -2183,33 +2179,6 @@ class ChatMessage(Base):
)
class ChatFolder(Base):
"""For organizing chat sessions"""
__tablename__ = "chat_folder"
id: Mapped[int] = mapped_column(primary_key=True)
# Only null if auth is off
user_id: Mapped[UUID | None] = mapped_column(
ForeignKey("user.id", ondelete="CASCADE"), nullable=True
)
name: Mapped[str | None] = mapped_column(String, nullable=True)
display_priority: Mapped[int] = mapped_column(Integer, nullable=True, default=0)
user: Mapped[User] = relationship("User", back_populates="chat_folders")
chat_sessions: Mapped[list["ChatSession"]] = relationship(
"ChatSession", back_populates="folder"
)
def __lt__(self, other: Any) -> bool:
if not isinstance(other, ChatFolder):
return NotImplemented
if self.display_priority == other.display_priority:
# Bigger ID (created later) show earlier
return self.id > other.id
return self.display_priority < other.display_priority
class AgentSubQuestion(Base):
"""
A sub-question is a question that is asked of the LLM to gather supporting
@@ -2669,11 +2638,6 @@ class Persona(Base):
secondary="persona__user_file",
back_populates="assistants",
)
user_folders: Mapped[list["UserFolder"]] = relationship(
"UserFolder",
secondary="persona__user_folder",
back_populates="assistants",
)
labels: Mapped[list["PersonaLabel"]] = relationship(
"PersonaLabel",
secondary=Persona__PersonaLabel.__table__,
@@ -2691,20 +2655,11 @@ class Persona(Base):
)
class Persona__UserFolder(Base):
__tablename__ = "persona__user_folder"
persona_id: Mapped[int] = mapped_column(ForeignKey("persona.id"), primary_key=True)
user_folder_id: Mapped[int] = mapped_column(
ForeignKey("user_folder.id"), primary_key=True
)
class Persona__UserFile(Base):
__tablename__ = "persona__user_file"
persona_id: Mapped[int] = mapped_column(ForeignKey("persona.id"), primary_key=True)
user_file_id: Mapped[int] = mapped_column(
user_file_id: Mapped[UUID] = mapped_column(
ForeignKey("user_file.id"), primary_key=True
)
@@ -3309,22 +3264,38 @@ class InputPrompt__User(Base):
disabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
class UserFolder(Base):
__tablename__ = "user_folder"
class Project__UserFile(Base):
__tablename__ = "project__user_file"
project_id: Mapped[int] = mapped_column(
ForeignKey("user_project.id"), primary_key=True
)
user_file_id: Mapped[UUID] = mapped_column(
ForeignKey("user_file.id"), primary_key=True
)
class UserProject(Base):
__tablename__ = "user_project"
id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=False)
name: Mapped[str] = mapped_column(nullable=False)
description: Mapped[str] = mapped_column(nullable=False)
description: Mapped[str] = mapped_column(nullable=True)
created_at: Mapped[datetime.datetime] = mapped_column(
DateTime(timezone=True), server_default=func.now()
)
user: Mapped["User"] = relationship(back_populates="folders")
files: Mapped[list["UserFile"]] = relationship(back_populates="folder")
assistants: Mapped[list["Persona"]] = relationship(
"Persona",
secondary=Persona__UserFolder.__table__,
back_populates="user_folders",
user: Mapped["User"] = relationship(back_populates="projects")
user_files: Mapped[list["UserFile"]] = relationship(
"UserFile",
secondary=Project__UserFile.__table__,
back_populates="projects",
)
prompt_id: Mapped[int | None] = mapped_column(
ForeignKey("prompt.id"), nullable=True
)
chat_sessions: Mapped[list["ChatSession"]] = relationship(
"ChatSession", back_populates="project", lazy="selectin"
)
@@ -3337,36 +3308,43 @@ class UserDocument(str, Enum):
class UserFile(Base):
__tablename__ = "user_file"
id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
id: Mapped[UUID] = mapped_column(PGUUID, primary_key=True)
user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=False)
assistants: Mapped[list["Persona"]] = relationship(
"Persona",
secondary=Persona__UserFile.__table__,
back_populates="user_files",
)
folder_id: Mapped[int | None] = mapped_column(
ForeignKey("user_folder.id"), nullable=True
)
file_id: Mapped[str] = mapped_column(nullable=False)
document_id: Mapped[str] = mapped_column(nullable=False)
name: Mapped[str] = mapped_column(nullable=False)
created_at: Mapped[datetime.datetime] = mapped_column(
default=datetime.datetime.utcnow
)
user: Mapped["User"] = relationship(back_populates="files")
folder: Mapped["UserFolder"] = relationship(back_populates="files")
token_count: Mapped[int | None] = mapped_column(Integer, nullable=True)
cc_pair_id: Mapped[int | None] = mapped_column(
ForeignKey("connector_credential_pair.id"), nullable=True, unique=True
file_type: Mapped[str] = mapped_column(String, nullable=False)
status: Mapped[UserFileStatus] = mapped_column(
Enum(UserFileStatus, native_enum=False),
nullable=False,
default=UserFileStatus.PROCESSING,
)
cc_pair: Mapped["ConnectorCredentialPair"] = relationship(
"ConnectorCredentialPair", back_populates="user_file"
chunk_count: Mapped[int | None] = mapped_column(Integer, nullable=True)
last_accessed_at: Mapped[datetime.datetime | None] = mapped_column(
DateTime(timezone=True), nullable=True
)
link_url: Mapped[str | None] = mapped_column(String, nullable=True)
content_type: Mapped[str | None] = mapped_column(String, nullable=True)
projects: Mapped[list["UserProject"]] = relationship(
"UserProject",
secondary=Project__UserFile.__table__,
back_populates="user_files",
lazy="selectin",
)
"""
Multi-tenancy related tables

View File

@@ -34,7 +34,6 @@ from onyx.db.models import Tool
from onyx.db.models import User
from onyx.db.models import User__UserGroup
from onyx.db.models import UserFile
from onyx.db.models import UserFolder
from onyx.db.models import UserGroup
from onyx.db.notification import create_notification
from onyx.server.features.persona.models import FullPersonaSnapshot
@@ -243,6 +242,16 @@ def create_update_persona(
elif user.role != UserRole.ADMIN:
raise ValueError("Only admins can make a default persona")
# Convert incoming string UUIDs to UUID objects for DB operations
converted_user_file_ids = None
if create_persona_request.user_file_ids is not None:
try:
converted_user_file_ids = [
UUID(str_id) for str_id in create_persona_request.user_file_ids
]
except Exception:
raise ValueError("Invalid user_file_ids; must be UUID strings")
persona = upsert_persona(
persona_id=persona_id,
user=user,
@@ -268,8 +277,7 @@ def create_update_persona(
llm_relevance_filter=create_persona_request.llm_relevance_filter,
llm_filter_extraction=create_persona_request.llm_filter_extraction,
is_default_persona=create_persona_request.is_default_persona,
user_file_ids=create_persona_request.user_file_ids,
user_folder_ids=create_persona_request.user_folder_ids,
user_file_ids=converted_user_file_ids,
)
versioned_make_persona_private = fetch_versioned_implementation(
@@ -505,8 +513,7 @@ def upsert_persona(
builtin_persona: bool = False,
is_default_persona: bool | None = None,
label_ids: list[int] | None = None,
user_file_ids: list[int] | None = None,
user_folder_ids: list[int] | None = None,
user_file_ids: list[UUID] | None = None,
chunks_above: int = CONTEXT_CHUNKS_ABOVE,
chunks_below: int = CONTEXT_CHUNKS_BELOW,
) -> Persona:
@@ -567,17 +574,6 @@ def upsert_persona(
if not user_files and user_file_ids:
raise ValueError("user_files not found")
# Fetch and attach user_folders by IDs
user_folders = None
if user_folder_ids is not None:
user_folders = (
db_session.query(UserFolder)
.filter(UserFolder.id.in_(user_folder_ids))
.all()
)
if not user_folders and user_folder_ids:
raise ValueError("user_folders not found")
# Fetch and attach prompts by IDs
prompts = None
if prompt_ids is not None:
@@ -651,10 +647,6 @@ def upsert_persona(
existing_persona.user_files.clear()
existing_persona.user_files = user_files or []
if user_folder_ids is not None:
existing_persona.user_folders.clear()
existing_persona.user_folders = user_folders or []
# We should only update display priority if it is not already set
if existing_persona.display_priority is None:
existing_persona.display_priority = display_priority
@@ -696,7 +688,6 @@ def upsert_persona(
is_default_persona=(
is_default_persona if is_default_persona is not None else False
),
user_folders=user_folders or [],
user_files=user_files or [],
labels=labels or [],
)

176
backend/onyx/db/projects.py Normal file
View File

@@ -0,0 +1,176 @@
import datetime
import uuid
from typing import List
from uuid import UUID
from fastapi import UploadFile
from pydantic import BaseModel
from pydantic import ConfigDict
from sqlalchemy.orm import Session
from onyx.background.celery.versioned_apps.client import app as client_app
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.models import Project__UserFile
from onyx.db.models import Prompt
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.db.models import UserProject
from onyx.server.documents.connector import upload_files
from onyx.server.features.projects.projects_file_utils import categorize_uploaded_files
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
class CategorizedFilesResult(BaseModel):
    """Outcome of an upload batch: the UserFile rows created for accepted
    files, plus the names of files that were rejected during categorization.
    """

    # ORM instances persisted for the accepted uploads
    user_files: list[UserFile]
    # Filenames rejected by categorize_uploaded_files
    # (exact rejection criteria live in categorize_uploaded_files — not shown here)
    non_accepted_files: list[str]
    # Filenames whose content type is not supported
    unsupported_files: list[str]

    # Allow SQLAlchemy ORM models inside this result container
    model_config = ConfigDict(arbitrary_types_allowed=True)
def create_user_files(
    files: List[UploadFile],
    project_id: int | None,
    user: User | None,
    db_session: Session,
    link_url: str | None = None,
) -> CategorizedFilesResult:
    """Categorize, store, and persist a batch of uploaded files as UserFile rows.

    Accepted files are written to file storage via upload_files(), then one
    UserFile row is created per stored file. If project_id is given, each new
    file is also linked to that project through the Project__UserFile
    association table. Commits the session once at the end.

    Args:
        files: Raw uploads from the request.
        project_id: Optional project to attach the new files to. NOTE(review):
            ownership of the project is NOT checked here — presumably callers
            validate it; confirm before exposing this path directly.
        user: Owner of the files; None is allowed (user_id stored as NULL),
            presumably only when auth is disabled.
        db_session: Database session (committed by this function).
        link_url: Optional source URL stored on every created file.

    Returns:
        CategorizedFilesResult with created UserFiles and rejected filenames.
    """
    # Categorize the files
    categorized_files = categorize_uploaded_files(files)

    # NOTE: At the moment, zip metadata is not used for user files.
    # Should revisit to decide whether this should be a feature.
    upload_response = upload_files(categorized_files.acceptable)

    user_files = []
    non_accepted_files = categorized_files.non_accepted
    unsupported_files = categorized_files.unsupported

    # Pair returned storage paths with the same set of acceptable files we uploaded
    # (relies on upload_files preserving input order — TODO confirm)
    for file_path, file in zip(
        upload_response.file_paths, categorized_files.acceptable
    ):
        new_file = UserFile(
            id=uuid.uuid4(),
            user_id=user.id if user else None,
            file_id=file_path,
            name=file.filename,
            # Token counts were computed during categorization, keyed by filename
            token_count=categorized_files.acceptable_file_to_token_count[
                file.filename or ""
            ],
            link_url=link_url,
            content_type=file.content_type,
            file_type=file.content_type,
            last_accessed_at=datetime.datetime.now(datetime.timezone.utc),
        )
        # Persist the UserFile first to satisfy FK constraints for association table
        db_session.add(new_file)
        db_session.flush()
        if project_id:
            project_to_user_file = Project__UserFile(
                project_id=project_id,
                user_file_id=new_file.id,
            )
            db_session.add(project_to_user_file)
        user_files.append(new_file)
    db_session.commit()
    return CategorizedFilesResult(
        user_files=user_files,
        non_accepted_files=non_accepted_files,
        unsupported_files=unsupported_files,
    )
def upload_files_to_user_files_with_indexing(
    files: List[UploadFile],
    project_id: int | None,
    user: User,
    db_session: Session,
) -> CategorizedFilesResult:
    """Create UserFile rows for the uploads and enqueue per-file indexing.

    Args:
        files: Incoming multipart uploads.
        project_id: Optional project to attach the files to.
        user: Owning user.
        db_session: Active SQLAlchemy session.

    Returns:
        The same CategorizedFilesResult produced by create_user_files.
    """
    categorized_files_result = create_user_files(files, project_id, user, db_session)
    user_files = categorized_files_result.user_files
    non_accepted_files = categorized_files_result.non_accepted_files
    unsupported_files = categorized_files_result.unsupported_files

    # Trigger per-file processing immediately for the current tenant
    tenant_id = get_current_tenant_id()

    # BUG FIX: the previous f-strings had no placeholder, so the offending
    # filename was never actually logged.
    for filename in non_accepted_files:
        logger.warning(f"Non-accepted file: {filename}")
    for filename in unsupported_files:
        logger.warning(f"Unsupported file: {filename}")

    for user_file in user_files:
        task = client_app.send_task(
            OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,
            kwargs={"user_file_id": user_file.id, "tenant_id": tenant_id},
            queue=OnyxCeleryQueues.USER_FILE_PROCESSING,
            priority=OnyxCeleryPriority.HIGH,
        )
        logger.info(
            f"Triggered indexing for user_file_id={user_file.id} with task_id={task.id}"
        )
    return CategorizedFilesResult(
        user_files=user_files,
        non_accepted_files=non_accepted_files,
        unsupported_files=unsupported_files,
    )
def check_project_ownership(
    project_id: int, user_id: UUID, db_session: Session
) -> bool:
    """Return True iff the given user owns the given project."""
    owned_project = (
        db_session.query(UserProject)
        .filter(UserProject.id == project_id, UserProject.user_id == user_id)
        .first()
    )
    return owned_project is not None
def get_user_files_from_project(
    project_id: int, user_id: UUID, db_session: Session
) -> list[UserFile]:
    """Return all files attached to a project, or [] if the user does not own it."""
    # Ownership gate: never leak files from someone else's project.
    if not check_project_ownership(project_id, user_id, db_session):
        return []

    project_files_query = (
        db_session.query(UserFile)
        .join(Project__UserFile)
        .filter(Project__UserFile.project_id == project_id)
    )
    return project_files_query.all()
def get_project_instructions(db_session: Session, project_id: int | None) -> str | None:
    """Return the project's instruction prompt text if available; otherwise None.

    Best-effort helper: DB errors are logged (no longer silently swallowed)
    and converted to None so callers never fail just because instructions
    could not be loaded.
    """
    if not project_id:
        return None
    try:
        project = (
            db_session.query(UserProject)
            .filter(UserProject.id == project_id)
            .one_or_none()
        )
        if not project or not project.prompt_id:
            return None
        project_prompt = (
            db_session.query(Prompt)
            .filter(Prompt.id == project.prompt_id)
            .one_or_none()
        )
        if not project_prompt or not project_prompt.system_prompt:
            return None
        # Normalize: a whitespace-only prompt counts as "no instructions".
        instructions = project_prompt.system_prompt.strip()
        return instructions or None
    except Exception:
        # Keep the best-effort contract, but record why we fell back.
        logger.exception(
            f"Failed to load instructions for project_id={project_id}"
        )
        return None

View File

@@ -1,478 +0,0 @@
import datetime
import time
from typing import List
from uuid import UUID
from fastapi import UploadFile
from sqlalchemy import and_
from sqlalchemy import func
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import Session
from onyx.auth.users import get_current_tenant_id
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import InputType
from onyx.db.connector import create_connector
from onyx.db.connector_credential_pair import add_credential_to_connector
from onyx.db.credentials import create_credential
from onyx.db.enums import AccessType
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Document
from onyx.db.models import DocumentByConnectorCredentialPair
from onyx.db.models import Persona
from onyx.db.models import Persona__UserFile
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.db.models import UserFolder
from onyx.server.documents.connector import trigger_indexing_for_cc_pair
from onyx.server.documents.connector import upload_files
from onyx.server.documents.models import ConnectorBase
from onyx.server.documents.models import CredentialBase
from onyx.server.models import StatusResponse
USER_FILE_CONSTANT = "USER_FILE_CONNECTOR"
def create_user_files(
    files: List[UploadFile],
    folder_id: int | None,
    user: User | None,
    db_session: Session,
    link_url: str | None = None,
) -> list[UserFile]:
    """Upload the given files to storage and persist one UserFile row per file.

    NOTE(rkuo): This function can take -1 (RECENT_DOCS_FOLDER_ID) for folder_id.

    Args:
        files: Incoming multipart uploads.
        folder_id: Folder to place the files in (may be -1 for "recent docs").
        user: Owning user; None when auth is disabled.
        db_session: Active SQLAlchemy session; committed before returning.
        link_url: Optional source URL when the file was ingested from a link.

    Returns:
        The newly created UserFile rows, in upload order.
    """
    # NOTE: At the moment, zip metadata is not used for user files.
    # Should revisit to decide whether this should be a feature.
    upload_response = upload_files(files)
    user_files = []
    # upload_files returns one storage path per input file, in order, so zip
    # pairs each upload with its storage location.
    for file_path, file in zip(upload_response.file_paths, files):
        new_file = UserFile(
            user_id=user.id if user else None,
            folder_id=folder_id,
            # NOTE(review): prefix matches USER_FILE_CONSTANT + "__" but is
            # hard-coded here — consider deriving it from the constant.
            file_id=file_path,
            document_id="USER_FILE_CONNECTOR__" + file_path,
            name=file.filename,
            token_count=None,  # filled in later once the file is processed
            link_url=link_url,
            content_type=file.content_type,
        )
        db_session.add(new_file)
        user_files.append(new_file)
    db_session.commit()
    return user_files
def upload_files_to_user_files_with_indexing(
    files: List[UploadFile],
    folder_id: int | None,
    user: User,
    db_session: Session,
    trigger_index: bool = True,
) -> list[UserFile]:
    """Create user files, wire up a connector/credential pair per file, and
    optionally trigger immediate indexing.

    NOTE(rkuo): This function can take -1 (RECENT_DOCS_FOLDER_ID) for folder_id.
    """
    # Create the user files first
    user_files = create_user_files(files, folder_id, user, db_session)

    # Create connector and credential for each file
    for user_file in user_files:
        cc_pair = create_file_connector_credential(user_file, user, db_session)
        # NOTE(review): `cc_pair` is a StatusResponse; `.data` presumably
        # carries the new cc_pair's id — confirm against
        # add_credential_to_connector's return value.
        user_file.cc_pair_id = cc_pair.data
    db_session.commit()

    # Trigger immediate high-priority indexing for all created files
    if trigger_index:
        tenant_id = get_current_tenant_id()
        for user_file in user_files:
            # Use the existing trigger_indexing_for_cc_pair function but with highest priority
            if user_file.cc_pair_id:
                trigger_indexing_for_cc_pair(
                    [],
                    user_file.cc_pair.connector_id,
                    False,
                    tenant_id,
                    db_session,
                    is_user_file=True,
                )
    return user_files
def create_file_connector_credential(
    user_file: UserFile, user: User, db_session: Session
) -> StatusResponse:
    """Create connector and credential for a user file.

    Builds a FILE-source connector pointing at the file's storage location,
    an (empty) credential, and links them into a private cc-pair. Names embed
    the file id and a timestamp to stay unique across repeated uploads.

    Returns:
        The StatusResponse from add_credential_to_connector.
    """
    connector_base = ConnectorBase(
        name=f"UserFile-{user_file.file_id}-{int(time.time())}",
        source=DocumentSource.FILE,
        input_type=InputType.LOAD_STATE,
        connector_specific_config={
            "file_locations": [user_file.file_id],
            "file_names": [user_file.name],
            "zip_metadata": {},
        },
        # No periodic refresh/prune — user files are indexed once on upload.
        refresh_freq=None,
        prune_freq=None,
        indexing_start=None,
    )
    connector = create_connector(db_session=db_session, connector_data=connector_base)
    credential_info = CredentialBase(
        credential_json={},  # file connectors need no secrets
        admin_public=True,
        source=DocumentSource.FILE,
        curator_public=True,
        groups=[],
        name=f"UserFileCredential-{user_file.file_id}-{int(time.time())}",
        is_user_file=True,
    )
    credential = create_credential(credential_info, user, db_session)
    return add_credential_to_connector(
        db_session=db_session,
        user=user,
        connector_id=connector.id,
        credential_id=credential.id,
        cc_pair_name=f"UserFileCCPair-{user_file.file_id}-{int(time.time())}",
        access_type=AccessType.PRIVATE,  # user files are never shared via cc-pair
        auto_sync_options=None,
        groups=[],
        is_user_file=True,
    )
def get_user_file_indexing_status(
    file_ids: list[int], db_session: Session
) -> dict[int, bool]:
    """Get indexing status for multiple user files.

    A file counts as indexed once its cc_pair has recorded a successful
    indexing run. IDs not found in the database are absent from the result.
    """
    files_with_pairs = (
        db_session.query(UserFile)
        .filter(UserFile.id.in_(file_ids))
        .options(joinedload(UserFile.cc_pair))
        .all()
    )
    return {
        file.id: bool(file.cc_pair and file.cc_pair.last_successful_index_time)
        for file in files_with_pairs
    }
def calculate_user_files_token_count(
    file_ids: list[int], folder_ids: list[int], db_session: Session
) -> int:
    """Calculate total token count for specified files and folders."""

    def _summed_tokens(filter_clause) -> int:
        # SUM over no rows yields NULL -> treat as 0
        return (
            db_session.query(func.sum(UserFile.token_count))
            .filter(filter_clause)
            .scalar()
            or 0
        )

    total_tokens = 0
    if file_ids:
        total_tokens += _summed_tokens(UserFile.id.in_(file_ids))
    if folder_ids:
        total_tokens += _summed_tokens(UserFile.folder_id.in_(folder_ids))
    return total_tokens
def load_all_user_files(
    file_ids: list[int], folder_ids: list[int], db_session: Session
) -> list[UserFile]:
    """Load all user files from specified file IDs and folder IDs."""
    collected: list[UserFile] = []

    # Individually referenced files first...
    if file_ids:
        collected.extend(
            db_session.query(UserFile).filter(UserFile.id.in_(file_ids)).all()
        )

    # ...then everything contained in the requested folders.
    if folder_ids:
        collected.extend(
            db_session.query(UserFile)
            .filter(UserFile.folder_id.in_(folder_ids))
            .all()
        )
    return collected
def get_user_files_from_folder(folder_id: int, db_session: Session) -> list[UserFile]:
    """Return every UserFile belonging to the given folder."""
    in_folder = UserFile.folder_id == folder_id
    return db_session.query(UserFile).filter(in_folder).all()
def share_file_with_assistant(
    file_id: int, assistant_id: int, db_session: Session
) -> None:
    """Attach a user file to an assistant; silently no-op if either is missing."""
    file = db_session.query(UserFile).filter(UserFile.id == file_id).first()
    assistant = db_session.query(Persona).filter(Persona.id == assistant_id).first()
    if file is None or assistant is None:
        return
    file.assistants.append(assistant)
    db_session.commit()
def unshare_file_with_assistant(
    file_id: int, assistant_id: int, db_session: Session
) -> None:
    """Remove the file/assistant association row, if one exists."""
    association_filter = and_(
        Persona__UserFile.user_file_id == file_id,
        Persona__UserFile.persona_id == assistant_id,
    )
    db_session.query(Persona__UserFile).filter(association_filter).delete()
    db_session.commit()
def share_folder_with_assistant(
    folder_id: int, assistant_id: int, db_session: Session
) -> None:
    """Share every file in a folder with the assistant; no-op if either is missing."""
    folder = db_session.query(UserFolder).filter(UserFolder.id == folder_id).first()
    assistant = db_session.query(Persona).filter(Persona.id == assistant_id).first()
    if folder is None or assistant is None:
        return
    for file in folder.files:
        share_file_with_assistant(file.id, assistant_id, db_session)
def unshare_folder_with_assistant(
    folder_id: int, assistant_id: int, db_session: Session
) -> None:
    """Unshare every file in a folder from the assistant; no-op if folder missing."""
    folder = db_session.query(UserFolder).filter(UserFolder.id == folder_id).first()
    if folder is None:
        return
    for file in folder.files:
        unshare_file_with_assistant(file.id, assistant_id, db_session)
def fetch_user_files_for_documents(
    document_ids: list[str],
    db_session: Session,
) -> dict[str, int | None]:
    """
    Fetches user file IDs for the given document IDs.

    Resolution path: document -> cc_pair (via the doc/cc-pair association
    table) -> user file (via UserFile.cc_pair_id).

    Args:
        document_ids: List of document IDs to fetch user files for
        db_session: Database session

    Returns:
        Dictionary mapping document IDs to user file IDs (or None if no user file exists)
    """
    # First, get the document to cc_pair mapping
    doc_cc_pairs = (
        db_session.query(Document.id, ConnectorCredentialPair.id)
        .join(
            DocumentByConnectorCredentialPair,
            # NOTE(review): joins on the association table's `id` column —
            # presumably that column stores the document id; confirm against
            # the DocumentByConnectorCredentialPair model.
            Document.id == DocumentByConnectorCredentialPair.id,
        )
        .join(
            ConnectorCredentialPair,
            and_(
                DocumentByConnectorCredentialPair.connector_id
                == ConnectorCredentialPair.connector_id,
                DocumentByConnectorCredentialPair.credential_id
                == ConnectorCredentialPair.credential_id,
            ),
        )
        .filter(Document.id.in_(document_ids))
        .all()
    )

    # Get cc_pair to user_file mapping (restricted to cc_pairs seen above)
    cc_pair_to_user_file = (
        db_session.query(ConnectorCredentialPair.id, UserFile.id)
        .join(UserFile, UserFile.cc_pair_id == ConnectorCredentialPair.id)
        .filter(
            ConnectorCredentialPair.id.in_(
                [cc_pair_id for _, cc_pair_id in doc_cc_pairs]
            )
        )
        .all()
    )

    # Create mapping from cc_pair_id to user_file_id
    cc_pair_to_user_file_dict = {
        cc_pair_id: user_file_id for cc_pair_id, user_file_id in cc_pair_to_user_file
    }

    # Create the final result mapping document_id to user_file_id;
    # documents with no user file stay None.
    result: dict[str, int | None] = {doc_id: None for doc_id in document_ids}
    for doc_id, cc_pair_id in doc_cc_pairs:
        if cc_pair_id in cc_pair_to_user_file_dict:
            result[doc_id] = cc_pair_to_user_file_dict[cc_pair_id]
    return result
def fetch_user_folders_for_documents(
    document_ids: list[str],
    db_session: Session,
) -> dict[str, int | None]:
    """
    Fetches user folder IDs for the given document IDs.

    For each document, returns the folder ID that the document's associated
    user file belongs to. Resolution path mirrors
    fetch_user_files_for_documents: document -> cc_pair -> UserFile.folder_id.

    Args:
        document_ids: List of document IDs to fetch user folders for
        db_session: Database session

    Returns:
        Dictionary mapping document IDs to user folder IDs (or None if no user folder exists)
    """
    # First, get the document to cc_pair mapping
    doc_cc_pairs = (
        db_session.query(Document.id, ConnectorCredentialPair.id)
        .join(
            DocumentByConnectorCredentialPair,
            # NOTE(review): same join-key caveat as in
            # fetch_user_files_for_documents — confirm the association table's
            # `id` column holds the document id.
            Document.id == DocumentByConnectorCredentialPair.id,
        )
        .join(
            ConnectorCredentialPair,
            and_(
                DocumentByConnectorCredentialPair.connector_id
                == ConnectorCredentialPair.connector_id,
                DocumentByConnectorCredentialPair.credential_id
                == ConnectorCredentialPair.credential_id,
            ),
        )
        .filter(Document.id.in_(document_ids))
        .all()
    )

    # Get cc_pair to user_file and folder mapping
    cc_pair_to_folder = (
        db_session.query(ConnectorCredentialPair.id, UserFile.folder_id)
        .join(UserFile, UserFile.cc_pair_id == ConnectorCredentialPair.id)
        .filter(
            ConnectorCredentialPair.id.in_(
                [cc_pair_id for _, cc_pair_id in doc_cc_pairs]
            )
        )
        .all()
    )

    # Create mapping from cc_pair_id to folder_id
    cc_pair_to_folder_dict = {
        cc_pair_id: folder_id for cc_pair_id, folder_id in cc_pair_to_folder
    }

    # Create the final result mapping document_id to folder_id;
    # documents with no folder stay None.
    result: dict[str, int | None] = {doc_id: None for doc_id in document_ids}
    for doc_id, cc_pair_id in doc_cc_pairs:
        if cc_pair_id in cc_pair_to_folder_dict:
            result[doc_id] = cc_pair_to_folder_dict[cc_pair_id]
    return result
def get_user_file_from_id(db_session: Session, user_file_id: int) -> UserFile | None:
    """Look up a single UserFile by primary key; None if not found."""
    id_matches = UserFile.id == user_file_id
    return db_session.query(UserFile).filter(id_matches).first()
# def fetch_user_files_for_documents(
# # document_ids: list[str],
# # db_session: Session,
# # ) -> dict[str, int | None]:
# # # Query UserFile objects for the given document_ids
# # user_files = (
# # db_session.query(UserFile).filter(UserFile.document_id.in_(document_ids)).all()
# # )
# # # Create a dictionary mapping document_ids to UserFile objects
# # result: dict[str, int | None] = {doc_id: None for doc_id in document_ids}
# # for user_file in user_files:
# # result[user_file.document_id] = user_file.id
# # return result
def upsert_user_folder(
    db_session: Session,
    id: int | None = None,
    user_id: UUID | None = None,
    name: str | None = None,
    description: str | None = None,
    created_at: datetime.datetime | None = None,
    user: User | None = None,
    files: list[UserFile] | None = None,
    assistants: list[Persona] | None = None,
) -> UserFolder:
    """Create or update a UserFolder.

    Lookup is by `id` when given, otherwise by (name, user_id). On update,
    arguments left as None are not touched (partial update). Flushes but does
    NOT commit — the caller owns the transaction.
    """
    if id is not None:
        user_folder = db_session.query(UserFolder).filter_by(id=id).first()
    else:
        user_folder = (
            db_session.query(UserFolder).filter_by(name=name, user_id=user_id).first()
        )
    if user_folder:
        # Partial update: None means "leave unchanged".
        if user_id is not None:
            user_folder.user_id = user_id
        if name is not None:
            user_folder.name = name
        if description is not None:
            user_folder.description = description
        if created_at is not None:
            user_folder.created_at = created_at
        if user is not None:
            user_folder.user = user
        if files is not None:
            user_folder.files = files
        if assistants is not None:
            user_folder.assistants = assistants
    else:
        user_folder = UserFolder(
            id=id,
            user_id=user_id,
            name=name,
            description=description,
            # NOTE(review): utcnow() is naive while other code in this repo
            # uses timezone-aware datetimes — confirm the column expectation.
            created_at=created_at or datetime.datetime.utcnow(),
            user=user,
            files=files or [],
            assistants=assistants or [],
        )
        db_session.add(user_folder)
    db_session.flush()
    return user_folder
def get_user_folder_by_name(db_session: Session, name: str) -> UserFolder | None:
    """Fetch the first folder with the given name, or None if there is none."""
    name_matches = UserFolder.name == name
    return db_session.query(UserFolder).filter(name_matches).first()
def update_user_file_token_count__no_commit(
    user_file_id_to_token_count: dict[int, int | None],
    db_session: Session,
) -> None:
    """Set token_count for each listed user file. Caller must commit."""
    for user_file_id, token_count in user_file_id_to_token_count.items():
        (
            db_session.query(UserFile)
            .filter(UserFile.id == user_file_id)
            .update({UserFile.token_count: token_count})
        )

View File

@@ -0,0 +1,79 @@
import datetime
from sqlalchemy import func
from sqlalchemy import select
from sqlalchemy.orm import Session
from onyx.db.models import UserFile
def fetch_chunk_counts_for_user_files(
    user_file_ids: list[str],
    db_session: Session,
) -> list[tuple[str, int]]:
    """
    Return a list of (user_file_id, chunk_count) tuples.
    If a user_file_id is not found in the database, it will be returned with a chunk_count of 0.
    """
    stmt = select(UserFile.id, UserFile.chunk_count).where(
        UserFile.id.in_(user_file_ids)
    )
    results = db_session.execute(stmt).all()

    # Create a dictionary of user_file_id to chunk_count
    chunk_counts = {str(row.id): row.chunk_count or 0 for row in results}

    # Unknown ids and NULL chunk counts both collapse to 0 (the `or 0` above
    # and the .get default below), so callers cannot distinguish "no chunks"
    # from "not found" here. Output order matches the input id order.
    return [
        (user_file_id, chunk_counts.get(user_file_id, 0))
        for user_file_id in user_file_ids
    ]
def calculate_user_files_token_count(file_ids: list[str], db_session: Session) -> int:
    """Calculate total token count for specified files."""
    if not file_ids:
        return 0
    # SUM over no matching rows yields NULL -> normalize to 0
    summed = (
        db_session.query(func.sum(UserFile.token_count))
        .filter(UserFile.id.in_(file_ids))
        .scalar()
    )
    return summed or 0
def fetch_user_project_ids_for_user_files(
    user_file_ids: list[str],
    db_session: Session,
) -> dict[str, list[int]]:
    """Fetch user project ids for specified user files."""
    matching_files = (
        db_session.execute(select(UserFile).where(UserFile.id.in_(user_file_ids)))
        .scalars()
        .all()
    )
    file_to_projects: dict[str, list[int]] = {}
    for user_file in matching_files:
        file_to_projects[str(user_file.id)] = [
            project.id for project in user_file.projects
        ]
    return file_to_projects
def update_last_accessed_at_for_user_files(
    user_file_ids: list[str],
    db_session: Session,
) -> None:
    """Update `last_accessed_at` to now (UTC) for the given user files."""
    if not user_file_ids:
        return
    timestamp = datetime.datetime.now(datetime.timezone.utc)
    matching = db_session.query(UserFile).filter(UserFile.id.in_(user_file_ids))
    # Bulk UPDATE; skip session sync since we don't read these rows afterwards.
    matching.update({UserFile.last_accessed_at: timestamp}, synchronize_session=False)
    db_session.commit()

View File

@@ -176,6 +176,11 @@ schema {{ schema_name }} {
rank: filter
attribute: fast-search
}
field user_project type array<int> {
indexing: summary | attribute
rank: filter
attribute: fast-search
}
}
# If using different tokenization settings, the fieldset has to be removed, and the field must

View File

@@ -51,8 +51,7 @@ from onyx.document_index.vespa_constants import SOURCE_TYPE
from onyx.document_index.vespa_constants import TENANT_ID
from onyx.document_index.vespa_constants import TITLE
from onyx.document_index.vespa_constants import TITLE_EMBEDDING
from onyx.document_index.vespa_constants import USER_FILE
from onyx.document_index.vespa_constants import USER_FOLDER
from onyx.document_index.vespa_constants import USER_PROJECT
from onyx.indexing.models import DocMetadataAwareIndexChunk
from onyx.utils.logger import setup_logger
@@ -208,8 +207,7 @@ def _index_vespa_chunk(
DOCUMENT_SETS: {document_set: 1 for document_set in chunk.document_sets},
# still called `image_file_name` in Vespa for backwards compatibility
IMAGE_FILE_NAME: chunk.image_file_id,
USER_FILE: chunk.user_file if chunk.user_file is not None else None,
USER_FOLDER: chunk.user_folder if chunk.user_folder is not None else None,
USER_PROJECT: chunk.user_project if chunk.user_project is not None else [],
BOOST: chunk.boost,
AGGREGATED_CHUNK_BOOST_FACTOR: chunk.aggregated_chunk_boost_factor,
}

View File

@@ -14,8 +14,7 @@ from onyx.document_index.vespa_constants import HIDDEN
from onyx.document_index.vespa_constants import METADATA_LIST
from onyx.document_index.vespa_constants import SOURCE_TYPE
from onyx.document_index.vespa_constants import TENANT_ID
from onyx.document_index.vespa_constants import USER_FILE
from onyx.document_index.vespa_constants import USER_FOLDER
from onyx.document_index.vespa_constants import USER_PROJECT
from onyx.kg.utils.formatting_utils import split_relationship_id
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
@@ -138,6 +137,18 @@ def build_vespa_filters(
return f"!({DOC_UPDATED_AT} < {cutoff_secs}) and "
return f"({DOC_UPDATED_AT} >= {cutoff_secs}) and "
def _build_user_project_filter(
    project_id: int | None,
) -> str:
    """Build a YQL membership clause for the user_project array attribute."""
    if project_id is None:
        return ""
    try:
        normalized_id = int(project_id)
    except Exception:
        # Defensive: a non-integer project id yields no filter at all.
        return ""
    # Vespa YQL 'contains' expects a string literal; quote the integer
    return f'({USER_PROJECT} contains "{normalized_id}") and '
# Start building the filter string
filter_str = f"!({HIDDEN}=true) and " if not include_hidden else ""
@@ -172,10 +183,14 @@ def build_vespa_filters(
# Document sets
filter_str += _build_or_filters(DOCUMENT_SETS, filters.document_set)
# New: user_file_ids as integer filters
filter_str += _build_int_or_filters(USER_FILE, filters.user_file_ids)
# Convert UUIDs to strings for user_file_ids
user_file_ids_str = (
[str(uuid) for uuid in filters.user_file_ids] if filters.user_file_ids else None
)
filter_str += _build_or_filters(DOCUMENT_ID, user_file_ids_str)
filter_str += _build_int_or_filters(USER_FOLDER, filters.user_folder_ids)
# User project filter (array<int> attribute membership)
filter_str += _build_user_project_filter(filters.project_id)
# Time filter
filter_str += _build_time_filter(filters.time_cutoff)

View File

@@ -55,6 +55,7 @@ ACCESS_CONTROL_LIST = "access_control_list"
DOCUMENT_SETS = "document_sets"
USER_FILE = "user_file"
USER_FOLDER = "user_folder"
USER_PROJECT = "user_project"
LARGE_CHUNK_REFERENCE_IDS = "large_chunk_reference_ids"
METADATA = "metadata"
METADATA_LIST = "metadata_list"

View File

@@ -1,6 +1,7 @@
import base64
from enum import Enum
from typing import NotRequired
from uuid import UUID
from typing_extensions import TypedDict # noreorder
from pydantic import BaseModel
@@ -35,6 +36,7 @@ class FileDescriptor(TypedDict):
id: str
type: ChatFileType
name: NotRequired[str | None]
user_file_id: NotRequired[UUID | None]
class InMemoryChatFile(BaseModel):
@@ -56,4 +58,5 @@ class InMemoryChatFile(BaseModel):
"id": str(self.file_id),
"type": self.file_type,
"name": self.filename,
"user_file_id": str(self.file_id),
}

View File

@@ -10,7 +10,6 @@ from sqlalchemy.orm import Session
from onyx.configs.constants import FileOrigin
from onyx.db.models import ChatMessage
from onyx.db.models import UserFile
from onyx.db.models import UserFolder
from onyx.file_store.file_store import get_default_file_store
from onyx.file_store.models import ChatFileType
from onyx.file_store.models import FileDescriptor
@@ -22,15 +21,13 @@ from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
logger = setup_logger()
RECENT_FOLDER_ID = -1
def user_file_id_to_plaintext_file_name(user_file_id: int) -> str:
def user_file_id_to_plaintext_file_name(user_file_id: UUID) -> str:
    """Generate a consistent file name for storing plaintext content of a user file."""
    return "plaintext_" + str(user_file_id)
def store_user_file_plaintext(user_file_id: int, plaintext_content: str) -> bool:
def store_user_file_plaintext(user_file_id: UUID, plaintext_content: str) -> bool:
"""
Store plaintext content for a user file in the file store.
@@ -95,14 +92,7 @@ def load_all_chat_files(
return files
def load_user_folder(folder_id: int, db_session: Session) -> list[InMemoryChatFile]:
user_files = (
db_session.query(UserFile).filter(UserFile.folder_id == folder_id).all()
)
return [load_user_file(file.id, db_session) for file in user_files]
def load_user_file(file_id: int, db_session: Session) -> InMemoryChatFile:
def load_user_file(file_id: UUID, db_session: Session) -> InMemoryChatFile:
chat_file_type = ChatFileType.USER_KNOWLEDGE
status = "not_loaded"
@@ -159,17 +149,15 @@ def load_user_file(file_id: int, db_session: Session) -> InMemoryChatFile:
def load_in_memory_chat_files(
user_file_ids: list[int],
user_folder_ids: list[int],
user_file_ids: list[UUID],
db_session: Session,
) -> list[InMemoryChatFile]:
"""
Loads the actual content of user files specified by individual IDs and those
within specified folder IDs into memory.
within specified project IDs into memory.
Args:
user_file_ids: A list of specific UserFile IDs to load.
user_folder_ids: A list of UserFolder IDs. All UserFiles within these folders will be loaded.
db_session: The SQLAlchemy database session.
Returns:
@@ -182,32 +170,24 @@ def load_in_memory_chat_files(
run_functions_tuples_in_parallel(
# 1. Load files specified by individual IDs
[(load_user_file, (file_id, db_session)) for file_id in user_file_ids]
)
# 2. Load all files within specified folders
+ [
file
for folder_id in user_folder_ids
for file in load_user_folder(folder_id, db_session)
],
),
)
def get_user_files(
user_file_ids: list[int],
user_folder_ids: list[int],
user_file_ids: list[UUID],
db_session: Session,
) -> list[UserFile]:
"""
Fetches UserFile database records based on provided file and folder IDs.
Fetches UserFile database records based on provided file and project IDs.
Args:
user_file_ids: A list of specific UserFile IDs to fetch.
user_folder_ids: A list of UserFolder IDs. All UserFiles within these folders will be fetched.
db_session: The SQLAlchemy database session.
Returns:
A list containing UserFile SQLAlchemy model objects corresponding to the
specified file IDs and all files within the specified folder IDs.
specified file IDs and all files within the specified project IDs.
It does NOT return the actual file content.
"""
user_files: list[UserFile] = []
@@ -222,43 +202,28 @@ def get_user_files(
if user_file is not None:
user_files.append(user_file)
# 2. Fetch UserFile records for all files within specified folder IDs
for user_folder_id in user_folder_ids:
# Query the database for all UserFiles belonging to the current folder ID
# and extend the list with the results
user_files.extend(
db_session.query(UserFile)
.filter(UserFile.folder_id == user_folder_id)
.all()
)
# 3. Return the combined list of UserFile database objects
return user_files
def get_user_files_as_user(
user_file_ids: list[int],
user_folder_ids: list[int],
user_file_ids: list[UUID],
user_id: UUID | None,
db_session: Session,
) -> list[UserFile]:
"""
Fetches all UserFile database records for a given user.
"""
user_files = get_user_files(user_file_ids, user_folder_ids, db_session)
user_files = get_user_files(user_file_ids, db_session)
current_user_files = []
for user_file in user_files:
# Note: if user_id is None, then all files should be None as well
# (since auth must be disabled in this case)
if user_file.folder_id == RECENT_FOLDER_ID:
if user_file.user_id == user_id:
current_user_files.append(user_file)
else:
if user_file.user_id != user_id:
raise ValueError(
f"User {user_id} does not have access to file {user_file.id}"
)
current_user_files.append(user_file)
if user_file.user_id != user_id:
raise ValueError(
f"User {user_id} does not have access to file {user_file.id}"
)
current_user_files.append(user_file)
return current_user_files
@@ -332,39 +297,3 @@ def save_files(urls: list[str], base64_files: list[str]) -> list[str]:
def build_frontend_file_url(file_id: str) -> str:
return f"/api/chat/file/{file_id}"
def load_all_persona_files_for_chat(
    persona_id: int, db_session: Session
) -> tuple[list[InMemoryChatFile], list[int]]:
    """Load every file attached to a persona, directly or via its folders.

    Returns:
        (loaded in-memory files, corresponding user-file ids). Direct files
        come first, followed by folder files.
    """
    # Function-scope imports — presumably to avoid a circular import at
    # module load time; confirm before hoisting.
    from onyx.db.models import Persona
    from sqlalchemy.orm import joinedload

    persona = (
        db_session.query(Persona)
        .filter(Persona.id == persona_id)
        .options(
            joinedload(Persona.user_files),
            joinedload(Persona.user_folders).joinedload(UserFolder.files),
        )
        .one()
    )
    # Load directly-attached files in parallel
    persona_file_calls = [
        (load_user_file, (user_file.id, db_session)) for user_file in persona.user_files
    ]
    persona_loaded_files = run_functions_tuples_in_parallel(persona_file_calls)

    # Then pull in the contents of every attached folder
    persona_folder_files = []
    persona_folder_file_ids = []
    for user_folder in persona.user_folders:
        folder_files = load_user_folder(user_folder.id, db_session)
        persona_folder_files.extend(folder_files)
        persona_folder_file_ids.extend([file.id for file in user_folder.files])

    persona_files = list(persona_loaded_files) + persona_folder_files
    persona_file_ids = [
        file.id for file in persona.user_files
    ] + persona_folder_file_ids
    return persona_files, persona_file_ids

View File

@@ -0,0 +1,210 @@
import contextlib
from collections.abc import Generator
from sqlalchemy.engine.util import TransactionalContext
from sqlalchemy.orm import Session
from onyx.access.access import get_access_for_documents
from onyx.access.models import DocumentAccess
from onyx.configs.constants import DEFAULT_BOOST
from onyx.connectors.models import Document
from onyx.connectors.models import IndexAttemptMetadata
from onyx.db.chunk import update_chunk_boost_components__no_commit
from onyx.db.document import fetch_chunk_counts_for_documents
from onyx.db.document import mark_document_as_indexed_for_cc_pair__no_commit
from onyx.db.document import prepare_to_modify_documents
from onyx.db.document import update_docs_chunk_count__no_commit
from onyx.db.document import update_docs_last_modified__no_commit
from onyx.db.document import update_docs_updated_at__no_commit
from onyx.db.document_set import fetch_document_sets_for_documents
from onyx.indexing.indexing_pipeline import DocumentBatchPrepareContext
from onyx.indexing.indexing_pipeline import index_doc_batch_prepare
from onyx.indexing.models import BuildMetadataAwareChunksResult
from onyx.indexing.models import DocMetadataAwareIndexChunk
from onyx.indexing.models import IndexChunk
from onyx.indexing.models import UpdatableChunkData
from onyx.utils.logger import setup_logger
logger = setup_logger()
class DocumentIndexingBatchAdapter:
"""Default adapter: handles DB prep, locking, metadata enrichment, and finalize.
Keeps orchestration logic in the pipeline and side-effects in the adapter.
"""
def __init__(
self,
db_session: Session,
connector_id: int,
credential_id: int,
tenant_id: str,
index_attempt_metadata: IndexAttemptMetadata,
):
self.db_session = db_session
self.connector_id = connector_id
self.credential_id = credential_id
self.tenant_id = tenant_id
self.index_attempt_metadata = index_attempt_metadata
def prepare(
self, documents: list[Document], ignore_time_skip: bool
) -> DocumentBatchPrepareContext | None:
"""Upsert docs, map CC pairs, return context or mark as indexed if no-op."""
context = index_doc_batch_prepare(
documents=documents,
index_attempt_metadata=self.index_attempt_metadata,
db_session=self.db_session,
ignore_time_skip=ignore_time_skip,
)
if not context:
# even though we didn't actually index anything, we should still
# mark them as "completed" for the CC Pair in order to make the
# counts match
mark_document_as_indexed_for_cc_pair__no_commit(
connector_id=self.index_attempt_metadata.connector_id,
credential_id=self.index_attempt_metadata.credential_id,
document_ids=[doc.id for doc in documents],
db_session=self.db_session,
)
self.db_session.commit()
return context
@contextlib.contextmanager
def lock_context(
self, documents: list[Document]
) -> Generator[TransactionalContext, None, None]:
"""Acquire transaction/row locks on docs for the critical section."""
with prepare_to_modify_documents(
db_session=self.db_session, document_ids=[doc.id for doc in documents]
) as transaction:
yield transaction
def build_metadata_aware_chunks(
self,
chunks_with_embeddings: list[IndexChunk],
chunk_content_scores: list[float],
tenant_id: str,
context: DocumentBatchPrepareContext,
) -> BuildMetadataAwareChunksResult:
"""Enrich chunks with access, document sets, boosts and token counts."""
no_access = DocumentAccess.build(
user_emails=[],
user_groups=[],
external_user_emails=[],
external_user_group_ids=[],
is_public=False,
)
updatable_ids = [doc.id for doc in context.updatable_docs]
doc_id_to_access_info = get_access_for_documents(
document_ids=updatable_ids, db_session=self.db_session
)
doc_id_to_document_set = {
document_id: document_sets
for document_id, document_sets in fetch_document_sets_for_documents(
document_ids=updatable_ids, db_session=self.db_session
)
}
doc_id_to_previous_chunk_cnt: dict[str, int] = {
document_id: chunk_count
for document_id, chunk_count in fetch_chunk_counts_for_documents(
document_ids=updatable_ids,
db_session=self.db_session,
)
}
doc_id_to_new_chunk_cnt: dict[str, int] = {
document_id: len(
[
chunk
for chunk in chunks_with_embeddings
if chunk.source_document.id == document_id
]
)
for document_id in updatable_ids
}
access_aware_chunks = [
DocMetadataAwareIndexChunk.from_index_chunk(
index_chunk=chunk,
access=doc_id_to_access_info.get(chunk.source_document.id, no_access),
document_sets=set(
doc_id_to_document_set.get(chunk.source_document.id, [])
),
boost=(
context.id_to_boost_map[chunk.source_document.id]
if chunk.source_document.id in context.id_to_boost_map
else DEFAULT_BOOST
),
tenant_id=tenant_id,
aggregated_chunk_boost_factor=chunk_content_scores[chunk_num],
)
for chunk_num, chunk in enumerate(chunks_with_embeddings)
]
return BuildMetadataAwareChunksResult(
chunks=access_aware_chunks,
doc_id_to_previous_chunk_cnt=doc_id_to_previous_chunk_cnt,
doc_id_to_new_chunk_cnt=doc_id_to_new_chunk_cnt,
user_file_id_to_raw_text={},
user_file_id_to_token_count={},
)
def post_index(
    self,
    context: DocumentBatchPrepareContext,
    updatable_chunk_data: list[UpdatableChunkData],
    filtered_documents: list[Document],
    result: BuildMetadataAwareChunksResult,
) -> None:
    """Finalize Postgres bookkeeping after chunks were written to the vector DB.

    Refreshes per-document timestamps and chunk counts, marks every document
    in ``filtered_documents`` as indexed for the CC Pair, persists chunk boost
    scores, and commits the session exactly once at the end.
    """
    updatable_ids = [doc.id for doc in context.updatable_docs]
    last_modified_ids = []
    ids_to_new_updated_at = {}
    for doc in context.updatable_docs:
        # every updatable doc gets its "last modified in Onyx" stamp refreshed
        last_modified_ids.append(doc.id)
        # doc_updated_at is the source's idea (on the other end of the connector)
        # of when the doc was last modified
        if doc.doc_updated_at is None:
            continue
        ids_to_new_updated_at[doc.id] = doc.doc_updated_at
    update_docs_updated_at__no_commit(
        ids_to_new_updated_at=ids_to_new_updated_at, db_session=self.db_session
    )
    update_docs_last_modified__no_commit(
        document_ids=last_modified_ids, db_session=self.db_session
    )
    update_docs_chunk_count__no_commit(
        document_ids=updatable_ids,
        doc_id_to_chunk_count=result.doc_id_to_new_chunk_cnt,
        db_session=self.db_session,
    )
    # these documents can now be counted as part of the CC Pairs
    # document count, so we need to mark them as indexed
    # NOTE: even documents we skipped since they were already up
    # to date should be counted here in order to maintain parity
    # between CC Pair and index attempt counts
    mark_document_as_indexed_for_cc_pair__no_commit(
        connector_id=self.index_attempt_metadata.connector_id,
        credential_id=self.index_attempt_metadata.credential_id,
        document_ids=[doc.id for doc in filtered_documents],
        db_session=self.db_session,
    )
    # save the chunk boost components to postgres
    update_chunk_boost_components__no_commit(
        chunk_data=updatable_chunk_data, db_session=self.db_session
    )
    # single commit for all of the __no_commit helpers above
    self.db_session.commit()

View File

@@ -0,0 +1,210 @@
import contextlib
import time
from collections.abc import Generator
from sqlalchemy import select
from sqlalchemy.exc import OperationalError
from sqlalchemy.orm import Session
from sqlalchemy.orm.session import TransactionalContext
from onyx.access.access import get_access_for_user_files
from onyx.access.models import DocumentAccess
from onyx.configs.constants import DEFAULT_BOOST
from onyx.connectors.models import Document
from onyx.db.enums import UserFileStatus
from onyx.db.models import UserFile
from onyx.db.user_file import fetch_chunk_counts_for_user_files
from onyx.db.user_file import fetch_user_project_ids_for_user_files
from onyx.indexing.indexing_pipeline import DocumentBatchPrepareContext
from onyx.indexing.models import BuildMetadataAwareChunksResult
from onyx.indexing.models import DocMetadataAwareIndexChunk
from onyx.indexing.models import IndexChunk
from onyx.indexing.models import UpdatableChunkData
from onyx.llm.factory import get_default_llms
from onyx.natural_language_processing.utils import get_tokenizer
from onyx.utils.logger import setup_logger
logger = setup_logger()
# Number of attempts to grab row locks on the user files before giving up.
_NUM_LOCK_ATTEMPTS = 3
# Seconds to sleep between lock attempts when another process holds the rows.
retry_delay = 0.5
def _acquire_user_file_locks(db_session: Session, user_file_ids: list[int]) -> bool:
    """Take row-level locks on every requested UserFile row.

    Issues SELECT ... FOR UPDATE NOWAIT, so sqlalchemy raises an
    OperationalError immediately if any row is held by another transaction.
    Returns False when some of the requested rows do not exist.
    """
    lock_stmt = (
        select(UserFile.id)
        .where(UserFile.id.in_(user_file_ids))
        .with_for_update(nowait=True)
    )
    locked_ids = db_session.scalars(lock_stmt).all()
    # every distinct requested ID must have a matching (now locked) row
    if len(locked_ids) == len(set(user_file_ids)):
        return True
    logger.warning("Didn't find row for all specified user file IDs. Aborting.")
    return False
class UserFileIndexingAdapter:
    """Indexing-pipeline adapter for user-uploaded files.

    Unlike connector documents, user files carry no CC Pair bookkeeping:
    access is derived from project membership, boost is fixed at the default,
    and completion is recorded directly on the UserFile rows.
    """

    def __init__(self, tenant_id: str, db_session: Session):
        self.tenant_id = tenant_id
        self.db_session = db_session

    def prepare(
        self, documents: list[Document], ignore_time_skip: bool
    ) -> DocumentBatchPrepareContext:
        """Build the batch context.

        User files are indexed only once, so every document is treated as
        updatable and ``ignore_time_skip`` is irrelevant here.
        """
        return DocumentBatchPrepareContext(
            updatable_docs=documents, id_to_boost_map={}  # TODO(subash): add boost map
        )

    @contextlib.contextmanager
    def lock_context(
        self, documents: list[Document]
    ) -> Generator[TransactionalContext, None, None]:
        """Open a transaction holding row locks on the given user files.

        Retries up to _NUM_LOCK_ATTEMPTS times when another process already
        holds the locks (FOR UPDATE NOWAIT surfaces as OperationalError) and
        raises RuntimeError if the locks could never be acquired.
        """
        self.db_session.commit()  # ensure that we're not in a transaction
        lock_acquired = False
        for i in range(_NUM_LOCK_ATTEMPTS):
            try:
                with self.db_session.begin() as transaction:
                    lock_acquired = _acquire_user_file_locks(
                        db_session=self.db_session,
                        user_file_ids=[doc.id for doc in documents],
                    )
                    if lock_acquired:
                        yield transaction
                        break
            except OperationalError as e:
                logger.warning(
                    f"Failed to acquire locks for user files on attempt {i}, retrying. Error: {e}"
                )
                time.sleep(retry_delay)
        if not lock_acquired:
            raise RuntimeError(
                f"Failed to acquire locks after {_NUM_LOCK_ATTEMPTS} attempts "
                f"for user files: {[doc.id for doc in documents]}"
            )

    def build_metadata_aware_chunks(
        self,
        chunks_with_embeddings: list[IndexChunk],
        chunk_content_scores: list[float],
        tenant_id: str,
        context: DocumentBatchPrepareContext,
    ) -> BuildMetadataAwareChunksResult:
        """Enrich chunks with access/project metadata and per-file text stats.

        Also concatenates each file's chunk contents to produce the raw
        plaintext and an LLM token count used later for context budgeting.
        """
        # Fallback ACL: invisible to everyone until real access info exists.
        no_access = DocumentAccess.build(
            user_emails=[],
            user_groups=[],
            external_user_emails=[],
            external_user_group_ids=[],
            is_public=False,
        )
        updatable_ids = [doc.id for doc in context.updatable_docs]
        user_file_id_to_project_ids = fetch_user_project_ids_for_user_files(
            user_file_ids=updatable_ids,
            db_session=self.db_session,
        )
        user_file_id_to_access: dict[str, DocumentAccess] = get_access_for_user_files(
            user_file_ids=updatable_ids,
            db_session=self.db_session,
        )
        user_file_id_to_previous_chunk_cnt: dict[str, int] = {
            user_file_id: chunk_count
            for user_file_id, chunk_count in fetch_chunk_counts_for_user_files(
                user_file_ids=updatable_ids,
                db_session=self.db_session,
            )
        }
        # Group chunks by their source user file in ONE pass instead of
        # rescanning chunks_with_embeddings for every file — the original
        # did that twice (for counts and again for raw text), O(files * chunks).
        chunks_by_user_file: dict[str, list[IndexChunk]] = {
            user_file_id: [] for user_file_id in updatable_ids
        }
        for chunk in chunks_with_embeddings:
            bucket = chunks_by_user_file.get(chunk.source_document.id)
            if bucket is not None:
                bucket.append(chunk)
        user_file_id_to_new_chunk_cnt: dict[str, int] = {
            user_file_id: len(file_chunks)
            for user_file_id, file_chunks in chunks_by_user_file.items()
        }
        # Initialize tokenizer used for token count calculation.
        try:
            llm, _ = get_default_llms()
            llm_tokenizer = get_tokenizer(
                model_name=llm.config.model_name,
                provider_type=llm.config.model_provider,
            )
        except Exception as e:
            # Token counts are best-effort: fall back to 0 rather than failing
            # the whole indexing batch.
            logger.error(f"Error getting tokenizer: {e}")
            llm_tokenizer = None
        # NOTE(review): the raw-text/token maps are keyed by str(user_file_id)
        # while the chunk-count maps use the raw doc ID — confirm both produce
        # the same string for UUID-valued IDs (post_index relies on it).
        user_file_id_to_raw_text: dict[str, str] = {}
        user_file_id_to_token_count: dict[str, int | None] = {}
        for user_file_id, file_chunks in chunks_by_user_file.items():
            key = str(user_file_id)
            if file_chunks:
                combined_content = " ".join(chunk.content for chunk in file_chunks)
                user_file_id_to_raw_text[key] = combined_content
                user_file_id_to_token_count[key] = (
                    len(llm_tokenizer.encode(combined_content)) if llm_tokenizer else 0
                )
            else:
                user_file_id_to_raw_text[key] = ""
                user_file_id_to_token_count[key] = None
        access_aware_chunks = [
            DocMetadataAwareIndexChunk.from_index_chunk(
                index_chunk=chunk,
                access=user_file_id_to_access.get(chunk.source_document.id, no_access),
                document_sets=set(),
                user_project=user_file_id_to_project_ids.get(
                    chunk.source_document.id, []
                ),
                # we are going to index userfiles only once, so we just set the boost to the default
                boost=DEFAULT_BOOST,
                tenant_id=tenant_id,
                aggregated_chunk_boost_factor=chunk_content_scores[chunk_num],
            )
            for chunk_num, chunk in enumerate(chunks_with_embeddings)
        ]
        return BuildMetadataAwareChunksResult(
            chunks=access_aware_chunks,
            doc_id_to_previous_chunk_cnt=user_file_id_to_previous_chunk_cnt,
            doc_id_to_new_chunk_cnt=user_file_id_to_new_chunk_cnt,
            user_file_id_to_raw_text=user_file_id_to_raw_text,
            user_file_id_to_token_count=user_file_id_to_token_count,
        )

    def post_index(
        self,
        context: DocumentBatchPrepareContext,
        updatable_chunk_data: list[UpdatableChunkData],
        filtered_documents: list[Document],
        result: BuildMetadataAwareChunksResult,
    ) -> None:
        """Mark indexed user files COMPLETED and persist chunk/token counts.

        ``updatable_chunk_data`` and ``filtered_documents`` are unused here;
        they exist for interface parity with the connector-document adapter.
        """
        user_file_ids = [doc.id for doc in context.updatable_docs]
        user_files = (
            self.db_session.query(UserFile).filter(UserFile.id.in_(user_file_ids)).all()
        )
        for user_file in user_files:
            user_file.status = UserFileStatus.COMPLETED
            # direct indexing is intentional: a missing entry means the result
            # maps and the DB rows disagree, which should fail loudly
            user_file.chunk_count = result.doc_id_to_new_chunk_cnt[str(user_file.id)]
            user_file.token_count = result.user_file_id_to_token_count[
                str(user_file.id)
            ]
        self.db_session.commit()

View File

@@ -6,8 +6,6 @@ from pydantic import BaseModel
from pydantic import ConfigDict
from sqlalchemy.orm import Session
from onyx.access.access import get_access_for_documents
from onyx.access.models import DocumentAccess
from onyx.configs.app_configs import DEFAULT_CONTEXTUAL_RAG_LLM_NAME
from onyx.configs.app_configs import DEFAULT_CONTEXTUAL_RAG_LLM_PROVIDER
from onyx.configs.app_configs import ENABLE_CONTEXTUAL_RAG
@@ -15,7 +13,6 @@ from onyx.configs.app_configs import MAX_DOCUMENT_CHARS
from onyx.configs.app_configs import MAX_TOKENS_FOR_FULL_INCLUSION
from onyx.configs.app_configs import USE_CHUNK_SUMMARY
from onyx.configs.app_configs import USE_DOCUMENT_SUMMARY
from onyx.configs.constants import DEFAULT_BOOST
from onyx.configs.llm_configs import get_image_extraction_and_analysis_enabled
from onyx.configs.model_configs import USE_INFORMATION_CONTENT_CLASSIFICATION
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
@@ -30,24 +27,13 @@ from onyx.connectors.models import IndexAttemptMetadata
from onyx.connectors.models import IndexingDocument
from onyx.connectors.models import Section
from onyx.connectors.models import TextSection
from onyx.db.chunk import update_chunk_boost_components__no_commit
from onyx.db.document import fetch_chunk_counts_for_documents
from onyx.db.document import get_documents_by_ids
from onyx.db.document import mark_document_as_indexed_for_cc_pair__no_commit
from onyx.db.document import prepare_to_modify_documents
from onyx.db.document import update_docs_chunk_count__no_commit
from onyx.db.document import update_docs_last_modified__no_commit
from onyx.db.document import update_docs_updated_at__no_commit
from onyx.db.document import upsert_document_by_connector_credential_pair
from onyx.db.document import upsert_documents
from onyx.db.document_set import fetch_document_sets_for_documents
from onyx.db.models import Document as DBDocument
from onyx.db.models import IndexModelStatus
from onyx.db.search_settings import get_active_search_settings
from onyx.db.tag import upsert_document_tags
from onyx.db.user_documents import fetch_user_files_for_documents
from onyx.db.user_documents import fetch_user_folders_for_documents
from onyx.db.user_documents import update_user_file_token_count__no_commit
from onyx.document_index.document_index_utils import (
get_multipass_config,
)
@@ -56,18 +42,16 @@ from onyx.document_index.interfaces import DocumentMetadata
from onyx.document_index.interfaces import IndexBatchParams
from onyx.file_processing.image_summarization import summarize_image_with_error_handling
from onyx.file_store.file_store import get_default_file_store
from onyx.file_store.utils import store_user_file_plaintext
from onyx.indexing.chunker import Chunker
from onyx.indexing.embedder import embed_chunks_with_failure_handling
from onyx.indexing.embedder import IndexingEmbedder
from onyx.indexing.models import DocAwareChunk
from onyx.indexing.models import DocMetadataAwareIndexChunk
from onyx.indexing.models import IndexChunk
from onyx.indexing.models import IndexingBatchAdapter
from onyx.indexing.models import UpdatableChunkData
from onyx.indexing.vector_db_insertion import write_chunks_to_vector_db_with_backoff
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.factory import get_default_llm_with_vision
from onyx.llm.factory import get_default_llms
from onyx.llm.factory import get_llm_for_contextual_rag
from onyx.llm.interfaces import LLM
from onyx.llm.utils import MAX_CONTEXT_TOKENS
@@ -94,7 +78,7 @@ logger = setup_logger()
class DocumentBatchPrepareContext(BaseModel):
updatable_docs: list[Document]
id_to_db_doc_map: dict[str, DBDocument]
id_to_boost_map: dict[str, int]
indexable_docs: list[IndexingDocument] = []
model_config = ConfigDict(arbitrary_types_allowed=True)
@@ -255,9 +239,9 @@ def index_doc_batch_with_handler(
information_content_classification_model: InformationContentClassificationModel,
document_index: DocumentIndex,
document_batch: list[Document],
index_attempt_metadata: IndexAttemptMetadata,
db_session: Session,
request_id: str | None,
tenant_id: str,
adapter: IndexingBatchAdapter,
ignore_time_skip: bool = False,
enable_contextual_rag: bool = False,
llm: LLM | None = None,
@@ -269,10 +253,10 @@ def index_doc_batch_with_handler(
information_content_classification_model=information_content_classification_model,
document_index=document_index,
document_batch=document_batch,
index_attempt_metadata=index_attempt_metadata,
db_session=db_session,
ignore_time_skip=ignore_time_skip,
request_id=request_id,
tenant_id=tenant_id,
adapter=adapter,
ignore_time_skip=ignore_time_skip,
enable_contextual_rag=enable_contextual_rag,
llm=llm,
)
@@ -364,9 +348,9 @@ def index_doc_batch_prepare(
if not updatable_docs:
return None
id_to_db_doc_map = {doc.id: doc for doc in db_docs}
id_to_boost_map = {doc.id: doc.boost for doc in db_docs}
return DocumentBatchPrepareContext(
updatable_docs=updatable_docs, id_to_db_doc_map=id_to_db_doc_map
updatable_docs=updatable_docs, id_to_boost_map=id_to_boost_map
)
@@ -681,14 +665,15 @@ def index_doc_batch(
embedder: IndexingEmbedder,
information_content_classification_model: InformationContentClassificationModel,
document_index: DocumentIndex,
index_attempt_metadata: IndexAttemptMetadata,
db_session: Session,
request_id: str | None,
tenant_id: str,
adapter: IndexingBatchAdapter,
enable_contextual_rag: bool = False,
llm: LLM | None = None,
ignore_time_skip: bool = False,
filter_fnc: Callable[[list[Document]], list[Document]] = filter_documents,
) -> IndexingPipelineResult:
"""End-to-end indexing for a pre-batched set of documents."""
"""Takes different pieces of the indexing pipeline and applies it to a batch of documents
Note that the documents should already be batched at this point so that it does not inflate the
memory requirements
@@ -696,33 +681,9 @@ def index_doc_batch(
Returns a tuple where the first element is the number of new docs and the
second element is the number of chunks."""
no_access = DocumentAccess.build(
user_emails=[],
user_groups=[],
external_user_emails=[],
external_user_group_ids=[],
is_public=False,
)
filtered_documents = filter_fnc(document_batch)
ctx = index_doc_batch_prepare(
documents=filtered_documents,
index_attempt_metadata=index_attempt_metadata,
ignore_time_skip=ignore_time_skip,
db_session=db_session,
)
if not ctx:
# even though we didn't actually index anything, we should still
# mark them as "completed" for the CC Pair in order to make the
# counts match
mark_document_as_indexed_for_cc_pair__no_commit(
connector_id=index_attempt_metadata.connector_id,
credential_id=index_attempt_metadata.credential_id,
document_ids=[doc.id for doc in filtered_documents],
db_session=db_session,
)
db_session.commit()
context = adapter.prepare(filtered_documents, ignore_time_skip)
if not context:
return IndexingPipelineResult(
new_docs=0,
total_docs=len(filtered_documents),
@@ -732,21 +693,21 @@ def index_doc_batch(
# Convert documents to IndexingDocument objects with processed section
# logger.debug("Processing image sections")
ctx.indexable_docs = process_image_sections(ctx.updatable_docs)
context.indexable_docs = process_image_sections(context.updatable_docs)
doc_descriptors = [
{
"doc_id": doc.id,
"doc_length": doc.get_total_char_length(),
}
for doc in ctx.indexable_docs
for doc in context.indexable_docs
]
logger.debug(f"Starting indexing process for documents: {doc_descriptors}")
logger.debug("Starting chunking")
# NOTE: no special handling for failures here, since the chunker is not
# a common source of failure for the indexing pipeline
chunks: list[DocAwareChunk] = chunker.chunk(ctx.indexable_docs)
chunks: list[DocAwareChunk] = chunker.chunk(context.indexable_docs)
llm_tokenizer: BaseTokenizer | None = None
# contextual RAG
@@ -772,7 +733,7 @@ def index_doc_batch(
chunks=chunks,
embedder=embedder,
tenant_id=tenant_id,
request_id=index_attempt_metadata.request_id,
request_id=request_id,
)
if chunks
else ([], [])
@@ -786,7 +747,7 @@ def index_doc_batch(
else [1.0] * len(chunks_with_embeddings)
)
updatable_ids = [doc.id for doc in ctx.updatable_docs]
updatable_ids = [doc.id for doc in context.updatable_docs]
updatable_chunk_data = [
UpdatableChunkData(
chunk_id=chunk.chunk_id,
@@ -799,113 +760,20 @@ def index_doc_batch(
# Acquires a lock on the documents so that no other process can modify them
# NOTE: don't need to acquire till here, since this is when the actual race condition
# with Vespa can occur.
with prepare_to_modify_documents(db_session=db_session, document_ids=updatable_ids):
doc_id_to_access_info = get_access_for_documents(
document_ids=updatable_ids, db_session=db_session
)
doc_id_to_document_set = {
document_id: document_sets
for document_id, document_sets in fetch_document_sets_for_documents(
document_ids=updatable_ids, db_session=db_session
)
}
doc_id_to_user_file_id: dict[str, int | None] = fetch_user_files_for_documents(
document_ids=updatable_ids, db_session=db_session
)
doc_id_to_user_folder_id: dict[str, int | None] = (
fetch_user_folders_for_documents(
document_ids=updatable_ids, db_session=db_session
)
)
doc_id_to_previous_chunk_cnt: dict[str, int] = {
document_id: chunk_count
for document_id, chunk_count in fetch_chunk_counts_for_documents(
document_ids=updatable_ids,
db_session=db_session,
)
}
doc_id_to_new_chunk_cnt: dict[str, int] = {
document_id: len(
[
chunk
for chunk in chunks_with_embeddings
if chunk.source_document.id == document_id
]
)
for document_id in updatable_ids
}
try:
llm, _ = get_default_llms()
llm_tokenizer = get_tokenizer(
model_name=llm.config.model_name,
provider_type=llm.config.model_provider,
)
except Exception as e:
logger.error(f"Error getting tokenizer: {e}")
llm_tokenizer = None
# Calculate token counts for each document by combining all its chunks' content
user_file_id_to_token_count: dict[int, int | None] = {}
user_file_id_to_raw_text: dict[int, str] = {}
for document_id in updatable_ids:
# Only calculate token counts for documents that have a user file ID
user_file_id = doc_id_to_user_file_id.get(document_id)
if user_file_id is None:
continue
document_chunks = [
chunk
for chunk in chunks_with_embeddings
if chunk.source_document.id == document_id
]
if document_chunks:
combined_content = " ".join(
[chunk.content for chunk in document_chunks]
)
token_count = (
len(llm_tokenizer.encode(combined_content)) if llm_tokenizer else 0
)
user_file_id_to_token_count[user_file_id] = token_count
user_file_id_to_raw_text[user_file_id] = combined_content
else:
user_file_id_to_token_count[user_file_id] = None
with adapter.lock_context(context.updatable_docs):
# we're concerned about race conditions where multiple simultaneous indexings might result
# in one set of metadata overwriting another one in vespa.
# we still write data here for the immediate and most likely correct sync, but
# to resolve this, an update of the last modified field at the end of this loop
# always triggers a final metadata sync via the celery queue
access_aware_chunks = [
DocMetadataAwareIndexChunk.from_index_chunk(
index_chunk=chunk,
access=doc_id_to_access_info.get(chunk.source_document.id, no_access),
document_sets=set(
doc_id_to_document_set.get(chunk.source_document.id, [])
),
user_file=doc_id_to_user_file_id.get(chunk.source_document.id, None),
user_folder=doc_id_to_user_folder_id.get(
chunk.source_document.id, None
),
boost=(
ctx.id_to_db_doc_map[chunk.source_document.id].boost
if chunk.source_document.id in ctx.id_to_db_doc_map
else DEFAULT_BOOST
),
tenant_id=tenant_id,
aggregated_chunk_boost_factor=chunk_content_scores[chunk_num],
)
for chunk_num, chunk in enumerate(chunks_with_embeddings)
]
result = adapter.build_metadata_aware_chunks(
chunks_with_embeddings=chunks_with_embeddings,
chunk_content_scores=chunk_content_scores,
tenant_id=tenant_id,
context=context,
)
short_descriptor_list = [
chunk.to_short_descriptor() for chunk in access_aware_chunks
]
short_descriptor_list = [chunk.to_short_descriptor() for chunk in result.chunks]
short_descriptor_log = str(short_descriptor_list)[:1024]
logger.debug(f"Indexing the following chunks: {short_descriptor_log}")
@@ -917,10 +785,10 @@ def index_doc_batch(
vector_db_write_failures,
) = write_chunks_to_vector_db_with_backoff(
document_index=document_index,
chunks=access_aware_chunks,
chunks=result.chunks,
index_batch_params=IndexBatchParams(
doc_id_to_previous_chunk_cnt=doc_id_to_previous_chunk_cnt,
doc_id_to_new_chunk_cnt=doc_id_to_new_chunk_cnt,
doc_id_to_previous_chunk_cnt=result.doc_id_to_previous_chunk_cnt,
doc_id_to_new_chunk_cnt=result.doc_id_to_new_chunk_cnt,
tenant_id=tenant_id,
large_chunks_enabled=chunker.enable_large_chunks,
),
@@ -951,85 +819,380 @@ def index_doc_batch(
"This should never happen."
)
last_modified_ids = []
ids_to_new_updated_at = {}
for doc in ctx.updatable_docs:
last_modified_ids.append(doc.id)
# doc_updated_at is the source's idea (on the other end of the connector)
# of when the doc was last modified
if doc.doc_updated_at is None:
continue
ids_to_new_updated_at[doc.id] = doc.doc_updated_at
# Store the plaintext in the file store for faster retrieval
# NOTE: this creates its own session to avoid committing the overall
# transaction.
for user_file_id, raw_text in user_file_id_to_raw_text.items():
store_user_file_plaintext(
user_file_id=user_file_id,
plaintext_content=raw_text,
)
update_docs_updated_at__no_commit(
ids_to_new_updated_at=ids_to_new_updated_at, db_session=db_session
adapter.post_index(
context=context,
updatable_chunk_data=updatable_chunk_data,
filtered_documents=filtered_documents,
result=result,
)
update_docs_last_modified__no_commit(
document_ids=last_modified_ids, db_session=db_session
)
update_docs_chunk_count__no_commit(
document_ids=updatable_ids,
doc_id_to_chunk_count=doc_id_to_new_chunk_cnt,
db_session=db_session,
)
update_user_file_token_count__no_commit(
user_file_id_to_token_count=user_file_id_to_token_count,
db_session=db_session,
)
# these documents can now be counted as part of the CC Pairs
# document count, so we need to mark them as indexed
# NOTE: even documents we skipped since they were already up
# to date should be counted here in order to maintain parity
# between CC Pair and index attempt counts
mark_document_as_indexed_for_cc_pair__no_commit(
connector_id=index_attempt_metadata.connector_id,
credential_id=index_attempt_metadata.credential_id,
document_ids=[doc.id for doc in filtered_documents],
db_session=db_session,
)
# save the chunk boost components to postgres
update_chunk_boost_components__no_commit(
chunk_data=updatable_chunk_data, db_session=db_session
)
# Pause user file ccpairs
# TODO: investigate why nothing is done here?
db_session.commit()
result = IndexingPipelineResult(
return IndexingPipelineResult(
new_docs=len([r for r in insertion_records if not r.already_existed]),
total_docs=len(filtered_documents),
total_chunks=len(access_aware_chunks),
total_chunks=len(chunks_with_embeddings),
failures=vector_db_write_failures + embedding_failures,
)
return result
# @log_function_time(debug_only=True)
# def index_doc_batch(
# *,
# document_batch: list[Document],
# chunker: Chunker,
# embedder: IndexingEmbedder,
# information_content_classification_model: InformationContentClassificationModel,
# document_index: DocumentIndex,
# index_attempt_metadata: IndexAttemptMetadata,
# db_session: Session,
# tenant_id: str,
# enable_contextual_rag: bool = False,
# llm: LLM | None = None,
# ignore_time_skip: bool = False,
# filter_fnc: Callable[[list[Document]], list[Document]] = filter_documents,
# ) -> IndexingPipelineResult:
# """Takes different pieces of the indexing pipeline and applies it to a batch of documents
# Note that the documents should already be batched at this point so that it does not inflate the
# memory requirements
# Returns a tuple where the first element is the number of new docs and the
# second element is the number of chunks."""
# no_access = DocumentAccess.build(
# user_emails=[],
# user_groups=[],
# external_user_emails=[],
# external_user_group_ids=[],
# is_public=False,
# )
# filtered_documents = filter_fnc(document_batch)
# ctx = index_doc_batch_prepare(
# documents=filtered_documents,
# index_attempt_metadata=index_attempt_metadata,
# ignore_time_skip=ignore_time_skip,
# db_session=db_session,
# )
# if not ctx:
# # even though we didn't actually index anything, we should still
# # mark them as "completed" for the CC Pair in order to make the
# # counts match
# mark_document_as_indexed_for_cc_pair__no_commit(
# connector_id=index_attempt_metadata.connector_id,
# credential_id=index_attempt_metadata.credential_id,
# document_ids=[doc.id for doc in filtered_documents],
# db_session=db_session,
# )
# db_session.commit()
# return IndexingPipelineResult(
# new_docs=0,
# total_docs=len(filtered_documents),
# total_chunks=0,
# failures=[],
# )
# # Convert documents to IndexingDocument objects with processed section
# # logger.debug("Processing image sections")
# ctx.indexable_docs = process_image_sections(ctx.updatable_docs)
# doc_descriptors = [
# {
# "doc_id": doc.id,
# "doc_length": doc.get_total_char_length(),
# }
# for doc in ctx.indexable_docs
# ]
# logger.debug(f"Starting indexing process for documents: {doc_descriptors}")
# logger.debug("Starting chunking")
# # NOTE: no special handling for failures here, since the chunker is not
# # a common source of failure for the indexing pipeline
# chunks: list[DocAwareChunk] = chunker.chunk(ctx.indexable_docs)
# llm_tokenizer: BaseTokenizer | None = None
# # contextual RAG
# if enable_contextual_rag:
# assert llm is not None, "must provide an LLM for contextual RAG"
# llm_tokenizer = get_tokenizer(
# model_name=llm.config.model_name,
# provider_type=llm.config.model_provider,
# )
# # Because the chunker's tokens are different from the LLM's tokens,
# # We add a fudge factor to ensure we truncate prompts to the LLM's token limit
# chunks = add_contextual_summaries(
# chunks=chunks,
# llm=llm,
# tokenizer=llm_tokenizer,
# chunk_token_limit=chunker.chunk_token_limit * 2,
# )
# logger.debug("Starting embedding")
# chunks_with_embeddings, embedding_failures = (
# embed_chunks_with_failure_handling(
# chunks=chunks,
# embedder=embedder,
# tenant_id=tenant_id,
# request_id=index_attempt_metadata.request_id,
# )
# if chunks
# else ([], [])
# )
# chunk_content_scores = (
# _get_aggregated_chunk_boost_factor(
# chunks_with_embeddings, information_content_classification_model
# )
# if USE_INFORMATION_CONTENT_CLASSIFICATION
# else [1.0] * len(chunks_with_embeddings)
# )
# updatable_ids = [doc.id for doc in ctx.updatable_docs]
# updatable_chunk_data = [
# UpdatableChunkData(
# chunk_id=chunk.chunk_id,
# document_id=chunk.source_document.id,
# boost_score=score,
# )
# for chunk, score in zip(chunks_with_embeddings, chunk_content_scores)
# ]
# # Acquires a lock on the documents so that no other process can modify them
# # NOTE: don't need to acquire till here, since this is when the actual race condition
# # with Vespa can occur.
# with prepare_to_modify_documents(db_session=db_session, document_ids=updatable_ids):
# doc_id_to_access_info = get_access_for_documents(
# document_ids=updatable_ids, db_session=db_session
# )
# doc_id_to_document_set = {
# document_id: document_sets
# for document_id, document_sets in fetch_document_sets_for_documents(
# document_ids=updatable_ids, db_session=db_session
# )
# }
# doc_id_to_user_file_id: dict[str, int | None] = fetch_user_files_for_documents(
# document_ids=updatable_ids, db_session=db_session
# )
# doc_id_to_user_folder_id: dict[str, int | None] = (
# fetch_user_folders_for_documents(
# document_ids=updatable_ids, db_session=db_session
# )
# )
# doc_id_to_previous_chunk_cnt: dict[str, int] = {
# document_id: chunk_count
# for document_id, chunk_count in fetch_chunk_counts_for_documents(
# document_ids=updatable_ids,
# db_session=db_session,
# )
# }
# doc_id_to_new_chunk_cnt: dict[str, int] = {
# document_id: len(
# [
# chunk
# for chunk in chunks_with_embeddings
# if chunk.source_document.id == document_id
# ]
# )
# for document_id in updatable_ids
# }
# try:
# llm, _ = get_default_llms()
# llm_tokenizer = get_tokenizer(
# model_name=llm.config.model_name,
# provider_type=llm.config.model_provider,
# )
# except Exception as e:
# logger.error(f"Error getting tokenizer: {e}")
# llm_tokenizer = None
# # Calculate token counts for each document by combining all its chunks' content
# user_file_id_to_token_count: dict[int, int | None] = {}
# user_file_id_to_raw_text: dict[int, str] = {}
# for document_id in updatable_ids:
# # Only calculate token counts for documents that have a user file ID
# user_file_id = doc_id_to_user_file_id.get(document_id)
# if user_file_id is None:
# continue
# document_chunks = [
# chunk
# for chunk in chunks_with_embeddings
# if chunk.source_document.id == document_id
# ]
# if document_chunks:
# combined_content = " ".join(
# [chunk.content for chunk in document_chunks]
# )
# token_count = (
# len(llm_tokenizer.encode(combined_content)) if llm_tokenizer else 0
# )
# user_file_id_to_token_count[user_file_id] = token_count
# user_file_id_to_raw_text[user_file_id] = combined_content
# else:
# user_file_id_to_token_count[user_file_id] = None
# # we're concerned about race conditions where multiple simultaneous indexings might result
# # in one set of metadata overwriting another one in vespa.
# # we still write data here for the immediate and most likely correct sync, but
# # to resolve this, an update of the last modified field at the end of this loop
# # always triggers a final metadata sync via the celery queue
# access_aware_chunks = [
# DocMetadataAwareIndexChunk.from_index_chunk(
# index_chunk=chunk,
# access=doc_id_to_access_info.get(chunk.source_document.id, no_access),
# document_sets=set(
# doc_id_to_document_set.get(chunk.source_document.id, [])
# ),
# user_file=doc_id_to_user_file_id.get(chunk.source_document.id, None),
# user_folder=doc_id_to_user_folder_id.get(
# chunk.source_document.id, None
# ),
# boost=(
# ctx.id_to_db_doc_map[chunk.source_document.id].boost
# if chunk.source_document.id in ctx.id_to_db_doc_map
# else DEFAULT_BOOST
# ),
# tenant_id=tenant_id,
# aggregated_chunk_boost_factor=chunk_content_scores[chunk_num],
# )
# for chunk_num, chunk in enumerate(chunks_with_embeddings)
# ]
# short_descriptor_list = [
# chunk.to_short_descriptor() for chunk in access_aware_chunks
# ]
# short_descriptor_log = str(short_descriptor_list)[:1024]
# logger.debug(f"Indexing the following chunks: {short_descriptor_log}")
# # A document will not be spread across different batches, so all the
# # documents with chunks in this set, are fully represented by the chunks
# # in this set
# (
# insertion_records,
# vector_db_write_failures,
# ) = write_chunks_to_vector_db_with_backoff(
# document_index=document_index,
# chunks=access_aware_chunks,
# index_batch_params=IndexBatchParams(
# doc_id_to_previous_chunk_cnt=doc_id_to_previous_chunk_cnt,
# doc_id_to_new_chunk_cnt=doc_id_to_new_chunk_cnt,
# tenant_id=tenant_id,
# large_chunks_enabled=chunker.enable_large_chunks,
# ),
# )
# all_returned_doc_ids = (
# {record.document_id for record in insertion_records}
# .union(
# {
# record.failed_document.document_id
# for record in vector_db_write_failures
# if record.failed_document
# }
# )
# .union(
# {
# record.failed_document.document_id
# for record in embedding_failures
# if record.failed_document
# }
# )
# )
# if all_returned_doc_ids != set(updatable_ids):
# raise RuntimeError(
# f"Some documents were not successfully indexed. "
# f"Updatable IDs: {updatable_ids}, "
# f"Returned IDs: {all_returned_doc_ids}. "
# "This should never happen."
# )
# last_modified_ids = []
# ids_to_new_updated_at = {}
# for doc in ctx.updatable_docs:
# last_modified_ids.append(doc.id)
# # doc_updated_at is the source's idea (on the other end of the connector)
# # of when the doc was last modified
# if doc.doc_updated_at is None:
# continue
# ids_to_new_updated_at[doc.id] = doc.doc_updated_at
# # Store the plaintext in the file store for faster retrieval
# # NOTE: this creates its own session to avoid committing the overall
# # transaction.
# for user_file_id, raw_text in user_file_id_to_raw_text.items():
# store_user_file_plaintext(
# user_file_id=user_file_id,
# plaintext_content=raw_text,
# )
# update_docs_updated_at__no_commit(
# ids_to_new_updated_at=ids_to_new_updated_at, db_session=db_session
# )
# update_docs_last_modified__no_commit(
# document_ids=last_modified_ids, db_session=db_session
# )
# update_docs_chunk_count__no_commit(
# document_ids=updatable_ids,
# doc_id_to_chunk_count=doc_id_to_new_chunk_cnt,
# db_session=db_session,
# )
# update_user_file_token_count__no_commit(
# user_file_id_to_token_count=user_file_id_to_token_count,
# db_session=db_session,
# )
# # these documents can now be counted as part of the CC Pairs
# # document count, so we need to mark them as indexed
# # NOTE: even documents we skipped since they were already up
# # to date should be counted here in order to maintain parity
# # between CC Pair and index attempt counts
# mark_document_as_indexed_for_cc_pair__no_commit(
# connector_id=index_attempt_metadata.connector_id,
# credential_id=index_attempt_metadata.credential_id,
# document_ids=[doc.id for doc in filtered_documents],
# db_session=db_session,
# )
# # save the chunk boost components to postgres
# update_chunk_boost_components__no_commit(
# chunk_data=updatable_chunk_data, db_session=db_session
# )
# # Pause user file ccpairs
# # TODO: investigate why nothing is done here?
# db_session.commit()
# result = IndexingPipelineResult(
# new_docs=len([r for r in insertion_records if not r.already_existed]),
# total_docs=len(filtered_documents),
# total_chunks=len(access_aware_chunks),
# failures=vector_db_write_failures + embedding_failures,
# )
# return result
def run_indexing_pipeline(
*,
document_batch: list[Document],
index_attempt_metadata: IndexAttemptMetadata,
request_id: str | None,
embedder: IndexingEmbedder,
information_content_classification_model: InformationContentClassificationModel,
document_index: DocumentIndex,
db_session: Session,
tenant_id: str,
adapter: IndexingBatchAdapter,
chunker: Chunker | None = None,
ignore_time_skip: bool = False,
) -> IndexingPipelineResult:
@@ -1070,10 +1233,10 @@ def run_indexing_pipeline(
information_content_classification_model=information_content_classification_model,
document_index=document_index,
document_batch=document_batch,
index_attempt_metadata=index_attempt_metadata,
ignore_time_skip=ignore_time_skip,
db_session=db_session,
request_id=request_id,
tenant_id=tenant_id,
adapter=adapter,
enable_contextual_rag=enable_contextual_rag,
llm=llm,
ignore_time_skip=ignore_time_skip,
)

View File

@@ -1,3 +1,7 @@
import contextlib
from collections.abc import Generator
from typing import Optional
from typing import Protocol
from typing import TYPE_CHECKING
from pydantic import BaseModel
@@ -10,6 +14,10 @@ from onyx.utils.logger import setup_logger
from shared_configs.enums import EmbeddingProvider
from shared_configs.model_server_models import Embedding
if TYPE_CHECKING:
from onyx.indexing.indexing_pipeline import DocumentBatchPrepareContext
from sqlalchemy.engine.util import TransactionalContext
if TYPE_CHECKING:
from onyx.db.models import SearchSettings
@@ -100,8 +108,7 @@ class DocMetadataAwareIndexChunk(IndexChunk):
tenant_id: str
access: "DocumentAccess"
document_sets: set[str]
user_file: int | None
user_folder: int | None
user_project: list[int]
boost: int
aggregated_chunk_boost_factor: float
@@ -111,8 +118,7 @@ class DocMetadataAwareIndexChunk(IndexChunk):
index_chunk: IndexChunk,
access: "DocumentAccess",
document_sets: set[str],
user_file: int | None,
user_folder: int | None,
user_project: list[int],
boost: int,
aggregated_chunk_boost_factor: float,
tenant_id: str,
@@ -122,8 +128,7 @@ class DocMetadataAwareIndexChunk(IndexChunk):
**index_chunk_data,
access=access,
document_sets=document_sets,
user_file=user_file,
user_folder=user_folder,
user_project=user_project,
boost=boost,
aggregated_chunk_boost_factor=aggregated_chunk_boost_factor,
tenant_id=tenant_id,
@@ -209,3 +214,39 @@ class UpdatableChunkData(BaseModel):
chunk_id: int
document_id: str
boost_score: float
class BuildMetadataAwareChunksResult(BaseModel):
    """Result of enriching embedded chunks with access/project metadata.

    Bundles the index-ready chunks together with per-document and
    per-user-file bookkeeping that later pipeline steps use to update
    chunk counts and cached plaintext / token counts.
    """

    # Chunks annotated with metadata, ready for the vector index.
    chunks: list[DocMetadataAwareIndexChunk]
    # Chunk count per document id before this indexing pass.
    doc_id_to_previous_chunk_cnt: dict[str, int]
    # Chunk count per document id after this indexing pass.
    doc_id_to_new_chunk_cnt: dict[str, int]
    # Extracted plaintext per user-file id (cached for faster retrieval).
    user_file_id_to_raw_text: dict[str, str]
    # Token count per user-file id; None when it was not computed.
    user_file_id_to_token_count: dict[str, int | None]
class IndexingBatchAdapter(Protocol):
    """Pluggable persistence hooks used by the indexing pipeline.

    An implementation decides how a document batch is prepared, how row
    locks are taken around critical DB updates, how chunks are enriched
    with metadata, and what bookkeeping runs after the index write.
    """

    def prepare(
        self, documents: list[Document], ignore_time_skip: bool
    ) -> Optional["DocumentBatchPrepareContext"]: ...

    # NOTE(review): @contextlib.contextmanager on a Protocol stub with no
    # yield is unusual — implementers presumably supply a real generator;
    # confirm intended usage.
    @contextlib.contextmanager
    def lock_context(
        self, documents: list[Document]
    ) -> Generator[TransactionalContext, None, None]:
        """Provide a transaction/row-lock context for critical updates."""

    def build_metadata_aware_chunks(
        self,
        chunks_with_embeddings: list[IndexChunk],
        chunk_content_scores: list[float],
        tenant_id: str,
        context: "DocumentBatchPrepareContext",
    ) -> BuildMetadataAwareChunksResult: ...

    # Runs after chunks are written to the index (counts, caches, etc.).
    def post_index(
        self,
        context: "DocumentBatchPrepareContext",
        updatable_chunk_data: list[UpdatableChunkData],
        filtered_documents: list[Document],
        result: BuildMetadataAwareChunksResult,
    ) -> None: ...

View File

@@ -60,7 +60,6 @@ from onyx.server.documents.credential import router as credential_router
from onyx.server.documents.document import router as document_router
from onyx.server.documents.standard_oauth import router as standard_oauth_router
from onyx.server.features.document_set.api import router as document_set_router
from onyx.server.features.folder.api import router as folder_router
from onyx.server.features.input_prompt.api import (
admin_router as admin_input_prompt_router,
)
@@ -73,6 +72,7 @@ from onyx.server.features.notifications.api import router as notification_router
from onyx.server.features.password.api import router as password_router
from onyx.server.features.persona.api import admin_router as admin_persona_router
from onyx.server.features.persona.api import basic_router as persona_router
from onyx.server.features.projects.api import router as projects_router
from onyx.server.features.tool.api import admin_router as admin_tool_router
from onyx.server.features.tool.api import router as tool_router
from onyx.server.federated.api import router as federated_router
@@ -109,7 +109,6 @@ from onyx.server.settings.api import basic_router as settings_router
from onyx.server.token_rate_limits.api import (
router as token_rate_limit_settings_router,
)
from onyx.server.user_documents.api import router as user_documents_router
from onyx.server.utils import BasicAuthenticationError
from onyx.setup import setup_multitenant_onyx
from onyx.setup import setup_onyx
@@ -344,8 +343,7 @@ def get_application(lifespan_override: Lifespan | None = None) -> FastAPI:
include_router_with_global_prefix_prepended(application, input_prompt_router)
include_router_with_global_prefix_prepended(application, admin_input_prompt_router)
include_router_with_global_prefix_prepended(application, cc_pair_router)
include_router_with_global_prefix_prepended(application, user_documents_router)
include_router_with_global_prefix_prepended(application, folder_router)
include_router_with_global_prefix_prepended(application, projects_router)
include_router_with_global_prefix_prepended(application, document_set_router)
include_router_with_global_prefix_prepended(application, search_settings_router)
include_router_with_global_prefix_prepended(

View File

@@ -17,6 +17,10 @@ Do not provide any citations even if there are examples in the chat history.
CITATION_REMINDER = """
Remember to provide inline citations in the format [1], [2], [3], etc.
"""
PROJECT_INSTRUCTIONS_SEPARATOR = (
"\n\n[[USER-PROVIDED INSTRUCTIONS — allowed to override default prompt guidance, "
"but only for style, formatting, and context]]\n"
)
ADDITIONAL_INFO = "\n\nAdditional Information:\n\t- {datetime_info}."

View File

@@ -100,7 +100,7 @@ def _create_indexable_chunks(
access=default_public_access,
document_sets=set(),
user_file=None,
user_folder=None,
user_project=[],
boost=DEFAULT_BOOST,
large_chunk_id=None,
image_file_id=None,

View File

@@ -5,7 +5,6 @@ from onyx.configs.chat_configs import INPUT_PROMPT_YAML
from onyx.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
from onyx.configs.chat_configs import PERSONAS_YAML
from onyx.configs.chat_configs import PROMPTS_YAML
from onyx.configs.chat_configs import USER_FOLDERS_YAML
from onyx.context.search.enums import RecencyBiasSetting
from onyx.db.document_set import get_or_create_document_set_by_name
from onyx.db.input_prompt import insert_input_prompt_if_not_exists
@@ -16,34 +15,11 @@ from onyx.db.models import Tool as ToolDBModel
from onyx.db.persona import upsert_persona
from onyx.db.prompts import get_prompt_by_name
from onyx.db.prompts import upsert_prompt
from onyx.db.user_documents import upsert_user_folder
from onyx.tools.tool_implementations.images.image_generation_tool import (
ImageGenerationTool,
)
def load_user_folders_from_yaml(
    db_session: Session,
    user_folders_yaml: str = USER_FOLDERS_YAML,
) -> None:
    """Seed user folders from a YAML file into the database.

    Reads the top-level `user_folders` list from the YAML file and upserts
    each entry; flushes (but does not commit) so the caller controls the
    transaction boundary.
    """
    with open(user_folders_yaml, "r") as file:
        data = yaml.safe_load(file)

    all_user_folders = data.get("user_folders", [])

    for user_folder in all_user_folders:
        # Every field is looked up with .get() — missing YAML keys become None.
        upsert_user_folder(
            db_session=db_session,
            id=user_folder.get("id"),
            name=user_folder.get("name"),
            description=user_folder.get("description"),
            created_at=user_folder.get("created_at"),
            user=user_folder.get("user"),
            files=user_folder.get("files"),
            assistants=user_folder.get("assistants"),
        )
    # NOTE(review): flush placement reconstructed as after the loop — the
    # diff rendering strips indentation; confirm against the original file.
    db_session.flush()
def load_prompts_from_yaml(
db_session: Session, prompts_yaml: str = PROMPTS_YAML
) -> None:
@@ -207,4 +183,3 @@ def load_chat_yamls(
load_prompts_from_yaml(db_session, prompt_yaml)
load_personas_from_yaml(db_session, personas_yaml)
load_input_prompts_from_yaml(db_session, input_prompts_yaml)
load_user_folders_from_yaml(db_session)

View File

@@ -1,6 +0,0 @@
user_folders:
- id: -1
name: "Recent Documents"
description: "Documents uploaded by the user"
files: []
assistants: []

View File

@@ -1,177 +0,0 @@
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Path
from sqlalchemy.orm import Session
from onyx.auth.users import current_user
from onyx.db.chat import get_chat_session_by_id
from onyx.db.engine.sql_engine import get_session
from onyx.db.folder import add_chat_to_folder
from onyx.db.folder import create_folder
from onyx.db.folder import delete_folder
from onyx.db.folder import get_user_folders
from onyx.db.folder import remove_chat_from_folder
from onyx.db.folder import rename_folder
from onyx.db.folder import update_folder_display_priority
from onyx.db.models import User
from onyx.server.features.folder.models import DeleteFolderOptions
from onyx.server.features.folder.models import FolderChatSessionRequest
from onyx.server.features.folder.models import FolderCreationRequest
from onyx.server.features.folder.models import FolderUpdateRequest
from onyx.server.features.folder.models import GetUserFoldersResponse
from onyx.server.features.folder.models import UserFolderSnapshot
from onyx.server.models import DisplayPriorityRequest
from onyx.server.query_and_chat.models import ChatSessionDetails
router = APIRouter(prefix="/folder")
@router.get("")
def get_folders(
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> GetUserFoldersResponse:
folders = get_user_folders(
user_id=user.id if user else None,
db_session=db_session,
)
folders.sort()
return GetUserFoldersResponse(
folders=[
UserFolderSnapshot(
folder_id=folder.id,
folder_name=folder.name,
display_priority=folder.display_priority,
chat_sessions=[
ChatSessionDetails(
id=chat_session.id,
name=chat_session.description,
persona_id=chat_session.persona_id,
time_created=chat_session.time_created.isoformat(),
time_updated=chat_session.time_updated.isoformat(),
shared_status=chat_session.shared_status,
folder_id=folder.id,
)
for chat_session in folder.chat_sessions
if not chat_session.deleted
],
)
for folder in folders
]
)
@router.put("/reorder")
def put_folder_display_priority(
display_priority_request: DisplayPriorityRequest,
user: User | None = Depends(current_user),
db_session: Session = Depends(get_session),
) -> None:
update_folder_display_priority(
user_id=user.id if user else None,
display_priority_map=display_priority_request.display_priority_map,
db_session=db_session,
)
@router.post("")
def create_folder_endpoint(
request: FolderCreationRequest,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> int:
return create_folder(
user_id=user.id if user else None,
folder_name=request.folder_name,
db_session=db_session,
)
@router.patch("/{folder_id}")
def patch_folder_endpoint(
request: FolderUpdateRequest,
folder_id: int = Path(..., description="The ID of the folder to rename"),
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> None:
try:
rename_folder(
user_id=user.id if user else None,
folder_id=folder_id,
folder_name=request.folder_name,
db_session=db_session,
)
except Exception as e:
raise HTTPException(status_code=400, detail=str(e))
@router.delete("/{folder_id}")
def delete_folder_endpoint(
request: DeleteFolderOptions,
folder_id: int = Path(..., description="The ID of the folder to delete"),
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> None:
user_id = user.id if user else None
try:
delete_folder(
user_id=user_id,
folder_id=folder_id,
including_chats=request.including_chats,
db_session=db_session,
)
except Exception as e:
raise HTTPException(status_code=400, detail=str(e))
@router.post("/{folder_id}/add-chat-session")
def add_chat_to_folder_endpoint(
request: FolderChatSessionRequest,
folder_id: int = Path(
..., description="The ID of the folder in which to add the chat session"
),
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> None:
user_id = user.id if user else None
try:
chat_session = get_chat_session_by_id(
chat_session_id=request.chat_session_id,
user_id=user_id,
db_session=db_session,
)
add_chat_to_folder(
user_id=user.id if user else None,
folder_id=folder_id,
chat_session=chat_session,
db_session=db_session,
)
except Exception as e:
raise HTTPException(status_code=400, detail=str(e))
@router.post("/{folder_id}/remove-chat-session")
def remove_chat_from_folder_endpoint(
request: FolderChatSessionRequest,
folder_id: int = Path(
..., description="The ID of the folder from which to remove the chat session"
),
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> None:
user_id = user.id if user else None
try:
chat_session = get_chat_session_by_id(
chat_session_id=request.chat_session_id,
user_id=user_id,
db_session=db_session,
)
remove_chat_from_folder(
user_id=user_id,
folder_id=folder_id,
chat_session=chat_session,
db_session=db_session,
)
except Exception as e:
raise HTTPException(status_code=400, detail=str(e))

View File

@@ -1,32 +0,0 @@
from uuid import UUID
from pydantic import BaseModel
from onyx.server.query_and_chat.models import ChatSessionDetails
class UserFolderSnapshot(BaseModel):
    """API view of a chat folder and its chat sessions."""

    folder_id: int
    folder_name: str | None
    display_priority: int
    chat_sessions: list[ChatSessionDetails]


class GetUserFoldersResponse(BaseModel):
    """Response wrapper for listing all of a user's folders."""

    folders: list[UserFolderSnapshot]


class FolderCreationRequest(BaseModel):
    """Payload to create a folder; the name may be omitted."""

    folder_name: str | None = None


class FolderUpdateRequest(BaseModel):
    """Payload to rename a folder."""

    folder_name: str | None = None


class FolderChatSessionRequest(BaseModel):
    """Identifies the chat session to add to / remove from a folder."""

    chat_session_id: UUID


class DeleteFolderOptions(BaseModel):
    """Folder deletion options; contained chats are kept by default."""

    including_chats: bool = False

View File

@@ -69,16 +69,16 @@ logger = setup_logger()
def _validate_user_knowledge_enabled(
persona_upsert_request: PersonaUpsertRequest, action: str
) -> None:
"""Check if user knowledge is enabled when user files/folders are provided."""
"""Check if user knowledge is enabled when user files/projects are provided."""
settings = load_settings()
if not settings.user_knowledge_enabled:
if (
persona_upsert_request.user_file_ids
or persona_upsert_request.user_folder_ids
# Only user files are supported going forward; keep getattr for backward compat
if persona_upsert_request.user_file_ids or getattr(
persona_upsert_request, "user_project_ids", None
):
raise HTTPException(
status_code=400,
detail=f"User Knowledge is disabled. Cannot {action} assistant with user files or folders.",
detail=f"User Knowledge is disabled. Cannot {action} assistant with user files or projects.",
)

View File

@@ -82,8 +82,8 @@ class PersonaUpsertRequest(BaseModel):
label_ids: list[int] | None = None
is_default_persona: bool = False
display_priority: int | None = None
user_file_ids: list[int] | None = None
user_folder_ids: list[int] | None = None
# Accept string UUIDs from frontend
user_file_ids: list[str] | None = None
class MinimalPersonaSnapshot(BaseModel):
@@ -159,8 +159,8 @@ class PersonaSnapshot(BaseModel):
icon_shape: int | None
icon_color: str | None
uploaded_image_id: str | None
user_file_ids: list[int]
user_folder_ids: list[int]
# Return string UUIDs to frontend for consistency
user_file_ids: list[str]
display_priority: int | None
is_default_persona: bool
builtin_persona: bool
@@ -186,8 +186,7 @@ class PersonaSnapshot(BaseModel):
icon_shape=persona.icon_shape,
icon_color=persona.icon_color,
uploaded_image_id=persona.uploaded_image_id,
user_file_ids=[file.id for file in persona.user_files],
user_folder_ids=[folder.id for folder in persona.user_folders],
user_file_ids=[str(file.id) for file in persona.user_files],
display_priority=persona.display_priority,
is_default_persona=persona.is_default_persona,
builtin_persona=persona.builtin_persona,
@@ -242,8 +241,7 @@ class FullPersonaSnapshot(PersonaSnapshot):
icon_shape=persona.icon_shape,
icon_color=persona.icon_color,
uploaded_image_id=persona.uploaded_image_id,
user_file_ids=[file.id for file in persona.user_files],
user_folder_ids=[folder.id for folder in persona.user_folders],
user_file_ids=[str(file.id) for file in persona.user_files],
display_priority=persona.display_priority,
is_default_persona=persona.is_default_persona,
builtin_persona=persona.builtin_persona,

View File

@@ -0,0 +1,512 @@
from uuid import UUID
from fastapi import APIRouter
from fastapi import Depends
from fastapi import File
from fastapi import Form
from fastapi import HTTPException
from fastapi import Response
from fastapi import UploadFile
from pydantic import BaseModel
from sqlalchemy import func
from sqlalchemy.orm import Session
from onyx.auth.users import current_user
from onyx.db.engine.sql_engine import get_session
from onyx.db.enums import UserFileStatus
from onyx.db.models import ChatSession
from onyx.db.models import Prompt
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.db.models import UserProject
from onyx.db.persona import get_personas_by_ids
from onyx.db.projects import upload_files_to_user_files_with_indexing
from onyx.db.prompts import upsert_prompt
from onyx.server.features.persona.models import PromptSnapshot
from onyx.server.features.projects.models import CategorizedFilesSnapshot
from onyx.server.features.projects.models import ChatSessionRequest
from onyx.server.features.projects.models import TokenCountResponse
from onyx.server.features.projects.models import UserFileSnapshot
from onyx.server.features.projects.models import UserProjectSnapshot
from onyx.utils.logger import setup_logger
logger = setup_logger()
router = APIRouter(prefix="/user/projects")
@router.get("/")
def get_projects(
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> list[UserProjectSnapshot]:
projects = (
db_session.query(UserProject).filter(UserProject.user_id == user.id).all()
)
return [UserProjectSnapshot.from_model(project) for project in projects]
@router.post("/create")
def create_project(
name: str,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> UserProjectSnapshot:
project = UserProject(name=name, user_id=user.id)
db_session.add(project)
db_session.commit()
return UserProjectSnapshot.from_model(project)
@router.post("/file/upload")
def upload_user_files(
files: list[UploadFile] = File(...),
project_id: int | None = Form(None),
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> CategorizedFilesSnapshot:
try:
# Use our consolidated function that handles indexing properly
categorized_files_result = upload_files_to_user_files_with_indexing(
files=files, project_id=project_id, user=user, db_session=db_session
)
return CategorizedFilesSnapshot.from_result(categorized_files_result)
except Exception as e:
logger.error(f"Error uploading files: {str(e)}")
raise HTTPException(status_code=500, detail=f"Failed to upload files: {str(e)}")
@router.get("/{project_id}")
def get_project(
project_id: int,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> UserProjectSnapshot:
project = (
db_session.query(UserProject)
.filter(UserProject.id == project_id, UserProject.user_id == user.id)
.one_or_none()
)
if project is None:
raise HTTPException(status_code=404, detail="Project not found")
return UserProjectSnapshot.from_model(project)
@router.get("/files/{project_id}")
def get_files_in_project(
project_id: int,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> list[UserFileSnapshot]:
user_files = (
db_session.query(UserFile)
.filter(UserFile.projects.any(id=project_id), UserFile.user_id == user.id)
.filter(UserFile.status != UserFileStatus.FAILED)
.all()
)
return [UserFileSnapshot.from_model(user_file) for user_file in user_files]
@router.delete("/{project_id}/files/{file_id}")
def unlink_user_file_from_project(
project_id: int,
file_id: UUID,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> Response:
"""Unlink an existing user file from a specific project for the current user.
Does not delete the underlying file; only removes the association.
"""
project = (
db_session.query(UserProject)
.filter(UserProject.id == project_id, UserProject.user_id == user.id)
.one_or_none()
)
if project is None:
raise HTTPException(status_code=404, detail="Project not found")
user_file = (
db_session.query(UserFile)
.filter(UserFile.id == file_id, UserFile.user_id == user.id)
.one_or_none()
)
if user_file is None:
raise HTTPException(status_code=404, detail="File not found")
# Remove the association if it exists
if user_file in project.user_files:
project.user_files.remove(user_file)
db_session.commit()
return Response(status_code=204)
@router.post("/{project_id}/files/{file_id}", response_model=UserFileSnapshot)
def link_user_file_to_project(
project_id: int,
file_id: UUID,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
):
"""Link an existing user file to a specific project for the current user.
Creates the association in the Project__UserFile join table if it does not exist.
Returns the linked user file snapshot.
"""
project = (
db_session.query(UserProject)
.filter(UserProject.id == project_id, UserProject.user_id == user.id)
.one_or_none()
)
if project is None:
raise HTTPException(status_code=404, detail="Project not found")
user_file = (
db_session.query(UserFile)
.filter(UserFile.id == file_id, UserFile.user_id == user.id)
.one_or_none()
)
if user_file is None:
raise HTTPException(status_code=404, detail="File not found")
if user_file not in project.user_files:
project.user_files.append(user_file)
db_session.commit()
return UserFileSnapshot.from_model(user_file)
@router.get("/{project_id}/instructions")
def get_project_instructions(
project_id: int,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> PromptSnapshot:
project = (
db_session.query(UserProject)
.filter(UserProject.id == project_id, UserProject.user_id == user.id)
.one_or_none()
)
if project is None:
raise HTTPException(status_code=404, detail="Project not found")
prompt = db_session.query(Prompt).filter_by(id=project.prompt_id).one_or_none()
if prompt is None:
return None
return PromptSnapshot.from_model(prompt)
class UpsertProjectInstructionsRequest(BaseModel):
    """Request body carrying the instruction text to store for a project."""

    instructions: str
@router.post("/{project_id}/instructions", response_model=PromptSnapshot)
def upsert_project_instructions(
project_id: int,
body: UpsertProjectInstructionsRequest,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
):
"""Create or update a Prompt that stores this project's instructions."""
# Ensure the project exists and belongs to the user
project = (
db_session.query(UserProject)
.filter(UserProject.id == project_id, UserProject.user_id == user.id)
.one_or_none()
)
if project is None:
raise HTTPException(status_code=404, detail="Project not found")
print("upserting instructions", body.instructions)
prompt_name = f"project-{project_id}-instructions"
description = f"Instructions prompt for project {project_id}"
prompt = upsert_prompt(
db_session=db_session,
user=user,
name=prompt_name,
system_prompt=body.instructions,
task_prompt="",
datetime_aware=True,
prompt_id=project.prompt_id,
default_prompt=False,
description=description,
)
project.prompt_id = prompt.id
db_session.commit()
return PromptSnapshot.from_model(prompt)
class ProjectPayload(BaseModel):
    """Aggregated project details returned by the /details endpoint."""

    project: UserProjectSnapshot
    files: list[UserFileSnapshot] | None = None
    instructions: PromptSnapshot | None = None
    # Maps persona ids used by the project's chat sessions to whether each
    # persona is a default persona.
    persona_id_to_is_default: dict[int, bool] | None = None
@router.get("/{project_id}/details", response_model=ProjectPayload)
def get_project_details(
project_id: int,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> ProjectPayload:
project = get_project(project_id, user, db_session)
files = get_files_in_project(project_id, user, db_session)
instructions = get_project_instructions(project_id, user, db_session)
persona_ids = [
session.persona_id
for session in project.chat_sessions
if session.persona_id is not None
]
personas = get_personas_by_ids(persona_ids, db_session)
persona_id_to_is_default = {
persona.id: persona.is_default_persona for persona in personas
}
return ProjectPayload(
project=project,
files=files,
instructions=instructions,
persona_id_to_is_default=persona_id_to_is_default,
)
class UpdateProjectRequest(BaseModel):
    """Partial-update payload for a project; omitted fields are left unchanged."""

    name: str | None = None
    description: str | None = None
@router.patch("/{project_id}", response_model=UserProjectSnapshot)
def update_project(
project_id: int,
body: UpdateProjectRequest,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
):
project = (
db_session.query(UserProject)
.filter(UserProject.id == project_id, UserProject.user_id == user.id)
.one_or_none()
)
if project is None:
raise HTTPException(status_code=404, detail="Project not found")
if body.name is not None:
project.name = body.name
if body.description is not None:
project.description = body.description
db_session.commit()
db_session.refresh(project)
return UserProjectSnapshot.from_model(project)
@router.delete("/{project_id}")
def delete_project(
project_id: int,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> Response:
project = (
db_session.query(UserProject)
.filter(UserProject.id == project_id, UserProject.user_id == user.id)
.one_or_none()
)
if project is None:
raise HTTPException(status_code=404, detail="Project not found")
# Unlink chat sessions from this project
for chat in project.chat_sessions:
chat.project_id = None
# Unlink many-to-many user files association (Project__UserFile)
for uf in list(project.user_files):
project.user_files.remove(uf)
db_session.delete(project)
db_session.commit()
return Response(status_code=204)
@router.delete("/file/{file_id}")
def delete_user_file(
file_id: UUID,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> Response:
"""Delete a user file belonging to the current user.
This will also remove any project associations for the file.
"""
user_file = (
db_session.query(UserFile)
.filter(UserFile.id == file_id, UserFile.user_id == user.id)
.one_or_none()
)
if user_file is None:
raise HTTPException(status_code=404, detail="File not found")
# Remove project associations if any
for project in list(user_file.projects):
user_file.projects.remove(project)
db_session.delete(user_file)
db_session.commit()
return Response(status_code=204)
@router.get("/file/{file_id}", response_model=UserFileSnapshot)
def get_user_file(
file_id: UUID,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> UserFileSnapshot:
"""Fetch a single user file by ID for the current user.
Includes files in any status (including FAILED) to allow status polling.
"""
user_file = (
db_session.query(UserFile)
.filter(UserFile.id == file_id, UserFile.user_id == user.id)
.one_or_none()
)
if user_file is None:
raise HTTPException(status_code=404, detail="File not found")
return UserFileSnapshot.from_model(user_file)
class UserFileIdsRequest(BaseModel):
    """Batch of user-file ids whose statuses the client wants to poll."""

    file_ids: list[UUID]
@router.post("/file/statuses", response_model=list[UserFileSnapshot])
def get_user_file_statuses(
body: UserFileIdsRequest,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> list[UserFileSnapshot]:
"""Fetch statuses for a set of user file IDs owned by the current user.
Includes files in any status so the client can detect transitions to FAILED.
"""
if not body.file_ids:
return []
user_files = (
db_session.query(UserFile)
.filter(UserFile.user_id == user.id)
.filter(UserFile.id.in_(body.file_ids))
.all()
)
return [UserFileSnapshot.from_model(user_file) for user_file in user_files]
@router.post("/{project_id}/move_chat_session")
def move_chat_session(
project_id: int,
body: ChatSessionRequest,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> Response:
chat_session = (
db_session.query(ChatSession)
.filter(ChatSession.id == body.chat_session_id, ChatSession.user_id == user.id)
.one_or_none()
)
if chat_session is None:
raise HTTPException(status_code=404, detail="Chat session not found")
chat_session.project_id = project_id
db_session.commit()
return Response(status_code=204)
@router.post("/remove_chat_session")
def remove_chat_session(
body: ChatSessionRequest,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> Response:
chat_session = (
db_session.query(ChatSession)
.filter(ChatSession.id == body.chat_session_id, ChatSession.user_id == user.id)
.one_or_none()
)
if chat_session is None:
raise HTTPException(status_code=404, detail="Chat session not found")
chat_session.project_id = None
db_session.commit()
return Response(status_code=204)
@router.get("/session/{chat_session_id}/token-count", response_model=TokenCountResponse)
def get_chat_session_project_token_count(
chat_session_id: str,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> TokenCountResponse:
"""Return sum of token_count for all user files in the project linked to the given chat session.
If the chat session has no project, returns 0.
"""
chat_session = (
db_session.query(ChatSession)
.filter(ChatSession.id == chat_session_id, ChatSession.user_id == user.id)
.one_or_none()
)
if chat_session is None:
raise HTTPException(status_code=404, detail="Chat session not found")
if chat_session.project_id is None:
return TokenCountResponse(total_tokens=0)
total_tokens = (
db_session.query(func.coalesce(func.sum(UserFile.token_count), 0))
.filter(
UserFile.user_id == user.id,
UserFile.projects.any(id=chat_session.project_id),
)
.scalar()
or 0
)
return TokenCountResponse(total_tokens=int(total_tokens))
@router.get("/{project_id}/token-count", response_model=TokenCountResponse)
def get_project_token_count(
project_id: int,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> TokenCountResponse:
"""Return sum of token_count for all user files in the given project for the current user."""
# Verify the project belongs to the current user
project = (
db_session.query(UserProject)
.filter(UserProject.id == project_id, UserProject.user_id == user.id)
.one_or_none()
)
if project is None:
raise HTTPException(status_code=404, detail="Project not found")
total_tokens = (
db_session.query(func.coalesce(func.sum(UserFile.token_count), 0))
.filter(
UserFile.user_id == user.id,
UserFile.projects.any(id=project_id),
)
.scalar()
or 0
)
return TokenCountResponse(total_tokens=int(total_tokens))

View File

@@ -0,0 +1,93 @@
from datetime import datetime
from uuid import UUID
from pydantic import BaseModel
from onyx.db.enums import UserFileStatus
from onyx.db.models import UserFile
from onyx.db.models import UserProject
from onyx.db.projects import CategorizedFilesResult
from onyx.file_store.models import ChatFileType
from onyx.server.query_and_chat.chat_utils import mime_type_to_chat_file_type
from onyx.server.query_and_chat.models import ChatSessionDetails
class UserFileSnapshot(BaseModel):
    """API-facing view of a UserFile row."""

    id: UUID
    name: str
    # NOTE(review): from_model always sets this to None — presumably a legacy
    # field now that files can belong to multiple projects; confirm clients
    # no longer read it.
    project_id: int | None = None
    user_id: UUID | None
    file_id: str
    created_at: datetime
    status: UserFileStatus
    last_accessed_at: datetime
    file_type: str
    chat_file_type: ChatFileType
    token_count: int | None
    chunk_count: int | None

    @classmethod
    def from_model(cls, model: UserFile) -> "UserFileSnapshot":
        """Build a snapshot from the ORM model.

        chat_file_type is derived from the stored MIME content type.
        """
        return cls(
            id=model.id,
            name=model.name,
            project_id=None,
            user_id=model.user_id,
            file_id=model.file_id,
            created_at=model.created_at,
            status=model.status,
            last_accessed_at=model.last_accessed_at,
            file_type=model.content_type,
            chat_file_type=mime_type_to_chat_file_type(model.content_type),
            token_count=model.token_count,
            chunk_count=model.chunk_count,
        )
class TokenCountResponse(BaseModel):
    """Total token count across a set of user files."""

    total_tokens: int
class CategorizedFilesSnapshot(BaseModel):
    """API-facing view of a file-categorization result."""

    user_files: list[UserFileSnapshot]
    non_accepted_files: list[str]
    unsupported_files: list[str]

    @classmethod
    def from_result(cls, result: CategorizedFilesResult) -> "CategorizedFilesSnapshot":
        """Convert the DB-layer result object into its snapshot form."""
        snapshots = [UserFileSnapshot.from_model(f) for f in result.user_files]
        return cls(
            user_files=snapshots,
            non_accepted_files=result.non_accepted_files,
            unsupported_files=result.unsupported_files,
        )
class UserProjectSnapshot(BaseModel):
    """API-facing view of a UserProject, including its non-deleted chat sessions."""

    id: int
    name: str
    description: str | None
    created_at: datetime
    user_id: UUID
    chat_sessions: list[ChatSessionDetails]

    @classmethod
    def from_model(cls, model: UserProject) -> "UserProjectSnapshot":
        """Build a snapshot from the ORM model, excluding soft-deleted chats."""
        return cls(
            id=model.id,
            name=model.name,
            description=model.description,
            created_at=model.created_at,
            user_id=model.user_id,
            chat_sessions=[
                ChatSessionDetails.from_model(chat)
                for chat in model.chat_sessions
                # Soft-deleted sessions are hidden from the project view.
                if not chat.deleted
            ],
        )
class ChatSessionRequest(BaseModel):
    """Request body identifying a chat session by its string id."""

    chat_session_id: str

View File

@@ -0,0 +1,178 @@
from math import ceil
from fastapi import UploadFile
from PIL import Image
from PIL import ImageOps
from PIL import UnidentifiedImageError
from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field
from onyx.file_processing.extract_file_text import ACCEPTED_IMAGE_FILE_EXTENSIONS
from onyx.file_processing.extract_file_text import ALL_ACCEPTED_FILE_EXTENSIONS
from onyx.file_processing.extract_file_text import extract_file_text
from onyx.file_processing.extract_file_text import get_file_ext
from onyx.llm.factory import get_default_llms
from onyx.natural_language_processing.utils import get_tokenizer
from onyx.utils.logger import setup_logger
logger = setup_logger()
FILE_TOKEN_COUNT_THRESHOLD = 50000
# Guard against extremely large images
Image.MAX_IMAGE_PIXELS = 12000 * 12000
class CategorizedFiles(BaseModel):
    """Result buckets produced by categorize_uploaded_files."""

    # Uploads that passed type and token-budget checks.
    acceptable: list[UploadFile] = Field(default_factory=list)
    # File names rejected for exceeding the token threshold.
    non_accepted: list[str] = Field(default_factory=list)
    # File names whose type/content could not be processed.
    unsupported: list[str] = Field(default_factory=list)
    # Accepted file name -> estimated token count.
    acceptable_file_to_token_count: dict[str, int] = Field(default_factory=dict)

    # Allow FastAPI UploadFile instances
    model_config = ConfigDict(arbitrary_types_allowed=True)
def _apply_long_side_cap(width: int, height: int, cap: int) -> tuple[int, int]:
if max(width, height) <= cap:
return width, height
scale = cap / max(width, height)
new_w = max(1, int(round(width * scale)))
new_h = max(1, int(round(height * scale)))
return new_w, new_h
def _estimate_image_tokens(
width: int, height: int, patch_size: int, overhead: int
) -> int:
patches_w = ceil(width / patch_size)
patches_h = ceil(height / patch_size)
patches = patches_w * patches_h
return patches + overhead
def estimate_image_tokens_for_upload(
    upload: UploadFile,
    cap_long_side: int = 2048,
    patch_size: int = 16,
    overhead_tokens: int = 32,
) -> int:
    """Open the uploaded image, normalize orientation, cap long side, and estimate tokens.

    Parameters
    - cap_long_side: Maximum pixels allowed on the image's longer side before estimating.
      Rationale: Many vision-language encoders downsample images so the longer side is
      bounded (commonly around 1024-2048 px). Capping avoids unbounded patch counts and
      keeps costs predictable while preserving most semantic content for typical UI/docs.
      Default 2048 is a balanced choice between fidelity and token cost.
    - patch_size: The pixel size of square patches used in a rough ViT-style estimate.
      Rationale: Modern vision backbones (e.g., ViT variants) commonly operate on 14-16 px
      patches. Using 16 simplifies the estimate and aligns with widely used configurations.
      Each patch approximately maps to one visual token in this heuristic.
    - overhead_tokens: Fixed per-image overhead to account for special tokens, metadata,
      and prompt framing added by providers. Rationale: Real models add tens of tokens per
      image beyond pure patch count. 32 is a conservative, stable default that avoids
      undercounting.

    Raises: propagates PIL errors (e.g. UnidentifiedImageError) when the bytes
    are not a decodable image; callers are expected to handle them.

    Notes
    - This is a heuristic estimation for budgeting and gating. Actual tokenization varies
      by model/provider and may differ slightly.
    - Always resets the file pointer before returning.
    """
    try:
        img = Image.open(upload.file)
        # Apply the EXIF orientation tag so width/height reflect the displayed
        # (possibly rotated) geometry.
        img = ImageOps.exif_transpose(img)
        width, height = img.size
        capped_w, capped_h = _apply_long_side_cap(width, height, cap=cap_long_side)
        return _estimate_image_tokens(
            capped_w, capped_h, patch_size=patch_size, overhead=overhead_tokens
        )
    finally:
        # Best-effort rewind so later consumers of the upload see the full stream.
        try:
            upload.file.seek(0)
        except Exception:
            pass
def categorize_uploaded_files(files: list[UploadFile]) -> CategorizedFiles:
    """
    Categorize uploaded files based on text extractability and tokenized length.
    - Extracts text using extract_file_text for supported plain/document extensions.
    - Uses default tokenizer to compute token length.
    - If token length > 50,000, marked as non_accepted.
    - If extension unsupported or text cannot be extracted, marked as unsupported.
    - Otherwise marked as acceptable.

    Returns a CategorizedFiles with the four result buckets populated.
    """
    results = CategorizedFiles()
    # Use the default LLM's tokenizer so counts line up with real prompt budgets.
    llm, _ = get_default_llms()
    tokenizer = get_tokenizer(
        model_name=llm.config.model_name, provider_type=llm.config.model_provider
    )
    for upload in files:
        try:
            extension = get_file_ext(upload.filename or "")
            # If image, estimate tokens via dedicated method first
            if extension in ACCEPTED_IMAGE_FILE_EXTENSIONS:
                try:
                    token_count = estimate_image_tokens_for_upload(upload)
                except (UnidentifiedImageError, OSError):
                    # Unreadable/corrupt image bytes -> cannot be processed.
                    results.unsupported.append(upload.filename or "")
                    continue
                if token_count > FILE_TOKEN_COUNT_THRESHOLD:
                    results.non_accepted.append(upload.filename or "")
                else:
                    results.acceptable.append(upload)
                    results.acceptable_file_to_token_count[upload.filename or ""] = (
                        token_count
                    )
                continue
            # Otherwise, handle as text/document: extract text and count tokens
            if (
                extension in ALL_ACCEPTED_FILE_EXTENSIONS
                and extension not in ACCEPTED_IMAGE_FILE_EXTENSIONS
            ):
                text_content = extract_file_text(
                    file=upload.file,
                    file_name=upload.filename or "",
                    break_on_unprocessable=False,
                    extension=extension,
                )
                if not text_content:
                    # Supported extension but nothing extractable.
                    results.unsupported.append(upload.filename or "")
                    continue
                token_count = len(tokenizer.encode(text_content))
                if token_count > FILE_TOKEN_COUNT_THRESHOLD:
                    results.non_accepted.append(upload.filename or "")
                else:
                    results.acceptable.append(upload)
                    results.acceptable_file_to_token_count[upload.filename or ""] = (
                        token_count
                    )
                # Reset file pointer for subsequent upload handling
                try:
                    upload.file.seek(0)
                except Exception:
                    pass
                continue
            # If not recognized as supported types above, mark unsupported
            results.unsupported.append(upload.filename or "")
        except Exception as e:
            # One bad file must not abort the whole batch; record and move on.
            logger.warning(
                f"Failed to process uploaded file '{getattr(upload, 'filename', 'unknown')}': {e}"
            )
            results.unsupported.append(upload.filename or "")
    return results

View File

@@ -154,7 +154,6 @@ def enable_or_disable_kg(
is_default_persona=False,
display_priority=0,
user_file_ids=[],
user_folder_ids=[],
)
persona_snapshot = create_update_persona(

View File

@@ -42,7 +42,9 @@ from onyx.configs.constants import FASTAPI_USERS_AUTH_COOKIE_NAME
from onyx.db.api_key import is_api_key_email_address
from onyx.db.auth import get_live_users_count
from onyx.db.engine.sql_engine import get_session
from onyx.db.enums import UserFileStatus
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.db.user_preferences import activate_user
from onyx.db.user_preferences import deactivate_user
from onyx.db.user_preferences import get_all_user_assistant_specific_configs
@@ -64,6 +66,7 @@ from onyx.db.users import validate_user_role_update
from onyx.key_value_store.factory import get_kv_store
from onyx.redis.redis_pool import get_raw_redis_client
from onyx.server.documents.models import PaginatedReturn
from onyx.server.features.projects.models import UserFileSnapshot
from onyx.server.manage.models import AllUsersResponse
from onyx.server.manage.models import AutoScrollRequest
from onyx.server.manage.models import TenantInfo
@@ -889,3 +892,20 @@ def update_assistant_preferences_for_user_api(
update_assistant_preferences(
assistant_id, user.id, new_assistant_preference, db_session
)
db_session.commit()
@router.get("/user/files/recent")
def get_recent_files(
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> list[UserFileSnapshot]:
user_files = (
db_session.query(UserFile)
.filter(UserFile.user_id == user.id)
.filter(UserFile.status != UserFileStatus.FAILED)
.order_by(UserFile.last_accessed_at.desc())
.all()
)
return [UserFileSnapshot.from_model(user_file) for user_file in user_files]

View File

@@ -121,10 +121,7 @@ def upsert_ingestion_doc(
db_session=db_session,
tenant_id=tenant_id,
document_batch=[document],
index_attempt_metadata=IndexAttemptMetadata(
connector_id=cc_pair.connector_id,
credential_id=cc_pair.credential_id,
),
request_id=None,
)
# If there's a secondary index being built, index the doc but don't use it for return here

View File

@@ -149,7 +149,6 @@ def list_threads(
time_created=chat.time_created.isoformat(),
time_updated=chat.time_updated.isoformat(),
shared_status=chat.shared_status,
folder_id=chat.folder_id,
current_alternate_model=chat.current_alternate_model,
current_temperature_override=chat.temperature_override,
)

View File

@@ -2,7 +2,6 @@ import asyncio
import datetime
import json
import os
import time
from collections.abc import Callable
from collections.abc import Generator
from datetime import timedelta
@@ -14,7 +13,6 @@ from fastapi import HTTPException
from fastapi import Query
from fastapi import Request
from fastapi import Response
from fastapi import UploadFile
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from sqlalchemy.orm import Session
@@ -29,11 +27,9 @@ from onyx.chat.prompt_builder.citations_prompt import (
)
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.configs.chat_configs import HARD_DELETE_CHATS
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import MessageType
from onyx.configs.constants import MilestoneRecordType
from onyx.configs.model_configs import LITELLM_PASS_THROUGH_HEADERS
from onyx.connectors.models import InputType
from onyx.db.chat import add_chats_to_session_from_slack_thread
from onyx.db.chat import create_chat_session
from onyx.db.chat import create_new_chat_message
@@ -50,20 +46,15 @@ from onyx.db.chat import translate_db_message_to_chat_message_detail
from onyx.db.chat import translate_db_message_to_packets
from onyx.db.chat import update_chat_session
from onyx.db.chat_search import search_chat_sessions
from onyx.db.connector import create_connector
from onyx.db.connector_credential_pair import add_credential_to_connector
from onyx.db.credentials import create_credential
from onyx.db.engine.sql_engine import get_session
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.enums import AccessType
from onyx.db.feedback import create_chat_message_feedback
from onyx.db.feedback import create_doc_retrieval_feedback
from onyx.db.models import User
from onyx.db.persona import get_persona_by_id
from onyx.db.user_documents import create_user_files
from onyx.db.projects import check_project_ownership
from onyx.file_processing.extract_file_text import docx_to_txt_filename
from onyx.file_store.file_store import get_default_file_store
from onyx.file_store.models import FileDescriptor
from onyx.llm.exceptions import GenAIDisabledException
from onyx.llm.factory import get_default_llms
from onyx.llm.factory import get_llms_for_persona
@@ -71,9 +62,6 @@ from onyx.natural_language_processing.utils import get_tokenizer
from onyx.secondary_llm_flows.chat_session_naming import (
get_renamed_conversation_name,
)
from onyx.server.documents.models import ConnectorBase
from onyx.server.documents.models import CredentialBase
from onyx.server.query_and_chat.chat_utils import mime_type_to_chat_file_type
from onyx.server.query_and_chat.models import ChatFeedbackRequest
from onyx.server.query_and_chat.models import ChatMessageIdentifier
from onyx.server.query_and_chat.models import ChatRenameRequest
@@ -96,14 +84,11 @@ from onyx.server.query_and_chat.models import UpdateChatSessionThreadRequest
from onyx.server.query_and_chat.streaming_models import OverallStop
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.token_limit import check_token_rate_limits
from onyx.utils.file_types import UploadMimeTypes
from onyx.utils.headers import get_custom_tool_additional_request_headers
from onyx.utils.logger import setup_logger
from onyx.utils.telemetry import create_milestone_and_report
from shared_configs.contextvars import get_current_tenant_id
RECENT_DOCS_FOLDER_ID = -1
logger = setup_logger()
router = APIRouter(prefix="/chat")
@@ -113,12 +98,18 @@ router = APIRouter(prefix="/chat")
def get_user_chat_sessions(
user: User | None = Depends(current_user),
db_session: Session = Depends(get_session),
project_id: UUID | None = None,
only_non_project_chats: bool = True,
) -> ChatSessionsResponse:
user_id = user.id if user is not None else None
try:
chat_sessions = get_chat_sessions_by_user(
user_id=user_id, deleted=False, db_session=db_session
user_id=user_id,
deleted=False,
db_session=db_session,
project_id=project_id,
only_non_project_chats=only_non_project_chats,
)
except ValueError:
@@ -133,7 +124,6 @@ def get_user_chat_sessions(
time_created=chat.time_created.isoformat(),
time_updated=chat.time_updated.isoformat(),
shared_status=chat.shared_status,
folder_id=chat.folder_id,
current_alternate_model=chat.current_alternate_model,
current_temperature_override=chat.temperature_override,
)
@@ -282,6 +272,15 @@ def create_new_chat_session(
user: User | None = Depends(current_chat_accessible_user),
db_session: Session = Depends(get_session),
) -> CreateChatSessionID:
logger.info(
f"Creating chat session with request: {chat_session_creation_request.persona_id}"
)
project_id = chat_session_creation_request.project_id
if project_id:
if not check_project_ownership(project_id, user.id, db_session):
raise HTTPException(
status_code=403, detail="User does not have access to project"
)
user_id = user.id if user is not None else None
try:
new_chat_session = create_chat_session(
@@ -290,6 +289,7 @@ def create_new_chat_session(
or "", # Leave the naming till later to prevent delay
user_id=user_id,
persona_id=chat_session_creation_request.persona_id,
project_id=chat_session_creation_request.project_id,
)
except Exception as e:
logger.exception(e)
@@ -575,6 +575,39 @@ def get_max_document_tokens(
)
class AvailableContextTokensResponse(BaseModel):
    """Response carrying the context-token budget for a chat session."""

    available_tokens: int
@router.get("/available-context-tokens/{session_id}")
def get_available_context_tokens_for_session(
session_id: UUID,
user: User | None = Depends(current_chat_accessible_user),
db_session: Session = Depends(get_session),
) -> AvailableContextTokensResponse:
"""Return available context tokens for a chat session based on its persona."""
try:
chat_session = get_chat_session_by_id(
chat_session_id=session_id,
user_id=user.id if user is not None else None,
db_session=db_session,
is_shared=False,
include_deleted=False,
)
except ValueError:
raise HTTPException(status_code=404, detail="Chat session not found")
if not chat_session.persona:
raise HTTPException(status_code=400, detail="Chat session has no persona")
available = compute_max_document_tokens_for_persona(
db_session=db_session,
persona=chat_session.persona,
)
return AvailableContextTokensResponse(available_tokens=available)
"""Endpoints for chat seeding"""
@@ -681,122 +714,6 @@ def seed_chat_from_slack(
)
"""File upload"""
@router.post("/file")
def upload_files_for_chat(
files: list[UploadFile],
db_session: Session = Depends(get_session),
user: User | None = Depends(current_user),
) -> dict[str, list[FileDescriptor]]:
# NOTE(rkuo): Unify this with file_validation.py and extract_file_text.py
# image_content_types = {"image/jpeg", "image/png", "image/webp"}
# csv_content_types = {"text/csv"}
# text_content_types = {
# "text/plain",
# "text/markdown",
# "text/x-markdown",
# "text/x-config",
# "text/tab-separated-values",
# "application/json",
# "application/xml",
# "text/xml",
# "application/x-yaml",
# }
# document_content_types = {
# "application/pdf",
# "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
# "application/vnd.openxmlformats-officedocument.presentationml.presentation",
# "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
# "message/rfc822",
# "application/epub+zip",
# }
# allowed_content_types = (
# image_content_types.union(text_content_types)
# .union(document_content_types)
# .union(csv_content_types)
# )
for file in files:
if not file.content_type:
raise HTTPException(status_code=400, detail="File content type is required")
if file.content_type not in UploadMimeTypes.ALLOWED_MIME_TYPES:
raise HTTPException(status_code=400, detail="Unsupported file type.")
if (
file.content_type in UploadMimeTypes.IMAGE_MIME_TYPES
and file.size
and file.size > 20 * 1024 * 1024
):
raise HTTPException(
status_code=400,
detail="Images must be less than 20MB",
)
# 5) Create a user file for each uploaded file
user_files = create_user_files(files, RECENT_DOCS_FOLDER_ID, user, db_session)
for user_file in user_files:
# 6) Create connector
connector_base = ConnectorBase(
name=f"UserFile-{int(time.time())}",
source=DocumentSource.FILE,
input_type=InputType.LOAD_STATE,
connector_specific_config={
"file_locations": [user_file.file_id],
"file_names": [user_file.name],
"zip_metadata": {},
},
refresh_freq=None,
prune_freq=None,
indexing_start=None,
)
connector = create_connector(
db_session=db_session,
connector_data=connector_base,
)
# 7) Create credential
credential_info = CredentialBase(
credential_json={},
admin_public=True,
source=DocumentSource.FILE,
curator_public=True,
groups=[],
name=f"UserFileCredential-{int(time.time())}",
is_user_file=True,
)
credential = create_credential(credential_info, user, db_session)
# 8) Create connector credential pair
cc_pair = add_credential_to_connector(
db_session=db_session,
user=user,
connector_id=connector.id,
credential_id=credential.id,
cc_pair_name=f"UserFileCCPair-{int(time.time())}",
access_type=AccessType.PRIVATE,
auto_sync_options=None,
groups=[],
)
user_file.cc_pair_id = cc_pair.data
db_session.commit()
return {
"files": [
{
"id": user_file.file_id,
"type": mime_type_to_chat_file_type(user_file.content_type),
"name": user_file.name,
}
for user_file in user_files
]
}
@router.get("/file/{file_id:path}")
def fetch_chat_file(
file_id: str,
@@ -870,7 +787,6 @@ async def search_chats(
persona_id=session.persona_id,
time_created=session.time_created,
shared_status=session.shared_status,
folder_id=session.folder_id,
current_alternate_model=session.current_alternate_model,
current_temperature_override=session.temperature_override,
)

View File

@@ -19,6 +19,7 @@ from onyx.context.search.models import RetrievalDetails
from onyx.context.search.models import SearchDoc
from onyx.context.search.models import Tag
from onyx.db.enums import ChatSessionSharedStatus
from onyx.db.models import ChatSession
from onyx.file_store.models import FileDescriptor
from onyx.llm.override_models import LLMOverride
from onyx.llm.override_models import PromptOverride
@@ -53,6 +54,7 @@ class ChatSessionCreationRequest(BaseModel):
# If not specified, use Onyx default persona
persona_id: int = 0
description: str | None = None
project_id: int | None = None
class CreateChatSessionID(BaseModel):
@@ -92,9 +94,8 @@ class CreateChatMessageRequest(ChunkContext):
# New message contents
message: str
# Files that we should attach to this message
file_descriptors: list[FileDescriptor]
user_file_ids: list[int] = []
user_folder_ids: list[int] = []
file_descriptors: list[FileDescriptor] = []
current_message_files: list[FileDescriptor] = []
# If no prompt provided, uses the largest prompt of the chat session
# but really this should be explicitly specified, only in the simplified APIs is this inferred
@@ -192,10 +193,22 @@ class ChatSessionDetails(BaseModel):
time_created: str
time_updated: str
shared_status: ChatSessionSharedStatus
folder_id: int | None = None
current_alternate_model: str | None = None
current_temperature_override: float | None = None
@classmethod
def from_model(cls, model: ChatSession) -> "ChatSessionDetails":
    """Build the details payload from a ChatSession ORM row.

    Note: the API field `name` maps to the ORM `description`, and
    `current_temperature_override` maps to the ORM `temperature_override`.
    """
    return cls(
        id=model.id,
        name=model.description,
        persona_id=model.persona_id,
        time_created=model.time_created.isoformat(),
        time_updated=model.time_updated.isoformat(),
        shared_status=model.shared_status,
        current_alternate_model=model.current_alternate_model,
        current_temperature_override=model.temperature_override,
    )
class ChatSessionsResponse(BaseModel):
sessions: list[ChatSessionDetails]
@@ -302,7 +315,6 @@ class ChatSessionSummary(BaseModel):
persona_id: int | None = None
time_created: datetime
shared_status: ChatSessionSharedStatus
folder_id: int | None = None
current_alternate_model: str | None = None
current_temperature_override: float | None = None

View File

@@ -161,7 +161,6 @@ def get_user_search_sessions(
time_created=search.time_created.isoformat(),
time_updated=search.time_updated.isoformat(),
shared_status=search.shared_status,
folder_id=search.folder_id,
current_alternate_model=search.current_alternate_model,
)
for search in search_sessions

View File

@@ -1,596 +0,0 @@
import io
import time
from datetime import datetime
from datetime import timedelta
from typing import List
import requests
import sqlalchemy.exc
from bs4 import BeautifulSoup
from fastapi import APIRouter
from fastapi import Depends
from fastapi import File
from fastapi import Form
from fastapi import HTTPException
from fastapi import Query
from fastapi import UploadFile
from pydantic import BaseModel
from sqlalchemy.orm import Session
from onyx.auth.users import current_user
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import InputType
from onyx.db.connector import create_connector
from onyx.db.connector_credential_pair import add_credential_to_connector
from onyx.db.credentials import create_credential
from onyx.db.engine.sql_engine import get_session
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.db.models import UserFolder
from onyx.db.user_documents import calculate_user_files_token_count
from onyx.db.user_documents import create_user_files
from onyx.db.user_documents import get_user_file_indexing_status
from onyx.db.user_documents import share_file_with_assistant
from onyx.db.user_documents import share_folder_with_assistant
from onyx.db.user_documents import unshare_file_with_assistant
from onyx.db.user_documents import unshare_folder_with_assistant
from onyx.db.user_documents import upload_files_to_user_files_with_indexing
from onyx.file_processing.html_utils import web_html_cleanup
from onyx.server.documents.connector import trigger_indexing_for_cc_pair
from onyx.server.documents.models import ConnectorBase
from onyx.server.documents.models import CredentialBase
from onyx.server.query_and_chat.chat_backend import RECENT_DOCS_FOLDER_ID
from onyx.server.user_documents.models import MessageResponse
from onyx.server.user_documents.models import UserFileSnapshot
from onyx.server.user_documents.models import UserFolderSnapshot
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
router = APIRouter()
class FolderCreationRequest(BaseModel):
    """Request body for creating a user folder."""

    name: str
    description: str


@router.post("/user/folder")
def create_folder(
    request: FolderCreationRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UserFolderSnapshot:
    """Create a folder owned by the requesting user and return its snapshot."""
    folder = UserFolder(
        user_id=user.id if user else None,
        name=request.name,
        description=request.description,
    )
    try:
        db_session.add(folder)
        db_session.commit()
        return UserFolderSnapshot.from_model(folder)
    except sqlalchemy.exc.DataError as e:
        # Surface overlong name/description as a 400 instead of a 500.
        if "StringDataRightTruncation" in str(e):
            raise HTTPException(
                status_code=400,
                detail="Folder name or description is too long. Please use a shorter name or description.",
            )
        raise
@router.get(
    "/user/folder",
)
def user_get_folders(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[UserFolderSnapshot]:
    """List the user's folders plus the shared recent-docs folder.

    Files in each snapshot are filtered down to those owned by the user.
    """
    user_id = user.id if user else None
    owned_or_recent = (UserFolder.user_id == user_id) | (
        UserFolder.id == RECENT_DOCS_FOLDER_ID
    )
    snapshots: list[UserFolderSnapshot] = []
    for folder in db_session.query(UserFolder).filter(owned_or_recent).all():
        snapshot = UserFolderSnapshot.from_model(folder)
        # The recent-docs folder is shared; expose only this user's files.
        snapshot.files = [f for f in snapshot.files if f.user_id == user_id]
        snapshots.append(snapshot)
    return snapshots
@router.get("/user/folder/{folder_id}")
def get_folder(
folder_id: int,
user: User | None = Depends(current_user),
db_session: Session = Depends(get_session),
) -> UserFolderSnapshot:
user_id = user.id if user else None
folder = (
db_session.query(UserFolder)
.filter(
UserFolder.id == folder_id,
(
(UserFolder.user_id == user_id)
| (UserFolder.id == RECENT_DOCS_FOLDER_ID)
),
)
.first()
)
if not folder:
raise HTTPException(status_code=404, detail="Folder not found")
folder_snapshot = UserFolderSnapshot.from_model(folder)
# Filter files to only include those belonging to the current user
folder_snapshot.files = [
file for file in folder_snapshot.files if file.user_id == user_id
]
return folder_snapshot
@router.post("/user/file/upload")
def upload_user_files(
files: List[UploadFile] = File(...),
folder_id: int | None = Form(None),
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> list[UserFileSnapshot]:
if folder_id == 0:
folder_id = None
try:
# Use our consolidated function that handles indexing properly
user_files = upload_files_to_user_files_with_indexing(
files, folder_id or RECENT_DOCS_FOLDER_ID, user, db_session
)
return [UserFileSnapshot.from_model(user_file) for user_file in user_files]
except Exception as e:
logger.error(f"Error uploading files: {str(e)}")
raise HTTPException(status_code=500, detail=f"Failed to upload files: {str(e)}")
class FolderUpdateRequest(BaseModel):
    """Request body for partially updating a folder; omitted fields are left unchanged."""

    name: str | None = None
    description: str | None = None


@router.put("/user/folder/{folder_id}")
def update_folder(
    folder_id: int,
    request: FolderUpdateRequest,
    user: User | None = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UserFolderSnapshot:
    """Rename and/or re-describe one of the user's folders."""
    user_id = user.id if user else None
    folder = (
        db_session.query(UserFolder)
        .filter(UserFolder.id == folder_id)
        .filter(UserFolder.user_id == user_id)
        .first()
    )
    if folder is None:
        raise HTTPException(status_code=404, detail="Folder not found")

    # Falsy values (None or empty string) leave the field untouched.
    if request.name:
        folder.name = request.name
    if request.description:
        folder.description = request.description
    db_session.commit()
    return UserFolderSnapshot.from_model(folder)
@router.delete("/user/folder/{folder_id}")
def delete_folder(
folder_id: int,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> MessageResponse:
user_id = user.id if user else None
folder = (
db_session.query(UserFolder)
.filter(UserFolder.id == folder_id, UserFolder.user_id == user_id)
.first()
)
if not folder:
raise HTTPException(status_code=404, detail="Folder not found")
db_session.delete(folder)
db_session.commit()
return MessageResponse(message="Folder deleted successfully")
@router.delete("/user/file/{file_id}")
def delete_file(
file_id: int,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> MessageResponse:
user_id = user.id if user else None
file = (
db_session.query(UserFile)
.filter(UserFile.id == file_id, UserFile.user_id == user_id)
.first()
)
if not file:
raise HTTPException(status_code=404, detail="File not found")
db_session.delete(file)
db_session.commit()
return MessageResponse(message="File deleted successfully")
class FileMoveRequest(BaseModel):
    """Request body for moving a file; None detaches it from any folder."""

    new_folder_id: int | None


@router.put("/user/file/{file_id}/move")
def move_file(
    file_id: int,
    request: FileMoveRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UserFileSnapshot:
    """Move one of the user's files to another folder (or out of any folder)."""
    user_id = user.id if user else None
    target = (
        db_session.query(UserFile)
        .filter(UserFile.id == file_id)
        .filter(UserFile.user_id == user_id)
        .first()
    )
    if target is None:
        raise HTTPException(status_code=404, detail="File not found")

    target.folder_id = request.new_folder_id
    db_session.commit()
    return UserFileSnapshot.from_model(target)
@router.get("/user/file-system")
def get_file_system(
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> list[UserFolderSnapshot]:
user_id = user.id if user else None
folders = db_session.query(UserFolder).filter(UserFolder.user_id == user_id).all()
return [UserFolderSnapshot.from_model(folder) for folder in folders]
@router.put("/user/file/{file_id}/rename")
def rename_file(
file_id: int,
name: str,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> UserFileSnapshot:
user_id = user.id if user else None
file = (
db_session.query(UserFile)
.filter(UserFile.id == file_id, UserFile.user_id == user_id)
.first()
)
if not file:
raise HTTPException(status_code=404, detail="File not found")
file.name = name
db_session.commit()
return UserFileSnapshot.from_model(file)
class ShareRequest(BaseModel):
    """Request body identifying the assistant to share/unshare with."""

    assistant_id: int


@router.post("/user/file/{file_id}/share")
def share_file(
    file_id: int,
    request: ShareRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> MessageResponse:
    """Grant the given assistant access to one of the user's files."""
    user_id = user.id if user else None
    # Ownership check before sharing.
    owned = (
        db_session.query(UserFile)
        .filter(UserFile.id == file_id)
        .filter(UserFile.user_id == user_id)
        .first()
    )
    if owned is None:
        raise HTTPException(status_code=404, detail="File not found")

    share_file_with_assistant(file_id, request.assistant_id, db_session)
    return MessageResponse(message="File shared successfully with the assistant")
@router.post("/user/file/{file_id}/unshare")
def unshare_file(
file_id: int,
request: ShareRequest,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> MessageResponse:
user_id = user.id if user else None
file = (
db_session.query(UserFile)
.filter(UserFile.id == file_id, UserFile.user_id == user_id)
.first()
)
if not file:
raise HTTPException(status_code=404, detail="File not found")
unshare_file_with_assistant(file_id, request.assistant_id, db_session)
return MessageResponse(message="File unshared successfully from the assistant")
@router.post("/user/folder/{folder_id}/share")
def share_folder(
folder_id: int,
request: ShareRequest,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> MessageResponse:
user_id = user.id if user else None
folder = (
db_session.query(UserFolder)
.filter(UserFolder.id == folder_id, UserFolder.user_id == user_id)
.first()
)
if not folder:
raise HTTPException(status_code=404, detail="Folder not found")
share_folder_with_assistant(folder_id, request.assistant_id, db_session)
return MessageResponse(
message="Folder and its files shared successfully with the assistant"
)
@router.post("/user/folder/{folder_id}/unshare")
def unshare_folder(
folder_id: int,
request: ShareRequest,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> MessageResponse:
user_id = user.id if user else None
folder = (
db_session.query(UserFolder)
.filter(UserFolder.id == folder_id, UserFolder.user_id == user_id)
.first()
)
if not folder:
raise HTTPException(status_code=404, detail="Folder not found")
unshare_folder_with_assistant(folder_id, request.assistant_id, db_session)
return MessageResponse(
message="Folder and its files unshared successfully from the assistant"
)
class CreateFileFromLinkRequest(BaseModel):
    """Request body for creating a user file by scraping a URL."""

    url: str
    # Destination folder; None falls back to the default handled by the endpoint.
    folder_id: int | None
@router.post("/user/file/create-from-link")
def create_file_from_link(
    request: CreateFileFromLinkRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[UserFileSnapshot]:
    """Fetch a web page, store its cleaned text as a user file, and index it.

    The page is downloaded, reduced to readable text, saved as a ``.txt``
    user file, and wired to a dedicated connector/credential pair so it is
    indexed immediately with high priority.

    Raises:
        HTTPException(400): if the URL cannot be fetched.
    """
    try:
        # NOTE(review): the URL is user-supplied; consider rejecting
        # internal/private address ranges here to mitigate SSRF.
        # Timeout added so a slow/unresponsive host cannot hang the request
        # worker indefinitely.
        response = requests.get(request.url, timeout=30)
        response.raise_for_status()
        content = response.text
        soup = BeautifulSoup(content, "html.parser")
        parsed_html = web_html_cleanup(soup, mintlify_cleanup_enabled=False)
        # The page title becomes the file name; fall back for untitled pages.
        file_name = f"{parsed_html.title or 'Untitled'}.txt"
        file_content = parsed_html.cleaned_text.encode()
        file = UploadFile(filename=file_name, file=io.BytesIO(file_content))
        user_files = create_user_files(
            [file], request.folder_id or -1, user, db_session, link_url=request.url
        )
        # Create connector and credential (same as in upload_user_files)
        for user_file in user_files:
            connector_base = ConnectorBase(
                name=f"UserFile-{user_file.file_id}-{int(time.time())}",
                source=DocumentSource.FILE,
                input_type=InputType.LOAD_STATE,
                connector_specific_config={
                    "file_locations": [user_file.file_id],
                    "file_names": [user_file.name],
                    "zip_metadata": {},
                },
                refresh_freq=None,
                prune_freq=None,
                indexing_start=None,
            )
            connector = create_connector(
                db_session=db_session,
                connector_data=connector_base,
            )
            credential_info = CredentialBase(
                credential_json={},
                admin_public=True,
                source=DocumentSource.FILE,
                curator_public=True,
                groups=[],
                name=f"UserFileCredential-{user_file.file_id}-{int(time.time())}",
            )
            credential = create_credential(credential_info, user, db_session)
            cc_pair = add_credential_to_connector(
                db_session=db_session,
                user=user,
                connector_id=connector.id,
                credential_id=credential.id,
                cc_pair_name=f"UserFileCCPair-{int(time.time())}",
                access_type=AccessType.PRIVATE,
                auto_sync_options=None,
                groups=[],
                is_user_file=True,
            )
            user_file.cc_pair_id = cc_pair.data
            db_session.commit()
            # Trigger immediate indexing with highest priority
            tenant_id = get_current_tenant_id()
            trigger_indexing_for_cc_pair(
                [], connector.id, False, tenant_id, db_session, is_user_file=True
            )
        db_session.commit()
        return [UserFileSnapshot.from_model(user_file) for user_file in user_files]
    except requests.RequestException as e:
        # Chain the original exception so the full cause is preserved.
        raise HTTPException(
            status_code=400, detail=f"Failed to fetch URL: {str(e)}"
        ) from e
@router.get("/user/file/indexing-status")
def get_files_indexing_status(
    file_ids: list[int] = Query(...),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> dict[int, bool]:
    """Return a mapping of file id -> whether that file has been indexed."""
    status_by_file_id = get_user_file_indexing_status(file_ids, db_session)
    return status_by_file_id
@router.get("/user/file/token-estimate")
def get_files_token_estimate(
    file_ids: list[int] = Query([]),
    folder_ids: list[int] = Query([]),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> dict:
    """Estimate the combined token count of the given files and folders."""
    return {
        "total_tokens": calculate_user_files_token_count(
            file_ids, folder_ids, db_session
        )
    }
class ReindexFileRequest(BaseModel):
    """Payload for POST /user/file/reindex."""

    file_id: int  # id of the caller-owned UserFile to reindex
@router.post("/user/file/reindex")
def reindex_file(
    request: ReindexFileRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> MessageResponse:
    """Re-trigger high-priority indexing for one of the caller's files.

    Raises:
        HTTPException(404): file not found, or its cc_pair row is missing.
        HTTPException(400): file has no associated connector-credential pair.
        HTTPException(500): triggering the reindex itself failed.
    """
    user_id = user.id if user else None
    user_file_to_reindex = (
        db_session.query(UserFile)
        .filter(UserFile.id == request.file_id, UserFile.user_id == user_id)
        .first()
    )
    if not user_file_to_reindex:
        raise HTTPException(status_code=404, detail="File not found")
    if not user_file_to_reindex.cc_pair_id:
        raise HTTPException(
            status_code=400,
            detail="File does not have an associated connector-credential pair",
        )
    # Get the connector id from the cc_pair
    cc_pair = (
        db_session.query(ConnectorCredentialPair)
        .filter_by(id=user_file_to_reindex.cc_pair_id)
        .first()
    )
    if not cc_pair:
        raise HTTPException(
            status_code=404, detail="Associated connector-credential pair not found"
        )
    # Trigger immediate reindexing with highest priority
    tenant_id = get_current_tenant_id()
    # Update the cc_pair status to ACTIVE to ensure it's processed
    cc_pair.status = ConnectorCredentialPairStatus.ACTIVE
    db_session.commit()
    try:
        trigger_indexing_for_cc_pair(
            [], cc_pair.connector_id, True, tenant_id, db_session, is_user_file=True
        )
        return MessageResponse(
            message="File reindexing has been triggered successfully"
        )
    except Exception as e:
        logger.error(
            f"Error triggering reindexing for file {request.file_id}: {str(e)}"
        )
        # `from e` preserves the original traceback for debugging.
        raise HTTPException(
            status_code=500, detail=f"Failed to trigger reindexing: {str(e)}"
        ) from e
class BulkCleanupRequest(BaseModel):
    """Payload for POST /user/file/bulk-cleanup."""

    folder_id: int  # target folder; special sentinel values are handled by the endpoint
    days_older_than: int | None = None  # if set, only delete files older than this many days
@router.post("/user/file/bulk-cleanup")
def bulk_cleanup_files(
    request: BulkCleanupRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> MessageResponse:
    """Bulk delete files older than specified days in a folder"""
    user_id = user.id if user else None
    logger.info(
        f"Bulk cleanup request: folder_id={request.folder_id}, days_older_than={request.days_older_than}"
    )

    # Only real folders are checked for existence/ownership; the recent-docs
    # folder is a sentinel id rather than a UserFolder row.
    if request.folder_id != RECENT_DOCS_FOLDER_ID:
        folder = (
            db_session.query(UserFolder)
            .filter(UserFolder.id == request.folder_id, UserFolder.user_id == user_id)
            .first()
        )
        if folder is None:
            raise HTTPException(status_code=404, detail="Folder not found")

    # Build the deletion filter incrementally: always scoped to the caller.
    filter_criteria = [UserFile.user_id == user_id]
    if request.folder_id != -2:  # -2 means all folders
        filter_criteria.append(UserFile.folder_id == request.folder_id)
    if request.days_older_than is not None:
        cutoff_date = datetime.utcnow() - timedelta(days=request.days_older_than)
        logger.info(f"Filtering files older than {cutoff_date} (UTC)")
        filter_criteria.append(UserFile.created_at < cutoff_date)

    files_to_delete = db_session.query(UserFile).filter(*filter_criteria).all()
    logger.info(f"Found {len(files_to_delete)} files to delete")

    # Delete each ORM instance individually so relationship cascades fire.
    for file in files_to_delete:
        logger.debug(
            f"Deleting file: id={file.id}, name={file.name}, created_at={file.created_at}"
        )
        db_session.delete(file)
    db_session.commit()

    delete_count = len(files_to_delete)
    return MessageResponse(message=f"Successfully deleted {delete_count} files")

View File

@@ -1,114 +0,0 @@
from datetime import datetime
from enum import Enum as PyEnum
from typing import List
from uuid import UUID
from pydantic import BaseModel
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.models import UserFile
from onyx.db.models import UserFolder
from onyx.file_store.models import ChatFileType
from onyx.server.query_and_chat.chat_utils import mime_type_to_chat_file_type
class UserFileStatus(str, PyEnum):
    """Indexing lifecycle states for a user file.

    Values are derived from the file's connector-credential pair in
    UserFileSnapshot.from_model (index attempts, last success time, pause state).
    """

    FAILED = "FAILED"
    INDEXING = "INDEXING"
    INDEXED = "INDEXED"
    REINDEXING = "REINDEXING"
# this maps to FileResponse on the front end
class UserFileSnapshot(BaseModel):
    """API-facing view of a UserFile row, including derived indexing status."""

    id: int
    name: str
    document_id: str
    folder_id: int | None = None
    user_id: UUID | None
    file_id: str
    created_at: datetime
    assistant_ids: List[int] = []  # List of assistant IDs
    token_count: int | None
    indexed: bool
    link_url: str | None  # set when the file was created by scraping a URL
    status: UserFileStatus
    chat_file_type: ChatFileType
    @classmethod
    def from_model(cls, model: UserFile) -> "UserFileSnapshot":
        """Build a snapshot from the ORM model.

        `status` and `indexed` are derived from the file's connector-credential
        pair (cc_pair): its index attempts, last successful index time, and
        paused state. The branch order below matters — FAILED is checked first.
        """
        return cls(
            id=model.id,
            # Link-derived files are stored with a ".txt" suffix; strip it for
            # display so the shown name matches the scraped page title.
            name=(
                model.name[:-4]
                if model.link_url and model.name.endswith(".txt")
                else model.name
            ),
            folder_id=model.folder_id,
            document_id=model.document_id,
            user_id=model.user_id,
            file_id=model.file_id,
            created_at=model.created_at,
            assistant_ids=[assistant.id for assistant in model.assistants],
            token_count=model.token_count,
            status=(
                # FAILED: attempts were made, none succeeded, and the pair is paused
                UserFileStatus.FAILED
                if model.cc_pair
                and len(model.cc_pair.index_attempts) > 0
                and model.cc_pair.last_successful_index_time is None
                and model.cc_pair.status == ConnectorCredentialPairStatus.PAUSED
                else (
                    # INDEXED: at least one successful index run
                    UserFileStatus.INDEXED
                    if model.cc_pair
                    and model.cc_pair.last_successful_index_time is not None
                    else (
                        # REINDEXING: multiple attempts, none successful yet,
                        # and the pair is still active (not paused)
                        UserFileStatus.REINDEXING
                        if model.cc_pair
                        and len(model.cc_pair.index_attempts) > 1
                        and model.cc_pair.last_successful_index_time is None
                        and model.cc_pair.status != ConnectorCredentialPairStatus.PAUSED
                        # INDEXING: default (no cc_pair yet, or first attempt in flight)
                        else UserFileStatus.INDEXING
                    )
                )
            ),
            indexed=(
                model.cc_pair.last_successful_index_time is not None
                if model.cc_pair
                else False
            ),
            link_url=model.link_url,
            chat_file_type=mime_type_to_chat_file_type(model.content_type),
        )
class UserFolderSnapshot(BaseModel):
    """API-facing view of a user folder and the files it contains."""

    id: int
    name: str
    description: str
    files: List[UserFileSnapshot]
    created_at: datetime
    user_id: UUID | None
    assistant_ids: List[int] = []  # List of assistant IDs
    token_count: int | None
    @classmethod
    def from_model(cls, model: UserFolder) -> "UserFolderSnapshot":
        """Convert an ORM folder (and its files) into a snapshot."""
        file_snapshots = [UserFileSnapshot.from_model(f) for f in model.files]
        total_tokens = sum(snapshot.token_count or 0 for snapshot in file_snapshots)
        return cls(
            id=model.id,
            name=model.name,
            description=model.description,
            files=file_snapshots,
            created_at=model.created_at,
            user_id=model.user_id,
            assistant_ids=[assistant.id for assistant in model.assistants],
            # A zero total is reported as None (preserves the original
            # `sum(...) or None` behavior).
            token_count=total_tokens or None,
        )
class MessageResponse(BaseModel):
    """Generic acknowledgement payload with a human-readable message."""

    message: str
class FileSystemResponse(BaseModel):
    """Combined listing of a user's folders and files."""

    folders: list[UserFolderSnapshot]
    files: list[UserFileSnapshot]

View File

@@ -74,8 +74,8 @@ class SearchToolOverrideKwargs(BaseModel):
precomputed_query_embedding: Embedding | None = None
precomputed_is_keyword: bool | None = None
precomputed_keywords: list[str] | None = None
user_file_ids: list[int] | None = None
user_folder_ids: list[int] | None = None
user_file_ids: list[UUID] | None = None
project_id: int | None = None
document_sources: list[DocumentSource] | None = None
time_cutoff: datetime | None = None
expanded_queries: QueryExpansions | None = None

View File

@@ -302,7 +302,6 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
retrieved_sections_callback = None
skip_query_analysis = False
user_file_ids = None
user_folder_ids = None
document_sources = None
time_cutoff = None
expanded_queries = None
@@ -323,7 +322,7 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
override_kwargs.skip_query_analysis, False
)
user_file_ids = override_kwargs.user_file_ids
user_folder_ids = override_kwargs.user_folder_ids
project_id = override_kwargs.project_id
document_sources = override_kwargs.document_sources
time_cutoff = override_kwargs.time_cutoff
expanded_queries = override_kwargs.expanded_queries
@@ -378,7 +377,8 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
retrieval_options.filters if retrieval_options else None
),
user_file_filters=UserFileFilters(
user_file_ids=user_file_ids, user_folder_ids=user_folder_ids
user_file_ids=user_file_ids,
project_id=project_id,
),
persona=self.persona,
offset=(retrieval_options.offset if retrieval_options else None),

View File

@@ -95,7 +95,7 @@ def generate_dummy_chunk(
return DocMetadataAwareIndexChunk.from_index_chunk(
index_chunk=chunk,
user_file=None,
user_folder=None,
user_project=[],
access=DocumentAccess.build(
user_emails=user_emails,
user_groups=user_groups,

View File

@@ -120,7 +120,6 @@ def test_list_threads(admin_user: DATestUser | None) -> None:
assert "persona_id" in session
assert "time_created" in session
assert "shared_status" in session
assert "folder_id" in session
assert "current_alternate_model" in session
# Validate UUID

552
web/package-lock.json generated
View File

@@ -21,6 +21,7 @@
"@radix-ui/react-dialog": "^1.1.6",
"@radix-ui/react-dropdown-menu": "^2.1.6",
"@radix-ui/react-label": "^2.1.1",
"@radix-ui/react-menubar": "^1.1.16",
"@radix-ui/react-popover": "^1.1.6",
"@radix-ui/react-radio-group": "^1.2.2",
"@radix-ui/react-scroll-area": "^1.2.2",
@@ -3716,6 +3717,524 @@
}
}
},
"node_modules/@radix-ui/react-menubar": {
"version": "1.1.16",
"resolved": "https://registry.npmjs.org/@radix-ui/react-menubar/-/react-menubar-1.1.16.tgz",
"integrity": "sha512-EB1FktTz5xRRi2Er974AUQZWg2yVBb1yjip38/lgwtCVRd3a+maUoGHN/xs9Yv8SY8QwbSEb+YrxGadVWbEutA==",
"license": "MIT",
"dependencies": {
"@radix-ui/primitive": "1.1.3",
"@radix-ui/react-collection": "1.1.7",
"@radix-ui/react-compose-refs": "1.1.2",
"@radix-ui/react-context": "1.1.2",
"@radix-ui/react-direction": "1.1.1",
"@radix-ui/react-id": "1.1.1",
"@radix-ui/react-menu": "2.1.16",
"@radix-ui/react-primitive": "2.1.3",
"@radix-ui/react-roving-focus": "1.1.11",
"@radix-ui/react-use-controllable-state": "1.2.2"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-menubar/node_modules/@radix-ui/primitive": {
"version": "1.1.3",
"resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz",
"integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==",
"license": "MIT"
},
"node_modules/@radix-ui/react-menubar/node_modules/@radix-ui/react-arrow": {
"version": "1.1.7",
"resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz",
"integrity": "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==",
"license": "MIT",
"dependencies": {
"@radix-ui/react-primitive": "2.1.3"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-menubar/node_modules/@radix-ui/react-collection": {
"version": "1.1.7",
"resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.7.tgz",
"integrity": "sha512-Fh9rGN0MoI4ZFUNyfFVNU4y9LUz93u9/0K+yLgA2bwRojxM8JU1DyvvMBabnZPBgMWREAJvU2jjVzq+LrFUglw==",
"license": "MIT",
"dependencies": {
"@radix-ui/react-compose-refs": "1.1.2",
"@radix-ui/react-context": "1.1.2",
"@radix-ui/react-primitive": "2.1.3",
"@radix-ui/react-slot": "1.2.3"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-menubar/node_modules/@radix-ui/react-compose-refs": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/@radix-ui/react-compose-refs/-/react-compose-refs-1.1.2.tgz",
"integrity": "sha512-z4eqJvfiNnFMHIIvXP3CY57y2WJs5g2v3X0zm9mEJkrkNv4rDxu+sg9Jh8EkXyeqBkB7SOcboo9dMVqhyrACIg==",
"license": "MIT",
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-menubar/node_modules/@radix-ui/react-context": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.1.2.tgz",
"integrity": "sha512-jCi/QKUM2r1Ju5a3J64TH2A5SpKAgh0LpknyqdQ4m6DCV0xJ2HG1xARRwNGPQfi1SLdLWZ1OJz6F4OMBBNiGJA==",
"license": "MIT",
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-menubar/node_modules/@radix-ui/react-direction": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-direction/-/react-direction-1.1.1.tgz",
"integrity": "sha512-1UEWRX6jnOA2y4H5WczZ44gOOjTEmlqv1uNW4GAJEO5+bauCBhv8snY65Iw5/VOS/ghKN9gr2KjnLKxrsvoMVw==",
"license": "MIT",
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-menubar/node_modules/@radix-ui/react-dismissable-layer": {
"version": "1.1.11",
"resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz",
"integrity": "sha512-Nqcp+t5cTB8BinFkZgXiMJniQH0PsUt2k51FUhbdfeKvc4ACcG2uQniY/8+h1Yv6Kza4Q7lD7PQV0z0oicE0Mg==",
"license": "MIT",
"dependencies": {
"@radix-ui/primitive": "1.1.3",
"@radix-ui/react-compose-refs": "1.1.2",
"@radix-ui/react-primitive": "2.1.3",
"@radix-ui/react-use-callback-ref": "1.1.1",
"@radix-ui/react-use-escape-keydown": "1.1.1"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-menubar/node_modules/@radix-ui/react-focus-guards": {
"version": "1.1.3",
"resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.1.3.tgz",
"integrity": "sha512-0rFg/Rj2Q62NCm62jZw0QX7a3sz6QCQU0LpZdNrJX8byRGaGVTqbrW9jAoIAHyMQqsNpeZ81YgSizOt5WXq0Pw==",
"license": "MIT",
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-menubar/node_modules/@radix-ui/react-focus-scope": {
"version": "1.1.7",
"resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.1.7.tgz",
"integrity": "sha512-t2ODlkXBQyn7jkl6TNaw/MtVEVvIGelJDCG41Okq/KwUsJBwQ4XVZsHAVUkK4mBv3ewiAS3PGuUWuY2BoK4ZUw==",
"license": "MIT",
"dependencies": {
"@radix-ui/react-compose-refs": "1.1.2",
"@radix-ui/react-primitive": "2.1.3",
"@radix-ui/react-use-callback-ref": "1.1.1"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-menubar/node_modules/@radix-ui/react-id": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-id/-/react-id-1.1.1.tgz",
"integrity": "sha512-kGkGegYIdQsOb4XjsfM97rXsiHaBwco+hFI66oO4s9LU+PLAC5oJ7khdOVFxkhsmlbpUqDAvXw11CluXP+jkHg==",
"license": "MIT",
"dependencies": {
"@radix-ui/react-use-layout-effect": "1.1.1"
},
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-menubar/node_modules/@radix-ui/react-menu": {
"version": "2.1.16",
"resolved": "https://registry.npmjs.org/@radix-ui/react-menu/-/react-menu-2.1.16.tgz",
"integrity": "sha512-72F2T+PLlphrqLcAotYPp0uJMr5SjP5SL01wfEspJbru5Zs5vQaSHb4VB3ZMJPimgHHCHG7gMOeOB9H3Hdmtxg==",
"license": "MIT",
"dependencies": {
"@radix-ui/primitive": "1.1.3",
"@radix-ui/react-collection": "1.1.7",
"@radix-ui/react-compose-refs": "1.1.2",
"@radix-ui/react-context": "1.1.2",
"@radix-ui/react-direction": "1.1.1",
"@radix-ui/react-dismissable-layer": "1.1.11",
"@radix-ui/react-focus-guards": "1.1.3",
"@radix-ui/react-focus-scope": "1.1.7",
"@radix-ui/react-id": "1.1.1",
"@radix-ui/react-popper": "1.2.8",
"@radix-ui/react-portal": "1.1.9",
"@radix-ui/react-presence": "1.1.5",
"@radix-ui/react-primitive": "2.1.3",
"@radix-ui/react-roving-focus": "1.1.11",
"@radix-ui/react-slot": "1.2.3",
"@radix-ui/react-use-callback-ref": "1.1.1",
"aria-hidden": "^1.2.4",
"react-remove-scroll": "^2.6.3"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-menubar/node_modules/@radix-ui/react-popper": {
"version": "1.2.8",
"resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz",
"integrity": "sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==",
"license": "MIT",
"dependencies": {
"@floating-ui/react-dom": "^2.0.0",
"@radix-ui/react-arrow": "1.1.7",
"@radix-ui/react-compose-refs": "1.1.2",
"@radix-ui/react-context": "1.1.2",
"@radix-ui/react-primitive": "2.1.3",
"@radix-ui/react-use-callback-ref": "1.1.1",
"@radix-ui/react-use-layout-effect": "1.1.1",
"@radix-ui/react-use-rect": "1.1.1",
"@radix-ui/react-use-size": "1.1.1",
"@radix-ui/rect": "1.1.1"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-menubar/node_modules/@radix-ui/react-portal": {
"version": "1.1.9",
"resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz",
"integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==",
"license": "MIT",
"dependencies": {
"@radix-ui/react-primitive": "2.1.3",
"@radix-ui/react-use-layout-effect": "1.1.1"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-menubar/node_modules/@radix-ui/react-presence": {
"version": "1.1.5",
"resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz",
"integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==",
"license": "MIT",
"dependencies": {
"@radix-ui/react-compose-refs": "1.1.2",
"@radix-ui/react-use-layout-effect": "1.1.1"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-menubar/node_modules/@radix-ui/react-primitive": {
"version": "2.1.3",
"resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
"integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
"license": "MIT",
"dependencies": {
"@radix-ui/react-slot": "1.2.3"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-menubar/node_modules/@radix-ui/react-roving-focus": {
"version": "1.1.11",
"resolved": "https://registry.npmjs.org/@radix-ui/react-roving-focus/-/react-roving-focus-1.1.11.tgz",
"integrity": "sha512-7A6S9jSgm/S+7MdtNDSb+IU859vQqJ/QAtcYQcfFC6W8RS4IxIZDldLR0xqCFZ6DCyrQLjLPsxtTNch5jVA4lA==",
"license": "MIT",
"dependencies": {
"@radix-ui/primitive": "1.1.3",
"@radix-ui/react-collection": "1.1.7",
"@radix-ui/react-compose-refs": "1.1.2",
"@radix-ui/react-context": "1.1.2",
"@radix-ui/react-direction": "1.1.1",
"@radix-ui/react-id": "1.1.1",
"@radix-ui/react-primitive": "2.1.3",
"@radix-ui/react-use-callback-ref": "1.1.1",
"@radix-ui/react-use-controllable-state": "1.2.2"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-menubar/node_modules/@radix-ui/react-slot": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
"integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
"license": "MIT",
"dependencies": {
"@radix-ui/react-compose-refs": "1.1.2"
},
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-menubar/node_modules/@radix-ui/react-use-callback-ref": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-use-callback-ref/-/react-use-callback-ref-1.1.1.tgz",
"integrity": "sha512-FkBMwD+qbGQeMu1cOHnuGB6x4yzPjho8ap5WtbEJ26umhgqVXbhekKUQO+hZEL1vU92a3wHwdp0HAcqAUF5iDg==",
"license": "MIT",
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-menubar/node_modules/@radix-ui/react-use-controllable-state": {
"version": "1.2.2",
"resolved": "https://registry.npmjs.org/@radix-ui/react-use-controllable-state/-/react-use-controllable-state-1.2.2.tgz",
"integrity": "sha512-BjasUjixPFdS+NKkypcyyN5Pmg83Olst0+c6vGov0diwTEo6mgdqVR6hxcEgFuh4QrAs7Rc+9KuGJ9TVCj0Zzg==",
"license": "MIT",
"dependencies": {
"@radix-ui/react-use-effect-event": "0.0.2",
"@radix-ui/react-use-layout-effect": "1.1.1"
},
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-menubar/node_modules/@radix-ui/react-use-escape-keydown": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-use-escape-keydown/-/react-use-escape-keydown-1.1.1.tgz",
"integrity": "sha512-Il0+boE7w/XebUHyBjroE+DbByORGR9KKmITzbR7MyQ4akpORYP/ZmbhAr0DG7RmmBqoOnZdy2QlvajJ2QA59g==",
"license": "MIT",
"dependencies": {
"@radix-ui/react-use-callback-ref": "1.1.1"
},
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-menubar/node_modules/@radix-ui/react-use-layout-effect": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-use-layout-effect/-/react-use-layout-effect-1.1.1.tgz",
"integrity": "sha512-RbJRS4UWQFkzHTTwVymMTUv8EqYhOp8dOOviLj2ugtTiXRaRQS7GLGxZTLL1jWhMeoSCf5zmcZkqTl9IiYfXcQ==",
"license": "MIT",
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-menubar/node_modules/@radix-ui/react-use-rect": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-use-rect/-/react-use-rect-1.1.1.tgz",
"integrity": "sha512-QTYuDesS0VtuHNNvMh+CjlKJ4LJickCMUAqjlE3+j8w+RlRpwyX3apEQKGFzbZGdo7XNG1tXa+bQqIE7HIXT2w==",
"license": "MIT",
"dependencies": {
"@radix-ui/rect": "1.1.1"
},
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-menubar/node_modules/@radix-ui/react-use-size": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-use-size/-/react-use-size-1.1.1.tgz",
"integrity": "sha512-ewrXRDTAqAXlkl6t/fkXWNAhFX9I+CkKlw6zjEwk86RSPKwZr3xpBRso655aqYafwtnbpHLj6toFzmd6xdVptQ==",
"license": "MIT",
"dependencies": {
"@radix-ui/react-use-layout-effect": "1.1.1"
},
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-menubar/node_modules/@radix-ui/rect": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@radix-ui/rect/-/rect-1.1.1.tgz",
"integrity": "sha512-HPwpGIzkl28mWyZqG52jiqDJ12waP11Pa1lGoiyUkIEuMLBP0oeK/C89esbXrxsky5we7dfd8U58nm0SgAWpVw==",
"license": "MIT"
},
"node_modules/@radix-ui/react-popover": {
"version": "1.1.15",
"resolved": "https://registry.npmjs.org/@radix-ui/react-popover/-/react-popover-1.1.15.tgz",
@@ -4194,6 +4713,39 @@
}
}
},
"node_modules/@radix-ui/react-use-effect-event": {
"version": "0.0.2",
"resolved": "https://registry.npmjs.org/@radix-ui/react-use-effect-event/-/react-use-effect-event-0.0.2.tgz",
"integrity": "sha512-Qp8WbZOBe+blgpuUT+lw2xheLP8q0oatc9UpmiemEICxGvFLYmHm9QowVZGHtJlGbS6A6yJ3iViad/2cVjnOiA==",
"license": "MIT",
"dependencies": {
"@radix-ui/react-use-layout-effect": "1.1.1"
},
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-use-effect-event/node_modules/@radix-ui/react-use-layout-effect": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-use-layout-effect/-/react-use-layout-effect-1.1.1.tgz",
"integrity": "sha512-RbJRS4UWQFkzHTTwVymMTUv8EqYhOp8dOOviLj2ugtTiXRaRQS7GLGxZTLL1jWhMeoSCf5zmcZkqTl9IiYfXcQ==",
"license": "MIT",
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-use-escape-keydown": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-use-escape-keydown/-/react-use-escape-keydown-1.1.1.tgz",

View File

@@ -27,6 +27,7 @@
"@radix-ui/react-dialog": "^1.1.6",
"@radix-ui/react-dropdown-menu": "^2.1.6",
"@radix-ui/react-label": "^2.1.1",
"@radix-ui/react-menubar": "^1.1.16",
"@radix-ui/react-popover": "^1.1.6",
"@radix-ui/react-radio-group": "^1.2.2",
"@radix-ui/react-scroll-area": "^1.2.2",

View File

@@ -71,27 +71,20 @@ import {
Option as DropdownOption,
} from "@/components/Dropdown";
import { SourceChip } from "@/app/chat/components/input/ChatInputBar";
import {
TagIcon,
UserIcon,
FileIcon,
FolderIcon,
InfoIcon,
BookIcon,
} from "lucide-react";
import { TagIcon, UserIcon, FileIcon, InfoIcon, BookIcon } from "lucide-react";
import { LLMSelector } from "@/components/llm/LLMSelector";
import useSWR from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { ConfirmEntityModal } from "@/components/modals/ConfirmEntityModal";
import { FilePickerModal } from "@/app/chat/my-documents/components/FilePicker";
import { useDocumentsContext } from "@/app/chat/my-documents/DocumentsContext";
import { SEARCH_TOOL_ID } from "@/app/chat/components/tools/constants";
import TextView from "@/components/chat/TextView";
import { MinimalOnyxDocument } from "@/lib/search/interfaces";
import { MAX_CHARACTERS_PERSONA_DESCRIPTION } from "@/lib/constants";
import { FormErrorFocus } from "@/components/FormErrorHelpers";
import { ProjectFile } from "@/app/chat/projects/projectsService";
import { useProjectsContext } from "@/app/chat/projects/ProjectsContext";
import FilePicker from "@/app/chat/components/files/FilePicker";
function findSearchTool(tools: ToolSnapshot[]) {
return tools.find((tool) => tool.in_code_tool_id === SEARCH_TOOL_ID);
@@ -297,7 +290,7 @@ export function AssistantEditor({
enabledToolsMap[tool.id] = personaCurrentToolIds.includes(tool.id);
});
const { files, folders, refreshFolders } = useDocumentsContext();
const { recentFiles, uploadFiles: uploadProjectFiles } = useProjectsContext();
const [showVisibilityWarning, setShowVisibilityWarning] = useState(false);
@@ -345,13 +338,11 @@ export function AssistantEditor({
) ?? [],
selectedGroups: existingPersona?.groups ?? [],
user_file_ids: existingPersona?.user_file_ids ?? [],
user_folder_ids: existingPersona?.user_folder_ids ?? [],
knowledge_source: !canShowKnowledgeSource
? "user_files"
: !userKnowledgeEnabled
? "team_knowledge"
: (existingPersona?.user_file_ids?.length ?? 0) > 0 ||
(existingPersona?.user_folder_ids?.length ?? 0) > 0
: (existingPersona?.user_file_ids?.length ?? 0) > 0
? "user_files"
: "team_knowledge",
is_default_persona: existingPersona?.is_default_persona ?? false,
@@ -654,7 +645,6 @@ export function AssistantEditor({
num_chunks: numChunks,
document_set_ids: teamKnowledge ? values.document_set_ids : [],
user_file_ids: teamKnowledge ? [] : values.user_file_ids,
user_folder_ids: teamKnowledge ? [] : values.user_folder_ids,
};
let personaResponse;
@@ -705,7 +695,6 @@ export function AssistantEditor({
}
await refreshAssistants();
await refreshFolders();
router.push(
isAdminPage
@@ -738,38 +727,8 @@ export function AssistantEditor({
values.llm_model_version_override || defaultModelName || ""
);
// TODO: memoize this / make more efficient
const selectedFiles = files.filter((file) =>
values.user_file_ids.includes(file.id)
);
const selectedFolders = folders.filter((folder) =>
values.user_folder_ids.includes(folder.id)
);
return (
<>
{filePickerModalOpen && (
<FilePickerModal
setPresentingDocument={setPresentingDocument}
isOpen={filePickerModalOpen}
onClose={() => {
setFilePickerModalOpen(false);
}}
onSave={(selectedFiles, selectedFolders) => {
setFieldValue(
"user_file_ids",
selectedFiles.map((file) => file.id)
);
setFieldValue(
"user_folder_ids",
selectedFolders.map((folder) => folder.id)
);
setFilePickerModalOpen(false);
}}
buttonContent="Add to Assistant"
/>
)}
<Form className="w-full text-text-950 assistant-editor">
<FormErrorFocus />
{/* Refresh starter messages when name or description changes */}
@@ -1070,37 +1029,75 @@ export function AssistantEditor({
!existingPersona?.is_default_persona && (
<div className="text-sm flex flex-col items-start">
<SubLabel>
Click below to add documents or folders from My
Documents
Click below to add files from My Documents
</SubLabel>
{(values.user_file_ids.length > 0 ||
values.user_folder_ids.length > 0) && (
{values.user_file_ids.length > 0 && (
<div className="flex flex-wrap mb-2 max-w-sm gap-2">
{selectedFiles.map((file) => (
<SourceChip
key={file.id}
onRemove={() => {}}
title={file.name}
icon={<FileIcon size={16} />}
/>
))}
{selectedFolders.map((folder) => (
<SourceChip
key={folder.id}
onRemove={() => {}}
title={folder.name}
icon={<FolderIcon size={16} />}
/>
))}
{values.user_file_ids.map(
(userFileId: string) => {
const rf = recentFiles.find(
(f) => f.id === userFileId
);
const title =
rf?.name ||
`File ${userFileId.slice(0, 8)}`;
return (
<SourceChip
key={userFileId}
onRemove={() => {
setFieldValue(
"user_file_ids",
values.user_file_ids.filter(
(id: string) =>
id !== userFileId
)
);
}}
title={title}
icon={<FileIcon size={16} />}
/>
);
}
)}
</div>
)}
<button
type="button"
onClick={() => setFilePickerModalOpen(true)}
className="text-primary hover:underline"
>
+ Add User Files
</button>
<FilePicker
showTriggerLabel
triggerLabel="Add User Files"
recentFiles={recentFiles}
onPickRecent={(file: ProjectFile) => {
if (!values.user_file_ids.includes(file.id)) {
setFieldValue("user_file_ids", [
...values.user_file_ids,
file.id,
]);
}
}}
handleUploadChange={async (
e: React.ChangeEvent<HTMLInputElement>
) => {
const files = e.target.files;
if (!files || files.length === 0) return;
try {
const uploaded = await uploadProjectFiles(
Array.from(files)
);
const newIds = uploaded.user_files.map(
(f) => f.id
);
const merged = Array.from(
new Set([
...(values.user_file_ids || []),
...newIds,
])
);
setFieldValue("user_file_ids", merged);
} finally {
e.target.value = "";
}
}}
/>
</div>
)}

View File

@@ -1,106 +0,0 @@
import {
FileResponse,
FolderResponse,
} from "@/app/chat/my-documents/DocumentsContext";
// Net share/unshare operations required to reconcile an assistant's
// currently shared files/folders with the user's latest selection.
export interface AssistantFileChanges {
  // File ids newly selected and not yet shared with the assistant.
  filesToShare: number[];
  // File ids currently shared but no longer selected.
  filesToUnshare: number[];
  // Folder ids newly selected and not yet shared with the assistant.
  foldersToShare: number[];
  // Folder ids currently shared but no longer selected.
  foldersToUnshare: number[];
}
/**
 * Diff the assistant's currently shared file/folder ids against the
 * user's new selection and compute the minimal set of share/unshare
 * operations needed to reconcile them.
 *
 * @param existingFileIds - file ids already shared with the assistant
 * @param existingFolderIds - folder ids already shared with the assistant
 * @param selectedFiles - files chosen in the picker
 * @param selectedFolders - folders chosen in the picker
 * @returns ids to share and ids to unshare, for files and folders
 */
export function calculateFileChanges(
  existingFileIds: number[],
  existingFolderIds: number[],
  selectedFiles: FileResponse[],
  selectedFolders: FolderResponse[]
): AssistantFileChanges {
  const selectedFileIds = selectedFiles.map((file) => file.id);
  const selectedFolderIds = selectedFolders.map((folder) => folder.id);

  // Sets give O(1) membership checks; the previous Array.includes
  // inside .filter was O(n^2) for large selections.
  const existingFileIdSet = new Set(existingFileIds);
  const existingFolderIdSet = new Set(existingFolderIds);
  const selectedFileIdSet = new Set(selectedFileIds);
  const selectedFolderIdSet = new Set(selectedFolderIds);

  return {
    filesToShare: selectedFileIds.filter((id) => !existingFileIdSet.has(id)),
    filesToUnshare: existingFileIds.filter(
      (id) => !selectedFileIdSet.has(id)
    ),
    foldersToShare: selectedFolderIds.filter(
      (id) => !existingFolderIdSet.has(id)
    ),
    foldersToUnshare: existingFolderIds.filter(
      (id) => !selectedFolderIdSet.has(id)
    ),
  };
}
/**
 * Share the given user files with an assistant, issuing one POST per
 * file id. Best-effort: a failed request is logged and skipped so one
 * bad file does not block the rest of the batch.
 *
 * @param assistantId - assistant to grant access to
 * @param fileIds - ids of the user files to share
 */
export async function shareFiles(
  assistantId: number,
  fileIds: number[]
): Promise<void> {
  for (const fileId of fileIds) {
    const response = await fetch(`/api/user/file/${fileId}/share`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({ assistant_id: assistantId }),
    });
    // Previously non-ok responses were silently ignored; surface them.
    if (!response.ok) {
      console.error(
        `Failed to share file ${fileId} with assistant ${assistantId}: ${response.status}`
      );
    }
  }
}
/**
 * Revoke an assistant's access to the given user files, one POST per
 * file id. Best-effort: a failed request is logged and skipped so one
 * bad file does not block the rest of the batch.
 *
 * @param assistantId - assistant to revoke access from
 * @param fileIds - ids of the user files to unshare
 */
export async function unshareFiles(
  assistantId: number,
  fileIds: number[]
): Promise<void> {
  for (const fileId of fileIds) {
    const response = await fetch(`/api/user/file/${fileId}/unshare`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({ assistant_id: assistantId }),
    });
    // Previously non-ok responses were silently ignored; surface them.
    if (!response.ok) {
      console.error(
        `Failed to unshare file ${fileId} from assistant ${assistantId}: ${response.status}`
      );
    }
  }
}
/**
 * Share the given user folders with an assistant, issuing one POST per
 * folder id. Best-effort: a failed request is logged and skipped so one
 * bad folder does not block the rest of the batch.
 *
 * @param assistantId - assistant to grant access to
 * @param folderIds - ids of the user folders to share
 */
export async function shareFolders(
  assistantId: number,
  folderIds: number[]
): Promise<void> {
  for (const folderId of folderIds) {
    const response = await fetch(`/api/user/folder/${folderId}/share`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({ assistant_id: assistantId }),
    });
    // Previously non-ok responses were silently ignored; surface them.
    if (!response.ok) {
      console.error(
        `Failed to share folder ${folderId} with assistant ${assistantId}: ${response.status}`
      );
    }
  }
}
/**
 * Revoke an assistant's access to the given user folders, one POST per
 * folder id. Best-effort: a failed request is logged and skipped so one
 * bad folder does not block the rest of the batch.
 *
 * @param assistantId - assistant to revoke access from
 * @param folderIds - ids of the user folders to unshare
 */
export async function unshareFolders(
  assistantId: number,
  folderIds: number[]
): Promise<void> {
  for (const folderId of folderIds) {
    const response = await fetch(`/api/user/folder/${folderId}/unshare`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({ assistant_id: assistantId }),
    });
    // Previously non-ok responses were silently ignored; surface them.
    if (!response.ok) {
      console.error(
        `Failed to unshare folder ${folderId} from assistant ${assistantId}: ${response.status}`
      );
    }
  }
}
/**
 * Apply a computed set of share/unshare changes to an assistant.
 * All four operation groups run concurrently; the promise resolves
 * once every group has completed.
 *
 * @param assistantId - assistant whose shares are being updated
 * @param changes - operations produced by calculateFileChanges
 */
export async function updateAssistantFiles(
  assistantId: number,
  changes: AssistantFileChanges
): Promise<void> {
  const { filesToShare, filesToUnshare, foldersToShare, foldersToUnshare } =
    changes;
  const operations = [
    shareFiles(assistantId, filesToShare),
    unshareFiles(assistantId, filesToUnshare),
    shareFolders(assistantId, foldersToShare),
    unshareFolders(assistantId, foldersToUnshare),
  ];
  await Promise.all(operations);
}

View File

@@ -43,8 +43,7 @@ export interface MinimalPersonaSnapshot {
}
export interface Persona extends MinimalPersonaSnapshot {
user_file_ids: number[];
user_folder_ids: number[];
user_file_ids: string[];
users: MinimalUserSnapshot[];
groups: number[];
num_chunks?: number;

View File

@@ -28,8 +28,7 @@ interface PersonaUpsertRequest {
is_default_persona: boolean;
display_priority: number | null;
label_ids: number[] | null;
user_file_ids: number[] | null;
user_folder_ids: number[] | null;
user_file_ids: string[] | null;
}
export interface PersonaUpsertParameters {
@@ -56,8 +55,7 @@ export interface PersonaUpsertParameters {
uploaded_image: File | null;
is_default_persona: boolean;
label_ids: number[] | null;
user_file_ids: number[];
user_folder_ids: number[];
user_file_ids: string[];
}
export const createPersonaLabel = (name: string) => {
@@ -116,7 +114,6 @@ function buildPersonaUpsertRequest(
remove_image,
search_start_date,
user_file_ids,
user_folder_ids,
} = creationRequest;
return {
@@ -149,7 +146,6 @@ function buildPersonaUpsertRequest(
display_priority: null,
label_ids: creationRequest.label_ids ?? null,
user_file_ids: user_file_ids ?? null,
user_folder_ids: user_folder_ids ?? null,
};
}

View File

@@ -41,7 +41,7 @@ export default function SidebarWrapper<T extends object>({
}, [sidebarVisible]);
const sidebarElementRef = useRef<HTMLDivElement>(null);
const { folders, openedFolders, chatSessions } = useChatContext();
const { chatSessions } = useChatContext();
const explicitlyUntoggle = () => {
setShowDocSidebar(false);
@@ -106,7 +106,6 @@ export default function SidebarWrapper<T extends object>({
toggled={sidebarVisible}
existingChats={chatSessions}
currentChatSession={null}
folders={folders}
/>
</div>
</div>

View File

@@ -2,6 +2,7 @@ import { ErrorCallout } from "@/components/ErrorCallout";
import CardSection from "@/components/admin/CardSection";
import { AssistantEditor } from "@/app/admin/assistants/AssistantEditor";
import { fetchAssistantEditorInfoSS } from "@/lib/assistants/fetchPersonaEditorInfoSS";
import { ProjectsProvider } from "@/app/chat/projects/ProjectsContext";
export default async function Page(props: { params: Promise<{ id: string }> }) {
const params = await props.params;
@@ -15,15 +16,17 @@ export default async function Page(props: { params: Promise<{ id: string }> }) {
);
} else {
return (
<div className="w-full py-8">
<div className="px-32">
<div className="mx-auto container">
<CardSection className="!border-none !bg-transparent !ring-none">
<AssistantEditor {...values} defaultPublic={false} />
</CardSection>
<ProjectsProvider>
<div className="w-full py-8">
<div className="px-32">
<div className="mx-auto container">
<CardSection className="!border-none !bg-transparent !ring-none">
<AssistantEditor {...values} defaultPublic={false} />
</CardSection>
</div>
</div>
</div>
</div>
</ProjectsProvider>
);
}
}

View File

@@ -2,6 +2,7 @@ import CardSection from "@/components/admin/CardSection";
import { AssistantEditor } from "@/app/admin/assistants/AssistantEditor";
import { fetchAssistantEditorInfoSS } from "@/lib/assistants/fetchPersonaEditorInfoSS";
import { ErrorCallout } from "@/components/ErrorCallout";
import { ProjectsProvider } from "@/app/chat/projects/ProjectsContext";
export default async function Page() {
const [values, error] = await fetchAssistantEditorInfoSS();
@@ -31,5 +32,9 @@ export default async function Page() {
);
}
return <div>{body}</div>;
return (
<ProjectsProvider>
<div>{body}</div>
</ProjectsProvider>
);
}

View File

@@ -6,7 +6,6 @@ export interface ChatSessionSummary {
persona_id: number | null;
time_created: string;
shared_status: ChatSessionSharedStatus;
folder_id: number | null;
current_alternate_model: string | null;
current_temperature_override: number | null;
highlights?: string[];

View File

@@ -49,11 +49,9 @@ import { getSourceMetadata } from "@/lib/sources";
import { UserSettingsModal } from "./modal/UserSettingsModal";
import AssistantModal from "../../assistants/mine/AssistantModal";
import { useSidebarShortcut } from "@/lib/browserUtilities";
import { FilePickerModal } from "../my-documents/components/FilePicker";
import { SourceMetadata } from "@/lib/search/interfaces";
import { FederatedConnectorDetail, ValidSources } from "@/lib/types";
import { useDocumentsContext } from "../my-documents/DocumentsContext";
import { ChatSearchModal } from "../chat_search/ChatSearchModal";
import { ErrorBanner } from "../message/Resubmit";
import MinimalMarkdown from "@/components/chat/MinimalMarkdown";
@@ -85,6 +83,11 @@ import { FederatedOAuthModal } from "@/components/chat/FederatedOAuthModal";
import { AssistantIcon } from "@/components/assistants/AssistantIcon";
import { StarterMessageDisplay } from "./starterMessages/StarterMessageDisplay";
import { MessagesDisplay } from "./MessagesDisplay";
import ProjectContextPanel from "./projects/ProjectContextPanel";
import { useProjectsContext } from "@/app/chat/projects/ProjectsContext";
import { getProjectTokenCount } from "@/app/chat/projects/projectsService";
import ProjectChatSessionList from "./projects/ProjectChatSessionList";
export function ChatPage({
toggle,
@@ -123,21 +126,19 @@ export function ChatPage({
tags,
documentSets,
llmProviders,
folders,
shouldShowWelcomeModal,
refreshChatSessions,
} = useChatContext();
const {
selectedFiles,
selectedFolders,
addSelectedFolder,
clearSelectedItems,
folders: userFolders,
files: allUserFiles,
currentMessageFiles,
setCurrentMessageFiles,
} = useDocumentsContext();
setCurrentProjectId,
currentProjectId,
currentProjectDetails,
lastFailedFiles,
clearLastFailedFiles,
} = useProjectsContext();
const { height: screenHeight } = useScreenSize();
@@ -148,8 +149,6 @@ export function ChatPage({
const settings = useContext(SettingsContext);
const enterpriseSettings = settings?.enterpriseSettings;
const [toggleDocSelection, setToggleDocSelection] = useState(false);
const isInitialLoad = useRef(true);
const [userSettingsToggled, setUserSettingsToggled] = useState(false);
@@ -192,8 +191,6 @@ export function ChatPage({
if (message) {
onSubmit({
message,
selectedFiles,
selectedFolders,
currentMessageFiles,
useAgentSearch: deepResearchEnabled,
});
@@ -203,6 +200,17 @@ export function ChatPage({
const { selectedAssistant, setSelectedAssistantFromId, liveAssistant } =
useAssistantController({
selectedChatSession,
onAssistantSelect: () => {
// Only remove project context if user explicitly selected an assistant
// (i.e., assistantId is present). Avoid clearing project when assistantId was removed.
const newSearchParams = new URLSearchParams(
searchParams?.toString() || ""
);
if (newSearchParams.has("assistantId")) {
newSearchParams.delete("projectid");
router.replace(`?${newSearchParams.toString()}`, { scroll: false });
}
},
});
const { deepResearchEnabled, toggleDeepResearch } = useDeepResearchToggle({
@@ -252,41 +260,38 @@ export function ChatPage({
const { popup, setPopup } = usePopup();
// Show popup if any files failed in ProjectsContext reconciliation
useEffect(() => {
const userFolderId = searchParams?.get(SEARCH_PARAM_NAMES.USER_FOLDER_ID);
const allMyDocuments = searchParams?.get(
SEARCH_PARAM_NAMES.ALL_MY_DOCUMENTS
);
if (userFolderId) {
const userFolder = userFolders.find(
(folder) => folder.id === parseInt(userFolderId)
);
if (userFolder) {
addSelectedFolder(userFolder);
}
} else if (allMyDocuments === "true" || allMyDocuments === "1") {
// Clear any previously selected folders
clearSelectedItems();
// Add all user folders to the current context
userFolders.forEach((folder) => {
addSelectedFolder(folder);
if (lastFailedFiles && lastFailedFiles.length > 0) {
const names = lastFailedFiles.map((f) => f.name).join(", ");
setPopup({
type: "error",
message:
lastFailedFiles.length === 1
? `File failed and was removed: ${names}`
: `Files failed and were removed: ${names}`,
});
clearLastFailedFiles();
}
}, [
userFolders,
searchParams?.get(SEARCH_PARAM_NAMES.USER_FOLDER_ID),
searchParams?.get(SEARCH_PARAM_NAMES.ALL_MY_DOCUMENTS),
addSelectedFolder,
clearSelectedItems,
]);
}, [lastFailedFiles, setPopup, clearLastFailedFiles]);
useEffect(() => {
const projectId = searchParams?.get("projectid");
if (projectId) {
console.log("setting project id", projectId);
setCurrentProjectId(parseInt(projectId));
} else {
console.log("clearing project id");
setCurrentProjectId(null);
}
}, [searchParams?.get("projectid"), setCurrentProjectId]);
const [message, setMessage] = useState(
searchParams?.get(SEARCH_PARAM_NAMES.USER_PROMPT) || ""
);
const [projectPanelVisible, setProjectPanelVisible] = useState(true);
const filterManager = useFilters();
const [isChatSearchModalOpen, setIsChatSearchModalOpen] = useState(false);
@@ -506,7 +511,11 @@ export function ChatPage({
setSelectedAssistantFromId,
});
const { onMessageSelection } = useChatSessionController({
const {
onMessageSelection,
currentSessionFileTokenCount,
availableContextTokens,
} = useChatSessionController({
existingChatSessionId,
searchParams,
filterManager,
@@ -522,7 +531,6 @@ export function ChatPage({
submitOnLoadPerformed,
hasPerformedInitialScroll,
clientScrollToBottom,
clearSelectedItems,
refreshChatSessions,
onSubmit,
});
@@ -555,10 +563,9 @@ export function ChatPage({
const reset = useCallback(() => {
setMessage("");
setCurrentMessageFiles([]);
clearSelectedItems();
// TODO: move this into useChatController
// setLoadingError(null);
}, [setMessage, setCurrentMessageFiles, clearSelectedItems]);
}, [setMessage, setCurrentMessageFiles]);
// Used to maintain a "time out" for history sidebar so our existing refs can have time to process change
const [untoggled, setUntoggled] = useState(false);
@@ -663,8 +670,6 @@ export function ChatPage({
// We call onSubmit, passing a `messageOverride`
onSubmit({
message: lastUserMsg.message,
selectedFiles: selectedFiles,
selectedFolders: selectedFolders,
currentMessageFiles: currentMessageFiles,
useAgentSearch: deepResearchEnabled,
messageIdToResend: lastUserMsg.messageId,
@@ -690,11 +695,6 @@ export function ChatPage({
redirect("/auth/login");
}
const clearSelectedDocuments = useCallback(() => {
setSelectedDocuments([]);
clearSelectedItems();
}, [clearSelectedItems]);
const toggleDocumentSelection = useCallback((document: OnyxDocument) => {
setSelectedDocuments((prev) =>
prev.some((d) => d.document_id === document.document_id)
@@ -703,31 +703,18 @@ export function ChatPage({
);
}, []);
// Memoized callbacks for ChatInputBar
const handleToggleDocSelection = useCallback(() => {
setToggleDocSelection(true);
}, []);
const handleShowApiKeyModal = useCallback(() => {
setShowApiKeyModal(true);
}, []);
const handleChatInputSubmit = useCallback(() => {
setProjectPanelVisible(false);
onSubmit({
message: message,
selectedFiles: selectedFiles,
selectedFolders: selectedFolders,
currentMessageFiles: currentMessageFiles,
useAgentSearch: deepResearchEnabled,
});
}, [
message,
onSubmit,
selectedFiles,
selectedFolders,
currentMessageFiles,
deepResearchEnabled,
]);
}, [message, onSubmit, currentMessageFiles, deepResearchEnabled]);
// Memoized callbacks for Header
const handleToggleUserSettings = useCallback(() => {
@@ -754,6 +741,48 @@ export function ChatPage({
!loadingError &&
!submittedMessage;
// Only show the centered hero layout when there is NO project selected
// and there are no messages yet. If a project is selected, prefer a top layout.
const showCenteredHero = currentProjectId === null && showCenteredInput;
useEffect(() => {
if (currentProjectId !== null && showCenteredInput) {
setProjectPanelVisible(true);
}
if (!showCenteredInput) {
setProjectPanelVisible(false);
}
}, [currentProjectId, showCenteredInput]);
// When no chat session exists but a project is selected, fetch the
// total tokens for the project's files so upload UX can compare
// against available context similar to session-based flows.
const [projectContextTokenCount, setProjectContextTokenCount] = useState(0);
// Fetch project-level token count when no chat session exists.
// Note: useEffect cannot be async, so we define an inner async function (run)
// and invoke it. The `cancelled` guard prevents setting state after the
// component unmounts or when the dependencies change and a newer effect run
// supersedes an older in-flight request.
useEffect(() => {
let cancelled = false;
async function run() {
if (!existingChatSessionId && currentProjectId !== null) {
try {
const total = await getProjectTokenCount(currentProjectId);
if (!cancelled) setProjectContextTokenCount(total || 0);
} catch {
if (!cancelled) setProjectContextTokenCount(0);
}
} else {
setProjectContextTokenCount(0);
}
}
run();
return () => {
cancelled = true;
};
}, [existingChatSessionId, currentProjectId, currentProjectDetails?.files]);
// handle error case where no assistants are available
if (noAssistants) {
return (
@@ -803,18 +832,6 @@ export function ChatPage({
/>
)}
{toggleDocSelection && (
<FilePickerModal
setPresentingDocument={setPresentingDocument}
buttonContent="Set as Context"
isOpen={true}
onClose={() => setToggleDocSelection(false)}
onSave={() => {
setToggleDocSelection(false);
}}
/>
)}
<ChatSearchModal
open={isChatSearchModalOpen}
onCloseModal={() => setIsChatSearchModalOpen(false)}
@@ -837,7 +854,7 @@ export function ChatPage({
closeSidebar={handleMobileDocumentSidebarClose}
selectedDocuments={selectedDocuments}
toggleDocumentSelection={toggleDocumentSelection}
clearSelectedDocuments={clearSelectedDocuments}
clearSelectedDocuments={() => setSelectedDocuments([])}
// TODO (chris): fix
selectedDocumentTokens={0}
maxTokens={maxTokens}
@@ -935,7 +952,6 @@ export function ChatPage({
toggled={sidebarVisible}
existingChats={chatSessions}
currentChatSession={selectedChatSession}
folders={folders}
removeToggle={removeToggle}
showShareModal={setSharedChatSession}
/>
@@ -974,10 +990,6 @@ export function ChatPage({
duration-300
ease-in-out
bg-transparent
transition-all
duration-300
ease-in-out
h-full
${
documentSidebarVisible && !settings?.isMobile
? "w-[400px]"
@@ -995,7 +1007,7 @@ export function ChatPage({
closeSidebar={handleDesktopDocumentSidebarClose}
selectedDocuments={selectedDocuments}
toggleDocumentSelection={toggleDocumentSelection}
clearSelectedDocuments={clearSelectedDocuments}
clearSelectedDocuments={() => setSelectedDocuments([])}
// TODO (chris): fix
selectedDocumentTokens={0}
maxTokens={maxTokens}
@@ -1047,7 +1059,7 @@ export function ChatPage({
noClick
>
{({ getRootProps }) => (
<div className="flex h-full w-full">
<div className="flex w-full h-full">
{!settings?.isMobile && (
<div
style={{ transition: "width 0.30s ease-out" }}
@@ -1100,8 +1112,6 @@ export function ChatPage({
liveAssistant={liveAssistant}
llmManager={llmManager}
deepResearchEnabled={deepResearchEnabled}
selectedFiles={selectedFiles}
selectedFolders={selectedFolders}
currentMessageFiles={currentMessageFiles}
setPresentingDocument={setPresentingDocument}
setCurrentFeedback={setCurrentFeedback}
@@ -1129,9 +1139,11 @@ export function ChatPage({
<div
ref={inputRef}
className={`absolute pointer-events-none z-10 w-full ${
showCenteredInput
showCenteredHero
? "inset-0"
: "bottom-0 left-0 right-0 translate-y-0"
: currentProjectId !== null && showCenteredInput
? "top-0 left-0 right-0"
: "bottom-0 left-0 right-0 translate-y-0"
}`}
>
{!showCenteredInput && aboveHorizon && (
@@ -1147,15 +1159,15 @@ export function ChatPage({
<div
className={`pointer-events-auto w-[95%] mx-auto relative text-text-600 ${
showCenteredInput
showCenteredHero
? "h-full grid grid-rows-[0.85fr_auto_1.15fr]"
: "mb-8"
}`}
>
{showCenteredInput && (
{showCenteredHero && (
<div
data-testid="chat-intro"
className="row-start-1 self-end flex text-text-800 justify-center mb-6 transition-opacity duration-300"
className="row-start-1 self-end flex text-text-800 justify-center mb-8 transition-opacity duration-300"
>
<AssistantIcon
colorOverride="text-text-800"
@@ -1168,17 +1180,27 @@ export function ChatPage({
</div>
)}
<div
className={showCenteredInput ? "row-start-2" : ""}
className={showCenteredHero ? "row-start-2" : ""}
>
{currentProjectId !== null && (
<div
className={`transition-all duration-700 ease-out ${
projectPanelVisible
? "opacity-100 translate-y-0"
: "opacity-0 translate-y-6 pointer-events-none"
}`}
>
<ProjectContextPanel />
</div>
)}
<ChatInputBar
deepResearchEnabled={deepResearchEnabled}
toggleDeepResearch={toggleDeepResearch}
toggleDocumentSidebar={toggleDocumentSidebar}
filterManager={filterManager}
llmManager={llmManager}
removeDocs={clearSelectedDocuments}
removeDocs={() => setSelectedDocuments([])}
retrievalEnabled={retrievalEnabled}
toggleDocSelection={handleToggleDocSelection}
showConfigureAPIKey={handleShowApiKeyModal}
selectedDocuments={selectedDocuments}
message={message}
@@ -1186,6 +1208,12 @@ export function ChatPage({
stopGenerating={stopGenerating}
onSubmit={handleChatInputSubmit}
chatState={currentChatState}
currentSessionFileTokenCount={
existingChatSessionId
? currentSessionFileTokenCount
: projectContextTokenCount
}
availableContextTokens={availableContextTokens}
selectedAssistant={
selectedAssistant || liveAssistant
}
@@ -1196,10 +1224,16 @@ export function ChatPage({
/>
</div>
{currentProjectId !== null && (
<div className="transition-all duration-700 ease-out">
<ProjectChatSessionList />
</div>
)}
{liveAssistant.starter_messages &&
liveAssistant.starter_messages.length > 0 &&
messageHistory.length === 0 &&
showCenteredInput && (
showCenteredHero && (
<div className="mt-6 row-start-3">
<StarterMessageDisplay
starterMessages={
@@ -1208,8 +1242,6 @@ export function ChatPage({
onSelectStarterMessage={(message) => {
onSubmit({
message: message,
selectedFiles: selectedFiles,
selectedFolders: selectedFolders,
currentMessageFiles:
currentMessageFiles,
useAgentSearch: deepResearchEnabled,
@@ -1233,12 +1265,12 @@ export function ChatPage({
)}
{enterpriseSettings &&
enterpriseSettings.use_custom_logotype && (
<div className="hidden lg:block fixed right-12 bottom-8 pointer-events-none z-10">
<div className="hidden lg:block absolute right-0 bottom-0">
<img
src="/api/enterprise-settings/logotype"
alt="logotype"
style={{ objectFit: "contain" }}
className="w-fit h-9"
className="w-fit h-8"
/>
</div>
)}

View File

@@ -6,13 +6,10 @@ import { ErrorBanner } from "../message/Resubmit";
import { FeedbackType } from "@/app/chat/interfaces";
import { MinimalPersonaSnapshot } from "@/app/admin/assistants/interfaces";
import { LlmDescriptor } from "@/lib/hooks";
import {
FileResponse,
FolderResponse,
} from "@/app/chat/my-documents/DocumentsContext";
import { EnterpriseSettings } from "@/app/admin/settings/interfaces";
import { FileDescriptor } from "@/app/chat/interfaces";
import { MemoizedAIMessage } from "../message/messageComponents/MemoizedAIMessage";
import { ProjectFile } from "../projects/projectsService";
interface MessagesDisplayProps {
messageHistory: Message[];
@@ -20,17 +17,13 @@ interface MessagesDisplayProps {
liveAssistant: MinimalPersonaSnapshot;
llmManager: { currentLlm: LlmDescriptor | null };
deepResearchEnabled: boolean;
selectedFiles: FileResponse[];
selectedFolders: FolderResponse[];
currentMessageFiles: FileDescriptor[];
currentMessageFiles: ProjectFile[];
setPresentingDocument: (doc: MinimalOnyxDocument | null) => void;
setCurrentFeedback: (feedback: [FeedbackType, number] | null) => void;
onSubmit: (args: {
message: string;
messageIdToResend?: number;
selectedFiles: FileResponse[];
selectedFolders: FolderResponse[];
currentMessageFiles: FileDescriptor[];
currentMessageFiles: ProjectFile[];
useAgentSearch: boolean;
modelOverride?: LlmDescriptor;
regenerationRequest?: {
@@ -64,8 +57,6 @@ export const MessagesDisplay: React.FC<MessagesDisplayProps> = ({
liveAssistant,
llmManager,
deepResearchEnabled,
selectedFiles,
selectedFolders,
currentMessageFiles,
setPresentingDocument,
setCurrentFeedback,
@@ -96,8 +87,6 @@ export const MessagesDisplay: React.FC<MessagesDisplayProps> = ({
return async function (modelOverride: LlmDescriptor) {
return await onSubmit({
message: regenerationRequest.parentMessage.message,
selectedFiles,
selectedFolders,
currentMessageFiles,
useAgentSearch: deepResearchEnabled,
modelOverride,
@@ -107,13 +96,7 @@ export const MessagesDisplay: React.FC<MessagesDisplayProps> = ({
});
};
},
[
onSubmit,
deepResearchEnabled,
selectedFiles,
selectedFolders,
currentMessageFiles,
]
[onSubmit, deepResearchEnabled, currentMessageFiles]
);
const handleFeedback = useCallback(
@@ -128,8 +111,6 @@ export const MessagesDisplay: React.FC<MessagesDisplayProps> = ({
onSubmit({
message: editedContent,
messageIdToResend: msgId || undefined,
selectedFiles: [],
selectedFolders: [],
currentMessageFiles: [],
useAgentSearch: deepResearchEnabled,
});

View File

@@ -11,8 +11,6 @@ import {
memo,
} from "react";
import { XIcon } from "@/components/icons/icons";
import { FileSourceCardInResults } from "@/app/chat/message/SourcesDisplay";
import { useDocumentsContext } from "@/app/chat/my-documents/DocumentsContext";
import { getCitations } from "../../services/packetUtils";
import {
useCurrentMessageTree,
@@ -49,8 +47,6 @@ const DocumentResultsComponent = (
}: DocumentResultsProps,
ref: ForwardedRef<HTMLDivElement>
) => {
const { files: allUserFiles } = useDocumentsContext();
const idOfMessageToDisplay = useSelectedNodeForDocDisplay();
const currentMessageTree = useCurrentMessageTree();
@@ -86,9 +82,6 @@ const DocumentResultsComponent = (
const humanFileDescriptors = humanMessage?.files.filter(
(file) => file.type == ChatFileType.USER_KNOWLEDGE
);
const userFiles = allUserFiles?.filter((file) =>
humanFileDescriptors?.some((descriptor) => descriptor.id === file.file_id)
);
const selectedDocumentIds =
selectedDocuments?.map((document) => document.document_id) || [];
@@ -134,26 +127,7 @@ const DocumentResultsComponent = (
>
<div className="flex flex-col h-full">
<div className="overflow-y-auto h-fit mb-8 pb-8 sm:mx-0 flex-grow gap-y-0 default-scrollbar dark-scrollbar flex flex-col">
{userFiles && userFiles.length > 0 ? (
<div className=" gap-y-2 flex flex-col pt-2 mx-3">
{userFiles?.map((file, index) => (
<FileSourceCardInResults
key={index}
relevantDocument={dedupedDocuments.find(
(doc) =>
doc.document_id === `FILE_CONNECTOR__${file.file_id}`
)}
document={file}
setPresentingDocument={() =>
setPresentingDocument({
document_id: file.document_id,
semantic_identifier: file.file_id || null,
})
}
/>
))}
</div>
) : dedupedDocuments.length > 0 ? (
{dedupedDocuments.length > 0 ? (
<>
{/* Cited Documents Section */}
{citedDocuments.length > 0 && (

View File

@@ -0,0 +1,162 @@
"use client";
import React, { useMemo, useRef, useState } from "react";
import {
Menubar,
MenubarContent,
MenubarItem,
MenubarMenu,
MenubarSeparator,
MenubarTrigger,
} from "@/components/ui/menubar";
import { FileUploadIcon } from "@/components/icons/icons";
import { Files } from "@phosphor-icons/react";
import { FileIcon, Paperclip, Loader2 } from "lucide-react";
import { cn } from "@/lib/utils";
import { ChatInputOption } from "../input/ChatInputOption";
import FilesList from "./FilesList";
import {
Dialog,
DialogContent,
DialogHeader,
DialogTitle,
} from "@/components/ui/dialog";
import { ProjectFile } from "../../projects/projectsService";
// Props for the FilePicker dropdown trigger.
type FilePickerProps = {
  // Extra classes applied to the outer wrapper.
  className?: string;
  // Invoked when the user clicks one of the recent files.
  onPickRecent?: (file: ProjectFile) => void;
  // Recently used files shown at the top of the menu (first 3 inline).
  recentFiles: ProjectFile[];
  // Change handler for the hidden <input type="file"> used for uploads.
  handleUploadChange: (e: React.ChangeEvent<HTMLInputElement>) => void;
  // When true, render a labeled button instead of the compact icon trigger.
  showTriggerLabel?: boolean;
  // Label text used when showTriggerLabel is set.
  triggerLabel?: string;
};
// Small helper to render an icon + label row
const Row = ({ children }: { children: React.ReactNode }) => (
<div className="flex items-center gap-2">{children}</div>
);
/**
 * Dropdown for attaching files to the current context. Shows up to
 * three recent files inline, a "... All Recent Files" item that opens a
 * dialog with the full list, and an "Upload Files" item that proxies a
 * hidden native <input type="file">.
 */
export default function FilePicker({
  className,
  onPickRecent,
  recentFiles,
  handleUploadChange,
  showTriggerLabel = false,
  triggerLabel = "Add Files",
}: FilePickerProps) {
  const fileInputRef = useRef<HTMLInputElement | null>(null);
  const [showRecentFiles, setShowRecentFiles] = useState(false);

  // Forward menu clicks to the hidden native file input.
  const triggerUploadPicker = () => fileInputRef.current?.click();

  return (
    <div className={cn("relative", className)}>
      <input
        ref={fileInputRef}
        type="file"
        className="hidden"
        multiple
        onChange={handleUploadChange}
        accept={"*/*"}
      />
      <Menubar className="bg-transparent dark:bg-transparent p-0 border-0">
        <MenubarMenu>
          <MenubarTrigger className="relative cursor-pointer flex items-center group rounded-lg text-input-text hover:bg-background-chat-hover hover:text-neutral-900 dark:hover:text-neutral-50 py-1.5 px-0">
            {showTriggerLabel ? (
              <div className="flex flex-row gap-2 items-center justify-center p-2 rounded-md bg-background-dark/75 hover:dark:bg-neutral-800/75 hover:bg-accent-background-hovered transition-all duration-150">
                <FileUploadIcon className="text-text-darker dark:text-text-lighter" />
                <p className="text-sm text-text-darker dark:text-text-lighter">
                  {triggerLabel}
                </p>
              </div>
            ) : (
              <Row>
                <ChatInputOption
                  flexPriority="stiff"
                  Icon={FileUploadIcon}
                  tooltipContent={"Upload files and attach user files"}
                />
              </Row>
            )}
          </MenubarTrigger>
          <MenubarContent
            align="start"
            sideOffset={6}
            className="min-w-[220px] text-input-text"
          >
            {recentFiles.length > 0 && (
              <>
                <label className="text-sm font-light text-input-text p-2.5">
                  Recent Files
                </label>
                {recentFiles.slice(0, 3).map((f) => (
                  <MenubarItem
                    key={f.id}
                    // Fixed: previously fell back to a debug console.log
                    // when no onPickRecent handler was provided.
                    onClick={() => onPickRecent?.(f)}
                    className="hover:bg-background-chat-hover hover:text-neutral-900 dark:hover:text-neutral-50 text-input-text p-2"
                  >
                    <Row>
                      {String(f.status).toLowerCase() === "processing" ? (
                        <Loader2 className="h-4 w-4 animate-spin" />
                      ) : (
                        <FileIcon className="h-4 w-4" />
                      )}
                      <span className="truncate max-w-[160px]" title={f.name}>
                        {f.name}
                      </span>
                    </Row>
                  </MenubarItem>
                ))}
                {recentFiles.length > 3 && (
                  <MenubarItem
                    onClick={() => setShowRecentFiles(true)}
                    className="hover:bg-background-chat-hover hover:text-neutral-900 dark:hover:text-neutral-50 text-input-text p-2 font-normal"
                  >
                    <Row>
                      <span className="truncate font-light">
                        ... All Recent Files
                      </span>
                    </Row>
                  </MenubarItem>
                )}
              </>
            )}
            <MenubarSeparator />
            <MenubarItem
              onClick={triggerUploadPicker}
              className="hover:bg-background-chat-hover hover:text-neutral-900 dark:hover:text-neutral-50 text-input-text p-2"
            >
              <Row>
                <Paperclip size={16} />
                <div className="flex flex-col">
                  <span className="font-semibold">Upload Files</span>
                  <span className="text-xs font-description text-text-400 dark:text-neutral-400">
                    Upload a file from your device
                  </span>
                </div>
              </Row>
            </MenubarItem>
          </MenubarContent>
        </MenubarMenu>
      </Menubar>
      <Dialog open={showRecentFiles} onOpenChange={setShowRecentFiles}>
        <DialogContent className="w-full max-w-lg">
          <DialogHeader>
            <Files size={32} />
            <DialogTitle>Recent Files</DialogTitle>
          </DialogHeader>
          <FilesList
            recentFiles={recentFiles}
            onPickRecent={onPickRecent}
            handleUploadChange={handleUploadChange}
          />
        </DialogContent>
      </Dialog>
    </div>
  );
}

View File

@@ -0,0 +1,168 @@
"use client";
import React, { useMemo, useRef, useState } from "react";
import { Input } from "@/components/ui/input";
import { ScrollArea } from "@/components/ui/scroll-area";
import { Separator } from "@/components/ui/separator";
import {
FileIcon,
Globe,
Image as ImageIcon,
FileText,
Search,
Loader2,
Trash2,
} from "lucide-react";
import { cn } from "@/lib/utils";
import { ProjectFile } from "../../projects/ProjectsContext";
import { formatRelativeTime } from "../projects/project_utils";
import { FileUploadIcon } from "@/components/icons/icons";
interface FilesListProps {
  // Optional extra classes merged onto the root container.
  className?: string;
  // Files to render in the list (filtered client-side by the search box).
  recentFiles: ProjectFile[];
  // Invoked when the user clicks a file row.
  onPickRecent?: (file: ProjectFile) => void;
  // When provided, an "Add Files" button and hidden file input are rendered.
  handleUploadChange?: (e: React.ChangeEvent<HTMLInputElement>) => void;
  // When true, a trash action appears on hover for non-processing files.
  showRemove?: boolean;
  // Invoked when the trash action is clicked for a file.
  onRemove?: (file: ProjectFile) => void;
}
// Picks the icon for a file row: a spinner while the file is processing,
// otherwise an icon inferred from the file type string (URL/site, image,
// text, or a generic file icon).
const kindIcon = (kind: string, status?: string) => {
  // The processing state takes precedence over any type-based icon.
  if (String(status).toLowerCase() === "processing") {
    return <Loader2 className="h-4 w-4 animate-spin" />;
  }
  // file_type can be null/undefined upstream (getReadableFileType guards for
  // exactly this); coerce before lowercasing so we don't crash.
  const normalized = String(kind ?? "").toLowerCase();
  if (normalized.includes("url") || normalized.includes("site"))
    return <Globe className="h-4 w-4" />;
  if (
    normalized.includes("image") ||
    normalized.includes("png") ||
    normalized.includes("jpg")
  )
    return <ImageIcon className="h-4 w-4" />;
  if (normalized.includes("txt") || normalized.includes("text"))
    return <FileText className="h-4 w-4" />;
  return <FileIcon className="h-4 w-4" />;
};
// Turns a MIME-style type (e.g. "application/pdf") into a short upper-case
// label ("PDF"). The raw value is used when no "/" is present; missing input
// yields an empty string.
const getReadableFileType = (fileType: string | undefined | null): string => {
  if (!fileType) {
    return "";
  }
  const text = String(fileType);
  const segments = text.split("/");
  const label = segments[segments.length - 1];
  return label.toUpperCase();
};
// Searchable list of a user's recent/project files with optional upload and
// remove actions. Clicking a row forwards the file to onPickRecent.
export default function FilesList({
  className,
  recentFiles,
  onPickRecent,
  handleUploadChange,
  showRemove,
  onRemove,
}: FilesListProps) {
  // Query typed into the search box; matching is case-insensitive on name.
  const [search, setSearch] = useState("");
  const fileInputRef = useRef<HTMLInputElement | null>(null);
  // Forwards a click to the hidden <input type="file"> to open the OS picker.
  const triggerUploadPicker = () => fileInputRef.current?.click();
  // Recompute the visible subset only when the list or the query changes.
  const filtered = useMemo(() => {
    const s = search.trim().toLowerCase();
    if (!s) return recentFiles;
    return recentFiles.filter((f) => f.name.toLowerCase().includes(s));
  }, [recentFiles, search]);
  return (
    <div className={cn("flex flex-col gap-3", className)}>
      <div className="flex items-center gap-2">
        <div className="relative flex-1">
          <Search className="absolute left-2 top-1/2 -translate-y-1/2 h-4 w-4 text-muted-foreground" />
          <Input
            placeholder="Search files..."
            value={search}
            onChange={(e) => setSearch(e.target.value)}
            className="h-9 pl-8"
            removeFocusRing
          />
        </div>
        {/* Upload controls only render when the caller supplies a handler. */}
        {handleUploadChange && (
          <>
            <input
              ref={fileInputRef}
              type="file"
              className="hidden"
              multiple
              onChange={handleUploadChange}
              accept={"*/*"}
            />
            <button
              onClick={triggerUploadPicker}
              className="flex flex-row gap-2 items-center justify-center p-2 rounded-md bg-background-dark/75 hover:dark:bg-neutral-800/75 hover:bg-accent-background-hovered transition-all duration-150"
            >
              <FileUploadIcon className="text-text-darker dark:text-text-lighter" />
              <p className="text-sm text-text-darker dark:text-text-lighter whitespace-nowrap">
                Add Files
              </p>
            </button>
          </>
        )}
      </div>
      <Separator />
      <ScrollArea className="h-[320px] md:h-auto md:max-h-[70vh] pr-2">
        <div className="flex flex-col">
          {/* NOTE(review): the remove <button> below is nested inside this row
              <button>, which is invalid HTML and can misbehave across
              browsers — consider a div with role="button" for the row. */}
          {filtered.map((f) => (
            <button
              key={f.id}
              className={cn(
                "flex items-center justify-between gap-3 text-left rounded-md px-2 py-2 group",
                "hover:bg-background-chat-hover hover:text-neutral-900 dark:hover:text-neutral-50"
              )}
              onClick={() => onPickRecent && onPickRecent(f)}
            >
              <div className="flex items-center gap-3 min-w-0">
                {kindIcon(f.file_type, (f as any).status)}
                <div className="min-w-0">
                  <div className="truncate text-sm font-normal">{f.name}</div>
                  <div className="text-xs text-text-400 dark:text-neutral-400">
                    {/* Subtitle: "Processing..." while indexing, the readable
                        type once completed, otherwise the raw status. */}
                    {(() => {
                      const s = String(f.status || "").toLowerCase();
                      const typeLabel = getReadableFileType(f.file_type);
                      if (s === "processing") return "Processing...";
                      if (s === "completed") return typeLabel;
                      return f.status ? f.status : typeLabel;
                    })()}
                  </div>
                </div>
              </div>
              <div className="flex items-center gap-2 ml-3">
                {f.last_accessed_at && (
                  <div className="text-xs text-text-400 dark:text-neutral-400 whitespace-nowrap">
                    {formatRelativeTime(f.last_accessed_at)}
                  </div>
                )}
                {/* Remove action is hidden while the file is processing. */}
                {showRemove &&
                  String(f.status).toLowerCase() !== "processing" && (
                    <button
                      title="Remove from project"
                      aria-label="Remove file from project"
                      className="p-0 bg-transparent border-0 outline-none cursor-pointer opacity-0 group-hover:opacity-100 focus:opacity-100 transition-opacity duration-150"
                      onClick={(e) => {
                        // Don't also trigger the row's onPickRecent.
                        e.stopPropagation();
                        onRemove && onRemove(f);
                      }}
                    >
                      <Trash2 className="h-4 w-4 text-neutral-600 hover:text-red-600 dark:text-neutral-400 dark:hover:text-red-400" />
                    </button>
                  )}
              </div>
            </button>
          ))}
          {filtered.length === 0 && (
            <div className="text-sm text-muted-foreground px-2 py-4">
              No files found.
            </div>
          )}
        </div>
      </ScrollArea>
    </div>
  );
}

View File

@@ -1,267 +1,45 @@
import React, {
useState,
useRef,
useEffect,
ReactNode,
useCallback,
forwardRef,
} from "react";
import React, { useState, ReactNode, forwardRef } from "react";
import { Folder } from "./interfaces";
import { ChatSession } from "@/app/chat/interfaces";
import { FiTrash2, FiCheck, FiX } from "react-icons/fi";
import { Caret } from "@/components/icons/icons";
import { deleteFolder } from "./FolderManagement";
import { PencilIcon } from "lucide-react";
import { Popover } from "@/components/popover/Popover";
import { useChatContext } from "@/components/context/ChatContext";
import { useSortable } from "@dnd-kit/sortable";
interface FolderDropdownProps {
folder: Folder;
currentChatId?: string;
showShareModal?: (chatSession: ChatSession) => void;
showDeleteModal?: (chatSession: ChatSession) => void;
closeSidebar?: () => void;
onEdit?: (folderId: number, newName: string) => void;
onDelete?: (folderId: number) => void;
onDrop?: (folderId: number, chatSessionId: string) => void;
children?: ReactNode;
index: number;
}
export const FolderDropdown = forwardRef<HTMLDivElement, FolderDropdownProps>(
(
{
folder,
currentChatId,
showShareModal,
closeSidebar,
onEdit,
onDrop,
children,
index,
},
ref
) => {
({ folder, children, index }: FolderDropdownProps, ref) => {
const [isOpen, setIsOpen] = useState(true);
const [isEditing, setIsEditing] = useState(false);
const [newFolderName, setNewFolderName] = useState(folder.folder_name);
const [isHovered, setIsHovered] = useState(false);
const inputRef = useRef<HTMLInputElement>(null);
const [isDeletePopoverOpen, setIsDeletePopoverOpen] = useState(false);
const editingRef = useRef<HTMLDivElement>(null);
const { refreshFolders } = useChatContext();
const {
attributes,
listeners,
setNodeRef,
transform,
transition,
isDragging,
} = useSortable({ id: folder.folder_id?.toString() ?? "" });
const style: React.CSSProperties = {
transform: transform
? `translate3d(${transform.x}px, ${transform.y}px, 0)`
: undefined,
transition,
zIndex: isDragging ? 9999 : undefined,
position: isDragging ? "absolute" : "relative",
};
useEffect(() => {
if (isEditing && inputRef.current) {
inputRef.current.focus();
}
}, [isEditing]);
const handleEdit = useCallback(() => {
if (newFolderName && folder.folder_id !== undefined && onEdit) {
onEdit(folder.folder_id, newFolderName);
setIsEditing(false);
}
}, [newFolderName, folder.folder_id, onEdit]);
useEffect(() => {
const handleClickOutside = (event: MouseEvent) => {
if (
editingRef.current &&
!editingRef.current.contains(event.target as Node) &&
isEditing
) {
if (newFolderName !== folder.folder_name) {
handleEdit();
} else {
setIsEditing(false);
}
}
};
if (isEditing) {
document.addEventListener("mousedown", handleClickOutside);
}
return () => {
document.removeEventListener("mousedown", handleClickOutside);
};
}, [isEditing, newFolderName, folder.folder_name, handleEdit]);
const handleDeleteClick = useCallback(() => {
setIsDeletePopoverOpen(true);
}, []);
const handleCancelDelete = useCallback((e: React.MouseEvent) => {
e.preventDefault();
e.stopPropagation();
setIsDeletePopoverOpen(false);
}, []);
const handleConfirmDelete = useCallback(
async (e: React.MouseEvent) => {
e.preventDefault();
e.stopPropagation();
if (folder.folder_id !== undefined) {
await deleteFolder(folder.folder_id);
}
await refreshFolders();
setIsDeletePopoverOpen(false);
},
[folder.folder_id, refreshFolders]
);
const handleDragOver = (e: React.DragEvent<HTMLDivElement>) => {
e.preventDefault();
};
const handleDrop = useCallback(
(e: React.DragEvent<HTMLDivElement>) => {
e.preventDefault();
const chatSessionId = e.dataTransfer.getData("text/plain");
if (folder.folder_id && onDrop) {
onDrop(folder.folder_id, chatSessionId);
}
},
[folder.folder_id, onDrop]
);
return (
<div
ref={setNodeRef}
style={style}
{...attributes}
className="overflow-visible pt-2 w-full"
onDragOver={handleDragOver}
onDrop={handleDrop}
>
<div className="overflow-visible pt-2 w-full">
<div
className="sticky top-0 bg-background-sidebar dark:bg-transparent z-10"
style={{ zIndex: 1000 - index }}
>
<div
ref={ref}
className="flex overflow-visible items-center w-full text-text-darker rounded-md p-1 bg-background-sidebar dark:bg-[#000] relative sticky top-0"
className="flex overflow-visible items-center w-full text-text-darker rounded-md p-1 bg-background-sidebar dark:bg-[#000] sticky top-0"
style={{ zIndex: 10 - index }}
onMouseEnter={() => setIsHovered(true)}
onMouseLeave={() => setIsHovered(false)}
>
<button
className="flex overflow-hidden bg-background-sidebar dark:bg-[#000] items-center flex-grow"
onClick={() => !isEditing && setIsOpen(!isOpen)}
{...(isEditing ? {} : listeners)}
onClick={() => setIsOpen(!isOpen)}
>
{isOpen ? (
<Caret size={16} className="mr-1" />
) : (
<Caret size={16} className="-rotate-90 mr-1" />
)}
{isEditing ? (
<div ref={editingRef} className="flex-grow z-[9999] relative">
<input
ref={inputRef}
type="text"
value={newFolderName}
onChange={(e) => setNewFolderName(e.target.value)}
className="text-sm font-medium bg-transparent outline-none w-full pb-1 border-b border-background-500 transition-colors duration-200"
onKeyDown={(e) => {
e.stopPropagation();
if (e.key === "Enter") {
handleEdit();
}
}}
onClick={(e) => e.stopPropagation()}
/>
</div>
) : (
<div className="flex items-center">
<span className="text-sm font-[500]">
{folder.folder_name}
</span>
</div>
)}
</button>
{isHovered && !isEditing && folder.folder_id && (
<button
onClick={(e) => {
e.stopPropagation();
setIsEditing(true);
}}
className="ml-auto px-1"
>
<PencilIcon size={14} />
</button>
)}
{(isHovered || isDeletePopoverOpen) &&
!isEditing &&
folder.folder_id && (
<Popover
open={isDeletePopoverOpen}
onOpenChange={setIsDeletePopoverOpen}
content={
<button
onClick={(e) => {
e.stopPropagation();
handleDeleteClick();
}}
className="px-1"
>
<FiTrash2 size={14} />
</button>
}
popover={
<div className="p-3 w-64 border border-border rounded-lg bg-background z-50">
<p className="text-sm mb-3">
Are you sure you want to delete this folder?
</p>
<div className="flex justify-center gap-2">
<button
className="px-3 py-1 text-sm bg-background-200 rounded"
onClick={handleCancelDelete}
>
Cancel
</button>
<button
className="px-3 py-1 text-sm bg-red-500 text-white rounded"
onClick={handleConfirmDelete}
>
Delete
</button>
</div>
</div>
}
requiresContentPadding
sideOffset={6}
/>
)}
{isEditing && (
<div className="flex -my-1 z-[9999]">
<button onClick={handleEdit} className="p-1">
<FiCheck size={14} />
</button>
<button onClick={() => setIsEditing(false)} className="p-1">
<FiX size={14} />
</button>
<div className="flex items-center">
<span className="text-sm font-[500]">{folder.folder_name}</span>
</div>
)}
</button>
</div>
{isOpen && (
<div className="overflow-visible mr-3 ml-1 mt-1">{children}</div>

View File

@@ -1,361 +0,0 @@
"use client";
import React, { useState, useEffect, useRef } from "react";
import { Folder } from "./interfaces";
import { ChatSessionDisplay } from "@/components/sidebar/ChatSessionDisplay"; // Ensure this is correctly imported
import {
FiChevronDown,
FiChevronRight,
FiFolder,
FiEdit2,
FiCheck,
FiX,
FiTrash, // Import the trash icon
} from "react-icons/fi";
import { BasicSelectable } from "@/components/BasicClickable";
import {
addChatToFolder,
deleteFolder,
updateFolderName,
} from "./FolderManagement";
import { usePopup } from "@/components/admin/connectors/Popup";
import { useRouter } from "next/navigation";
import { CHAT_SESSION_ID_KEY } from "@/lib/drag/constants";
import Cookies from "js-cookie";
import { Popover } from "@/components/popover/Popover";
import { ChatSession } from "@/app/chat/interfaces";
import { useChatContext } from "@/components/context/ChatContext";
// A single collapsible folder row in the chat sidebar. Supports inline
// rename, drag-and-drop of chat sessions into the folder, and a
// confirm-before-delete popover. Expansion state is persisted in the
// "openedFolders" cookie.
const FolderItem = ({
  folder,
  currentChatId,
  isInitiallyExpanded,
  initiallySelected,
  showShareModal,
  showDeleteModal,
}: {
  folder: Folder;
  currentChatId?: string;
  isInitiallyExpanded: boolean;
  initiallySelected: boolean;
  showShareModal: ((chatSession: ChatSession) => void) | undefined;
  showDeleteModal: ((chatSession: ChatSession) => void) | undefined;
}) => {
  const { refreshChatSessions } = useChatContext();
  const [isExpanded, setIsExpanded] = useState<boolean>(isInitiallyExpanded);
  // A freshly created folder (initiallySelected) starts in edit mode.
  const [isEditing, setIsEditing] = useState<boolean>(initiallySelected);
  const [editedFolderName, setEditedFolderName] = useState<string>(
    folder.folder_name
  );
  const [isHovering, setIsHovering] = useState<boolean>(false);
  const [isDragOver, setIsDragOver] = useState<boolean>(false);
  const { setPopup } = usePopup();
  const router = useRouter();
  // Toggles expansion and mirrors the new state into the "openedFolders"
  // cookie so it survives reloads. No-op while the name is being edited.
  const toggleFolderExpansion = () => {
    if (!isEditing) {
      const newIsExpanded = !isExpanded;
      setIsExpanded(newIsExpanded);
      // Update the cookie with the new state
      const openedFoldersCookieVal = Cookies.get("openedFolders");
      const openedFolders = openedFoldersCookieVal
        ? JSON.parse(openedFoldersCookieVal)
        : {};
      if (newIsExpanded) {
        openedFolders[folder.folder_id!] = true;
      } else {
        // Collapsing also dismisses any pending delete confirmation.
        setShowDeleteConfirm(false);
        delete openedFolders[folder.folder_id!];
      }
      Cookies.set("openedFolders", JSON.stringify(openedFolders));
    }
  };
  const handleEditFolderName = (event: React.MouseEvent<HTMLDivElement>) => {
    event.stopPropagation(); // Prevent the event from bubbling up to the toggle expansion
    setIsEditing(true);
  };
  const handleFolderNameChange = (
    event: React.ChangeEvent<HTMLInputElement>
  ) => {
    setEditedFolderName(event.target.value);
  };
  // Enter commits the rename (blur is handled separately with
  // continueEditing so losing focus doesn't end the edit session).
  const handleKeyDown = (event: React.KeyboardEvent<HTMLInputElement>) => {
    if (event.key === "Enter") {
      saveFolderName();
    }
  };
  // Persists the edited name via the folder API, then refreshes the route
  // so the sidebar picks up the change. Failures surface as a popup.
  const saveFolderName = async (continueEditing?: boolean) => {
    try {
      await updateFolderName(folder.folder_id!, editedFolderName);
      if (!continueEditing) {
        setIsEditing(false);
      }
      router.refresh(); // Refresh values to update the sidebar
    } catch (error) {
      setPopup({ message: "Failed to save folder name", type: "error" });
    }
  };
  // Declared after toggleFolderExpansion but referenced from its closure,
  // which is safe: the handler only runs after the first render.
  const [showDeleteConfirm, setShowDeleteConfirm] = useState<boolean>(false);
  const deleteConfirmRef = useRef<HTMLDivElement>(null);
  const handleDeleteClick = (event: React.MouseEvent<HTMLDivElement>) => {
    event.stopPropagation();
    setShowDeleteConfirm(true);
  };
  const confirmDelete = async (event: React.MouseEvent<HTMLButtonElement>) => {
    event.stopPropagation();
    try {
      await deleteFolder(folder.folder_id!);
      router.refresh();
    } catch (error) {
      setPopup({ message: "Failed to delete folder", type: "error" });
    } finally {
      // Always close the confirmation, whether the delete succeeded or not.
      setShowDeleteConfirm(false);
    }
  };
  const cancelDelete = (event: React.MouseEvent<HTMLButtonElement>) => {
    event.stopPropagation();
    setShowDeleteConfirm(false);
  };
  // Close the delete confirmation when clicking anywhere outside of it.
  useEffect(() => {
    const handleClickOutside = (event: MouseEvent) => {
      if (
        deleteConfirmRef.current &&
        !deleteConfirmRef.current.contains(event.target as Node)
      ) {
        setShowDeleteConfirm(false);
      }
    };
    document.addEventListener("mousedown", handleClickOutside);
    return () => {
      document.removeEventListener("mousedown", handleClickOutside);
    };
  }, []);
  const inputRef = useRef<HTMLInputElement>(null);
  // Focus the rename input immediately for newly created folders.
  useEffect(() => {
    if (initiallySelected && inputRef.current) {
      inputRef.current.focus();
    }
  }, [initiallySelected]);
  // Accepts a chat session dropped onto the folder and moves it in via the
  // folder API, then refreshes sessions and the route.
  const handleDrop = async (event: React.DragEvent<HTMLDivElement>) => {
    event.preventDefault();
    setIsDragOver(false);
    const chatSessionId = event.dataTransfer.getData(CHAT_SESSION_ID_KEY);
    try {
      await addChatToFolder(folder.folder_id!, chatSessionId);
      await refreshChatSessions();
      router.refresh();
    } catch (error) {
      setPopup({
        message: "Failed to add chat session to folder",
        type: "error",
      });
    }
  };
  // NOTE(review): despite the name, this holds the folder's chat sessions,
  // ordered oldest-first by time_updated. Array.prototype.sort also mutates
  // folder.chat_sessions in place — confirm that is intended.
  const folders = folder.chat_sessions.sort((a, b) => {
    return a.time_updated.localeCompare(b.time_updated);
  });
  // Determine whether to show the trash can icon
  const showTrashIcon = (isHovering && !isEditing) || showDeleteConfirm;
  return (
    <div
      key={folder.folder_id}
      onDragOver={(event) => {
        event.preventDefault();
        setIsDragOver(true);
      }}
      onDragLeave={() => setIsDragOver(false)}
      onDrop={handleDrop}
      className={`transition duration-300 ease-in-out rounded-md ${
        isDragOver ? "bg-accent-background-hovered" : ""
      }`}
    >
      <BasicSelectable fullWidth selected={false}>
        <div
          onMouseEnter={() => setIsHovering(true)}
          onMouseLeave={() => setIsHovering(false)}
        >
          <div onClick={toggleFolderExpansion} className="cursor-pointer">
            <div className="text-sm text-text-600 flex items-center justify-start w-full">
              <div className="mr-2">
                {isExpanded ? (
                  <FiChevronDown size={16} />
                ) : (
                  <FiChevronRight size={16} />
                )}
              </div>
              <div>
                <FiFolder size={16} className="mr-2" />
              </div>
              {isEditing ? (
                <input
                  ref={inputRef}
                  type="text"
                  value={editedFolderName}
                  onChange={handleFolderNameChange}
                  onKeyDown={handleKeyDown}
                  onBlur={() => saveFolderName(true)}
                  className="text-sm px-1 flex-1 min-w-0 -my-px mr-2"
                />
              ) : (
                <div className="flex-1 break-all min-w-0">
                  {editedFolderName || folder.folder_name}
                </div>
              )}
              <div className="flex ml-auto my-auto">
                {/* Edit pencil is only clickable while hovering and not editing. */}
                <div
                  onClick={handleEditFolderName}
                  className={`hover:bg-black/10 p-1 -m-1 rounded ${
                    isHovering && !isEditing
                      ? ""
                      : "opacity-0 pointer-events-none"
                  }`}
                >
                  <FiEdit2 size={16} />
                </div>
                <div className="relative">
                  <Popover
                    open={showDeleteConfirm}
                    onOpenChange={setShowDeleteConfirm}
                    content={
                      <div
                        onClick={handleDeleteClick}
                        className={`hover:bg-black/10 p-1 -m-1 rounded ml-2 ${
                          showTrashIcon ? "" : "opacity-0 pointer-events-none"
                        }`}
                      >
                        <FiTrash size={16} />
                      </div>
                    }
                    popover={
                      <div className="p-2 w-[225px] bg-background-100 rounded shadow-lg">
                        <p className="text-sm mb-2">
                          Are you sure you want to delete folder{" "}
                          <i>{folder.folder_name}</i>?
                        </p>
                        <div className="flex justify-end">
                          <button
                            onClick={confirmDelete}
                            className="bg-red-500 hover:bg-red-600 text-white px-2 py-1 rounded text-xs mr-2"
                          >
                            Yes
                          </button>
                          <button
                            onClick={cancelDelete}
                            className="bg-background-300 hover:bg-background-200 px-2 py-1 rounded text-xs"
                          >
                            No
                          </button>
                        </div>
                      </div>
                    }
                    side="top"
                    align="center"
                  />
                </div>
              </div>
              {isEditing && (
                <div className="flex ml-auto my-auto">
                  <div
                    onClick={() => saveFolderName()}
                    className="hover:bg-black/10 p-1 -m-1 rounded"
                  >
                    <FiCheck size={16} />
                  </div>
                  <div
                    onClick={() => setIsEditing(false)}
                    className="hover:bg-black/10 p-1 -m-1 rounded ml-2"
                  >
                    <FiX size={16} />
                  </div>
                </div>
              )}
            </div>
          </div>
        </div>
      </BasicSelectable>
      {/* Expanded Folder Content */}
      {isExpanded && folders && (
        <div className={"mr-4 pl-2 w-full border-l border-border"}>
          {folders.map((chatSession) => (
            <ChatSessionDisplay
              key={chatSession.id}
              chatSession={chatSession}
              isSelected={chatSession.id === currentChatId}
              showShareModal={showShareModal}
              showDeleteModal={showDeleteModal}
              parentFolderName={folder.folder_name}
            />
          ))}
        </div>
      )}
    </div>
  );
};
// Renders the sidebar's list of chat folders. Returns null when there are no
// folders; shows a drag hint when the only folder is still empty.
export const FolderList = ({
  folders,
  currentChatId,
  openedFolders,
  newFolderId,
  showShareModal,
  showDeleteModal,
}: {
  folders: Folder[];
  currentChatId?: string;
  openedFolders?: { [key: number]: boolean };
  newFolderId: number | null;
  showShareModal: ((chatSession: ChatSession) => void) | undefined;
  showDeleteModal: ((chatSession: ChatSession) => void) | undefined;
}) => {
  // Nothing to render without folders.
  if (!folders.length) {
    return null;
  }
  // The hint only applies when a single, empty folder exists.
  const showDragHint =
    folders.length == 1 && folders[0] && folders[0].chat_sessions.length == 0;
  return (
    <div className="mt-1 mb-1 overflow-visible">
      {folders.map((folder) => {
        // Restore the persisted expansion state for this folder, if any.
        const expandedInitially = openedFolders
          ? openedFolders[folder.folder_id!] || false
          : false;
        return (
          <FolderItem
            key={folder.folder_id}
            folder={folder}
            currentChatId={currentChatId}
            initiallySelected={newFolderId == folder.folder_id}
            isInitiallyExpanded={expandedInitially}
            showShareModal={showShareModal}
            showDeleteModal={showDeleteModal}
          />
        );
      })}
      {showDragHint && (
        <p className="text-sm font-normal text-subtle mt-2">
          {" "}
          Drag a chat into a folder to save for later{" "}
        </p>
      )}
    </div>
  );
};

View File

@@ -1,97 +0,0 @@
// Function to create a new folder.
// Sends the name to the folder API and returns the new folder's id.
// Throws an Error (with the server's `detail` when available) on failure.
export async function createFolder(folderName: string): Promise<number> {
  const response = await fetch("/api/folder", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({ folder_name: folderName }),
  });
  if (!response.ok) {
    // The error body is not guaranteed to be JSON (e.g. an HTML proxy error
    // page); fall back to the generic message instead of letting a JSON
    // parse failure mask the real HTTP error.
    let detail: string | undefined;
    try {
      const errorData = await response.json();
      detail = errorData?.detail;
    } catch {
      detail = undefined;
    }
    throw new Error(detail || "Failed to create folder");
  }
  const data = await response.json();
  return data;
}
// Function to add a chat session to a folder.
// POSTs the session id to the folder's add endpoint; throws on any
// non-OK response.
export async function addChatToFolder(
  folderId: number,
  chatSessionId: string
): Promise<void> {
  const endpoint = `/api/folder/${folderId}/add-chat-session`;
  const res = await fetch(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ chat_session_id: chatSessionId }),
  });
  if (res.ok) {
    return;
  }
  throw new Error("Failed to add chat to folder");
}
// Function to remove a chat session from a folder.
// POSTs the session id to the folder's remove endpoint; throws on any
// non-OK response.
export async function removeChatFromFolder(
  folderId: number,
  chatSessionId: string
): Promise<void> {
  const endpoint = `/api/folder/${folderId}/remove-chat-session`;
  const res = await fetch(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ chat_session_id: chatSessionId }),
  });
  if (res.ok) {
    return;
  }
  throw new Error("Failed to remove chat from folder");
}
// Function to delete a folder.
// Issues a DELETE against the folder resource; throws on any non-OK
// response. An empty JSON body is sent to match the Content-Type header.
export async function deleteFolder(folderId: number): Promise<void> {
  const res = await fetch(`/api/folder/${folderId}`, {
    method: "DELETE",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({}),
  });
  if (res.ok) {
    return;
  }
  throw new Error("Failed to delete folder");
}
// Function to update a folder's name.
// PATCHes the new name onto the folder resource; throws on any non-OK
// response.
export async function updateFolderName(
  folderId: number,
  newName: string
): Promise<void> {
  const res = await fetch(`/api/folder/${folderId}`, {
    method: "PATCH",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ folder_name: newName }),
  });
  if (res.ok) {
    return;
  }
  throw new Error("Failed to update folder name");
}
// Function to update folder display priorities.
// PUTs the complete folder-id -> priority map to the reorder endpoint;
// throws on any non-OK response.
export async function updateFolderDisplayPriorities(
  displayPriorityMap: Record<number, number>
): Promise<void> {
  const payload = JSON.stringify({
    display_priority_map: displayPriorityMap,
  });
  const res = await fetch(`/api/folder/reorder`, {
    method: "PUT",
    headers: { "Content-Type": "application/json" },
    body: payload,
  });
  if (res.ok) {
    return;
  }
  throw new Error("Failed to update folder display priorities");
}

View File

@@ -3,6 +3,7 @@ import React, {
useContext,
useEffect,
useMemo,
useRef,
useState,
} from "react";
import { FiPlus } from "react-icons/fi";
@@ -14,30 +15,34 @@ import { InputPrompt } from "@/app/chat/interfaces";
import { FilterManager, LlmManager } from "@/lib/hooks";
import { useChatContext } from "@/components/context/ChatContext";
import { ChatFileType } from "../../interfaces";
import {
DocumentIcon2,
FileIcon,
FileUploadIcon,
SendIcon,
StopGeneratingIcon,
} from "@/components/icons/icons";
import { OnyxDocument, SourceMetadata } from "@/lib/search/interfaces";
import { OnyxDocument } from "@/lib/search/interfaces";
import { ChatState } from "@/app/chat/interfaces";
import { useAssistantsContext } from "@/components/context/AssistantsContext";
import { CalendarIcon, TagIcon, XIcon, FolderIcon } from "lucide-react";
import { CalendarIcon, TagIcon, XIcon } from "lucide-react";
import { SourceIcon } from "@/components/SourceIcon";
import { getFormattedDateRangeString } from "@/lib/dateUtils";
import { truncateString } from "@/lib/utils";
import { buildImgUrl } from "@/app/chat/components/files/images/utils";
import { useUser } from "@/components/user/UserProvider";
import { SettingsContext } from "@/components/settings/SettingsProvider";
import { useDocumentsContext } from "@/app/chat/my-documents/DocumentsContext";
import { UnconfiguredLlmProviderText } from "@/components/chat/UnconfiguredLlmProviderText";
import { DeepResearchToggle } from "./DeepResearchToggle";
import { ActionToggle } from "./ActionManagement";
import { SelectedTool } from "./SelectedTool";
import { getProviderIcon } from "@/app/admin/configuration/llm/utils";
import FilePicker from "../files/FilePicker";
import { useProjectsContext } from "../../projects/ProjectsContext";
import { FileCard } from "../projects/ProjectContextPanel";
import {
ProjectFile,
UserFileStatus,
} from "@/app/chat/projects/projectsService";
import { useChatController } from "../../hooks/useChatController";
const MAX_INPUT_HEIGHT = 200;
@@ -90,7 +95,6 @@ export const SourceChip = ({
);
interface ChatInputBarProps {
toggleDocSelection: () => void;
removeDocs: () => void;
showConfigureAPIKey: () => void;
selectedDocuments: OnyxDocument[];
@@ -100,7 +104,8 @@ interface ChatInputBarProps {
onSubmit: () => void;
llmManager: LlmManager;
chatState: ChatState;
currentSessionFileTokenCount: number;
availableContextTokens: number;
// assistants
selectedAssistant: MinimalPersonaSnapshot;
@@ -115,7 +120,6 @@ interface ChatInputBarProps {
}
export const ChatInputBar = React.memo(function ChatInputBar({
toggleDocSelection,
retrievalEnabled,
removeDocs,
toggleDocumentSidebar,
@@ -127,7 +131,8 @@ export const ChatInputBar = React.memo(function ChatInputBar({
stopGenerating,
onSubmit,
chatState,
currentSessionFileTokenCount,
availableContextTokens,
// assistants
selectedAssistant,
@@ -139,22 +144,25 @@ export const ChatInputBar = React.memo(function ChatInputBar({
placeholder,
}: ChatInputBarProps) {
const { user } = useUser();
const {
selectedFiles,
selectedFolders,
removeSelectedFile,
removeSelectedFolder,
currentMessageFiles,
setCurrentMessageFiles,
} = useDocumentsContext();
const { forcedToolIds, setForcedToolIds } = useAssistantsContext();
const { currentMessageFiles, setCurrentMessageFiles, recentFiles } =
useProjectsContext();
// Create a Set of IDs from currentMessageFiles for efficient lookup
// Assuming FileDescriptor.id corresponds conceptually to FileResponse.file_id or FileResponse.id
const currentMessageFileIds = useMemo(
() => new Set(currentMessageFiles.map((f) => String(f.id))), // Ensure IDs are strings for comparison
[currentMessageFiles]
const currentIndexingFiles = useMemo(() => {
return currentMessageFiles.filter(
(file) => file.status === UserFileStatus.PROCESSING
);
}, [currentMessageFiles]);
const handleUploadChange = useCallback(
async (e: React.ChangeEvent<HTMLInputElement>) => {
const files = e.target.files;
if (!files || files.length === 0) return;
handleFileUpload(Array.from(files));
e.target.value = "";
},
[handleFileUpload]
);
const settings = useContext(SettingsContext);
@@ -187,8 +195,17 @@ export const ChatInputBar = React.memo(function ChatInputBar({
}
};
const handleRemoveMessageFile = useCallback(
(fileId: string) => {
setCurrentMessageFiles((prev) => prev.filter((f) => f.id !== fileId));
},
[setCurrentMessageFiles]
);
const { llmProviders, inputPrompts } = useChatContext();
const [showPrompts, setShowPrompts] = useState(false);
const [showStickyBanner, setShowStickyBanner] = useState(false);
const [bannerMessage, setBannerMessage] = useState<string | null>(null);
const hidePrompts = () => {
setTimeout(() => {
@@ -197,6 +214,11 @@ export const ChatInputBar = React.memo(function ChatInputBar({
setTabbingIconIndex(0);
};
useEffect(() => {
const timer = setTimeout(() => setShowStickyBanner(true), 50);
return () => clearTimeout(timer);
}, []);
const updateInputPrompt = (prompt: InputPrompt) => {
hidePrompts();
setMessage(`${prompt.content}`);
@@ -251,6 +273,33 @@ export const ChatInputBar = React.memo(function ChatInputBar({
[inputPrompts, startFilterSlash]
);
useEffect(() => {
if (currentMessageFiles.length > 0 && currentIndexingFiles.length > 0) {
const currentFilesTokenTotal = currentMessageFiles.reduce(
(acc, file) => acc + (file.token_count || 0),
0
);
const totalTokens =
(currentSessionFileTokenCount || 0) + currentFilesTokenTotal;
if (totalTokens < availableContextTokens) {
setBannerMessage(
"Since your file is within the context limit, you dont need to wait for processing, messages sent early still give the best answer."
);
} else {
setBannerMessage(
"Since your total file context exceeds the maximum length, you need to wait until the file processing completes to get the better answer."
);
}
} else {
setBannerMessage(null);
}
}, [
currentMessageFiles,
currentSessionFileTokenCount,
currentIndexingFiles,
availableContextTokens,
]);
const handleKeyDown = (e: React.KeyboardEvent<HTMLTextAreaElement>) => {
if (showPrompts && (e.key === "Tab" || e.key == "Enter")) {
e.preventDefault();
@@ -286,48 +335,11 @@ export const ChatInputBar = React.memo(function ChatInputBar({
}
};
// Combine selectedFiles and currentMessageFiles for unified rendering
const allFiles = useMemo(() => {
const combined: Array<{
id: string;
name: string;
chatFileType: ChatFileType;
isUploading?: boolean;
source: "selected" | "current";
originalFile: any;
}> = [];
// Add selected files (excluding those already in currentMessageFiles)
selectedFiles.forEach((file) => {
if (!currentMessageFileIds.has(String(file.file_id || file.id))) {
combined.push({
id: String(file.file_id || file.id),
name: file.name,
chatFileType: file.chat_file_type,
source: "selected",
originalFile: file,
});
}
});
// Add current message files
currentMessageFiles.forEach((file, index) => {
combined.push({
id: file.id,
name: file.name || `File${file.id}`,
chatFileType: file.type,
isUploading: file.isUploading,
source: "current",
originalFile: file,
});
});
return combined;
}, [selectedFiles, currentMessageFiles, currentMessageFileIds]);
return (
<div id="onyx-chat-input">
<div className="flex justify-center mx-auto">
<div
className={`flex justify-center mx-auto ${bannerMessage ? "mt-4" : ""}`}
>
<div
className="
max-w-full
@@ -337,6 +349,8 @@ export const ChatInputBar = React.memo(function ChatInputBar({
mx-auto
"
>
{/* Sticky background banner will be positioned relative to the card wrapper below */}
{showPrompts && user?.preferences?.shortcut_enabled && (
<div className="text-sm absolute inset-x-0 top-0 w-full transform -translate-y-full">
<div className="rounded-lg overflow-y-auto max-h-[200px] py-1.5 bg-input-background dark:border-none border border-border shadow-lg mx-2 px-1.5 mt-2 rounded z-10">
@@ -380,32 +394,79 @@ export const ChatInputBar = React.memo(function ChatInputBar({
showConfigureAPIKey={showConfigureAPIKey}
/>
<div className="w-full h-[10px]"></div>
<div
className="
opacity-100
w-full
h-fit
flex
flex-col
border
shadow-lg
bg-input-background
border-input-border
dark:border-none
rounded-xl
overflow-hidden
text-text-chatbar
[&:has(textarea:focus)]::ring-1
[&:has(textarea:focus)]::ring-black
"
>
<textarea
onPaste={handlePaste}
onKeyDownCapture={handleKeyDown}
onChange={handleInputChange}
ref={textAreaRef}
id="onyx-chat-input-textarea"
className={`
<div className="relative">
{bannerMessage && (
<>
<div
className={`
absolute
inset-x-0
-top-8
h-16
rounded-xl
border
shadow-sm
z-0
transition-all
duration-300
ease-out
${showStickyBanner ? "opacity-100 translate-y-0" : "opacity-0 -translate-y-2"}
bg-amber-50 dark:bg-yellow-800/30
border-amber-300 dark:border-yellow-700
flex
items-start
pt-2
justify-center
`}
>
<span className="text-xs text-neutral-800 dark:text-neutral-100 text-center">
{bannerMessage}
</span>
</div>
</>
)}
<div
className="
opacity-100
w-full
h-fit
flex
flex-col
border
shadow-lg
bg-input-background
border-input-border
dark:border-none
rounded-xl
overflow-hidden
text-text-chatbar
[&:has(textarea:focus)]::ring-1
[&:has(textarea:focus)]::ring-black
"
// Ensure this sits above the sticky background banner
style={{ position: "relative", zIndex: 10 }}
>
{currentMessageFiles.length > 0 && (
<div className="px-4 pt-4">
<div className="flex flex-wrap gap-2">
{currentMessageFiles.map((file) => (
<FileCard
key={file.id}
file={file}
removeFile={handleRemoveMessageFile}
/>
))}
</div>
</div>
)}
<textarea
onPaste={handlePaste}
onKeyDownCapture={handleKeyDown}
onChange={handleInputChange}
ref={textAreaRef}
id="onyx-chat-input-textarea"
className={`
m-0
w-full
shrink
@@ -431,277 +492,224 @@ export const ChatInputBar = React.memo(function ChatInputBar({
px-5
py-5
`}
autoFocus
style={{ scrollbarWidth: "thin" }}
role="textarea"
aria-multiline
placeholder={
placeholder ||
`How can ${selectedAssistant.name} help you today`
}
value={message}
onKeyDown={(event) => {
if (
event.key === "Enter" &&
!showPrompts &&
!event.shiftKey &&
!(event.nativeEvent as any).isComposing
) {
event.preventDefault();
if (message) {
onSubmit();
}
autoFocus
style={{ scrollbarWidth: "thin" }}
role="textarea"
aria-multiline
placeholder={
placeholder ||
`How can ${selectedAssistant.name} help you today`
}
}}
suppressContentEditableWarning={true}
/>
value={message}
onKeyDown={(event) => {
if (
event.key === "Enter" &&
!showPrompts &&
!event.shiftKey &&
!(event.nativeEvent as any).isComposing
) {
event.preventDefault();
if (message) {
setBannerMessage(null);
onSubmit();
}
}
}}
suppressContentEditableWarning={true}
/>
{(selectedDocuments.length > 0 ||
selectedFiles.length > 0 ||
selectedFolders.length > 0 ||
currentMessageFiles.length > 0 ||
filterManager.timeRange ||
filterManager.selectedDocumentSets.length > 0 ||
filterManager.selectedTags.length > 0 ||
filterManager.selectedSources.length > 0) && (
<div className="flex bg-input-background gap-x-.5 px-2">
<div className="flex gap-x-1 px-2 overflow-visible overflow-x-scroll items-end miniscroll">
{filterManager.selectedTags &&
filterManager.selectedTags.map((tag, index) => (
<SourceChip
key={index}
icon={<TagIcon size={12} />}
title={`#${tag.tag_key}_${tag.tag_value}`}
onRemove={() => {
filterManager.setSelectedTags(
filterManager.selectedTags.filter(
(t) => t.tag_key !== tag.tag_key
)
);
}}
/>
))}
{/* Unified file rendering section for both selected and current message files */}
{allFiles.map((file, index) =>
file.chatFileType === ChatFileType.IMAGE ? (
<SourceChip
key={`${file.source}-${file.id}-${index}`}
icon={
file.isUploading ? (
<FiLoader className="animate-spin" />
) : (
<img
className="h-full py-.5 object-cover rounded-lg bg-background cursor-pointer"
src={buildImgUrl(file.id)}
alt={file.name || "File image"}
/>
)
}
title={file.name}
onRemove={() => {
if (file.source === "selected") {
removeSelectedFile(file.originalFile);
} else {
setCurrentMessageFiles(
currentMessageFiles.filter(
(fileInFilter) => fileInFilter.id !== file.id
{(selectedDocuments.length > 0 ||
currentMessageFiles.length > 0 ||
filterManager.timeRange ||
filterManager.selectedDocumentSets.length > 0 ||
filterManager.selectedTags.length > 0 ||
filterManager.selectedSources.length > 0) && (
<div className="flex bg-input-background gap-x-.5 px-2">
<div className="flex gap-x-1 px-2 overflow-visible overflow-x-scroll items-end miniscroll">
{filterManager.selectedTags &&
filterManager.selectedTags.map((tag, index) => (
<SourceChip
key={index}
icon={<TagIcon size={12} />}
title={`#${tag.tag_key}_${tag.tag_value}`}
onRemove={() => {
filterManager.setSelectedTags(
filterManager.selectedTags.filter(
(t) => t.tag_key !== tag.tag_key
)
);
}
}}
/>
) : (
<SourceChip
key={`${file.source}-${file.id}-${index}`}
icon={
<FileIcon
className={
file.source === "current" ? "text-red-500" : ""
}
size={16}
/>
}
title={file.name}
onRemove={() => {
if (file.source === "selected") {
removeSelectedFile(file.originalFile);
} else {
setCurrentMessageFiles(
currentMessageFiles.filter(
(fileInFilter) => fileInFilter.id !== file.id
)
);
}
}}
/>
)
)}
{selectedFolders.map((folder) => (
<SourceChip
key={folder.id}
icon={<FolderIcon size={16} />}
title={folder.name}
onRemove={() => removeSelectedFolder(folder)}
/>
))}
{filterManager.timeRange && (
<SourceChip
truncateTitle={false}
key="time-range"
icon={<CalendarIcon size={12} />}
title={`${getFormattedDateRangeString(
filterManager.timeRange.from,
filterManager.timeRange.to
)}`}
onRemove={() => {
filterManager.setTimeRange(null);
}}
/>
)}
{filterManager.selectedDocumentSets.length > 0 &&
filterManager.selectedDocumentSets.map((docSet, index) => (
<SourceChip
key={`doc-set-${index}`}
icon={<DocumentIcon2 size={16} />}
title={docSet}
onRemove={() => {
filterManager.setSelectedDocumentSets(
filterManager.selectedDocumentSets.filter(
(ds) => ds !== docSet
)
);
}}
/>
))}
{filterManager.selectedSources.length > 0 &&
filterManager.selectedSources.map((source, index) => (
<SourceChip
key={`source-${index}`}
icon={
<SourceIcon
sourceType={source.internalName}
iconSize={16}
/>
}
title={source.displayName}
onRemove={() => {
filterManager.setSelectedSources(
filterManager.selectedSources.filter(
(s) => s.internalName !== source.internalName
)
);
}}
/>
))}
{selectedDocuments.length > 0 && (
<SourceChip
key="selected-documents"
onClick={() => {
toggleDocumentSidebar();
}}
icon={<FileIcon size={16} />}
title={`${selectedDocuments.length} selected`}
onRemove={removeDocs}
/>
)}
</div>
</div>
)}
<div className="flex pr-4 pb-2 justify-between bg-input-background items-center w-full ">
<div className="space-x-1 flex px-4 ">
<ChatInputOption
flexPriority="stiff"
Icon={FileUploadIcon}
onClick={() => {
toggleDocSelection();
}}
tooltipContent={"Upload files and attach user files"}
/>
{selectedAssistant.tools.length > 0 && (
<ActionToggle selectedAssistant={selectedAssistant} />
)}
{retrievalEnabled &&
settings?.settings.deep_research_enabled && (
<DeepResearchToggle
deepResearchEnabled={deepResearchEnabled}
toggleDeepResearch={toggleDeepResearch}
/>
)}
{forcedToolIds.length > 0 && (
<div className="pl-1 flex items-center gap-2 text-blue-500">
{forcedToolIds.map((toolId) => {
const tool = selectedAssistant.tools.find(
(tool) => tool.id === toolId
);
if (!tool) {
return null;
}
return (
<SelectedTool
key={toolId}
tool={tool}
onClick={() => {
setForcedToolIds((prev) =>
prev.filter((id) => id !== toolId)
);
}}
/>
);
})}
))}
{filterManager.timeRange && (
<SourceChip
truncateTitle={false}
key="time-range"
icon={<CalendarIcon size={12} />}
title={`${getFormattedDateRangeString(
filterManager.timeRange.from,
filterManager.timeRange.to
)}`}
onRemove={() => {
filterManager.setTimeRange(null);
}}
/>
)}
{filterManager.selectedDocumentSets.length > 0 &&
filterManager.selectedDocumentSets.map(
(docSet, index) => (
<SourceChip
key={`doc-set-${index}`}
icon={<DocumentIcon2 size={16} />}
title={docSet}
onRemove={() => {
filterManager.setSelectedDocumentSets(
filterManager.selectedDocumentSets.filter(
(ds) => ds !== docSet
)
);
}}
/>
)
)}
{filterManager.selectedSources.length > 0 &&
filterManager.selectedSources.map((source, index) => (
<SourceChip
key={`source-${index}`}
icon={
<SourceIcon
sourceType={source.internalName}
iconSize={16}
/>
}
title={source.displayName}
onRemove={() => {
filterManager.setSelectedSources(
filterManager.selectedSources.filter(
(s) => s.internalName !== source.internalName
)
);
}}
/>
))}
{selectedDocuments.length > 0 && (
<SourceChip
key="selected-documents"
onClick={() => {
toggleDocumentSidebar();
}}
icon={<FileIcon size={16} />}
title={`${selectedDocuments.length} selected`}
onRemove={removeDocs}
/>
)}
</div>
)}
</div>
</div>
)}
<div className="flex items-center my-auto gap-x-2">
<LLMPopover
llmProviders={llmProviders}
llmManager={llmManager}
requiresImageGeneration={true}
currentAssistant={selectedAssistant}
/>
<div className="flex pr-4 pb-2 justify-between bg-input-background items-center w-full ">
<div className="space-x-1 flex px-4 ">
<FilePicker
onPickRecent={(file: ProjectFile) => {
// Check if file with same ID already exists
if (
!currentMessageFiles.some(
(existingFile) =>
existingFile.file_id === file.file_id
)
) {
setCurrentMessageFiles((prev) => [...prev, file]);
}
}}
recentFiles={recentFiles}
handleUploadChange={handleUploadChange}
/>
<button
id="onyx-chat-input-send-button"
className={`cursor-pointer ${
chatState == "streaming" ||
chatState == "toolBuilding" ||
chatState == "loading"
? chatState != "streaming"
? "bg-neutral-500 dark:bg-neutral-400 "
: "bg-neutral-900 dark:bg-neutral-50"
: "bg-red-200"
} h-[22px] w-[22px] rounded-full`}
onClick={() => {
if (chatState == "streaming") {
stopGenerating();
} else if (message) {
onSubmit();
}
}}
>
{chatState == "streaming" ||
chatState == "toolBuilding" ||
chatState == "loading" ? (
<StopGeneratingIcon
size={8}
className="text-neutral-50 dark:text-neutral-900 m-auto text-white flex-none"
/>
) : (
<SendIcon
size={22}
className={`text-neutral-50 dark:text-neutral-900 p-1 my-auto rounded-full ${
chatState == "input" && message
? "bg-neutral-900 dark:bg-neutral-50"
: "bg-neutral-500 dark:bg-neutral-400"
}`}
/>
{selectedAssistant.tools.length > 0 && (
<ActionToggle selectedAssistant={selectedAssistant} />
)}
</button>
{retrievalEnabled &&
settings?.settings.deep_research_enabled && (
<DeepResearchToggle
deepResearchEnabled={deepResearchEnabled}
toggleDeepResearch={toggleDeepResearch}
/>
)}
{forcedToolIds.length > 0 && (
<div className="pl-1 flex items-center gap-2 text-blue-500">
{forcedToolIds.map((toolId) => {
const tool = selectedAssistant.tools.find(
(tool) => tool.id === toolId
);
if (!tool) {
return null;
}
return (
<SelectedTool
key={toolId}
tool={tool}
onClick={() => {
setForcedToolIds((prev) =>
prev.filter((id) => id !== toolId)
);
}}
/>
);
})}
</div>
)}
</div>
<div className="flex items-center my-auto gap-x-2">
<LLMPopover
llmProviders={llmProviders}
llmManager={llmManager}
requiresImageGeneration={true}
currentAssistant={selectedAssistant}
/>
<button
id="onyx-chat-input-send-button"
className={`cursor-pointer ${
chatState == "streaming" ||
chatState == "toolBuilding" ||
chatState == "loading"
? chatState != "streaming"
? "bg-neutral-500 dark:bg-neutral-400 "
: "bg-neutral-900 dark:bg-neutral-50"
: "bg-red-200"
} h-[22px] w-[22px] rounded-full`}
onClick={() => {
if (chatState == "streaming") {
stopGenerating();
} else if (message) {
setBannerMessage(null);
onSubmit();
}
}}
>
{chatState == "streaming" ||
chatState == "toolBuilding" ||
chatState == "loading" ? (
<StopGeneratingIcon
size={8}
className="text-neutral-50 dark:text-neutral-900 m-auto text-white flex-none"
/>
) : (
<SendIcon
size={22}
className={`text-neutral-50 dark:text-neutral-900 p-1 my-auto rounded-full ${
chatState == "input" && message
? "bg-neutral-900 dark:bg-neutral-50"
: "bg-neutral-500 dark:bg-neutral-400"
}`}
/>
)}
</button>
</div>
</div>
</div>
</div>

View File

@@ -46,9 +46,8 @@ export const ChatInputOption: React.FC<ChatInputOptionProps> = ({
group
rounded-lg
text-input-text
hover:bg-background-chat-hover
hover:text-neutral-900
dark:hover:text-neutral-50
group-hover:text-neutral-900
dark:group-hover:text-neutral-50
py-1.5
px-2
${

View File

@@ -0,0 +1,132 @@
"use client";
import React, { useMemo } from "react";
import Link from "next/link";
import { MessageSquareText } from "lucide-react";
import { ChatSessionMorePopup } from "@/components/sidebar/ChatSessionMorePopup";
import { useProjectsContext } from "../../projects/ProjectsContext";
import { ChatSession } from "@/app/chat/interfaces";
import { InfoIcon } from "@/components/icons/icons";
import {
Tooltip,
TooltipContent,
TooltipProvider,
TooltipTrigger,
} from "@/components/ui/tooltip";
import { formatRelativeTime } from "./project_utils";
/**
 * Lists the chat sessions belonging to the currently selected project,
 * sorted newest-first by `time_updated`. Renders nothing when no project
 * is selected.
 *
 * Chats whose persona is NOT a default assistant get a small info icon
 * explaining that project files/instructions don't apply to them.
 */
export default function ProjectChatSessionList() {
  const {
    currentProjectDetails,
    currentProjectId,
    refreshCurrentProjectDetails,
  } = useProjectsContext();
  // Id of the chat currently in "rename" mode, or null when none is.
  const [isRenamingChat, setIsRenamingChat] = React.useState<string | null>(
    null
  );
  // Newest-first copy of the project's chat sessions (sort on a copy so
  // the context-owned array is never mutated).
  const projectChats: ChatSession[] = useMemo(() => {
    const sessions = currentProjectDetails?.project?.chat_sessions || [];
    return [...sessions].sort(
      (a, b) =>
        new Date(b.time_updated).getTime() - new Date(a.time_updated).getTime()
    );
  }, [currentProjectDetails?.project?.chat_sessions]);
  if (!currentProjectId) return null;
  return (
    <div className="flex flex-col gap-2 p-4 w-[800px] mx-auto mt-6">
      <div className="flex items-center gap-2">
        <h2 className="text-md font-light">Recent Chats</h2>
      </div>
      {projectChats.length === 0 ? (
        <p className="text-sm text-text-400">No chats yet.</p>
      ) : (
        <div className="flex flex-col gap-2 max-h-[46vh] overflow-y-auto overscroll-y-none pr-1">
          {projectChats.map((chat) => (
            <Link
              key={chat.id}
              href={`/chat?chatId=${encodeURIComponent(chat.id)}`}
              className="flex items-center justify-between rounded-xl bg-background-background px-3 py-2 shadow-sm hover:bg-accent-background-hovered transition-colors"
            >
              <div className="flex items-center gap-3 min-w-0">
                <div className="flex h-10 w-10 items-center justify-center rounded-lg bg-background-dark/60 flex-none">
                  <MessageSquareText className="h-5 w-5 text-text-400" />
                </div>
                <div className="flex flex-col overflow-hidden">
                  <div className="flex items-center gap-1 min-w-0">
                    <span
                      className="text-sm font-medium text-text-darker truncate"
                      title={chat.name}
                    >
                      {chat.name || "Unnamed Chat"}
                    </span>
                    {(() => {
                      // Only explicitly-false entries (persona known and
                      // non-default) show the warning; undefined means the
                      // mapping is missing and we stay silent.
                      const personaIdToDefault =
                        currentProjectDetails?.persona_id_to_is_default || {};
                      const isDefault = personaIdToDefault[chat.persona_id];
                      if (isDefault === false) {
                        return (
                          <TooltipProvider>
                            <Tooltip>
                              <TooltipTrigger asChild>
                                <div className="flex items-center text-amber-600 dark:text-yellow-500 cursor-default flex-shrink-0">
                                  <InfoIcon
                                    size={14}
                                    className="text-amber-600 dark:text-yellow-500"
                                  />
                                </div>
                              </TooltipTrigger>
                              <TooltipContent side="top" align="center">
                                <p className="max-w-[220px] text-sm">
                                  Project files and instructions aren&apos;t
                                  applied here because this chat uses a custom
                                  assistant.
                                </p>
                              </TooltipContent>
                            </Tooltip>
                          </TooltipProvider>
                        );
                      }
                      return null;
                    })()}
                  </div>
                  <span className="text-xs text-text-400 truncate">
                    Last message {formatRelativeTime(chat.time_updated)}
                  </span>
                </div>
              </div>
              {/* Stop the more-menu clicks from triggering the Link navigation. */}
              <div
                className="flex items-center gap-2"
                onClick={(e) => e.preventDefault()}
              >
                <ChatSessionMorePopup
                  chatSession={chat}
                  projectId={currentProjectId}
                  isRenamingChat={isRenamingChat === chat.id}
                  setIsRenamingChat={(value) =>
                    setIsRenamingChat(value ? chat.id : null)
                  }
                  search={false}
                  afterDelete={() => {
                    refreshCurrentProjectDetails();
                  }}
                  afterMove={() => {
                    refreshCurrentProjectDetails();
                  }}
                  afterRemoveFromProject={() => {
                    refreshCurrentProjectDetails();
                  }}
                />
              </div>
            </Link>
          ))}
        </div>
      )}
    </div>
  );
}

View File

@@ -0,0 +1,336 @@
"use client";
import React, {
useCallback,
useEffect,
useMemo,
useRef,
useState,
} from "react";
import { Button } from "@/components/ui/button";
import {
Dialog,
DialogContent,
DialogDescription,
DialogHeader,
DialogTitle,
} from "@/components/ui/dialog";
import { Textarea } from "@/components/ui/textarea";
import { FileIcon, FolderOpen, Loader2, X } from "lucide-react";
import { Separator } from "@/components/ui/separator";
import { RiPlayListAddFill } from "react-icons/ri";
import { useProjectsContext } from "../../projects/ProjectsContext";
import FilePicker from "../files/FilePicker";
import FilesList from "../files/FilesList";
import type {
ProjectFile,
CategorizedFiles,
} from "../../projects/projectsService";
import { UserFileStatus } from "../../projects/projectsService";
import { ChatFileType } from "@/app/chat/interfaces";
import { usePopup } from "@/components/admin/connectors/Popup";
/**
 * Compact card for a single project file: a type icon (or spinner while
 * the file is uploading/processing), the file name, and either a short
 * file-type label or an "Uploading…/Processing…" status line.
 *
 * A small "x" delete button is overlaid on the card's corner; it is
 * hidden while the file is still processing so half-ingested files
 * cannot be removed.
 */
export function FileCard({
  file,
  removeFile,
}: {
  file: ProjectFile;
  removeFile: (fileId: string) => void;
}) {
  // Short uppercase label derived from the file type, e.g.
  // "application/pdf" -> "PDF". Falls back to the raw type when there is
  // no "/" segment.
  const typeLabel = useMemo(() => {
    if (!file.file_type) return "";
    const parts = String(file.file_type).split("/");
    const ext = parts[parts.length - 1] || file.file_type;
    return String(ext).toUpperCase();
  }, [file.file_type]);
  // Status is compared case-insensitively as a string — presumably the
  // backend may send either case; verify against UserFileStatus.
  const isProcessing =
    String(file.status).toLowerCase() === "processing" ||
    String(file.status).toLowerCase() === "uploading";
  const handleRemoveFile = async (e: React.MouseEvent) => {
    e.stopPropagation();
    // Ignore remove clicks while the upload/processing is in flight.
    if (isProcessing) return;
    removeFile(file.id);
  };
  return (
    <div className="relative flex items-center gap-3 border border-border rounded-xl bg-background-background px-3 py-2 shadow-sm">
      {!isProcessing && (
        <button
          onClick={handleRemoveFile}
          title="Delete file"
          aria-label="Delete file"
          className="absolute -left-2 -top-2 z-10 h-5 w-5 flex items-center justify-center rounded-[4px] border border-border text-[11px] bg-[#1f1f1f] text-white dark:bg-[#fefcfa] dark:text-black shadow-sm hover:opacity-90"
        >
          <X className="h-4 w-4 dark:text-dark-tremor-background-muted" />
        </button>
      )}
      <div className="flex h-10 w-10 items-center justify-center rounded-lg bg-background-dark/60">
        {isProcessing ? (
          <Loader2 className="h-5 w-5 text-text-400 animate-spin" />
        ) : (
          <FileIcon className="h-5 w-5 text-text-400" />
        )}
      </div>
      <div className="flex flex-col overflow-hidden">
        <span
          className="text-sm font-medium text-text-darker truncate"
          title={file.name}
        >
          {file.name}
        </span>
        {/* While processing, distinguish the upload phase from ingestion. */}
        <span className="text-xs text-text-400 truncate">
          {isProcessing
            ? file.status === UserFileStatus.UPLOADING
              ? "Uploading..."
              : "Processing..."
            : typeLabel}
        </span>
      </div>
    </div>
  );
}
export default function ProjectContextPanel() {
const [isInstrOpen, setIsInstrOpen] = useState(false);
const [showProjectFiles, setShowProjectFiles] = useState(false);
const [instructionText, setInstructionText] = useState("");
const { popup, setPopup } = usePopup();
const [tempProjectFiles, setTempProjectFiles] = useState<ProjectFile[]>([]);
const {
upsertInstructions,
currentProjectDetails,
currentProjectId,
uploadFiles,
recentFiles,
unlinkFileFromProject,
linkFileToProject,
} = useProjectsContext();
const [isUploading, setIsUploading] = useState(false);
useEffect(() => {
const preset = currentProjectDetails?.instructions?.system_prompt ?? "";
setInstructionText(preset);
}, [currentProjectDetails?.instructions?.system_prompt ?? ""]);
if (!currentProjectId) return null; // no selection yet
const handleUploadChange = useCallback(
async (e: React.ChangeEvent<HTMLInputElement>) => {
const files = e.target.files;
if (!files || files.length === 0) return;
setIsUploading(true);
try {
// Show temporary uploading files immediately
const tempFiles: ProjectFile[] = Array.from(files).map((file) => ({
id: file.name,
file_id: file.name,
name: file.name,
project_id: currentProjectId,
user_id: null,
created_at: new Date().toISOString(),
status: UserFileStatus.UPLOADING,
file_type: file.type,
last_accessed_at: new Date().toISOString(),
chat_file_type: ChatFileType.DOCUMENT,
token_count: 0,
chunk_count: 0,
}));
setTempProjectFiles((prev) => [...prev, ...tempFiles]);
const result: CategorizedFiles = await uploadFiles(
Array.from(files),
currentProjectId
);
// Replace temp entries with backend entries (by index) so keys become backend IDs. This will prevent flickering.
setTempProjectFiles((prev) => [
...prev.slice(0, -tempFiles.length),
...result.user_files,
]);
const unsupported = result?.unsupported_files || [];
const nonAccepted = result?.non_accepted_files || [];
if (unsupported.length > 0 || nonAccepted.length > 0) {
const parts: string[] = [];
if (unsupported.length > 0) {
parts.push(`Unsupported: ${unsupported.join(", ")}`);
}
if (nonAccepted.length > 0) {
parts.push(`Not accepted: ${nonAccepted.join(", ")}`);
}
setPopup({
type: "warning",
message: `Some files were not uploaded. ${parts.join(" | ")}`,
});
}
} finally {
setIsUploading(false);
setTempProjectFiles([]);
e.target.value = "";
}
},
[currentProjectId, uploadFiles, setPopup]
);
return (
<div className="flex flex-col gap-2 p-4 w-[800px] mx-auto mt-10">
<FolderOpen size={34} />
<h1 className="text-4xl font-medium">
{currentProjectDetails?.project?.name || "Loading project..."}
</h1>
<Separator />
<div className="flex flex-row gap-2 justify-between">
<div className="min-w-0">
<p className="font-bold">Instructions</p>
{currentProjectDetails?.instructions ? (
<p
className="font-light truncate"
title={currentProjectDetails.instructions.system_prompt}
>
{currentProjectDetails.instructions.system_prompt}
</p>
) : (
<p className="font-light truncate">
Add instructions to tailor the response in this project.
</p>
)}
</div>
<button
onClick={() => setIsInstrOpen(true)}
className="flex flex-row gap-2 items-center justify-center p-2 rounded-md bg-background-dark/75 hover:dark:bg-neutral-800/75 hover:bg-accent-background-hovered cursor-pointer transition-all duration-150 shrink-0 whitespace-nowrap"
>
<RiPlayListAddFill
size={20}
className="text-text-darker dark:text-text-lighter"
/>
<p className="text-sm text-text-darker dark:text-text-lighter whitespace-nowrap">
Set Instructions
</p>
</button>
</div>
<div className="flex flex-row gap-2 justify-between">
<div>
<p className="font-bold">Files</p>
<p className="font-light">
Chats in this project can access these files.
</p>
</div>
<FilePicker
showTriggerLabel
triggerLabel="Add Files"
recentFiles={recentFiles}
onPickRecent={async (file) => {
if (!currentProjectId) return;
if (!linkFileToProject) return;
await linkFileToProject(currentProjectId, file.id);
}}
handleUploadChange={handleUploadChange}
/>
</div>
{tempProjectFiles.length > 0 ||
(currentProjectDetails?.files &&
currentProjectDetails.files.length > 0) ? (
<div className="grid grid-cols-1 sm:grid-cols-2 md:grid-cols-3 lg:grid-cols-4 gap-3">
{(() => {
const byId = new Map<string, ProjectFile>();
// Prefer backend files when available
(currentProjectDetails?.files || []).forEach((f) =>
byId.set(f.id, f)
);
// Add temp files only if a backend file with same id doesn't exist yet
tempProjectFiles.forEach((f) => {
if (!byId.has(f.id)) byId.set(f.id, f);
});
return Array.from(byId.values())
.slice(0, 3)
.map((f) => (
<FileCard
key={f.id}
file={f}
removeFile={async (fileId: string) => {
if (!currentProjectId) return;
await unlinkFileFromProject(currentProjectId, fileId);
}}
/>
));
})()}
{[...(currentProjectDetails?.files || [])].length > 3 && (
<button
className="flex items-center gap-3 border border-border rounded-xl bg-background-background px-3 py-2 shadow-sm text-left"
onClick={() => setShowProjectFiles(true)}
>
<div className="flex h-10 w-10 items-center justify-center rounded-lg bg-background-dark/60">
<FileIcon className="h-5 w-5 text-text-400" />
</div>
<div className="flex flex-col overflow-hidden">
<span className="text-sm font-medium text-text-darker truncate">
View all project files
</span>
<span className="text-xs text-text-400 truncate">
{(currentProjectDetails?.files || []).length} files
</span>
</div>
</button>
)}
</div>
) : (
<p className="text-sm text-text-400">No files yet.</p>
)}
<Dialog open={isInstrOpen} onOpenChange={setIsInstrOpen}>
<DialogContent className="w-[95%] max-w-2xl">
<DialogHeader>
<div className="flex flex-col gap-3">
<RiPlayListAddFill size={22} />
<DialogTitle>Set Project Instructions</DialogTitle>
</div>
<DialogDescription>
Instruct specific behaviors, focus, tones, or formats for the
response in this project.
</DialogDescription>
</DialogHeader>
<div className="space-y-3">
<Textarea
value={instructionText}
onChange={(e) => setInstructionText(e.target.value)}
placeholder="Think step by step and show reasoning for complex problems. Use specific examples."
className="min-h-[140px]"
/>
<div className="flex justify-end gap-4">
<Button variant="outline" onClick={() => setIsInstrOpen(false)}>
Cancel
</Button>
<Button
onClick={() => {
setIsInstrOpen(false);
upsertInstructions(instructionText);
}}
>
Save Instructions
</Button>
</div>
</div>
</DialogContent>
</Dialog>
<Dialog open={showProjectFiles} onOpenChange={setShowProjectFiles}>
<DialogContent className="w-full max-w-lg">
<DialogHeader>
<FolderOpen size={32} />
<DialogTitle>Project files</DialogTitle>
</DialogHeader>
<FilesList
recentFiles={(currentProjectDetails?.files || []) as any}
showRemove
onRemove={async (file) => {
if (!currentProjectId) return;
await unlinkFileFromProject(currentProjectId, file.id);
}}
/>
</DialogContent>
</Dialog>
{popup}
</div>
);
}

View File

@@ -0,0 +1,18 @@
/**
 * Converts an ISO-8601 timestamp into a coarse human-readable relative
 * time string, e.g. "just now", "5 mins ago", "2 days ago".
 *
 * Anything less than 45 seconds old (including timestamps in the
 * future, whose diff is negative) renders as "just now". Invalid dates
 * yield an empty string instead of propagating NaN into the output.
 *
 * @param isoDate - ISO-8601 date string (e.g. a backend `time_updated`).
 * @returns Human-readable relative time, or "" for unparseable input.
 */
export function formatRelativeTime(isoDate: string): string {
  const date = new Date(isoDate);
  const now = new Date();
  const diffMs = now.getTime() - date.getTime();
  // Guard: new Date("garbage").getTime() is NaN; every comparison below
  // would then be false and the function would return "NaN years ago".
  if (Number.isNaN(diffMs)) return "";
  const seconds = Math.floor(diffMs / 1000);
  if (seconds < 45) return "just now";
  const minutes = Math.floor(seconds / 60);
  if (minutes < 60) return `${minutes} min${minutes === 1 ? "" : "s"} ago`;
  const hours = Math.floor(minutes / 60);
  if (hours < 24) return `${hours} hour${hours === 1 ? "" : "s"} ago`;
  const days = Math.floor(hours / 24);
  if (days < 30) return `${days} day${days === 1 ? "" : "s"} ago`;
  // Months/years use calendar approximations (30-day months, 12 of them
  // per year) — precise enough for a sidebar label.
  const months = Math.floor(days / 30);
  if (months < 12) return `${months} month${months === 1 ? "" : "s"} ago`;
  const years = Math.floor(months / 12);
  return `${years} year${years === 1 ? "" : "s"} ago`;
}

View File

@@ -7,8 +7,10 @@ import { SEARCH_PARAM_NAMES } from "../services/searchParams";
export function useAssistantController({
selectedChatSession,
onAssistantSelect,
}: {
selectedChatSession: ChatSession | null | undefined;
onAssistantSelect?: () => void;
}) {
const searchParams = useSearchParams();
const { assistants: availableAssistants, pinnedAssistants } =
@@ -70,8 +72,9 @@ export function useAssistantController({
}
setSelectedAssistant(newAssistant);
onAssistantSelect?.();
},
[availableAssistants, defaultAssistantId]
[availableAssistants, defaultAssistantId, onAssistantSelect]
);
return {

View File

@@ -54,11 +54,6 @@ import {
useRouter,
useSearchParams,
} from "next/navigation";
import {
FileResponse,
FolderResponse,
useDocumentsContext,
} from "../my-documents/DocumentsContext";
import { useChatContext } from "@/components/context/ChatContext";
import Prism from "prismjs";
import {
@@ -74,6 +69,13 @@ import {
PacketType,
} from "../services/streamingModels";
import { useAssistantsContext } from "@/components/context/AssistantsContext";
import { Klee_One } from "next/font/google";
import { ProjectFile, useProjectsContext } from "../projects/ProjectsContext";
import { CategorizedFiles, UserFileStatus } from "../projects/projectsService";
const TEMP_USER_MESSAGE_ID = -1;
const TEMP_ASSISTANT_MESSAGE_ID = -2;
const SYSTEM_MESSAGE_ID = -3;
interface RegenerationRequest {
messageId: number;
@@ -118,6 +120,8 @@ export function useChatController({
const searchParams = useSearchParams();
const { refreshChatSessions, llmProviders } = useChatContext();
const { assistantPreferences, forcedToolIds } = useAssistantsContext();
const { fetchProjects, uploadFiles, setCurrentMessageFiles } =
useProjectsContext();
// Use selectors to access only the specific fields we need
const currentSessionId = useChatSessionStore(
@@ -163,9 +167,6 @@ export function useChatController({
const currentMessageHistory = useCurrentMessageHistory();
const currentChatState = useCurrentChatState();
const { selectedFiles, selectedFolders, uploadFile, setCurrentMessageFiles } =
useDocumentsContext();
const navigatingAway = useRef(false);
// Local state that doesn't need to be in the store
@@ -297,8 +298,6 @@ export function useChatController({
const onSubmit = useCallback(
async ({
message,
selectedFiles,
selectedFolders,
currentMessageFiles,
useAgentSearch,
messageIdToResend,
@@ -310,11 +309,10 @@ export function useChatController({
overrideFileDescriptors,
}: {
message: string;
// from MyDocuments
selectedFiles: FileResponse[];
selectedFolders: FolderResponse[];
//from chat input bar
currentMessageFiles: ProjectFile[];
// from the chat bar???
currentMessageFiles: FileDescriptor[];
useAgentSearch: boolean;
// optional params
@@ -326,6 +324,18 @@ export function useChatController({
regenerationRequest?: RegenerationRequest | null;
overrideFileDescriptors?: FileDescriptor[];
}) => {
const projectId = searchParams?.get("projectid");
{
const params = new URLSearchParams(searchParams?.toString() || "");
if (params.has("projectid")) {
params.delete("projectid");
const newUrl = params.toString()
? `${pathname}?${params.toString()}`
: pathname;
router.replace(newUrl, { scroll: false });
}
}
updateSubmittedMessage(getCurrentSessionId(), message);
navigatingAway.current = false;
@@ -416,11 +426,11 @@ export function useChatController({
const searchParamBasedChatSessionName =
searchParams?.get(SEARCH_PARAM_NAMES.TITLE) || null;
if (isNewSession) {
currChatSessionId = await createChatSession(
liveAssistant?.id || 0,
searchParamBasedChatSessionName
searchParamBasedChatSessionName,
projectId ? parseInt(projectId) : null
);
} else {
currChatSessionId = existingChatSessionId as string;
@@ -551,7 +561,7 @@ export function useChatController({
signal: controller.signal,
message: currMessage,
alternateAssistantId: liveAssistant?.id,
fileDescriptors: overrideFileDescriptors || currentMessageFiles,
fileDescriptors: overrideFileDescriptors,
parentMessageId:
regenerationRequest?.parentMessage.messageId ||
messageToResendParent?.messageId ||
@@ -561,8 +571,7 @@ export function useChatController({
filterManager.selectedSources,
filterManager.selectedDocumentSets,
filterManager.timeRange,
filterManager.selectedTags,
selectedFiles.map((file) => file.id)
filterManager.selectedTags
),
selectedDocumentIds: selectedDocuments
.filter(
@@ -572,11 +581,12 @@ export function useChatController({
.map((document) => document.db_doc_id as number),
queryOverride,
forceSearch,
userFolderIds: selectedFolders.map((folder) => folder.id),
userFileIds: selectedFiles
.filter((file) => file.id !== undefined && file.id !== null)
.map((file) => file.id),
currentMessageFiles: currentMessageFiles.map((file) => ({
id: file.file_id,
type: file.chat_file_type,
name: file.name,
user_file_id: file.id,
})),
regenerate: regenerationRequest !== undefined,
modelProvider:
modelOverride?.name || llmManager.currentLlm.name || undefined,
@@ -746,7 +756,12 @@ export function useChatController({
nodeId: initialUserNode.nodeId,
message: currMessage,
type: "user",
files: currentMessageFiles,
files: currentMessageFiles.map((file) => ({
id: file.file_id,
type: file.chat_file_type,
name: file.name,
user_file_id: file.id,
})),
toolCall: null,
parentNodeId: parentMessage?.nodeId || SYSTEM_NODE_ID,
packets: [],
@@ -774,6 +789,7 @@ export function useChatController({
await new Promise((resolve) => setTimeout(resolve, 200));
await nameChatSession(currChatSessionId);
refreshChatSessions();
fetchProjects();
}
// NOTE: don't switch pages if the user has navigated away from the chat
@@ -793,6 +809,7 @@ export function useChatController({
if (pathname == "/chat" && !navigatingAway.current) {
router.push(newUrl, { scroll: false });
fetchProjects();
}
}
}
@@ -819,6 +836,7 @@ export function useChatController({
currentMessageTree,
currentChatState,
llmProviders,
fetchProjects,
]
);
@@ -846,34 +864,63 @@ export function useChatController({
updateChatStateAction(getCurrentSessionId(), "uploading");
for (let file of acceptedFiles) {
const formData = new FormData();
formData.append("files", file);
const response: FileResponse[] = await uploadFile(formData, null);
try {
//this is to show files in the INPUT BAR immediately
const tempProjectFiles: ProjectFile[] = Array.from(acceptedFiles).map(
(file) => ({
id: file.name,
file_id: file.name,
name: file.name,
project_id: null,
user_id: null,
created_at: new Date().toISOString(),
status: UserFileStatus.UPLOADING,
file_type: file.type,
last_accessed_at: new Date().toISOString(),
chat_file_type: ChatFileType.DOCUMENT,
token_count: null,
chunk_count: null,
})
);
setCurrentMessageFiles((prev) => [...prev, ...tempProjectFiles]);
if (response.length > 0 && response[0] !== undefined) {
const uploadedFile = response[0];
const uploadedMessageFiles: CategorizedFiles = await uploadFiles(
Array.from(acceptedFiles)
);
//remove the temp files
setCurrentMessageFiles((prev) =>
prev.filter(
(file) =>
!tempProjectFiles.some((tempFile) => tempFile.id === file.id)
)
);
setCurrentMessageFiles((prev) => [
...prev,
...uploadedMessageFiles.user_files,
]);
const newFileDescriptor: FileDescriptor = {
// Use file_id (storage ID) if available, otherwise fallback to DB id
// Ensure it's a string as FileDescriptor expects
id: uploadedFile.file_id
? String(uploadedFile.file_id)
: String(uploadedFile.id),
type: uploadedFile.chat_file_type
? uploadedFile.chat_file_type
: ChatFileType.PLAIN_TEXT,
name: uploadedFile.name,
isUploading: false, // Mark as successfully uploaded
};
// Show toast if any files were rejected or unsupported
const unsupported = uploadedMessageFiles.unsupported_files || [];
const nonAccepted = uploadedMessageFiles.non_accepted_files || [];
if (unsupported.length > 0 || nonAccepted.length > 0) {
const detailsParts: string[] = [];
if (unsupported.length > 0) {
detailsParts.push(`Unsupported: ${unsupported.join(", ")}`);
}
if (nonAccepted.length > 0) {
detailsParts.push(`Not accepted: ${nonAccepted.join(", ")}`);
}
setCurrentMessageFiles((prev) => [...prev, newFileDescriptor]);
} else {
setPopup({
type: "error",
message: "Failed to upload file",
type: "warning",
message: `Some files were not uploaded. ${detailsParts.join(" | ")}`,
});
}
} catch (error) {
setPopup({
type: "error",
message: "Failed to upload file",
});
}
updateChatStateAction(getCurrentSessionId(), "input");
@@ -960,44 +1007,6 @@ export function useChatController({
fetchMaxTokens();
}, [liveAssistant]);
// fetch # of document tokens for the selected files
useEffect(() => {
const calculateTokensAndUpdateSearchMode = async () => {
if (selectedFiles.length > 0 || selectedFolders.length > 0) {
try {
// Prepare the query parameters for the API call
const fileIds = selectedFiles.map((file: FileResponse) => file.id);
const folderIds = selectedFolders.map(
(folder: FolderResponse) => folder.id
);
// Build the query string
const queryParams = new URLSearchParams();
fileIds.forEach((id) =>
queryParams.append("file_ids", id.toString())
);
folderIds.forEach((id) =>
queryParams.append("folder_ids", id.toString())
);
// Make the API call to get token estimate
const response = await fetch(
`/api/user/file/token-estimate?${queryParams.toString()}`
);
if (!response.ok) {
console.error("Failed to fetch token estimate");
return;
}
} catch (error) {
console.error("Error calculating tokens:", error);
}
}
};
calculateTokensAndUpdateSearchMode();
}, [selectedFiles, selectedFolders, llmManager.currentLlm]);
// check if there's an image file in the message history so that we know
// which LLMs are available to use
const imageFileInMessageHistory = useMemo(() => {

View File

@@ -1,6 +1,6 @@
"use client";
import { useEffect, useRef, useCallback } from "react";
import { useEffect, useRef, useCallback, useState } from "react";
import { ReadonlyURLSearchParams, useRouter } from "next/navigation";
import {
nameChatSession,
@@ -18,14 +18,14 @@ import {
} from "../services/searchParams";
import { FilterManager } from "@/lib/hooks";
import { OnyxDocument } from "@/lib/search/interfaces";
import { FileDescriptor } from "../interfaces";
import { FileResponse, FolderResponse } from "../my-documents/DocumentsContext";
import {
useChatSessionStore,
useCurrentMessageHistory,
} from "../stores/useChatSessionStore";
import { getCitations } from "../services/packetUtils";
import { getAvailableContextTokens } from "../services/lib";
import { useAssistantsContext } from "@/components/context/AssistantsContext";
import { ProjectFile } from "../projects/projectsService";
import { getSessionProjectTokenCount } from "../projects/projectsService";
interface UseChatSessionControllerProps {
existingChatSessionId: string | null;
@@ -37,7 +37,7 @@ interface UseChatSessionControllerProps {
setSelectedAssistantFromId: (assistantId: number | null) => void;
setSelectedDocuments: (documents: OnyxDocument[]) => void;
setCurrentMessageFiles: (
files: FileDescriptor[] | ((prev: FileDescriptor[]) => FileDescriptor[])
files: ProjectFile[] | ((prev: ProjectFile[]) => ProjectFile[])
) => void;
// Refs
@@ -53,13 +53,10 @@ interface UseChatSessionControllerProps {
// Actions
clientScrollToBottom: (fast?: boolean) => void;
clearSelectedItems: () => void;
refreshChatSessions: () => void;
onSubmit: (params: {
message: string;
selectedFiles: FileResponse[];
selectedFolders: FolderResponse[];
currentMessageFiles: FileDescriptor[];
currentMessageFiles: ProjectFile[];
useAgentSearch: boolean;
isSeededChat?: boolean;
}) => Promise<void>;
@@ -81,10 +78,13 @@ export function useChatSessionController({
submitOnLoadPerformed,
hasPerformedInitialScroll,
clientScrollToBottom,
clearSelectedItems,
refreshChatSessions,
onSubmit,
}: UseChatSessionControllerProps) {
const [currentSessionFileTokenCount, setCurrentSessionFileTokenCount] =
useState<number>(0);
const [availableContextTokens, setAvailableContextTokens] =
useState<number>(0);
// Store actions
const updateSessionAndMessageTree = useChatSessionStore(
(state) => state.updateSessionAndMessageTree
@@ -140,7 +140,6 @@ export function useChatSessionController({
// If we're creating a brand new chat, then don't need to scroll
if (priorChatSessionId !== null) {
setSelectedDocuments([]);
clearSelectedItems();
if (existingChatSessionId) {
updateHasPerformedInitialScroll(existingChatSessionId, false);
}
@@ -167,8 +166,6 @@ export function useChatSessionController({
submitOnLoadPerformed.current = true;
await onSubmit({
message: firstMessage || "",
selectedFiles: [],
selectedFolders: [],
currentMessageFiles: [],
useAgentSearch: false,
});
@@ -245,6 +242,34 @@ export function useChatSessionController({
setIsFetchingChatMessages(chatSession.chat_session_id, false);
// Fetch token count for this chat session's project (if any)
try {
if (chatSession.chat_session_id) {
const total = await getSessionProjectTokenCount(
chatSession.chat_session_id
);
setCurrentSessionFileTokenCount(total || 0);
} else {
setCurrentSessionFileTokenCount(0);
}
} catch (e) {
setCurrentSessionFileTokenCount(0);
}
// Fetch available context tokens for this chat session
try {
if (chatSession.chat_session_id) {
const available = await getAvailableContextTokens(
chatSession.chat_session_id
);
setAvailableContextTokens(available);
} else {
setAvailableContextTokens(0);
}
} catch (e) {
setAvailableContextTokens(0);
}
// If this is a seeded chat, then kick off the AI message generation
if (
newMessageHistory.length === 1 &&
@@ -261,8 +286,6 @@ export function useChatSessionController({
await onSubmit({
message: seededMessage,
isSeededChat: true,
selectedFiles: [],
selectedFolders: [],
currentMessageFiles: [],
useAgentSearch: false,
});
@@ -337,6 +360,8 @@ export function useChatSessionController({
);
return {
currentSessionFileTokenCount,
availableContextTokens,
onMessageSelection,
};
}

Some files were not shown because too many files have changed in this diff Show More