Compare commits


133 Commits

Author SHA1 Message Date
Weves
ca3db17b08 add restart 2025-12-17 12:48:46 -08:00
Weves
ffd13b1104 dump scripts 2025-12-17 12:48:46 -08:00
Wenxi
1caa860f8e fix(file upload): properly convert and process files uploaded directly to chat (#6815)
Co-authored-by: _htz_ <100520465+1htz2@users.noreply.github.com>
2025-12-17 12:38:14 -08:00
trial-danswer
7181cc41af feat: adding support for SearXNG as an option for web search. It operates a… (#6653)
Co-authored-by: Weves <chrisweaver101@gmail.com>
2025-12-17 12:27:19 -08:00
Chris Weaver
959b8c320d fix: don't leave redis ports exposed (#6814) 2025-12-17 12:06:10 -08:00
roshan
96fd0432ff fix(tool): default tool descriptions assistant -> agent (#6788)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-12-17 19:12:17 +00:00
Jamison Lahman
4c73a03f57 chore(fe): followups to 7f79e34aa (#6808) 2025-12-17 18:36:31 +00:00
Raunak Bhagat
e57713e376 fix: Clean up DocumentsSidebar (#6805) 2025-12-17 09:00:14 -08:00
Jamison Lahman
21ea320323 fix(style): standardize projects page layout (#6807) 2025-12-17 01:11:09 -08:00
Jamison Lahman
bac9c48e53 fix(style): "More Agents" page is responsive (#6806) 2025-12-17 01:01:13 -08:00
roshan
7f79e34aa4 fix(projects): add special logic for internal search tool when no connectors available (#6774)
Co-authored-by: Yuhong Sun <yuhongsun96@gmail.com>
2025-12-17 06:45:03 +00:00
Jamison Lahman
f1a81d45a1 chore(fe): popover component uses z-index.css (#6804) 2025-12-16 23:07:31 -08:00
Jamison Lahman
285755a540 chore(pre-commit): fix uv.lock after filelock "upgrade" (#6803) 2025-12-16 22:16:19 -08:00
Justin Tahara
89003ad2d8 chore(tf): Update VPC calling (#6798) 2025-12-17 05:38:50 +00:00
Yuhong Sun
9f93f97259 feat(vectordb): New Document Index Interface (#5700) 2025-12-17 03:28:02 +00:00
Yuhong Sun
f702eebbe7 chore: some readme updates (#6802) 2025-12-16 19:53:23 -08:00
Yuhong Sun
8487e1856b feat: Deep Research first couple stages (#6801) 2025-12-16 19:40:54 -08:00
acaprau
a36445f840 fix(devtools): restart_containers.sh should source venv before running alembic (#6795) 2025-12-17 02:33:21 +00:00
roshan
7f30293b0e chore: improved error handling and display for agent failure types (#6784) 2025-12-17 02:30:24 +00:00
acaprau
619d9528b4 fix(devtools): CLAUDE.md.template makes reference to a venv that does not exist (#6796) 2025-12-17 02:29:47 +00:00
Yuhong Sun
6f83c669e7 feat: enable skip clarification (#6797) 2025-12-16 18:25:15 -08:00
Chris Weaver
c3e5f48cb4 fix: horrible typo in README (#6793) 2025-12-16 17:05:57 -08:00
Justin Tahara
fdf8fe391c fix(ui): Search Settings Active Only (#6657) 2025-12-16 17:00:06 -08:00
Raunak Bhagat
f1d6bb9e02 refactor: Transfer all icons to @opal/icons (#6755) 2025-12-17 00:16:44 +00:00
Justin Tahara
9a64a717dc fix(users): User Groups Race Condition (#6710) 2025-12-17 00:11:07 +00:00
Raunak Bhagat
aa0f475e01 refactor: Add new z-indexing file (#6789) 2025-12-16 23:56:13 +00:00
Nikolas Garza
75238dc353 fix: attach user credentials to assistant requests (#6785) 2025-12-16 23:15:31 +00:00
Nikolas Garza
9e19803244 chore: bump fallback max token limit to 32k (#6787) 2025-12-16 23:09:47 +00:00
dependabot[bot]
5cabd32638 chore(deps): Bump filelock from 3.15.4 to 3.20.1 in /backend/requirements (#6781)
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-12-16 22:36:09 +00:00
Justin Tahara
4ccd88c331 fix(confluence): Skip attachments gracefully (#6769) 2025-12-16 22:34:16 +00:00
Justin Tahara
5a80b98320 feat(cleanup): No Bastion Setup (#6562) 2025-12-16 14:51:05 -08:00
Jamison Lahman
ff109d9f5c chore(style): fix chat page scrollbar after padding change (#6780) 2025-12-16 22:08:12 +00:00
Justin Tahara
4cc276aca9 fix(helm): Add Update Strategy (#6782) 2025-12-16 14:19:20 -08:00
Jamison Lahman
29f0df2c93 fix(style): increase tooltip z-index (#6778) 2025-12-16 21:30:19 +00:00
Nikolas Garza
e2edcf0e0b fix: improve ux for fed slack config error handling (#6699) 2025-12-16 21:23:11 +00:00
Chris Weaver
9396fc547d fix: confluence params (#6773) 2025-12-16 20:53:39 +00:00
Jamison Lahman
c089903aad fix: chat page overflow on small screens (#6723) 2025-12-16 13:03:07 -08:00
Chris Weaver
95471f64e9 fix: main chat page w/ overridden app name (#6775) 2025-12-16 12:56:15 -08:00
Jamison Lahman
13c1619d01 fix(style): center-ish align chat icon on small screen (#6727) 2025-12-16 20:10:09 +00:00
Justin Tahara
ddb5068847 fix(helm): Redis Operator Name (#6770) 2025-12-16 20:07:00 +00:00
Nikolas Garza
81a4f654c2 fix: scrollable container height for popover.tsx (#6772) 2025-12-16 20:04:33 +00:00
Jamison Lahman
9393c56a21 fix: remove unnecessary chat display tabindex (#6722) 2025-12-16 20:00:01 +00:00
Nikolas Garza
1ee96ff99c fix(llm): fix custom provider detection and model filtering (#6766) 2025-12-16 19:14:38 +00:00
Jamison Lahman
6bb00d2c6b chore(gha): run connector tests when uv.lock changes (#6768) 2025-12-16 18:44:06 +00:00
Wenxi
d9cc923c6a fix(hubspot): api client and urllib conflict (#6765) 2025-12-16 18:35:24 +00:00
Evan Lohn
bfbba0f036 chore: gpt 5.2 model naming (#6754) 2025-12-16 10:38:29 -08:00
Wenxi
ccf6911f97 chore: alembic readme nit (#6767) 2025-12-16 10:20:50 -08:00
Wenxi
15c9c2ba8e fix(llms): only save model configs for active/usable LLMs (#6758) 2025-12-16 17:54:47 +00:00
Wenxi
8b3fedf480 fix(web search): clamp google pse max results to api max (#6764) 2025-12-16 09:47:56 -08:00
Jamison Lahman
b8dc0749ee chore(tests): allow REDIS_CLOUD_PYTEST_PASSWORD to be empty (#6249) 2025-12-16 02:53:28 -08:00
Jamison Lahman
d6426458c6 chore(hygiene): rm unused secrets (#6762) 2025-12-16 02:29:56 -08:00
Jamison Lahman
941c4d6a54 chore(gha): use ods openapi in CI (#6761) 2025-12-16 02:04:42 -08:00
Jamison Lahman
653b65da66 chore(devtools): replace check_lazy_imports.py w/ ods check-lazy-imports (#6760) 2025-12-16 01:05:08 -08:00
Jamison Lahman
503e70be02 chore(deployment): fetch-depth: 0 for check-version-tag (#6759) 2025-12-15 23:51:37 -08:00
Nikolas Garza
9c19493160 fix: llm popover scroll (#6757) 2025-12-16 05:24:28 +00:00
Nikolas Garza
933315646b fix(llm): restore default models and filter obsolete/duplicate models from API (#6731) 2025-12-16 03:11:38 +00:00
Nikolas Garza
d2061f8a26 chore(ui): LLM popover improvements (#6742) 2025-12-15 19:36:00 -08:00
Jamison Lahman
6a98f0bf3c chore(devtools): ods openapi to generate schema and client (#6748) 2025-12-15 19:34:12 -08:00
Jamison Lahman
2f4d39d834 chore(devtools): ods check-lazy-imports (#6751) 2025-12-15 18:54:49 -08:00
Raunak Bhagat
40f8bcc6f8 refactor: Clean up message display (#6706) 2025-12-15 18:48:32 -08:00
Wenxi
af9ed73f00 fix(llms): reduce list of openai models (#6753) 2025-12-16 02:28:17 +00:00
acaprau
bf28041f4e feat(agents pagination): FE changes for pagination to the agents admin page (#6516)
Co-authored-by: Andrei <andrei@Andreis-MacBook-Pro.local>
2025-12-16 02:21:43 +00:00
Wenxi
395d5927b7 fix(llms): destructure fetched_model_configurations (#6749) 2025-12-16 01:33:16 +00:00
Jamison Lahman
c96f24e37c chore(deployment): run check-version-tag in debug mode (#6747) 2025-12-15 17:15:51 -08:00
Emerson Gomes
070519f823 Add LLM Session Tracking for Budget Control and Observability (#6564)
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Wenxi Onyx <wenxi@onyx.app>
2025-12-15 23:45:25 +00:00
Jamison Lahman
a7dc1c0f3b chore(gha): remove duplicate check-lazy-imports (#6746) 2025-12-15 15:38:13 -08:00
Jamison Lahman
a947e44926 chore(gha): uv run openapi-generator-cli instead of docker (#6737) 2025-12-15 22:00:39 +00:00
Evan Lohn
a6575b6254 feat: allow updating embedding API key (#6707) 2025-12-15 19:21:05 +00:00
Wenxi
31733a9c7c fix(projects): don't disable internal search when no project files are uploaded (#6732) 2025-12-15 10:53:17 -08:00
dependabot[bot]
5415e2faf1 chore(deps): Bump actions/setup-node from 6.0.0 to 6.1.0 (#6735)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-12-15 18:34:29 +00:00
dependabot[bot]
749f720dfd chore(deps): Bump actions/checkout from 6.0.0 to 6.0.1 (#6734)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-12-15 18:32:05 +00:00
Wenxi
eac79cfdf2 chore: disable coda tests temporarily until we fully configure (#6733) 2025-12-15 10:19:28 -08:00
Chris Weaver
e3b1202731 fix: mypy (#6724) 2025-12-15 09:46:02 -08:00
Yuhong Sun
6df13cc2de feat: Handle repeat calls to internal search (#6728) 2025-12-14 23:59:35 -08:00
Yuhong Sun
682f660aa3 feat: Minor teachups on DR (#6726) 2025-12-14 23:00:30 -08:00
Yuhong Sun
c4670ea86c feat: Deep Research Clarification Stage (#6725) 2025-12-14 22:55:39 -08:00
ethan
a6757eb49f feat: add coda connector (#6558)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-12-14 19:49:55 -08:00
Justin Tahara
cd372fb585 fix(asana): Cleaning up Errors (#6689) 2025-12-15 02:07:05 +00:00
Chris Weaver
45fa0d9b32 fix: package-lock.json (#6721) 2025-12-14 17:36:48 -08:00
Chris Weaver
45091f2ee2 fix: add darwin (#6634) 2025-12-14 17:14:16 -08:00
Chris Weaver
43a3cb89b9 fix: env vars for tests (#6720) 2025-12-14 16:37:06 -08:00
Chris Weaver
9428eaed8d fix: copying markdown tables into spreadsheets (#6717) 2025-12-14 23:01:07 +00:00
Chris Weaver
dd29d989ff chore: ignore plans dir (#6718) 2025-12-14 14:50:21 -08:00
Chris Weaver
f44daa2116 fix: remove bottom logo (#6716) 2025-12-14 22:09:27 +00:00
Justin Tahara
212cbcb683 fix(redis): Adding missing TTL's (#6708) 2025-12-13 02:15:09 +00:00
Justin Tahara
aaad573c3f feat(helm): Add Default Redis Configs (#6709) 2025-12-13 02:10:27 +00:00
Jamison Lahman
e1325e84ae chore(pre-commit): test selection w/ merge-group & postsubmits (#6705) 2025-12-13 00:08:39 +00:00
Evan Lohn
e759cdd4ab fix: mcp server name and desc updates (#6692) 2025-12-12 07:04:46 +00:00
Yuhong Sun
2ed6607e10 chore: Few frontend cleanup (#6700) 2025-12-11 19:47:51 -08:00
dependabot[bot]
ba5b9cf395 chore(deps): Bump next from 16.0.7 to 16.0.10 in /web (#6695)
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-12-11 19:18:37 -08:00
Yuhong Sun
bab23f62b8 fix: Citation on replay bug (#6697) 2025-12-11 19:17:06 -08:00
Yuhong Sun
d72e2e4081 fix: Search tool reasoning level (#6696) 2025-12-11 18:28:01 -08:00
Raunak Bhagat
4ed2d08336 fix: Fix custom-agent-avatar-invocation (#6644) 2025-12-11 16:20:39 -08:00
Yuhong Sun
24a0ceee18 chore: fix llm interface (#6691) 2025-12-11 15:44:44 -08:00
Jamison Lahman
d8fba38780 chore(gha): replace pre-commit with prek (#6684)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-12-10 17:23:08 -08:00
Justin Tahara
5f358a1e20 fix(users): Add Race Condition Handling (#6639) 2025-12-09 07:43:47 -10:00
Evan Lohn
00b0c23e13 fix(web): handle br encoding of sitemap (#6647) 2025-12-09 04:03:56 +00:00
Chris Weaver
2103ed9e81 fix: tag race condition (#6674) 2025-12-08 17:01:07 -10:00
Chris Weaver
2c5ab72312 chore: only pause after repeated failure on cloud (#6673) 2025-12-08 16:44:13 -10:00
roshan
672d1ca8fa fix: toast for non-admin onboarding flow (#6651) 2025-12-07 00:48:18 +00:00
Jamison Lahman
a418de4287 chore(devtools): upgrade onyx-devtools 0.0.3->0.1.0 (#6663) 2025-12-06 10:48:46 -08:00
Jamison Lahman
349aba6c02 chore(devtools): upgrade onyx-devtools 0.0.2->0.0.3 (#6662) 2025-12-06 10:10:02 -08:00
Jamison Lahman
18a7bdc292 chore(devtools): ods db operations (#6661)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-12-06 09:53:25 -08:00
Raunak Bhagat
c658fd4c7d refactor: Modal cleanup (#6614) 2025-12-05 19:40:30 -08:00
Yuhong Sun
f1e87dda5b chore: LLM step to give packets that can be modified as needed (#6641) 2025-12-05 17:12:55 -08:00
roshan
b93edb3e89 feat: standardize placeholders in default system prompt (#6643) 2025-12-06 00:13:36 +00:00
Jamison Lahman
dc4e76bd64 chore(vscode): migrate install python reqs to uv (#6654) 2025-12-05 16:15:58 -08:00
Justin Tahara
c4242ad17a fix(ui): Normalize Emails (#6636) 2025-12-05 23:26:06 +00:00
roshan
a4dee62660 fix: add reciprocal rank score for web search docs based on ordering (#6625) 2025-12-05 22:53:07 +00:00
Nikolas Garza
2d2c76ec7b feat(llm): fetch dynamic provider models directly from source APIs (#6619) 2025-12-05 22:22:56 +00:00
dependabot[bot]
d80025138d chore(deps): Bump urllib3 from 2.5.0 to 2.6.0 in /backend/requirements (#6638)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-12-05 21:28:41 +00:00
Evan Lohn
90ec595936 fix: hitting endpoint with undefined persona (#6637) 2025-12-05 20:33:00 +00:00
Jamison Lahman
f30e88a61b chore(dev): make "dev" an optional-dependency (#6640) 2025-12-05 10:51:23 -08:00
roshan
9c04e9269f feat: add standard for default tools -> make openURL a default tool (#6581)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Wenxi Onyx <wenxi@onyx.app>
Co-authored-by: Raunak Bhagat <r@rabh.io>
Co-authored-by: Nikolas Garza <90273783+nmgarza5@users.noreply.github.com>
Co-authored-by: Yuhong Sun <yuhongsun96@gmail.com>
Co-authored-by: SubashMohan <subashmohan75@gmail.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
Co-authored-by: Justin Tahara <105671973+justin-tahara@users.noreply.github.com>
Co-authored-by: Roshan Desai <rohoswagger@rohoswagger-onyx.local>
Co-authored-by: acaprau <48705707+acaprau@users.noreply.github.com>
Co-authored-by: Andrei <andrei@Andreis-MacBook-Pro.local>
2025-12-05 18:02:50 +00:00
Jamison Lahman
8c65fcd193 chore(devtools): simplify compile requirements (#6630) 2025-12-05 10:29:07 -08:00
Jamison Lahman
f42e3eb823 chore(docs): prefer uv over pip (#6628) 2025-12-05 10:28:03 -08:00
Yuhong Sun
9b76ed085c feat: deep research prompts (#6635) 2025-12-05 10:15:37 -08:00
Chris Weaver
0eb4d039ae fix: only re-index active connectors (#6631) 2025-12-05 17:52:44 +00:00
Justin Tahara
3c0b66a174 fix(sharepoint): Shared link fix (#6607) 2025-12-05 17:35:10 +00:00
Chris Weaver
895a8e774e fix: add default-groups=all (#6632) 2025-12-05 17:30:06 +00:00
SubashMohan
c14ea4dbb9 refactor(actions): improved flexibility of actioncard and few ui changes (#6597) 2025-12-05 07:06:33 +00:00
Wenxi
80b1e07586 fix(llms): sanitize vision providers (#6624) 2025-12-05 03:03:21 +00:00
Nikolas Garza
59b243d585 chore(slack): add better typing + move some logs from debug to info (#6613)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-12-05 03:02:28 +00:00
Jamison Lahman
d4ae3d1cb5 chore(devtools): upgrade onyx-devtools 0.0.1->0.0.2 (#6623) 2025-12-04 18:36:21 -08:00
Jamison Lahman
ed0a86c681 chore(deps): make backend/ a uv workspace (#6460) 2025-12-04 18:30:04 -08:00
dependabot[bot]
e825e5732f chore(deps): Bump aiohttp from 3.12.14 to 3.13.2 in /backend (#6406)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-12-04 18:28:05 -08:00
dependabot[bot]
a93854ae70 chore(deps): Bump sendgrid from 6.11.0 to 6.12.5 in /backend (#6408)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2025-12-05 02:04:24 +00:00
Yuhong Sun
fc8767a04f chore: delete unused code (#6622) 2025-12-04 17:50:17 -08:00
Jamison Lahman
6c231e7ad1 chore(devtools): QOL improvements for cherry-pick script (#6620) 2025-12-04 17:27:27 -08:00
Wenxi
bac751d4a9 feat(helm): add mcp server (#6586) 2025-12-05 00:57:05 +00:00
Jessica Singh
3e0f386d5b fix(web search ui): make font sizes consistent (#6606) 2025-12-05 00:09:21 +00:00
Chris Weaver
edb6957268 fix: litellm w/ azure reasoning mode (#6612) 2025-12-04 23:49:55 +00:00
Jamison Lahman
0348d11fb2 chore(mypy): type-check tools/ (#6615)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-12-04 23:44:34 +00:00
695 changed files with 25324 additions and 16375 deletions

View File

@@ -1,33 +0,0 @@
name: Check Lazy Imports
concurrency:
group: Check-Lazy-Imports-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
pull_request:
branches:
- main
- 'release/**'
permissions:
contents: read
jobs:
check-lazy-imports:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Set up Python
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
with:
python-version: '3.11'
- name: Check lazy imports
run: python3 backend/scripts/check_lazy_imports.py

View File

@@ -89,9 +89,10 @@ jobs:
if: ${{ !startsWith(github.ref_name, 'nightly-latest') && github.event_name != 'workflow_dispatch' }}
steps:
- name: Checkout
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
fetch-depth: 0
- name: Setup uv
uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # ratchet:astral-sh/setup-uv@v7.1.4
@@ -111,7 +112,7 @@ jobs:
timeout-minutes: 10
steps:
- name: Checkout
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -140,7 +141,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -198,7 +199,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -306,7 +307,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -372,7 +373,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -485,7 +486,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -542,7 +543,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -650,7 +651,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -714,7 +715,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -907,7 +908,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -997,7 +998,7 @@ jobs:
timeout-minutes: 90
steps:
- name: Checkout
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false

View File

@@ -15,7 +15,7 @@ jobs:
timeout-minutes: 45
steps:
- name: Checkout
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false

View File

@@ -28,7 +28,7 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false

View File

@@ -52,7 +52,7 @@ jobs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -80,12 +80,13 @@ jobs:
env:
PYTHONPATH: ./backend
MODEL_SERVER_HOST: "disabled"
DISABLE_TELEMETRY: "true"
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -113,6 +114,7 @@ jobs:
run: |
cat <<EOF > deployment/docker_compose/.env
CODE_INTERPRETER_BETA_ENABLED=true
DISABLE_TELEMETRY=true
EOF
- name: Set up Standard Dependencies

View File

@@ -24,7 +24,7 @@ jobs:
# fetch-depth 0 is required for helm/chart-testing-action
steps:
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false

View File

@@ -43,7 +43,7 @@ jobs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -74,7 +74,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -129,7 +129,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -183,7 +183,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -259,7 +259,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -274,23 +274,28 @@ jobs:
# NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
# NOTE: don't need web server for integration tests
- name: Start Docker containers
- name: Create .env file for Docker Compose
env:
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
RUN_ID: ${{ github.run_id }}
run: |
cat <<EOF > deployment/docker_compose/.env
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
AUTH_TYPE=basic
POSTGRES_POOL_PRE_PING=true
POSTGRES_USE_NULL_POOL=true
REQUIRE_EMAIL_VERIFICATION=false
DISABLE_TELEMETRY=true
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID}
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID}
INTEGRATION_TESTS_MODE=true
CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001
MCP_SERVER_ENABLED=true
EOF
- name: Start Docker containers
run: |
cd deployment/docker_compose
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
AUTH_TYPE=basic \
POSTGRES_POOL_PRE_PING=true \
POSTGRES_USE_NULL_POOL=true \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID} \
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID} \
INTEGRATION_TESTS_MODE=true \
CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001 \
MCP_SERVER_ENABLED=true \
docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
relational_db \
index \
@@ -436,7 +441,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false

View File

@@ -16,12 +16,12 @@ jobs:
timeout-minutes: 45
steps:
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Setup node
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v4
with:
node-version: 22
cache: "npm"

View File

@@ -40,7 +40,7 @@ jobs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -70,7 +70,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -124,7 +124,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -177,7 +177,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -253,7 +253,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -268,21 +268,26 @@ jobs:
# NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
# NOTE: don't need web server for integration tests
- name: Start Docker containers
- name: Create .env file for Docker Compose
env:
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
RUN_ID: ${{ github.run_id }}
run: |
cat <<EOF > deployment/docker_compose/.env
AUTH_TYPE=basic
POSTGRES_POOL_PRE_PING=true
POSTGRES_USE_NULL_POOL=true
REQUIRE_EMAIL_VERIFICATION=false
DISABLE_TELEMETRY=true
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID}
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID}
INTEGRATION_TESTS_MODE=true
MCP_SERVER_ENABLED=true
EOF
- name: Start Docker containers
run: |
cd deployment/docker_compose
AUTH_TYPE=basic \
POSTGRES_POOL_PRE_PING=true \
POSTGRES_USE_NULL_POOL=true \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID} \
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID} \
INTEGRATION_TESTS_MODE=true \
MCP_SERVER_ENABLED=true \
docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
relational_db \
index \

View File

@@ -53,7 +53,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -108,7 +108,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -163,7 +163,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -229,13 +229,13 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false
- name: Setup node
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v4
with:
node-version: 22
cache: 'npm'
@@ -465,12 +465,12 @@ jobs:
# ]
# steps:
# - name: Checkout code
# uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
# uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
# with:
# fetch-depth: 0
# - name: Setup node
# uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
# uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v4
# with:
# node-version: 22

View File

@@ -17,24 +17,6 @@ permissions:
contents: read
jobs:
validate-requirements:
runs-on: ubuntu-slim
timeout-minutes: 45
steps:
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Setup uv
uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # ratchet:astral-sh/setup-uv@v7.1.4
# TODO: Enable caching once there is a uv.lock file checked in.
# with:
# enable-cache: true
- name: Validate requirements lock files
run: ./backend/scripts/compile_requirements.py --check
mypy-check:
# See https://runs-on.com/runners/linux/
# Note: Mypy seems quite optimized for x64 compared to arm64.
@@ -45,7 +27,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -58,35 +40,10 @@ jobs:
backend/requirements/model_server.txt
backend/requirements/ee.txt
- name: Generate OpenAPI schema
shell: bash
working-directory: backend
env:
PYTHONPATH: "."
run: |
python scripts/onyx_openapi_schema.py --filename generated/openapi.json
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Generate OpenAPI Python client
- name: Generate OpenAPI schema and Python client
shell: bash
run: |
docker run --rm \
-v "${{ github.workspace }}/backend/generated:/local" \
openapitools/openapi-generator-cli generate \
-i /local/openapi.json \
-g python \
-o /local/onyx_openapi_client \
--package-name onyx_openapi_client \
--skip-validate-spec \
--openapi-normalizer "SIMPLIFY_ONEOF_ANYOF=true,SET_OAS3_NULLABLE=true"
ods openapi all
- name: Cache mypy cache
if: ${{ vars.DISABLE_MYPY_CACHE != 'true' }}
@@ -103,3 +60,9 @@ jobs:
MYPY_FORCE_COLOR: 1
TERM: xterm-256color
run: mypy .
- name: Run MyPy (tools/)
env:
MYPY_FORCE_COLOR: 1
TERM: xterm-256color
run: mypy tools/

View File

@@ -133,12 +133,13 @@ jobs:
env:
PYTHONPATH: ./backend
DISABLE_TELEMETRY: "true"
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -160,16 +161,20 @@ jobs:
hubspot:
- 'backend/onyx/connectors/hubspot/**'
- 'backend/tests/daily/connectors/hubspot/**'
- 'uv.lock'
salesforce:
- 'backend/onyx/connectors/salesforce/**'
- 'backend/tests/daily/connectors/salesforce/**'
- 'uv.lock'
github:
- 'backend/onyx/connectors/github/**'
- 'backend/tests/daily/connectors/github/**'
- 'uv.lock'
file_processing:
- 'backend/onyx/file_processing/**'
- 'uv.lock'
- name: Run Tests (excluding HubSpot, Salesforce, and GitHub)
- name: Run Tests (excluding HubSpot, Salesforce, GitHub, and Coda)
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
run: |
py.test \
@@ -182,7 +187,8 @@ jobs:
backend/tests/daily/connectors \
--ignore backend/tests/daily/connectors/hubspot \
--ignore backend/tests/daily/connectors/salesforce \
--ignore backend/tests/daily/connectors/github
--ignore backend/tests/daily/connectors/github \
--ignore backend/tests/daily/connectors/coda
- name: Run HubSpot Connector Tests
if: ${{ github.event_name == 'schedule' || steps.changes.outputs.hubspot == 'true' || steps.changes.outputs.file_processing == 'true' }}

View File

@@ -39,7 +39,7 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false

View File

@@ -26,15 +26,13 @@ jobs:
env:
PYTHONPATH: ./backend
REDIS_CLOUD_PYTEST_PASSWORD: ${{ secrets.REDIS_CLOUD_PYTEST_PASSWORD }}
SF_USERNAME: ${{ secrets.SF_USERNAME }}
SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
SF_SECURITY_TOKEN: ${{ secrets.SF_SECURITY_TOKEN }}
DISABLE_TELEMETRY: "true"
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false

View File

@@ -7,6 +7,8 @@ on:
merge_group:
pull_request: null
push:
branches:
- main
tags:
- "v*.*.*"
@@ -15,17 +17,10 @@ permissions:
jobs:
quality-checks:
# See https://runs-on.com/runners/linux/
runs-on:
[
runs-on,
runner=1cpu-linux-arm64,
"run-id=${{ github.run_id }}-quality-checks",
]
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false
@@ -35,7 +30,7 @@ jobs:
- name: Setup Terraform
uses: hashicorp/setup-terraform@b9cd54a3c349d3f38e8881555d616ced269862dd # ratchet:hashicorp/setup-terraform@v3
- name: Setup node
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v6
with: # zizmor: ignore[cache-poisoning]
node-version: 22
cache: "npm"
@@ -43,12 +38,10 @@ jobs:
- name: Install node dependencies
working-directory: ./web
run: npm ci
- uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # ratchet:pre-commit/action@v3.0.1
env:
# uv-run is mypy's id and mypy is covered by the Python Checks which caches dependencies better.
SKIP: uv-run
- uses: j178/prek-action@91fd7d7cf70ae1dee9f4f44e7dfa5d1073fe6623 # ratchet:j178/prek-action@v1
with:
extra_args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || '' }}
prek-version: '0.2.21'
extra-args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || github.event_name == 'merge_group' && format('--from-ref {0} --to-ref {1}', github.event.merge_group.base_sha, github.event.merge_group.head_sha) || github.ref_name == 'main' && '--all-files' || '' }}
- name: Check Actions
uses: giner/check-actions@28d366c7cbbe235f9624a88aa31a628167eee28c # ratchet:giner/check-actions@v1.0.1
with:

View File

@@ -24,7 +24,7 @@ jobs:
- {goos: "darwin", goarch: "arm64"}
- {goos: "", goarch: ""}
steps:
- uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
fetch-depth: 0

View File

@@ -14,7 +14,7 @@ jobs:
contents: read
steps:
- name: Checkout main Onyx repo
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false

View File

@@ -18,7 +18,7 @@ jobs:
# see https://github.com/orgs/community/discussions/27028#discussioncomment-3254367 for the workaround we
# implement here which needs an actual user's deploy key
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
ssh-key: "${{ secrets.DEPLOY_KEY }}"
persist-credentials: true

View File

@@ -17,7 +17,7 @@ jobs:
security-events: write # needed for SARIF uploads
steps:
- name: Checkout repository
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6.0.0
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6.0.1
with:
persist-credentials: false

.gitignore (vendored): 3 changes
View File

@@ -53,3 +53,6 @@ node_modules
# MCP configs
.playwright-mcp
# plans
plans/

View File

@@ -5,13 +5,37 @@ default_install_hook_types:
- post-rewrite
repos:
- repo: https://github.com/astral-sh/uv-pre-commit
rev: 569ddf04117761eb74cef7afb5143bbb96fcdfbb # frozen: 0.9.15
# From: https://github.com/astral-sh/uv-pre-commit/pull/53/commits/d30b4298e4fb63ce8609e29acdbcf4c9018a483c
rev: d30b4298e4fb63ce8609e29acdbcf4c9018a483c
hooks:
- id: uv-run
name: Check lazy imports
args: ["--with=onyx-devtools", "ods", "check-lazy-imports"]
files: ^backend/(?!\.venv/).*\.py$
- id: uv-sync
args: ["--locked", "--all-extras"]
- id: uv-lock
files: ^pyproject\.toml$
- id: uv-export
name: uv-export default.txt
args: ["--no-emit-project", "--no-default-groups", "--no-hashes", "--extra", "backend", "-o", "backend/requirements/default.txt"]
files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
- id: uv-export
name: uv-export dev.txt
args: ["--no-emit-project", "--no-default-groups", "--no-hashes", "--extra", "dev", "-o", "backend/requirements/dev.txt"]
files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
- id: uv-export
name: uv-export ee.txt
args: ["--no-emit-project", "--no-default-groups", "--no-hashes", "--extra", "ee", "-o", "backend/requirements/ee.txt"]
files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
- id: uv-export
name: uv-export model_server.txt
args: ["--no-emit-project", "--no-default-groups", "--no-hashes", "--extra", "model_server", "-o", "backend/requirements/model_server.txt"]
files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
# NOTE: This takes ~6s on a single, large module which is prohibitively slow.
# - id: uv-run
# name: mypy
# args: ["mypy"]
# args: ["--all-extras", "mypy"]
# pass_filenames: true
# files: ^backend/.*\.py$
@@ -52,7 +76,7 @@ repos:
args: [ '--remove-all-unused-imports', '--remove-unused-variables', '--in-place' , '--recursive']
- repo: https://github.com/golangci/golangci-lint
rev: e6ebea0145f385056bce15041d3244c0e5e15848 # frozen: v2.7.0
rev: 9f61b0f53f80672872fced07b6874397c3ed197b # frozen: v2.7.2
hooks:
- id: golangci-lint
entry: bash -c "find tools/ -name go.mod -print0 | xargs -0 -I{} bash -c 'cd \"$(dirname {})\" && golangci-lint run ./...'"
@@ -88,12 +112,6 @@ repos:
pass_filenames: false
files: \.tf$
- id: check-lazy-imports
name: Check lazy imports
entry: python3 backend/scripts/check_lazy_imports.py
language: system
files: ^backend/(?!\.venv/).*\.py$
- id: typescript-check
name: TypeScript type check
entry: bash -c 'cd web && npm run types:check'

View File

@@ -508,7 +508,6 @@
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"stopOnEntry": true,
"presentation": {
"group": "3"
}
@@ -554,10 +553,10 @@
"name": "Install Python Requirements",
"type": "node",
"request": "launch",
"runtimeExecutable": "bash",
"runtimeExecutable": "uv",
"runtimeArgs": [
"-c",
"pip install -r backend/requirements/default.txt && pip install -r backend/requirements/dev.txt && pip install -r backend/requirements/ee.txt && pip install -r backend/requirements/model_server.txt"
"sync",
"--all-extras"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",

View File

@@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
## KEY NOTES
- If you run into any missing python dependency errors, try running your command with `source backend/.venv/bin/activate` \
- If you run into any missing python dependency errors, try running your command with `source .venv/bin/activate` \
to assume the python venv.
- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
- If using `playwright` to explore the frontend, you can usually log in with username `a@test.com` and password

View File

@@ -71,12 +71,12 @@ If using a higher version, sometimes some libraries will not be available (i.e.
#### Backend: Python requirements
Currently, we use pip and recommend creating a virtual environment.
Currently, we use [uv](https://docs.astral.sh/uv/) and recommend creating a [virtual environment](https://docs.astral.sh/uv/pip/environments/#using-a-virtual-environment).
For convenience here's a command for it:
```bash
python -m venv .venv
uv venv .venv --python 3.11
source .venv/bin/activate
```
@@ -95,33 +95,15 @@ If using PowerShell, the command slightly differs:
Install the required python dependencies:
```bash
pip install -r backend/requirements/combined.txt
uv sync --all-extras
```
or
Install Playwright for Python (headless browser required by the Web Connector):
```bash
pip install -r backend/requirements/default.txt
pip install -r backend/requirements/dev.txt
pip install -r backend/requirements/ee.txt
pip install -r backend/requirements/model_server.txt
uv run playwright install
```
Fix vscode/cursor auto-imports:
```bash
pip install -e .
```
Install Playwright for Python (headless browser required by the Web Connector)
In the activated Python virtualenv, install Playwright for Python by running:
```bash
playwright install
```
You may have to deactivate and reactivate your virtualenv for `playwright` to appear on your path.
#### Frontend: Node dependencies
Onyx uses Node v22.20.0. We highly recommend you use [Node Version Manager (nvm)](https://github.com/nvm-sh/nvm)
@@ -130,7 +112,7 @@ to manage your Node installations. Once installed, you can run
```bash
nvm install 22 && nvm use 22
node -v # verify your active version
```
```
Navigate to `onyx/web` and run:
@@ -144,21 +126,15 @@ npm i
For the backend, you'll need to setup pre-commit hooks (black / reorder-python-imports).
With the virtual environment active, install the pre-commit library with:
Then run:
```bash
pip install pre-commit
```
Then, from the `onyx/backend` directory, run:
```bash
pre-commit install
uv run pre-commit install
```
Additionally, we use `mypy` for static type checking.
Onyx is fully type-annotated, and we want to keep it that way!
To run the mypy checks manually, run `python -m mypy .` from the `onyx/backend` directory.
To run the mypy checks manually, run `uv run mypy .` from the `onyx/backend` directory.
### Web

View File

@@ -7,8 +7,12 @@ Onyx migrations use a generic single-database configuration with an async dbapi.
## To generate new migrations:
run from onyx/backend:
`alembic revision --autogenerate -m <DESCRIPTION_OF_MIGRATION>`
From onyx/backend, run:
`alembic revision -m <DESCRIPTION_OF_MIGRATION>`
Note: you cannot use the `--autogenerate` flag as the automatic schema parsing does not work.
Manually populate the upgrade and downgrade in your new migration.
More info can be found here: https://alembic.sqlalchemy.org/en/latest/autogenerate.html
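
For reference, a minimal hand-written migration following the workflow above might look like the sketch below (the revision IDs, table, and column names are placeholders, not taken from the repo):

```python
"""add example_flag to chat_session

Revision ID: abc123def456
Revises: 000000000000
"""
from alembic import op
import sqlalchemy as sa

# Placeholder identifiers; `alembic revision -m "..."` generates real ones.
revision = "abc123def456"
down_revision = "000000000000"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Hand-written because --autogenerate is not available in this setup.
    op.add_column(
        "chat_session",
        sa.Column("example_flag", sa.Boolean(), nullable=False, server_default="false"),
    )


def downgrade() -> None:
    op.drop_column("chat_session", "example_flag")
```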

View File

@@ -0,0 +1,29 @@
"""add is_clarification to chat_message
Revision ID: 18b5b2524446
Revises: 87c52ec39f84
Create Date: 2025-01-16
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "18b5b2524446"
down_revision = "87c52ec39f84"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"chat_message",
sa.Column(
"is_clarification", sa.Boolean(), nullable=False, server_default="false"
),
)
def downgrade() -> None:
op.drop_column("chat_message", "is_clarification")

View File

@@ -0,0 +1,27 @@
"""Add display_name to model_configuration
Revision ID: 7bd55f264e1b
Revises: e8f0d2a38171
Create Date: 2025-12-04
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "7bd55f264e1b"
down_revision = "e8f0d2a38171"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"model_configuration",
sa.Column("display_name", sa.String(), nullable=True),
)
def downgrade() -> None:
op.drop_column("model_configuration", "display_name")

View File

@@ -0,0 +1,55 @@
"""update_default_system_prompt
Revision ID: 87c52ec39f84
Revises: 7bd55f264e1b
Create Date: 2025-12-05 15:54:06.002452
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "87c52ec39f84"
down_revision = "7bd55f264e1b"
branch_labels = None
depends_on = None
DEFAULT_PERSONA_ID = 0
# ruff: noqa: E501, W605 start
DEFAULT_SYSTEM_PROMPT = """
You are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient.
The current date is [[CURRENT_DATETIME]].[[CITATION_GUIDANCE]]
# Response Style
You use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make your responses more readable and engaging.
You use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, symbols, etc.: '$$\\n[expression]\\n$$' for standalone cases and '\\( [expression] \\)' when inline.
For code you prefer to use Markdown and specify the language.
You can use horizontal rules (---) to separate sections of your responses.
You can use Markdown tables to format your responses for data, lists, and other structured information.
""".lstrip()
# ruff: noqa: E501, W605 end
def upgrade() -> None:
conn = op.get_bind()
conn.execute(
sa.text(
"""
UPDATE persona
SET system_prompt = :system_prompt
WHERE id = :persona_id
"""
),
{"system_prompt": DEFAULT_SYSTEM_PROMPT, "persona_id": DEFAULT_PERSONA_ID},
)
def downgrade() -> None:
# We don't revert the system prompt on downgrade since we don't know
# what the previous value was. The new prompt is a reasonable default.
pass

View File

@@ -0,0 +1,62 @@
"""update_default_tool_descriptions
Revision ID: a01bf2971c5d
Revises: 87c52ec39f84
Create Date: 2025-12-16 15:21:25.656375
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "a01bf2971c5d"
down_revision = "18b5b2524446"
branch_labels = None
depends_on = None
# new tool descriptions (12/2025)
TOOL_DESCRIPTIONS = {
"SearchTool": "The Search Action allows the agent to search through connected knowledge to help build an answer.",
"ImageGenerationTool": (
"The Image Generation Action allows the agent to use DALL-E 3 or GPT-IMAGE-1 to generate images. "
"The action will be used when the user asks the agent to generate an image."
),
"WebSearchTool": (
"The Web Search Action allows the agent "
"to perform internet searches for up-to-date information."
),
"KnowledgeGraphTool": (
"The Knowledge Graph Search Action allows the agent to search the "
"Knowledge Graph for information. This tool can (for now) only be active in the KG Beta Agent, "
"and it requires the Knowledge Graph to be enabled."
),
"OktaProfileTool": (
"The Okta Profile Action allows the agent to fetch the current user's information from Okta. "
"This may include the user's name, email, phone number, address, and other details such as their "
"manager and direct reports."
),
}
def upgrade() -> None:
conn = op.get_bind()
conn.execute(sa.text("BEGIN"))
try:
for tool_id, description in TOOL_DESCRIPTIONS.items():
conn.execute(
sa.text(
"UPDATE tool SET description = :description WHERE in_code_tool_id = :tool_id"
),
{"description": description, "tool_id": tool_id},
)
conn.execute(sa.text("COMMIT"))
except Exception as e:
conn.execute(sa.text("ROLLBACK"))
raise e
def downgrade() -> None:
pass

View File

@@ -8,6 +8,7 @@ from sqlalchemy import func
from sqlalchemy import Select
from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.orm import Session
from ee.onyx.server.user_group.models import SetCuratorRequest
@@ -362,14 +363,29 @@ def _check_user_group_is_modifiable(user_group: UserGroup) -> None:
def _add_user__user_group_relationships__no_commit(
db_session: Session, user_group_id: int, user_ids: list[UUID]
) -> list[User__UserGroup]:
"""NOTE: does not commit the transaction."""
relationships = [
User__UserGroup(user_id=user_id, user_group_id=user_group_id)
for user_id in user_ids
]
db_session.add_all(relationships)
return relationships
) -> None:
"""NOTE: does not commit the transaction.
This function is idempotent - it will skip users who are already in the group
to avoid duplicate key violations during concurrent operations or re-syncs.
Uses ON CONFLICT DO NOTHING to keep inserts atomic under concurrency.
"""
if not user_ids:
return
insert_stmt = (
insert(User__UserGroup)
.values(
[
{"user_id": user_id, "user_group_id": user_group_id}
for user_id in user_ids
]
)
.on_conflict_do_nothing(
index_elements=[User__UserGroup.user_group_id, User__UserGroup.user_id]
)
)
db_session.execute(insert_stmt)
def _add_user_group__cc_pair_relationships__no_commit(
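
The hunk above replaces a bulk `add_all` with PostgreSQL's `INSERT ... ON CONFLICT DO NOTHING`, making membership inserts idempotent under concurrent syncs. A standalone sketch of the same pattern, using a hypothetical association table rather than the project's models:

```python
from sqlalchemy import Column, Integer, MetaData, Table
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.orm import Session

metadata = MetaData()

# Hypothetical association table with a composite primary key.
membership = Table(
    "membership",
    metadata,
    Column("group_id", Integer, primary_key=True),
    Column("user_id", Integer, primary_key=True),
)


def add_members(db_session: Session, group_id: int, user_ids: list[int]) -> None:
    """Idempotently add users to a group; existing memberships are skipped."""
    if not user_ids:
        return
    stmt = (
        insert(membership)
        .values([{"group_id": group_id, "user_id": uid} for uid in user_ids])
        # Rows that collide with the (group_id, user_id) primary key are ignored,
        # so concurrent syncs no longer raise duplicate-key errors.
        .on_conflict_do_nothing(index_elements=["group_id", "user_id"])
    )
    db_session.execute(stmt)
    # NOTE: mirrors the "__no_commit" convention above; the caller commits.
```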

View File

@@ -8,12 +8,10 @@ from ee.onyx.server.query_and_chat.models import (
BasicCreateChatMessageWithHistoryRequest,
)
from onyx.auth.users import current_user
from onyx.chat.chat_utils import combine_message_thread
from onyx.chat.chat_utils import create_chat_history_chain
from onyx.chat.models import ChatBasicResponse
from onyx.chat.process_message import gather_stream
from onyx.chat.process_message import stream_chat_message_objects
from onyx.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE
from onyx.configs.constants import MessageType
from onyx.context.search.models import OptionalSearchSetting
from onyx.context.search.models import RetrievalDetails
@@ -24,7 +22,6 @@ from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.llm.factory import get_llms_for_persona
from onyx.natural_language_processing.utils import get_tokenizer
from onyx.secondary_llm_flows.query_expansion import thread_based_query_rephrase
from onyx.server.query_and_chat.models import CreateChatMessageRequest
from onyx.utils.logger import setup_logger
@@ -168,8 +165,6 @@ def handle_send_message_simple_with_history(
provider_type=llm.config.model_provider,
)
max_history_tokens = int(llm.config.max_input_tokens * CHAT_TARGET_CHUNK_PERCENTAGE)
# Every chat Session begins with an empty root message
root_message = get_or_create_root_message(
chat_session_id=chat_session.id, db_session=db_session
@@ -188,17 +183,6 @@ def handle_send_message_simple_with_history(
)
db_session.commit()
history_str = combine_message_thread(
messages=msg_history,
max_tokens=max_history_tokens,
llm_tokenizer=llm_tokenizer,
)
rephrased_query = req.query_override or thread_based_query_rephrase(
user_query=query,
history_str=history_str,
)
if req.retrieval_options is None and req.search_doc_ids is None:
retrieval_options: RetrievalDetails | None = RetrievalDetails(
run_search=OptionalSearchSetting.ALWAYS,
@@ -216,7 +200,7 @@ def handle_send_message_simple_with_history(
retrieval_options=retrieval_options,
# Simple API does not support reranking, hide complexity from user
rerank_settings=None,
query_override=rephrased_query,
query_override=None,
chunks_above=0,
chunks_below=0,
full_doc=req.full_doc,

View File

@@ -56,6 +56,7 @@ from httpx_oauth.oauth2 import OAuth2Token
from pydantic import BaseModel
from sqlalchemy import nulls_last
from sqlalchemy import select
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession
from onyx.auth.api_key import get_hashed_api_key_from_request
@@ -218,7 +219,7 @@ def verify_email_is_invited(email: str) -> None:
raise PermissionError("Email must be specified")
try:
email_info = validate_email(email)
email_info = validate_email(email, check_deliverability=False)
except EmailUndeliverableError:
raise PermissionError("Email is not valid")
@@ -226,7 +227,9 @@ def verify_email_is_invited(email: str) -> None:
try:
# normalized emails are now being inserted into the db
# we can remove this normalization on read after some time has passed
email_info_whitelist = validate_email(email_whitelist)
email_info_whitelist = validate_email(
email_whitelist, check_deliverability=False
)
except EmailNotValidError:
continue
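The change above passes check_deliverability=False so that validation stays a local syntax/normalization check rather than performing DNS/MX lookups. A small hedged sketch of that call pattern with the email-validator package, assuming a 2.x version where the result exposes .normalized:

from email_validator import EmailNotValidError, validate_email

try:
    # check_deliverability=False skips the DNS lookup, so this never blocks on
    # network access and only verifies syntax plus normalization.
    info = validate_email("Some.User@Example.COM", check_deliverability=False)
    normalized = info.normalized  # e.g. "Some.User@example.com"
except EmailNotValidError:
    normalized = None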
@@ -339,6 +342,39 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
user_create, safe=safe, request=request
) # type: ignore
user_created = True
except IntegrityError as error:
# Race condition: another request created the same user after the
# pre-insert existence check but before our commit.
await self.user_db.session.rollback()
logger.warning(
"IntegrityError while creating user %s, assuming duplicate: %s",
user_create.email,
str(error),
)
try:
user = await self.get_by_email(user_create.email)
except exceptions.UserNotExists:
# Unexpected integrity error, surface it for handling upstream.
raise error
if MULTI_TENANT:
user_by_session = await db_session.get(User, user.id)
if user_by_session:
user = user_by_session
if (
user.role.is_web_login()
or not isinstance(user_create, UserCreate)
or not user_create.role.is_web_login()
):
raise exceptions.UserAlreadyExists()
user_update = UserUpdateWithRole(
password=user_create.password,
is_verified=user_create.is_verified,
role=user_create.role,
)
user = await self.update(user_update, user)
except exceptions.UserAlreadyExists:
user = await self.get_by_email(user_create.email)
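The new except block above resolves a create-user race: if another request commits the same email between the existence check and this insert, the IntegrityError is caught, the session is rolled back, and the already-created user is fetched instead. A minimal standalone sketch of that recover-on-conflict pattern follows; get_or_create_user and its helper callables are hypothetical, not the fastapi-users or Onyx APIs.

from collections.abc import Awaitable, Callable

from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession


async def get_or_create_user(
    session: AsyncSession,
    email: str,
    insert_user: Callable[[AsyncSession, str], Awaitable[object]],
    fetch_user_by_email: Callable[[AsyncSession, str], Awaitable[object | None]],
) -> object:
    """insert_user / fetch_user_by_email are assumed helpers, not real Onyx APIs."""
    try:
        user = await insert_user(session, email)
        await session.commit()
        return user
    except IntegrityError:
        # Another request created the same user between our existence check and
        # this commit; roll back and adopt the row that won the race.
        await session.rollback()
        existing = await fetch_user_by_email(session, email)
        if existing is None:
            # Not actually a duplicate-email conflict; surface the original error.
            raise
        return existing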

View File

@@ -816,10 +816,14 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
secondary_cc_pair_ids: list[int] = []
secondary_search_settings = get_secondary_search_settings(db_session)
if secondary_search_settings:
# Include paused CC pairs during embedding swap
# For ACTIVE_ONLY, we skip paused connectors
include_paused = (
secondary_search_settings.switchover_type
!= SwitchoverType.ACTIVE_ONLY
)
standard_cc_pair_ids = (
fetch_indexable_standard_connector_credential_pair_ids(
db_session, active_cc_pairs_only=False
db_session, active_cc_pairs_only=not include_paused
)
)
user_file_cc_pair_ids = (
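The hunk above derives whether paused connector/credential pairs should still be indexed from the switchover type: during an embedding swap they are normally included, but ACTIVE_ONLY restricts indexing to active pairs. A tiny sketch of that rule, with an illustrative stand-in enum rather than the real SwitchoverType:

from enum import Enum


class SwitchoverType(Enum):
    ACTIVE_ONLY = "active_only"
    ALL = "all"


def should_include_paused_pairs(switchover_type: SwitchoverType) -> bool:
    # During an embedding swap, paused connector/credential pairs are normally
    # re-indexed too; ACTIVE_ONLY opts out and limits indexing to active pairs.
    return switchover_type != SwitchoverType.ACTIVE_ONLY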

View File

@@ -105,52 +105,49 @@ S, U1, TC, TR, R -- agent calls another tool -> S, U1, TC, TR, TC, TR, R, A1
- Reminder moved to the end
```
## Product considerations
Project files are important for the entire duration of the chat session. If the user has uploaded project files, they are likely very intent on working with
those files. The LLM is much better at referencing documents close to the end of the context window, so they are kept there for ease of access.
User uploaded files are considered relevant for that point in time; it is ok if the Agent forgets about them as the chat gets long. If every uploaded file were
constantly moved towards the end of the chat, it would degrade quality as these stack up. Even with a single file, there is some cost to pushing the previous
User Message further away. This tradeoff is accepted for Projects because of the intent of the feature.
Reminders are absolutely necessary to ensure 1-2 specific instructions get followed with a very high probability. They are less detailed than the system prompt
and should be very targeted to work reliably and also not interfere with the last user message.
## Reasons / Experiments
Custom Agent instructions placed in the system prompt are poorly followed. They also degrade performance of the system, especially when the instructions
are orthogonal (or even possibly contradictory) to the system prompt. For weaker models, this causes strange artifacts in tool calls and final responses
that completely ruin the user experience. Empirically, this approach works better across a range of models, especially when the history gets longer.
Having the Custom Agent instructions not move means they fade more as the chat gets long, which is also not ok from a UX perspective.
Project files are important for the entire duration of the chat session. If the user has uploaded project files, they are likely very intent on working with
those files. The LLM is much better at referencing documents close to the end of the context window, so they are kept there for ease of access.
Reminders are absolutely necessary to ensure 1-2 specific instructions get followed with a very high probability. They are less detailed than the system prompt
and should be very targeted to work reliably.
User uploaded files are considered relevant for that point in time; it is ok if the Agent forgets about them as the chat gets long. If every uploaded file were
constantly moved towards the end of the chat, it would degrade quality as these stack up. Even with a single file, there is some cost to pushing the previous
User Message further away. This tradeoff is accepted for Projects because of the intent of the feature.
## Other related pointers
- How messages, files, images are stored can be found in db/models.py
# Appendix (just random tidbits for those interested)
- Reminder messages are placed at the end of the prompt because all model fine tuning approaches cause the LLMs to attend very strongly to the tokens at the very
back of the context closest to generation. This is the only way to get the LLMs to not miss critical information and for the product to be reliable. Specifically
the built-in reminders are around citations and what tools it should call in certain situations.
- LLMs are able to handle changes in topic best at message boundaries. There are special tokens under the hood for this. We also use this property to slice up
the history in the way presented above.
- Different LLMs vary in this but some now have a section that cannot be set via the API layer called the "System Prompt" (OpenAI terminology) which contains
Different LLMs vary in this but some now have a section that cannot be set via the API layer called the "System Prompt" (OpenAI terminology) which contains
information like the model cutoff date, identity, and some other basic non-changing information. The System prompt described above is in that convention called
the "Developer Prompt". It seems the distribution of the System Prompt, by which I mean the style of wording and terms used can also affect the behavior. This
is different between different models and not necessarily scientific so the system prompt is built from an exploration across different models. It currently
starts with: "You are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the user's intent..."
- The document json includes a field for the LLM to cite (it's a single number) to make citations reliable and avoid weird artifacts. It's called "document" so
LLMs are able to handle changes in topic best at message boundaries. There are special tokens under the hood for this. We also use this property to slice up
the history in the way presented above.
Reminder messages are placed at the end of the prompt because all model fine tuning approaches cause the LLMs to attend very strongly to the tokens at the very
back of the context closest to generation. This is the only way to get the LLMs to not miss critical information and for the product to be reliable. Specifically
the built-in reminders are around citations and what tools it should call in certain situations.
The document json includes a field for the LLM to cite (it's a single number) to make citations reliable and avoid weird artifacts. It's called "document" so
that the LLM does not create weird artifacts in reasoning like "I should reference citation_id: 5 for...". It is also strategically placed so that it is easy to
reference. It is followed by a couple short sections like the metadata and title before the long content section. It seems LLMs are still better at local
attention despite having global access.
- In a similar concept, LLM instructions in the system prompt are structured specifically so that there are coherent sections for the LLM to attend to. This is
In a similar concept, LLM instructions in the system prompt are structured specifically so that there are coherent sections for the LLM to attend to. This is
fairly surprising actually but if there is a line of instructions effectively saying "If you try to use some tools and find that you need more information or
need to call additional tools, you are encouraged to do this", having this in the Tool section of the System prompt makes all the LLMs follow it well, but if it's
even just a paragraph away, like near the beginning of the prompt, it is often ignored. The difference is as drastic as going from a 30% follow rate to a 90% follow
rate just by moving the same statement a few sentences.
- Custom Agent prompts are also completely separate from the system prompt. Having potentially orthogonal instructions in the system prompt (both the actual
instructions and the writing style) can greatly deteriorate the quality of the responses. There is also a product motivation to keep it close to the end of
generation so it's strongly followed.
## Other related pointers
- How messages, files, images are stored can be found in backend/onyx/db/models.py, there is also a README.md under that directory that may be helpful.
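Pulling the considerations above together, the resulting prompt layout keeps the system prompt first, the custom agent instructions separate from it, project files near the end, and the reminder last. A rough sketch of that ordering, assuming a simplified message type (the real ChatMessageSimple model is richer):

from dataclasses import dataclass


@dataclass
class Msg:
    role: str  # "system", "user", "assistant", "tool"
    text: str


def assemble_prompt(
    system_prompt: Msg,
    custom_agent_prompt: Msg | None,
    history: list[Msg],
    project_files_msg: Msg | None,
    last_user_msg: Msg,
    reminder: Msg | None,
) -> list[Msg]:
    # Older history fades naturally; project files and the reminder stay near the
    # end of the context, where the model attends to them most strongly.
    out = [system_prompt]
    if custom_agent_prompt:
        out.append(custom_agent_prompt)
    out.extend(history)
    if project_files_msg:
        out.append(project_files_msg)
    out.append(last_user_msg)
    if reminder:
        out.append(reminder)
    return out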

View File

@@ -26,6 +26,8 @@ class ChatStateContainer:
self.answer_tokens: str | None = None
# Store citation mapping for building citation_docs_info during partial saves
self.citation_to_doc: dict[int, SearchDoc] = {}
# True if this turn is a clarification question (deep research flow)
self.is_clarification: bool = False
def add_tool_call(self, tool_call: ToolCallInfo) -> None:
"""Add a tool call to the accumulated state."""
@@ -43,12 +45,16 @@ class ChatStateContainer:
"""Set the citation mapping from citation processor."""
self.citation_to_doc = citation_to_doc
def set_is_clarification(self, is_clarification: bool) -> None:
"""Set whether this turn is a clarification question."""
self.is_clarification = is_clarification
def run_chat_llm_with_state_containers(
func: Callable[..., None],
is_connected: Callable[[], bool],
emitter: Emitter,
state_container: ChatStateContainer,
is_connected: Callable[[], bool],
*args: Any,
**kwargs: Any,
) -> Generator[Packet, None]:

View File

@@ -477,7 +477,10 @@ def load_chat_file(
# Extract text content if it's a text file type (not an image)
content_text = None
file_type = file_descriptor["type"]
# `FileDescriptor` is often JSON-roundtripped (e.g. JSONB / API), so `type`
# may arrive as a raw string value instead of a `ChatFileType`.
file_type = ChatFileType(file_descriptor["type"])
if file_type.is_text_file():
try:
content_text = content.decode("utf-8")
@@ -708,3 +711,21 @@ def get_custom_agent_prompt(persona: Persona, chat_session: ChatSession) -> str
return chat_session.project.instructions
else:
return None
def is_last_assistant_message_clarification(chat_history: list[ChatMessage]) -> bool:
"""Check if the last assistant message in chat history was a clarification question.
This is used in the deep research flow to determine whether to skip the
clarification step when the user has already responded to a clarification.
Args:
chat_history: List of ChatMessage objects in chronological order
Returns:
True if the last assistant message has is_clarification=True, False otherwise
"""
for message in reversed(chat_history):
if message.message_type == MessageType.ASSISTANT:
return message.is_clarification
return False
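An illustrative walk-through of the helper above, using a simplified stand-in for ChatMessage; the field names mirror the diff, but this is not the real model.

from dataclasses import dataclass
from enum import Enum


class MessageType(Enum):
    USER = "user"
    ASSISTANT = "assistant"


@dataclass
class FakeChatMessage:
    message_type: MessageType
    is_clarification: bool = False


history = [
    FakeChatMessage(MessageType.USER),
    FakeChatMessage(MessageType.ASSISTANT, is_clarification=True),  # "Which region do you mean?"
    FakeChatMessage(MessageType.USER),  # the user's answer to the clarification
]

# The most recent assistant turn was a clarification question, so the deep
# research loop would be invoked with skip_clarification=True for this request.
last_assistant = next(
    m for m in reversed(history) if m.message_type == MessageType.ASSISTANT
)
assert last_assistant.is_clarification is True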

View File

@@ -1,8 +1,5 @@
import json
from collections.abc import Callable
from collections.abc import Mapping
from collections.abc import Sequence
from typing import Any
from typing import cast
from sqlalchemy.orm import Session
@@ -10,6 +7,9 @@ from sqlalchemy.orm import Session
from onyx.chat.chat_state import ChatStateContainer
from onyx.chat.citation_processor import DynamicCitationProcessor
from onyx.chat.emitter import Emitter
from onyx.chat.llm_step import run_llm_step
from onyx.chat.llm_step import TOOL_CALL_MSG_ARGUMENTS
from onyx.chat.llm_step import TOOL_CALL_MSG_FUNC_NAME
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import ExtractedProjectFiles
from onyx.chat.models import LlmStepResult
@@ -19,38 +19,20 @@ from onyx.chat.prompt_utils import build_system_prompt
from onyx.chat.prompt_utils import (
get_default_base_system_prompt,
)
from onyx.configs.app_configs import LOG_ONYX_MODEL_INTERACTIONS
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import MessageType
from onyx.context.search.models import SearchDoc
from onyx.context.search.models import SearchDocsResponse
from onyx.db.models import Persona
from onyx.file_store.models import ChatFileType
from onyx.llm.interfaces import LanguageModelInput
from onyx.llm.interfaces import LLM
from onyx.llm.interfaces import LLMUserIdentity
from onyx.llm.interfaces import ToolChoiceOptions
from onyx.llm.message_types import AssistantMessage
from onyx.llm.message_types import ChatCompletionMessage
from onyx.llm.message_types import ImageContentPart
from onyx.llm.message_types import SystemMessage
from onyx.llm.message_types import TextContentPart
from onyx.llm.message_types import ToolCall
from onyx.llm.message_types import ToolMessage
from onyx.llm.message_types import UserMessageWithParts
from onyx.llm.message_types import UserMessageWithText
from onyx.llm.utils import model_needs_formatting_reenabled
from onyx.prompts.chat_prompts import IMAGE_GEN_REMINDER
from onyx.prompts.chat_prompts import OPEN_URL_REMINDER
from onyx.server.query_and_chat.streaming_models import AgentResponseDelta
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
from onyx.server.query_and_chat.streaming_models import CitationInfo
from onyx.server.query_and_chat.streaming_models import OverallStop
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import ReasoningDelta
from onyx.server.query_and_chat.streaming_models import ReasoningDone
from onyx.server.query_and_chat.streaming_models import ReasoningStart
from onyx.tools.models import ToolCallInfo
from onyx.tools.models import ToolCallKickoff
from onyx.tools.models import ToolResponse
from onyx.tools.tool import Tool
from onyx.tools.tool_implementations.images.image_generation_tool import (
@@ -63,9 +45,7 @@ from onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool
from onyx.tools.tool_runner import run_tool_calls
from onyx.tracing.framework.create import generation_span
from onyx.tracing.framework.create import trace
from onyx.utils.b64 import get_image_type_from_bytes
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id
@@ -80,9 +60,6 @@ logger = setup_logger()
# Cycle 6: No more tools available, forced to answer
MAX_LLM_CYCLES = 6
TOOL_CALL_MSG_FUNC_NAME = "function_name"
TOOL_CALL_MSG_ARGUMENTS = "arguments"
def _build_project_file_citation_mapping(
project_file_metadata: list[ProjectFileMetadata],
@@ -127,15 +104,23 @@ def construct_message_history(
custom_agent_prompt: ChatMessageSimple | None,
simple_chat_history: list[ChatMessageSimple],
reminder_message: ChatMessageSimple | None,
project_files: ExtractedProjectFiles,
project_files: ExtractedProjectFiles | None,
available_tokens: int,
last_n_user_messages: int | None = None,
) -> list[ChatMessageSimple]:
if last_n_user_messages is not None:
if last_n_user_messages <= 0:
raise ValueError(
"filtering chat history by last N user messages must be a value greater than 0"
)
history_token_budget = available_tokens
history_token_budget -= system_prompt.token_count
history_token_budget -= (
custom_agent_prompt.token_count if custom_agent_prompt else 0
)
history_token_budget -= project_files.total_token_count
if project_files:
history_token_budget -= project_files.total_token_count
history_token_budget -= reminder_message.token_count if reminder_message else 0
if history_token_budget < 0:
@@ -146,7 +131,7 @@ def construct_message_history(
result = [system_prompt]
if custom_agent_prompt:
result.append(custom_agent_prompt)
if project_files.project_file_texts:
if project_files and project_files.project_file_texts:
project_message = _create_project_files_message(
project_files, token_counter=None
)
@@ -155,6 +140,26 @@ def construct_message_history(
result.append(reminder_message)
return result
# If last_n_user_messages is set, filter history to only include the last n user messages
if last_n_user_messages is not None:
# Find all user message indices
user_msg_indices = [
i
for i, msg in enumerate(simple_chat_history)
if msg.message_type == MessageType.USER
]
if not user_msg_indices:
raise ValueError("No user message found in simple_chat_history")
# If we have more than n user messages, keep only the last n
if len(user_msg_indices) > last_n_user_messages:
# Find the index of the n-th user message from the end
# For example, if last_n_user_messages=2, we want the 2nd-to-last user message
nth_user_msg_index = user_msg_indices[-(last_n_user_messages)]
# Keep everything from that user message onwards
simple_chat_history = simple_chat_history[nth_user_msg_index:]
# Find the last USER message in the history
# The history may contain tool calls and responses after the last user message
last_user_msg_index = None
@@ -202,7 +207,7 @@ def construct_message_history(
break
# Attach project images to the last user message
if project_files.project_image_files:
if project_files and project_files.project_image_files:
existing_images = last_user_message.image_files or []
last_user_message = ChatMessageSimple(
message=last_user_message.message,
@@ -224,7 +229,7 @@ def construct_message_history(
result.append(custom_agent_prompt)
# 3. Add project files message (inserted before last user message)
if project_files.project_file_texts:
if project_files and project_files.project_file_texts:
project_message = _create_project_files_message(
project_files, token_counter=None
)
@@ -274,509 +279,6 @@ def _create_project_files_message(
)
def translate_history_to_llm_format(
history: list[ChatMessageSimple],
) -> LanguageModelInput:
"""Convert a list of ChatMessageSimple to LanguageModelInput format.
Converts ChatMessageSimple messages to ChatCompletionMessage format,
handling different message types and image files for multimodal support.
"""
messages: list[ChatCompletionMessage] = []
for msg in history:
if msg.message_type == MessageType.SYSTEM:
system_msg: SystemMessage = {
"role": "system",
"content": msg.message,
}
messages.append(system_msg)
elif msg.message_type == MessageType.USER:
# Handle user messages with potential images
if msg.image_files:
# Build content parts: text + images
content_parts: list[TextContentPart | ImageContentPart] = [
{"type": "text", "text": msg.message}
]
# Add image parts
for img_file in msg.image_files:
if img_file.file_type == ChatFileType.IMAGE:
try:
image_type = get_image_type_from_bytes(img_file.content)
base64_data = img_file.to_base64()
image_url = f"data:{image_type};base64,{base64_data}"
image_part: ImageContentPart = {
"type": "image_url",
"image_url": {"url": image_url},
}
content_parts.append(image_part)
except Exception as e:
logger.warning(
f"Failed to process image file {img_file.file_id}: {e}. "
"Skipping image."
)
user_msg_with_parts: UserMessageWithParts = {
"role": "user",
"content": content_parts,
}
messages.append(user_msg_with_parts)
else:
# Simple text-only user message
user_msg_text: UserMessageWithText = {
"role": "user",
"content": msg.message,
}
messages.append(user_msg_text)
elif msg.message_type == MessageType.ASSISTANT:
assistant_msg: AssistantMessage = {
"role": "assistant",
"content": msg.message or None,
}
messages.append(assistant_msg)
elif msg.message_type == MessageType.TOOL_CALL:
# Tool calls are represented as Assistant Messages with tool_calls field
# Try to reconstruct tool call structure if we have tool_call_id
tool_calls: list[ToolCall] = []
if msg.tool_call_id:
try:
# Parse the message content (which should contain function_name and arguments)
tool_call_data = json.loads(msg.message) if msg.message else {}
if (
isinstance(tool_call_data, dict)
and TOOL_CALL_MSG_FUNC_NAME in tool_call_data
):
function_name = tool_call_data.get(
TOOL_CALL_MSG_FUNC_NAME, "unknown"
)
tool_args = tool_call_data.get(TOOL_CALL_MSG_ARGUMENTS, {})
else:
function_name = "unknown"
tool_args = (
tool_call_data if isinstance(tool_call_data, dict) else {}
)
# NOTE: if the model is trained on a different tool call format, this may slightly interfere
# with the future tool calls, if it doesn't look like this. Almost certainly not a big deal.
tool_call: ToolCall = {
"id": msg.tool_call_id,
"type": "function",
"function": {
"name": function_name,
"arguments": json.dumps(tool_args) if tool_args else "{}",
},
}
tool_calls.append(tool_call)
except (json.JSONDecodeError, ValueError) as e:
logger.warning(
f"Failed to parse tool call data for tool_call_id {msg.tool_call_id}: {e}. "
"Including as content-only message."
)
assistant_msg_with_tool: AssistantMessage = {
"role": "assistant",
"content": None, # The tool call is parsed, doesn't need to be duplicated in the content
}
if tool_calls:
assistant_msg_with_tool["tool_calls"] = tool_calls
messages.append(assistant_msg_with_tool)
elif msg.message_type == MessageType.TOOL_CALL_RESPONSE:
if not msg.tool_call_id:
raise ValueError(
f"Tool call response message encountered but tool_call_id is not available. Message: {msg}"
)
tool_msg: ToolMessage = {
"role": "tool",
"content": msg.message,
"tool_call_id": msg.tool_call_id,
}
messages.append(tool_msg)
else:
logger.warning(
f"Unknown message type {msg.message_type} in history. Skipping message."
)
return messages
def _format_message_history_for_logging(
message_history: LanguageModelInput,
) -> str:
"""Format message history for logging, with special handling for tool calls.
Tool calls are formatted as JSON with 4-space indentation for readability.
"""
formatted_lines = []
separator = "================================================"
# Handle string input
if isinstance(message_history, str):
formatted_lines.append("Message [string]:")
formatted_lines.append(separator)
formatted_lines.append(f"{message_history}")
return "\n".join(formatted_lines)
# Handle sequence of messages
for i, msg in enumerate(message_history):
# Type guard: ensure msg is a dict-like object (TypedDict)
if not isinstance(msg, dict):
formatted_lines.append(f"Message {i + 1} [unknown]:")
formatted_lines.append(separator)
formatted_lines.append(f"{msg}")
if i < len(message_history) - 1:
formatted_lines.append(separator)
continue
role = msg.get("role", "unknown")
formatted_lines.append(f"Message {i + 1} [{role}]:")
formatted_lines.append(separator)
if role == "system":
content = msg.get("content", "")
if isinstance(content, str):
formatted_lines.append(f"{content}")
elif role == "user":
content = msg.get("content", "")
if isinstance(content, str):
formatted_lines.append(f"{content}")
elif isinstance(content, list):
# Handle multimodal content (text + images)
for part in content:
if isinstance(part, dict):
part_type = part.get("type")
if part_type == "text":
text = part.get("text", "")
if isinstance(text, str):
formatted_lines.append(f"{text}")
elif part_type == "image_url":
image_url_dict = part.get("image_url")
if isinstance(image_url_dict, dict):
url = image_url_dict.get("url", "")
if isinstance(url, str):
formatted_lines.append(f"[Image: {url[:50]}...]")
elif role == "assistant":
content = msg.get("content")
if content and isinstance(content, str):
formatted_lines.append(f"{content}")
tool_calls = msg.get("tool_calls")
if tool_calls and isinstance(tool_calls, list):
formatted_lines.append("Tool calls:")
for tool_call in tool_calls:
if isinstance(tool_call, dict):
tool_call_dict: dict[str, Any] = {}
tool_call_id = tool_call.get("id")
tool_call_type = tool_call.get("type")
function_dict = tool_call.get("function")
if tool_call_id:
tool_call_dict["id"] = tool_call_id
if tool_call_type:
tool_call_dict["type"] = tool_call_type
if isinstance(function_dict, dict):
tool_call_dict["function"] = {
"name": function_dict.get("name", ""),
"arguments": function_dict.get("arguments", ""),
}
tool_call_json = json.dumps(tool_call_dict, indent=4)
formatted_lines.append(tool_call_json)
elif role == "tool":
content = msg.get("content", "")
tool_call_id = msg.get("tool_call_id", "")
if isinstance(content, str) and isinstance(tool_call_id, str):
formatted_lines.append(f"Tool call ID: {tool_call_id}")
formatted_lines.append(f"Response: {content}")
# Add separator before next message (or at end)
if i < len(message_history) - 1:
formatted_lines.append(separator)
return "\n".join(formatted_lines)
def run_llm_step(
history: list[ChatMessageSimple],
tool_definitions: list[dict],
tool_choice: ToolChoiceOptions,
emitter: Emitter,
llm: LLM,
turn_index: int,
citation_processor: DynamicCitationProcessor,
state_container: ChatStateContainer,
final_documents: list[SearchDoc] | None = None,
) -> tuple[LlmStepResult, int]:
# The second return value is for the turn index because reasoning counts on the frontend as a turn
# TODO this is maybe ok but does not align too well with the backend logic
llm_msg_history = translate_history_to_llm_format(history)
# Set LOG_ONYX_MODEL_INTERACTIONS to log the entire message history to the console
if LOG_ONYX_MODEL_INTERACTIONS:
logger.info(
f"Message history:\n{_format_message_history_for_logging(llm_msg_history)}"
)
id_to_tool_call_map: dict[int, dict[str, Any]] = {}
reasoning_start = False
answer_start = False
accumulated_reasoning = ""
accumulated_answer = ""
with generation_span(
model=llm.config.model_name,
model_config={
"base_url": str(llm.config.api_base or ""),
"model_impl": "litellm",
},
) as span_generation:
span_generation.span_data.input = cast(
Sequence[Mapping[str, Any]], llm_msg_history
)
for packet in llm.stream(
prompt=llm_msg_history,
tools=tool_definitions,
tool_choice=tool_choice,
structured_response_format=None, # TODO
):
if packet.usage:
usage = packet.usage
span_generation.span_data.usage = {
"input_tokens": usage.prompt_tokens,
"output_tokens": usage.completion_tokens,
"cache_read_input_tokens": usage.cache_read_input_tokens,
"cache_creation_input_tokens": usage.cache_creation_input_tokens,
}
delta = packet.choice.delta
# Should only happen once, frontend does not expect multiple
# ReasoningStart or ReasoningDone packets.
if delta.reasoning_content:
accumulated_reasoning += delta.reasoning_content
# Save reasoning incrementally to state container
state_container.set_reasoning_tokens(accumulated_reasoning)
if not reasoning_start:
emitter.emit(
Packet(
turn_index=turn_index,
obj=ReasoningStart(),
)
)
emitter.emit(
Packet(
turn_index=turn_index,
obj=ReasoningDelta(reasoning=delta.reasoning_content),
)
)
reasoning_start = True
if delta.content:
if reasoning_start:
emitter.emit(
Packet(
turn_index=turn_index,
obj=ReasoningDone(),
)
)
turn_index += 1
reasoning_start = False
if not answer_start:
emitter.emit(
Packet(
turn_index=turn_index,
obj=AgentResponseStart(
final_documents=final_documents,
),
)
)
answer_start = True
for result in citation_processor.process_token(delta.content):
if isinstance(result, str):
accumulated_answer += result
# Save answer incrementally to state container
state_container.set_answer_tokens(accumulated_answer)
emitter.emit(
Packet(
turn_index=turn_index,
obj=AgentResponseDelta(content=result),
)
)
elif isinstance(result, CitationInfo):
emitter.emit(
Packet(
turn_index=turn_index,
obj=result,
)
)
if delta.tool_calls:
if reasoning_start:
emitter.emit(
Packet(
turn_index=turn_index,
obj=ReasoningDone(),
)
)
turn_index += 1
reasoning_start = False
for tool_call_delta in delta.tool_calls:
_update_tool_call_with_delta(id_to_tool_call_map, tool_call_delta)
tool_calls = _extract_tool_call_kickoffs(id_to_tool_call_map)
if tool_calls:
tool_calls_list: list[ToolCall] = [
{
"id": kickoff.tool_call_id,
"type": "function",
"function": {
"name": kickoff.tool_name,
"arguments": json.dumps(kickoff.tool_args),
},
}
for kickoff in tool_calls
]
assistant_msg: AssistantMessage = {
"role": "assistant",
"content": accumulated_answer if accumulated_answer else None,
"tool_calls": tool_calls_list,
}
span_generation.span_data.output = [assistant_msg]
elif accumulated_answer:
span_generation.span_data.output = [
{"role": "assistant", "content": accumulated_answer}
]
# Close reasoning block if still open (stream ended with reasoning content)
if reasoning_start:
emitter.emit(
Packet(
turn_index=turn_index,
obj=ReasoningDone(),
)
)
turn_index += 1
# Flush any remaining content from citation processor
if citation_processor:
for result in citation_processor.process_token(None):
if isinstance(result, str):
accumulated_answer += result
# Save answer incrementally to state container
state_container.set_answer_tokens(accumulated_answer)
emitter.emit(
Packet(
turn_index=turn_index,
obj=AgentResponseDelta(content=result),
)
)
elif isinstance(result, CitationInfo):
emitter.emit(
Packet(
turn_index=turn_index,
obj=result,
)
)
# Note: Content (AgentResponseDelta) doesn't need an explicit end packet - OverallStop handles it
# Tool calls are handled by tool execution code and emit their own packets (e.g., SectionEnd)
if LOG_ONYX_MODEL_INTERACTIONS:
logger.debug(f"Accumulated reasoning: {accumulated_reasoning}")
logger.debug(f"Accumulated answer: {accumulated_answer}")
if tool_calls:
tool_calls_str = "\n".join(
f" - {tc.tool_name}: {json.dumps(tc.tool_args, indent=4)}"
for tc in tool_calls
)
logger.debug(f"Tool calls:\n{tool_calls_str}")
else:
logger.debug("Tool calls: []")
return (
LlmStepResult(
reasoning=accumulated_reasoning if accumulated_reasoning else None,
answer=accumulated_answer if accumulated_answer else None,
tool_calls=tool_calls if tool_calls else None,
),
turn_index,
)
def _update_tool_call_with_delta(
tool_calls_in_progress: dict[int, dict[str, Any]],
tool_call_delta: Any,
) -> None:
index = tool_call_delta.index
if index not in tool_calls_in_progress:
tool_calls_in_progress[index] = {
"id": None,
"name": None,
"arguments": "",
}
if tool_call_delta.id:
tool_calls_in_progress[index]["id"] = tool_call_delta.id
if tool_call_delta.function:
if tool_call_delta.function.name:
tool_calls_in_progress[index]["name"] = tool_call_delta.function.name
if tool_call_delta.function.arguments:
tool_calls_in_progress[index][
"arguments"
] += tool_call_delta.function.arguments
def _extract_tool_call_kickoffs(
id_to_tool_call_map: dict[int, dict[str, Any]],
) -> list[ToolCallKickoff]:
"""Extract ToolCallKickoff objects from the tool call map.
Returns a list of ToolCallKickoff objects for valid tool calls (those with both id and name).
"""
tool_calls: list[ToolCallKickoff] = []
for tool_call_data in id_to_tool_call_map.values():
if tool_call_data.get("id") and tool_call_data.get("name"):
try:
# Parse arguments JSON string to dict
tool_args = (
json.loads(tool_call_data["arguments"])
if tool_call_data["arguments"]
else {}
)
except json.JSONDecodeError:
# If parsing fails, fall back to an empty dict (most tools would still fail with it)
logger.error(
f"Failed to parse tool call arguments: {tool_call_data['arguments']}"
)
tool_args = {}
tool_calls.append(
ToolCallKickoff(
tool_call_id=tool_call_data["id"],
tool_name=tool_call_data["name"],
tool_args=tool_args,
)
)
return tool_calls
def run_llm_loop(
emitter: Emitter,
state_container: ChatStateContainer,
@@ -790,6 +292,7 @@ def run_llm_loop(
token_counter: Callable[[str], int],
db_session: Session,
forced_tool_id: int | None = None,
user_identity: LLMUserIdentity | None = None,
) -> None:
with trace("run_llm_loop", metadata={"tenant_id": get_current_tenant_id()}):
# Fix some LiteLLM issues,
@@ -821,7 +324,7 @@ def run_llm_loop(
# Pass the total budget to construct_message_history, which will handle token allocation
available_tokens = llm.config.max_input_tokens
tool_choice: ToolChoiceOptions = "auto"
tool_choice: ToolChoiceOptions = ToolChoiceOptions.AUTO
collected_tool_calls: list[ToolCallInfo] = []
# Initialize gathered_documents with project files if present
gathered_documents: list[SearchDoc] | None = (
@@ -837,6 +340,7 @@ def run_llm_loop(
should_cite_documents: bool = False
ran_image_gen: bool = False
just_ran_web_search: bool = False
has_called_search_tool: bool = False
citation_mapping: dict[int, str] = {} # Maps citation_num -> document_id/URL
current_tool_call_index = (
@@ -850,14 +354,14 @@ def run_llm_loop(
final_tools = [tool for tool in tools if tool.id == forced_tool_id]
if not final_tools:
raise ValueError(f"Tool {forced_tool_id} not found in tools")
tool_choice = "required"
tool_choice = ToolChoiceOptions.REQUIRED
forced_tool_id = None
elif llm_cycle_count == MAX_LLM_CYCLES - 1 or ran_image_gen:
# Last cycle, no tools allowed, just answer!
tool_choice = "none"
tool_choice = ToolChoiceOptions.NONE
final_tools = []
else:
tool_choice = "auto"
tool_choice = ToolChoiceOptions.AUTO
final_tools = tools
# The section below calculates the available tokens for history a bit more accurately
@@ -939,13 +443,12 @@ def run_llm_loop(
available_tokens=available_tokens,
)
# This calls the LLM, passes in the emitter which can collect packets like reasoning, answers, etc.
# This calls the LLM, yields packets (reasoning, answers, etc.) and returns the result
# It also pre-processes the tool calls in preparation for running them
llm_step_result, current_tool_call_index = run_llm_step(
step_generator = run_llm_step(
history=truncated_message_history,
tool_definitions=[tool.tool_definition() for tool in final_tools],
tool_choice=tool_choice,
emitter=emitter,
llm=llm,
turn_index=current_tool_call_index,
citation_processor=citation_processor,
@@ -954,8 +457,21 @@ def run_llm_loop(
# immediately yield the full set of found documents. This gives us the option to show the
# final set of documents immediately if desired.
final_documents=gathered_documents,
user_identity=user_identity,
)
# Consume the generator, emitting packets and capturing the final result
while True:
try:
packet = next(step_generator)
emitter.emit(packet)
except StopIteration as e:
llm_step_result, current_tool_call_index = e.value
break
# Type narrowing: generator always returns a result, so this can't be None
llm_step_result = cast(LlmStepResult, llm_step_result)
# Save citation mapping after each LLM step for incremental state updates
state_container.set_citation_mapping(citation_processor.citation_to_doc)
@@ -976,8 +492,13 @@ def run_llm_loop(
user_info=None, # TODO, this is part of memories right now, might want to separate it out
citation_mapping=citation_mapping,
citation_processor=citation_processor,
skip_search_query_expansion=has_called_search_tool,
)
# Track if search tool was called (for skipping query expansion on subsequent calls)
if tool_call.tool_name == SearchTool.NAME:
has_called_search_tool = True
# Build a mapping of tool names to tool objects for getting tool_id
tools_by_name = {tool.name: tool for tool in final_tools}
@@ -991,9 +512,6 @@ def run_llm_loop(
f"Tool '{tool_call.tool_name}' not found in tools list"
)
# Collect tool call info with reasoning tokens from this LLM step
# All tool calls from the same loop iteration share the same reasoning tokens
# Extract search_docs if this is a search tool response
search_docs = None
if isinstance(tool_response.rich_response, SearchDocsResponse):
@@ -1110,10 +628,6 @@ def run_llm_loop(
if not llm_step_result or not llm_step_result.answer:
raise RuntimeError("LLM did not return an answer.")
# Note: All state (answer, reasoning, citations, tool_calls) is saved incrementally
# in state_container. The process_message layer will persist to DB.
# Signal completion
emitter.emit(
Packet(turn_index=current_tool_call_index, obj=OverallStop(type="stop"))
)
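The consumption loop above relies on a standard Python convention: when a generator is driven manually with next(), its return value travels on StopIteration.value. A standalone sketch of that pattern (names are illustrative, not the real run_llm_step):

from collections.abc import Generator


def produce(n: int) -> Generator[str, None, int]:
    total = 0
    for i in range(n):
        total += i
        yield f"packet {i}"  # streamed to the caller as it is produced
    return total  # surfaces as StopIteration.value for a manual next() loop


gen = produce(3)
result: int | None = None
while True:
    try:
        packet = next(gen)
        print(packet)  # in the real loop, emitter.emit(packet)
    except StopIteration as e:
        result = e.value  # the generator's return value
        break

assert result == 3  # 0 + 1 + 2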

View File

@@ -0,0 +1,518 @@
import json
from collections.abc import Generator
from collections.abc import Mapping
from collections.abc import Sequence
from typing import Any
from typing import cast
from onyx.chat.chat_state import ChatStateContainer
from onyx.chat.citation_processor import DynamicCitationProcessor
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import LlmStepResult
from onyx.configs.app_configs import LOG_ONYX_MODEL_INTERACTIONS
from onyx.configs.constants import MessageType
from onyx.context.search.models import SearchDoc
from onyx.file_store.models import ChatFileType
from onyx.llm.interfaces import LanguageModelInput
from onyx.llm.interfaces import LLM
from onyx.llm.interfaces import LLMUserIdentity
from onyx.llm.interfaces import ToolChoiceOptions
from onyx.llm.models import AssistantMessage
from onyx.llm.models import ChatCompletionMessage
from onyx.llm.models import FunctionCall
from onyx.llm.models import ImageContentPart
from onyx.llm.models import ImageUrlDetail
from onyx.llm.models import SystemMessage
from onyx.llm.models import TextContentPart
from onyx.llm.models import ToolCall
from onyx.llm.models import ToolMessage
from onyx.llm.models import UserMessage
from onyx.server.query_and_chat.streaming_models import AgentResponseDelta
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
from onyx.server.query_and_chat.streaming_models import CitationInfo
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import ReasoningDelta
from onyx.server.query_and_chat.streaming_models import ReasoningDone
from onyx.server.query_and_chat.streaming_models import ReasoningStart
from onyx.tools.models import ToolCallKickoff
from onyx.tracing.framework.create import generation_span
from onyx.utils.b64 import get_image_type_from_bytes
from onyx.utils.logger import setup_logger
logger = setup_logger()
TOOL_CALL_MSG_FUNC_NAME = "function_name"
TOOL_CALL_MSG_ARGUMENTS = "arguments"
def _format_message_history_for_logging(
message_history: LanguageModelInput,
) -> str:
"""Format message history for logging, with special handling for tool calls.
Tool calls are formatted as JSON with 4-space indentation for readability.
"""
formatted_lines = []
separator = "================================================"
# Handle string input
if isinstance(message_history, str):
formatted_lines.append("Message [string]:")
formatted_lines.append(separator)
formatted_lines.append(f"{message_history}")
return "\n".join(formatted_lines)
# Handle sequence of messages
for i, msg in enumerate(message_history):
if isinstance(msg, SystemMessage):
formatted_lines.append(f"Message {i + 1} [system]:")
formatted_lines.append(separator)
formatted_lines.append(f"{msg.content}")
elif isinstance(msg, UserMessage):
formatted_lines.append(f"Message {i + 1} [user]:")
formatted_lines.append(separator)
if isinstance(msg.content, str):
formatted_lines.append(f"{msg.content}")
elif isinstance(msg.content, list):
# Handle multimodal content (text + images)
for part in msg.content:
if isinstance(part, TextContentPart):
formatted_lines.append(f"{part.text}")
elif isinstance(part, ImageContentPart):
url = part.image_url.url
formatted_lines.append(f"[Image: {url[:50]}...]")
elif isinstance(msg, AssistantMessage):
formatted_lines.append(f"Message {i + 1} [assistant]:")
formatted_lines.append(separator)
if msg.content:
formatted_lines.append(f"{msg.content}")
if msg.tool_calls:
formatted_lines.append("Tool calls:")
for tool_call in msg.tool_calls:
tool_call_dict: dict[str, Any] = {
"id": tool_call.id,
"type": tool_call.type,
"function": {
"name": tool_call.function.name,
"arguments": tool_call.function.arguments,
},
}
tool_call_json = json.dumps(tool_call_dict, indent=4)
formatted_lines.append(tool_call_json)
elif isinstance(msg, ToolMessage):
formatted_lines.append(f"Message {i + 1} [tool]:")
formatted_lines.append(separator)
formatted_lines.append(f"Tool call ID: {msg.tool_call_id}")
formatted_lines.append(f"Response: {msg.content}")
else:
# Fallback for unknown message types
formatted_lines.append(f"Message {i + 1} [unknown]:")
formatted_lines.append(separator)
formatted_lines.append(f"{msg}")
# Add separator before next message (or at end)
if i < len(message_history) - 1:
formatted_lines.append(separator)
return "\n".join(formatted_lines)
def _update_tool_call_with_delta(
tool_calls_in_progress: dict[int, dict[str, Any]],
tool_call_delta: Any,
) -> None:
index = tool_call_delta.index
if index not in tool_calls_in_progress:
tool_calls_in_progress[index] = {
"id": None,
"name": None,
"arguments": "",
}
if tool_call_delta.id:
tool_calls_in_progress[index]["id"] = tool_call_delta.id
if tool_call_delta.function:
if tool_call_delta.function.name:
tool_calls_in_progress[index]["name"] = tool_call_delta.function.name
if tool_call_delta.function.arguments:
tool_calls_in_progress[index][
"arguments"
] += tool_call_delta.function.arguments
def _extract_tool_call_kickoffs(
id_to_tool_call_map: dict[int, dict[str, Any]],
) -> list[ToolCallKickoff]:
"""Extract ToolCallKickoff objects from the tool call map.
Returns a list of ToolCallKickoff objects for valid tool calls (those with both id and name).
"""
tool_calls: list[ToolCallKickoff] = []
for tool_call_data in id_to_tool_call_map.values():
if tool_call_data.get("id") and tool_call_data.get("name"):
try:
# Parse arguments JSON string to dict
tool_args = (
json.loads(tool_call_data["arguments"])
if tool_call_data["arguments"]
else {}
)
except json.JSONDecodeError:
# If parsing fails, fall back to an empty dict (most tools would still fail with it)
logger.error(
f"Failed to parse tool call arguments: {tool_call_data['arguments']}"
)
tool_args = {}
tool_calls.append(
ToolCallKickoff(
tool_call_id=tool_call_data["id"],
tool_name=tool_call_data["name"],
tool_args=tool_args,
)
)
return tool_calls
def translate_history_to_llm_format(
history: list[ChatMessageSimple],
) -> LanguageModelInput:
"""Convert a list of ChatMessageSimple to LanguageModelInput format.
Converts ChatMessageSimple messages to ChatCompletionMessage format,
handling different message types and image files for multimodal support.
"""
messages: list[ChatCompletionMessage] = []
for msg in history:
if msg.message_type == MessageType.SYSTEM:
system_msg = SystemMessage(
role="system",
content=msg.message,
)
messages.append(system_msg)
elif msg.message_type == MessageType.USER:
# Handle user messages with potential images
if msg.image_files:
# Build content parts: text + images
content_parts: list[TextContentPart | ImageContentPart] = [
TextContentPart(
type="text",
text=msg.message,
)
]
# Add image parts
for img_file in msg.image_files:
if img_file.file_type == ChatFileType.IMAGE:
try:
image_type = get_image_type_from_bytes(img_file.content)
base64_data = img_file.to_base64()
image_url = f"data:{image_type};base64,{base64_data}"
image_part = ImageContentPart(
type="image_url",
image_url=ImageUrlDetail(
url=image_url,
detail=None,
),
)
content_parts.append(image_part)
except Exception as e:
logger.warning(
f"Failed to process image file {img_file.file_id}: {e}. "
"Skipping image."
)
user_msg = UserMessage(
role="user",
content=content_parts,
)
messages.append(user_msg)
else:
# Simple text-only user message
user_msg_text = UserMessage(
role="user",
content=msg.message,
)
messages.append(user_msg_text)
elif msg.message_type == MessageType.ASSISTANT:
assistant_msg = AssistantMessage(
role="assistant",
content=msg.message or None,
tool_calls=None,
)
messages.append(assistant_msg)
elif msg.message_type == MessageType.TOOL_CALL:
# Tool calls are represented as Assistant Messages with tool_calls field
# Try to reconstruct tool call structure if we have tool_call_id
tool_calls: list[ToolCall] = []
if msg.tool_call_id:
try:
# Parse the message content (which should contain function_name and arguments)
tool_call_data = json.loads(msg.message) if msg.message else {}
if (
isinstance(tool_call_data, dict)
and TOOL_CALL_MSG_FUNC_NAME in tool_call_data
):
function_name = tool_call_data.get(
TOOL_CALL_MSG_FUNC_NAME, "unknown"
)
tool_args = tool_call_data.get(TOOL_CALL_MSG_ARGUMENTS, {})
else:
function_name = "unknown"
tool_args = (
tool_call_data if isinstance(tool_call_data, dict) else {}
)
# NOTE: if the model is trained on a different tool call format, this may slightly interfere
# with the future tool calls, if it doesn't look like this. Almost certainly not a big deal.
tool_call = ToolCall(
id=msg.tool_call_id,
type="function",
function=FunctionCall(
name=function_name,
arguments=json.dumps(tool_args) if tool_args else "{}",
),
)
tool_calls.append(tool_call)
except (json.JSONDecodeError, ValueError) as e:
logger.warning(
f"Failed to parse tool call data for tool_call_id {msg.tool_call_id}: {e}. "
"Including as content-only message."
)
assistant_msg_with_tool = AssistantMessage(
role="assistant",
content=None, # The tool call is parsed, doesn't need to be duplicated in the content
tool_calls=tool_calls if tool_calls else None,
)
messages.append(assistant_msg_with_tool)
elif msg.message_type == MessageType.TOOL_CALL_RESPONSE:
if not msg.tool_call_id:
raise ValueError(
f"Tool call response message encountered but tool_call_id is not available. Message: {msg}"
)
tool_msg = ToolMessage(
role="tool",
content=msg.message,
tool_call_id=msg.tool_call_id,
)
messages.append(tool_msg)
else:
logger.warning(
f"Unknown message type {msg.message_type} in history. Skipping message."
)
return messages
def run_llm_step(
history: list[ChatMessageSimple],
tool_definitions: list[dict],
tool_choice: ToolChoiceOptions,
llm: LLM,
turn_index: int,
citation_processor: DynamicCitationProcessor,
state_container: ChatStateContainer,
final_documents: list[SearchDoc] | None = None,
user_identity: LLMUserIdentity | None = None,
) -> Generator[Packet, None, tuple[LlmStepResult, int]]:
# The second return value is for the turn index because reasoning counts on the frontend as a turn
# TODO this is maybe ok but does not align too well with the backend logic
llm_msg_history = translate_history_to_llm_format(history)
# Set LOG_ONYX_MODEL_INTERACTIONS to log the entire message history to the console
if LOG_ONYX_MODEL_INTERACTIONS:
logger.info(
f"Message history:\n{_format_message_history_for_logging(llm_msg_history)}"
)
id_to_tool_call_map: dict[int, dict[str, Any]] = {}
reasoning_start = False
answer_start = False
accumulated_reasoning = ""
accumulated_answer = ""
with generation_span(
model=llm.config.model_name,
model_config={
"base_url": str(llm.config.api_base or ""),
"model_impl": "litellm",
},
) as span_generation:
span_generation.span_data.input = cast(
Sequence[Mapping[str, Any]], llm_msg_history
)
for packet in llm.stream(
prompt=llm_msg_history,
tools=tool_definitions,
tool_choice=tool_choice,
structured_response_format=None, # TODO
# reasoning_effort=ReasoningEffort.OFF, # Can set this for dev/testing.
user_identity=user_identity,
):
if packet.usage:
usage = packet.usage
span_generation.span_data.usage = {
"input_tokens": usage.prompt_tokens,
"output_tokens": usage.completion_tokens,
"cache_read_input_tokens": usage.cache_read_input_tokens,
"cache_creation_input_tokens": usage.cache_creation_input_tokens,
}
delta = packet.choice.delta
# Should only happen once, frontend does not expect multiple
# ReasoningStart or ReasoningDone packets.
if delta.reasoning_content:
accumulated_reasoning += delta.reasoning_content
# Save reasoning incrementally to state container
state_container.set_reasoning_tokens(accumulated_reasoning)
if not reasoning_start:
yield Packet(
turn_index=turn_index,
obj=ReasoningStart(),
)
yield Packet(
turn_index=turn_index,
obj=ReasoningDelta(reasoning=delta.reasoning_content),
)
reasoning_start = True
if delta.content:
if reasoning_start:
yield Packet(
turn_index=turn_index,
obj=ReasoningDone(),
)
turn_index += 1
reasoning_start = False
if not answer_start:
yield Packet(
turn_index=turn_index,
obj=AgentResponseStart(
final_documents=final_documents,
),
)
answer_start = True
for result in citation_processor.process_token(delta.content):
if isinstance(result, str):
accumulated_answer += result
# Save answer incrementally to state container
state_container.set_answer_tokens(accumulated_answer)
yield Packet(
turn_index=turn_index,
obj=AgentResponseDelta(content=result),
)
elif isinstance(result, CitationInfo):
yield Packet(
turn_index=turn_index,
obj=result,
)
if delta.tool_calls:
if reasoning_start:
yield Packet(
turn_index=turn_index,
obj=ReasoningDone(),
)
turn_index += 1
reasoning_start = False
for tool_call_delta in delta.tool_calls:
_update_tool_call_with_delta(id_to_tool_call_map, tool_call_delta)
tool_calls = _extract_tool_call_kickoffs(id_to_tool_call_map)
if tool_calls:
tool_calls_list: list[ToolCall] = [
ToolCall(
id=kickoff.tool_call_id,
type="function",
function=FunctionCall(
name=kickoff.tool_name,
arguments=json.dumps(kickoff.tool_args),
),
)
for kickoff in tool_calls
]
assistant_msg: AssistantMessage = AssistantMessage(
role="assistant",
content=accumulated_answer if accumulated_answer else None,
tool_calls=tool_calls_list,
)
span_generation.span_data.output = [assistant_msg.model_dump()]
elif accumulated_answer:
assistant_msg_no_tools = AssistantMessage(
role="assistant",
content=accumulated_answer,
tool_calls=None,
)
span_generation.span_data.output = [assistant_msg_no_tools.model_dump()]
# Close reasoning block if still open (stream ended with reasoning content)
if reasoning_start:
yield Packet(
turn_index=turn_index,
obj=ReasoningDone(),
)
turn_index += 1
# Flush any remaining content from citation processor
if citation_processor:
for result in citation_processor.process_token(None):
if isinstance(result, str):
accumulated_answer += result
# Save answer incrementally to state container
state_container.set_answer_tokens(accumulated_answer)
yield Packet(
turn_index=turn_index,
obj=AgentResponseDelta(content=result),
)
elif isinstance(result, CitationInfo):
yield Packet(
turn_index=turn_index,
obj=result,
)
# Note: Content (AgentResponseDelta) doesn't need an explicit end packet - OverallStop handles it
# Tool calls are handled by tool execution code and emit their own packets (e.g., SectionEnd)
if LOG_ONYX_MODEL_INTERACTIONS:
logger.debug(f"Accumulated reasoning: {accumulated_reasoning}")
logger.debug(f"Accumulated answer: {accumulated_answer}")
if tool_calls:
tool_calls_str = "\n".join(
f" - {tc.tool_name}: {json.dumps(tc.tool_args, indent=4)}"
for tc in tool_calls
)
logger.debug(f"Tool calls:\n{tool_calls_str}")
else:
logger.debug("Tool calls: []")
return (
LlmStepResult(
reasoning=accumulated_reasoning if accumulated_reasoning else None,
answer=accumulated_answer if accumulated_answer else None,
tool_calls=tool_calls if tool_calls else None,
),
turn_index,
)
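An illustrative run of the streaming tool-call assembly defined above, using a tiny stand-in for the delta objects an LLM client would emit (only the index, id, and function fields are assumed); the tool name and arguments are made up.

from dataclasses import dataclass
from typing import Any


@dataclass
class _Fn:
    name: str | None = None
    arguments: str | None = None


@dataclass
class _Delta:
    index: int
    id: str | None = None
    function: _Fn | None = None


deltas = [
    _Delta(index=0, id="call_1", function=_Fn(name="internal_search")),
    _Delta(index=0, function=_Fn(arguments='{"quer')),
    _Delta(index=0, function=_Fn(arguments='y": "q3 revenue"}')),
]

in_progress: dict[int, dict[str, Any]] = {}
for d in deltas:
    _update_tool_call_with_delta(in_progress, d)

# After the stream ends, the argument fragments form one complete, parseable call.
kickoffs = _extract_tool_call_kickoffs(in_progress)
assert kickoffs[0].tool_name == "internal_search"
assert kickoffs[0].tool_args == {"query": "q3 revenue"}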

View File

@@ -102,6 +102,11 @@ class MessageResponseIDInfo(BaseModel):
class StreamingError(BaseModel):
error: str
stack_trace: str | None = None
error_code: str | None = (
None # e.g., "RATE_LIMIT", "AUTH_ERROR", "TOOL_CALL_FAILED"
)
is_retryable: bool = True # Hint to frontend if retry might help
details: dict | None = None # Additional context (tool name, model name, etc.)
class OnyxAnswer(BaseModel):
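A hedged example of how the richer StreamingError model above might be populated for a provider rate limit; the specific error_code string and model name are illustrative, not values the backend is known to emit.

rate_limit_error = StreamingError(
    error="The model provider rejected the request due to rate limiting.",
    error_code="RATE_LIMIT",       # machine-readable hint for the frontend
    is_retryable=True,             # retrying after a backoff might succeed
    details={"model_name": "gpt-4o"},  # extra context; keys are illustrative
)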

View File

@@ -1,3 +1,4 @@
import os
import re
import traceback
from collections.abc import Callable
@@ -12,6 +13,7 @@ from onyx.chat.chat_state import run_chat_llm_with_state_containers
from onyx.chat.chat_utils import convert_chat_history
from onyx.chat.chat_utils import create_chat_history_chain
from onyx.chat.chat_utils import get_custom_agent_prompt
from onyx.chat.chat_utils import is_last_assistant_message_clarification
from onyx.chat.chat_utils import load_all_chat_files
from onyx.chat.emitter import get_default_emitter
from onyx.chat.llm_loop import run_llm_loop
@@ -44,6 +46,7 @@ from onyx.db.models import User
from onyx.db.projects import get_project_token_count
from onyx.db.projects import get_user_files_from_project
from onyx.db.tools import get_tools
from onyx.deep_research.dr_loop import run_deep_research_llm_loop
from onyx.file_store.models import ChatFileType
from onyx.file_store.models import FileDescriptor
from onyx.file_store.utils import load_in_memory_chat_files
@@ -51,6 +54,7 @@ from onyx.file_store.utils import verify_user_files
from onyx.llm.factory import get_llm_token_counter
from onyx.llm.factory import get_llms_for_persona
from onyx.llm.interfaces import LLM
from onyx.llm.interfaces import LLMUserIdentity
from onyx.llm.utils import litellm_exception_to_error_msg
from onyx.onyxbot.slack.models import SlackContext
from onyx.redis.redis_pool import get_redis_client
@@ -60,10 +64,12 @@ from onyx.server.query_and_chat.streaming_models import AgentResponseStart
from onyx.server.query_and_chat.streaming_models import CitationInfo
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.utils import get_json_line
from onyx.tools.constants import SEARCH_TOOL_ID
from onyx.tools.tool import Tool
from onyx.tools.tool_constructor import construct_tools
from onyx.tools.tool_constructor import CustomToolConfig
from onyx.tools.tool_constructor import SearchToolConfig
from onyx.tools.tool_constructor import SearchToolUsage
from onyx.utils.logger import setup_logger
from onyx.utils.long_term_log import LongTermLogger
from onyx.utils.timing import log_function_time
@@ -77,6 +83,10 @@ ERROR_TYPE_CANCELLED = "cancelled"
class ToolCallException(Exception):
"""Exception raised for errors during tool calls."""
def __init__(self, message: str, tool_name: str | None = None):
super().__init__(message)
self.tool_name = tool_name
def _extract_project_file_texts_and_images(
project_id: int | None,
@@ -204,6 +214,46 @@ def _extract_project_file_texts_and_images(
)
def _get_project_search_availability(
project_id: int | None,
persona_id: int | None,
has_project_file_texts: bool,
forced_tool_ids: list[int] | None,
search_tool_id: int | None,
) -> SearchToolUsage:
"""Determine search tool availability based on project context.
Args:
project_id: The project ID if the user is in a project
persona_id: The persona ID to check if it's the default persona
has_project_file_texts: Whether project files are loaded in context
forced_tool_ids: List of forced tool IDs (may be mutated to remove search tool)
search_tool_id: The search tool ID to check against
Returns:
SearchToolUsage setting indicating how search should be used
"""
# There are cases where the internal search tool should be disabled
# If the user is in a project, it should not use other sources / generic search
# If they are in a project but using a custom agent, it should use the agent setup
# (which means it can use search)
# However if in a project and there are more files than can fit in the context,
# it should use the search tool with the project filter on
# If no files are uploaded, search should remain enabled
search_usage_forcing_setting = SearchToolUsage.AUTO
if project_id:
if persona_id == DEFAULT_PERSONA_ID and has_project_file_texts:
search_usage_forcing_setting = SearchToolUsage.DISABLED
# Remove search tool from forced_tool_ids if it's present
if forced_tool_ids and search_tool_id and search_tool_id in forced_tool_ids:
forced_tool_ids[:] = [
tool_id for tool_id in forced_tool_ids if tool_id != search_tool_id
]
elif forced_tool_ids and search_tool_id and search_tool_id in forced_tool_ids:
search_usage_forcing_setting = SearchToolUsage.ENABLED
return search_usage_forcing_setting
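An illustrative call to the helper above for the main project case; the project and tool ids are made up, and note that forced_tool_ids is mutated in place.

forced = [1, 7]  # hypothetical tool ids; 1 stands in for the internal search tool
usage = _get_project_search_availability(
    project_id=42,
    persona_id=DEFAULT_PERSONA_ID,  # default persona inside a project
    has_project_file_texts=True,    # project files already loaded into context
    forced_tool_ids=forced,
    search_tool_id=1,
)
# Expected under these assumptions: usage == SearchToolUsage.DISABLED and
# forced == [7] -- the project files cover the request, so internal search is
# turned off and stripped from the forced tools.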
def _initialize_chat_session(
message_text: str,
files: list[FileDescriptor],
@@ -283,10 +333,15 @@ def stream_chat_message_objects(
tenant_id = get_current_tenant_id()
use_existing_user_message = new_msg_req.use_existing_user_message
llm: LLM
llm: LLM | None = None
try:
user_id = user.id if user is not None else None
llm_user_identifier = (
user.email
if user is not None and getattr(user, "email", None)
else (str(user_id) if user_id else "anonymous_user")
)
chat_session = get_chat_session_by_id(
chat_session_id=new_msg_req.chat_session_id,
@@ -297,6 +352,9 @@ def stream_chat_message_objects(
message_text = new_msg_req.message
chat_session_id = new_msg_req.chat_session_id
user_identity = LLMUserIdentity(
user_id=llm_user_identifier, session_id=str(chat_session_id)
)
parent_id = new_msg_req.parent_message_id
reference_doc_ids = new_msg_req.search_doc_ids
retrieval_options = new_msg_req.retrieval_options
@@ -389,19 +447,23 @@ def stream_chat_message_objects(
db_session=db_session,
)
# There are cases where the internal search tool should be disabled
# If the user is in a project, it should not use other sources / generic search
# If they are in a project but using a custom agent, it should use the agent setup
# (which means it can use search)
# However if in a project and there are more files than can fit in the context,
# it should use the search tool with the project filter on
disable_internal_search = bool(
chat_session.project_id
and persona.id is DEFAULT_PERSONA_ID
and (
extracted_project_files.project_file_texts
or not extracted_project_files.project_as_filter
)
# Build a mapping of tool_id to tool_name for history reconstruction
all_tools = get_tools(db_session)
tool_id_to_name_map = {tool.id: tool.name for tool in all_tools}
search_tool_id = next(
(tool.id for tool in all_tools if tool.in_code_tool_id == SEARCH_TOOL_ID),
None,
)
# This may also mutate the new_msg_req.forced_tool_ids
# This logic is specifically for projects
search_usage_forcing_setting = _get_project_search_availability(
project_id=chat_session.project_id,
persona_id=persona.id,
has_project_file_texts=bool(extracted_project_files.project_file_texts),
forced_tool_ids=new_msg_req.forced_tool_ids,
search_tool_id=search_tool_id,
)
emitter = get_default_emitter()
@@ -430,7 +492,7 @@ def stream_chat_message_objects(
additional_headers=custom_tool_additional_headers,
),
allowed_tool_ids=new_msg_req.allowed_tool_ids,
disable_internal_search=disable_internal_search,
search_usage_forcing_setting=search_usage_forcing_setting,
)
tools: list[Tool] = []
for tool_list in tool_dict.values():
@@ -455,10 +517,6 @@ def stream_chat_message_objects(
reserved_assistant_message_id=assistant_response.id,
)
# Build a mapping of tool_id to tool_name for history reconstruction
all_tools = get_tools(db_session)
tool_id_to_name_map = {tool.id: tool.name for tool in all_tools}
# Convert the chat history into a simple format that is free of any DB objects
# and is easy to parse for the agent loop
simple_chat_history = convert_chat_history(
@@ -488,24 +546,50 @@ def stream_chat_message_objects(
# for stop signals. run_llm_loop itself doesn't know about stopping.
# Note: DB session is not thread safe but nothing else uses it and the
# reference is passed directly so it's ok.
yield from run_chat_llm_with_state_containers(
run_llm_loop,
emitter=emitter,
state_container=state_container,
is_connected=check_is_connected, # Not passed through to run_llm_loop
simple_chat_history=simple_chat_history,
tools=tools,
custom_agent_prompt=custom_agent_prompt,
project_files=extracted_project_files,
persona=persona,
memories=memories,
llm=llm,
token_counter=token_counter,
db_session=db_session,
forced_tool_id=(
new_msg_req.forced_tool_ids[0] if new_msg_req.forced_tool_ids else None
),
)
if os.environ.get("ENABLE_DEEP_RESEARCH_LOOP"): # Dev only feature flag for now
if chat_session.project_id:
raise RuntimeError("Deep research is not supported for projects")
# Skip clarification if the last assistant message was a clarification
# (user has already responded to a clarification question)
skip_clarification = is_last_assistant_message_clarification(chat_history)
yield from run_chat_llm_with_state_containers(
run_deep_research_llm_loop,
is_connected=check_is_connected,
emitter=emitter,
state_container=state_container,
simple_chat_history=simple_chat_history,
tools=tools,
custom_agent_prompt=custom_agent_prompt,
llm=llm,
token_counter=token_counter,
db_session=db_session,
skip_clarification=skip_clarification,
user_identity=user_identity,
)
else:
yield from run_chat_llm_with_state_containers(
run_llm_loop,
is_connected=check_is_connected, # Not passed through to run_llm_loop
emitter=emitter,
state_container=state_container,
simple_chat_history=simple_chat_history,
tools=tools,
custom_agent_prompt=custom_agent_prompt,
project_files=extracted_project_files,
persona=persona,
memories=memories,
llm=llm,
token_counter=token_counter,
db_session=db_session,
forced_tool_id=(
new_msg_req.forced_tool_ids[0]
if new_msg_req.forced_tool_ids
else None
),
user_identity=user_identity,
)
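The skip_clarification check a few lines above relies on a helper that is not shown in this hunk. Below is a minimal sketch of what such a check could look like, assuming history items expose a message_type and the new is_clarification flag added elsewhere in this diff; the class and field names are invented for illustration.

from dataclasses import dataclass


@dataclass
class HistoryMessage:  # hypothetical stand-in for the stored ChatMessage
    message_type: str  # "user" or "assistant"
    is_clarification: bool = False


def last_assistant_message_was_clarification(history: list[HistoryMessage]) -> bool:
    for message in reversed(history):
        if message.message_type == "assistant":
            return message.is_clarification
    return False


history = [
    HistoryMessage("user"),
    HistoryMessage("assistant", is_clarification=True),
    HistoryMessage("user"),  # the user has already answered the clarification
]
# Deep research can skip asking again and proceed straight to research
assert last_assistant_message_was_clarification(history) is True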
# Determine if stopped by user
completed_normally = check_is_connected()
@@ -549,13 +633,18 @@ def stream_chat_message_objects(
tool_calls=state_container.tool_calls,
db_session=db_session,
assistant_message=assistant_response,
is_clarification=state_container.is_clarification,
)
except ValueError as e:
logger.exception("Failed to process chat message.")
error_msg = str(e)
yield StreamingError(error=error_msg)
yield StreamingError(
error=error_msg,
error_code="VALIDATION_ERROR",
is_retryable=True,
)
db_session.rollback()
return
@@ -565,9 +654,17 @@ def stream_chat_message_objects(
stack_trace = traceback.format_exc()
if isinstance(e, ToolCallException):
yield StreamingError(error=error_msg, stack_trace=stack_trace)
yield StreamingError(
error=error_msg,
stack_trace=stack_trace,
error_code="TOOL_CALL_FAILED",
is_retryable=True,
details={"tool_name": e.tool_name} if e.tool_name else None,
)
elif llm:
client_error_msg = litellm_exception_to_error_msg(e, llm)
client_error_msg, error_code, is_retryable = litellm_exception_to_error_msg(
e, llm
)
if llm.config.api_key and len(llm.config.api_key) > 2:
client_error_msg = client_error_msg.replace(
llm.config.api_key, "[REDACTED_API_KEY]"
@@ -576,7 +673,24 @@ def stream_chat_message_objects(
llm.config.api_key, "[REDACTED_API_KEY]"
)
yield StreamingError(error=client_error_msg, stack_trace=stack_trace)
yield StreamingError(
error=client_error_msg,
stack_trace=stack_trace,
error_code=error_code,
is_retryable=is_retryable,
details={
"model": llm.config.model_name,
"provider": llm.config.model_provider,
},
)
else:
# LLM was never initialized - early failure
yield StreamingError(
error="Failed to initialize the chat. Please check your configuration and try again.",
stack_trace=stack_trace,
error_code="INIT_FAILED",
is_retryable=True,
)
db_session.rollback()
return

View File

@@ -10,17 +10,18 @@ from onyx.file_store.models import FileDescriptor
from onyx.prompts.chat_prompts import CITATION_REMINDER
from onyx.prompts.chat_prompts import CODE_BLOCK_MARKDOWN
from onyx.prompts.chat_prompts import DEFAULT_SYSTEM_PROMPT
from onyx.prompts.chat_prompts import GENERATE_IMAGE_GUIDANCE
from onyx.prompts.chat_prompts import INTERNAL_SEARCH_GUIDANCE
from onyx.prompts.chat_prompts import OPEN_URLS_GUIDANCE
from onyx.prompts.chat_prompts import PYTHON_TOOL_GUIDANCE
from onyx.prompts.chat_prompts import REQUIRE_CITATION_GUIDANCE
from onyx.prompts.chat_prompts import TOOL_DESCRIPTION_SEARCH_GUIDANCE
from onyx.prompts.chat_prompts import TOOL_SECTION_HEADER
from onyx.prompts.chat_prompts import USER_INFO_HEADER
from onyx.prompts.chat_prompts import WEB_SEARCH_GUIDANCE
from onyx.prompts.prompt_utils import get_company_context
from onyx.prompts.prompt_utils import handle_onyx_date_awareness
from onyx.prompts.prompt_utils import replace_citation_guidance_tag
from onyx.prompts.tool_prompts import GENERATE_IMAGE_GUIDANCE
from onyx.prompts.tool_prompts import INTERNAL_SEARCH_GUIDANCE
from onyx.prompts.tool_prompts import OPEN_URLS_GUIDANCE
from onyx.prompts.tool_prompts import PYTHON_TOOL_GUIDANCE
from onyx.prompts.tool_prompts import TOOL_DESCRIPTION_SEARCH_GUIDANCE
from onyx.prompts.tool_prompts import TOOL_SECTION_HEADER
from onyx.prompts.tool_prompts import WEB_SEARCH_GUIDANCE
from onyx.tools.tool import Tool
from onyx.tools.tool_implementations.images.image_generation_tool import (
ImageGenerationTool,
@@ -141,20 +142,12 @@ def build_system_prompt(
if open_ai_formatting_enabled:
system_prompt = CODE_BLOCK_MARKDOWN + system_prompt
try:
citation_guidance = (
REQUIRE_CITATION_GUIDANCE
if should_cite_documents or include_all_guidance
else ""
)
system_prompt = system_prompt.format(
citation_reminder_or_empty=citation_guidance
)
except Exception:
# Even if the prompt is modified and there is not an explicit spot for citations, always require it
# This is more a product decision as it's likely better to always enforce citations
if should_cite_documents or include_all_guidance:
system_prompt += REQUIRE_CITATION_GUIDANCE
# Replace citation guidance placeholder if present
system_prompt, should_append_citation_guidance = replace_citation_guidance_tag(
system_prompt,
should_cite_documents=should_cite_documents,
include_all_guidance=include_all_guidance,
)
company_context = get_company_context()
if company_context or memories:
@@ -166,7 +159,9 @@ def build_system_prompt(
memory.strip() for memory in memories if memory.strip()
)
if should_cite_documents or include_all_guidance:
# Append citation guidance after company context if placeholder was not present
# This maintains backward compatibility and ensures citations are always enforced when needed
if should_append_citation_guidance:
system_prompt += REQUIRE_CITATION_GUIDANCE
if include_all_guidance:

View File

@@ -148,6 +148,7 @@ def save_chat_turn(
citation_docs_info: list[CitationDocInfo],
db_session: Session,
assistant_message: ChatMessage,
is_clarification: bool = False,
) -> None:
"""
Save a chat turn by populating the assistant_message and creating related entities.
@@ -175,10 +176,12 @@ def save_chat_turn(
citation_docs_info: List of citation document information for building citations mapping
db_session: Database session for persistence
assistant_message: The ChatMessage object to populate (should already exist in DB)
is_clarification: Whether this assistant message is a clarification question (deep research flow)
"""
# 1. Update ChatMessage with message content, reasoning tokens, and token count
assistant_message.message = message_text
assistant_message.reasoning_tokens = reasoning_tokens
assistant_message.is_clarification = is_clarification
# Calculate the token count using the default tokenizer. When storing, this should not use
# the LLM-specific tokenizer, so we use the system default here.

View File

@@ -7,6 +7,7 @@ from shared_configs.contextvars import get_current_tenant_id
# Redis key prefixes for chat session stop signals
PREFIX = "chatsessionstop"
FENCE_PREFIX = f"{PREFIX}_fence"
FENCE_TTL = 24 * 60 * 60 # 24 hours - defensive TTL to prevent memory leaks
def set_fence(chat_session_id: UUID, redis_client: Redis, value: bool) -> None:
@@ -24,7 +25,7 @@ def set_fence(chat_session_id: UUID, redis_client: Redis, value: bool) -> None:
redis_client.delete(fence_key)
return
redis_client.set(fence_key, 0)
redis_client.set(fence_key, 0, ex=FENCE_TTL)
def is_connected(chat_session_id: UUID, redis_client: Redis) -> bool:
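A small standalone illustration of the TTL-backed fence pattern introduced above, assuming a locally reachable Redis; the key name is illustrative and not the real prefix.

from uuid import uuid4

from redis import Redis

FENCE_TTL = 24 * 60 * 60  # seconds; expired fences clean themselves up

redis_client = Redis(host="localhost", port=6379)
fence_key = f"example_fence_{uuid4()}"

# Passing ex= guarantees the key disappears even if the delete path is never
# reached (e.g. a worker crashes mid-session), which is the leak being prevented.
redis_client.set(fence_key, 0, ex=FENCE_TTL)
print(redis_client.ttl(fence_key))  # roughly 86400

# Normal teardown still deletes eagerly rather than waiting for expiry.
redis_client.delete(fence_key)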

View File

@@ -24,6 +24,12 @@ APP_PORT = 8080
# prefix from requests directed towards the API server. In these cases, set this to `/api`
APP_API_PREFIX = os.environ.get("API_PREFIX", "")
# Whether to send user metadata (user_id/email and session_id) to the LLM provider.
# Disabled by default.
SEND_USER_METADATA_TO_LLM_PROVIDER = (
os.environ.get("SEND_USER_METADATA_TO_LLM_PROVIDER", "")
).lower() == "true"
#####
# User Facing Features Configs
#####
@@ -31,7 +37,6 @@ BLURB_SIZE = 128 # Number Encoder Tokens included in the chunk blurb
GENERATIVE_MODEL_ACCESS_CHECK_FREQ = int(
os.environ.get("GENERATIVE_MODEL_ACCESS_CHECK_FREQ") or 86400
) # 1 day
DISABLE_GENERATIVE_AI = os.environ.get("DISABLE_GENERATIVE_AI", "").lower() == "true"
# Controls whether users can use User Knowledge (personal documents) in assistants
DISABLE_USER_KNOWLEDGE = os.environ.get("DISABLE_USER_KNOWLEDGE", "").lower() == "true"

View File

@@ -177,6 +177,7 @@ class DocumentSource(str, Enum):
SLAB = "slab"
PRODUCTBOARD = "productboard"
FILE = "file"
CODA = "coda"
NOTION = "notion"
ZULIP = "zulip"
LINEAR = "linear"
@@ -596,6 +597,7 @@ DocumentSourceDescription: dict[DocumentSource, str] = {
DocumentSource.SLAB: "slab data",
DocumentSource.PRODUCTBOARD: "productboard data (boards, etc.)",
DocumentSource.FILE: "files",
DocumentSource.CODA: "coda - team workspace with docs, tables, and pages",
DocumentSource.NOTION: "notion data - a workspace that combines note-taking, \
project management, and collaboration tools into a single, customizable platform",
DocumentSource.ZULIP: "zulip data",

View File

@@ -65,9 +65,10 @@ GEN_AI_NUM_RESERVED_OUTPUT_TOKENS = int(
os.environ.get("GEN_AI_NUM_RESERVED_OUTPUT_TOKENS") or 1024
)
# Typically, GenAI models nowadays are at least 4K tokens
# Fallback token limit for models where the max context is unknown
# Set conservatively at 32K to handle most modern models
GEN_AI_MODEL_FALLBACK_MAX_TOKENS = int(
os.environ.get("GEN_AI_MODEL_FALLBACK_MAX_TOKENS") or 4096
os.environ.get("GEN_AI_MODEL_FALLBACK_MAX_TOKENS") or 32000
)
# This is used when computing how much context space is available for documents
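To make the fallback's role concrete, here is a hedged sketch of how an unrecognized model might pick up the 32K default; the registry and helper below are invented for illustration and are not the actual Onyx lookup path.

import os

GEN_AI_MODEL_FALLBACK_MAX_TOKENS = int(
    os.environ.get("GEN_AI_MODEL_FALLBACK_MAX_TOKENS") or 32000
)

KNOWN_CONTEXT_WINDOWS = {  # hypothetical registry, not the real lookup source
    "gpt-4o": 128_000,
    "claude-3-5-sonnet": 200_000,
}


def resolve_max_input_tokens(model_name: str) -> int:
    # Unrecognized models fall back to the conservative 32K default
    return KNOWN_CONTEXT_WINDOWS.get(model_name, GEN_AI_MODEL_FALLBACK_MAX_TOKENS)


print(resolve_max_input_tokens("gpt-4o"))             # 128000
print(resolve_max_input_tokens("some-custom-model"))  # 32000 (fallback)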

View File

@@ -97,28 +97,31 @@ class AsanaAPI:
self, project_gid: str, start_date: str, start_seconds: int
) -> Iterator[AsanaTask]:
project = self.project_api.get_project(project_gid, opts={})
if project["archived"]:
logger.info(f"Skipping archived project: {project['name']} ({project_gid})")
yield from []
if not project["team"] or not project["team"]["gid"]:
project_name = project.get("name", project_gid)
team = project.get("team") or {}
team_gid = team.get("gid")
if project.get("archived"):
logger.info(f"Skipping archived project: {project_name} ({project_gid})")
return
if not team_gid:
logger.info(
f"Skipping project without a team: {project['name']} ({project_gid})"
f"Skipping project without a team: {project_name} ({project_gid})"
)
yield from []
if project["privacy_setting"] == "private":
if self.team_gid and project["team"]["gid"] != self.team_gid:
return
if project.get("privacy_setting") == "private":
if self.team_gid and team_gid != self.team_gid:
logger.info(
f"Skipping private project not in configured team: {project['name']} ({project_gid})"
)
yield from []
else:
logger.info(
f"Processing private project in configured team: {project['name']} ({project_gid})"
f"Skipping private project not in configured team: {project_name} ({project_gid})"
)
return
logger.info(
f"Processing private project in configured team: {project_name} ({project_gid})"
)
simple_start_date = start_date.split(".")[0].split("+")[0]
logger.info(
f"Fetching tasks modified since {simple_start_date} for project: {project['name']} ({project_gid})"
f"Fetching tasks modified since {simple_start_date} for project: {project_name} ({project_gid})"
)
opts = {
@@ -157,7 +160,7 @@ class AsanaAPI:
link=data["permalink_url"],
last_modified=datetime.fromisoformat(data["modified_at"]),
project_gid=project_gid,
project_name=project["name"],
project_name=project_name,
)
yield task
except Exception:

View File

View File

@@ -0,0 +1,711 @@
import os
from collections.abc import Generator
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import cast
from typing import Dict
from typing import List
from typing import Optional
from pydantic import BaseModel
from retry import retry
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.rate_limit_wrapper import (
rl_requests,
)
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TextSection
from onyx.utils.batching import batch_generator
from onyx.utils.logger import setup_logger
_CODA_CALL_TIMEOUT = 30
_CODA_BASE_URL = "https://coda.io/apis/v1"
logger = setup_logger()
class CodaClientRequestFailedError(ConnectionError):
def __init__(self, message: str, status_code: int):
super().__init__(
f"Coda API request failed with status {status_code}: {message}"
)
self.status_code = status_code
class CodaDoc(BaseModel):
id: str
browser_link: str
name: str
created_at: str
updated_at: str
workspace_id: str
workspace_name: str
folder_id: str | None
folder_name: str | None
class CodaPage(BaseModel):
id: str
browser_link: str
name: str
content_type: str
created_at: str
updated_at: str
doc_id: str
class CodaTable(BaseModel):
id: str
name: str
browser_link: str
created_at: str
updated_at: str
doc_id: str
class CodaRow(BaseModel):
id: str
name: Optional[str] = None
index: Optional[int] = None
browser_link: str
created_at: str
updated_at: str
values: Dict[str, Any]
table_id: str
doc_id: str
class CodaApiClient:
def __init__(
self,
bearer_token: str,
) -> None:
self.bearer_token = bearer_token
self.base_url = os.environ.get("CODA_BASE_URL", _CODA_BASE_URL)
def get(
self, endpoint: str, params: Optional[dict[str, str]] = None
) -> dict[str, Any]:
url = self._build_url(endpoint)
headers = self._build_headers()
response = rl_requests.get(
url, headers=headers, params=params, timeout=_CODA_CALL_TIMEOUT
)
try:
json = response.json()
except Exception:
json = {}
if response.status_code >= 300:
error = response.reason
response_error = json.get("error", {}).get("message", "")
if response_error:
error = response_error
raise CodaClientRequestFailedError(error, response.status_code)
return json
def _build_headers(self) -> Dict[str, str]:
return {"Authorization": f"Bearer {self.bearer_token}"}
def _build_url(self, endpoint: str) -> str:
return self.base_url.rstrip("/") + "/" + endpoint.lstrip("/")
class CodaConnector(LoadConnector, PollConnector):
def __init__(
self,
batch_size: int = INDEX_BATCH_SIZE,
index_page_content: bool = True,
workspace_id: str | None = None,
) -> None:
self.batch_size = batch_size
self.index_page_content = index_page_content
self.workspace_id = workspace_id
self._coda_client: CodaApiClient | None = None
@property
def coda_client(self) -> CodaApiClient:
if self._coda_client is None:
raise ConnectorMissingCredentialError("Coda")
return self._coda_client
@retry(tries=3, delay=1, backoff=2)
def _get_doc(self, doc_id: str) -> CodaDoc:
"""Fetch a specific Coda document by its ID."""
logger.debug(f"Fetching Coda doc with ID: {doc_id}")
try:
response = self.coda_client.get(f"docs/{doc_id}")
except CodaClientRequestFailedError as e:
if e.status_code == 404:
raise ConnectorValidationError(f"Failed to fetch doc: {doc_id}") from e
else:
raise
return CodaDoc(
id=response["id"],
browser_link=response["browserLink"],
name=response["name"],
created_at=response["createdAt"],
updated_at=response["updatedAt"],
workspace_id=response["workspace"]["id"],
workspace_name=response["workspace"]["name"],
folder_id=response["folder"]["id"] if response.get("folder") else None,
folder_name=response["folder"]["name"] if response.get("folder") else None,
)
@retry(tries=3, delay=1, backoff=2)
def _get_page(self, doc_id: str, page_id: str) -> CodaPage:
"""Fetch a specific page from a Coda document."""
logger.debug(f"Fetching Coda page with ID: {page_id}")
try:
response = self.coda_client.get(f"docs/{doc_id}/pages/{page_id}")
except CodaClientRequestFailedError as e:
if e.status_code == 404:
raise ConnectorValidationError(
f"Failed to fetch page: {page_id} from doc: {doc_id}"
) from e
else:
raise
return CodaPage(
id=response["id"],
doc_id=doc_id,
browser_link=response["browserLink"],
name=response["name"],
content_type=response["contentType"],
created_at=response["createdAt"],
updated_at=response["updatedAt"],
)
@retry(tries=3, delay=1, backoff=2)
def _get_table(self, doc_id: str, table_id: str) -> CodaTable:
"""Fetch a specific table from a Coda document."""
logger.debug(f"Fetching Coda table with ID: {table_id}")
try:
response = self.coda_client.get(f"docs/{doc_id}/tables/{table_id}")
except CodaClientRequestFailedError as e:
if e.status_code == 404:
raise ConnectorValidationError(
f"Failed to fetch table: {table_id} from doc: {doc_id}"
) from e
else:
raise
return CodaTable(
id=response["id"],
name=response["name"],
browser_link=response["browserLink"],
created_at=response["createdAt"],
updated_at=response["updatedAt"],
doc_id=doc_id,
)
@retry(tries=3, delay=1, backoff=2)
def _get_row(self, doc_id: str, table_id: str, row_id: str) -> CodaRow:
"""Fetch a specific row from a Coda table."""
logger.debug(f"Fetching Coda row with ID: {row_id}")
try:
response = self.coda_client.get(
f"docs/{doc_id}/tables/{table_id}/rows/{row_id}"
)
except CodaClientRequestFailedError as e:
if e.status_code == 404:
raise ConnectorValidationError(
f"Failed to fetch row: {row_id} from table: {table_id} in doc: {doc_id}"
) from e
else:
raise
values = {}
for col_name, col_value in response.get("values", {}).items():
values[col_name] = col_value
return CodaRow(
id=response["id"],
name=response.get("name"),
index=response.get("index"),
browser_link=response["browserLink"],
created_at=response["createdAt"],
updated_at=response["updatedAt"],
values=values,
table_id=table_id,
doc_id=doc_id,
)
@retry(tries=3, delay=1, backoff=2)
def _list_all_docs(
self, endpoint: str = "docs", params: Optional[Dict[str, str]] = None
) -> List[CodaDoc]:
"""List all Coda documents in the workspace."""
logger.debug("Listing documents in Coda")
all_docs: List[CodaDoc] = []
next_page_token: str | None = None
params = params or {}
if self.workspace_id:
params["workspaceId"] = self.workspace_id
while True:
if next_page_token:
params["pageToken"] = next_page_token
try:
response = self.coda_client.get(endpoint, params=params)
except CodaClientRequestFailedError as e:
if e.status_code == 404:
raise ConnectorValidationError("Failed to list docs") from e
else:
raise
items = response.get("items", [])
for item in items:
doc = CodaDoc(
id=item["id"],
browser_link=item["browserLink"],
name=item["name"],
created_at=item["createdAt"],
updated_at=item["updatedAt"],
workspace_id=item["workspace"]["id"],
workspace_name=item["workspace"]["name"],
folder_id=item["folder"]["id"] if item.get("folder") else None,
folder_name=item["folder"]["name"] if item.get("folder") else None,
)
all_docs.append(doc)
next_page_token = response.get("nextPageToken")
if not next_page_token:
break
logger.debug(f"Found {len(all_docs)} docs")
return all_docs
@retry(tries=3, delay=1, backoff=2)
def _list_pages_in_doc(self, doc_id: str) -> List[CodaPage]:
"""List all pages in a Coda document."""
logger.debug(f"Listing pages in Coda doc with ID: {doc_id}")
pages: List[CodaPage] = []
endpoint = f"docs/{doc_id}/pages"
params: Dict[str, str] = {}
next_page_token: str | None = None
while True:
if next_page_token:
params["pageToken"] = next_page_token
try:
response = self.coda_client.get(endpoint, params=params)
except CodaClientRequestFailedError as e:
if e.status_code == 404:
raise ConnectorValidationError(
f"Failed to list pages for doc: {doc_id}"
) from e
else:
raise
items = response.get("items", [])
for item in items:
# This filter can be removed if we decide not to skip hidden pages
if item.get("isHidden", False):
continue
pages.append(
CodaPage(
id=item["id"],
browser_link=item["browserLink"],
name=item["name"],
content_type=item["contentType"],
created_at=item["createdAt"],
updated_at=item["updatedAt"],
doc_id=doc_id,
)
)
next_page_token = response.get("nextPageToken")
if not next_page_token:
break
logger.debug(f"Found {len(pages)} pages in doc {doc_id}")
return pages
@retry(tries=3, delay=1, backoff=2)
def _fetch_page_content(self, doc_id: str, page_id: str) -> str:
"""Fetch the content of a Coda page."""
logger.debug(f"Fetching content for page {page_id} in doc {doc_id}")
content_parts = []
next_page_token: str | None = None
params: Dict[str, str] = {}
while True:
if next_page_token:
params["pageToken"] = next_page_token
try:
response = self.coda_client.get(
f"docs/{doc_id}/pages/{page_id}/content", params=params
)
except CodaClientRequestFailedError as e:
if e.status_code == 404:
logger.debug(f"No content available for page {page_id}")
return ""
raise
items = response.get("items", [])
for item in items:
item_content = item.get("itemContent", {})
content_text = item_content.get("content", "")
if content_text:
content_parts.append(content_text)
next_page_token = response.get("nextPageToken")
if not next_page_token:
break
return "\n\n".join(content_parts)
@retry(tries=3, delay=1, backoff=2)
def _list_tables(self, doc_id: str) -> List[CodaTable]:
"""List all tables in a Coda document."""
logger.debug(f"Listing tables in Coda doc with ID: {doc_id}")
tables: List[CodaTable] = []
endpoint = f"docs/{doc_id}/tables"
params: Dict[str, str] = {}
next_page_token: str | None = None
while True:
if next_page_token:
params["pageToken"] = next_page_token
try:
response = self.coda_client.get(endpoint, params=params)
except CodaClientRequestFailedError as e:
if e.status_code == 404:
raise ConnectorValidationError(
f"Failed to list tables for doc: {doc_id}"
) from e
else:
raise
items = response.get("items", [])
for item in items:
tables.append(
CodaTable(
id=item["id"],
browser_link=item["browserLink"],
name=item["name"],
created_at=item["createdAt"],
updated_at=item["updatedAt"],
doc_id=doc_id,
)
)
next_page_token = response.get("nextPageToken")
if not next_page_token:
break
logger.debug(f"Found {len(tables)} tables in doc {doc_id}")
return tables
@retry(tries=3, delay=1, backoff=2)
def _list_rows_and_values(self, doc_id: str, table_id: str) -> List[CodaRow]:
"""List all rows and their values in a table."""
logger.debug(f"Listing rows in Coda table: {table_id} in Coda doc: {doc_id}")
rows: List[CodaRow] = []
endpoint = f"docs/{doc_id}/tables/{table_id}/rows"
params: Dict[str, str] = {"valueFormat": "rich"}
next_page_token: str | None = None
while True:
if next_page_token:
params["pageToken"] = next_page_token
try:
response = self.coda_client.get(endpoint, params=params)
except CodaClientRequestFailedError as e:
if e.status_code == 404:
raise ConnectorValidationError(
f"Failed to list rows for table: {table_id} in doc: {doc_id}"
) from e
else:
raise
items = response.get("items", [])
for item in items:
values = {}
for col_name, col_value in item.get("values", {}).items():
values[col_name] = col_value
rows.append(
CodaRow(
id=item["id"],
name=item["name"],
index=item["index"],
browser_link=item["browserLink"],
created_at=item["createdAt"],
updated_at=item["updatedAt"],
values=values,
table_id=table_id,
doc_id=doc_id,
)
)
next_page_token = response.get("nextPageToken")
if not next_page_token:
break
logger.debug(f"Found {len(rows)} rows in table {table_id}")
return rows
def _convert_page_to_document(self, page: CodaPage, content: str = "") -> Document:
"""Convert a page into a Document."""
page_updated = datetime.fromisoformat(page.updated_at).astimezone(timezone.utc)
text_parts = [page.name, page.browser_link]
if content:
text_parts.append(content)
sections = [TextSection(link=page.browser_link, text="\n\n".join(text_parts))]
return Document(
id=f"coda-page-{page.doc_id}-{page.id}",
sections=cast(list[TextSection | ImageSection], sections),
source=DocumentSource.CODA,
semantic_identifier=page.name or f"Page {page.id}",
doc_updated_at=page_updated,
metadata={
"browser_link": page.browser_link,
"doc_id": page.doc_id,
"content_type": page.content_type,
},
)
def _convert_table_with_rows_to_document(
self, table: CodaTable, rows: List[CodaRow]
) -> Document:
"""Convert a table and its rows into a single Document with multiple sections (one per row)."""
table_updated = datetime.fromisoformat(table.updated_at).astimezone(
timezone.utc
)
sections: List[TextSection] = []
for row in rows:
content_text = " ".join(
str(v) if not isinstance(v, list) else " ".join(map(str, v))
for v in row.values.values()
)
row_name = row.name or f"Row {row.index or row.id}"
text = f"{row_name}: {content_text}" if content_text else row_name
sections.append(TextSection(link=row.browser_link, text=text))
# If no rows, create a single section for the table itself
if not sections:
sections = [
TextSection(link=table.browser_link, text=f"Table: {table.name}")
]
return Document(
id=f"coda-table-{table.doc_id}-{table.id}",
sections=cast(list[TextSection | ImageSection], sections),
source=DocumentSource.CODA,
semantic_identifier=table.name or f"Table {table.id}",
doc_updated_at=table_updated,
metadata={
"browser_link": table.browser_link,
"doc_id": table.doc_id,
"row_count": str(len(rows)),
},
)
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
"""Load and validate Coda credentials."""
self._coda_client = CodaApiClient(bearer_token=credentials["coda_bearer_token"])
try:
self._coda_client.get("docs", params={"limit": "1"})
except CodaClientRequestFailedError as e:
if e.status_code == 401:
raise ConnectorMissingCredentialError("Invalid Coda API token")
raise
return None
def load_from_state(self) -> GenerateDocumentsOutput:
"""Load all documents from Coda workspace."""
def _iter_documents() -> Generator[Document, None, None]:
docs = self._list_all_docs()
logger.info(f"Found {len(docs)} Coda docs to process")
for doc in docs:
logger.debug(f"Processing doc: {doc.name} ({doc.id})")
try:
pages = self._list_pages_in_doc(doc.id)
for page in pages:
content = ""
if self.index_page_content:
try:
content = self._fetch_page_content(doc.id, page.id)
except Exception as e:
logger.warning(
f"Failed to fetch content for page {page.id}: {e}"
)
yield self._convert_page_to_document(page, content)
except ConnectorValidationError as e:
logger.warning(f"Failed to list pages for doc {doc.id}: {e}")
try:
tables = self._list_tables(doc.id)
for table in tables:
try:
rows = self._list_rows_and_values(doc.id, table.id)
yield self._convert_table_with_rows_to_document(table, rows)
except ConnectorValidationError as e:
logger.warning(
f"Failed to list rows for table {table.id}: {e}"
)
yield self._convert_table_with_rows_to_document(table, [])
except ConnectorValidationError as e:
logger.warning(f"Failed to list tables for doc {doc.id}: {e}")
return batch_generator(_iter_documents(), self.batch_size)
def poll_source(
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
) -> GenerateDocumentsOutput:
"""
Polls the Coda API for documents updated between start and end timestamps.
We refer to page and table update times to determine if they need to be re-indexed.
"""
def _iter_documents() -> Generator[Document, None, None]:
docs = self._list_all_docs()
logger.info(
f"Polling {len(docs)} Coda docs for updates between {start} and {end}"
)
for doc in docs:
try:
pages = self._list_pages_in_doc(doc.id)
for page in pages:
page_timestamp = (
datetime.fromisoformat(page.updated_at)
.astimezone(timezone.utc)
.timestamp()
)
if start < page_timestamp <= end:
content = ""
if self.index_page_content:
try:
content = self._fetch_page_content(doc.id, page.id)
except Exception as e:
logger.warning(
f"Failed to fetch content for page {page.id}: {e}"
)
yield self._convert_page_to_document(page, content)
except ConnectorValidationError as e:
logger.warning(f"Failed to list pages for doc {doc.id}: {e}")
try:
tables = self._list_tables(doc.id)
for table in tables:
table_timestamp = (
datetime.fromisoformat(table.updated_at)
.astimezone(timezone.utc)
.timestamp()
)
try:
rows = self._list_rows_and_values(doc.id, table.id)
table_or_rows_updated = start < table_timestamp <= end
if not table_or_rows_updated:
for row in rows:
row_timestamp = (
datetime.fromisoformat(row.updated_at)
.astimezone(timezone.utc)
.timestamp()
)
if start < row_timestamp <= end:
table_or_rows_updated = True
break
if table_or_rows_updated:
yield self._convert_table_with_rows_to_document(
table, rows
)
except ConnectorValidationError as e:
logger.warning(
f"Failed to list rows for table {table.id}: {e}"
)
if table_timestamp > start and table_timestamp <= end:
yield self._convert_table_with_rows_to_document(
table, []
)
except ConnectorValidationError as e:
logger.warning(f"Failed to list tables for doc {doc.id}: {e}")
return batch_generator(_iter_documents(), self.batch_size)
def validate_connector_settings(self) -> None:
"""Validates the Coda connector settings calling the 'whoami' endpoint."""
try:
response = self.coda_client.get("whoami")
logger.info(
f"Coda connector validated for user: {response.get('name', 'Unknown')}"
)
if self.workspace_id:
params = {"workspaceId": self.workspace_id, "limit": "1"}
self.coda_client.get("docs", params=params)
logger.info(f"Validated access to workspace: {self.workspace_id}")
except CodaClientRequestFailedError as e:
if e.status_code == 401:
raise CredentialExpiredError(
"Coda credential appears to be invalid or expired (HTTP 401)."
)
elif e.status_code == 404:
raise ConnectorValidationError(
"Coda workspace not found or not accessible (HTTP 404). "
"Please verify the workspace_id is correct and shared with the integration."
)
elif e.status_code == 429:
raise ConnectorValidationError(
"Validation failed due to Coda rate-limits being exceeded (HTTP 429). "
"Please try again later."
)
else:
raise UnexpectedValidationError(
f"Unexpected Coda HTTP error (status={e.status_code}): {e}"
)
except Exception as exc:
raise UnexpectedValidationError(
f"Unexpected error during Coda settings validation: {exc}"
)

View File

@@ -387,124 +387,162 @@ class ConfluenceConnector(
attachment_docs: list[Document] = []
page_url = ""
for attachment in self.confluence_client.paginated_cql_retrieval(
cql=attachment_query,
expand=",".join(_ATTACHMENT_EXPANSION_FIELDS),
):
media_type: str = attachment.get("metadata", {}).get("mediaType", "")
# TODO(rkuo): this check is partially redundant with validate_attachment_filetype
# and checks in convert_attachment_to_content/process_attachment
# but doing the check here avoids an unnecessary download. Due for refactoring.
if not self.allow_images:
if media_type.startswith("image/"):
logger.info(
f"Skipping attachment because allow images is False: {attachment['title']}"
)
continue
if not validate_attachment_filetype(
attachment,
try:
for attachment in self.confluence_client.paginated_cql_retrieval(
cql=attachment_query,
expand=",".join(_ATTACHMENT_EXPANSION_FIELDS),
):
logger.info(
f"Skipping attachment because it is not an accepted file type: {attachment['title']}"
)
continue
media_type: str = attachment.get("metadata", {}).get("mediaType", "")
logger.info(
f"Processing attachment: {attachment['title']} attached to page {page['title']}"
)
# Attachment document id: use the download URL for stable identity
try:
object_url = build_confluence_document_id(
self.wiki_base, attachment["_links"]["download"], self.is_cloud
)
except Exception as e:
logger.warning(
f"Invalid attachment url for id {attachment['id']}, skipping"
)
logger.debug(f"Error building attachment url: {e}")
continue
try:
response = convert_attachment_to_content(
confluence_client=self.confluence_client,
attachment=attachment,
page_id=page["id"],
allow_images=self.allow_images,
)
if response is None:
# TODO(rkuo): this check is partially redundant with validate_attachment_filetype
# and checks in convert_attachment_to_content/process_attachment
# but doing the check here avoids an unnecessary download. Due for refactoring.
if not self.allow_images:
if media_type.startswith("image/"):
logger.info(
f"Skipping attachment because allow images is False: {attachment['title']}"
)
continue
if not validate_attachment_filetype(
attachment,
):
logger.info(
f"Skipping attachment because it is not an accepted file type: {attachment['title']}"
)
continue
content_text, file_storage_name = response
logger.info(
f"Processing attachment: {attachment['title']} attached to page {page['title']}"
)
# Attachment document id: use the download URL for stable identity
try:
object_url = build_confluence_document_id(
self.wiki_base, attachment["_links"]["download"], self.is_cloud
)
except Exception as e:
logger.warning(
f"Invalid attachment url for id {attachment['id']}, skipping"
)
logger.debug(f"Error building attachment url: {e}")
continue
try:
response = convert_attachment_to_content(
confluence_client=self.confluence_client,
attachment=attachment,
page_id=page["id"],
allow_images=self.allow_images,
)
if response is None:
continue
sections: list[TextSection | ImageSection] = []
if content_text:
sections.append(TextSection(text=content_text, link=object_url))
elif file_storage_name:
sections.append(
ImageSection(link=object_url, image_file_id=file_storage_name)
content_text, file_storage_name = response
sections: list[TextSection | ImageSection] = []
if content_text:
sections.append(TextSection(text=content_text, link=object_url))
elif file_storage_name:
sections.append(
ImageSection(
link=object_url, image_file_id=file_storage_name
)
)
# Build attachment-specific metadata
attachment_metadata: dict[str, str | list[str]] = {}
if "space" in attachment:
attachment_metadata["space"] = attachment["space"].get(
"name", ""
)
labels: list[str] = []
if "metadata" in attachment and "labels" in attachment["metadata"]:
for label in attachment["metadata"]["labels"].get(
"results", []
):
labels.append(label.get("name", ""))
if labels:
attachment_metadata["labels"] = labels
page_url = page_url or build_confluence_document_id(
self.wiki_base, page["_links"]["webui"], self.is_cloud
)
attachment_metadata["parent_page_id"] = page_url
attachment_id = build_confluence_document_id(
self.wiki_base, attachment["_links"]["webui"], self.is_cloud
)
# Build attachment-specific metadata
attachment_metadata: dict[str, str | list[str]] = {}
if "space" in attachment:
attachment_metadata["space"] = attachment["space"].get("name", "")
labels: list[str] = []
if "metadata" in attachment and "labels" in attachment["metadata"]:
for label in attachment["metadata"]["labels"].get("results", []):
labels.append(label.get("name", ""))
if labels:
attachment_metadata["labels"] = labels
page_url = page_url or build_confluence_document_id(
self.wiki_base, page["_links"]["webui"], self.is_cloud
)
attachment_metadata["parent_page_id"] = page_url
attachment_id = build_confluence_document_id(
self.wiki_base, attachment["_links"]["webui"], self.is_cloud
)
primary_owners: list[BasicExpertInfo] | None = None
if "version" in attachment and "by" in attachment["version"]:
author = attachment["version"]["by"]
display_name = author.get("displayName", "Unknown")
email = author.get("email", "unknown@domain.invalid")
primary_owners = [
BasicExpertInfo(display_name=display_name, email=email)
]
primary_owners: list[BasicExpertInfo] | None = None
if "version" in attachment and "by" in attachment["version"]:
author = attachment["version"]["by"]
display_name = author.get("displayName", "Unknown")
email = author.get("email", "unknown@domain.invalid")
primary_owners = [
BasicExpertInfo(display_name=display_name, email=email)
]
attachment_doc = Document(
id=attachment_id,
sections=sections,
source=DocumentSource.CONFLUENCE,
semantic_identifier=attachment.get("title", object_url),
metadata=attachment_metadata,
doc_updated_at=(
datetime_from_string(attachment["version"]["when"])
if attachment.get("version")
and attachment["version"].get("when")
else None
),
primary_owners=primary_owners,
)
attachment_docs.append(attachment_doc)
except Exception as e:
logger.error(
f"Failed to extract/summarize attachment {attachment['title']}",
exc_info=e,
)
if is_atlassian_date_error(e):
# propagate error to be caught and retried
raise
attachment_failures.append(
ConnectorFailure(
failed_document=DocumentFailure(
document_id=object_url,
document_link=object_url,
),
failure_message=f"Failed to extract/summarize attachment {attachment['title']} for doc {object_url}",
exception=e,
)
)
except HTTPError as e:
# If we get a 403 after all retries, the user likely doesn't have permission
# to access attachments on this page. Log and skip rather than failing the whole job.
if e.response and e.response.status_code == 403:
page_title = page.get("title", "unknown")
page_id = page.get("id", "unknown")
logger.warning(
f"Permission denied (403) when fetching attachments for page '{page_title}' "
f"(ID: {page_id}). The user may not have permission to query attachments on this page. "
"Skipping attachments for this page."
)
# Build the page URL for the failure record
try:
page_url = build_confluence_document_id(
self.wiki_base, page["_links"]["webui"], self.is_cloud
)
except Exception:
page_url = f"page_id:{page_id}"
attachment_doc = Document(
id=attachment_id,
sections=sections,
source=DocumentSource.CONFLUENCE,
semantic_identifier=attachment.get("title", object_url),
metadata=attachment_metadata,
doc_updated_at=(
datetime_from_string(attachment["version"]["when"])
if attachment.get("version")
and attachment["version"].get("when")
else None
),
primary_owners=primary_owners,
)
attachment_docs.append(attachment_doc)
except Exception as e:
logger.error(
f"Failed to extract/summarize attachment {attachment['title']}",
exc_info=e,
)
if is_atlassian_date_error(e):
# propagate error to be caught and retried
raise
attachment_failures.append(
return [], [
ConnectorFailure(
failed_document=DocumentFailure(
document_id=object_url,
document_link=object_url,
document_id=page_id,
document_link=page_url,
),
failure_message=f"Failed to extract/summarize attachment {attachment['title']} for doc {object_url}",
failure_message=f"Permission denied (403) when fetching attachments for page '{page_title}'",
exception=e,
)
)
]
else:
raise
return attachment_docs, attachment_failures

View File

@@ -579,13 +579,18 @@ class OnyxConfluence:
while url_suffix:
logger.debug(f"Making confluence call to {url_suffix}")
try:
# Only pass params if they're not already in the URL to avoid duplicate
# params accumulating. Confluence's _links.next already includes these.
params = {}
if "body-format=" not in url_suffix:
params["body-format"] = "atlas_doc_format"
if "expand=" not in url_suffix:
params["expand"] = "body.atlas_doc_format"
raw_response = self.get(
path=url_suffix,
advanced_mode=True,
params={
"body-format": "atlas_doc_format",
"expand": "body.atlas_doc_format",
},
params=params,
)
except Exception as e:
logger.exception(f"Error in confluence call to {url_suffix}")

View File

@@ -26,7 +26,6 @@ from onyx.utils.logger import setup_logger
HUBSPOT_BASE_URL = "https://app.hubspot.com"
HUBSPOT_API_URL = "https://api.hubapi.com/integrations/v1/me"
# Available HubSpot object types
AVAILABLE_OBJECT_TYPES = {"tickets", "companies", "deals", "contacts"}
HUBSPOT_PAGE_SIZE = 100

View File

@@ -68,6 +68,10 @@ CONNECTOR_CLASS_MAP = {
module_path="onyx.connectors.slab.connector",
class_name="SlabConnector",
),
DocumentSource.CODA: ConnectorMapping(
module_path="onyx.connectors.coda.connector",
class_name="CodaConnector",
),
DocumentSource.NOTION: ConnectorMapping(
module_path="onyx.connectors.notion.connector",
class_name="NotionConnector",

View File

@@ -13,6 +13,7 @@ from enum import Enum
from typing import Any
from typing import cast
from urllib.parse import unquote
from urllib.parse import urlsplit
import msal # type: ignore[import-untyped]
import requests
@@ -727,46 +728,77 @@ class SharepointConnector(
return self._graph_client
@staticmethod
def _strip_share_link_tokens(path: str) -> list[str]:
# Share links often include a token prefix like /:f:/r/ or /:x:/r/.
segments = [segment for segment in path.split("/") if segment]
if segments and segments[0].startswith(":"):
segments = segments[1:]
if segments and segments[0] in {"r", "s", "g"}:
segments = segments[1:]
return segments
@staticmethod
def _normalize_sharepoint_url(url: str) -> tuple[str | None, list[str]]:
try:
parsed = urlsplit(url)
except ValueError:
logger.warning(f"Sharepoint URL '{url}' could not be parsed")
return None, []
if not parsed.scheme or not parsed.netloc:
logger.warning(
f"Sharepoint URL '{url}' is not a valid absolute URL (missing scheme or host)"
)
return None, []
path_segments = SharepointConnector._strip_share_link_tokens(parsed.path)
return f"{parsed.scheme}://{parsed.netloc}", path_segments
@staticmethod
def _extract_site_and_drive_info(site_urls: list[str]) -> list[SiteDescriptor]:
site_data_list = []
for url in site_urls:
parts = url.strip().split("/")
base_url, parts = SharepointConnector._normalize_sharepoint_url(url.strip())
if base_url is None:
continue
lower_parts = [part.lower() for part in parts]
site_type_index = None
if "sites" in parts:
site_type_index = parts.index("sites")
elif "teams" in parts:
site_type_index = parts.index("teams")
for site_token in ("sites", "teams"):
if site_token in lower_parts:
site_type_index = lower_parts.index(site_token)
break
if site_type_index is not None:
# Extract the base site URL (up to and including the site/team name)
site_url = "/".join(parts[: site_type_index + 2])
remaining_parts = parts[site_type_index + 2 :]
if site_type_index is None or len(parts) <= site_type_index + 1:
logger.warning(
f"Site URL '{url}' is not a valid Sharepoint URL (must contain /sites/<name> or /teams/<name>)"
)
continue
# Extract drive name and folder path
if remaining_parts:
drive_name = unquote(remaining_parts[0])
folder_path = (
"/".join(unquote(part) for part in remaining_parts[1:])
if len(remaining_parts) > 1
else None
)
else:
drive_name = None
folder_path = None
site_path = parts[: site_type_index + 2]
remaining_parts = parts[site_type_index + 2 :]
site_url = f"{base_url}/" + "/".join(site_path)
site_data_list.append(
SiteDescriptor(
url=site_url,
drive_name=drive_name,
folder_path=folder_path,
)
# Extract drive name and folder path
if remaining_parts:
drive_name = unquote(remaining_parts[0])
folder_path = (
"/".join(unquote(part) for part in remaining_parts[1:])
if len(remaining_parts) > 1
else None
)
else:
logger.warning(
f"Site URL '{url}' is not a valid Sharepoint URL (must contain /sites/ or /teams/)"
drive_name = None
folder_path = None
site_data_list.append(
SiteDescriptor(
url=site_url,
drive_name=drive_name,
folder_path=folder_path,
)
)
return site_data_list
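To show what the share-link handling above does to a typical sharing URL, here is a self-contained mirror of the token-stripping logic with an example input; the URL is made up.

from urllib.parse import unquote, urlsplit


def strip_share_link_tokens(path: str) -> list[str]:
    segments = [segment for segment in path.split("/") if segment]
    if segments and segments[0].startswith(":"):
        segments = segments[1:]  # drop the /:f:/, /:x:/ ... share-link token
    if segments and segments[0] in {"r", "s", "g"}:
        segments = segments[1:]  # drop the routing segment
    return segments


url = "https://contoso.sharepoint.com/:f:/r/sites/Marketing/Shared%20Documents/Reports"
parsed = urlsplit(url)
segments = strip_share_link_tokens(parsed.path)

print(f"{parsed.scheme}://{parsed.netloc}")  # https://contoso.sharepoint.com
print(segments)  # ['sites', 'Marketing', 'Shared%20Documents', 'Reports']
print(unquote(segments[2]))  # 'Shared Documents' -> the drive name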
def _get_drive_items_for_drive_name(

View File

@@ -99,7 +99,9 @@ DEFAULT_HEADERS = {
"image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"
),
"Accept-Language": "en-US,en;q=0.9",
"Accept-Encoding": "gzip, deflate, br",
# Brotli decoding has been flaky in brotlicffi/httpx for certain chunked responses;
# stick to gzip/deflate to keep connectivity checks stable.
"Accept-Encoding": "gzip, deflate",
"Connection": "keep-alive",
"Upgrade-Insecure-Requests": "1",
"Sec-Fetch-Dest": "document",
@@ -349,10 +351,13 @@ def start_playwright() -> Tuple[Playwright, BrowserContext]:
def extract_urls_from_sitemap(sitemap_url: str) -> list[str]:
# requests should handle brotli compression automatically
# as long as the brotli package is available in the venv. Leaving this line here to avoid
# a regression as someone says "Ah, looks like this brotli package isn't used anywhere, let's remove it"
# import brotli
try:
response = requests.get(sitemap_url, headers=DEFAULT_HEADERS)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")
urls = [
_ensure_absolute_url(sitemap_url, loc_tag.text)

View File

@@ -20,6 +20,11 @@ class OptionalSearchSetting(str, Enum):
AUTO = "auto"
class QueryType(str, Enum):
KEYWORD = "keyword"
SEMANTIC = "semantic"
class SearchType(str, Enum):
KEYWORD = "keyword"
SEMANTIC = "semantic"

View File

@@ -1,7 +1,19 @@
from datetime import datetime
from typing import TypedDict
from pydantic import BaseModel
from onyx.onyxbot.slack.models import ChannelType
class ChannelMetadata(TypedDict):
"""Type definition for cached channel metadata."""
name: str
type: ChannelType
is_private: bool
is_member: bool
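A minimal sketch of one cached entry in this shape, using plain strings for the channel type to keep it standalone; the channel ID and values are illustrative.

import json
from typing import TypedDict


class ChannelMetadataSketch(TypedDict):  # mirrors the shape above with plain strings
    name: str
    type: str  # a ChannelType value in the real code
    is_private: bool
    is_member: bool


channel_metadata: dict[str, ChannelMetadataSketch] = {
    "C0123456789": {
        "name": "engineering",
        "type": "public_channel",
        "is_private": False,
        "is_member": True,
    }
}

# The cache round-trips through JSON, which is why a TypedDict (a plain dict at
# runtime) is a convenient shape for the cached payload.
payload = json.dumps(channel_metadata)
restored = json.loads(payload)
assert restored["C0123456789"]["type"] == "public_channel"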
class SlackMessage(BaseModel):
document_id: str

View File

@@ -3,6 +3,7 @@ import re
import time
from datetime import datetime
from typing import Any
from typing import cast
from pydantic import BaseModel
from pydantic import ConfigDict
@@ -15,11 +16,11 @@ from onyx.configs.app_configs import ENABLE_CONTEXTUAL_RAG
from onyx.configs.chat_configs import DOC_TIME_DECAY
from onyx.connectors.models import IndexingDocument
from onyx.connectors.models import TextSection
from onyx.context.search.federated.models import ChannelMetadata
from onyx.context.search.federated.models import SlackMessage
from onyx.context.search.federated.slack_search_utils import ALL_CHANNEL_TYPES
from onyx.context.search.federated.slack_search_utils import build_channel_query_filter
from onyx.context.search.federated.slack_search_utils import build_slack_queries
from onyx.context.search.federated.slack_search_utils import ChannelTypeString
from onyx.context.search.federated.slack_search_utils import get_channel_type
from onyx.context.search.federated.slack_search_utils import (
get_channel_type_for_missing_scope,
@@ -62,7 +63,7 @@ CHANNEL_METADATA_RETRY_DELAY = 1 # Initial retry delay in seconds (exponential
def fetch_and_cache_channel_metadata(
access_token: str, team_id: str, include_private: bool = True
) -> dict[str, dict[str, Any]]:
) -> dict[str, ChannelMetadata]:
"""
Fetch ALL channel metadata in one API call and cache it.
@@ -84,13 +85,13 @@ def fetch_and_cache_channel_metadata(
cached_str: str = (
cached.decode("utf-8") if isinstance(cached, bytes) else str(cached)
)
cached_data: dict[str, dict[str, Any]] = json.loads(cached_str)
cached_data = cast(dict[str, ChannelMetadata], json.loads(cached_str))
logger.debug(f"Loaded {len(cached_data)} channels from cache")
if not include_private:
filtered = {
filtered: dict[str, ChannelMetadata] = {
k: v
for k, v in cached_data.items()
if v.get("type") != "private_channel"
if v.get("type") != ChannelType.PRIVATE_CHANNEL.value
}
logger.debug(f"Filtered to {len(filtered)} channels (exclude private)")
return filtered
@@ -101,7 +102,7 @@ def fetch_and_cache_channel_metadata(
# Cache miss - fetch from Slack API with retry logic
logger.debug(f"Channel metadata cache MISS for team {team_id} - fetching from API")
slack_client = WebClient(token=access_token)
channel_metadata: dict[str, dict[str, Any]] = {}
channel_metadata: dict[str, ChannelMetadata] = {}
# Retry logic with exponential backoff
last_exception = None
@@ -133,7 +134,7 @@ def fetch_and_cache_channel_metadata(
# Determine channel type
channel_type_enum = get_channel_type(channel_info=ch)
channel_type = channel_type_enum.value
channel_type = ChannelType(channel_type_enum.value)
channel_metadata[channel_id] = {
"name": ch.get("name", ""),
@@ -326,7 +327,7 @@ def batch_get_user_profiles(
def _extract_channel_data_from_entities(
entities: dict[str, Any] | None,
channel_metadata_dict: dict[str, dict[str, Any]] | None,
channel_metadata_dict: dict[str, ChannelMetadata] | None,
) -> list[str] | None:
"""Extract available channels list from metadata based on entity configuration.
@@ -351,7 +352,7 @@ def _extract_channel_data_from_entities(
if meta["name"]
and (
parsed_entities.include_private_channels
or meta.get("type") != ChannelTypeString.PRIVATE_CHANNEL.value
or meta.get("type") != ChannelType.PRIVATE_CHANNEL.value
)
]
except ValidationError:
@@ -366,7 +367,7 @@ def _should_skip_channel(
bot_token: str | None,
access_token: str,
include_dm: bool,
channel_metadata_dict: dict[str, dict[str, Any]] | None = None,
channel_metadata_dict: dict[str, ChannelMetadata] | None = None,
) -> bool:
"""Bot context filtering: skip private channels unless explicitly allowed.
@@ -430,7 +431,7 @@ def query_slack(
include_dm: bool = False,
entities: dict[str, Any] | None = None,
available_channels: list[str] | None = None,
channel_metadata_dict: dict[str, dict[str, Any]] | None = None,
channel_metadata_dict: dict[str, ChannelMetadata] | None = None,
) -> SlackQueryResult:
# Check if query has channel override (user specified channels in query)
@@ -450,7 +451,7 @@ def query_slack(
# Add channel filter to query
final_query = f"{query_string} {channel_filter}"
logger.debug(f"Final query to slack: {final_query}")
logger.info(f"Final query to slack: {final_query}")
# Detect if query asks for most recent results
sort_by_time = is_recency_query(original_query.query)
@@ -474,7 +475,7 @@ def query_slack(
messages: dict[str, Any] = response.get("messages", {})
matches: list[dict[str, Any]] = messages.get("matches", [])
logger.debug(f"Slack search found {len(matches)} messages")
logger.info(f"Slack search found {len(matches)} messages")
except SlackApiError as slack_error:
logger.error(f"Slack API error in search_messages: {slack_error}")
logger.error(

View File

@@ -4,17 +4,16 @@ import re
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from enum import Enum
from typing import Any
from langchain_core.messages import HumanMessage
from pydantic import ValidationError
from onyx.configs.app_configs import MAX_SLACK_QUERY_EXPANSIONS
from onyx.context.search.federated.models import ChannelMetadata
from onyx.context.search.models import ChunkIndexRequest
from onyx.federated_connectors.slack.models import SlackEntities
from onyx.llm.interfaces import LLM
from onyx.llm.utils import message_to_string
from onyx.llm.utils import llm_response_to_string
from onyx.onyxbot.slack.models import ChannelType
from onyx.prompts.federated_search import SLACK_DATE_EXTRACTION_PROMPT
from onyx.prompts.federated_search import SLACK_QUERY_EXPANSION_PROMPT
@@ -34,35 +33,25 @@ WORD_PUNCTUATION = ".,!?;:\"'#"
RECENCY_KEYWORDS = ["recent", "latest", "newest", "last"]
class ChannelTypeString(str, Enum):
"""String representations of Slack channel types."""
IM = "im"
MPIM = "mpim"
PRIVATE_CHANNEL = "private_channel"
PUBLIC_CHANNEL = "public_channel"
# All Slack channel types for fetching metadata
ALL_CHANNEL_TYPES = [
ChannelTypeString.PUBLIC_CHANNEL.value,
ChannelTypeString.IM.value,
ChannelTypeString.MPIM.value,
ChannelTypeString.PRIVATE_CHANNEL.value,
ChannelType.PUBLIC_CHANNEL.value,
ChannelType.IM.value,
ChannelType.MPIM.value,
ChannelType.PRIVATE_CHANNEL.value,
]
# Map Slack API scopes to their corresponding channel types
# This is used for graceful degradation when scopes are missing
SCOPE_TO_CHANNEL_TYPE_MAP = {
"mpim:read": ChannelTypeString.MPIM.value,
"mpim:history": ChannelTypeString.MPIM.value,
"im:read": ChannelTypeString.IM.value,
"im:history": ChannelTypeString.IM.value,
"groups:read": ChannelTypeString.PRIVATE_CHANNEL.value,
"groups:history": ChannelTypeString.PRIVATE_CHANNEL.value,
"channels:read": ChannelTypeString.PUBLIC_CHANNEL.value,
"channels:history": ChannelTypeString.PUBLIC_CHANNEL.value,
"mpim:read": ChannelType.MPIM.value,
"mpim:history": ChannelType.MPIM.value,
"im:read": ChannelType.IM.value,
"im:history": ChannelType.IM.value,
"groups:read": ChannelType.PRIVATE_CHANNEL.value,
"groups:history": ChannelType.PRIVATE_CHANNEL.value,
"channels:read": ChannelType.PUBLIC_CHANNEL.value,
"channels:history": ChannelType.PUBLIC_CHANNEL.value,
}
@@ -201,9 +190,7 @@ def extract_date_range_from_query(
try:
prompt = SLACK_DATE_EXTRACTION_PROMPT.format(query=query)
response = message_to_string(
llm.invoke_langchain([HumanMessage(content=prompt)])
)
response = llm_response_to_string(llm.invoke(prompt))
response_clean = _parse_llm_code_block_response(response)
@@ -334,7 +321,7 @@ def build_channel_query_filter(
def get_channel_type(
channel_info: dict[str, Any] | None = None,
channel_id: str | None = None,
channel_metadata: dict[str, dict[str, Any]] | None = None,
channel_metadata: dict[str, ChannelMetadata] | None = None,
) -> ChannelType:
"""
Determine channel type from channel info dict or by looking up channel_id.
@@ -361,11 +348,11 @@ def get_channel_type(
ch_meta = channel_metadata.get(channel_id)
if ch_meta:
type_str = ch_meta.get("type")
if type_str == ChannelTypeString.IM.value:
if type_str == ChannelType.IM.value:
return ChannelType.IM
elif type_str == ChannelTypeString.MPIM.value:
elif type_str == ChannelType.MPIM.value:
return ChannelType.MPIM
elif type_str == ChannelTypeString.PRIVATE_CHANNEL.value:
elif type_str == ChannelType.PRIVATE_CHANNEL.value:
return ChannelType.PRIVATE_CHANNEL
return ChannelType.PUBLIC_CHANNEL
@@ -594,9 +581,7 @@ def expand_query_with_llm(query_text: str, llm: LLM) -> list[str]:
)
try:
response = message_to_string(
llm.invoke_langchain([HumanMessage(content=prompt)])
)
response = llm_response_to_string(llm.invoke(prompt))
response_clean = _parse_llm_code_block_response(response)

View File

@@ -1,7 +1,7 @@
An explanation of how the history of messages, tool calls, and docs are stored in the database:
Messages are grouped by a chat session; a tree structure is used to allow edits and for the
user to switch between branches. Each ChatMessage is either a user message of an assistant message.
user to switch between branches. Each ChatMessage is either a user message or an assistant message.
It should always alternate between the two. System messages, custom agent prompt injections, and
reminder messages are injected dynamically after the chat session is loaded into memory. The user
and assistant messages are stored in pairs, though it is ok if the user message is stored and the
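Below is a minimal sketch of the parent-linked tree described above, with invented field names standing in for the real ChatMessage columns; it shows how an edit creates a sibling branch and how one branch is reconstructed by walking parent links.

from dataclasses import dataclass


@dataclass
class Msg:  # invented stand-in for the stored ChatMessage row
    id: int
    parent_id: int | None
    role: str  # "user" or "assistant"
    text: str


# One session: message 3 is an edit of message 1, creating a sibling branch.
messages = [
    Msg(1, None, "user", "What is Onyx?"),
    Msg(2, 1, "assistant", "An open-source workplace AI platform ..."),
    Msg(3, None, "user", "What is Onyx, in one sentence?"),
    Msg(4, 3, "assistant", "A self-hostable assistant over your team's knowledge."),
]
by_id = {m.id: m for m in messages}


def branch_ending_at(leaf_id: int) -> list[Msg]:
    """Walk parent links from a leaf back to the root to reconstruct one branch."""
    branch: list[Msg] = []
    current: Msg | None = by_id[leaf_id]
    while current is not None:
        branch.append(current)
        current = by_id[current.parent_id] if current.parent_id is not None else None
    return list(reversed(branch))


print([m.role for m in branch_ending_at(4)])  # ['user', 'assistant'] -- the edited branch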

View File

@@ -17,8 +17,10 @@ from sqlalchemy.orm import joinedload
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Session
from onyx.configs.app_configs import AUTH_TYPE
from onyx.configs.app_configs import DISABLE_AUTH
from onyx.configs.app_configs import USER_FILE_INDEXING_LIMIT
from onyx.configs.constants import AuthType
from onyx.configs.constants import DocumentSource
from onyx.db.connector import fetch_connector_by_id
from onyx.db.credentials import fetch_credential_by_id
@@ -445,10 +447,12 @@ def set_cc_pair_repeated_error_state(
values: dict = {"in_repeated_error_state": in_repeated_error_state}
# When entering repeated error state, also pause the connector
# to prevent continued indexing retry attempts.
# to prevent continued indexing retry attempts burning through embedding credits.
# However, don't pause if there's an active manual indexing trigger,
# which indicates the user wants to retry immediately.
if in_repeated_error_state:
# NOTE: only for Cloud, since most self-hosted users use self-hosted embedding
# models. Also, they are more prone to repeated failures -> eventual success.
if in_repeated_error_state and AUTH_TYPE == AuthType.CLOUD:
cc_pair = get_connector_credential_pair_from_id(
db_session=db_session,
cc_pair_id=cc_pair_id,

View File

@@ -251,15 +251,27 @@ def upsert_llm_provider(
db_session.flush()
# Import here to avoid circular imports
from onyx.llm.utils import get_max_input_tokens
for model_configuration in llm_provider_upsert_request.model_configurations:
# If max_input_tokens is not provided, look it up from LiteLLM
max_input_tokens = model_configuration.max_input_tokens
if max_input_tokens is None:
max_input_tokens = get_max_input_tokens(
model_name=model_configuration.name,
model_provider=llm_provider_upsert_request.provider,
)
db_session.execute(
insert(ModelConfiguration)
.values(
llm_provider_id=existing_llm_provider.id,
name=model_configuration.name,
is_visible=model_configuration.is_visible,
max_input_tokens=model_configuration.max_input_tokens,
max_input_tokens=max_input_tokens,
supports_image_input=model_configuration.supports_image_input,
display_name=model_configuration.display_name,
)
.on_conflict_do_nothing()
)
@@ -289,6 +301,56 @@ def upsert_llm_provider(
return full_llm_provider
def sync_model_configurations(
db_session: Session,
provider_name: str,
models: list[dict],
) -> int:
"""Sync model configurations for a dynamic provider (OpenRouter, Bedrock, Ollama).
This inserts NEW models from the source API without overwriting existing ones.
User preferences (is_visible, max_input_tokens) are preserved for existing models.
Args:
db_session: Database session
provider_name: Name of the LLM provider
models: List of model dicts with keys: name, display_name, max_input_tokens, supports_image_input
Returns:
Number of new models added
"""
provider = fetch_existing_llm_provider(name=provider_name, db_session=db_session)
if not provider:
raise ValueError(f"LLM Provider '{provider_name}' not found")
# Get existing model names to count new additions
existing_names = {mc.name for mc in provider.model_configurations}
new_count = 0
for model in models:
model_name = model["name"]
if model_name not in existing_names:
# Insert new model with is_visible=False (user must explicitly enable)
db_session.execute(
insert(ModelConfiguration)
.values(
llm_provider_id=provider.id,
name=model_name,
is_visible=False,
max_input_tokens=model.get("max_input_tokens"),
supports_image_input=model.get("supports_image_input", False),
display_name=model.get("display_name"),
)
.on_conflict_do_nothing()
)
new_count += 1
if new_count > 0:
db_session.commit()
return new_count
def fetch_existing_embedding_providers(
db_session: Session,
) -> list[CloudEmbeddingProviderModel]:
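The new `sync_model_configurations` only inserts rows for models it has not seen before, so user tweaks to `is_visible` and `max_input_tokens` survive a re-sync. Below is a dependency-free sketch of that insert-only rule; `sync_models` and its dict shapes are illustrative, not the real ORM models.

```python
# Insert-only sync: add newly discovered models, never overwrite existing entries.
def sync_models(existing: dict[str, dict], discovered: list[dict]) -> int:
    """Add models discovered from a provider API while preserving existing entries."""
    added = 0
    for model in discovered:
        name = model["name"]
        if name in existing:
            # Existing rows (and any user changes to is_visible / max_input_tokens) are untouched.
            continue
        existing[name] = {
            "is_visible": False,  # new models start hidden until explicitly enabled
            "max_input_tokens": model.get("max_input_tokens"),
            "display_name": model.get("display_name"),
        }
        added += 1
    return added


configs = {"gpt-4o": {"is_visible": True, "max_input_tokens": 128000, "display_name": "GPT-4o"}}
new = sync_models(configs, [{"name": "gpt-4o"}, {"name": "o4-mini", "max_input_tokens": 200000}])
assert new == 1 and configs["gpt-4o"]["is_visible"] is True
```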

View File

@@ -2141,6 +2141,8 @@ class ChatMessage(Base):
time_sent: Mapped[datetime.datetime] = mapped_column(
DateTime(timezone=True), server_default=func.now()
)
# True if this assistant message is a clarification question (deep research flow)
is_clarification: Mapped[bool] = mapped_column(Boolean, default=False)
# Relationships
chat_session: Mapped[ChatSession] = relationship("ChatSession")
@@ -2437,6 +2439,11 @@ class ModelConfiguration(Base):
supports_image_input: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
# Human-readable display name for the model.
# For dynamic providers (OpenRouter, Bedrock, Ollama), this comes from the source API.
# For static providers (OpenAI, Anthropic), this may be null and will fall back to LiteLLM.
display_name: Mapped[str | None] = mapped_column(String, nullable=True)
llm_provider: Mapped["LLMProvider"] = relationship(
"LLMProvider",
back_populates="model_configurations",

View File

@@ -41,7 +41,7 @@ from onyx.server.features.persona.models import MinimalPersonaSnapshot
from onyx.server.features.persona.models import PersonaSharedNotificationData
from onyx.server.features.persona.models import PersonaSnapshot
from onyx.server.features.persona.models import PersonaUpsertRequest
from onyx.server.features.tool.models import should_expose_tool_to_fe
from onyx.server.features.tool.tool_visibility import should_expose_tool_to_fe
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import fetch_versioned_implementation
@@ -416,6 +416,9 @@ def get_persona_snapshots_for_user(
selectinload(Persona.labels),
selectinload(Persona.document_sets),
selectinload(Persona.user),
selectinload(Persona.user_files),
selectinload(Persona.users),
selectinload(Persona.groups),
)
results = db_session.scalars(stmt).all()
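The extra `selectinload` options exist to avoid N+1 lazy loads when the snapshots are serialized. Below is a self-contained illustration of the pattern on toy models backed by in-memory SQLite; `Persona`/`Label` here are simplified stand-ins for the real models.

```python
# selectinload issues one extra SELECT ... WHERE id IN (...) per relationship
# instead of one lazy query per row (the classic N+1). Toy models only.
from sqlalchemy import ForeignKey, create_engine, select
from sqlalchemy.orm import (
    DeclarativeBase,
    Mapped,
    Session,
    mapped_column,
    relationship,
    selectinload,
)


class Base(DeclarativeBase):
    pass


class Persona(Base):
    __tablename__ = "persona"
    id: Mapped[int] = mapped_column(primary_key=True)
    labels: Mapped[list["Label"]] = relationship(back_populates="persona")


class Label(Base):
    __tablename__ = "label"
    id: Mapped[int] = mapped_column(primary_key=True)
    persona_id: Mapped[int] = mapped_column(ForeignKey("persona.id"))
    persona: Mapped[Persona] = relationship(back_populates="labels")


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)
with Session(engine) as session:
    session.add(Persona(id=1, labels=[Label(id=1), Label(id=2)]))
    session.commit()

with Session(engine) as session:
    stmt = select(Persona).options(selectinload(Persona.labels))
    personas = session.scalars(stmt).all()
    # Relationships are already populated; iterating emits no further queries.
    print([len(p.labels) for p in personas])
```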

View File

@@ -4,6 +4,7 @@ from sqlalchemy import and_
from sqlalchemy import delete
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.orm import Session
from onyx.configs.constants import DocumentSource
@@ -43,17 +44,26 @@ def create_or_add_document_tag(
if not document:
raise ValueError("Invalid Document, cannot attach Tags")
# Use upsert to avoid race condition when multiple workers try to create the same tag
insert_stmt = pg_insert(Tag).values(
tag_key=tag_key,
tag_value=tag_value,
source=source,
is_list=False,
)
insert_stmt = insert_stmt.on_conflict_do_nothing(
index_elements=["tag_key", "tag_value", "source", "is_list"]
)
db_session.execute(insert_stmt)
# Now fetch the tag (either just inserted or already existed)
tag_stmt = select(Tag).where(
Tag.tag_key == tag_key,
Tag.tag_value == tag_value,
Tag.source == source,
Tag.is_list.is_(False),
)
tag = db_session.execute(tag_stmt).scalar_one_or_none()
if not tag:
tag = Tag(tag_key=tag_key, tag_value=tag_value, source=source, is_list=False)
db_session.add(tag)
tag = db_session.execute(tag_stmt).scalar_one()
if tag not in document.tags:
document.tags.append(tag)
@@ -79,31 +89,27 @@ def create_or_add_document_tag_list(
if not document:
raise ValueError("Invalid Document, cannot attach Tags")
existing_tags_stmt = select(Tag).where(
# Use upsert to avoid race condition when multiple workers try to create the same tags
for tag_value in valid_tag_values:
insert_stmt = pg_insert(Tag).values(
tag_key=tag_key,
tag_value=tag_value,
source=source,
is_list=True,
)
insert_stmt = insert_stmt.on_conflict_do_nothing(
index_elements=["tag_key", "tag_value", "source", "is_list"]
)
db_session.execute(insert_stmt)
# Now fetch all tags (either just inserted or already existed)
all_tags_stmt = select(Tag).where(
Tag.tag_key == tag_key,
Tag.tag_value.in_(valid_tag_values),
Tag.source == source,
Tag.is_list.is_(True),
)
existing_tags = list(db_session.execute(existing_tags_stmt).scalars().all())
existing_tag_values = {tag.tag_value for tag in existing_tags}
new_tags = []
for tag_value in valid_tag_values:
if tag_value not in existing_tag_values:
new_tag = Tag(
tag_key=tag_key, tag_value=tag_value, source=source, is_list=True
)
db_session.add(new_tag)
new_tags.append(new_tag)
existing_tag_values.add(tag_value)
if new_tags:
logger.debug(
f"Created new tags: {', '.join([f'{tag.tag_key}:{tag.tag_value}' for tag in new_tags])}"
)
all_tags = existing_tags + new_tags
all_tags = list(db_session.execute(all_tags_stmt).scalars().all())
for tag in all_tags:
if tag not in document.tags:

View File

@@ -4,6 +4,7 @@ from typing import Type
from typing import TYPE_CHECKING
from uuid import UUID
from sqlalchemy import func
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy.orm import Session
@@ -49,7 +50,12 @@ def get_tools(
query = query.where(Tool.enabled.is_(True))
if only_openapi:
query = query.where(Tool.openapi_schema.is_not(None))
query = query.where(
Tool.openapi_schema.is_not(None),
# Avoid showing the user rows whose column stores the JSON literal `null`.
# Tools from MCP servers have no OpenAPI schema but store `null` here, so exclude them.
func.jsonb_typeof(Tool.openapi_schema) == "object",
)
return list(db_session.scalars(query).all())
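The added predicate distinguishes SQL `NULL` (no schema stored at all) from rows whose JSONB column stores the JSON literal `null`, as MCP-server tools do; `jsonb_typeof` returns `'null'` for the latter, so only real JSON objects pass. Roughly equivalent raw SQL, assuming the underlying table is named `tool`:

```python
# Illustrative equivalent of the filter above; the table name is an assumption.
EQUIVALENT_SQL = """
SELECT *
FROM tool
WHERE openapi_schema IS NOT NULL
  AND jsonb_typeof(openapi_schema) = 'object'
"""
```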

View File



@@ -0,0 +1,254 @@
# TODO: Notes for potential extensions and future improvements:
# 1. Allow tools that aren't search specific tools
# 2. Use user provided custom prompts
from collections.abc import Callable
from typing import cast
from sqlalchemy.orm import Session
from onyx.chat.chat_state import ChatStateContainer
from onyx.chat.citation_processor import DynamicCitationProcessor
from onyx.chat.emitter import Emitter
from onyx.chat.llm_loop import construct_message_history
from onyx.chat.llm_step import run_llm_step
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import LlmStepResult
from onyx.configs.constants import MessageType
from onyx.deep_research.dr_mock_tools import get_clarification_tool_definitions
from onyx.llm.interfaces import LLM
from onyx.llm.interfaces import LLMUserIdentity
from onyx.llm.models import ToolChoiceOptions
from onyx.llm.utils import model_is_reasoning_model
from onyx.prompts.deep_research.orchestration_layer import CLARIFICATION_PROMPT
from onyx.prompts.deep_research.orchestration_layer import ORCHESTRATOR_PROMPT
from onyx.prompts.deep_research.orchestration_layer import ORCHESTRATOR_PROMPT_REASONING
from onyx.prompts.deep_research.orchestration_layer import RESEARCH_PLAN_PROMPT
from onyx.prompts.prompt_utils import get_current_llm_day_time
from onyx.server.query_and_chat.streaming_models import AgentResponseDelta
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
from onyx.server.query_and_chat.streaming_models import DeepResearchPlanDelta
from onyx.server.query_and_chat.streaming_models import DeepResearchPlanStart
from onyx.server.query_and_chat.streaming_models import OverallStop
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.tools.tool import Tool
from onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool
from onyx.utils.logger import setup_logger
logger = setup_logger()
MAX_USER_MESSAGES_FOR_CONTEXT = 5
MAX_ORCHESTRATOR_CYCLES = 8
def run_deep_research_llm_loop(
emitter: Emitter,
state_container: ChatStateContainer,
simple_chat_history: list[ChatMessageSimple],
tools: list[Tool],
custom_agent_prompt: str | None,
llm: LLM,
token_counter: Callable[[str], int],
db_session: Session,
skip_clarification: bool = False,
user_identity: LLMUserIdentity | None = None,
) -> None:
# Here for lazy load LiteLLM
from onyx.llm.litellm_singleton.config import initialize_litellm
# An approximate limit. In extreme cases it may still fail but this should allow deep research
# to work in most cases.
if llm.config.max_input_tokens < 25000:
raise RuntimeError(
"Cannot run Deep Research with an LLM that has less than 25,000 max input tokens"
)
initialize_litellm()
available_tokens = llm.config.max_input_tokens
llm_step_result: LlmStepResult | None = None
# Filter tools to only allow web search, internal search, and open URL
allowed_tool_names = {SearchTool.NAME, WebSearchTool.NAME, OpenURLTool.NAME}
tools = [tool for tool in tools if tool.name in allowed_tool_names]
#########################################################
# CLARIFICATION STEP (optional)
#########################################################
if not skip_clarification:
clarification_prompt = CLARIFICATION_PROMPT.format(
current_datetime=get_current_llm_day_time(full_sentence=False)
)
system_prompt = ChatMessageSimple(
message=clarification_prompt,
token_count=300, # Skips the exact token count but has enough leeway
message_type=MessageType.SYSTEM,
)
truncated_message_history = construct_message_history(
system_prompt=system_prompt,
custom_agent_prompt=None,
simple_chat_history=simple_chat_history,
reminder_message=None,
project_files=None,
available_tokens=available_tokens,
last_n_user_messages=MAX_USER_MESSAGES_FOR_CONTEXT,
)
step_generator = run_llm_step(
history=truncated_message_history,
tool_definitions=get_clarification_tool_definitions(),
tool_choice=ToolChoiceOptions.AUTO,
llm=llm,
turn_index=0,
# No citations in this step, it should just pass through all
# tokens directly so initialized as an empty citation processor
citation_processor=DynamicCitationProcessor(),
state_container=state_container,
final_documents=None,
user_identity=user_identity,
)
# Consume the generator, emitting packets and capturing the final result
while True:
try:
packet = next(step_generator)
emitter.emit(packet)
except StopIteration as e:
llm_step_result, _ = e.value
break
# Type narrowing: generator always returns a result, so this can't be None
llm_step_result = cast(LlmStepResult, llm_step_result)
if not llm_step_result.tool_calls:
# Mark this turn as a clarification question
state_container.set_is_clarification(True)
emitter.emit(Packet(turn_index=0, obj=OverallStop(type="stop")))
# If a clarification is asked, we need to end this turn and wait on user input
return
#########################################################
# RESEARCH PLAN STEP
#########################################################
system_prompt = ChatMessageSimple(
message=RESEARCH_PLAN_PROMPT.format(
current_datetime=get_current_llm_day_time(full_sentence=False)
),
token_count=300,
message_type=MessageType.SYSTEM,
)
truncated_message_history = construct_message_history(
system_prompt=system_prompt,
custom_agent_prompt=None,
simple_chat_history=simple_chat_history,
reminder_message=None,
project_files=None,
available_tokens=available_tokens,
last_n_user_messages=MAX_USER_MESSAGES_FOR_CONTEXT,
)
research_plan_generator = run_llm_step(
history=truncated_message_history,
tool_definitions=[],
tool_choice=ToolChoiceOptions.NONE,
llm=llm,
turn_index=0,
# No citations in this step, it should just pass through all
# tokens directly so initialized as an empty citation processor
citation_processor=DynamicCitationProcessor(),
state_container=state_container,
final_documents=None,
user_identity=user_identity,
)
while True:
try:
packet = next(research_plan_generator)
# Translate AgentResponseStart/Delta packets to DeepResearchPlanStart/Delta
if isinstance(packet.obj, AgentResponseStart):
emitter.emit(
Packet(
turn_index=packet.turn_index,
obj=DeepResearchPlanStart(),
)
)
elif isinstance(packet.obj, AgentResponseDelta):
emitter.emit(
Packet(
turn_index=packet.turn_index,
obj=DeepResearchPlanDelta(content=packet.obj.content),
)
)
else:
# Pass through other packet types (e.g., ReasoningStart, ReasoningDelta, etc.)
emitter.emit(packet)
except StopIteration as e:
llm_step_result, _ = e.value
break
llm_step_result = cast(LlmStepResult, llm_step_result)
research_plan = llm_step_result.answer
#########################################################
# RESEARCH EXECUTION STEP
#########################################################
is_reasoning_model = model_is_reasoning_model(
llm.config.model_name, llm.config.model_provider
)
orchestrator_prompt_template = (
ORCHESTRATOR_PROMPT if not is_reasoning_model else ORCHESTRATOR_PROMPT_REASONING
)
token_count_prompt = orchestrator_prompt_template.format(
current_datetime=get_current_llm_day_time(full_sentence=False),
current_cycle_count=1,
max_cycles=MAX_ORCHESTRATOR_CYCLES,
research_plan=research_plan,
)
orchestration_tokens = token_counter(token_count_prompt)
for cycle in range(MAX_ORCHESTRATOR_CYCLES):
orchestrator_prompt = orchestrator_prompt_template.format(
current_datetime=get_current_llm_day_time(full_sentence=False),
current_cycle_count=cycle,
max_cycles=MAX_ORCHESTRATOR_CYCLES,
research_plan=research_plan,
)
system_prompt = ChatMessageSimple(
message=orchestrator_prompt,
token_count=orchestration_tokens,
message_type=MessageType.SYSTEM,
)
truncated_message_history = construct_message_history(
system_prompt=system_prompt,
custom_agent_prompt=None,
simple_chat_history=simple_chat_history,
reminder_message=None,
project_files=None,
available_tokens=available_tokens,
last_n_user_messages=MAX_USER_MESSAGES_FOR_CONTEXT,
)
research_plan_generator = run_llm_step(
history=truncated_message_history,
tool_definitions=[],
tool_choice=ToolChoiceOptions.AUTO,
llm=llm,
turn_index=cycle,
# No citations in this step, it should just pass through all
# tokens directly so initialized as an empty citation processor
citation_processor=DynamicCitationProcessor(),
state_container=state_container,
final_documents=None,
user_identity=user_identity,
)
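`run_llm_step` is consumed by pulling packets with `next()` and reading the generator's return value off `StopIteration.value`, a pattern that appears in each stage above. Below is a standalone sketch with toy names: `fake_llm_step` stands in for `run_llm_step` and plain strings stand in for `Packet` objects.

```python
# A generator that yields streaming packets and *returns* a final result,
# which the caller recovers from StopIteration.value.
from collections.abc import Generator


def fake_llm_step() -> Generator[str, None, tuple[str, int]]:
    yield "packet-1"
    yield "packet-2"
    return ("final-answer", 42)  # becomes StopIteration.value for the caller


result: tuple[str, int] | None = None
gen = fake_llm_step()
while True:
    try:
        packet = next(gen)
        print("emit:", packet)  # the real code forwards this to the Emitter
    except StopIteration as e:
        result = e.value
        break

assert result == ("final-answer", 42)
```

Using an explicit loop rather than `yield from` lets the caller inspect or translate each packet before emitting it, which is what the research-plan step does when it rewrites `AgentResponseStart`/`Delta` into `DeepResearchPlanStart`/`Delta`.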

View File

@@ -0,0 +1,18 @@
GENERATE_PLAN_TOOL_NAME = "generate_plan"
def get_clarification_tool_definitions() -> list[dict]:
return [
{
"type": "function",
"function": {
"name": GENERATE_PLAN_TOOL_NAME,
"description": "No clarification needed, generate a research plan for the user's query.",
"parameters": {
"type": "object",
"properties": {},
"required": [],
},
},
}
]
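The single no-argument `generate_plan` tool acts as a sentinel: if the model calls it, no clarification is needed; if it answers in plain text instead, that text is surfaced as the clarification question. A toy sketch of that branch is below; `FakeLlmStepResult` is a stand-in for the real result model.

```python
# Interpret the sentinel tool: a tool call means "no clarification needed",
# a plain answer is treated as the clarification question.
from dataclasses import dataclass, field

GENERATE_PLAN_TOOL_NAME = "generate_plan"


@dataclass
class FakeLlmStepResult:
    answer: str = ""
    tool_calls: list[str] = field(default_factory=list)  # just tool names in this sketch


def needs_clarification(result: FakeLlmStepResult) -> bool:
    return GENERATE_PLAN_TOOL_NAME not in result.tool_calls


assert needs_clarification(FakeLlmStepResult(answer="Which quarter do you mean?"))
assert not needs_clarification(FakeLlmStepResult(tool_calls=[GENERATE_PLAN_TOOL_NAME]))
```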

View File

@@ -0,0 +1,325 @@
import abc
from collections.abc import Iterator
from typing import Any
from pydantic import BaseModel
from onyx.access.models import DocumentAccess
from onyx.context.search.enums import QueryType
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import InferenceChunk
from onyx.db.enums import EmbeddingPrecision
from onyx.indexing.models import DocMetadataAwareIndexChunk
from shared_configs.model_server_models import Embedding
# NOTE: "Document" in the naming convention is used to refer to the entire document as represented in Onyx.
# What is actually stored in the index is the document chunks. By the terminology of most search engines / vector
# databases, the individual objects stored are called documents, but in this case it refers to a chunk.
# Outside of searching and update capabilities, the document index must also implement the ability to port all of
# the documents over to a secondary index. This allows for embedding models to be updated and for porting documents
# to happen in the background while the primary index still serves the main traffic.
__all__ = [
# Main interfaces - these are what you should inherit from
"DocumentIndex",
# Data models - used in method signatures
"DocumentInsertionRecord",
"DocumentSectionRequest",
"IndexingMetadata",
"MetadataUpdateRequest",
# Capability mixins - for custom compositions or type checking
"SchemaVerifiable",
"Indexable",
"Deletable",
"Updatable",
"IdRetrievalCapable",
"HybridCapable",
"RandomCapable",
]
class DocumentInsertionRecord(BaseModel):
"""
Result of indexing a document
"""
model_config = {"frozen": True}
document_id: str
already_existed: bool
class DocumentSectionRequest(BaseModel):
"""
Request for a document section or whole document
If no min_chunk_ind is provided it should start at the beginning of the document
If no max_chunk_ind is provided it should go to the end of the document
"""
model_config = {"frozen": True}
document_id: str
min_chunk_ind: int | None = None
max_chunk_ind: int | None = None
class IndexingMetadata(BaseModel):
"""
Information about chunk counts for efficient cleaning / updating of document chunks. A common pattern to ensure
that no chunks are left over is to delete all of the chunks for a document and then re-index the document. This
information allows us to only delete the extra "tail" chunks when the document has gotten shorter.
"""
# The tuple is (old_chunk_cnt, new_chunk_cnt)
doc_id_to_chunk_cnt_diff: dict[str, tuple[int, int]]
class MetadataUpdateRequest(BaseModel):
"""
Updates to the documents that can happen without there being an update to the contents of the document.
"""
document_ids: list[str]
# Passed in to help with potential optimizations of the implementation
doc_id_to_chunk_cnt: dict[str, int]
# For the ones that are None, there is no update required to that field
access: DocumentAccess | None = None
document_sets: set[str] | None = None
boost: float | None = None
hidden: bool | None = None
secondary_index_updated: bool | None = None
project_ids: set[int] | None = None
class SchemaVerifiable(abc.ABC):
"""
Class must implement document index schema verification. For example, verify that all of the
necessary attributes for indexing, querying, filtering, and fields to return from search are
all valid in the schema.
"""
def __init__(
self,
index_name: str,
tenant_id: int | None,
*args: Any,
**kwargs: Any,
) -> None:
super().__init__(*args, **kwargs)
self.index_name = index_name
self.tenant_id = tenant_id
@abc.abstractmethod
def verify_and_create_index_if_necessary(
self,
embedding_dim: int,
embedding_precision: EmbeddingPrecision,
) -> None:
"""
Verify that the document index exists and is consistent with the expectations in the code. For certain search
engines, the schema needs to be created before indexing can happen. This call should create the schema if it
does not exist.
Parameters:
- embedding_dim: Vector dimensionality for the vector similarity part of the search
- embedding_precision: Precision of the vector similarity part of the search
"""
raise NotImplementedError
class Indexable(abc.ABC):
"""
Class must implement the ability to index document chunks
"""
@abc.abstractmethod
def index(
self,
chunks: Iterator[DocMetadataAwareIndexChunk],
indexing_metadata: IndexingMetadata,
) -> set[DocumentInsertionRecord]:
"""
Takes a list of document chunks and indexes them in the document index. This is often a batch operation
including chunks from multiple documents.
NOTE: When a document is reindexed/updated here and has gotten shorter, it is important to delete the extra
chunks at the end to ensure there are no stale chunks in the index.
NOTE: The chunks of a document are never separated into separate index() calls. So there is
no worry of receiving the first 0 through n chunks in one index call and the next n through
m chunks of a document in the next index call.
Parameters:
- chunks: Document chunks with all of the information needed for indexing to the document index.
- indexing_metadata: Information about chunk counts for efficient cleaning / updating
Returns:
List of document ids which map to unique documents and are used for deduping chunks
when updating, as well as if the document is newly indexed or already existed and
just updated
"""
raise NotImplementedError
class Deletable(abc.ABC):
"""
Class must implement the ability to delete a document by a given unique document id. Note that the document id is the
unique identifier for the document as represented in Onyx, not in the document index.
"""
@abc.abstractmethod
def delete(
self,
db_doc_id: str,
*,
# Passed in in case it helps the efficiency of the delete implementation
chunk_count: int | None,
) -> int:
"""
Given a single document, hard delete all of the chunks for the document from the document index
Parameters:
- db_doc_id: document id as represented in Onyx
- chunk_count: number of chunks in the document
Returns:
number of chunks deleted
"""
raise NotImplementedError
class Updatable(abc.ABC):
"""
Class must implement the ability to update certain attributes of a document without needing to
update all of the fields. Specifically, needs to be able to update:
- Access Control List
- Document-set membership
- Boost value (learning from feedback mechanism)
- Whether the document is hidden or not; hidden documents are not returned from search
- Which Projects the document is a part of
"""
@abc.abstractmethod
def update(self, update_requests: list[MetadataUpdateRequest]) -> None:
"""
Updates some set of chunks. The document and fields to update are specified in the update
requests. Each update request in the list applies its changes to a list of document ids.
None values mean that the field does not need an update.
Parameters:
- update_requests: for a list of document ids in the update request, apply the same updates
to all of the documents with those ids. This is for bulk handling efficiency. Many
updates are done at the connector level which have many documents for the connector
"""
raise NotImplementedError
class IdRetrievalCapable(abc.ABC):
"""
Class must implement the ability to retrieve either:
- All of the chunks of a document IN ORDER given a document id. Caller assumes it to be in order.
- A specific section (continuous set of chunks) for some document.
"""
@abc.abstractmethod
def id_based_retrieval(
self,
chunk_requests: list[DocumentSectionRequest],
) -> list[InferenceChunk]:
"""
Fetch chunk(s) based on document id
NOTE: This is used to reconstruct a full document or an extended (multi-chunk) section
of a document. Downstream currently assumes that the chunking does not introduce overlaps
between the chunks. If there are overlaps for the chunks, then the reconstructed document
or extended section will have duplicate segments.
NOTE: This should be used after a search call to get more context around returned chunks.
There are no filters here since the calling code should not be calling this on arbitrary
documents.
Parameters:
- chunk_requests: requests containing the document id and the chunk range to retrieve
Returns:
list of sections from the documents specified
"""
raise NotImplementedError
class HybridCapable(abc.ABC):
"""
Class must implement hybrid (keyword + vector) search functionality
"""
@abc.abstractmethod
def hybrid_retrieval(
self,
query: str,
query_embedding: Embedding,
final_keywords: list[str] | None,
query_type: QueryType,
filters: IndexFilters,
num_to_retrieve: int,
offset: int = 0,
) -> list[InferenceChunk]:
"""
Run hybrid search and return a list of inference chunks.
Parameters:
- query: unmodified user query. This may be needed for getting the matching highlighted
keywords or for logging purposes
- query_embedding: vector representation of the query, must be of the correct
dimensionality for the primary index
- final_keywords: Final keywords to be used from the query, defaults to query if not set
- query_type: Semantic or keyword type query, may use different scoring logic for each
- filters: Filters for things like permissions, source type, time, etc.
- num_to_retrieve: number of highest matching chunks to return
- offset: number of highest matching chunks to skip (kind of like pagination)
Returns:
Score ranked (highest first) list of highest matching chunks
"""
raise NotImplementedError
class RandomCapable(abc.ABC):
"""Class must implement random document retrieval capability.
This currently is just used for porting the documents to a secondary index."""
@abc.abstractmethod
def random_retrieval(
self,
filters: IndexFilters | None = None,
num_to_retrieve: int = 100,
dirty: bool | None = None,
) -> list[InferenceChunk]:
"""Retrieve random chunks matching the filters"""
raise NotImplementedError
class DocumentIndex(
SchemaVerifiable,
Indexable,
Updatable,
Deletable,
HybridCapable,
IdRetrievalCapable,
RandomCapable,
abc.ABC,
):
"""
A valid document index that can plug into all Onyx flows must implement all of these
functionalities.
As a high level summary, document indices need to be able to
- Verify the schema definition is valid
- Index new documents
- Update specific attributes of existing documents
- Delete documents
- Run hybrid search
- Retrieve document or sections of documents based on document id
- Retrieve sets of random documents
"""

View File

@@ -25,17 +25,17 @@ class SlackEntities(BaseModel):
# Direct message filtering
include_dm: bool = Field(
default=False,
default=True,
description="Include user direct messages in search results",
)
include_group_dm: bool = Field(
default=False,
default=True,
description="Include group direct messages (multi-person DMs) in search results",
)
# Private channel filtering
include_private_channels: bool = Field(
default=False,
default=True,
description="Include private channels in search results (user must have access)",
)

View File

@@ -1,15 +1,19 @@
import base64
from io import BytesIO
from langchain_core.messages import BaseMessage
from langchain_core.messages import HumanMessage
from langchain_core.messages import SystemMessage
from PIL import Image
from onyx.configs.app_configs import IMAGE_SUMMARIZATION_SYSTEM_PROMPT
from onyx.configs.app_configs import IMAGE_SUMMARIZATION_USER_PROMPT
from onyx.llm.interfaces import LLM
from onyx.llm.utils import message_to_string
from onyx.llm.models import ChatCompletionMessage
from onyx.llm.models import ContentPart
from onyx.llm.models import ImageContentPart
from onyx.llm.models import ImageUrlDetail
from onyx.llm.models import SystemMessage
from onyx.llm.models import TextContentPart
from onyx.llm.models import UserMessage
from onyx.llm.utils import llm_response_to_string
from onyx.utils.b64 import get_image_type_from_bytes
from onyx.utils.logger import setup_logger
@@ -97,22 +101,24 @@ def _summarize_image(
) -> str:
"""Use default LLM (if it is multimodal) to generate a summary of an image."""
messages: list[BaseMessage] = []
messages: list[ChatCompletionMessage] = []
if system_prompt:
messages.append(SystemMessage(content=system_prompt))
content: list[ContentPart] = []
if query:
content.append(TextContentPart(text=query))
content.append(ImageContentPart(image_url=ImageUrlDetail(url=encoded_image)))
messages.append(
HumanMessage(
content=[
{"type": "text", "text": query},
{"type": "image_url", "image_url": {"url": encoded_image}},
],
UserMessage(
content=content,
),
)
try:
return message_to_string(llm.invoke_langchain(messages))
return llm_response_to_string(llm.invoke(messages))
except Exception as e:
error_msg = f"Summarization failed. Messages: {messages}"

View File

@@ -298,17 +298,17 @@ def verify_user_files(
for file_descriptor in user_files:
# Check if this file descriptor has a user_file_id
if "user_file_id" in file_descriptor and file_descriptor["user_file_id"]:
if file_descriptor.get("user_file_id"):
try:
user_file_ids.append(UUID(file_descriptor["user_file_id"]))
except (ValueError, TypeError):
logger.warning(
f"Invalid user_file_id in file descriptor: {file_descriptor.get('user_file_id')}"
f"Invalid user_file_id in file descriptor: {file_descriptor['user_file_id']}"
)
continue
else:
# This is a project file - use the 'id' field which is the file_id
if "id" in file_descriptor and file_descriptor["id"]:
if file_descriptor.get("id"):
project_file_ids.append(file_descriptor["id"])
# Verify user files (existing logic)

View File

@@ -54,8 +54,8 @@ from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.factory import get_default_llm_with_vision
from onyx.llm.factory import get_llm_for_contextual_rag
from onyx.llm.interfaces import LLM
from onyx.llm.utils import llm_response_to_string
from onyx.llm.utils import MAX_CONTEXT_TOKENS
from onyx.llm.utils import message_to_string
from onyx.natural_language_processing.search_nlp_models import (
InformationContentClassificationModel,
)
@@ -542,8 +542,8 @@ def add_document_summaries(
doc_tokens = tokenizer.encode(chunks_by_doc[0].source_document.get_text_content())
doc_content = tokenizer_trim_middle(doc_tokens, trunc_doc_tokens, tokenizer)
summary_prompt = DOCUMENT_SUMMARY_PROMPT.format(document=doc_content)
doc_summary = message_to_string(
llm.invoke_langchain(summary_prompt, max_tokens=MAX_CONTEXT_TOKENS)
doc_summary = llm_response_to_string(
llm.invoke(summary_prompt, max_tokens=MAX_CONTEXT_TOKENS)
)
for chunk in chunks_by_doc:
@@ -583,8 +583,8 @@ def add_chunk_summaries(
if not doc_info:
# This happens if the document is too long AND document summaries are turned off
# In this case we compute a doc summary using the LLM
doc_info = message_to_string(
llm.invoke_langchain(
doc_info = llm_response_to_string(
llm.invoke(
DOCUMENT_SUMMARY_PROMPT.format(document=doc_content),
max_tokens=MAX_CONTEXT_TOKENS,
)
@@ -595,8 +595,8 @@ def add_chunk_summaries(
def assign_context(chunk: DocAwareChunk) -> None:
context_prompt2 = CONTEXTUAL_RAG_PROMPT2.format(chunk=chunk.content)
try:
chunk.chunk_context = message_to_string(
llm.invoke_langchain(
chunk.chunk_context = llm_response_to_string(
llm.invoke(
context_prompt1 + context_prompt2,
max_tokens=MAX_CONTEXT_TOKENS,
)

View File

@@ -80,7 +80,11 @@ class PgRedisKVStore(KeyValueStore):
value = None
try:
self.redis_client.set(REDIS_KEY_PREFIX + key, json.dumps(value))
self.redis_client.set(
REDIS_KEY_PREFIX + key,
json.dumps(value),
ex=KV_REDIS_KEY_EXPIRATION,
)
except Exception as e:
logger.error(f"Failed to set value in Redis for key '{key}': {str(e)}")

View File

@@ -1,7 +1,5 @@
import json
from langchain_core.messages import HumanMessage
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import OnyxCallTypes
from onyx.configs.kg_configs import KG_METADATA_TRACKING_THRESHOLD
@@ -31,7 +29,7 @@ from onyx.kg.utils.formatting_utils import make_relationship_id
from onyx.kg.utils.formatting_utils import make_relationship_type_id
from onyx.kg.vespa.vespa_interactions import get_document_vespa_contents
from onyx.llm.factory import get_default_llms
from onyx.llm.utils import message_to_string
from onyx.llm.utils import llm_response_to_string
from onyx.prompts.kg_prompts import CALL_CHUNK_PREPROCESSING_PROMPT
from onyx.prompts.kg_prompts import CALL_DOCUMENT_CLASSIFICATION_PROMPT
from onyx.prompts.kg_prompts import GENERAL_CHUNK_PREPROCESSING_PROMPT
@@ -418,14 +416,10 @@ def kg_classify_document(
# classify with LLM
primary_llm, _ = get_default_llms()
msg = [HumanMessage(content=prompt)]
try:
raw_classification_result = primary_llm.invoke_langchain(msg)
raw_classification_result = llm_response_to_string(primary_llm.invoke(prompt))
classification_result = (
message_to_string(raw_classification_result)
.replace("```json", "")
.replace("```", "")
.strip()
raw_classification_result.replace("```json", "").replace("```", "").strip()
)
# no json parsing here because of reasoning output
classification_class = classification_result.split("CATEGORY:")[1].strip()
@@ -486,12 +480,10 @@ def kg_deep_extract_chunks(
# extract with LLM
_, fast_llm = get_default_llms()
msg = [HumanMessage(content=prompt)]
try:
raw_extraction_result = fast_llm.invoke_langchain(msg)
raw_extraction_result = llm_response_to_string(fast_llm.invoke(prompt))
cleaned_response = (
message_to_string(raw_extraction_result)
.replace("{{", "{")
raw_extraction_result.replace("{{", "{")
.replace("}}", "}")
.replace("```json\n", "")
.replace("\n```", "")

View File

@@ -1,45 +1,23 @@
import json
import os
import traceback
from collections.abc import Iterator
from collections.abc import Sequence
from typing import Any
from typing import cast
from typing import TYPE_CHECKING
from typing import Union
from httpx import RemoteProtocolError
from langchain.schema.language_model import (
LanguageModelInput as LangChainLanguageModelInput,
)
from langchain_core.messages import AIMessage
from langchain_core.messages import AIMessageChunk
from langchain_core.messages import BaseMessage
from langchain_core.messages import BaseMessageChunk
from langchain_core.messages import ChatMessage
from langchain_core.messages import ChatMessageChunk
from langchain_core.messages import FunctionMessage
from langchain_core.messages import FunctionMessageChunk
from langchain_core.messages import HumanMessage
from langchain_core.messages import HumanMessageChunk
from langchain_core.messages import SystemMessage
from langchain_core.messages import SystemMessageChunk
from langchain_core.messages.tool import ToolCallChunk
from langchain_core.messages.tool import ToolMessage
from langchain_core.prompt_values import PromptValue
from onyx.configs.app_configs import LOG_ONYX_MODEL_INTERACTIONS
from onyx.configs.app_configs import MOCK_LLM_RESPONSE
from onyx.configs.app_configs import SEND_USER_METADATA_TO_LLM_PROVIDER
from onyx.configs.chat_configs import QA_TIMEOUT
from onyx.configs.model_configs import (
DISABLE_LITELLM_STREAMING,
)
from onyx.configs.model_configs import GEN_AI_TEMPERATURE
from onyx.configs.model_configs import LITELLM_EXTRA_BODY
from onyx.llm.interfaces import LanguageModelInput
from onyx.llm.interfaces import LLM
from onyx.llm.interfaces import LLMConfig
from onyx.llm.interfaces import STANDARD_TOOL_CHOICE_OPTIONS
from onyx.llm.interfaces import LLMUserIdentity
from onyx.llm.interfaces import ReasoningEffort
from onyx.llm.interfaces import ToolChoiceOptions
from onyx.llm.llm_provider_options import AZURE_PROVIDER_NAME
from onyx.llm.llm_provider_options import OLLAMA_PROVIDER_NAME
@@ -47,6 +25,8 @@ from onyx.llm.llm_provider_options import VERTEX_CREDENTIALS_FILE_KWARG
from onyx.llm.llm_provider_options import VERTEX_LOCATION_KWARG
from onyx.llm.model_response import ModelResponse
from onyx.llm.model_response import ModelResponseStream
from onyx.llm.models import CLAUDE_REASONING_BUDGET_TOKENS
from onyx.llm.models import OPENAI_REASONING_EFFORT
from onyx.llm.utils import is_true_openai_model
from onyx.llm.utils import model_is_reasoning_model
from onyx.server.utils import mask_string
@@ -57,14 +37,13 @@ from onyx.utils.special_types import JSON_ro
logger = setup_logger()
if TYPE_CHECKING:
from litellm import CustomStreamWrapper, Message
from litellm import CustomStreamWrapper
_LLM_PROMPT_LONG_TERM_LOG_CATEGORY = "llm_prompt"
LEGACY_MAX_TOKENS_KWARG = "max_tokens"
STANDARD_MAX_TOKENS_KWARG = "max_completion_tokens"
LegacyPromptDict = Sequence[str | list[str] | dict[str, Any] | tuple[str, str]]
MAX_LITELLM_USER_ID_LENGTH = 64
class LLMTimeoutError(Exception):
@@ -79,231 +58,30 @@ class LLMRateLimitError(Exception):
"""
def _convert_tools_to_responses_api_format(tools: list[dict]) -> list[dict]:
"""Convert tools from Chat Completions API format to Responses API format.
def _prompt_to_dicts(prompt: LanguageModelInput) -> list[dict[str, Any]]:
"""Convert Pydantic message models to dictionaries for LiteLLM.
Chat Completions API format:
{"type": "function", "function": {"name": "...", "description": "...", "parameters": {...}}}
Responses API format:
{"type": "function", "name": "...", "description": "...", "parameters": {...}}
LiteLLM expects messages to be dictionaries (with .get() method),
not Pydantic models. This function serializes the messages.
"""
converted_tools = []
for tool in tools:
if tool.get("type") == "function" and "function" in tool:
func = tool["function"]
name = func.get("name")
if not name:
logger.warning("Skipping tool with missing name in function definition")
continue
converted_tool = {
"type": "function",
"name": name,
"description": func.get("description", ""),
"parameters": func.get("parameters", {}),
}
if "strict" in func:
converted_tool["strict"] = func["strict"]
converted_tools.append(converted_tool)
else:
# If already in correct format or unknown format, pass through
converted_tools.append(tool)
return converted_tools
def _base_msg_to_role(msg: BaseMessage) -> str:
if isinstance(msg, HumanMessage) or isinstance(msg, HumanMessageChunk):
return "user"
if isinstance(msg, AIMessage) or isinstance(msg, AIMessageChunk):
return "assistant"
if isinstance(msg, SystemMessage) or isinstance(msg, SystemMessageChunk):
return "system"
if isinstance(msg, FunctionMessage) or isinstance(msg, FunctionMessageChunk):
return "function"
return "unknown"
def _convert_litellm_message_to_langchain_message(
litellm_message: "Message",
) -> BaseMessage:
from onyx.llm.litellm_singleton import litellm
# Extracting the basic attributes from the litellm message
content = litellm_message.content or ""
role = litellm_message.role
# Handling function calls and tool calls if present
tool_calls = (
cast(
list[litellm.ChatCompletionMessageToolCall],
litellm_message.tool_calls,
)
if hasattr(litellm_message, "tool_calls")
else []
)
# Create the appropriate langchain message based on the role
if role == "user":
return HumanMessage(content=content)
elif role == "assistant":
return AIMessage(
content=content,
tool_calls=(
[
{
"name": tool_call.function.name or "",
"args": json.loads(tool_call.function.arguments),
"id": tool_call.id,
}
for tool_call in tool_calls
]
if tool_calls
else []
),
)
elif role == "system":
return SystemMessage(content=content)
else:
raise ValueError(f"Unknown role type received: {role}")
def _convert_message_to_dict(message: BaseMessage) -> dict:
"""Adapted from langchain_community.chat_models.litellm._convert_message_to_dict"""
if isinstance(message, ChatMessage):
message_dict = {"role": message.role, "content": message.content}
elif isinstance(message, HumanMessage):
message_dict = {"role": "user", "content": message.content}
elif isinstance(message, AIMessage):
message_dict = {"role": "assistant", "content": message.content}
if message.tool_calls:
message_dict["tool_calls"] = [
{
"id": tool_call.get("id"),
"function": {
"name": tool_call["name"],
"arguments": json.dumps(tool_call["args"]),
},
"type": "function",
"index": tool_call.get("index", 0),
}
for tool_call in message.tool_calls
]
if "function_call" in message.additional_kwargs:
message_dict["function_call"] = message.additional_kwargs["function_call"]
elif isinstance(message, SystemMessage):
message_dict = {"role": "system", "content": message.content}
elif isinstance(message, FunctionMessage):
message_dict = {
"role": "function",
"content": message.content,
"name": message.name,
}
elif isinstance(message, ToolMessage):
message_dict = {
"tool_call_id": message.tool_call_id,
"role": "tool",
"name": message.name or "",
"content": message.content,
}
else:
raise ValueError(f"Got unknown type {message}")
if "name" in message.additional_kwargs:
message_dict["name"] = message.additional_kwargs["name"]
return message_dict
def _convert_delta_to_message_chunk(
_dict: dict[str, Any],
curr_msg: BaseMessage | None,
stop_reason: str | None = None,
) -> BaseMessageChunk:
from litellm.utils import ChatCompletionDeltaToolCall
"""Adapted from langchain_community.chat_models.litellm._convert_delta_to_message_chunk"""
role = _dict.get("role") or (_base_msg_to_role(curr_msg) if curr_msg else "unknown")
content = _dict.get("content") or ""
additional_kwargs = {}
if _dict.get("function_call"):
additional_kwargs.update({"function_call": dict(_dict["function_call"])})
tool_calls = cast(list[ChatCompletionDeltaToolCall] | None, _dict.get("tool_calls"))
if role == "user":
return HumanMessageChunk(content=content)
# NOTE: if tool calls are present, then it's an assistant.
# In Ollama, the role will be None for tool-calls
elif role == "assistant" or tool_calls:
if tool_calls:
tool_call = tool_calls[0]
tool_name = tool_call.function.name or (curr_msg and curr_msg.name) or ""
idx = tool_call.index
tool_call_chunk = ToolCallChunk(
name=tool_name,
id=tool_call.id,
args=tool_call.function.arguments,
index=idx,
)
return AIMessageChunk(
content=content,
tool_call_chunks=[tool_call_chunk],
additional_kwargs={
"usage_metadata": {"stop": stop_reason},
**additional_kwargs,
},
)
return AIMessageChunk(
content=content,
additional_kwargs={
"usage_metadata": {"stop": stop_reason},
**additional_kwargs,
},
)
elif role == "system":
return SystemMessageChunk(content=content)
elif role == "function":
return FunctionMessageChunk(content=content, name=_dict["name"])
elif role:
return ChatMessageChunk(content=content, role=role)
raise ValueError(f"Unknown role: {role}")
def _prompt_to_dict(
prompt: LanguageModelInput | LangChainLanguageModelInput,
) -> LegacyPromptDict:
# NOTE: this must go first, since it is also a Sequence
if isinstance(prompt, str):
return [_convert_message_to_dict(HumanMessage(content=prompt))]
if isinstance(prompt, (list, Sequence)):
normalized_prompt: list[str | list[str] | dict[str, Any] | tuple[str, str]] = []
for msg in prompt:
if isinstance(msg, BaseMessage):
normalized_prompt.append(_convert_message_to_dict(msg))
elif isinstance(msg, dict):
normalized_prompt.append(dict(msg))
else:
normalized_prompt.append(msg)
return normalized_prompt
if isinstance(prompt, BaseMessage):
return [_convert_message_to_dict(prompt)]
if isinstance(prompt, PromptValue):
return [_convert_message_to_dict(message) for message in prompt.to_messages()]
raise TypeError(f"Unsupported prompt type: {type(prompt)}")
return [{"role": "user", "content": prompt}]
return [msg.model_dump(exclude_none=True) for msg in prompt]
def _prompt_as_json(
prompt: LanguageModelInput | LangChainLanguageModelInput,
*,
is_legacy_langchain: bool,
) -> JSON_ro:
prompt_payload = _prompt_to_dict(prompt) if is_legacy_langchain else prompt
return cast(JSON_ro, prompt_payload)
def _prompt_as_json(prompt: LanguageModelInput) -> JSON_ro:
return cast(JSON_ro, _prompt_to_dicts(prompt))
def _truncate_litellm_user_id(user_id: str) -> str:
if len(user_id) <= MAX_LITELLM_USER_ID_LENGTH:
return user_id
logger.warning(
"LLM user id exceeds %d chars (len=%d); truncating for provider compatibility.",
MAX_LITELLM_USER_ID_LENGTH,
len(user_id),
)
return user_id[:MAX_LITELLM_USER_ID_LENGTH]
class LitellmLLM(LLM):
@@ -403,18 +181,12 @@ class LitellmLLM(LLM):
dump["credentials_file"] = mask_string(credentials_file)
return dump
def log_model_configs(self) -> None:
logger.debug(f"Config: {self._safe_model_config()}")
def _record_call(
self,
prompt: LanguageModelInput | LangChainLanguageModelInput,
is_legacy_langchain: bool = False,
prompt: LanguageModelInput,
) -> None:
if self._long_term_logger:
prompt_json = _prompt_as_json(
prompt, is_legacy_langchain=is_legacy_langchain
)
prompt_json = _prompt_as_json(prompt)
self._long_term_logger.record(
{
"prompt": prompt_json,
@@ -425,14 +197,11 @@ class LitellmLLM(LLM):
def _record_result(
self,
prompt: LanguageModelInput | LangChainLanguageModelInput,
prompt: LanguageModelInput,
model_output: BaseMessage,
is_legacy_langchain: bool,
) -> None:
if self._long_term_logger:
prompt_json = _prompt_as_json(
prompt, is_legacy_langchain=is_legacy_langchain
)
prompt_json = _prompt_as_json(prompt)
tool_calls = (
model_output.tool_calls if hasattr(model_output, "tool_calls") else []
)
@@ -448,14 +217,11 @@ class LitellmLLM(LLM):
def _record_error(
self,
prompt: LanguageModelInput | LangChainLanguageModelInput,
prompt: LanguageModelInput,
error: Exception,
is_legacy_langchain: bool,
) -> None:
if self._long_term_logger:
prompt_json = _prompt_as_json(
prompt, is_legacy_langchain=is_legacy_langchain
)
prompt_json = _prompt_as_json(prompt)
self._long_term_logger.record(
{
"prompt": prompt_json,
@@ -472,62 +238,56 @@ class LitellmLLM(LLM):
def _completion(
self,
prompt: LanguageModelInput | LangChainLanguageModelInput,
prompt: LanguageModelInput,
tools: list[dict] | None,
tool_choice: ToolChoiceOptions | None,
stream: bool,
parallel_tool_calls: bool,
reasoning_effort: str | None = None,
reasoning_effort: ReasoningEffort | None = None,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
max_tokens: int | None = None,
is_legacy_langchain: bool = False,
user_identity: LLMUserIdentity | None = None,
) -> Union["ModelResponse", "CustomStreamWrapper"]:
# litellm doesn't accept LangChain BaseMessage objects, so we need to convert them
# to a dict representation
processed_prompt: LegacyPromptDict | LanguageModelInput
if is_legacy_langchain:
processed_prompt = _prompt_to_dict(prompt)
else:
processed_prompt = cast(LanguageModelInput, prompt)
# Record the original prompt (not the processed one) for logging
original_prompt = prompt
self._record_call(original_prompt, is_legacy_langchain)
self._record_call(prompt)
from onyx.llm.litellm_singleton import litellm
from litellm.exceptions import Timeout, RateLimitError
tool_choice_formatted: dict[str, Any] | str | None
if not tools:
tool_choice_formatted = None
elif tool_choice and tool_choice not in STANDARD_TOOL_CHOICE_OPTIONS:
tool_choice_formatted = {
"type": "function",
"function": {"name": tool_choice},
}
else:
tool_choice_formatted = tool_choice
is_reasoning = model_is_reasoning_model(
self.config.model_name, self.config.model_provider
)
# Needed to get reasoning tokens from the model
use_responses_api = not is_legacy_langchain and (
if (
is_true_openai_model(self.config.model_provider, self.config.model_name)
or self.config.model_provider == AZURE_PROVIDER_NAME
)
if use_responses_api:
):
model_provider = f"{self.config.model_provider}/responses"
else:
model_provider = self.config.model_provider
# Convert tools to Responses API format if using that API
processed_tools = (
_convert_tools_to_responses_api_format(tools)
if use_responses_api and tools
else tools
)
completion_kwargs: dict[str, Any] = self._model_kwargs
if SEND_USER_METADATA_TO_LLM_PROVIDER and user_identity:
completion_kwargs = dict(self._model_kwargs)
if user_identity.user_id:
completion_kwargs["user"] = _truncate_litellm_user_id(
user_identity.user_id
)
if user_identity.session_id:
existing_metadata = completion_kwargs.get("metadata")
metadata: dict[str, Any] | None
if existing_metadata is None:
metadata = {}
elif isinstance(existing_metadata, dict):
metadata = dict(existing_metadata)
else:
metadata = None
if metadata is not None:
metadata["session_id"] = user_identity.session_id
completion_kwargs["metadata"] = metadata
try:
return litellm.completion(
@@ -542,20 +302,15 @@ class LitellmLLM(LLM):
api_version=self._api_version or None,
custom_llm_provider=self._custom_llm_provider or None,
# actual input
messages=processed_prompt,
tools=processed_tools,
tool_choice=tool_choice_formatted,
messages=_prompt_to_dicts(prompt),
tools=tools,
tool_choice=tool_choice if tools else None,
# streaming choice
stream=stream,
# model params
temperature=(1 if is_reasoning else self._temperature),
timeout=timeout_override or self._timeout,
# stream_options is not supported by the Responses API
**(
{"stream_options": {"include_usage": True}}
if stream and not use_responses_api
else {}
),
**({"stream_options": {"include_usage": True}} if stream else {}),
# For now, we don't support parallel tool calls
# NOTE: we can't pass this in if tools are not specified
# or else OpenAI throws an error
@@ -577,8 +332,16 @@ class LitellmLLM(LLM):
# Anthropic Claude uses `thinking` with budget_tokens for extended thinking
# This applies to Claude models on any provider (anthropic, vertex_ai, bedrock)
**(
{"thinking": {"type": "enabled", "budget_tokens": 10000}}
{
"thinking": {
"type": "enabled",
"budget_tokens": CLAUDE_REASONING_BUDGET_TOKENS[
reasoning_effort
],
}
}
if reasoning_effort
and reasoning_effort != ReasoningEffort.OFF
and is_reasoning
and "claude" in self.config.model_name.lower()
else {}
@@ -586,8 +349,9 @@ class LitellmLLM(LLM):
# OpenAI and other providers use reasoning_effort
# (litellm maps this to thinking_level for Gemini 3 models)
**(
{"reasoning_effort": reasoning_effort}
{"reasoning_effort": OPENAI_REASONING_EFFORT[reasoning_effort]}
if reasoning_effort
and reasoning_effort != ReasoningEffort.OFF
and is_reasoning
and "claude" not in self.config.model_name.lower()
else {}
@@ -598,11 +362,11 @@ class LitellmLLM(LLM):
else {}
),
**({self._max_token_param: max_tokens} if max_tokens else {}),
**self._model_kwargs,
**completion_kwargs,
)
except Exception as e:
self._record_error(original_prompt, e, is_legacy_langchain)
self._record_error(prompt, e)
# for break pointing
if isinstance(e, Timeout):
raise LLMTimeoutError(e)
@@ -632,134 +396,7 @@ class LitellmLLM(LLM):
max_input_tokens=self._max_input_tokens,
)
def _invoke_implementation_langchain(
self,
prompt: LangChainLanguageModelInput,
tools: list[dict] | None = None,
tool_choice: ToolChoiceOptions | None = None,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
max_tokens: int | None = None,
) -> BaseMessage:
from litellm import ModelResponse
if LOG_ONYX_MODEL_INTERACTIONS:
self.log_model_configs()
response = cast(
ModelResponse,
self._completion(
is_legacy_langchain=True,
prompt=prompt,
tools=tools,
tool_choice=tool_choice,
stream=False,
structured_response_format=structured_response_format,
timeout_override=timeout_override,
max_tokens=max_tokens,
parallel_tool_calls=False,
),
)
choice = response.choices[0]
if hasattr(choice, "message"):
output = _convert_litellm_message_to_langchain_message(choice.message)
if output:
self._record_result(prompt, output, is_legacy_langchain=True)
return output
else:
raise ValueError("Unexpected response choice type")
def _stream_implementation_langchain(
self,
prompt: LangChainLanguageModelInput,
tools: list[dict] | None = None,
tool_choice: ToolChoiceOptions | None = None,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
max_tokens: int | None = None,
) -> Iterator[BaseMessage]:
from litellm import CustomStreamWrapper
if LOG_ONYX_MODEL_INTERACTIONS:
self.log_model_configs()
if DISABLE_LITELLM_STREAMING:
yield self.invoke_langchain(
prompt,
tools,
tool_choice,
structured_response_format,
timeout_override,
max_tokens,
)
return
output = None
response = cast(
CustomStreamWrapper,
self._completion(
is_legacy_langchain=True,
prompt=prompt,
tools=tools,
tool_choice=tool_choice,
stream=True,
structured_response_format=structured_response_format,
timeout_override=timeout_override,
max_tokens=max_tokens,
parallel_tool_calls=False,
reasoning_effort="minimal",
),
)
try:
for part in response:
if not part["choices"]:
continue
choice = part["choices"][0]
message_chunk = _convert_delta_to_message_chunk(
choice["delta"],
output,
stop_reason=choice["finish_reason"],
)
if output is None:
output = message_chunk
else:
output += message_chunk
yield message_chunk
except RemoteProtocolError:
raise RuntimeError(
"The AI model failed partway through generation, please try again."
)
if output:
self._record_result(prompt, output, is_legacy_langchain=True)
if LOG_ONYX_MODEL_INTERACTIONS and output:
content = output.content or ""
if isinstance(output, AIMessage):
if content:
log_msg = content
elif output.tool_calls:
log_msg = "Tool Calls: " + str(
[
{
key: value
for key, value in tool_call.items()
if key != "index"
}
for tool_call in output.tool_calls
]
)
else:
log_msg = ""
logger.debug(f"Raw Model Output:\n{log_msg}")
else:
logger.debug(f"Raw Model Output:\n{content}")
def _invoke_implementation(
def invoke(
self,
prompt: LanguageModelInput,
tools: list[dict] | None = None,
@@ -767,15 +404,13 @@ class LitellmLLM(LLM):
structured_response_format: dict | None = None,
timeout_override: int | None = None,
max_tokens: int | None = None,
reasoning_effort: str | None = "medium",
reasoning_effort: ReasoningEffort | None = None,
user_identity: LLMUserIdentity | None = None,
) -> ModelResponse:
from litellm import ModelResponse as LiteLLMModelResponse
from onyx.llm.model_response import from_litellm_model_response
if LOG_ONYX_MODEL_INTERACTIONS:
self.log_model_configs()
response = cast(
LiteLLMModelResponse,
self._completion(
@@ -788,12 +423,13 @@ class LitellmLLM(LLM):
max_tokens=max_tokens,
parallel_tool_calls=True,
reasoning_effort=reasoning_effort,
user_identity=user_identity,
),
)
return from_litellm_model_response(response)
def _stream_implementation(
def stream(
self,
prompt: LanguageModelInput,
tools: list[dict] | None = None,
@@ -801,14 +437,12 @@ class LitellmLLM(LLM):
structured_response_format: dict | None = None,
timeout_override: int | None = None,
max_tokens: int | None = None,
reasoning_effort: str | None = "medium",
reasoning_effort: ReasoningEffort | None = None,
user_identity: LLMUserIdentity | None = None,
) -> Iterator[ModelResponseStream]:
from litellm import CustomStreamWrapper as LiteLLMCustomStreamWrapper
from onyx.llm.model_response import from_litellm_model_response_stream
if LOG_ONYX_MODEL_INTERACTIONS:
self.log_model_configs()
response = cast(
LiteLLMCustomStreamWrapper,
self._completion(
@@ -821,6 +455,7 @@ class LitellmLLM(LLM):
max_tokens=max_tokens,
parallel_tool_calls=True,
reasoning_effort=reasoning_effort,
user_identity=user_identity,
),
)
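A central piece of the refactor above is `_prompt_to_dicts`: the LangChain message objects are gone, and the project's own Pydantic message models are serialized with `model_dump(exclude_none=True)` before being handed to LiteLLM. Below is a self-contained sketch with stand-in models, not the real `onyx.llm.models` classes.

```python
# Serialize Pydantic message models to the plain dicts LiteLLM expects,
# dropping None fields; bare strings become a single user message.
from pydantic import BaseModel


class SystemMessage(BaseModel):
    role: str = "system"
    content: str


class UserMessage(BaseModel):
    role: str = "user"
    content: str
    name: str | None = None


def prompt_to_dicts(prompt: str | list[BaseModel]) -> list[dict]:
    if isinstance(prompt, str):
        return [{"role": "user", "content": prompt}]
    return [msg.model_dump(exclude_none=True) for msg in prompt]


assert prompt_to_dicts("hi") == [{"role": "user", "content": "hi"}]
assert prompt_to_dicts([SystemMessage(content="be brief"), UserMessage(content="hi")]) == [
    {"role": "system", "content": "be brief"},
    {"role": "user", "content": "hi"},
]
```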

View File

@@ -21,16 +21,21 @@ PROVIDER_DISPLAY_NAMES: dict[str, str] = {
"deepseek": "DeepSeek",
"xai": "xAI",
"mistral": "Mistral",
"mistralai": "Mistral", # Alias used by some providers
"cohere": "Cohere",
"perplexity": "Perplexity",
"amazon": "Amazon",
"meta": "Meta",
"meta-llama": "Meta", # Alias used by some providers
"ai21": "AI21",
"nvidia": "NVIDIA",
"databricks": "Databricks",
"alibaba": "Alibaba",
"qwen": "Qwen",
"microsoft": "Microsoft",
"gemini": "Gemini",
"stability": "Stability",
"writer": "Writer",
}
# Map vendors to their brand names (used for provider_display_name generation)
@@ -45,6 +50,11 @@ VENDOR_BRAND_NAMES: dict[str, str] = {
"deepseek": "DeepSeek",
"xai": "Grok",
"perplexity": "Sonar",
"ai21": "Jamba",
"nvidia": "Nemotron",
"qwen": "Qwen",
"alibaba": "Qwen",
"writer": "Palmyra",
}
# Aggregator providers that host models from multiple vendors
@@ -52,6 +62,155 @@ AGGREGATOR_PROVIDERS: set[str] = {
"bedrock",
"bedrock_converse",
"openrouter",
"ollama_chat",
"vertex_ai",
"azure",
}
# Model family name mappings for display name generation
# Used by Bedrock display name generator
BEDROCK_MODEL_NAME_MAPPINGS: dict[str, str] = {
"claude": "Claude",
"llama": "Llama",
"mistral": "Mistral",
"mixtral": "Mixtral",
"titan": "Titan",
"nova": "Nova",
"jamba": "Jamba",
"command": "Command",
"deepseek": "DeepSeek",
}
# Used by Ollama display name generator
OLLAMA_MODEL_NAME_MAPPINGS: dict[str, str] = {
"llama": "Llama",
"qwen": "Qwen",
"mistral": "Mistral",
"deepseek": "DeepSeek",
"gemma": "Gemma",
"phi": "Phi",
"codellama": "Code Llama",
"starcoder": "StarCoder",
"wizardcoder": "WizardCoder",
"vicuna": "Vicuna",
"orca": "Orca",
"dolphin": "Dolphin",
"nous": "Nous",
"neural": "Neural",
"mixtral": "Mixtral",
"falcon": "Falcon",
"yi": "Yi",
"command": "Command",
"zephyr": "Zephyr",
"openchat": "OpenChat",
"solar": "Solar",
}
# Bedrock model token limits (AWS doesn't expose this via API)
# Note: Many Bedrock model IDs include a context length suffix (e.g., ":200k")
# which is parsed first. This mapping is for models without suffixes.
# Sources:
# - LiteLLM model_prices_and_context_window.json
# - AWS Bedrock documentation and announcement blogs
BEDROCK_MODEL_TOKEN_LIMITS: dict[str, int] = {
# Anthropic Claude models (new naming: claude-{tier}-{version})
"claude-opus-4": 200000,
"claude-sonnet-4": 200000,
"claude-haiku-4": 200000,
# Anthropic Claude models (old naming: claude-{version})
"claude-4": 200000,
"claude-3-7": 200000,
"claude-3-5": 200000,
"claude-3": 200000,
"claude-v2": 100000,
"claude-instant": 100000,
# Amazon Nova models (from LiteLLM)
"nova-premier": 1000000,
"nova-pro": 300000,
"nova-lite": 300000,
"nova-2-lite": 1000000, # Nova 2 Lite has 1M context
"nova-2-sonic": 128000,
"nova-micro": 128000,
# Amazon Titan models (from LiteLLM: all text models are 42K)
"titan-text-premier": 42000,
"titan-text-express": 42000,
"titan-text-lite": 42000,
"titan-tg1": 8000,
# Meta Llama models (Llama 3 base = 8K, Llama 3.1+ = 128K)
"llama4": 128000,
"llama3-3": 128000,
"llama3-2": 128000,
"llama3-1": 128000,
"llama3-8b": 8000,
"llama3-70b": 8000,
# Mistral models (Large 2+ = 128K, original Large/Small = 32K)
"mistral-large-3": 128000,
"mistral-large-2407": 128000, # Mistral Large 2
"mistral-large-2402": 32000, # Original Mistral Large
"mistral-large": 128000, # Default to newer version
"mistral-small": 32000,
"mistral-7b": 32000,
"mixtral-8x7b": 32000,
"pixtral": 128000,
"ministral": 128000,
"magistral": 128000,
"voxtral": 32000,
# Cohere models
"command-r-plus": 128000,
"command-r": 128000,
# DeepSeek models
"deepseek": 64000,
# Google Gemma models
"gemma-3": 128000,
"gemma-2": 8000,
"gemma": 8000,
# Qwen models
"qwen3": 128000,
"qwen2": 128000,
# NVIDIA models
"nemotron": 128000,
# Writer Palmyra models
"palmyra": 128000,
# Moonshot Kimi
"kimi": 128000,
# Minimax
"minimax": 128000,
# OpenAI (via Bedrock)
"gpt-oss": 128000,
# AI21 models (from LiteLLM: Jamba 1.5 = 256K, Jamba Instruct = 70K)
"jamba-1-5": 256000,
"jamba-instruct": 70000,
"jamba": 256000, # Default to newer version
}
# Ollama model prefix to vendor mapping (for grouping models by vendor)
OLLAMA_MODEL_TO_VENDOR: dict[str, str] = {
"llama": "Meta",
"codellama": "Meta",
"qwen": "Alibaba",
"qwq": "Alibaba",
"mistral": "Mistral",
"ministral": "Mistral",
"mixtral": "Mistral",
"deepseek": "DeepSeek",
"gemma": "Google",
"phi": "Microsoft",
"command": "Cohere",
"aya": "Cohere",
"falcon": "TII",
"yi": "01.AI",
"starcoder": "BigCode",
"wizardcoder": "WizardLM",
"vicuna": "LMSYS",
"openchat": "OpenChat",
"solar": "Upstage",
"orca": "Microsoft",
"dolphin": "Cognitive Computations",
"nous": "Nous Research",
"neural": "Intel",
"zephyr": "HuggingFace",
"granite": "IBM",
"nemotron": "NVIDIA",
"smollm": "HuggingFace",
}
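A minimal sketch, not part of this diff, of how a prefix mapping like OLLAMA_MODEL_TO_VENDOR might be consumed to group Ollama models by vendor; the helper name and the sample model names are assumptions.
def guess_ollama_vendor(model_name: str, default: str = "Other") -> str:
    # Ollama names typically look like "llama3.1:8b" or "qwen2.5:14b"; strip the
    # tag after ":" and match the remaining name against the known prefixes above.
    base_name = model_name.split(":", 1)[0].lower()
    for prefix, vendor in OLLAMA_MODEL_TO_VENDOR.items():
        if base_name.startswith(prefix):
            return vendor
    return default
# e.g. guess_ollama_vendor("llama3.1:8b") -> "Meta"; guess_ollama_vendor("qwen2.5:14b") -> "Alibaba"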

View File

@@ -1,4 +0,0 @@
class GenAIDisabledException(Exception):
def __init__(self, message: str = "Generative AI has been turned off") -> None:
self.message = message
super().__init__(self.message)

View File

@@ -3,7 +3,6 @@ from collections.abc import Callable
from sqlalchemy.orm import Session
from onyx.chat.models import PersonaOverrideConfig
from onyx.configs.app_configs import DISABLE_GENERATIVE_AI
from onyx.configs.model_configs import GEN_AI_TEMPERATURE
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.llm import can_user_access_llm_provider
@@ -16,7 +15,6 @@ from onyx.db.llm import fetch_user_group_ids
from onyx.db.models import Persona
from onyx.db.models import User
from onyx.llm.chat_llm import LitellmLLM
from onyx.llm.exceptions import GenAIDisabledException
from onyx.llm.interfaces import LLM
from onyx.llm.interfaces import LLMConfig
from onyx.llm.llm_provider_options import OLLAMA_API_KEY_CONFIG_KEY
@@ -202,8 +200,6 @@ def get_default_llm_with_vision(
Returns None if no providers exist or if no provider supports images.
"""
if DISABLE_GENERATIVE_AI:
raise GenAIDisabledException()
def create_vision_llm(provider: LLMProviderView, model: str) -> LLM:
"""Helper to create an LLM if the provider supports image input."""
@@ -321,9 +317,6 @@ def get_default_llms(
additional_headers: dict[str, str] | None = None,
long_term_logger: LongTermLogger | None = None,
) -> tuple[LLM, LLM]:
if DISABLE_GENERATIVE_AI:
raise GenAIDisabledException()
with get_session_with_current_tenant() as db_session:
llm_provider = fetch_default_provider(db_session)

View File

@@ -1,30 +1,22 @@
import abc
from collections.abc import Iterator
from collections.abc import Sequence
from typing import Literal
from typing import Union
from braintrust import traced
from langchain.schema.language_model import (
LanguageModelInput as LangChainLanguageModelInput,
)
from langchain_core.messages import AIMessageChunk
from langchain_core.messages import BaseMessage
from pydantic import BaseModel
from onyx.configs.app_configs import DISABLE_GENERATIVE_AI
from onyx.configs.app_configs import LOG_INDIVIDUAL_MODEL_TOKENS
from onyx.configs.app_configs import LOG_ONYX_MODEL_INTERACTIONS
from onyx.llm.message_types import ChatCompletionMessage
from onyx.llm.model_response import ModelResponse
from onyx.llm.model_response import ModelResponseStream
from onyx.llm.models import LanguageModelInput
from onyx.llm.models import ReasoningEffort
from onyx.llm.models import ToolChoiceOptions
from onyx.utils.logger import setup_logger
logger = setup_logger()
STANDARD_TOOL_CHOICE_OPTIONS = ("required", "auto", "none")
ToolChoiceOptions = Union[Literal["required", "auto", "none"], str]
LanguageModelInput = Union[Sequence[ChatCompletionMessage], str]
class LLMUserIdentity(BaseModel):
user_id: str | None = None
session_id: str | None = None
class LLMConfig(BaseModel):
@@ -41,60 +33,12 @@ class LLMConfig(BaseModel):
model_config = {"protected_namespaces": ()}
def log_prompt(prompt: LangChainLanguageModelInput) -> None:
if isinstance(prompt, list):
for ind, msg in enumerate(prompt):
if isinstance(msg, AIMessageChunk):
if msg.content:
log_msg = msg.content
elif msg.tool_call_chunks:
log_msg = "Tool Calls: " + str(
[
{
key: value
for key, value in tool_call.items()
if key != "index"
}
for tool_call in msg.tool_call_chunks
]
)
else:
log_msg = ""
logger.debug(f"Message {ind}:\n{log_msg}")
else:
logger.debug(f"Message {ind}:\n{msg.content}")
if isinstance(prompt, str):
logger.debug(f"Prompt:\n{prompt}")
class LLM(abc.ABC):
"""Mimics the LangChain LLM / BaseChatModel interfaces to make it easy
to use these implementations to connect to a variety of LLM providers."""
@property
def requires_warm_up(self) -> bool:
"""Is this model running in memory and needs an initial call to warm it up?"""
return False
@property
def requires_api_key(self) -> bool:
return True
@property
@abc.abstractmethod
def config(self) -> LLMConfig:
raise NotImplementedError
@abc.abstractmethod
def log_model_configs(self) -> None:
raise NotImplementedError
def _precall(self, prompt: LangChainLanguageModelInput) -> None:
if DISABLE_GENERATIVE_AI:
raise Exception("Generative AI is disabled")
if LOG_ONYX_MODEL_INTERACTIONS:
log_prompt(prompt)
@traced(name="invoke llm", type="llm")
def invoke(
self,
@@ -104,72 +48,9 @@ class LLM(abc.ABC):
structured_response_format: dict | None = None,
timeout_override: int | None = None,
max_tokens: int | None = None,
reasoning_effort: ReasoningEffort | None = None,
user_identity: LLMUserIdentity | None = None,
) -> "ModelResponse":
return self._invoke_implementation(
prompt,
tools,
tool_choice,
structured_response_format,
timeout_override,
max_tokens,
)
@traced(name="invoke llm", type="llm")
def invoke_langchain(
self,
prompt: LangChainLanguageModelInput,
tools: list[dict] | None = None,
tool_choice: ToolChoiceOptions | None = None,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
max_tokens: int | None = None,
) -> BaseMessage:
self._precall(prompt)
# TODO add a postcall to log model outputs independent of concrete class
# implementation
return self._invoke_implementation_langchain(
prompt,
tools,
tool_choice,
structured_response_format,
timeout_override,
max_tokens,
)
@abc.abstractmethod
def _invoke_implementation(
self,
prompt: LanguageModelInput,
tools: list[dict] | None = None,
tool_choice: ToolChoiceOptions | None = None,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
max_tokens: int | None = None,
) -> "ModelResponse":
raise NotImplementedError
@abc.abstractmethod
def _stream_implementation(
self,
prompt: LanguageModelInput,
tools: list[dict] | None = None,
tool_choice: ToolChoiceOptions | None = None,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
max_tokens: int | None = None,
) -> Iterator[ModelResponseStream]:
raise NotImplementedError
@abc.abstractmethod
def _invoke_implementation_langchain(
self,
prompt: LangChainLanguageModelInput,
tools: list[dict] | None = None,
tool_choice: ToolChoiceOptions | None = None,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
max_tokens: int | None = None,
) -> BaseMessage:
raise NotImplementedError
def stream(
@@ -180,54 +61,7 @@ class LLM(abc.ABC):
structured_response_format: dict | None = None,
timeout_override: int | None = None,
max_tokens: int | None = None,
reasoning_effort: ReasoningEffort | None = None,
user_identity: LLMUserIdentity | None = None,
) -> Iterator[ModelResponseStream]:
return self._stream_implementation(
prompt,
tools,
tool_choice,
structured_response_format,
timeout_override,
max_tokens,
)
def stream_langchain(
self,
prompt: LangChainLanguageModelInput,
tools: list[dict] | None = None,
tool_choice: ToolChoiceOptions | None = None,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
max_tokens: int | None = None,
) -> Iterator[BaseMessage]:
self._precall(prompt)
# TODO add a postcall to log model outputs independent of concrete class
# implementation
messages = self._stream_implementation_langchain(
prompt,
tools,
tool_choice,
structured_response_format,
timeout_override,
max_tokens,
)
tokens = []
for message in messages:
if LOG_INDIVIDUAL_MODEL_TOKENS:
tokens.append(message.content)
yield message
if LOG_INDIVIDUAL_MODEL_TOKENS and tokens:
logger.debug(f"Model Tokens: {tokens}")
@abc.abstractmethod
def _stream_implementation_langchain(
self,
prompt: LangChainLanguageModelInput,
tools: list[dict] | None = None,
tool_choice: ToolChoiceOptions | None = None,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
max_tokens: int | None = None,
) -> Iterator[BaseMessage]:
raise NotImplementedError
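A hedged sketch of driving the slimmed-down interface with the new parameters; the concrete `llm` instance and the prompt text are assumptions, not part of this diff.
# Illustrative only; `llm` is assumed to be some concrete implementation such as LitellmLLM.
for chunk in llm.stream(
    "Summarize the release notes in two sentences.",
    reasoning_effort=ReasoningEffort.LOW,
    user_identity=LLMUserIdentity(user_id="user-123", session_id="chat-456"),
):
    ...  # each chunk is a ModelResponseStream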

View File

@@ -606,6 +606,56 @@ def _patch_openai_responses_transform_response() -> None:
LiteLLMResponsesTransformationHandler.transform_response = _patched_transform_response # type: ignore[method-assign]
def _patch_openai_responses_tool_content_type() -> None:
"""
Patches LiteLLMResponsesTransformationHandler._convert_content_str_to_input_text
to use 'input_text' type for tool messages instead of 'output_text'.
The OpenAI Responses API only accepts 'input_text', 'input_image', and 'input_file'
in the function_call_output.output array. The default litellm implementation
incorrectly uses 'output_text' for tool messages, causing 400 Bad Request errors.
See: https://github.com/BerriAI/litellm/issues/17507
This should be removed once litellm releases a fix for this issue.
"""
original_method = (
LiteLLMResponsesTransformationHandler._convert_content_str_to_input_text
)
if (
getattr(
original_method,
"__name__",
"",
)
== "_patched_convert_content_str_to_input_text"
):
return
def _patched_convert_content_str_to_input_text(
self: Any, content: str, role: str
) -> Dict[str, Any]:
"""
Convert string content to the appropriate Responses API format.
For user, system, and tool messages, use 'input_text' type.
For assistant messages, use 'output_text' type.
Tool messages go into function_call_output.output, which only accepts
'input_text', 'input_image', and 'input_file' types.
"""
if role in ("user", "system", "tool"):
return {"type": "input_text", "text": content}
else:
return {"type": "output_text", "text": content}
_patched_convert_content_str_to_input_text.__name__ = (
"_patched_convert_content_str_to_input_text"
)
LiteLLMResponsesTransformationHandler._convert_content_str_to_input_text = _patched_convert_content_str_to_input_text # type: ignore[method-assign]
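For illustration, a sketch of the payload shape the patch produces; the call_id, text value, and envelope fields are assumed placeholders, and only the "input_text" part type is grounded in the docstring above.
# Illustrative only: with the patch, a tool message's string content is wrapped as an
# "input_text" part, which the Responses API accepts inside function_call_output.output
# (the unpatched "output_text" variant triggered 400 Bad Request).
tool_output_part = {"type": "input_text", "text": "weather: 72F and sunny"}
function_call_output = {
    "type": "function_call_output",  # assumed envelope fields for the sketch
    "call_id": "call_abc123",
    "output": [tool_output_part],
}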
def apply_monkey_patches() -> None:
"""
Apply all necessary monkey patches to LiteLLM for compatibility.
@@ -615,11 +665,13 @@ def apply_monkey_patches() -> None:
- Patching OllamaChatCompletionResponseIterator.chunk_parser for streaming content
- Patching OpenAiResponsesToChatCompletionStreamIterator.chunk_parser for OpenAI Responses API
- Patching LiteLLMResponsesTransformationHandler.transform_response for non-streaming responses
- Patching LiteLLMResponsesTransformationHandler._convert_content_str_to_input_text for tool content types
"""
_patch_ollama_transform_request()
_patch_ollama_chunk_parser()
_patch_openai_responses_chunk_parser()
_patch_openai_responses_transform_response()
_patch_openai_responses_tool_content_type()
def _extract_reasoning_content(message: dict) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -56,6 +56,15 @@ class WellKnownLLMProviderDescriptor(BaseModel):
OPENAI_PROVIDER_NAME = "openai"
# Curated list of OpenAI models to show by default in the UI
OPENAI_VISIBLE_MODEL_NAMES = {
"gpt-5",
"gpt-5-mini",
"o1",
"o3-mini",
"gpt-4o",
"gpt-4o-mini",
}
BEDROCK_PROVIDER_NAME = "bedrock"
BEDROCK_DEFAULT_MODEL = "anthropic.claude-3-5-sonnet-20241022-v2:0"
@@ -125,6 +134,12 @@ _IGNORABLE_ANTHROPIC_MODELS = {
"claude-instant-1",
"anthropic/claude-3-5-sonnet-20241022",
}
# Curated list of Anthropic models to show by default in the UI
ANTHROPIC_VISIBLE_MODEL_NAMES = {
"claude-opus-4-5",
"claude-sonnet-4-5",
"claude-haiku-4-5",
}
AZURE_PROVIDER_NAME = "azure"
@@ -134,53 +149,113 @@ VERTEX_CREDENTIALS_FILE_KWARG = "vertex_credentials"
VERTEX_LOCATION_KWARG = "vertex_location"
VERTEXAI_DEFAULT_MODEL = "gemini-2.5-flash"
VERTEXAI_DEFAULT_FAST_MODEL = "gemini-2.5-flash-lite"
# Curated list of Vertex AI models to show by default in the UI
VERTEXAI_VISIBLE_MODEL_NAMES = {
"gemini-2.5-flash",
"gemini-2.5-flash-lite",
"gemini-2.5-pro",
}
def is_obsolete_model(model_name: str, provider: str) -> bool:
"""Check if a model is obsolete and should be filtered out.
Filters models that are 2+ major versions behind or deprecated.
This is the single source of truth for obsolete model detection.
"""
model_lower = model_name.lower()
# OpenAI obsolete models
if provider == "openai":
# GPT-3 models are obsolete
if "gpt-3" in model_lower:
return True
# Legacy models
deprecated = {
"text-davinci-003",
"text-davinci-002",
"text-curie-001",
"text-babbage-001",
"text-ada-001",
"davinci",
"curie",
"babbage",
"ada",
}
if model_lower in deprecated:
return True
# Anthropic obsolete models
if provider == "anthropic":
if "claude-2" in model_lower or "claude-instant" in model_lower:
return True
# Vertex AI obsolete models
if provider == "vertex_ai":
if "gemini-1.0" in model_lower:
return True
if "palm" in model_lower or "bison" in model_lower:
return True
return False
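A short, hedged example of applying this filter when assembling a model list; the model/provider pairs below are illustrative assumptions.
# Illustrative only; these pairs are examples, not a curated list.
candidates = [
    ("gpt-4o", "openai"),
    ("gpt-3.5-turbo", "openai"),
    ("claude-2.1", "anthropic"),
    ("claude-haiku-4-5", "anthropic"),
]
usable = [name for name, provider in candidates if not is_obsolete_model(name, provider)]
# usable -> ["gpt-4o", "claude-haiku-4-5"]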
def _get_provider_to_models_map() -> dict[str, list[str]]:
"""Lazy-load provider model mappings to avoid importing litellm at module level."""
"""Lazy-load provider model mappings to avoid importing litellm at module level.
Dynamic providers (Bedrock, Ollama, OpenRouter) return empty lists here
because their models are fetched directly from the source API, which is
more up-to-date than LiteLLM's static lists.
"""
return {
OPENAI_PROVIDER_NAME: get_openai_model_names(),
BEDROCK_PROVIDER_NAME: get_bedrock_model_names(),
BEDROCK_PROVIDER_NAME: [], # Dynamic - fetched from AWS API
ANTHROPIC_PROVIDER_NAME: get_anthropic_model_names(),
VERTEXAI_PROVIDER_NAME: get_vertexai_model_names(),
OLLAMA_PROVIDER_NAME: [],
OPENROUTER_PROVIDER_NAME: get_openrouter_model_names(),
OLLAMA_PROVIDER_NAME: [], # Dynamic - fetched from Ollama API
OPENROUTER_PROVIDER_NAME: [], # Dynamic - fetched from OpenRouter API
}
def get_openai_model_names() -> list[str]:
"""Get OpenAI model names dynamically from litellm."""
import re
import litellm
# TODO: remove these lists once we have a comprehensive model configuration page
# The ideal flow should be: fetch all available models --> filter by type
# --> allow user to modify filters and select models based on current context
non_chat_model_terms = {
"embed",
"audio",
"tts",
"whisper",
"dall-e",
"image",
"moderation",
"sora",
"container",
}
deprecated_model_terms = {"babbage", "davinci", "gpt-3.5", "gpt-4-"}
excluded_terms = non_chat_model_terms | deprecated_model_terms
# NOTE: We are explicitly excluding all "timestamped" models
# because they are mostly just noise in the admin configuration panel
# e.g. gpt-4o-2025-07-16, gpt-3.5-turbo-0613, etc.
date_pattern = re.compile(r"-\d{4}")
def is_valid_model(model: str) -> bool:
model_lower = model.lower()
return not any(
ex in model_lower for ex in excluded_terms
) and not date_pattern.search(model)
return sorted(
[
# Strip openai/ prefix if present
model.replace("openai/", "") if model.startswith("openai/") else model
(
model.removeprefix("openai/")
for model in litellm.open_ai_chat_completion_models
if "embed" not in model.lower()
and "audio" not in model.lower()
and "tts" not in model.lower()
and "whisper" not in model.lower()
and "dall-e" not in model.lower()
and "moderation" not in model.lower()
and "sora" not in model.lower() # video generation
and "container" not in model.lower() # not a model
],
reverse=True,
)
def get_bedrock_model_names() -> list[str]:
"""Get Bedrock model names dynamically from litellm."""
import litellm
# bedrock_converse_models are just extensions of the bedrock_models
return sorted(
[
model
for model in litellm.bedrock_models.union(litellm.bedrock_converse_models)
if "/" not in model and "embed" not in model.lower()
],
if is_valid_model(model)
),
reverse=True,
)
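A quick, hedged sketch of the timestamped-model exclusion described in the NOTE above; it mirrors only the date-suffix check, and the sample names are assumptions.
import re

# Mirrors just the "-YYYY" date check used to drop timestamped variants; illustrative only.
date_pattern = re.compile(r"-\d{4}")
samples = ["gpt-4o", "gpt-4o-2024-08-06", "o3-mini", "gpt-4o-mini-2024-07-18"]
kept = [m for m in samples if not date_pattern.search(m)]
# kept -> ["gpt-4o", "o3-mini"]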
@@ -194,6 +269,7 @@ def get_anthropic_model_names() -> list[str]:
model
for model in litellm.anthropic_models
if model not in _IGNORABLE_ANTHROPIC_MODELS
and not is_obsolete_model(model, ANTHROPIC_PROVIDER_NAME)
],
reverse=True,
)
@@ -239,21 +315,12 @@ def get_vertexai_model_names() -> list[str]:
and "/" not in model # filter out prefixed models like openai/gpt-oss
and "search_api" not in model.lower() # not a model
and "-maas" not in model.lower() # marketplace models
and not is_obsolete_model(model, VERTEXAI_PROVIDER_NAME)
],
reverse=True,
)
def get_openrouter_model_names() -> list[str]:
"""Get OpenRouter model names dynamically from litellm."""
import litellm
return sorted(
[model for model in litellm.openrouter_models if "embed" not in model.lower()],
reverse=True,
)
def fetch_available_well_known_llms() -> list[WellKnownLLMProviderDescriptor]:
return [
WellKnownLLMProviderDescriptor(
@@ -488,20 +555,46 @@ def get_provider_display_name(provider_name: str) -> str:
)
def _get_visible_models_for_provider(provider_name: str) -> set[str]:
"""Get the set of models that should be visible by default for a provider."""
_PROVIDER_TO_VISIBLE_MODELS: dict[str, set[str]] = {
OPENAI_PROVIDER_NAME: OPENAI_VISIBLE_MODEL_NAMES,
ANTHROPIC_PROVIDER_NAME: ANTHROPIC_VISIBLE_MODEL_NAMES,
VERTEXAI_PROVIDER_NAME: VERTEXAI_VISIBLE_MODEL_NAMES,
}
return _PROVIDER_TO_VISIBLE_MODELS.get(provider_name, set())
def fetch_model_configurations_for_provider(
provider_name: str,
) -> list[ModelConfigurationView]:
# No models are marked visible by default - the default model logic
# in the frontend/backend will handle making default models visible.
return [
ModelConfigurationView(
name=model_name,
is_visible=False,
max_input_tokens=None,
supports_image_input=model_supports_image_input(
model_name=model_name,
model_provider=provider_name,
),
"""Fetch model configurations for a static provider (OpenAI, Anthropic, Vertex AI).
Looks up max_input_tokens from LiteLLM's model_cost. If not found, stores None
and the runtime will use the fallback (32000).
Models in the curated visible lists (OPENAI_VISIBLE_MODEL_NAMES, etc.) are
marked as is_visible=True by default.
"""
from onyx.llm.utils import get_max_input_tokens
visible_models = _get_visible_models_for_provider(provider_name)
configs = []
for model_name in fetch_models_for_provider(provider_name):
max_input_tokens = get_max_input_tokens(
model_name=model_name,
model_provider=provider_name,
)
for model_name in fetch_models_for_provider(provider_name)
]
configs.append(
ModelConfigurationView(
name=model_name,
is_visible=model_name in visible_models,
max_input_tokens=max_input_tokens,
supports_image_input=model_supports_image_input(
model_name=model_name,
model_provider=provider_name,
),
)
)
return configs
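A hedged usage sketch of the reworked helper; the provider name and the printed fields are assumptions based only on this diff.
# Illustrative only; assumes LiteLLM's model_cost map is available in this environment.
for view in fetch_model_configurations_for_provider("anthropic"):
    if view.is_visible:  # only the curated models (e.g. claude-sonnet-4-5) are visible by default
        print(view.name, view.max_input_tokens, view.supports_image_input)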

View File

@@ -1,70 +0,0 @@
from typing import Literal
from typing import NotRequired
from typing_extensions import TypedDict
# Content part structures for multimodal messages
class TextContentPart(TypedDict):
type: Literal["text"]
text: str
class ImageUrlDetail(TypedDict):
url: str
detail: NotRequired[Literal["auto", "low", "high"]]
class ImageContentPart(TypedDict):
type: Literal["image_url"]
image_url: ImageUrlDetail
ContentPart = TextContentPart | ImageContentPart
# Tool call structures
class FunctionCall(TypedDict):
name: str
arguments: str
class ToolCall(TypedDict):
id: str
type: Literal["function"]
function: FunctionCall
# Message types
class SystemMessage(TypedDict):
role: Literal["system"]
content: str
class UserMessageWithText(TypedDict):
role: Literal["user"]
content: str
class UserMessageWithParts(TypedDict):
role: Literal["user"]
content: list[ContentPart]
UserMessage = UserMessageWithText | UserMessageWithParts
class AssistantMessage(TypedDict):
role: Literal["assistant"]
content: NotRequired[str | None]
tool_calls: NotRequired[list[ToolCall]]
class ToolMessage(TypedDict):
role: Literal["tool"]
content: str
tool_call_id: str
# Union type for all OpenAI Chat Completions messages
ChatCompletionMessage = SystemMessage | UserMessage | AssistantMessage | ToolMessage

View File

@@ -2621,6 +2621,28 @@
"model_vendor": "openai",
"model_version": "2025-10-06"
},
"gpt-5.2-pro-2025-12-11": {
"display_name": "GPT-5.2 Pro",
"model_vendor": "openai",
"model_version": "2025-12-11"
},
"gpt-5.2-pro": {
"display_name": "GPT-5.2 Pro",
"model_vendor": "openai"
},
"gpt-5.2-chat-latest": {
"display_name": "GPT 5.2 Chat",
"model_vendor": "openai"
},
"gpt-5.2-2025-12-11": {
"display_name": "GPT 5.2",
"model_vendor": "openai",
"model_version": "2025-12-11"
},
"gpt-5.2": {
"display_name": "GPT 5.2",
"model_vendor": "openai"
},
"gpt-5.1": {
"display_name": "GPT 5.1",
"model_vendor": "openai"

backend/onyx/llm/models.py (new file, 104 lines)
View File

@@ -0,0 +1,104 @@
from enum import Enum
from typing import Literal
from pydantic import BaseModel
class ToolChoiceOptions(str, Enum):
REQUIRED = "required"
AUTO = "auto"
NONE = "none"
class ReasoningEffort(str, Enum):
"""Reasoning effort levels for models that support extended thinking.
Different providers map these values differently:
- OpenAI: Uses "low", "medium", "high" directly for reasoning_effort. Recently added "none" for 5 series
which is like "minimal"
- Claude: Uses budget_tokens with different values for each level
- Gemini: Uses "none", "low", "medium", "high" for thinking_budget (via litellm mapping)
"""
OFF = "off"
LOW = "low"
MEDIUM = "medium"
HIGH = "high"
# Budget tokens for Claude extended thinking at each reasoning effort level
CLAUDE_REASONING_BUDGET_TOKENS: dict[ReasoningEffort, int] = {
ReasoningEffort.OFF: 0,
ReasoningEffort.LOW: 1000,
ReasoningEffort.MEDIUM: 5000,
ReasoningEffort.HIGH: 10000,
}
# OpenAI reasoning effort mapping (direct string values)
OPENAI_REASONING_EFFORT: dict[ReasoningEffort, str] = {
ReasoningEffort.OFF: "none", # this only works for the 5 series though
ReasoningEffort.LOW: "low",
ReasoningEffort.MEDIUM: "medium",
ReasoningEffort.HIGH: "high",
}
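A minimal sketch of turning these mappings into provider-specific request parameters; the function name and the exact parameter shapes (especially the Anthropic "thinking" block) are assumptions, not part of this diff.
def reasoning_params_sketch(provider: str, effort: ReasoningEffort) -> dict:
    # Illustrative translation only: OpenAI takes a string effort level, while
    # Claude's extended thinking is driven by a token budget.
    if provider == "openai":
        return {"reasoning_effort": OPENAI_REASONING_EFFORT[effort]}
    if provider == "anthropic":
        budget = CLAUDE_REASONING_BUDGET_TOKENS[effort]
        if budget == 0:
            return {}  # ReasoningEffort.OFF -> no extended thinking block
        return {"thinking": {"type": "enabled", "budget_tokens": budget}}
    return {}
# e.g. reasoning_params_sketch("openai", ReasoningEffort.HIGH) -> {"reasoning_effort": "high"}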
# Content part structures for multimodal messages
# The classes in this module mirror the OpenAI Chat Completions message types and work well with routers like LiteLLM
class TextContentPart(BaseModel):
type: Literal["text"] = "text"
text: str
class ImageUrlDetail(BaseModel):
url: str
detail: Literal["auto", "low", "high"] | None = None
class ImageContentPart(BaseModel):
type: Literal["image_url"] = "image_url"
image_url: ImageUrlDetail
ContentPart = TextContentPart | ImageContentPart
# Tool call structures
class FunctionCall(BaseModel):
name: str
arguments: str
class ToolCall(BaseModel):
type: Literal["function"] = "function"
id: str
function: FunctionCall
# Message types
class SystemMessage(BaseModel):
role: Literal["system"] = "system"
content: str
class UserMessage(BaseModel):
role: Literal["user"] = "user"
content: str | list[ContentPart]
class AssistantMessage(BaseModel):
role: Literal["assistant"] = "assistant"
content: str | None = None
tool_calls: list[ToolCall] | None = None
class ToolMessage(BaseModel):
role: Literal["tool"] = "tool"
content: str
tool_call_id: str
# Union type for all OpenAI Chat Completions messages
ChatCompletionMessage = SystemMessage | UserMessage | AssistantMessage | ToolMessage
# Allows for passing in a string directly. This is provided for convenience and is wrapped as a UserMessage.
LanguageModelInput = list[ChatCompletionMessage] | str
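A brief sketch of constructing a LanguageModelInput with these models; the image URL and the commented invoke call are illustrative assumptions.
# Illustrative construction only.
messages: list[ChatCompletionMessage] = [
    SystemMessage(content="You are a terse assistant."),
    UserMessage(
        content=[
            TextContentPart(text="What is in this image?"),
            ImageContentPart(image_url=ImageUrlDetail(url="https://example.com/cat.png")),
        ]
    ),
]
# A bare string is also accepted and is wrapped as a UserMessage, e.g.:
# llm.invoke(messages) or llm.invoke("Say hello")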

View File

@@ -6,10 +6,6 @@ from typing import Any
from typing import cast
from typing import TYPE_CHECKING
from langchain.schema.messages import AIMessage
from langchain.schema.messages import BaseMessage
from langchain.schema.messages import HumanMessage
from langchain.schema.messages import SystemMessage
from sqlalchemy import select
from onyx.configs.app_configs import LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS
@@ -23,6 +19,7 @@ from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import LLMProvider
from onyx.db.models import ModelConfiguration
from onyx.llm.interfaces import LLM
from onyx.llm.model_response import ModelResponse
from onyx.prompts.contextual_retrieval import CONTEXTUAL_RAG_TOKEN_ESTIMATE
from onyx.prompts.contextual_retrieval import DOCUMENT_SUMMARY_TOKEN_ESTIMATE
from onyx.utils.logger import setup_logger
@@ -88,7 +85,15 @@ def litellm_exception_to_error_msg(
custom_error_msg_mappings: (
dict[str, str] | None
) = LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS,
) -> str:
) -> tuple[str, str, bool]:
"""Convert a LiteLLM exception to a user-friendly error message with classification.
Returns:
tuple: (error_message, error_code, is_retryable)
- error_message: User-friendly error description
- error_code: Categorized error code for frontend display
- is_retryable: Whether the user should try again
"""
from litellm.exceptions import BadRequestError
from litellm.exceptions import AuthenticationError
from litellm.exceptions import PermissionDeniedError
@@ -105,25 +110,37 @@ def litellm_exception_to_error_msg(
core_exception = _unwrap_nested_exception(e)
error_msg = str(core_exception)
error_code = "UNKNOWN_ERROR"
is_retryable = True
if custom_error_msg_mappings:
for error_msg_pattern, custom_error_msg in custom_error_msg_mappings.items():
if error_msg_pattern in error_msg:
return custom_error_msg
return custom_error_msg, "CUSTOM_ERROR", True
if isinstance(core_exception, BadRequestError):
error_msg = "Bad request: The server couldn't process your request. Please check your input."
error_code = "BAD_REQUEST"
is_retryable = True
elif isinstance(core_exception, AuthenticationError):
error_msg = "Authentication failed: Please check your API key and credentials."
error_code = "AUTH_ERROR"
is_retryable = False
elif isinstance(core_exception, PermissionDeniedError):
error_msg = (
"Permission denied: You don't have the necessary permissions for this operation."
"Permission denied: You don't have the necessary permissions for this operation. "
"Ensure you have access to this model."
)
error_code = "PERMISSION_DENIED"
is_retryable = False
elif isinstance(core_exception, NotFoundError):
error_msg = "Resource not found: The requested resource doesn't exist."
error_code = "NOT_FOUND"
is_retryable = False
elif isinstance(core_exception, UnprocessableEntityError):
error_msg = "Unprocessable entity: The server couldn't process your request due to semantic errors."
error_code = "UNPROCESSABLE_ENTITY"
is_retryable = True
elif isinstance(core_exception, RateLimitError):
provider_name = (
llm.config.model_provider
@@ -154,6 +171,8 @@ def litellm_exception_to_error_msg(
if upstream_detail
else f"{provider_name} rate limit exceeded: Please slow down your requests and try again later."
)
error_code = "RATE_LIMIT"
is_retryable = True
elif isinstance(core_exception, ServiceUnavailableError):
provider_name = (
llm.config.model_provider
@@ -171,6 +190,8 @@ def litellm_exception_to_error_msg(
else:
# Generic 503 Service Unavailable
error_msg = f"{provider_name} service error: {str(core_exception)}"
error_code = "SERVICE_UNAVAILABLE"
is_retryable = True
elif isinstance(core_exception, ContextWindowExceededError):
error_msg = (
"Context window exceeded: Your input is too long for the model to process."
@@ -181,58 +202,51 @@ def litellm_exception_to_error_msg(
model_name=llm.config.model_name,
model_provider=llm.config.model_provider,
)
error_msg += f"Your invoked model ({llm.config.model_name}) has a maximum context size of {max_context}"
error_msg += f" Your invoked model ({llm.config.model_name}) has a maximum context size of {max_context}."
except Exception:
logger.warning(
"Unable to get maximum input token for LiteLLM excpetion handling"
"Unable to get maximum input token for LiteLLM exception handling"
)
error_code = "CONTEXT_TOO_LONG"
is_retryable = False
elif isinstance(core_exception, ContentPolicyViolationError):
error_msg = "Content policy violation: Your request violates the content policy. Please revise your input."
error_code = "CONTENT_POLICY"
is_retryable = False
elif isinstance(core_exception, APIConnectionError):
error_msg = "API connection error: Failed to connect to the API. Please check your internet connection."
error_code = "CONNECTION_ERROR"
is_retryable = True
elif isinstance(core_exception, BudgetExceededError):
error_msg = (
"Budget exceeded: You've exceeded your allocated budget for API usage."
)
error_code = "BUDGET_EXCEEDED"
is_retryable = False
elif isinstance(core_exception, Timeout):
error_msg = "Request timed out: The operation took too long to complete. Please try again."
error_code = "CONNECTION_ERROR"
is_retryable = True
elif isinstance(core_exception, APIError):
error_msg = (
"API error: An error occurred while communicating with the API. "
f"Details: {str(core_exception)}"
)
error_code = "API_ERROR"
is_retryable = True
elif not fallback_to_error_msg:
error_msg = "An unexpected error occurred while processing your request. Please try again later."
return error_msg
error_code = "UNKNOWN_ERROR"
is_retryable = True
return error_msg, error_code, is_retryable
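A hedged sketch of consuming the new three-part return value; the surrounding try/except, the llm/prompt variables, and the positional argument order are assumptions.
# Illustrative caller-side handling only.
try:
    response = llm.invoke(prompt)
except Exception as e:
    error_msg, error_code, is_retryable = litellm_exception_to_error_msg(e, llm)
    # error_code (e.g. "RATE_LIMIT", "CONTEXT_TOO_LONG") can drive frontend display,
    # while is_retryable decides whether the user should be offered a retry.
    if is_retryable:
        ...  # surface error_msg and allow a retry
    else:
        ...  # surface error_msg as a terminal failure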
def dict_based_prompt_to_langchain_prompt(
messages: list[dict[str, str]],
) -> list[BaseMessage]:
prompt: list[BaseMessage] = []
for message in messages:
role = message.get("role")
content = message.get("content")
if not role:
raise ValueError(f"Message missing `role`: {message}")
if not content:
raise ValueError(f"Message missing `content`: {message}")
elif role == "user":
prompt.append(HumanMessage(content=content))
elif role == "system":
prompt.append(SystemMessage(content=content))
elif role == "assistant":
prompt.append(AIMessage(content=content))
else:
raise ValueError(f"Unknown role: {role}")
return prompt
def message_to_string(message: BaseMessage) -> str:
if not isinstance(message.content, str):
def llm_response_to_string(message: ModelResponse) -> str:
if not isinstance(message.choice.message.content, str):
raise RuntimeError("LLM message not in expected format.")
return message.content
return message.choice.message.content
def check_number_of_tokens(
@@ -255,7 +269,7 @@ def test_llm(llm: LLM) -> str | None:
error_msg = None
for _ in range(2):
try:
llm.invoke_langchain("Do not respond")
llm.invoke("Do not respond")
return None
except Exception as e:
error_msg = str(e)
@@ -432,77 +446,74 @@ def get_llm_contextual_cost(
return usd_per_prompt + usd_per_completion
def get_llm_max_tokens(
def llm_max_input_tokens(
model_map: dict,
model_name: str,
model_provider: str,
) -> int:
"""Best effort attempt to get the max tokens for the LLM"""
"""Best effort attempt to get the max input tokens for the LLM."""
if GEN_AI_MAX_TOKENS:
# This is an override, so always return this
logger.info(f"Using override GEN_AI_MAX_TOKENS: {GEN_AI_MAX_TOKENS}")
return GEN_AI_MAX_TOKENS
try:
model_obj = find_model_obj(
model_map,
model_provider,
model_name,
)
if not model_obj:
raise RuntimeError(
f"No litellm entry found for {model_provider}/{model_name}"
)
if "max_input_tokens" in model_obj:
max_tokens = model_obj["max_input_tokens"]
return max_tokens
if "max_tokens" in model_obj:
max_tokens = model_obj["max_tokens"]
return max_tokens
logger.error(f"No max tokens found for LLM: {model_name}")
raise RuntimeError("No max tokens found for LLM")
except Exception:
logger.exception(
f"Failed to get max tokens for LLM with name {model_name}. Defaulting to {GEN_AI_MODEL_FALLBACK_MAX_TOKENS}."
model_obj = find_model_obj(
model_map,
model_provider,
model_name,
)
if not model_obj:
logger.warning(
f"Model '{model_name}' not found in LiteLLM. "
f"Falling back to {GEN_AI_MODEL_FALLBACK_MAX_TOKENS} tokens."
)
return GEN_AI_MODEL_FALLBACK_MAX_TOKENS
if "max_input_tokens" in model_obj:
return model_obj["max_input_tokens"]
if "max_tokens" in model_obj:
return model_obj["max_tokens"]
logger.warning(
f"No max tokens found for '{model_name}'. "
f"Falling back to {GEN_AI_MODEL_FALLBACK_MAX_TOKENS} tokens."
)
return GEN_AI_MODEL_FALLBACK_MAX_TOKENS
def get_llm_max_output_tokens(
model_map: dict,
model_name: str,
model_provider: str,
) -> int:
"""Best effort attempt to get the max output tokens for the LLM"""
try:
model_obj = model_map.get(f"{model_provider}/{model_name}")
if not model_obj:
model_obj = model_map[model_name]
else:
pass
"""Best effort attempt to get the max output tokens for the LLM."""
default_output_tokens = int(GEN_AI_MODEL_FALLBACK_MAX_TOKENS)
if "max_output_tokens" in model_obj:
max_output_tokens = model_obj["max_output_tokens"]
return max_output_tokens
model_obj = model_map.get(f"{model_provider}/{model_name}")
if not model_obj:
model_obj = model_map.get(model_name)
# Fallback to a fraction of max_tokens if max_output_tokens is not specified
if "max_tokens" in model_obj:
max_output_tokens = int(model_obj["max_tokens"] * 0.1)
return max_output_tokens
logger.error(f"No max output tokens found for LLM: {model_name}")
raise RuntimeError("No max output tokens found for LLM")
except Exception:
default_output_tokens = int(GEN_AI_MODEL_FALLBACK_MAX_TOKENS)
logger.exception(
f"Failed to get max output tokens for LLM with name {model_name}. "
f"Defaulting to {default_output_tokens} (fallback max tokens)."
if not model_obj:
logger.warning(
f"Model '{model_name}' not found in LiteLLM. "
f"Falling back to {default_output_tokens} output tokens."
)
return default_output_tokens
if "max_output_tokens" in model_obj:
return model_obj["max_output_tokens"]
# Fallback to a fraction of max_tokens if max_output_tokens is not specified
if "max_tokens" in model_obj:
return int(model_obj["max_tokens"] * 0.1)
logger.warning(
f"No max output tokens found for '{model_name}'. "
f"Falling back to {default_output_tokens} output tokens."
)
return default_output_tokens
def get_max_input_tokens(
model_name: str,
@@ -518,7 +529,7 @@ def get_max_input_tokens(
litellm_model_map = get_model_map()
input_toks = (
get_llm_max_tokens(
llm_max_input_tokens(
model_name=model_name,
model_provider=model_provider,
model_map=litellm_model_map,
@@ -536,6 +547,19 @@ def get_max_input_tokens_from_llm_provider(
llm_provider: "LLMProviderView",
model_name: str,
) -> int:
"""Get max input tokens for a model, with fallback chain.
Fallback order:
1. Use max_input_tokens from model_configuration (populated from source APIs
like OpenRouter, Ollama, or our Bedrock mapping)
2. Look up in litellm.model_cost dictionary
3. Fall back to GEN_AI_MODEL_FALLBACK_MAX_TOKENS (32000)
Most dynamic providers (OpenRouter, Ollama) provide context_length via their
APIs. Bedrock doesn't expose this, so we parse from model ID suffix (:200k)
or use BEDROCK_MODEL_TOKEN_LIMITS mapping. The 32000 fallback is only hit for
unknown models not in any of these sources.
"""
max_input_tokens = None
for model_configuration in llm_provider.model_configurations:
if model_configuration.name == model_name:
@@ -550,6 +574,54 @@ def get_max_input_tokens_from_llm_provider(
)
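A compressed, hedged restatement of the fallback chain from the docstring above; the function and its provider argument are placeholders, not the actual implementation.
def sketch_max_input_tokens(llm_provider, model_name: str, provider: str) -> int:
    # 1) explicit value stored on the matching model configuration (from the source API)
    for mc in llm_provider.model_configurations:
        if mc.name == model_name and mc.max_input_tokens is not None:
            return mc.max_input_tokens
    # 2) LiteLLM's model_cost lookup, with 3) the 32000 fallback handled inside
    return get_max_input_tokens(model_name=model_name, model_provider=provider)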
def get_bedrock_token_limit(model_id: str) -> int:
"""Look up token limit for a Bedrock model.
AWS Bedrock API doesn't expose token limits directly. This function
attempts to determine the limit from multiple sources.
Lookup order:
1. Parse from model ID suffix (e.g., ":200k" → 200000)
2. Check LiteLLM's model_cost dictionary
3. Fall back to our hardcoded BEDROCK_MODEL_TOKEN_LIMITS mapping
4. Default to 32000 if not found anywhere
"""
from onyx.llm.constants import BEDROCK_MODEL_TOKEN_LIMITS
model_id_lower = model_id.lower()
# 1. Try to parse context length from model ID suffix
# Format: "model-name:version:NNNk" where NNN is the context length in thousands
# Examples: ":200k", ":128k", ":1000k", ":8k", ":4k"
context_match = re.search(r":(\d+)k\b", model_id_lower)
if context_match:
return int(context_match.group(1)) * 1000
# 2. Check LiteLLM's model_cost dictionary
try:
model_map = get_model_map()
# Try with bedrock/ prefix first, then without
for key in [f"bedrock/{model_id}", model_id]:
if key in model_map:
model_info = model_map[key]
if "max_input_tokens" in model_info:
return model_info["max_input_tokens"]
if "max_tokens" in model_info:
return model_info["max_tokens"]
except Exception:
pass # Fall through to mapping
# 3. Try our hardcoded mapping (longest match first)
for pattern, limit in sorted(
BEDROCK_MODEL_TOKEN_LIMITS.items(), key=lambda x: -len(x[0])
):
if pattern in model_id_lower:
return limit
# 4. Default fallback
return GEN_AI_MODEL_FALLBACK_MAX_TOKENS
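A hedged usage example of the lookup order above; the model IDs are illustrative.
# Illustrative only; model IDs are examples.
get_bedrock_token_limit("anthropic.claude-3-5-sonnet-20241022-v2:0:200k")  # 200000, parsed from ":200k"
get_bedrock_token_limit("amazon.nova-lite-v1:0")  # 300000, via LiteLLM or the "nova-lite" mapping entry
get_bedrock_token_limit("some.unknown-model-v1")  # 32000, the GEN_AI_MODEL_FALLBACK_MAX_TOKENS default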
def model_supports_image_input(model_name: str, model_provider: str) -> bool:
# First, try to read an explicit configuration from the model_configuration table
try:
@@ -643,22 +715,32 @@ def is_true_openai_model(model_provider: str, model_name: str) -> bool:
"""
# NOTE: not using the OPENAI_PROVIDER_NAME constant here due to circular import issues
if model_provider != "openai":
if model_provider != "openai" and model_provider != "litellm_proxy":
return False
model_map = get_model_map()
def _check_if_model_name_is_openai_provider(model_name: str) -> bool:
return (
model_name in model_map
and model_map[model_name].get("litellm_provider") == "openai"
)
try:
model_map = get_model_map()
# Check if any model exists in litellm's registry with openai prefix
# If it's registered as "openai/model-name", it's a real OpenAI model
if f"openai/{model_name}" in model_map:
return True
if (
model_name in model_map
and model_map[model_name].get("litellm_provider") == "openai"
):
if _check_if_model_name_is_openai_provider(model_name):
return True
if model_name.startswith("azure/"):
model_name_with_azure_removed = "/".join(model_name.split("/")[1:])
if _check_if_model_name_is_openai_provider(model_name_with_azure_removed):
return True
return False
except Exception:

View File

@@ -38,7 +38,6 @@ from onyx.configs.app_configs import APP_HOST
from onyx.configs.app_configs import APP_PORT
from onyx.configs.app_configs import AUTH_RATE_LIMITING_ENABLED
from onyx.configs.app_configs import AUTH_TYPE
from onyx.configs.app_configs import DISABLE_GENERATIVE_AI
from onyx.configs.app_configs import LOG_ENDPOINT_LATENCY
from onyx.configs.app_configs import OAUTH_CLIENT_ID
from onyx.configs.app_configs import OAUTH_CLIENT_SECRET
@@ -113,9 +112,6 @@ from onyx.server.middleware.rate_limiting import close_auth_limiter
from onyx.server.middleware.rate_limiting import get_auth_rate_limiters
from onyx.server.middleware.rate_limiting import setup_auth_limiter
from onyx.server.onyx_api.ingestion import router as onyx_api_router
from onyx.server.openai_assistants_api.full_openai_assistants_api import (
get_full_openai_assistants_api_router,
)
from onyx.server.pat.api import router as pat_router
from onyx.server.query_and_chat.chat_backend import router as chat_router
from onyx.server.query_and_chat.chat_backend_v0 import router as chat_v0_router
@@ -274,9 +270,6 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
if OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET:
logger.notice("Both OAuth Client ID and Secret are configured.")
if DISABLE_GENERATIVE_AI:
logger.notice("Generative AI Q&A disabled")
# Initialize tracing if credentials are provided
setup_braintrust_if_creds_available()
setup_langfuse_if_creds_available()
@@ -411,9 +404,6 @@ def get_application(lifespan_override: Lifespan | None = None) -> FastAPI:
include_router_with_global_prefix_prepended(
application, token_rate_limit_settings_router
)
include_router_with_global_prefix_prepended(
application, get_full_openai_assistants_api_router()
)
include_router_with_global_prefix_prepended(application, long_term_logs_router)
include_router_with_global_prefix_prepended(application, api_key_router)
include_router_with_global_prefix_prepended(application, standard_oauth_router)

View File

@@ -16,7 +16,6 @@ from slack_sdk.models.blocks.basic_components import MarkdownTextObject
from slack_sdk.models.blocks.block_elements import ImageElement
from onyx.chat.models import ChatBasicResponse
from onyx.configs.app_configs import DISABLE_GENERATIVE_AI
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import SearchFeedbackType
@@ -255,9 +254,7 @@ def _build_documents_blocks(
message_id: int | None,
num_docs_to_display: int = ONYX_BOT_NUM_DOCS_TO_DISPLAY,
) -> list[Block]:
header_text = (
"Retrieved Documents" if DISABLE_GENERATIVE_AI else "Reference Documents"
)
header_text = "Reference Documents"
seen_docs_identifiers = set()
section_blocks: list[Block] = [HeaderBlock(text=header_text)]
included_docs = 0

View File

@@ -34,10 +34,8 @@ from onyx.configs.onyxbot_configs import (
from onyx.connectors.slack.utils import SlackTextCleaner
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.users import get_user_by_email
from onyx.llm.exceptions import GenAIDisabledException
from onyx.llm.factory import get_default_llms
from onyx.llm.utils import dict_based_prompt_to_langchain_prompt
from onyx.llm.utils import message_to_string
from onyx.llm.utils import llm_response_to_string
from onyx.onyxbot.slack.constants import FeedbackVisibility
from onyx.onyxbot.slack.models import ChannelType
from onyx.onyxbot.slack.models import ThreadMessage
@@ -143,24 +141,9 @@ def check_message_limit() -> bool:
def rephrase_slack_message(msg: str) -> str:
def _get_rephrase_message() -> list[dict[str, str]]:
messages = [
{
"role": "user",
"content": SLACK_LANGUAGE_REPHRASE_PROMPT.format(query=msg),
},
]
return messages
try:
llm, _ = get_default_llms(timeout=5)
except GenAIDisabledException:
logger.warning("Unable to rephrase Slack user message, Gen AI disabled")
return msg
messages = _get_rephrase_message()
filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages)
model_output = message_to_string(llm.invoke_langchain(filled_llm_prompt))
llm, _ = get_default_llms(timeout=5)
prompt = SLACK_LANGUAGE_REPHRASE_PROMPT.format(query=msg)
model_output = llm_response_to_string(llm.invoke(prompt))
logger.debug(model_output)
return model_output

File diff suppressed because it is too large

View File

@@ -1,44 +0,0 @@
AGENTIC_SEARCH_SYSTEM_PROMPT = """
You are an expert at evaluating the relevance of a document to a search query.
Provided a document and a search query, you determine if the document is relevant to the user query.
You ALWAYS output the 3 sections described below and every section always begins with the same header line.
The "Chain of Thought" is to help you understand the document and query and their relevance to one another.
The "Useful Analysis" is shown to the user to help them understand why the document is or is not useful for them.
The "Final Relevance Determination" is always a single True or False.
You always output your response following these 3 sections:
1. Chain of Thought:
Provide a chain of thought analysis considering:
- The main purpose and content of the document
- What the user is searching for
- How the document relates to the query
- Potential uses of the document for the given query
Be thorough, but avoid unnecessary repetition. Think step by step.
2. Useful Analysis:
Summarize the contents of the document as it relates to the user query.
BE ABSOLUTELY AS CONCISE AS POSSIBLE.
If the document is not useful, briefly mention what the document is about.
Do NOT say whether this document is useful or not useful, ONLY provide the summary.
If referring to the document, prefer using "this" document over "the" document.
3. Final Relevance Determination:
True or False
"""
AGENTIC_SEARCH_USER_PROMPT = """
Document Title: {title}{optional_metadata}
```
{content}
```
Query:
{query}
Be sure to run through the 3 steps of evaluation:
1. Chain of Thought
2. Useful Analysis
3. Final Relevance Determination
""".strip()

View File

@@ -1,147 +0,0 @@
# Standards
SEPARATOR_LINE = "-------"
SEPARATOR_LINE_LONG = "---------------"
NO_EXTRACTION = "No extraction of knowledge graph objects was feasable."
YES = "yes"
NO = "no"
DC_OBJECT_SEPARATOR = ";"
DC_OBJECT_NO_BASE_DATA_EXTRACTION_PROMPT = f"""
You are an expert in finding relevant objects/object specifications of the same type in a list of documents. \
In this case you are interested \
in generating: {{objects_of_interest}}.
You should look at the documents - in no particular order! - and extract each object you find in the documents.
{SEPARATOR_LINE}
Here are the documents you are supposed to search through:
--
{{document_text}}
{SEPARATOR_LINE}
Here are the task instructions you should use to help you find the desired objects:
{SEPARATOR_LINE}
{{task}}
{SEPARATOR_LINE}
Here is the question that may provide critical additional context for the task:
{SEPARATOR_LINE}
{{question}}
{SEPARATOR_LINE}
Please answer the question in the following format:
REASONING: <your reasoning for the classification> - OBJECTS: <the objects - just their names - that you found, \
separated by ';'>
""".strip()
DC_OBJECT_WITH_BASE_DATA_EXTRACTION_PROMPT = f"""
You are an expert in finding relevant objects/object specifications of the same type in a list of documents. \
In this case you are interested \
in generating: {{objects_of_interest}}.
You should look at the provided data - in no particular order! - and extract each object you find in the documents.
{SEPARATOR_LINE}
Here are the data provided by the user:
--
{{base_data}}
{SEPARATOR_LINE}
Here are the task instructions you should use to help you find the desired objects:
{SEPARATOR_LINE}
{{task}}
{SEPARATOR_LINE}
Here is the request that may provide critical additional context for the task:
{SEPARATOR_LINE}
{{question}}
{SEPARATOR_LINE}
Please address the request in the following format:
REASONING: <your reasoning for the classification> - OBJECTS: <the objects - just their names - that you found, \
separated by ';'>
""".strip()
DC_OBJECT_SOURCE_RESEARCH_PROMPT = f"""
Today is {{today}}. You are an expert in extracting relevant structured information from a list of documents that \
should relate to one object. (Try to make sure that you know it relates to that one object!).
You should look at the documents - in no particular order! - and extract the information asked for this task:
{SEPARATOR_LINE}
{{task}}
{SEPARATOR_LINE}
Here is the user question that may provide critical additional context for the task:
{SEPARATOR_LINE}
{{question}}
{SEPARATOR_LINE}
Here are the documents you are supposed to search through:
--
{{document_text}}
{SEPARATOR_LINE}
Note: please cite your sources inline as you generate the results! Use the format [1], etc. Infer the \
number from the provided context documents. This is very important!
Please address the task in the following format:
REASONING:
-- <your reasoning for the classification>
RESEARCH RESULTS:
{{format}}
""".strip()
DC_OBJECT_CONSOLIDATION_PROMPT = f"""
You are a helpful assistant that consolidates information about a specific object \
from multiple sources.
The object is:
{SEPARATOR_LINE}
{{object}}
{SEPARATOR_LINE}
and the information is
{SEPARATOR_LINE}
{{information}}
{SEPARATOR_LINE}
Here is the user question that may provide critical additional context for the task:
{SEPARATOR_LINE}
{{question}}
{SEPARATOR_LINE}
Please consolidate the information into a single, concise answer. The consolidated information \
for the object should be in the following format:
{SEPARATOR_LINE}
{{format}}
{SEPARATOR_LINE}
Overall, please use this structure to communicate the consolidated information:
{SEPARATOR_LINE}
REASONING: <your reasoning for consolidating the information>
INFORMATION:
<consolidated information in the proper format that you have created>
"""
DC_FORMATTING_NO_BASE_DATA_PROMPT = f"""
You are an expert in text formatting. Your task is to take a given text and convert it 100 percent accurately \
into a new format.
Here is the text you are supposed to format:
{SEPARATOR_LINE}
{{text}}
{SEPARATOR_LINE}
Here is the format you are supposed to use:
{SEPARATOR_LINE}
{{format}}
{SEPARATOR_LINE}
Please start the generation directly with the formatted text. (Note that the output should not be code, but text.)
"""
DC_FORMATTING_WITH_BASE_DATA_PROMPT = f"""
You are an expert in text formatting. Your task is to take a given text and the initial \
base data provided by the user, and convert it 100 percent accurately \
into a new format. The base data may also contain important relationships that are critical \
for the formatting.
Here is the initial data provided by the user:
{SEPARATOR_LINE}
{{base_data}}
{SEPARATOR_LINE}
Here is the text you are supposed to combine (and format) with the initial data, adhering to the \
format instructions provided later in the prompt:
{SEPARATOR_LINE}
{{text}}
{SEPARATOR_LINE}
And here are the format instructions you are supposed to use:
{SEPARATOR_LINE}
{{format}}
{SEPARATOR_LINE}
Please start the generation directly with the formatted text. (Note that the output should not be code, but text.)
"""

View File

@@ -8,7 +8,7 @@ from onyx.prompts.constants import GENERAL_SEP_PAT
DEFAULT_SYSTEM_PROMPT = """
You are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient.
The current date is [[CURRENT_DATETIME]].{citation_reminder_or_empty}
The current date is [[CURRENT_DATETIME]].[[CITATION_GUIDANCE]]
# Response Style
You use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make your responses more readable and engaging.
@@ -38,68 +38,6 @@ DO NOT provide any links following the citations. Cite inline as opposed to leav
"""
# If there are any tools, this section is included, the sections below are for the available tools
TOOL_SECTION_HEADER = "\n\n# Tools\n"
# This section is included if there are search type tools, currently internal_search and web_search
TOOL_DESCRIPTION_SEARCH_GUIDANCE = """
For knowledge that you already have and that is unlikely to change, answer the user directly without using any tools.
When using any search type tool, do not make any assumptions and stay as faithful to the user's query as possible. Between internal and web search, think about whether the user's query is likely better answered by team-internal sources or online web pages. For queries that are short phrases, ambiguous/unclear, or keyword heavy, prioritize internal search. If ambiguous, prioritize internal search.
When searching for information, if the initial results cannot fully answer the user's query, try again with different tools or arguments. Do not repeat the same or very similar queries if it already has been run in the chat history.
"""
INTERNAL_SEARCH_GUIDANCE = """
## internal_search
Use the `internal_search` tool to search connected applications for information. Some examples of when to use `internal_search` include:
- Internal information: any time where there may be some information stored in internal applications that could help better answer the query.
- Niche/Specific information: information that is likely not found in public sources, things specific to a project or product, team, process, etc.
- Keyword Queries: queries that are heavily keyword based are often internal document search queries.
- Ambiguity: questions about something that is not widely known or understood.
"""
WEB_SEARCH_GUIDANCE = """
## web_search
Use the `web_search` tool to access up-to-date information from the web. Some examples of when to use `web_search` include:
- Freshness: if up-to-date information on a topic could change or enhance the answer. Very important for topics that are changing or evolving.
- Niche Information: detailed info not widely known or understood (but that is likely found on the internet).
- Accuracy: if the cost of outdated information is high, use web sources directly.
"""
OPEN_URLS_GUIDANCE = """
## open_urls
Use the `open_urls` tool to read the content of one or more URLs. Use this tool to access the contents of the most promising web pages from your searches.
You can open many URLs at once by passing multiple URLs in the array if multiple pages seem promising. Prioritize the most promising pages and reputable sources.
You should almost always use open_urls after a web_search call. Use this tool when a user asks about a specific provided URL.
"""
PYTHON_TOOL_GUIDANCE = """
## python
Use the `python` tool to execute Python code in an isolated sandbox. The tool will respond with the output of the execution or time out after 60.0 seconds.
Any files uploaded to the chat will automatically be available in the execution environment's current directory.
The current directory in the file system can be used to save and persist user files. Files written to the current directory will be returned with a `file_link`. Use this to give the user a way to download the file OR to display generated images.
Internet access for this session is disabled. Do not make external web requests or API calls as they will fail.
Use `openpyxl` to read and write Excel files. You have access to libraries like numpy, pandas, scipy, matplotlib, and PIL.
IMPORTANT: each call to this tool is independent. Variables from previous calls will NOT be available in the current call.
"""
GENERATE_IMAGE_GUIDANCE = """
## generate_image
NEVER use generate_image unless the user specifically requests an image.
"""
# Reminder message if any search tool has been run anytime in the chat turn
CITATION_REMINDER = """
Remember to provide inline citations in the format [1], [2], [3], etc. based on the "document" field of the documents.

View File

@@ -0,0 +1,44 @@
GENERATE_PLAN_TOOL_NAME = "generate_plan"
GENERATE_REPORT_TOOL_NAME = "generate_report"
RESEARCH_AGENT_TOOL_NAME = "research_agent"
# This is to ensure that even non-reasoning models can handle this more complex flow reasonably well.
THINK_TOOL_NAME = "think_tool"
# ruff: noqa: E501, W605 start
# It is hard for the open_urls tool to handle a ton of search results all at once, so limit to 3
WEB_SEARCH_TOOL_DESCRIPTION = """
## web_search
Use the web_search tool to get search results from the web. You should use this tool to get context for your research. Your queries should be optimized for search engines like Google. \
Use concise and specific queries and avoid merging multiple queries into one. You can call web_search with multiple queries at once (3 max) but generally only do this when there is a clear opportunity for parallel searching. \
If you use multiple queries, ensure that the queries are related in topic but not so similar that the results would be redundant.
"""
# This one is mostly similar to the one for the main flow but there won't be any user specified URLs to open.
OPEN_URLS_TOOL_DESCRIPTION = f"""
## open_urls
Use the `open_urls` tool to read the content of one or more URLs. Use this tool to access the contents of the most promising web pages from your searches. \
You can open many URLs at once by passing multiple URLs in the array if multiple pages seem promising. Prioritize the most promising pages and reputable sources. \
You should almost always use open_urls after a web_search call and sometimes after reasoning with the {THINK_TOOL_NAME} tool.
"""
OPEN_URLS_TOOL_DESCRIPTION_REASONING = """
## open_urls
Use the `open_urls` tool to read the content of one or more URLs. Use this tool to access the contents of the most promising web pages from your searches. \
You can open many URLs at once by passing multiple URLs in the array if multiple pages seem promising. Prioritize the most promising pages and reputable sources. \
You should almost always use open_urls after a web_search call.
"""
# NOTE: Internal search tool uses the same description as the default flow, not duplicating here.
# ruff: noqa: E501, W605 end
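
Presumably the reasoning variant above is used when the underlying model reasons natively between tool calls; a hypothetical selection helper (the actual wiring is not part of this diff) might look like:

def get_open_urls_tool_description(is_reasoning_model: bool) -> str:
    # Hypothetical: reasoning models skip the think_tool reference since they
    # reason natively between tool calls.
    if is_reasoning_model:
        return OPEN_URLS_TOOL_DESCRIPTION_REASONING
    return OPEN_URLS_TOOL_DESCRIPTION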

@@ -0,0 +1,166 @@
from onyx.prompts.deep_research.dr_tool_prompts import GENERATE_PLAN_TOOL_NAME
from onyx.prompts.deep_research.dr_tool_prompts import GENERATE_REPORT_TOOL_NAME
from onyx.prompts.deep_research.dr_tool_prompts import RESEARCH_AGENT_TOOL_NAME
from onyx.prompts.deep_research.dr_tool_prompts import THINK_TOOL_NAME
# ruff: noqa: E501, W605 start
CLARIFICATION_PROMPT = f"""
You are a clarification agent that runs prior to deep research. Assess whether you need to ask clarifying questions, or if the user has already provided enough information for you to start research. Clarifications are generally helpful.
If the user query is already very detailed or lengthy (more than 3 sentences), do not ask for clarification and instead call the `{GENERATE_PLAN_TOOL_NAME}` tool.
For context, the date is {{current_datetime}}.
Be conversational and friendly, prefer saying "could you" rather than "I need" etc.
If you need to ask questions, follow these guidelines:
- Be concise and do not ask more than 5 questions.
- If there are ambiguous terms or questions, ask the user to clarify.
- Your questions should be a numbered list for clarity.
- Make sure to gather all the information needed to carry out the research task in a concise, well-structured manner.
- Wrap up with a quick sentence on what the clarification will help with; it's ok to reference the user query closely here.
""".strip()
RESEARCH_PLAN_PROMPT = """
You are a research planner agent that generates the high level approach for deep research on a user query. Analyze the query carefully and break it down into main concepts and areas of exploration. \
Stick closely to the user query and stay on topic, but be curious and avoid duplicate or overlapping exploration directions. \
Be sure to take into account the time sensitive aspects of the research topic and make sure to emphasize up to date information where appropriate. \
Focus on providing thorough research of the user's query over being helpful.
For context, the date is {current_datetime}.
The research plan should be formatted as a numbered list of steps and have fewer than 7 individual steps.
Each step should be a standalone exploration question or topic that can be researched independently but may build on previous steps.
Output only the numbered list of steps with no additional prefix or suffix.
""".strip()
ORCHESTRATOR_PROMPT = f"""
You are an orchestrator agent for deep research. Your job is to conduct research by calling the {RESEARCH_AGENT_TOOL_NAME} tool with high level research tasks. \
This delegates the lower level research work to the {RESEARCH_AGENT_TOOL_NAME} which will provide back the results of the research.
For context, the date is {{current_datetime}}.
Before calling {GENERATE_REPORT_TOOL_NAME}, reason to double check that all aspects of the user's query have been well researched and that all key topics around the plan have been researched. \
There are cases where new discoveries from research may lead to a deviation from the original research plan.
In these cases, ensure that the new directions are thoroughly investigated prior to calling {GENERATE_REPORT_TOOL_NAME}.
NEVER output normal response tokens, you must only call tools.
# Tools
## {RESEARCH_AGENT_TOOL_NAME}
The research task provided to the {RESEARCH_AGENT_TOOL_NAME} should be reasonably high level but with a clear direction for investigation. \
It should not be a single short query; rather, it should be 1 or 2 descriptive sentences that outline the direction of the investigation.
CRITICAL - the {RESEARCH_AGENT_TOOL_NAME} only receives the task and has no additional context about the user's query, research plan, or message history. \
You absolutely must provide all of the context needed to complete the task in the argument to the {RESEARCH_AGENT_TOOL_NAME}.
You should call the {RESEARCH_AGENT_TOOL_NAME} MANY times before completing with the {GENERATE_REPORT_TOOL_NAME} tool.
You are encouraged to call the {RESEARCH_AGENT_TOOL_NAME} in parallel if the tasks are independent and do not build on each other, which is often the case. NEVER call more than 3 {RESEARCH_AGENT_TOOL_NAME} calls in parallel.
## {GENERATE_REPORT_TOOL_NAME}
You should call the {GENERATE_REPORT_TOOL_NAME} tool if any of the following conditions are met:
- You are close to or at the maximum number of cycles. You have currently used {{current_cycle_count}} of {{max_cycles}} cycles.
- You have researched all of the relevant topics of the research plan.
- You have shifted away from the original research plan and believe that you are done.
- You have all of the information needed to thoroughly answer all aspects of the user's query.
- The last research cycle yielded minimal new information and future cycles are unlikely to yield more information.
## {THINK_TOOL_NAME}
CRITICAL - use the {THINK_TOOL_NAME} to reason between every call to the {RESEARCH_AGENT_TOOL_NAME} and before calling {GENERATE_REPORT_TOOL_NAME}. You should treat this as chain-of-thought reasoning to think deeply on what to do next. \
Be curious, identify knowledge gaps and consider new potential directions of research. Use paragraph format, do not use bullet points or lists.
NEVER use the {THINK_TOOL_NAME} in parallel with {RESEARCH_AGENT_TOOL_NAME} or {GENERATE_REPORT_TOOL_NAME} calls.
Before calling {GENERATE_REPORT_TOOL_NAME}, double check that all aspects of the user's query have been researched and that all key topics around the plan have been researched (unless you have gone in a different direction).
# Research Plan
{{research_plan}}
""".strip()
USER_ORCHESTRATOR_PROMPT = f"""
Remember to refer to the system prompt and follow how to use the tools. Call the {THINK_TOOL_NAME} between every call to the {RESEARCH_AGENT_TOOL_NAME} and before calling {GENERATE_REPORT_TOOL_NAME}. Never run more than 3 {RESEARCH_AGENT_TOOL_NAME} calls in parallel.
Don't mention this reminder or underlying details about the system.
""".strip()
FINAL_REPORT_PROMPT = """
You are the final answer generator for a deep research task. Your job is to produce a thorough, balanced, and comprehensive answer on the research question provided by the user. \
You have access to high-quality, diverse sources collected by secondary research agents as well as their analysis of the sources.
IMPORTANT - You get straight to the point, never providing a title and avoiding lengthy introductions/preambles.
For context, the date is {current_datetime}.
Users have explicitly selected the deep research mode and will expect a long and detailed answer. It is ok and encouraged that your response is many pages long.
You use different text styles and formatting to make the response easier to read. You may use markdown sparingly, when necessary, to make the response more digestible.
Not every fact retrieved will be relevant to the user's query.
Provide inline citations in the format [1], [2], [3], etc. based on the citations included by the research agents.
"""
USER_FINAL_REPORT_QUERY = f"""
Provide a comprehensive answer to my previous query. CRITICAL: be as detailed as possible, stay on topic, and provide clear organization in your response.
Ignore the formatting style of the intermediate {RESEARCH_AGENT_TOOL_NAME} reports; those are not end-user facing and differ from your task.
Provide inline citations in the format [1], [2], [3], etc. based on the citations included by the research agents. The citations should be just a number in a bracket, nothing additional.
"""
# Reasoning Model Variants of the prompts
ORCHESTRATOR_PROMPT_REASONING = f"""
You are an orchestrator agent for deep research. Your job is to conduct research by calling the {RESEARCH_AGENT_TOOL_NAME} tool with high level research tasks. \
This delegates the lower level research work to the {RESEARCH_AGENT_TOOL_NAME} which will provide back the results of the research.
For context, the date is {{current_datetime}}.
Before calling {GENERATE_REPORT_TOOL_NAME}, reason to double check that all aspects of the user's query have been well researched and that all key topics around the plan have been researched.
There are cases where new discoveries from research may lead to a deviation from the original research plan. In these cases, ensure that the new directions are thoroughly investigated prior to calling {GENERATE_REPORT_TOOL_NAME}.
Between calls, think deeply on what to do next. Be curious, identify knowledge gaps and consider new potential directions of research. Use paragraph format for your reasoning, do not use bullet points or lists.
NEVER output normal response tokens, you must only call tools.
# Tools
## {RESEARCH_AGENT_TOOL_NAME}
The research task provided to the {RESEARCH_AGENT_TOOL_NAME} should be reasonably high level but with a clear direction for investigation. \
It should not be a single short query; rather, it should be 1 or 2 descriptive sentences that outline the direction of the investigation.
CRITICAL - the {RESEARCH_AGENT_TOOL_NAME} only receives the task and has no additional context about the user's query, research plan, or message history. \
You absolutely must provide all of the context needed to complete the task in the argument to the {RESEARCH_AGENT_TOOL_NAME}.
You should call the {RESEARCH_AGENT_TOOL_NAME} MANY times before completing with the {GENERATE_REPORT_TOOL_NAME} tool.
You are encouraged to call the {RESEARCH_AGENT_TOOL_NAME} in parallel if the tasks are independent and do not build on each other, which is often the case.
NEVER call more than 3 {RESEARCH_AGENT_TOOL_NAME} calls in parallel.
## {GENERATE_REPORT_TOOL_NAME}
You should call the {GENERATE_REPORT_TOOL_NAME} tool if any of the following conditions are met:
- You are close to or at the maximum number of cycles. You have currently used {{current_cycle_count}} of {{max_cycles}} cycles.
- You have researched all of the relevant topics of the research plan.
- You have shifted away from the original research plan and believe that you are done.
- You have all of the information needed to thoroughly answer all aspects of the user's query.
- The last research cycle yielded minimal new information and future cycles are unlikely to yield more information.
# Research Plan
{{research_plan}}
""".strip()
USER_ORCHESTRATOR_PROMPT_REASONING = """
Remember to refer to the system prompt and follow how to use the tools. Never run more than 3 research_agent calls in parallel.
Don't mention this reminder or underlying details about the system.
""".strip()
# ruff: noqa: E501, W605 end

@@ -0,0 +1,88 @@
from onyx.prompts.deep_research.dr_tool_prompts import GENERATE_REPORT_TOOL_NAME
from onyx.prompts.deep_research.dr_tool_prompts import THINK_TOOL_NAME
MAX_RESEARCH_CYCLES = 3
# ruff: noqa: E501, W605 start
RESEARCH_AGENT_PROMPT = f"""
You are a highly capable, thoughtful, and precise research agent that conducts research on a specific topic. Prefer being thorough in research over being helpful. Be curious but stay strictly on topic. \
You iteratively call the tools available to you including {{available_tools}} until you have completed your research at which point you call the {GENERATE_REPORT_TOOL_NAME} tool.
NEVER output normal response tokens, you must only call tools.
For context, the date is {{current_datetime}}.
# Tools
You have a limited number of cycles of searches to complete your research but you do not have to use all cycles. \
Each set of web searches increments the cycle by 1 (only web searches increment the cycle count). You are on cycle {{current_cycle_count}} of {MAX_RESEARCH_CYCLES}.\
{{optional_internal_search_tool_description}}\
{{optional_web_search_tool_description}}\
{{optional_open_urls_tool_description}}
## {THINK_TOOL_NAME}
CRITICAL - use the think tool after every set of searches and reads. \
You MUST use the {THINK_TOOL_NAME} before calling the web_search tool for all calls to web_search except for the first call. \
Use the {THINK_TOOL_NAME} before calling the {GENERATE_REPORT_TOOL_NAME} tool.
After a set of searches + reads, use the {THINK_TOOL_NAME} to analyze the results and plan the next steps.
- Reflect on the key information found with relation to the task.
- Reason thoroughly about what could be missing, the knowledge gaps, and what queries might address them, \
or why there is enough information to answer the research task comprehensively.
## {GENERATE_REPORT_TOOL_NAME}
Once you have completed your research, call the {GENERATE_REPORT_TOOL_NAME} tool. \
You should only call this tool after you have fully researched the topic. \
Consider other potential areas of research and weigh that against the materials already gathered before calling this tool.
""".strip()
RESEARCH_REPORT_PROMPT = """
You are a highly capable and precise research sub-agent that has conducted research on a specific topic. \
Your job is now to organize the findings to return a comprehensive report that preserves all relevant statements and information that has been gathered in the existing messages. \
The report will be seen by another agent rather than a user, so keep it free of formatting or commentary and instead focus on the facts only. \
Do not give it a title, do not break it down into sections, and do not provide any of your own conclusions/analysis.
CRITICAL - This report should be as long as necessary to return ALL of the information that the researcher has gathered. It should be several pages long so as to capture as much detail as possible from the research. \
It cannot be stressed enough that this report must be EXTREMELY THOROUGH and COMPREHENSIVE. Only this report is going to be returned, so it's CRUCIAL that you don't lose any details from the raw messages.
Remove any obviously irrelevant or duplicative information.
If a statement seems untrustworthy or contradicts other statements, it is important to flag it.
Cite all sources INLINE using the format [1], [2], [3], etc. based on the `document` field of the source. \
Cite inline as opposed to leaving all citations until the very end of the response.
"""
USER_REPORT_QUERY = """
Please write me a comprehensive report on the research topic given the context above. As a reminder, the original topic was:
{research_topic}
Remember to include as much information as possible and stay as faithful to the original sources as possible. \
Keep it free of formatting and focus on the facts only. Be sure to include all context for each fact to avoid misinterpretation or misattribution.
CRITICAL - BE EXTREMELY THOROUGH AND COMPREHENSIVE, YOUR RESPONSE SHOULD BE SEVERAL PAGES LONG.
"""
# Reasoning Model Variants of the prompts
RESEARCH_AGENT_PROMPT_REASONING = f"""
You are a highly capable, thoughtful, and precise research agent that conducts research on a specific topic. Prefer being thorough in research over being helpful. Be curious but stay strictly on topic. \
You iteratively call the tools available to you including web_search and open_urls until you have completed your research, at which point you call the {GENERATE_REPORT_TOOL_NAME} tool. Between calls, think about the results of the previous tool call and plan the next steps. \
Reason thoroughly about what could be missing, identify knowledge gaps, and what queries might address them. Or consider why there is enough information to answer the research task comprehensively.
Once you have completed your research, call the {GENERATE_REPORT_TOOL_NAME} tool.
NEVER output normal response tokens, you must only call tools.
For context, the date is {{current_datetime}}.
# Tools
You have a limited number of cycles of searches to complete your research but you do not have to use all cycles. Each set of web searches increments the cycle by 1. You are on cycle {{current_cycle_count}} of {MAX_RESEARCH_CYCLES}.\
{{optional_internal_search_tool_description}}\
{{optional_web_search_tool_description}}\
{{optional_open_urls_tool_description}}
## {GENERATE_REPORT_TOOL_NAME}
Once you have completed your research, call the {GENERATE_REPORT_TOOL_NAME} tool. You should only call this tool after you have fully researched the topic.
""".strip()
# ruff: noqa: E501, W605 end

File diff suppressed because it is too large.

@@ -7,6 +7,7 @@ from onyx.configs.constants import DocumentSource
from onyx.prompts.chat_prompts import ADDITIONAL_INFO
from onyx.prompts.chat_prompts import COMPANY_DESCRIPTION_BLOCK
from onyx.prompts.chat_prompts import COMPANY_NAME_BLOCK
from onyx.prompts.chat_prompts import REQUIRE_CITATION_GUIDANCE
from onyx.prompts.constants import CODE_BLOCK_PAT
from onyx.server.settings.store import load_settings
from onyx.utils.logger import setup_logger
@@ -16,6 +17,7 @@ logger = setup_logger()
_DANSWER_DATETIME_REPLACEMENT_PAT = "[[CURRENT_DATETIME]]"
_CITATION_GUIDANCE_REPLACEMENT_PAT = "[[CITATION_GUIDANCE]]"
_BASIC_TIME_STR = "The current date is {datetime_info}."
@@ -57,6 +59,44 @@ def replace_current_datetime_tag(
)
def replace_citation_guidance_tag(
    prompt_str: str,
    *,
    should_cite_documents: bool = False,
    include_all_guidance: bool = False,
) -> tuple[str, bool]:
    """
    Replace [[CITATION_GUIDANCE]] placeholder with citation guidance if needed.

    Returns:
        tuple[str, bool]: (prompt_with_replacement, should_append_fallback)
            - prompt_with_replacement: The prompt with placeholder replaced (or unchanged if not present)
            - should_append_fallback: True if citation guidance should be appended
              (placeholder is not present and citations are needed)
    """
    placeholder_was_present = _CITATION_GUIDANCE_REPLACEMENT_PAT in prompt_str

    if not placeholder_was_present:
        # Placeholder not present - caller should append if citations are needed
        should_append = (
            should_cite_documents or include_all_guidance
        ) and REQUIRE_CITATION_GUIDANCE not in prompt_str
        return prompt_str, should_append

    citation_guidance = (
        REQUIRE_CITATION_GUIDANCE
        if should_cite_documents or include_all_guidance
        else ""
    )

    replaced_prompt = prompt_str.replace(
        _CITATION_GUIDANCE_REPLACEMENT_PAT,
        citation_guidance,
    )

    return replaced_prompt, False
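
# Illustrative usage of replace_citation_guidance_tag -- a hypothetical call site,
# not part of this diff.
_tagged, _should_append = replace_citation_guidance_tag(
    "Answer the question.\n\n[[CITATION_GUIDANCE]]",
    should_cite_documents=True,
)
# -> the placeholder is swapped for REQUIRE_CITATION_GUIDANCE and _should_append is False.
_bare, _should_append = replace_citation_guidance_tag(
    "Answer the question.",
    should_cite_documents=True,
)
# -> returned unchanged with _should_append True, so the caller appends the guidance itself.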
def handle_onyx_date_awareness(
prompt_str: str,
# We always replace the pattern [[CURRENT_DATETIME]] if it shows up

Some files were not shown because too many files have changed in this diff.