Compare commits

..

1966 Commits

Author SHA1 Message Date
Yuhong Sun
3921d105dc ok 2025-12-04 14:29:59 -08:00
Yuhong Sun
008e0021f1 mypy 2025-12-04 13:39:15 -08:00
Nikolas Garza
e166c1b095 chore: bump react version for sec vuln (#6600) 2025-12-04 17:04:30 +00:00
SubashMohan
84be68ef7c refactor(MCP): mcp backend and schema (#6475) 2025-12-04 08:24:44 +00:00
Yuhong Sun
90e9af82bf chore: Cleanup chat turn and prompts (#6589) 2025-12-03 23:46:06 -08:00
Raunak Bhagat
7f36fb2a4c refactor: Refresh "Agent Icon" to the new "Agent Avatar" standard (#6509) 2025-12-03 21:18:54 -08:00
Nikolas Garza
307464a736 feat: surface better model names in the ui/chat bar (#6514) 2025-12-04 04:51:38 +00:00
Raunak Bhagat
1d5c8bdb20 refactor: Icon cleanup (#6573) 2025-12-04 04:16:40 +00:00
dependabot[bot]
6de626ecc3 chore(deps): Bump next from 16.0.1 to 16.0.7 in /web (#6563)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Wenxi Onyx <wenxi@onyx.app>
2025-12-03 19:52:05 -08:00
roshan
6663c81aa6 fix: use absolute path for icon imports (#6585) 2025-12-04 02:29:10 +00:00
dependabot[bot]
35ca94c17e chore(deps): Bump werkzeug from 3.1.1 to 3.1.4 in /backend/requirements (#6521)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2025-12-03 18:08:21 -08:00
Jamison Lahman
431f652be8 chore(pre-commit): upgrade some hooks to latest (#6583) 2025-12-03 18:07:00 -08:00
Yuhong Sun
6535d85ceb chore: Prompt builder update (#6582) 2025-12-03 17:30:17 -08:00
Chris Weaver
3a349d6ab3 fix: jira attribute error (#6584) 2025-12-03 17:26:21 -08:00
Chris Weaver
ddae686dc7 fix: workaround for bugged Confluence API (#6311) 2025-12-04 01:03:51 +00:00
roshan
0e42891cbf fix: install node dependencies for quality-checks pre-commit hook (#6580) 2025-12-04 00:38:43 +00:00
Chris Weaver
823b28b4a7 fix: improve jira perm sync handling (#6575) 2025-12-03 23:45:34 +00:00
Jamison Lahman
828036ceb8 chore(devtools): introduce the Onyx Developer Script, ods (#6559) 2025-12-03 23:45:09 +00:00
Wenxi
2a40ceab26 refactor(API): replace redundant api key dep from ingestion endpoints (#6568) 2025-12-03 23:39:27 +00:00
Yuhong Sun
f03f2bff78 chore: continue cleanup of dead files (#6579) 2025-12-03 15:46:44 -08:00
Raunak Bhagat
f9a548fbe9 refactor: Input styles (#6571) 2025-12-03 22:31:45 +00:00
Wenxi
8b45f911ff refactor(openapi generation): generate python client with openapi generation script for one click integration test setup (#6574) 2025-12-03 21:47:20 +00:00
Yuhong Sun
ae64ded7bb Removing LangGraph code (#6578) 2025-12-03 14:07:18 -08:00
Jamison Lahman
7287e3490d chore(pre-commit): disable mypy hook (#6576) 2025-12-03 13:57:00 -08:00
Yuhong Sun
7681c11585 chore: Removing Retrievaldoc (#6577) 2025-12-03 13:49:22 -08:00
Richard Guan
365e31a7f3 chore(tool): call output fix (#6572) 2025-12-03 21:28:06 +00:00
Nikolas Garza
dd33886946 chore: add fe type checking to pre-commit hooks (#6569) 2025-12-03 20:29:08 +00:00
Raunak Bhagat
6cdd5b7d3e fix: Fix failing type checks in message feedback tests (#6567)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-12-03 12:47:45 -08:00
Yuhong Sun
7b6ae2b72a chore: Cleanup PreviousMessage class (#6570) 2025-12-03 12:37:02 -08:00
Yuhong Sun
629502ef6a fix: Basic Reenabling Code Interpreter (#6566) 2025-12-03 11:50:11 -08:00
Yuhong Sun
927e8addb5 fix: Reasoning Block Linebreaks (#6552) 2025-12-03 18:28:26 +00:00
Evan Lohn
14712af431 fix: expand special casing around sharepoint shared drives (#6539) 2025-12-03 18:12:19 +00:00
Richard Guan
4b38b91674 chore(framework): cleanup (#6538) 2025-12-03 18:01:11 +00:00
Emerson Gomes
508c248032 fix: prevent heartbeat timeout state pollution in validation loop (#5782)
Co-authored-by: Claude <noreply@anthropic.com>
2025-12-03 10:08:53 -08:00
Emerson Gomes
45db59eab1 db: remove duplicate chunk_stats deletion in delete_documents_complete__no_commit (#5792) 2025-12-03 10:02:57 -08:00
Yuhong Sun
5a14055a29 feat: Some UI enhancements for tools (#6550)
Co-authored-by: SubashMohan <subashmohan75@gmail.com>
2025-12-03 16:42:49 +00:00
Nikolas Garza
a698f01cab feat: add model metadata enrichments for LiteLLM (#6541)
Co-authored-by: Justin Tahara <105671973+justin-tahara@users.noreply.github.com>
2025-12-03 06:18:02 +00:00
Jamison Lahman
4e4bf197cf chore(gha): docker cache from HEAD (#6549) 2025-12-03 03:57:07 +00:00
dependabot[bot]
517b0d1e70 chore(deps): Bump mcp from 1.19.0 to 1.23.0 in /backend/requirements (#6526)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2025-12-03 02:55:16 +00:00
Yuhong Sun
7b2b163d4e chore: Removes the translation layer for the new backend packets (#6546) 2025-12-03 02:40:55 +00:00
Jamison Lahman
29b28c8352 chore(deployment): run tests on tag push (#6543) 2025-12-03 01:49:21 +00:00
Jamison Lahman
83b624b658 chore(gha): /uv pip install/uv run --with/ (#6545) 2025-12-02 17:48:22 -08:00
Jamison Lahman
d3cd68014a chore(gha): persist docker cache intra-PR builds (#6524) 2025-12-03 01:14:10 +00:00
Jamison Lahman
64d9fd97ec chore(zizmor): upgrade and track verison via pyproject (#6542) 2025-12-02 17:12:10 -08:00
Jamison Lahman
7a9e2ebec6 chore(deployment): check if tagged correctly (#6537) 2025-12-03 00:39:57 +00:00
Richard Guan
51a69d7e55 chore(tracing): add tracing to new backend (#6532) 2025-12-02 22:38:23 +00:00
Nikolas Garza
f19362ce27 fix: eager load persona in slack channel config (#6535) 2025-12-02 22:13:24 +00:00
Justin Tahara
0c3330c105 chore(test): Playwright for User Feedback (#6534) 2025-12-02 21:14:12 +00:00
きわみざむらい
81cb0f2518 fix: Add proper DISABLE_MODEL_SERVER environment variable support (#6468)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2025-12-02 21:11:09 +00:00
Chris Weaver
beb4e619e7 feat: move to client side rendering + incremental loading (#6464)
Co-authored-by: Claude <noreply@anthropic.com>
2025-12-02 12:30:43 -08:00
Yuhong Sun
0fa1d5b0ca Update search_tool.py description (#6531) 2025-12-02 11:08:36 -08:00
Yuhong Sun
1e30882222 Update README.md (#6530) 2025-12-02 11:07:19 -08:00
Yuhong Sun
42996a63fe README for DB Models (#6529) 2025-12-02 11:00:48 -08:00
Yuhong Sun
4a38068192 Knowledge for future (#6528) 2025-12-02 10:48:49 -08:00
Emerson Gomes
97f66b68c1 Harden markdown link protocol handling (#6517) 2025-12-02 17:49:44 +00:00
Wenxi
aeafd83cd1 fix(migration): new chat history downgrade (#6527) 2025-12-02 17:47:33 +00:00
Justin Tahara
0ba9a873e9 feat(pginto): Support IAM Auth (#6520) 2025-12-01 22:40:09 -06:00
Justin Tahara
b72bac993f feat(helm): PGInto Workflow (#6519) 2025-12-01 21:54:06 -06:00
Yuhong Sun
9572c63089 Fix Alembic Downgrade just in case (#6515) 2025-12-01 18:01:38 -08:00
Nikolas Garza
c4505cdb06 chore: remove fed slack entities button on doc set edit page (#6385) 2025-12-02 01:26:30 +00:00
Jamison Lahman
9055691c38 chore(docker): explicitly default env to empty string (#6511) 2025-12-02 01:25:39 +00:00
Raunak Bhagat
1afa7b0689 fix: Edit separator (#6513) 2025-12-01 17:15:23 -08:00
Evan Lohn
72c96a502e feat: mcp pass through oauth (#6469) 2025-12-02 00:35:08 +00:00
acaprau
093b399472 feat(persona): Add GET paginated personas to REST API (#6448)
Co-authored-by: Andrei <andrei@Andreis-MacBook-Pro.local>
2025-12-02 00:14:47 +00:00
Jamison Lahman
d89dd3c76b chore(gha): remove duplicate python checks (#6510) 2025-12-01 16:19:15 -08:00
dependabot[bot]
a24d0aa26d chore(deps): Bump actions/upload-artifact from 4 to 5 (#6502)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2025-12-01 23:37:28 +00:00
dependabot[bot]
5e581c2c60 chore(deps): Bump actions/setup-python from 6.0.0 to 6.1.0 (#6501)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-12-01 23:36:43 +00:00
dependabot[bot]
17ea20ef5c chore(deps): Bump astral-sh/setup-uv from 3.2.4 to 7.1.4 (#6503)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-12-01 23:32:06 +00:00
Justin Tahara
0b8207ef4c fix(feedback): API Endpoint fix (#6500) 2025-12-01 17:28:32 -06:00
Yuhong Sun
c26da8dc75 feat: Updated Processing for Context to the LLM (#6485)
Co-authored-by: Vega <33913017+weijia619@users.noreply.github.com>
2025-12-01 14:41:13 -08:00
Richard Guan
f1f3cd392c chore(fix): langfuse tracing (#6505) 2025-12-01 21:54:53 +00:00
Justin Tahara
36e31e9ffa fix(helm): Code Interpreter Chart release (#6506) 2025-12-01 15:41:20 -06:00
Chris Weaver
f57c12cdaa feat: add code-interpreter to helm chart (#6489) 2025-12-01 13:37:56 -08:00
brano-rohlik
514c76c3ea feat(vertex-ai): add Gemini 3 Pro and Claude Opus 4.5 models (#6481) 2025-12-01 09:37:52 -08:00
Chris Weaver
921e82b839 fix: code interpreter UI (#6498) 2025-12-01 09:36:39 -08:00
Chris Weaver
6b7c6c9a37 fix: icon coloring in Renderer (#6491) 2025-11-30 18:53:26 -08:00
SubashMohan
53ae1b598b fix(WebSearch): adjust Separator styling for improved layout consistency (#6487) 2025-11-30 11:45:37 +05:30
SubashMohan
83e756bf05 fix(Projects): file ordering in project panel (#6334) 2025-11-29 19:40:01 +00:00
Jamison Lahman
19b485cffd chore(deps): upgrade supervisor 4.2.5->4.3.0 (#6466) 2025-11-26 18:38:13 -05:00
Jamison Lahman
f5a99053ac chore(deps): upgrade dropbox 11.36.2->12.0.2 (#6467) 2025-11-26 18:10:13 -05:00
Chris Weaver
91f0377dd5 chore: enable code interpreter tests (#6404) 2025-11-26 14:55:07 -08:00
Jamison Lahman
25522dfbb8 chore(gha): setup-python accepts requirements to install (#6463) 2025-11-26 17:27:30 -05:00
Jamison Lahman
b0e124ec89 chore(deps): upgrade pytest-asyncio 0.22.0->1.3.0 (#6461) 2025-11-26 16:39:52 -05:00
Raunak Bhagat
b699a65384 refactor: Edit Modal.Header to be more concise and adherent to mocks (#6452) 2025-11-26 13:17:51 -08:00
Jamison Lahman
cc82d6e506 chore(deps): remove non-dev packages (#6462) 2025-11-26 16:17:01 -05:00
Jamison Lahman
8a6db7474d chore(gha): assert GHA jobs have timeouts (#6455) 2025-11-26 18:14:23 +00:00
Jamison Lahman
fd9aea212b chore(dev): run mypy and uv-sync on pre-commit (#6454) 2025-11-26 17:24:28 +00:00
acaprau
4aed383e49 chore(logs): When final doc for context pruning gets pruned, that prob doesn't need to be an error (#6451)
Co-authored-by: Andrei <andrei@Andreis-MacBook-Pro.local>
2025-11-25 22:41:47 -08:00
Justin Tahara
d0ce313b1a fix(google): Fix embedding scopes (#6450) 2025-11-25 22:10:42 -06:00
Jamison Lahman
4d32c9f5e0 chore(python): use uv to manage and compile requirements (#6291) 2025-11-26 03:01:52 +00:00
Justin Tahara
158fe31b71 fix(azure): Normalizing Azure Target URIs (#6443) 2025-11-26 00:19:22 +00:00
Raunak Bhagat
97cddc1dd4 fix: Line item cleanup (#6444)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-25 16:11:16 -08:00
Chris Weaver
c520a4ec17 fix: spinner during CSV load (#6441)
Co-authored-by: SubashMohan <subashmohan75@gmail.com>
2025-11-25 22:01:55 +00:00
Raunak Bhagat
9c1f8cc98c refactor: Line item cleanup (#6434) 2025-11-25 13:51:17 -08:00
Justin Tahara
58ba8cc68a chore(langfuse): Remove Env Var (#6440) 2025-11-25 15:32:15 -06:00
Evan Lohn
a307b0d366 fix: use raw mcp url (#6432) 2025-11-25 21:10:03 +00:00
Wenxi
e34f58e994 refactor(tests): use PATManager for tests that use PATs (#6438) 2025-11-25 15:39:49 -05:00
Justin Tahara
7f6dd2dc93 feat(api): Add Users to Group Endpoint (#6427) 2025-11-25 20:12:20 +00:00
Wenxi
ef3daa58b3 feat(claude): update claude models (#6433) 2025-11-25 14:24:47 -05:00
Raunak Bhagat
972c33046e fix: Responsiveness flash fix (#6422) 2025-11-25 10:27:41 -08:00
Jamison Lahman
802248c4e4 chore(python): update stale external type stubs (#6429) 2025-11-25 17:46:12 +00:00
Justin Tahara
f359c44183 fix(gemini): Migrate from Vertex AI to Gemini (#6424) 2025-11-25 17:16:55 +00:00
Jamison Lahman
bab2220091 chore(db): onyx_list_tenants.py --csv -n [count] (#6425) 2025-11-25 17:01:35 +00:00
Wenxi
bc35354ced feat(MCP): basic Onyx MCP server with search tools and indexed sources resource (#6309) 2025-11-25 02:30:55 +00:00
Jamison Lahman
742dd23fdd chore(deps): upgrade psutil: 5.9.8->7.1.3 (#6300) 2025-11-25 01:23:31 +00:00
Richard Guan
ea5690db81 chore(hotfix): tool choice bug (#6417) 2025-11-24 14:41:57 -08:00
Justin Tahara
853ca635d2 feat(helm): Add Deployment Labels (#6421) 2025-11-24 16:25:15 -06:00
Wenxi
c4d2fc9492 feat(API): make EE query APIs CE (#6411) 2025-11-24 21:15:52 +00:00
Justin Tahara
7aa12c0a36 feat(claude): Adding Opus 4.5 (#6415) 2025-11-24 15:27:58 -06:00
Wenxi
e74cf14401 chore(docker): make container startup checks case-less-sensitive (#6412) 2025-11-24 20:48:23 +00:00
Justin Tahara
75c42ffa9d feat(claude): Add Gov Cloud Names (#6414) 2025-11-24 20:40:23 +00:00
dependabot[bot]
d6fbb7affd chore(deps): Bump actions/checkout from 4.3.0 to 6.0.0 (#6410)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2025-11-24 18:57:28 +00:00
dependabot[bot]
75cee70bbb chore(deps): Bump actions/stale from 9.1.0 to 10.1.0 (#6409)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-24 18:26:02 +00:00
Jamison Lahman
1c8b819aa2 chore(gha): fix zizmor for .github/actions/ (#6399) 2025-11-24 17:11:43 +00:00
Nikolas Garza
b7cf33a4cc fix: prevent slack bot from always using fed slack connector (#6400) 2025-11-24 02:37:28 +00:00
Nikolas Garza
b06459f674 chore: unify checkbox implementation across the frontend (#6345) 2025-11-23 21:18:56 +00:00
Raunak Bhagat
920db6b3c2 fix: Input select state-rendering fix (#6402) 2025-11-23 12:32:33 -08:00
Raunak Bhagat
b7e4b65a74 refactor: Consolidate hover, active, and focus styles (#6397) 2025-11-23 19:09:40 +00:00
Raunak Bhagat
e648e0f725 fix: Fix non-persistence issue with input-select (#6398) 2025-11-23 10:24:12 -08:00
SubashMohan
c8a3368fce fix(projects): Add Create New Project option in chat move menu (#6353) 2025-11-23 10:17:05 +00:00
SubashMohan
f74b02ad9e feat(UserFilesModal): add file count divider and conditional rendering (#6379) 2025-11-23 15:47:59 +05:30
SubashMohan
65b59c4a73 feat(projects): Improved Folder Icon Animation (#6354) 2025-11-23 09:46:25 +00:00
Raunak Bhagat
b74bcd0efc refactor: Separator cleanup (#6396) 2025-11-22 20:39:36 -08:00
Raunak Bhagat
8c133b3853 refactor: Input select (#6290)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-11-23 02:00:20 +00:00
Raunak Bhagat
67554cef96 refactor: Input text area cleanup (#6395) 2025-11-22 17:37:54 -08:00
Jamison Lahman
07e03f3677 fix(docker): chown /app directory (#6390) 2025-11-22 07:29:58 +00:00
Chris Weaver
33fee46d71 feat: code interpreter (python) (#6136) 2025-11-22 07:19:25 +00:00
Jamison Lahman
72f5e3d38f chore(dev): docker-compose.multitenant-dev respects HOST_PORT (#6388) 2025-11-22 07:11:43 +00:00
Jamison Lahman
f89380ad87 chore(gha): increase playwright runner volume size: 40->50gb (#6392) 2025-11-21 21:48:35 -08:00
Raunak Bhagat
e6f00098f2 refactor: (light) Refresh of the "Shared Chat Display" (#6387)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-21 20:58:46 -08:00
Evan Lohn
9100afa594 feat: Allow attaching mcp tools to default assistant (#6343) 2025-11-21 17:29:34 -08:00
Raunak Bhagat
93d2febf2a fix: Update buttons and stylings for new-team-modal (#6384) 2025-11-21 21:26:51 +00:00
Raunak Bhagat
693286411a feat: Responsiveness (#6383)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-21 21:01:27 +00:00
Justin Tahara
01a3064ca3 fix(testrail): Linting (#6382) 2025-11-21 10:50:08 -08:00
sashank-rayapudi-ai
09a80265ee feat(testrail): Implement a read-only custom connector for Testrail (#6084) 2025-11-21 10:16:40 -08:00
Wenxi
2a77481c1e test(onboarding): add playwright test for onboarding flow (#6376) 2025-11-21 12:23:37 -05:00
Jamison Lahman
6838487689 chore(deployments): separate flag for model-server, enable nightly (#6377) 2025-11-21 04:29:41 +00:00
Jamison Lahman
1713c24080 chore(docker): breakup model-server model layers (#6370) 2025-11-21 03:47:47 +00:00
Chris Weaver
73b3a2525a fix: chat switching (#6374) 2025-11-20 18:32:54 -08:00
Wenxi
59738d9243 feat: cross link cookies (#6371) 2025-11-21 02:03:52 +00:00
Wenxi
c0ff9c623b feat(APIs): web search apis and indexed sources api (#6363) 2025-11-20 20:23:06 -05:00
Jessica Singh
c03979209a fix(ui): icon alignment + color (#6373) 2025-11-20 17:16:10 -08:00
Justin Tahara
a0b7639693 fix(connectors): Normalizing Onyx Metatada Connector Type (#6315) 2025-11-21 00:46:45 +00:00
Raunak Bhagat
e3ede3c186 fix: Sidebar fixes (#6358) 2025-11-21 00:35:31 +00:00
Jessica Singh
092dbebdf2 fix(migration): exa env var into db (#6366) 2025-11-21 00:12:09 +00:00
Justin Tahara
838e2fe924 chore(bedrock): Add better logging (#6368) 2025-11-20 23:38:19 +00:00
Chris Weaver
48e2bfa3eb chore: prevent sentry spam on fake issue (#6369) 2025-11-20 22:47:30 +00:00
Jamison Lahman
2a004ad257 chore(deployments): fix nightly tagging + add alerts & workflow_dispatch (#6367) 2025-11-20 21:55:24 +00:00
Wenxi
416c7fd75e chore(WebSearch): remove old web search env vars and update tooltip (#6365)
Co-authored-by: justin-tahara <justintahara@gmail.com>
2025-11-20 21:09:24 +00:00
Justin Tahara
a4372b461f feat(helm): Add Tolerations and Affinity (#6362) 2025-11-20 20:25:20 +00:00
mristau-alltrails
7eb13db6d9 SECURITY FIX: CVE-2023-38545 and CVE-2023-38546 (#6356) 2025-11-20 20:11:35 +00:00
Justin Tahara
c0075d5f59 fix(docprocessing): Pause Failing Connectors (#6350) 2025-11-20 19:14:56 +00:00
Wenxi
475a3afe56 fix(connector): handle hubspot ticket with None content (#6357) 2025-11-20 13:35:46 -05:00
SubashMohan
bf5b8e7bae fix(Project): project pending issues (#6099) 2025-11-20 17:53:08 +00:00
Jamison Lahman
4ff28c897b chore(dev): nginx container port 80 respects HOST_PORT_80 (#6338) 2025-11-20 17:48:10 +00:00
SubashMohan
ec9e9be42e Fix/user file modal (#6333) 2025-11-20 16:41:38 +00:00
Nikolas Garza
af5fa8fe54 fix: web search and image generation tool playwright test failures (#6347) 2025-11-20 07:13:05 +00:00
Jamison Lahman
03a9e9e068 chore(gha): playwright browser cache is arch-aware (#6351) 2025-11-20 03:28:53 +00:00
Richard Guan
ad81c3f9eb chore(tracing): updates (#6322) 2025-11-20 00:58:00 +00:00
Jamison Lahman
62129f4ab9 chore(gha): require playwright passing on merge (#6346) 2025-11-20 00:55:19 +00:00
Jamison Lahman
b30d38c747 chore(gha): fix zizmor issues (#6344) 2025-11-19 23:57:34 +00:00
Nikolas Garza
0596b57501 fix: featured assistant typo (#6341) 2025-11-19 14:44:54 -08:00
Jamison Lahman
482b2c4204 chore(gha): run uvx zizmor --fix=all (#6342) 2025-11-19 14:26:45 -08:00
Jamison Lahman
df155835b1 chore(docker): docker bake UX (#6339)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-19 14:19:53 -08:00
Richard Guan
fd0762a1ee chore(agent): framework query improvements (#6297) 2025-11-19 21:43:33 +00:00
Jamison Lahman
bd41618dd9 chore(deployments): correctly set --debug for docker build (#6337) 2025-11-19 11:04:15 -08:00
Justin Tahara
5a7c6312af feat(jwt): JIT provision from token (#6252) 2025-11-19 10:06:20 -08:00
Raunak Bhagat
a477508bd7 fix: Fix header flashing (#6331)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-19 09:27:49 -08:00
Raunak Bhagat
8ac34a8433 refactor: input type in fixes (#6335) 2025-11-19 08:31:39 -08:00
Raunak Bhagat
2c51466bc3 fix: Some minor touch-ups for the new modal (#6332) 2025-11-19 14:03:15 +00:00
Raunak Bhagat
62966bd172 fix: Switch fix (#6279) 2025-11-19 01:40:40 -08:00
Jamison Lahman
a8d4482b59 chore(deployments): set provenance=false and flag debug (#6330) 2025-11-18 22:26:53 -08:00
Jamison Lahman
dd42a45008 chore(deployments): flag to disable docker caching (#6328) 2025-11-19 04:07:07 +00:00
Jessica Singh
a368556282 feat(web search providers): adding support and changing env var approach (#6273) 2025-11-19 02:49:54 +00:00
Evan Lohn
679d1a5ef6 fix: openpyxl bug (#6317) 2025-11-19 00:59:46 +00:00
Nikolas Garza
12e49cd661 fix: slack config forms + scope issues (#6318) 2025-11-18 16:49:16 -08:00
Jamison Lahman
1859a0ad79 chore(gha): run zizmor (#6326) 2025-11-18 16:10:07 -08:00
Jamison Lahman
9199d146be fix(tests): test_partial_match_in_model_map AssertionError (#6321) 2025-11-18 16:06:01 -08:00
Jamison Lahman
9c1208ffd6 chore(deployments): separate builds by platform (#6314) 2025-11-18 14:49:23 -08:00
Jamison Lahman
c3387e33eb chore(deployments): remove DEPLOYMENT from cache path (#6319) 2025-11-18 14:16:09 -08:00
Jamison Lahman
c37f633a37 chore(deployments): remove driver-opts from model-server build (#6313) 2025-11-18 10:45:24 -08:00
Justin Tahara
6677e12e55 chore(vespa): Update version (#6299) 2025-11-18 09:50:38 -08:00
SubashMohan
7175b93a4c enhancement(onboarding) : Replacing Select input with combobox (#6048) 2025-11-18 17:40:57 +05:30
SubashMohan
fbbcd9646d fix(onboarding): Header animated icon (#6098) 2025-11-18 12:24:42 +05:30
SubashMohan
7afc9d417c feat(modal): Implement a new modal component (#6289) 2025-11-17 23:37:35 +00:00
Wenxi
a905f2d3fb chore: pydantic v2 model configs (#6302) 2025-11-17 23:24:41 +00:00
Jamison Lahman
3d1994a515 chore(deployments): run trivy scanners separate from build and push (#6301) 2025-11-17 23:16:16 +00:00
dependabot[bot]
7f507c7be0 chore(deps): Bump actions/setup-python from 4.9.1 to 6.0.0 (#6296)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-17 20:38:08 +00:00
Jamison Lahman
c0e418d63e chore(deployment): notifications on build failures (#6298) 2025-11-17 20:20:21 +00:00
dependabot[bot]
db49e14f12 chore(deps): Bump docker/login-action from 1.14.1 to 3.6.0 (#6295)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-17 20:19:48 +00:00
dependabot[bot]
e87d6403e8 chore(deps): Bump helm/kind-action from 1.12.0 to 1.13.0 (#6294)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-17 19:48:13 +00:00
Richard Guan
2b6e02a775 chore(internal): search prune sections (#6247) 2025-11-17 18:40:42 +00:00
Justin Tahara
26e1f349b9 fix(index attempts): Preserve some attempts (#6266) 2025-11-17 18:06:26 +00:00
Jamison Lahman
ba83d7e6c3 chore(docker): generate OpenAPI schema/client with docker (#6286) 2025-11-17 17:20:07 +00:00
dependabot[bot]
f869e44497 chore(deps-dev): Bump js-yaml from 3.14.1 to 3.14.2 in /web (#6293)
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-17 17:17:37 +00:00
Jamison Lahman
b367a60680 chore(gha): replace background docker pulls with docker-compose (#6287) 2025-11-17 17:11:56 +00:00
Jamison Lahman
98a7e8b7e2 chore(docker): avoid ONYX_VERSION invalidating the docker cache (#6288) 2025-11-17 17:10:54 +00:00
Nikolas Garza
f93752a2b3 fix: disable aggressive caching for Next.js static assets in dev (#6280)
Co-authored-by: Nikolas Garza <nikolas@unknowna6c9beeb7428.attlocal.net>
2025-11-17 09:15:51 -08:00
Evan Lohn
0d20140cad fix: mcp fixes (#6080) 2025-11-17 08:58:49 -08:00
Wenxi
bdd6dc036e fix(ui): new action form spacing fixes (#6285) 2025-11-17 05:14:05 +00:00
dependabot[bot]
27fe196df3 chore(deps): Bump nanoid from 3.3.7 to 3.3.8 in /examples/widget (#3405)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-17 03:36:35 +00:00
Wenxi
18dad51bf8 fix(tests): pause connector while indexing timeout (#6282) 2025-11-16 22:51:49 +00:00
Wenxi
b6d60fb6a9 fix(permsync): don't fail on empty group ids (#6281) 2025-11-16 22:02:03 +00:00
Wenxi
86e7975c42 chore: foss sync readme (#6256) 2025-11-16 13:18:13 -08:00
Jamison Lahman
bb1fb2250e chore(scripts): only run check_lazy_imports on changed files (#6275) 2025-11-16 18:31:53 +00:00
Nikolas Garza
8fdc3411ed feat(slack federated search scoping - 4/4): Add frontend connector config support (#6181)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-11-16 10:29:44 -08:00
Chris Weaver
d5038e8e68 fix: assistant reordering (#6278) 2025-11-16 09:07:56 -08:00
Jamison Lahman
bc035a78e4 chore(deployment): increase model-server builder to 40GB disk (#6277) 2025-11-16 05:17:11 +00:00
Jamison Lahman
9e1043b2fa chore(mypy): color output in CI (#6274) 2025-11-16 05:12:50 +00:00
SubashMohan
107e83bf2a refactor(chat): Apply Ownership Checks Only to Current Message User Files (#6240) 2025-11-16 05:06:35 +00:00
SubashMohan
f5aade9f69 fix(userfiles): remove fixed width in AssistantEditor and ProjectContextPanel (#6239) 2025-11-15 11:50:37 +00:00
dependabot[bot]
9b9ca43671 chore(deps): bump next from 14.2.27 to 14.2.32 in /examples/widget (#5395)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-15 07:07:40 +00:00
Nikolas Garza
0c61cc3f65 feat(slack federated search scoping - 3/4): Add connector-level config support (#6178)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-11-15 04:42:23 +00:00
Nikolas Garza
553853c7f4 feat(slack federated search scoping - 2/4): Add query construction and filtering (#6175)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-11-15 04:11:28 +00:00
Jamison Lahman
15a05663ca chore(docker): install node deps before copying source (#6261) 2025-11-15 03:55:11 +00:00
Jamison Lahman
940773b9c5 chore(deployments): fix cross-platform related issues (#6272) 2025-11-15 03:24:26 +00:00
Nikolas Garza
a95ae6e88b feat(slack federated search scoping - 1/4): Add entity filtering config (#6174)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-11-15 02:47:52 +00:00
Raunak Bhagat
369f923929 refactor: Implement a proper Switch component (#6270) 2025-11-15 02:28:58 +00:00
Raunak Bhagat
3eefbfb646 fix: Fix header for white-labelling (#6271) 2025-11-14 18:27:29 -08:00
Justin Tahara
3919a2d0a2 fix(gdrive): Missing Id Field (#6262) 2025-11-14 17:59:34 -08:00
Justin Tahara
4553e811b0 feat(github): Showcasing our Github Repo Ranking (#6267) 2025-11-14 16:54:34 -08:00
Justin Tahara
7f7389692e fix(reformat): Teams Test (#6268) 2025-11-14 16:53:19 -08:00
Richard Guan
30147c03cf chore(fix): agent sdk replacement message formatting (#6180) 2025-11-14 14:51:37 -08:00
Wenxi
dc48ccc117 fix(teams): mypy (#6259) 2025-11-14 14:42:30 -08:00
Alex Kim
ee366c50c4 fix(teams): handle OData parsing errors with special characters (#6115)
Co-authored-by: Jessica Singh <86633231+jessicasingh7@users.noreply.github.com>
2025-11-14 14:38:58 -08:00
sktbcpraha
caf92a6cce fix: Assistant instruction ignored (#6243) 2025-11-14 14:30:14 -08:00
Jamison Lahman
259bc9d64b chore(deployments): fix actions/checkout typo (#6255) 2025-11-14 21:48:12 +00:00
dependabot[bot]
60664f7e5b chore(deps-dev): bump js-yaml from 4.1.0 to 4.1.1 in /examples/widget (#6248)
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-14 21:09:16 +00:00
Wenxi
07f55c6ae2 fix: readme (#6254) 2025-11-14 13:31:29 -08:00
Wenxi
256ece05a6 chore: readme (#6253) 2025-11-14 13:26:53 -08:00
Jamison Lahman
530d6d8284 chore(deployments): simplify pipeline + cross-platform builds (#6250) 2025-11-14 13:16:20 -08:00
Chris Weaver
6299bc30b6 fix: playwright test (#6244)
Co-authored-by: Nikolas Garza <90273783+nmgarza5@users.noreply.github.com>
2025-11-14 12:26:50 -08:00
Jamison Lahman
0607ea9741 chore(deployments): add ability to trigger dry-run build (#6246) 2025-11-14 11:22:22 -08:00
Chris Weaver
3ba4bdfe78 fix: gpt-5 output formatting (#6245) 2025-11-14 10:55:17 -08:00
Chris Weaver
a9165ad329 feat: allow switchover with active connectors only (#6226) 2025-11-14 16:52:07 +00:00
Raunak Bhagat
24aea2d7ce refactor: Edit button types (#6235) 2025-11-14 16:21:08 +00:00
SubashMohan
aa30008419 feat(component): new switch component (#6212) 2025-11-14 08:46:53 +00:00
Raunak Bhagat
3605676f61 fix: Fix inputs overflowing in Settings page (#6238) 2025-11-14 06:24:25 +00:00
Raunak Bhagat
1faa9e7812 refactor: Updated Modals API (#6227)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-13 21:48:28 -08:00
Jamison Lahman
d85b702cac chore(deployments): remove unnecessary install of build-essentials (#6234) 2025-11-14 04:33:28 +00:00
Jamison Lahman
a724f53e5b chore(deployments): prefer ecr over s3 as docker cache backend (#6232) 2025-11-13 19:39:55 -08:00
Chris Weaver
68fcc5cb8a fix: signup button (#6228) 2025-11-13 19:02:05 -08:00
Justin Tahara
3eb1ca01a2 fix(docprocessing): OOM cleanup (#6223) 2025-11-13 18:24:59 -08:00
Chris Weaver
c2c3d1a722 feat: allow disabling the default assistant (#6222) 2025-11-13 17:42:47 -08:00
Wenxi
f79a8533fb fix: show agent descriptions (#6219) 2025-11-13 14:17:43 -08:00
Jamison Lahman
c1dce9fabd chore(runs-on): define custom AMI specs (#6216) 2025-11-13 22:01:07 +00:00
Jamison Lahman
244bf82c7a chore(gha): prefer venv over installing python packages to the system (#6213) 2025-11-13 17:39:54 +00:00
Jamison Lahman
188ea3faff chore(gha): prefer Github-hosted for simple steps (#6208) 2025-11-13 02:37:48 +00:00
Justin Tahara
c04f624891 fix(slack): Fixing the link coloring (#6203) 2025-11-13 02:32:50 +00:00
Jamison Lahman
43ae02a870 chore(gha): remove custom cloudwatch metrics (#6202) 2025-11-13 00:12:13 +00:00
Jamison Lahman
14123926a7 chore(gha): final runs-on migration nits (#6170) 2025-11-12 23:00:25 +00:00
Justin Tahara
d14d1b833f fix(slack): Show Channels when Editing Fed Slack (#6200) 2025-11-12 22:30:49 +00:00
Nikolas Garza
ff06f10af6 fix: type checking for multiToolTestHelpers (#6199)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-11-12 14:36:04 -08:00
Justin Tahara
5d26c1bafc fix(slackbot): Switch between document set and assistant (#6198) 2025-11-12 22:21:27 +00:00
dependabot[bot]
dbf06c6a1b chore(deps): bump aquasecurity/trivy-action from 0.29.0 to 0.33.1 (#6194)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-12 19:43:15 +00:00
dependabot[bot]
d31e83900f chore(deps): bump docker/setup-buildx-action from 1.7.0 to 3.11.1 (#6196)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-12 19:25:04 +00:00
dependabot[bot]
1ac92e6bd0 chore(deps-dev): bump types-urllib3 from 1.26.25.11 to 1.26.25.14 in /backend (#6193)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-12 19:21:32 +00:00
dependabot[bot]
5e159c35f3 chore(deps): bump pilosus/action-pip-license-checker from 2.0.0 to 3.1.0 (#6191)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-12 19:00:23 +00:00
Raunak Bhagat
550271abd9 feat: Share chat button in top right corner (#6186)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-12 11:08:23 -08:00
Nikolas Garza
db3d0bfb34 feat: improve usability of react testing framework + multi-tool renderer tests (#5973)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-11-12 10:48:49 -08:00
Nikolas Garza
860bdd3c0f chore: run playwright projects as separate jobs (#6190)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-11-12 18:28:19 +00:00
Jamison Lahman
3bc63b30ce chore(deps): dependabot for python (#6188) 2025-11-12 18:18:27 +00:00
dependabot[bot]
78a23eeec0 chore(deps): bump pypdf from 6.0.0 to 6.1.3 in /backend/requirements (#5866)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-12 18:15:05 +00:00
Raunak Bhagat
096e4029ba build: Revert to using webpack instead of turbopack (#6185)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-12 18:10:17 +00:00
SubashMohan
e8da5722df feat(upload): increase token limit to 100k and configurable skip (#6187) 2025-11-12 09:53:37 -08:00
Jamison Lahman
a1a261f68e chore(lint): introduce actionlint, github actions linter (#6184) 2025-11-12 03:39:17 +00:00
Jamison Lahman
ac57b10240 chore(gha): ensure run-id is unique, fix hanging jobs (#6183) 2025-11-12 01:25:59 +00:00
Richard Guan
ce35e01ce3 chore(hotfix): for configuration (#6182) 2025-11-12 00:59:28 +00:00
Richard Guan
808f82de0b chore(agent sdk): make alternative to openai agent sdk (#6153) 2025-11-11 16:25:19 -08:00
Jamison Lahman
9518bd14bb chore(gha): explicit spot pricing (#6177) 2025-11-11 23:52:54 +00:00
Justin Tahara
54eb655634 fix(gdrive): Checkbox fix (#6171) 2025-11-11 22:39:36 +00:00
Wenxi
a773c398af fix: safari input bar quadrupling new lines (#6173) 2025-11-11 13:31:23 -08:00
Jamison Lahman
53131e7669 chore(gha): run whitespace fixers on actions (#6172) 2025-11-11 13:06:59 -08:00
Richard Guan
d5cb56b0e9 chore(llm): interface decoupled from langchain (#6128) 2025-11-11 19:48:25 +00:00
Wenxi
de6226e192 fix: img input support check false vs. none (#6169) 2025-11-11 11:21:58 -08:00
Jamison Lahman
a1d502804a chore(gha): migrate pr-integration-tests off blacksmith (#6164) 2025-11-11 19:06:56 +00:00
Jamison Lahman
76fc01968b chore(gha): de-dupe python setup for external dep tests (#6159)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-11 09:38:01 -08:00
Jamison Lahman
f9de82c135 chore(runs-on): more instance families and use price-capacity-optimized (#6165) 2025-11-11 09:37:50 -08:00
Justin Tahara
db4b074938 fix(pegasus): Cleanup (#6163) 2025-11-11 09:26:58 -08:00
Justin Tahara
bc5a574cf1 fix(embedding): Fix Deletion of Same Name (#6149) 2025-11-10 19:37:21 -08:00
Jamison Lahman
c14414c9be feat(pre-commit): run check-yaml on .github/ (#6160) 2025-11-11 02:21:50 +00:00
Justin Tahara
770bfcf360 fix(gpt-5): Catch all (#6162) 2025-11-10 18:35:06 -08:00
Chris Weaver
67c1099f98 fix: improve /llm/provider performance (#6158) 2025-11-10 17:01:56 -08:00
Jamison Lahman
67eb54734f chore(gha): migrate playwright tests to runs-on (#6154) 2025-11-10 15:51:14 -08:00
Justin Tahara
f819fdf09b feat(auth): Allow JIT even with Invite List (#6157) 2025-11-10 14:36:59 -08:00
Justin Tahara
b39a4a075a fix(cohere): Add Billing Handler (#6156) 2025-11-10 14:31:01 -08:00
Justin Tahara
8a244aff0d feat(api): Paginated Document Search (#6155) 2025-11-10 14:10:36 -08:00
Jamison Lahman
6a74e54eda feat(gha): python tests use uv w/ caching (#6152) 2025-11-10 12:10:21 -08:00
Jamison Lahman
e87818c961 feat(gha): enable npm caching in CI (#6151) 2025-11-10 11:34:06 -08:00
dependabot[bot]
fbec393faa chore(deps): bump actions/download-artifact from 4.3.0 to 6.0.0 (#6147)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-10 11:27:45 -08:00
dependabot[bot]
da167e93ab chore(deps): bump actions/upload-artifact from 4.6.2 to 5.0.0 (#6146)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-10 11:27:36 -08:00
dependabot[bot]
91c0b273bf chore(deps): bump actions/setup-node from 4.4.0 to 6.0.0 (#6148)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-10 11:14:01 -08:00
Jamison Lahman
72d1cfa36a chore(gha): docker-login follow up (#6150) 2025-11-10 10:57:34 -08:00
Jamison Lahman
1f45ebc818 fix(gha): docker login for all external image fetching (#6139) 2025-11-10 10:34:02 -08:00
Chris Weaver
c1428d03f5 fix: infinite render on embedding model page (#6144) 2025-11-10 09:39:32 -08:00
Chris Weaver
904bcdb0fe chore: change log-level of keyword extraction failure (#6135) 2025-11-08 14:52:38 -08:00
Nikolas Garza
9caf136f0e feat: llm access controls (#5819) 2025-11-08 10:36:14 -08:00
Raunak Bhagat
e06ad214cb fix: White labelling (#6133)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-11-07 18:39:04 -08:00
Chris Weaver
fe53ae9d79 fix: package-lock.json (#6106) 2025-11-07 18:10:01 -08:00
Jamison Lahman
5a2796d285 chore(gha): pr-python-checks instance update (#6129) 2025-11-07 17:29:25 -08:00
Justin Tahara
aba5bee4d7 fix(ui): Make Private Groups selectable again (#6116) 2025-11-07 17:10:39 -08:00
Justin Tahara
a0eaf126be feat(azure): Support OpenAI Image models (#6107) 2025-11-07 17:10:24 -08:00
Justin Tahara
28712aab1d fix(vercel): Remove deprecated fields (#6130) 2025-11-07 17:09:41 -08:00
Justin Tahara
25de38fcf7 fix(chat): Adding buffer for instructions (#6125) 2025-11-07 16:33:37 -08:00
Justin Tahara
53123e2870 fix(upload): File type handling (#6126) 2025-11-07 16:25:13 -08:00
Jamison Lahman
fa8487a1a8 chore(gha): reduce size of pr-quality-check instance (#6123) 2025-11-07 16:21:20 -08:00
Jamison Lahman
3f0bcd516d fix(gha): fix terraform pre-commit test (#6124) 2025-11-07 15:26:29 -08:00
Justin Tahara
76d25ff489 fix(tool): Hide Okta Tool (#6120) 2025-11-07 13:36:23 -08:00
Wenxi
f99d0285f9 fix: openapi generation required fields (#6117) 2025-11-07 18:45:30 +00:00
Raunak Bhagat
988221550e fix: Sidebar sizing fix (#6113) 2025-11-06 19:43:31 -08:00
Raunak Bhagat
6b636c1b90 feat: Sidebar anim (#6111) 2025-11-06 19:32:06 -08:00
Justin Tahara
1fee528d86 fix(icons): Update Token Rate Limits page (#6112) 2025-11-06 17:35:55 -08:00
Wenxi
815ab5ebc2 fix: hydration errors (#6100)
Co-authored-by: Raunak Bhagat <r@rabh.io>
2025-11-06 17:14:26 -08:00
Jessica Singh
c8553331ae fix(teams): increase timeout w special char team names (#6086)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-06 23:22:25 +00:00
Raunak Bhagat
362da495ff refactor: Update tooltip colours (#6093)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-11-06 14:49:22 -08:00
Richard Guan
abb444cd85 chore(prompting): fix default behavior assistant (#6103) 2025-11-06 21:25:41 +00:00
Justin Tahara
fc7e6f798d fix(bedrock): Add Gov Cloud regions (#6105) 2025-11-06 13:18:59 -08:00
Jamison Lahman
8b39d60bca chore(gha): migrate connectors-check to uv w/ caching (#6102) 2025-11-06 19:55:24 +00:00
Justin Tahara
9ac8331cd3 fix(gdrive): Add support for domain link only filtering (#6076) 2025-11-06 19:53:39 +00:00
Raunak Bhagat
295dc2d28c refactor: New Opal component library (#6062)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-11-06 18:50:32 +00:00
Raunak Bhagat
c15266227a refactor: Update how disabled actions are rendered (#6094) 2025-11-06 10:09:03 -08:00
Raunak Bhagat
1c54b357ee fix: Fix modal dragging issue (#6095) 2025-11-06 10:05:18 -08:00
Jamison Lahman
791346eca8 chore(gha): migrate external dependency tests to uv (#6083) 2025-11-06 07:17:25 +00:00
Chris Weaver
2d2a2452bf fix: setDisplayComplete not called for ollama (#6092) 2025-11-05 22:04:00 -08:00
dependabot[bot]
4d3094c09f chore(deps): bump aws-actions/configure-aws-credentials from 4.3.1 to 5.1.0 (#6089)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-06 05:28:20 +00:00
dependabot[bot]
882e5f999d chore(deps): bump helm/chart-testing-action from 2.7.0 to 2.8.0 (#6090)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-06 05:25:29 +00:00
dependabot[bot]
c2cf3991a0 chore(deps): bump actions/github-script from 7.1.0 to 8.0.0 (#6091)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-05 21:04:30 -08:00
Wenxi
402dfdad2c fix: disable PAT when AUTH_TYPE is disabled (#6088) 2025-11-05 20:00:20 -08:00
Jamison Lahman
ef8de62478 chore(deps): basic dependabot setup for actions (#6087) 2025-11-06 02:28:37 +00:00
Evan Lohn
8101be42ea feat: gmail connector checkpointing (#6040) 2025-11-05 18:03:28 -08:00
Chris Weaver
0c615cd76d fix: infinite render on React 19 (#6085) 2025-11-05 17:54:21 -08:00
Wenxi
421e9899b8 fix: preload user settings correctly (#6063) 2025-11-06 01:13:43 +00:00
Jamison Lahman
6379423dfc feat(gha): persist the mypy_cache (#6079)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-06 00:49:56 +00:00
Jamison Lahman
1c742e675a fix(gha): Dont wait for vespa server (#6081) 2025-11-06 00:21:54 +00:00
Richard Guan
5c3b2320a7 chore(simplified): tools (#6064) 2025-11-05 16:12:14 -08:00
Richard Guan
198fc145fc chore(custom): instructions (#6055) 2025-11-05 22:05:30 +00:00
Jamison Lahman
0f84391f60 chore(gha): migrate mypy workflow to uv w/ caching (#6074) 2025-11-05 14:03:57 -08:00
Jamison Lahman
1e101f8028 chore(gha): pin workflow versions (#6058)
## Description

SHAs are more secure than version tags.
2025-11-05 13:29:58 -08:00
Wenxi
7e40cbe0d1 fix: honor pw min length env var on fe (#6065) 2025-11-05 11:43:44 -08:00
Chris Weaver
a528dbe241 fix: airgapped (#6067) 2025-11-04 23:34:37 -08:00
Chris Weaver
587cca4b13 feat: nextjs upgrade + react compiler (#6060) 2025-11-04 19:52:53 -08:00
Wenxi
990842c1cf feat(PAT): Final/3 add tests (#6047) 2025-11-04 15:23:01 -08:00
Wenxi
a3a420a6de feat(PAT): 3/3 PAT frontend (#6046) 2025-11-04 14:50:54 -08:00
Wenxi
03c2e62aee feat(PAT): 2/3 PAT APIs (#6045) 2025-11-04 14:50:26 -08:00
Wenxi
b7d7c62a7c feat(PAT): 1/3 PAT backend, crud, shared utils with API key (#6044) 2025-11-04 14:50:06 -08:00
Yuhong Sun
6f5c466f38 Ollama Img Compat (#6057) 2025-11-04 12:15:33 -08:00
Yuhong Sun
2b19b84245 Upgrade when version latest (#6056) 2025-11-04 11:57:00 -08:00
Justin Tahara
16e1b45cee fix(helm): Remove OAUTH Cookie secret (#6054) 2025-11-04 10:29:55 -08:00
Richard Guan
d9c2793341 chore(new): framework enhancements (#6039) 2025-11-04 00:44:22 +00:00
Chris Weaver
644920f2f9 fix: misc fixes for ollama reasoning (#6043)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-03 15:14:16 -08:00
Raunak Bhagat
e38e96d4d1 fix: Fix signin button (#6041) 2025-11-03 13:48:47 -08:00
Wenxi
fc84e83fdb fix: don't re-direct to login when auth disabled (#6042) 2025-11-03 21:08:37 +00:00
Richard Guan
c428ad6dfe chore(robustness): quick fixes for ollama -- error handling fixes + litellm register lifecycle fix (#6024) 2025-11-03 12:58:49 -08:00
Wenxi
98a7a04633 chore: update vertex ai names (#6029) 2025-11-03 12:03:06 -08:00
Nikolas Garza
9b42a8c1f3 fix: update playwright tests for agent creation with knowledge (#5892)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
Co-authored-by: Raunak Bhagat <r@rabh.io>
2025-11-03 19:26:35 +00:00
Raunak Bhagat
eacc663d5b fix: Remove dbg-red (#6038) 2025-11-03 10:00:29 -08:00
Raunak Bhagat
3020ee5964 fix: Fix "Projects" renaming UI (#6037) 2025-11-03 09:56:34 -08:00
Chris Weaver
472d080239 feat: support reasoning (#6004)
Co-authored-by: Richard Guan <rguan72@gmail.com>
2025-11-03 09:47:44 -08:00
SubashMohan
922069bfd3 Enhancement/new log in UI (#6009)
Co-authored-by: Yuhong Sun <yuhongsun96@gmail.com>
2025-11-03 09:31:06 -08:00
Jessica Singh
bffca81477 fix(citations): icon not visible in ui (#6003)
Co-authored-by: Raunak Bhagat <r@rabh.io>
2025-11-03 08:12:53 +00:00
Raunak Bhagat
561b487102 fix: Update transience state when like/dislike buttons are pressed (#6036) 2025-11-02 22:26:54 -08:00
Raunak Bhagat
cc9b14c99b feat: New agents nav (#6006) 2025-11-02 22:26:25 -08:00
Raunak Bhagat
de674a19e0 fix: Create new CopyIconButton component (#6035) 2025-11-02 18:35:33 -08:00
Chris Weaver
79114bf92c feat: add image previews (#6030) 2025-11-02 11:25:07 -08:00
Wenxi
b5dccd96b3 fix: don't fail filestore cleanup (#6018) 2025-11-02 04:09:21 +00:00
Wenxi
a55cc5a537 fix: don't flash connectors tab and cache federated connectors (#6019) 2025-11-01 18:18:42 +00:00
Wenxi
cdf3cc444b chore(claude): track plans/ and use CLAUDE/AGENT templates (#5993) 2025-11-01 11:23:31 -07:00
Richard Guan
cd3941f4b7 chore(reduce): multitenant flakiness (#6021) 2025-11-01 18:06:25 +00:00
Wenxi
0182743619 feat: tag beta images (#6022) 2025-11-01 01:49:19 +00:00
Nikolas Garza
0e2f596aa2 fix: dark/light coloring of manage subscription button (#6026)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-10-31 17:42:28 -07:00
Justin Tahara
0be45676b7 fix(helm): Adding config for db_readonly_user (#6025) 2025-10-31 17:30:34 -07:00
Nikolas Garza
30a3470001 fix: text overlap for tool responses when expanded (#5960)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-10-31 23:55:32 +00:00
Richard Guan
d52fa83afa chore(make): simple agent framework default on (#6017) 2025-10-31 14:11:35 -07:00
Richard Guan
9eb5643cc3 chore(fix): mypy check (#6008) 2025-10-31 10:36:37 -07:00
Justin Tahara
afe34218b8 fix(vespa): Adjust node count (#6016) 2025-10-31 10:36:26 -07:00
Richard Guan
4776947dfa chore(ollama): ollama support (#5963) 2025-10-31 00:26:27 -07:00
trial2onyx
c4bc25f540 feat: Load random documents in document explorer page for empty queries (#5966)
Co-authored-by: Onyx Trialee 2 <onyxtrial2@Onyxs-MBP.attlocal.net>
2025-10-30 15:45:04 -07:00
Dominic Feliton
b77078b339 fix(web-connector): empty semantic identifiers from trailing / with PDF URLs (#5997)
Co-authored-by: Dominic Feliton <37809476+dominicfeliton@users.noreply.github.com>
2025-10-30 15:28:51 -07:00
SubashMohan
88b28a303b fix(chat): enhance file upload handling and improve file removal (#5975) 2025-10-30 10:08:46 +05:30
Raunak Bhagat
59d7d3905a fix: Fix bug in which bottom-padding of mask would not get applied (#5994) 2025-10-30 01:53:04 +00:00
Justin Tahara
a48fe7550a fix: Better Logs for Rate Limits (#5988) 2025-10-30 01:50:14 +00:00
Nikolas Garza
c25a99955c fix: always convert strings to document sources (#5992)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-10-30 00:30:14 +00:00
Justin Tahara
ac509f865a chore(helm): Reducing Helm Chart Resource Requests/Limits (#5980) 2025-10-29 15:51:34 -07:00
Justin Tahara
5819389ae8 fix(message): Process UUID properly (#5989) 2025-10-29 15:51:19 -07:00
Justin Tahara
eae5774cdc fix(helm): Bump test version (#5978) 2025-10-29 13:38:25 -07:00
Raunak Bhagat
8fed0a8138 perf: Remove admin sidebar delay (#5985) 2025-10-29 19:31:43 +00:00
Nikolas Garza
c04196941d fix: fix linear icon in dark mode (#5971)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-10-29 17:08:38 +00:00
Nikolas Garza
19461955ed feat(e2e-testing): record playwright traces for CI failures (#5923)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-10-29 10:08:50 -07:00
Raunak Bhagat
cb3152ff5c refactor: Update button naming colour states (#5972) 2025-10-29 09:58:26 -07:00
Wenxi
cf187e8f58 fix: stop redis from spawning anonymous volumes (#5969) 2025-10-29 01:22:12 +00:00
Evan Lohn
deaa3df42f fix: deprecated confluence oauth api (#5962)
Co-authored-by: Wenxi Onyx <wenxi@onyx.app>
2025-10-28 17:00:14 -07:00
Richard Guan
d6e98bfbc8 chore(turn): fix sending multiple messages (#5961) 2025-10-28 22:41:52 +00:00
Raunak Bhagat
ff58ad0b87 fix: More MCP fixes (#5933) 2025-10-28 14:32:31 -07:00
Justin Tahara
eb7cb02cc0 fix(saml): Align Cookie Usage (#5954) 2025-10-28 19:15:30 +00:00
Evan Lohn
7876d8da1b fix: jira connector creation (#5956)
Co-authored-by: Wenxi Onyx <wenxi@onyx.app>
2025-10-28 18:47:14 +00:00
Justin Tahara
8a6f83115e fix(gmail): Adding Size Thresholds (#5948) 2025-10-27 20:56:59 -07:00
Richard Guan
b7f81aed10 chore(add): tests for citation and force tool use and some cleanup (#5953) 2025-10-28 01:36:49 +00:00
Richard Guan
a415a997cf chore(reduce): model strength for tests (#5726) 2025-10-28 00:47:36 +00:00
Justin Tahara
7781afd74e fix(gdrive): Adding fallback logic for Web View Links (#5952) 2025-10-27 23:59:41 +00:00
Evan Lohn
d0a4f4ce66 fix: always trigger DR (#5831) 2025-10-27 23:32:38 +00:00
Richard Guan
ba00de8904 chore(citation): processing (#5904) 2025-10-27 16:11:24 -07:00
Justin Tahara
91f21bb22b fix(openpyxl): Workbook issue (#5950) 2025-10-27 15:40:08 -07:00
Justin Tahara
491f3127c5 fix(misc): Update Date Checker (#5947) 2025-10-27 15:39:58 -07:00
Richard Guan
0987fb852b chore(force): tool use fix (#5930) 2025-10-27 21:15:37 +00:00
Justin Tahara
5f68141335 fix(vespa): Reducing Number of Nodes (#5942) 2025-10-27 14:26:38 -07:00
Wenxi
b5793ee522 fix: failing web connector test due sync api (#5936) 2025-10-27 12:39:27 -07:00
Wenxi
238c244fec fix: standardize and make user settings pretty (#5922) 2025-10-27 12:38:52 -07:00
Wenxi
c103a878b7 fix: chat feedback (#5896) 2025-10-27 11:55:06 -07:00
Justin Tahara
03deb064cc fix(ui): Remove Bubble Text for non-search ui chats (#5887) 2025-10-27 11:32:20 -07:00
Yuhong Sun
09062195b4 Script to generate test data (#5935) 2025-10-27 10:51:57 -07:00
Raunak Bhagat
dc57a5451c feat: Update pinning behaviour (#5934) 2025-10-27 10:51:21 -07:00
Raunak Bhagat
781f60a5ab fix: Edit recent files UI (#5879) 2025-10-27 00:10:51 -07:00
Raunak Bhagat
423961fefb refactor: Replace all instances of the old button with the new refreshed Button (#5889) 2025-10-26 23:02:41 -07:00
Chris Weaver
324b6ceeef refactor: remove custom spacing definitions (#5928)
Co-authored-by: Raunak Bhagat <r@rabh.io>
2025-10-26 22:32:18 -07:00
Nikolas Garza
d9e14bf5da fix: decrease background blur in modals (#5823)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
Co-authored-by: Raunak Bhagat <r@rabh.io>
2025-10-26 22:25:26 -07:00
Wenxi
eb2cb1bb25 fix: auth logic, create util func, and add edge check in middleware (#5931) 2025-10-26 19:08:34 -07:00
Chris Weaver
0de9f47694 fix: change url at the start of a new chat rather than at the end of … (#5932) 2025-10-26 17:20:04 -07:00
Chris Weaver
2757f3936c fix: remove default agent from sidebar (#5929) 2025-10-26 15:22:34 -07:00
Chris Weaver
8ba61e9123 fix: code rendering in chat (#5927) 2025-10-26 13:38:20 -07:00
Chris Weaver
c10d7fbc32 fix: copy button switch to check (#5926) 2025-10-25 17:50:09 -07:00
Chris Weaver
b6ed217781 feat: oauth tools/fe (#5844) 2025-10-24 23:39:11 -07:00
Chris Weaver
7d20f73f71 feat: Oauth tools/support removing (#5876) 2025-10-24 20:02:12 -07:00
Nikolas Garza
2b306255f9 fix: better handling of dark/light icons (#5909)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-10-24 23:46:58 +00:00
Wenxi
e149d08d47 feat: save user's theme preference (#5908) 2025-10-24 23:28:23 +00:00
Chris Weaver
e98ddb9fe6 feat: center input bar (#5919) 2025-10-24 16:22:00 -07:00
Chris Weaver
b9a5297694 fix: input bar thickness (#5917) 2025-10-24 16:20:50 -07:00
Chris Weaver
4666312df2 fix: initial screen when no LLM provider is set up (#5912) 2025-10-24 14:08:36 -07:00
Evan Lohn
d4e524cd83 fix: mcp chat frontend part1 (#5913) 2025-10-24 11:50:17 -07:00
trial2onyx
a719228034 chore(gha): disable docker caching for backend images (#5910)
Co-authored-by: Onyx Trialee 2 <onyxtrial2@Onyxs-MBP.attlocal.net>
2025-10-24 18:13:51 +00:00
Chris Weaver
2fe8b5e33a fix: deep research disable (#5911) 2025-10-24 11:15:08 -07:00
Justin Tahara
af243b0ef5 chore(tf): Clean up and add linting (#5905) 2025-10-23 18:23:37 -07:00
Wenxi
c96ac04619 feat: show personal name if provided (#5898) 2025-10-23 17:45:04 -07:00
Wenxi
e2f2950fee fix: make entire query history row clickable (#5894) 2025-10-23 17:44:16 -07:00
Chris Weaver
8b84c59d29 fix: add __init__.py in empty ee dir (#5903) 2025-10-23 17:34:52 -07:00
Chris Weaver
b718a276cf fix: add global (#5902) 2025-10-23 17:29:27 -07:00
Raunak Bhagat
700511720f refactor: Remove assistant icon (#5882)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-10-23 17:23:26 -07:00
Raunak Bhagat
6bd1719156 fix: Shared chat UI (#5895) 2025-10-23 17:21:54 -07:00
Richard Guan
c8bfe9e0a1 chore(integration): test instructions and image gen test (#5897) 2025-10-23 23:45:46 +00:00
Chris Weaver
037bc04740 fix: set git config early (#5900) 2025-10-23 16:59:30 -07:00
Chris Weaver
c3704d47df fix: add back empty ee dir (#5899) 2025-10-23 16:51:48 -07:00
Richard Guan
397a153ff6 chore(fix): bring llm prompts to spec (#5863) 2025-10-23 22:56:56 +00:00
Chris Weaver
870c432ccf fix: sharepoint .msg handling + lazy load check fix (#5497) 2025-10-23 16:00:49 -07:00
Chris Weaver
c4a81a590f fix: add license (#5891) 2025-10-23 15:08:45 -07:00
Nikolas Garza
017c095eed Revert "chore: add fe type check to pre-commit" (#5893) 2025-10-23 14:40:58 -07:00
Nikolas Garza
ee37d21aa4 fix: fix typing errors in react tests (#5881)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-10-23 21:08:48 +00:00
Nikolas Garza
e492d88b2d chore: add fe type check to pre-commit (#5883)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-10-23 20:46:48 +00:00
Wenxi
3512fdcd9d fix: don't set env vars if they're empty (aws bedrock) (#5886)
Co-authored-by: EC2 Default User <ec2-user@ip-172-31-7-79.us-east-2.compute.internal>
2025-10-23 20:45:53 +00:00
Chris Weaver
3550795cab fix: make_foss_repo.sh (#5890) 2025-10-23 14:02:12 -07:00
Chris Weaver
b26306d678 fix: foss repo syncing (#5888) 2025-10-23 13:43:04 -07:00
Chris Weaver
85140b4ba6 feat: FOSS repo sync (#5885) 2025-10-23 13:39:28 -07:00
Jessica Singh
c241f79f97 fix(team special char): ampersand fix (#5877) 2025-10-23 19:22:36 +00:00
Chris Weaver
9808dec6b7 feat: oauth tool apis (#5840) 2025-10-23 11:59:31 -07:00
Wenxi
632c74af6d chore: Update CONTRIBUTING.md (#5880) 2025-10-23 11:41:49 -07:00
trial2onyx
79073d878c chore(docker): migrate integration image to docker bake and de-dupe (#5873)
Co-authored-by: Onyx Trialee 2 <onyxtrial2@Onyxs-MBP.attlocal.net>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-10-23 17:17:45 +00:00
Chris Weaver
620df88c51 fix: citation look (#5871) 2025-10-23 10:41:33 -07:00
Chris Weaver
717f05975d feat: add OAuthTokenManager (#5838) 2025-10-23 10:26:47 -07:00
Chris Weaver
d2176342c1 feat: add CRUD operations for OAuth Tools (#5837) 2025-10-23 10:21:36 -07:00
Wenxi
bb198b05e1 feat: update icons (#5864) 2025-10-23 10:12:55 -07:00
Chris Weaver
085013d8c3 feat: add DB models for OAuthTools (#5836) 2025-10-23 09:50:04 -07:00
Nikolas Garza
e46f632570 fix: allow user knowledge (file uploads) always (#5857)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.attlocal.net>
2025-10-22 23:37:25 +00:00
Justin Tahara
bbb4b9eda3 fix(docker): Clean up USE_IAM_AUTH log (#5870) 2025-10-22 15:59:12 -07:00
Richard Guan
12b7c7d4dd chore(ripsecrets): ripsecrets (#5868) 2025-10-22 22:02:57 +00:00
trial2onyx
464967340b chore(docker): prefer uv for installing python system packages (#5861)
Co-authored-by: Onyx Trialee 2 <onyxtrial2@Onyxs-MBP.attlocal.net>
2025-10-22 21:48:41 +00:00
trial2onyx
a2308c2f45 chore(gha): deduplicate prepare-build and migrate to uv (#5862)
Co-authored-by: Onyx Trialee 2 <onyxtrial2@Onyxs-MBP.attlocal.net>
2025-10-22 21:20:37 +00:00
trial2onyx
2ee9f79f71 chore(docker): remove empty echo ONYX_VERSION layers (#5848)
Co-authored-by: Onyx Trialee 2 <onyxtrial2@Onyxs-MBP.attlocal.net>
2025-10-22 20:36:36 +00:00
trial2onyx
c3904b7c96 fix(release): correctly set ONYX_VERSION in model-server image (#5847)
Co-authored-by: Onyx Trialee 2 <onyxtrial2@Onyxs-MBP.attlocal.net>
2025-10-22 19:56:55 +00:00
trial2onyx
5009dcf911 chore(docker): avoid duplicating cached models layer (#5845)
Co-authored-by: Onyx Trialee 2 <onyxtrial2@Onyxs-MBP.attlocal.net>
2025-10-22 19:56:42 +00:00
trial2onyx
c7b4a0fad9 chore(github): flag and enable docker build caching (#5839)
Co-authored-by: Onyx Trialee 2 <onyxtrial2@Onyxs-MBP.attlocal.net>
2025-10-22 19:56:23 +00:00
Raunak Bhagat
60a402fcab Render chat and project button popovers using the PopoverMenu component (#5858) 2025-10-21 20:37:22 -07:00
Raunak Bhagat
c9bb078a37 Edit height of mask again (#5856) 2025-10-21 20:22:51 -07:00
Raunak Bhagat
c36c2a6c8d fix: Edit height of mask (#5855) 2025-10-21 20:17:39 -07:00
Raunak Bhagat
f9e2f9cbb4 refactor: Remove hover state on chatbutton rename (#5850) 2025-10-21 19:44:57 -07:00
Raunak Bhagat
0b7c808480 refactor: "Unnest" admin panel button (#5852) 2025-10-21 19:30:00 -07:00
Justin Tahara
0a6ff30ee4 fix(ui): Update spacing for the API Key page (#5826) 2025-10-21 18:26:14 -07:00
Raunak Bhagat
dc036eb452 fix: Spacings update (#5846) 2025-10-21 18:11:13 -07:00
Justin Tahara
ee950b9cbd fix(ui): Document Processing revamp (#5825) 2025-10-21 17:56:06 -07:00
Justin Tahara
dd71765849 fix(internal search): Restore functionality (#5843) 2025-10-21 16:54:10 -07:00
Raunak Bhagat
dc6b97f1b1 refactor: Edit message generation ui (#5816) 2025-10-21 16:51:14 -07:00
Richard Guan
d960c23b6a chore(fix): input images in msg (#5798) 2025-10-21 20:33:00 +00:00
Richard Guan
d9c753ba92 chore(simple): agent small adjustments (#5729) 2025-10-21 20:32:57 +00:00
Chris Weaver
60234dd6da feat: Improve litellm model map logic (#5829) 2025-10-21 13:22:35 -07:00
Justin Tahara
f88ef2e9ff fix(ui): Align Default Assistant Page (#5828) 2025-10-21 19:12:30 +00:00
Chris Weaver
6b479a01ea feat: run tasks for gated tenants (#5827) 2025-10-21 11:47:39 -07:00
Wenxi
248fe416e1 chore: update template reference to sso ee (#5830) 2025-10-21 11:39:12 -07:00
trial2onyx
cbea4bb75c chore(docker): avoid chown-ing playwright cache (#5805)
Co-authored-by: Onyx Trialee 2 <onyxtrial2@Onyxs-MBP.attlocal.net>
2025-10-21 17:23:49 +00:00
Justin Tahara
4a147a48dc fix(ui): Update Upload Image and Generate Icon buttons (#5824) 2025-10-21 10:41:57 -07:00
Chris Weaver
a77025cd46 fix: adjust deletion threshold (#5818) 2025-10-21 10:37:10 -07:00
Jessica Singh
d10914ccc6 fix(teams connector): special char bug (#5767) 2025-10-21 10:27:37 -07:00
Nikolas Garza
7d44d48f87 fix: switch out OnyxSparkleIcon for OnyxIcon for default assistant (#5806)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.attlocal.net>
2025-10-20 17:48:30 -07:00
Chris Weaver
82fd0e0316 fix: extra sidebar spacing (#5811) 2025-10-20 17:48:02 -07:00
Wenxi
d7e4c47ef1 fix: custom llm setup fixes (#5804) 2025-10-20 17:47:38 -07:00
Wenxi
799b0df1cb fix: don't set new default provider when deleted provider was not default (#5812)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-10-20 17:42:47 -07:00
Justin Tahara
b31d36564a fix(ui): Make Document Sets editable (#5809) 2025-10-20 17:41:08 -07:00
Chris Weaver
84df0a1bf9 feat: more cleanup script improvements (#5803) 2025-10-20 17:29:38 -07:00
Justin Tahara
dbc53fe176 fix(ui): Set as Default for LLM (#5795) 2025-10-20 17:21:12 -07:00
Wenxi
1e4ba93daa feat: optimistically rename chat sidebar items (#5810)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-10-20 17:01:44 -07:00
Wenxi
d872715620 feat: azure parse deployment name (#5807)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
Co-authored-by: Evan Lohn <evan@danswer.ai>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-10-20 16:52:56 -07:00
Chris Weaver
46ad541ebc fix: whitelabling assistant logo (#5808) 2025-10-20 16:52:26 -07:00
Raunak Bhagat
613907a06f fix: Fix colouring for all error pages (#5802) 2025-10-20 15:42:41 -07:00
Wenxi
ff723992d1 feat: openrouter support (#5772) 2025-10-20 14:24:12 -07:00
Raunak Bhagat
bda3c6b189 fix: Edit background colour (#5800) 2025-10-20 14:09:53 -07:00
Evan Lohn
264d1de994 chore: disable contextual rag in the cloud (#5801) 2025-10-20 13:52:57 -07:00
Nikolas Garza
335571ce79 feat: Add React testing framework (#5778) 2025-10-20 13:49:44 -07:00
Chris Weaver
4d3fac2574 feat: enhance tenant cleanup (#5788) 2025-10-20 13:29:04 -07:00
Evan Lohn
7c229dd103 fix: reduce spam of org info toast (#5794) 2025-10-20 13:01:05 -07:00
Evan Lohn
b5df182a36 chore: hide search settings in the cloud (#5796) 2025-10-20 13:00:48 -07:00
Justin Tahara
7e7cfa4187 fix(ui): Initial Index Attempt Tooltip (#5789) 2025-10-20 12:57:34 -07:00
Justin Tahara
69d8430288 fix(ui): Create Button Type (#5797) 2025-10-20 12:48:39 -07:00
Raunak Bhagat
467d294b30 fix: Add white-labelling back (#5757) 2025-10-19 13:25:25 -07:00
Chris Weaver
ba2dd18233 feat: improve performance of deletion scripts (#5787) 2025-10-19 13:02:19 -07:00
Chris Weaver
891eeb0212 feat: add new fields to usage report (#5784) 2025-10-19 12:37:57 -07:00
Wenxi
9085731ff0 fix: add latest check to merge step (#5781) 2025-10-18 18:26:21 -07:00
Chris Weaver
f5d88c47f4 fix: docker-tag-latest.yml (#5780) 2025-10-18 09:06:14 -07:00
Raunak Bhagat
807e5c21b0 fix: Fix styling (#5776) 2025-10-17 18:49:33 -07:00
Raunak Bhagat
1bcd795011 fix: Font loading fix (#5773) 2025-10-17 18:39:30 -07:00
Raunak Bhagat
aae357df40 fix: Fix document sidebar positioning + update stylings (#5769)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-10-17 18:26:53 -07:00
Justin Tahara
4f03e85c57 fix(llm): Cleaning up models (#5771) 2025-10-17 23:48:08 +00:00
Nikolas Garza
c3411fb28d feat: read latest permission sync from the frontend (#5687)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.attlocal.net>
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MBP.attlocal.net>
2025-10-17 23:43:55 +00:00
Richard Guan
b3d1b1f4aa chore(langfuse): tracing (#5753) 2025-10-17 22:44:22 +00:00
Justin Tahara
cbb86c12aa fix(bedrock): Make Region Selectable (#5770) 2025-10-17 22:26:24 +00:00
Chris Weaver
8fd606b713 fix: documents in chat flow (#5762) 2025-10-17 13:56:38 -07:00
Nikolas Garza
d69170ee13 chore: cleanup some console.logs (#5766)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.attlocal.net>
2025-10-17 13:52:44 -07:00
Justin Tahara
e356c5308c fix(ui): Cleaning up the Edit Action page (#5765) 2025-10-17 13:52:36 -07:00
Wenxi
3026ac8912 feat: blob connector and test enhancements (#5746) 2025-10-17 13:52:03 -07:00
Justin Tahara
0cee7c849f feat(curators): Allow curators to customize Actions (#5752) 2025-10-17 19:07:58 +00:00
Yuhong Sun
14bfb7fd0c No Bash in Background Container (#5761) 2025-10-17 11:15:11 -07:00
Justin Tahara
804e48a3da fix(ui): Fix Available Methods Table (#5756) 2025-10-17 17:29:18 +00:00
SubashMohan
907271656e fix: Fix "Projects" new UI components (#5662) 2025-10-17 17:21:36 +00:00
Chris Weaver
1f11dd3e46 refactor: make OIDC / SAML MIT licensed (#5739) 2025-10-17 16:25:50 +00:00
Raunak Bhagat
048561ce0b fix: Fix colours for error page (#5758) 2025-10-17 09:35:34 -07:00
Raunak Bhagat
8718f10c38 fix: Fix all tooltips rendering raw text (#5755) 2025-10-17 09:34:46 -07:00
Evan Lohn
ab4d820089 feat: user info personalization (#5743) 2025-10-17 00:49:36 +00:00
Justin Tahara
77ae4f1a45 feat(users): Add User Counts (#5750) 2025-10-16 18:17:17 -07:00
Raunak Bhagat
8fd1f42a1c docs: Add a new standards file for the web directory (#5749) 2025-10-16 18:03:25 -07:00
Chris Weaver
b94c7e581b fix: quality checks (#5747)
Co-authored-by: Evan Lohn <evan@danswer.ai>
2025-10-16 16:58:24 -07:00
Wenxi
c90ff701dc chore: move gh non-secrets to vars (#5744) 2025-10-16 16:42:24 -07:00
Justin Tahara
b1ad58c5af fix(ui): Fix Invite Modal (#5748) 2025-10-16 16:30:48 -07:00
Eli Ben-Shoshan
345f9b3497 feat: added support to generate sha256 checksum before uploading file to object store (#5734)
Co-authored-by: Eli Ben-Shoshan <ebs@ufl.edu>
2025-10-16 14:36:12 -07:00
Justin Tahara
4671d18d4f fix(sso): Fix Logout UI (#5741) 2025-10-16 17:24:07 +00:00
Wenxi
f0598be875 fix: s3 connector citation bugs (#5740) 2025-10-16 10:08:13 -07:00
Justin Tahara
eb361c6434 feat(onboarding): Pin Featured Agents to New Users (#5736) 2025-10-15 16:11:56 -07:00
Nikolas Garza
e39b0a921c feat: plumb auto sync permission attempts to celery tasks (#5686)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.attlocal.net>
2025-10-15 21:15:35 +00:00
Jessica Singh
2dd8a8c788 fix(slack bot ui): update tokens dark mode (#5728)
Co-authored-by: Raunak Bhagat <r@rabh.io>
2025-10-15 21:14:41 +00:00
Raunak Bhagat
8b79e2e90b feat: Unpin agent (#5721)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-10-15 11:20:49 -07:00
Nikolas Garza
d05941d1bd feat: basic db methods to create, update, delete permission sync attempts (#5682)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.attlocal.net>
2025-10-15 17:28:18 +00:00
Raunak Bhagat
50070fb264 refactor: Make colours in AppSidebar darker (#5725) 2025-10-15 10:15:36 -07:00
edwin-onyx
5792d8d5ed fix(infra): consolidate more celery workers into background worker for default lightweight mode (#5718) 2025-10-15 15:48:18 +00:00
Raunak Bhagat
e1c4b33cf7 fix: Edit AccessRestrictedPage component to render the proper colours (#5724) 2025-10-15 03:10:47 +00:00
Richard Guan
2c2f6e7c23 feat(framework): simple agent to feature flags (#5692)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-10-14 20:27:07 -07:00
Raunak Bhagat
3d30233d46 fix: Edit rendering issues with attached files (CSVs, images, text files, all other files) (#5708)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-10-14 18:26:35 -07:00
Justin Tahara
875f8cff5c feat(google drive): Add small log (#5723) 2025-10-14 17:21:22 -07:00
Raunak Bhagat
6e4686a09f fix: Update colour of checkbox (#5707) 2025-10-14 22:47:31 +00:00
Justin Tahara
237c18e15e fix(ui): Fix Assistant Image not showing up in Sidebar (#5722) 2025-10-14 21:47:42 +00:00
Justin Tahara
a71d80329d fix(admin): Properly show Unique User count (#5717) 2025-10-14 21:44:09 +00:00
Justin Tahara
91c392b4fc fix(logout): Fix logout again for new UI (#5719) 2025-10-14 21:28:49 +00:00
Justin Tahara
a25df4002d fix(document sets): Delete Federated Slack Document Sets (#5716) 2025-10-14 18:45:57 +00:00
Nikolas Garza
436a5add88 feat: tables/migration for permission syncing attempts (#5681)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.attlocal.net>
2025-10-14 18:43:02 +00:00
edwin-onyx
3a4bb239b1 fix(infra): consolidate heavy, monitoring, and user file worker into one (#5558)
Co-authored-by: Edwin Luo <edwin@parafin.com>
2025-10-14 01:19:47 +00:00
Raunak Bhagat
2acb4cfdb6 fix: Fix selector colour (#5705) 2025-10-14 00:41:27 +00:00
Justin Tahara
f1d626adb0 fix(ui): Updated Document Sets UI (#5706) 2025-10-14 00:27:21 +00:00
Justin Tahara
5ca604f186 fix(slack): Fix Fed Slack Gear Button Error (#5704) 2025-10-13 23:16:26 +00:00
Chris Weaver
c19c76c3ad feat: tenant cleanup (#5703) 2025-10-13 14:19:02 -07:00
Justin Tahara
4555f6badc fix(entra): JWT Passthrough for Entra (#5697) 2025-10-13 19:57:11 +00:00
Justin Tahara
71bd643537 fix(helm): File Processing fix for helm (#5696) 2025-10-12 15:14:31 -07:00
SubashMohan
23f70f0a96 fix(indexing page): Improve page loading time (#5695) 2025-10-11 10:16:28 -07:00
Evan Lohn
c97672559a feat: org info (#5694) 2025-10-11 04:10:52 +00:00
Evan Lohn
243f0bbdbd fix: assorted mcp improvements (#5684) 2025-10-11 03:51:00 +00:00
Chris Weaver
0a5ca7f1cf feat: gemini-embedding-001 + search settings fixes (#5691) 2025-10-10 18:04:17 -07:00
Justin Tahara
8d56d213ec fix(ui): Update UI change (#5688) 2025-10-10 23:24:20 +00:00
Richard Guan
cea2ea924b feat(Simple Agent): [part1 - backwards compatible changes] (#5569) 2025-10-10 22:06:54 +00:00
Richard Guan
569d205e31 feat(flags): posthog feature flags (#5690) 2025-10-10 21:54:07 +00:00
Chris Weaver
9feff5002f fix: chat tweaks (#5685) 2025-10-09 22:04:56 -07:00
Nikolas Garza
a1314e49a3 fix: use system node version for prettier pre-commit hook (#5679)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.attlocal.net>
2025-10-09 17:21:19 -07:00
Nikolas Garza
463f839154 fix: show canceled status when indexing is canceled (#5675)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.attlocal.net>
2025-10-09 17:02:53 -07:00
Nikolas Garza
5a0fe3c1d1 fix: surface user friendly model names for bedrock models (#5680)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.attlocal.net>
2025-10-09 17:00:11 -07:00
Raunak Bhagat
8ac5c86c1e fix: Fix failing tests (#5628) 2025-10-09 22:34:49 +00:00
Chris Weaver
d803b48edd feat: Increase nginx default timeout (#5677) 2025-10-09 22:13:18 +00:00
Wenxi
bc3adcdc89 fix: non-image gen models and add test (#5678) 2025-10-09 14:38:15 -07:00
Raunak Bhagat
95e27f1c30 refactor: Clean up some context files (#5672) 2025-10-09 19:27:29 +00:00
Justin Tahara
d0724312db fix(helm): Removing duplicate exec (#5676) 2025-10-09 12:16:14 -07:00
Justin Tahara
5b1021f20b fix(eml): Fixing EML to Text Function (#5674) 2025-10-09 10:59:26 -07:00
Chris Weaver
55cdbe396f fix: action toggle (#5670) 2025-10-08 19:03:06 -07:00
Raunak Bhagat
e8fe0fecd2 feat: Update formik colours (action + danger) new new colour palette (#5668) 2025-10-08 14:42:26 -07:00
SubashMohan
5b4fc91a3e Fix/doc id migration task (#5620) 2025-10-08 14:40:12 -07:00
Raunak Bhagat
afd2d8c362 fix: Comment out keystroke hijacking (#5659) 2025-10-08 11:59:27 -07:00
Evan Lohn
8a8cf13089 feat: attachments are separate docs (#5641) 2025-10-08 10:29:06 -07:00
Evan Lohn
c7e872d4e3 feat: selectively run sf and hubspot tests (#5657) 2025-10-08 10:25:00 -07:00
Wenxi
1dbe926518 chore: bump dependabot backlog (#5653) 2025-10-08 09:57:27 -07:00
Chris Weaver
d095bec6df fix: deep research hiding logic (#5660) 2025-10-08 09:05:35 -07:00
Raunak Bhagat
58e8d501a1 fix: Add settings sections back (#5661) 2025-10-08 02:54:07 +00:00
Chris Weaver
a39782468b refactor: improve modal behavior (#5649) 2025-10-07 19:10:17 -07:00
Justin Tahara
d747b48d22 fix(Blob Storage): Add Chunking + Size Limits (#5638) 2025-10-07 19:02:01 -07:00
Yuhong Sun
817de23854 Remove document seeding (#5656) 2025-10-07 18:41:39 -07:00
Richard Guan
6474d30ba0 fix(braintrust): dont decorate generate and clean up unused code (#5636) 2025-10-07 15:11:47 -07:00
Paulius Klyvis
6c9635373a fix: ensure now and dt are in utc in gtp search (#5605) 2025-10-07 14:50:55 -07:00
Wenxi
1a945b6f94 chore: update comm links (#5650) 2025-10-07 14:40:54 -07:00
Chris Weaver
526c76fa08 fix: assistant creation (#5648) 2025-10-07 14:20:44 -07:00
Justin Tahara
932e62531f fix(UI): Update User Settings Model Selection (#5630) 2025-10-07 14:11:24 -07:00
edwin-onyx
83768e2ff1 fix(infra): lazy load nltk and some more (#5634) 2025-10-07 13:53:50 -07:00
Chris Weaver
f23b6506f4 fix: sidebar state persistence (#5647) 2025-10-07 13:25:16 -07:00
Chris Weaver
5f09318302 fix: pin text + create click (#5646) 2025-10-07 12:52:51 -07:00
Justin Tahara
674e789036 fix(playwright): Update Email Password Form (#5644) 2025-10-07 12:18:08 -07:00
Chris Weaver
cb514e6e34 fix: align text with icon (#5645) 2025-10-07 12:08:04 -07:00
Justin Tahara
965dad785c fix(images): Update Image Gen workflow after Refactor (#5631) 2025-10-07 12:03:48 -07:00
Chris Weaver
c9558224d2 feat: improved markdown spacing (#5643) 2025-10-07 12:02:24 -07:00
Chris Weaver
c2dbd3fd1e fix: slack bot creation (#5637) 2025-10-07 11:40:25 -07:00
Wenxi
d27c2b1b4e chore: update contributing readmes (#5635) 2025-10-07 10:24:58 -07:00
edwin-onyx
8c52444bda fix(infra): lazy load and don't warm up model server models (#5527)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-10-07 09:07:40 -07:00
Justin Tahara
b4caa85cd4 fix(infra): Nginx updates (#5627) 2025-10-06 13:37:23 -07:00
Wenxi
57163dd936 chore: pin prettier pre-commit version and run on web for comm prs (#5624) 2025-10-06 12:08:46 -07:00
Chris Weaver
15f2a0bf60 fix: regression (#5625) 2025-10-06 12:02:31 -07:00
Justin Tahara
aeae7ebdef fix(SAML): Add additional Email Fields (#5557) 2025-10-06 12:00:43 -07:00
Raunak Bhagat
eaa14a5ce0 feat: UI Refresh (#5529)
Co-authored-by: SubashMohan <subashmohan75@gmail.com>
2025-10-05 23:04:14 -07:00
Shahar Mazor
b07c834e83 Add RTL support (#5609) 2025-10-05 12:24:38 -07:00
Chris Weaver
97cd308ef7 fix: remove duplicate "file" option in Add Connector page (#5612) 2025-10-05 11:59:39 -07:00
Nils
28cdab7a70 feat: support SharePoint Teams URLs (#5498)
Co-authored-by: nsklei <nils.kleinrahm@pledoc.de>
2025-10-05 11:34:35 -07:00
Chris Weaver
ad9aa01819 ci: adjust latest/edge tags (#5610) 2025-10-04 14:30:40 -07:00
Justin Tahara
508a88c8d7 fix(helm): Migrate from Bitanmi NGINX (#5599) 2025-10-03 17:45:59 -07:00
Justin Tahara
b6f81fbb8e fix(SSO): Logout funtionality fixed (#5600) 2025-10-03 16:58:51 -07:00
Chris Weaver
b9b66396ec fix: try reduce playwright flake (#5598) 2025-10-03 13:10:32 -07:00
Justin Tahara
dd20b9ef4c fix(helm): MinIO Migration from Bitnami (#5597) 2025-10-03 12:32:27 -07:00
Evan Lohn
e1f7e8cacf feat: better interface for slim connectors (#5592) 2025-10-03 10:51:14 -07:00
Justin Tahara
fd567279fd fix(helm): Chart dependency for DB chart (#5596) 2025-10-03 10:43:27 -07:00
Justin Tahara
1427eb3cf0 fix(helm): Remove Bitnmai Dependency for DB Charts (#5593) 2025-10-03 10:38:13 -07:00
trial-danswer
e70be0f816 feat: add serper web search provider (#5545) 2025-10-03 10:35:33 -07:00
Justin Tahara
0014c7cff7 Revert "fix(github): Revert cache being turned off" (#5594) 2025-10-03 09:48:44 -07:00
Richard Guan
1c23dbeaee fix(mcp): asyncio simple sync run (#5591) 2025-10-03 01:29:57 +00:00
Chris Weaver
b2b122a24b fix: jira perm sync (#5585) 2025-10-02 16:32:22 -07:00
Wenxi
033ae74b0e fix: allow web connector to recurse www even if not specified (#5584) 2025-10-02 16:14:54 -07:00
Evan Lohn
c593fb4866 fix(github): Revert cache being turned off (#5589) 2025-10-02 15:36:01 -07:00
trial-danswer
b9580ef346 feat: Add download users (#5563) 2025-10-02 15:30:27 -07:00
Wenxi
4df3a9204f fix: reindex logic and allow seeded docs to refresh (#5578) 2025-10-02 15:22:24 -07:00
Wenxi
e0ad313a60 chore: bump playwright version (#5581) 2025-10-02 15:16:54 -07:00
Evan Lohn
a2bfb46edd fix: deny invalid space keys (#5570) 2025-10-02 15:01:17 -07:00
Evan Lohn
25e3371bee fix: minor mcp fixes + test (#5564) 2025-10-02 13:05:56 -07:00
Chris Weaver
4b9b306140 feat: enable DR by default (#5576) 2025-10-02 12:53:16 -07:00
Wenxi
ccf55136be feat: ollama official support (#5509) 2025-10-02 10:47:16 -07:00
Evan Lohn
a13db828f3 Revert "fix(github): Revert cache being turned off" (#5575) 2025-10-02 10:22:43 -07:00
SubashMohan
b7d56d0645 increase docid migration task priority (#5571) 2025-10-02 21:42:55 +05:30
SubashMohan
9ac70d35a8 Fix/indexattempt deletion failure (#5573) 2025-10-02 08:42:16 -07:00
Evan Lohn
7da792dd27 fix: user info 404s (#5567) 2025-10-01 23:20:31 +00:00
Chris Weaver
136c2f4082 Skip flakey test (#5566) 2025-10-01 15:14:18 -07:00
edwin-onyx
67bd14e801 fix(infra): lazy import litellm and some more pkgs and add layer to connector instantiation for lazy loading (#5488)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-10-01 15:05:27 -07:00
Evan Lohn
8c9a20be7a fix: atlassian scoped tokens (#5483) 2025-10-01 14:53:28 -07:00
Wenxi
0427845502 fix: weird text wrapping (#5565) 2025-10-01 14:41:49 -07:00
Chris Weaver
a85a5a324e More prod fixes (#5556) 2025-09-30 14:43:21 -07:00
SubashMohan
78f1fb5bf4 fix(projects): Fix Migration (#5550) 2025-09-30 12:56:40 -07:00
Evan Lohn
6a8a214324 fix: avoid attempting to retrieve with non-org owners (#5555) 2025-09-30 12:55:03 -07:00
Justin Tahara
884266c009 fix(saml): Update the route to take GET's and transform to POST (#5554) 2025-09-30 11:28:07 -07:00
Chris Weaver
2c422215e6 Fix prod compose (#5553) 2025-09-30 10:41:38 -07:00
joachim-danswer
32fe185bb4 fix: set gpt-5 thinking setting (#5539) 2025-09-30 09:57:36 -07:00
Chris Weaver
c2758a28d5 fix: project migration tweak (#5544) 2025-09-29 19:57:09 -07:00
Justin Tahara
5cda2e0173 feat(LLM): Add Claude Sonnet 4.5 (#5543) 2025-09-29 17:58:18 -07:00
Evan Lohn
9e885a68b3 feat: mcp client v2 (#5481) 2025-09-29 17:01:32 -07:00
Justin Tahara
376fc86b0c fix(saml): GET Method for SAML Callback (#5538) 2025-09-29 15:08:44 -07:00
Chris Weaver
2eb1444d80 fix: more test hardening (#5537) 2025-09-29 13:54:56 -07:00
SubashMohan
bd6ebe4718 feat(chat): add popup handling for image file selection in ChatInputBar (#5536) 2025-09-29 11:02:18 -07:00
Chris Weaver
691d63bc0f fix: remove console.log (#5533) 2025-09-29 10:54:54 -07:00
Chris Weaver
dfd4d9abef fix: playwright tests (#5522) 2025-09-29 09:04:10 -07:00
SubashMohan
4cb39bc150 fix chat issue and change view icon (#5525) 2025-09-29 12:28:07 +05:30
Chris Weaver
4e357478e0 fix: package-lock.json (#5530) 2025-09-28 13:43:42 -07:00
Wenxi
b5b1b3287c fix: update package lock after projects merge (#5514) 2025-09-28 13:00:32 -07:00
Wenxi
2f58a972eb fix: launch template post projects merge (#5528) 2025-09-28 12:57:54 -07:00
Yuhong Sun
6b39d8eed9 Docker Version Check (#5523) 2025-09-27 19:03:43 -07:00
Chris Weaver
f81c34d040 fix: editing/regeneration (#5521) 2025-09-27 17:43:03 -07:00
Yuhong Sun
0771b1f476 SQL plaintext file (#5520) 2025-09-27 15:36:44 -07:00
Jessica Singh
eedd2ba3fe fix(source selection): enable all by default and persist choice (#5511) 2025-09-26 17:15:40 -07:00
Chris Weaver
98554e5025 feat: small projects UX tweaks (#5513) 2025-09-26 15:33:37 -07:00
Justin Tahara
dcd2cad6b4 fix(infra): Increment Helm Version for Projects (#5512) 2025-09-26 13:59:27 -07:00
Chris Weaver
189f4bb071 fix: add bitbucket env vars (#5510) 2025-09-26 12:38:59 -07:00
SubashMohan
7eeab8fb80 feat(projects): add project creation and management (#5248)
Co-authored-by: Weves <chrisweaver101@gmail.com>
2025-09-26 12:05:20 -07:00
Justin Tahara
60f83dd0db fix(gmail): Skip over emails that don't have gmail enabled (#5506) 2025-09-25 19:57:47 -07:00
Jessica Singh
2618602fd6 fix(source filter): dark mode support (#5505) 2025-09-25 18:10:48 -07:00
Chris Weaver
b80f96de85 fix: LlmPopover after filling in an initial model (#5504) 2025-09-25 17:09:22 -07:00
edwin-onyx
74a15b2c01 fix(infra): fix some dependency hells and add some lazy loading to reduce celery worker RAM usage (#5478)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-09-25 16:12:26 -07:00
Jessica Singh
408b80ce51 feat(source selection): adding source selection for internal search in chat (#5455) 2025-09-25 16:12:02 -07:00
Wenxi
e82b68c1b0 fix: update seeded docs connector name (#5502) 2025-09-25 15:58:54 -07:00
Justin Tahara
af5eec648b fix(playwright): Add new fix for Playwright test (#5503) 2025-09-25 15:34:24 -07:00
Chris Weaver
d186c5e82e feat(docker): Add DEV_MODE flag for exposing service ports (#5499)
Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: justin-tahara <justintahara@gmail.com>
2025-09-25 15:08:20 -07:00
Justin Tahara
4420a50aed fix(github): Revert cache being turned off (#5487) 2025-09-25 14:07:58 -07:00
Justin Tahara
9caa6ea7ff feat(infra): Default to HPA w/ KEDA option (#5480) 2025-09-25 11:58:19 -07:00
Yuhong Sun
8d7b217d33 Deployment README (#5496) 2025-09-25 11:34:30 -07:00
Yuhong Sun
57908769f1 Port 80 (#5495) 2025-09-25 11:10:41 -07:00
Yuhong Sun
600cec7c89 Robust Install (#5494) 2025-09-25 10:08:52 -07:00
Yuhong Sun
bb8ea536c4 Update README.md (#5492) 2025-09-25 09:05:50 -07:00
Yuhong Sun
f97869b91e README (#5486) 2025-09-24 20:33:36 -07:00
Justin Tahara
aa5be56884 fix(github): Remove the Backport workflow (#5484) 2025-09-24 19:33:18 -07:00
Justin Tahara
7580178c95 fix(github): Fix Integration Tests (#5485) 2025-09-24 19:30:07 -07:00
Yuhong Sun
2e0bc8caf0 feat: Easy Install (#5461) 2025-09-24 15:31:45 -07:00
Chris Weaver
f9bd03c7f0 refactor: change venv activation (#5463) 2025-09-23 16:07:46 -07:00
Jessica Singh
77466e1f2b feat(slack bot): add federated search (#5275)
Co-authored-by: Jessica Singh <jessicasingh@Mac.attlocal.net>
Co-authored-by: Jessica Singh <jessicasingh@mac.lan>
2025-09-22 19:19:44 -07:00
Justin Tahara
8dd79345ed fix(sharepoint): Add secondary filter for embedded images (#5473) 2025-09-22 18:48:47 -07:00
Justin Tahara
a049835c49 fix(processing): Mime types for Image Summarization (#5471) 2025-09-22 18:48:31 -07:00
Yuhong Sun
d186d8e8ed Remove incredibly strict password reqs (#5470) 2025-09-22 17:25:34 -07:00
Yuhong Sun
082897eb9b Fix Toggles (#5469) 2025-09-22 17:09:43 -07:00
Yuhong Sun
e38f79dec5 Remove confusing text (#5468) 2025-09-22 15:37:54 -07:00
SubashMohan
26e7bba25d Fix/connector page stack depth limit (#5417) 2025-09-22 19:23:53 +05:30
edwin-onyx
3cde4ef77f fix(infra): create pre commit script and port vertex as lazy import (#5453)
Co-authored-by: Claude <noreply@anthropic.com>
2025-09-21 20:43:28 -07:00
Evan Lohn
f4d135d710 fix: sharepoint memory via excel parsing (#5444) 2025-09-19 17:10:27 -07:00
Richard Guan
6094f70ac8 fix: braintrust masking was over truncating (#5458)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-09-19 14:10:29 -07:00
Richard Guan
a90e58b39b feat: braintrust tracing (#5450) 2025-09-18 18:17:38 -07:00
Evan Lohn
e82e3141ed feat: zendesk rate limiting (#5452) 2025-09-18 16:35:48 -07:00
edwin-onyx
f8e9060bab fix(infra): remove transformers dependency for api server (#5441)
Co-authored-by: Edwin Luo <edwinluo@3ef5a334-3d74-4dbf-b1c8-d57dc87d5638.attlocal.net>
Co-authored-by: Claude <noreply@anthropic.com>
2025-09-18 12:59:12 -07:00
Jessica Singh
24831fa1a1 fix(slack): swapped checkpoint index (#5427) 2025-09-18 11:09:31 -07:00
edwin-onyx
f6a0e69b2a fix(infra): remove setfit dependency from api server (#5449) 2025-09-17 23:48:47 -07:00
Richard Guan
0394eaea7f fix: copy over tests/__init__.py on docker build (#5443) 2025-09-17 17:12:03 -07:00
Wenxi
898b8c316e feat: docs link on connector creation (#5447) 2025-09-17 17:06:35 -07:00
Chris Weaver
4b0c6d1e54 fix: image gen tool causing error (#5445) 2025-09-17 16:39:54 -07:00
Justin Tahara
da7dc33afa fix(Federated Slack): Persist Document Set for Federated Connectors (#5442) 2025-09-17 13:52:11 -07:00
Richard Guan
c558732ddd feat: eval pipeline (#5369) 2025-09-17 12:17:14 -07:00
Chris Weaver
339ad9189b fix: slackbot error (#5430) 2025-09-16 23:25:34 -07:00
Richard Guan
32d5e408b8 fix: HF Cache Warmup Fix and Celery Pool Management (#5435) 2025-09-16 18:57:52 -07:00
Justin Tahara
14ead457d9 fix(infra): Update chart releaser (#5434) 2025-09-16 16:58:43 -07:00
Justin Tahara
458cd7e832 fix(infra): Add KEDA Dependency (#5433) 2025-09-16 16:52:24 -07:00
Justin Tahara
770a2692e9 Revert "fix(infra): Add KEDA Dependency" (#5432) 2025-09-16 16:48:18 -07:00
Justin Tahara
5dd99b6acf fix(infra): Add KEDA Dependency (#5431) 2025-09-16 16:45:41 -07:00
Chris Weaver
6c7eb89374 fix: remove credential file log (#5429) 2025-09-16 15:48:33 -07:00
eric-zadara
fd11c16c6d feat(infra): Decouple helm chart from bitnami (#5200)
Co-authored-by: eric-zadara <eric-zadara@users.noreply.github.com>
2025-09-16 14:56:04 -07:00
Chris Weaver
11ec603c37 fix: Improve datetime replacement (#5425)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-09-16 14:49:06 -07:00
Justin Tahara
495d4cac44 feat(infra): Migrate from HPA to KEDA for all Services (#5370) 2025-09-16 13:56:50 -07:00
Wenxi Onyx
fd2d74ae2e onyx mcp server 2025-09-16 11:06:00 -07:00
Evan Lohn
4c7a2e486b fix: skip huge files on sdk fallback (#5421) 2025-09-15 18:24:06 -07:00
Chris Weaver
01e0ba6270 fix: tool seeding migration (#5422) 2025-09-15 16:46:01 -07:00
Wenxi
227dfc4a05 fix: skip excluded img files in sharepoint (#5418) 2025-09-15 11:30:19 -07:00
Chris Weaver
c3702b76b6 docs: add agent files (#5412) 2025-09-14 20:07:18 -07:00
Chris Weaver
bb239d574c feat: single default assistant (#5351) 2025-09-14 20:05:33 -07:00
Chris Weaver
172e5f0e24 feat: Move reg IT to parallel + blacksmith and have MIT only run on merge q… (#5413) 2025-09-13 17:33:45 -07:00
Nils
26b026fb88 SharePoint Connector Fix - Nested Subfolder Indexing (#5404)
Co-authored-by: nsklei <nils.kleinrahm@pledoc.de>
2025-09-13 11:33:01 +00:00
joachim-danswer
870629e8a9 fix: Azure adjustment (#5410) 2025-09-13 00:03:37 +00:00
danielkravets
a547112321 feat: bitbucket connector (#5294) 2025-09-12 18:15:09 -07:00
joachim-danswer
da5a94815e fix: initial response quality, particularly for General assistant (#5399) 2025-09-12 00:14:49 -07:00
Jessica Singh
e024472b74 fix(federated-slack): pass in valid query (#5402) 2025-09-11 19:27:43 -07:00
Chris Weaver
e74855e633 feat: use private registry (#5401) 2025-09-11 18:20:56 -07:00
Justin Tahara
e4c26a933d fix(infra): Fix helm test timeout (#5386) 2025-09-11 18:19:07 -07:00
Chris Weaver
36c96f2d98 fix: playwright (#5396) 2025-09-11 14:06:03 -07:00
Justin Tahara
1ea94dcd8d fix(security): Remove Hard Fail from Trivy (#5394) 2025-09-11 10:35:26 -07:00
Wenxi
2b1c5a0755 fix: remove unneeded dependency from requirements (#5390) 2025-09-10 21:49:02 -07:00
Chris Weaver
82b5f806ab feat: Improve migration (#5391) 2025-09-10 19:29:11 -07:00
Chris Weaver
6340c517d1 fix: missing connectors section (#5387)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-09-10 19:28:56 -07:00
joachim-danswer
3baae2d4f0 fix: tf/dr flow improvements (#5380) 2025-09-10 16:39:19 -07:00
Chris Weaver
d7c223ddd4 feat: playwright test speed improvement (#5388) 2025-09-10 16:19:56 -07:00
Chris Weaver
df4917243b fix: parallelized IT (#5389) 2025-09-10 14:37:36 -07:00
Justin Tahara
a79ab713ce feat(infra): Adding rety to Trivy tests (#5383) 2025-09-10 14:13:58 -07:00
Chris Weaver
d1f7cee959 feat: parallelized integration tests (#5021)
Co-authored-by: Claude <noreply@anthropic.com>
2025-09-10 12:15:02 -07:00
Justin Tahara
a3f41e20da feat(infra): Add Node Selector option to all Templates (#5384) 2025-09-10 10:23:54 -07:00
Chris Weaver
458ed93da0 feat: remove prompt table (#5348) 2025-09-10 10:21:57 -07:00
Chris Weaver
273d073bd7 fix: non-image gen models (#5381) 2025-09-09 15:52:03 -07:00
Wenxi
9455c8e5ae fix: add back reverted changes to readme (#5377) 2025-09-09 10:23:33 -07:00
Justin Tahara
d45d4389a0 Revert "fix: update contribution guide" (#5376)
Co-authored-by: Wenxi <wenxi@onyx.app>
2025-09-09 09:37:16 -07:00
Chris Weaver
bd901c0da1 fix: playwright tests (#5372)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-09-09 00:29:52 -07:00
Wenxi
2192605c95 feat: Bedrock API Keys & filter available models (#5343)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-09-08 18:50:04 -07:00
Wenxi
d248d2f4e9 refactor: update seeded docs (#5364)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-09-08 18:06:29 -07:00
Chris Weaver
331c53871a fix: image gen display (#5367) 2025-09-08 17:47:17 -07:00
SubashMohan
f62d0d9144 feat(admin/connectors): Disable Auto Sync for unsupported auth; add disabled dropdown + tooltip (#5358) 2025-09-08 21:39:47 +00:00
Chris Weaver
427945e757 fix: model server build (#5362) 2025-09-08 14:00:33 -07:00
Wenxi
e55cdc6250 fix: new docs links (#5363) 2025-09-08 13:49:19 -07:00
sktbcpraha
6a01db9ff2 fix: IMAP - mail processing fixes (#5360) 2025-09-08 12:11:09 -07:00
Richard Guan
82e9df5c22 fix: various bug bash improvements (#5330) 2025-09-07 23:17:01 -07:00
Chris Weaver
16c2ef2852 feat: Make usage report gen a background job (#5342) 2025-09-07 14:44:40 -07:00
Edwin Luo
224a70eea9 fix: update contribution guide (#5354) 2025-09-07 13:06:37 -07:00
Chris Weaver
c457982120 fix: connector tests (#5353) 2025-09-07 11:57:34 -07:00
Chris Weaver
0649748da2 fix: playwright tests (#5352) 2025-09-07 11:24:26 -07:00
Wenxi
ddceddaa28 chore: bump litellm to fix self-hosted inference (#5349) 2025-09-06 19:29:26 -07:00
Evan Lohn
c6733a5026 fix: handle new error type (#5345) 2025-09-06 18:26:54 -07:00
Wenxi
7db744a5de refactor: simplify sharepoint document extraction (#5341) 2025-09-06 20:17:33 +00:00
Chris Weaver
cd2a8b0def Fix mypy (#5347) 2025-09-05 23:28:35 -07:00
Richard Guan
f15bc26cd6 fix: deep research and thoughtful assistant message context and trace all llm calls in langsmith (#5344)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-09-05 23:13:45 -07:00
Chris Weaver
65f35f0293 fix: whitelabeling (#5346) 2025-09-05 21:06:44 -07:00
joachim-danswer
4e3e608249 fix: Tweaks to Deep Research and some KG adjustments (#5305) 2025-09-06 00:57:26 +00:00
Richard Guan
719a092a12 fix: web search bugs [DAN-2351] (#5281) 2025-09-05 19:50:59 +00:00
wichmann-git
6a8fde7eb1 fix(teams): sanitize None displayName to 'Unknown User' before parsing (#5322) 2025-09-05 10:21:26 -07:00
Justin Tahara
4fdd0812a0 fix(admin): Block access to Custom Analytics Page (#5319)
Co-authored-by: Wenxi <wenxi@onyx.app>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-09-05 10:02:31 -07:00
Wenxi
4913dc1e85 fix: skip large sharepoint files (#5338) 2025-09-05 06:47:30 +00:00
Chris Weaver
4a43a9642e fix: citatons endpoint (#5336) 2025-09-04 21:51:52 -07:00
Evan Lohn
cc48a0c38e fix: jira cloud api v3 (#5337) 2025-09-04 21:50:35 -07:00
Chris Weaver
01ccfd2df7 fix: Try to avoid timeouts on image gen (#5316) 2025-09-04 16:14:28 -07:00
Wenxi
36d75786ee fix: honor freshdesk 429 (#5334) 2025-09-04 12:06:19 -07:00
Chris Weaver
f9bc38ba65 fix: Add back MCP (#5333) 2025-09-04 11:13:38 -07:00
Chris Weaver
3da283221d feat: Re-enable sentry (#5329) 2025-09-03 19:07:37 -07:00
Wenxi
90568d3bbb refactor: remove option to exclude citations from assistants (#5320) 2025-09-03 17:32:18 -07:00
Wenxi
7955ca938c fix: freshdesk password and rate limits (#5325) 2025-09-03 17:32:00 -07:00
Chris Weaver
f5d357eb28 fix: old send-message (#5328) 2025-09-03 16:25:38 -07:00
Evan Lohn
d83f616214 fix: incorrect assumptions about fields (#5324) 2025-09-03 21:58:46 +00:00
Chris Weaver
275c1bec3d fix: adjust search tool display (#5317) 2025-09-02 16:44:23 -07:00
Wenxi
7d1ef912e8 fix: allow chats to be moved out of chat groups (#5315)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-09-02 21:35:55 +00:00
Wenxi
2fe1d4c373 fix: better tool tips (#5314) 2025-09-02 19:39:10 +00:00
SubashMohan
2396ad309e fix: enhance SharePoint connector error handling and content retrieval (#5302) 2025-09-02 08:57:30 -07:00
Wenxi
0b13ef963a fix: allow web and file to show in results (#5290)
* allow web and file to show in results

* don't lag on backspacing 2nd char
2025-09-01 21:53:09 -07:00
Justin Tahara
83073f3ded fix(infra): Add Playwright Directory (#5313) 2025-09-01 19:35:48 -07:00
Wenxi
439a27a775 scroll forms on invalid submit (#5310) 2025-09-01 15:33:52 -07:00
Justin Tahara
91773a4789 fix(jira): Upgrade the Jira Python Version (#5309) 2025-09-01 15:33:03 -07:00
Chris Weaver
185beca648 Small center bar improvements (#5306) 2025-09-01 13:32:56 -07:00
Justin Tahara
2dc564c8df feat(infra): Add IAM support for Redis (#5267)
* feat: JIRA support for custom JQL filter (#5164)

* jira jql support

* jira jql fixes

* Address comment

---------

Co-authored-by: sktbcpraha <131408565+sktbcpraha@users.noreply.github.com>
2025-09-01 10:52:28 -07:00
Chris Weaver
b259f53972 Remove console-log (#5304) 2025-09-01 10:18:39 -07:00
Chris Weaver
f8beb08e2f Fix web build (#5303) 2025-09-01 10:18:06 -07:00
Evan Lohn
83c88c7cf6 feat: mcp client1 (#5271)
* working mcp implementation v1

* attempt openapi fix

* fastmcp
2025-09-01 09:52:35 -07:00
Chris Weaver
2372dd40e0 fix: small formatting fixes (#5300)
* SMall formatting fixes

* Fix mypy

* reorder imports
2025-08-31 23:19:22 -07:00
Chris Weaver
5cb6bafe81 Cleanup on ChatPage/MessagesDisply (#5299) 2025-08-31 21:29:17 -07:00
Mohamed Mathari
a0309b31c7 feat: Add Outline Connector (#5284)
* Outline

* fixConnector

* fixTest

* The date filtering is implemented correctly as client-side filtering, which is the only way to achieve it with the Outline API since it doesn't support date parameters natively.

* Update web/src/lib/connectors/connectors.tsx

Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>

* no connector config for outline

* Update backend/onyx/connectors/outline/client.py

Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>

* Fix all PR review issues: document ID prefixes, error handling, test assertions, and null guards

* Update backend/onyx/connectors/outline/client.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* The test no longer depends on external network connectivity to httpbin.org

* I've enhanced the OutlineApiClient.post() method in backend/onyx/connectors/outline/client.py to properly handle network-level exceptions that could crash the connector during synchronization:

* Polling mechanism

* Removed flag-based approach

* commentOnClasses

* commentOnClasses

* commentOnClasses

* responseStatus

* startBound

* Changed the method signature to match the interface

* ConnectorMissingCredentials

* Time Out shared config

* Missing Credential message

---------

Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-08-31 20:56:10 -07:00
Chris Weaver
0fd268dba7 fix: message render performance (#5297)
* Separate out message display into it's own component

* Memoize AIMessage

* Cleanup

* Remove log

* Address greptile/cubic comments
2025-08-31 19:49:53 -07:00
Wenxi
f345da7487 fix: make radios and checkboxes actually clickable (#5298)
* dont nest labels, use htmlfor, fix slackbot form bug

* fix playwright tests for improved labels
2025-08-31 19:16:25 -07:00
Chris Weaver
f2dacf03f1 fix: Chat Page performance improvements (#5295)
* CC performance improvements r2

* More misc chat performance improvements

* Remove unused import

* Remove silly useMemo

* Fix small shift

* Address greptile + cubic + subash comments

* Fix build

* Improve document sidebar

* Remove console.log

* Remove more logs

* Fix build
2025-08-31 14:29:03 -07:00
Wenxi
e0fef50cf0 fix: don't skip ccpairs if embedding swap in progress (#5189)
* don't skip ccpairs if embedding swap in progress

* refactor check_for_indexing to properly handle search setting swaps

* mypy

* mypy

* comment debugging log

* nits and more efficient active index attempt check
2025-08-29 17:17:36 -07:00
Chris Weaver
6ba3eeefa5 feat: center bar + tool force + tool disable (#5272)
* Exploration

* Adding user-specific assistant preferences

* Small fixes

* Improvements

* Reset forced tools upon switch

* Add starter messages

* Improve starter messages

* Add misisng file

* cleanup renaming imports

* Address greptile/cubic comments

* Fix build

* Add back assistant info

* Fix image icon

* rebase fix

* Color corrections

* Small tweak

* More color correction

* Remove animation for now

* fix test

* Fix coloring + allow only one forced tool
2025-08-29 17:17:09 -07:00
Richard Guan
aa158abaa9 . (#5286) 2025-08-29 17:07:50 -07:00
Wenxi
255c2af1d6 feat: reorganize connectors pages (#5186)
* Add popular connectors sections and cleanup connectors page

* Add other connectors env var

* other connectors env var to vscode env template

* update playwright tests

* sort by popuarlity

* recategorize and sort by popularity
2025-08-29 16:59:00 -07:00
Chris Weaver
9ece3b0310 fix: improve index attempts API (#5287)
* Improve index attempts API

* Fix import
2025-08-29 16:15:58 -07:00
joachim-danswer
9e3aca03a7 fix: various dr issues and comments (#5280)
* replacement of "message_delta" etc as Enums + removal

* prompt changes

* cubic fixes where appropriate

* schema fixes + citation symbols

* various fixes

* fix for kg context in new search

* cw comments

* updates
2025-08-29 15:08:23 -07:00
Wenxi
dbd5d4d8f1 fix: allow jira api v3 (#5285)
* allow jira api v3

* don't rely on api version for parsing issues and separate cloud and dc versions
2025-08-29 14:02:01 -07:00
Chris Weaver
cdb97c3ce4 fix: test_soft_delete_chat_session (#5283)
* Fix test_soft_delete_chat_session

* Fix flakiness
2025-08-29 09:01:55 -07:00
Chris Weaver
f30ced31a9 fix: IT (#5276)
* Fix IT

* test

* Fix test

* test

* fix

* Fix test
2025-08-28 20:42:14 -07:00
Wenxi
6cc6c43234 fix: explain why limit=None is appropriate for discord (#5278)
* explain why limit=None is appropriate for discord

* linting
2025-08-28 14:17:46 -07:00
Wenxi
224d934cf4 fix: ruff complaint about type comparison (#5279)
* ruff complaint about type comparison

* ruff complaint type comparison
2025-08-28 14:17:30 -07:00
Nigel Brown
8ecdc61ad3 fix: Explicitly add limit to the function calls (#5273)
* Explicitly add limit to the function calls
This means we miss fewer messages. The default limit is 100.

Signed-off-by: nigel brown <nigel@stacklok.com>

* Update backend/onyx/connectors/discord/connector.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

---------

Signed-off-by: nigel brown <nigel@stacklok.com>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-08-28 13:35:02 -07:00
Chris Weaver
08161db7ea fix: playwright tests (#5259)
* Fix playwright tests

* Address comment

* Fix
2025-08-27 23:23:55 -07:00
Richard Guan
b139764631 feat: Fast internet search (#5238)
* squash: combine all DR commits into one

Co-authored-by: Joachim Rahmfeld <joachim@onyx.app>
Co-authored-by: Rei Meguro <rmeguro@umich.edu>

* Fixes

* show KG in Assistant only if available

* KG only usable for KG Beta (for now)

* base file upload

* improvements

* raise error if uploaded context is too long

* More improvements

* Fix citations

* jank implementation of internet search with deep research that can kind of work

* early implementation for google api support

* .

* .

* .

* .

* .

* .

* .

* .

* .

* .

* .

* .

* .

* .

* .

* .

* .

* .

* .

* .

* .

* .

* .

* .

* .

* .

* .

* .

* .

* .

---------

Co-authored-by: Weves <chrisweaver101@gmail.com>
Co-authored-by: Joachim Rahmfeld <joachim@onyx.app>
Co-authored-by: Rei Meguro <rmeguro@umich.edu>
Co-authored-by: joachim-danswer <joachim@danswer.ai>
2025-08-27 20:03:02 -07:00
joachim-danswer
2b23dbde8d fix: small DR/Thoughtful mode fixes (#5269)
* fix budget calculation

* Internal custom tool fix + Okta special casing

* nits

* CW comments
2025-08-26 22:33:54 -07:00
Wenxi
2dec009d63 feat: add api/versions to onyx (#5268)
* add api/versions to onyx

* add test and rename onyx

* cubic nit

Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>

* move api version constants and add explanatory comment

---------

Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-08-26 18:14:54 -07:00
Chris Weaver
91eadae353 Fix logger startup (#5263) 2025-08-26 17:33:25 -07:00
Wenxi
8bff616e27 fix: clarify jql instructions and embed links (#5264)
* clarify jql instructions and embed links

* typo

* lint

* fix unit test
2025-08-26 17:27:07 -07:00
sktbcpraha
2c049e170f feat: JIRA support for custom JQL filter (#5164)
* jira jql support

* jira jql fixes
2025-08-26 12:44:39 -07:00
Oht8wooWi8yait9n
23e6d7ef3c Update gemini model names. (#5262)
Co-authored-by: Aaron Sells <aaron.b.sells@nasa.gov>
2025-08-26 12:33:02 -07:00
Chris Weaver
ed81e75edd fix: add jira auto-sync option in UI (#5260)
* Add jira auto-sync option in UI

* Fix build
2025-08-26 11:21:04 -07:00
Wenxi
de22fc3a58 remove dead code (#5261) 2025-08-26 11:14:12 -07:00
Cameron
009b7f60f1 Update date format used for fetching from Bookstack (#5221) 2025-08-26 09:49:38 -07:00
Chris Weaver
9d997e20df feat: frontend refactor + DR (#5225)
* squash: combine all DR commits into one

Co-authored-by: Joachim Rahmfeld <joachim@onyx.app>
Co-authored-by: Rei Meguro <rmeguro@umich.edu>

* Fixes

* show KG in Assistant only if available

* KG only usable for KG Beta (for now)

* base file upload

* raise error if uploaded context is too long

* improvements

* More improvements

* Fix citations

* better decision making

* improved decision-making in Orchestrator

* generic_internal tools

* Small tweak

* tool use improvements

* add on

* More image gen stuff

* fixes

* Small color improvements

* Markdown utils

* fixed end conditions (incl early exit for image generation)

* remove agent search + image fixes

* Okta tool support for reload

* Some cleanup

* Stream back search tool results as they come

* tool forcing

* fixed no-Tool-Assistant

* Support anthropic tool calling

* Support anthropic models better

* More stuff

* prompt fixes and search step numbers

* Fix hook ordering issue

* internal search fix

* Improve citation look

* Small UI improvements

* Improvements

* Improve dot

* Small chat fixes

* Small UI tweaks

* Small improvements

* Remove un-used code

* Fix

* Remove test_answer.py for now

* Fix

* improvements

* Add foreign keys

* early forcing

* Fix tests

* Fix tests

---------

Co-authored-by: Joachim Rahmfeld <joachim@onyx.app>
Co-authored-by: Rei Meguro <rmeguro@umich.edu>
Co-authored-by: joachim-danswer <joachim@danswer.ai>
2025-08-26 00:26:14 -07:00
Denizhan Dakılır
e6423c4541 Handle disabled auth in connector indexing status endpoint (#5256) 2025-08-25 16:42:46 -07:00
Wenxi
cb969ad06a add require_email_verification to values.yaml (#5249) 2025-08-25 22:02:49 +00:00
Sam Waddell
c4076d16b6 fix: update all log paths to reflect change related to non-root user (#5244) 2025-08-25 14:11:18 -07:00
Evan Lohn
04a607a718 ensure multi-tenant contextvar is passed (#5240) 2025-08-25 13:35:50 -07:00
Evan Lohn
c1e1aa9dfd fix: downloads are never larger than 20mb (#5247)
* fix: downloads are never larger than 20mb

* JT comments

* import to fix integration tests
2025-08-25 18:10:14 +00:00
Chris Weaver
1ed7abae6e Small improvement (#5250) 2025-08-25 08:07:36 +05:30
SubashMohan
cf4855822b Perf/indexing status page (#5142)
* indexing status optimization first draft

* refactor: update pagination logic and enhance UI for indexing status table

* add index attempt pruning job and display federated connectors in index status page

* update celery worker command to include index_attempt_cleanup queue

* refactor: enhance indexing status table and remove deprecated components

* mypy fix

* address review comments

* fix pagination reset issue

* add TODO for optimizing connector materialization and performance in future deployments

* enhance connector indexing status retrieval by adding 'get_all_connectors' option and updating pagination logic

* refactor: transition to paginated connector indexing status retrieval and update related components

* fix: initialize latest_index_attempt_docs_indexed to 0 in CCPairIndexingStatusTable component

* feat: add mock connector file support for indexing status retrieval and update indexing_statuses type to Sequence

* mypy fix

* refactor: rename indexing status endpoint to simplify API and update related components
2025-08-24 17:43:47 -07:00
Justin Tahara
e242b1319c fix(infra): Fixed RDS IAM Issue (#5245) 2025-08-22 18:13:12 -07:00
Justin Tahara
eba4b6620e feat(infra): AWS IAM Terraform (#5228)
* feat(infra): AWS IAM Terraform

* Fixing dependency issue

* Fixing more weird logic

* Final cleanup

* one change

* oops
2025-08-22 16:39:16 -07:00
Justin Tahara
3534515e11 feat(infra): Utilize AWS RDS IAM Auth (#5226)
* feat(infra): Utilize AWS RDS IAM Auth

* Update spacing

* Bump helm version
2025-08-21 17:35:53 -07:00
Justin Tahara
5602ff8666 fix: use only celery-shared for security context (#5236) (#5239)
* fix: use only celery-shared for security context

* fix: bump helm chart version 0.2.8

Co-authored-by: Sam Waddell <shwaddell28@gmail.com>
2025-08-21 17:25:06 -07:00
Sam Waddell
2fc70781b4 fix: use only celery-shared for security context (#5236)
* fix: use only celery-shared for security context

* fix: bump helm chart version 0.2.8
2025-08-21 14:15:07 -07:00
Justin Tahara
f76b4dec4c feat(infra): Ignoring local Terraform files (#5227)
* feat(infra): Ignoring local Terraform files

* Addressing some comments
2025-08-21 09:43:18 -07:00
Jessica Singh
a5a516fa8a refactor(model): move api-based embeddings/reranking calls out of model server (#5216)
* move api-based embeddings/reranking calls to api server out of model server, added/modified unit tests

* ran pre-commit

* fix mypy errors

* mypy and precommit

* move utils to right place and add requirements

* precommit check

* removed extra constants, changed error msg

* Update backend/onyx/utils/search_nlp_models_utils.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* greptile

* addressed comments

* added code enforcement to throw error

---------

Co-authored-by: Jessica Singh <jessicasingh@Mac.attlocal.net>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-08-20 21:50:21 +00:00
Sam Waddell
811a198134 docs: add non-root user info (#5224) 2025-08-20 13:50:10 -07:00
Sam Waddell
5867ab1d7d feat: add non-root user to backend and model-server images (#5134)
* feat: add non-root user to backend and model-server image

* feat: update values to support security context for index, inference, and celery_shared

* feat: add security context support for index and inference

* feat: add celery_shared security context support to celery worker templates

* fix: cache management strategy

* fix: update deployment files for volume mount

* fix: address comments

* fix: bump helm chart version for new security context template changes

* fix: bump helm chart version for new security context template changes

* feat: move useradd earlier in build for reduced image size

---------

Co-authored-by: Phil Critchfield <phil.critchfield@liatrio.com>
2025-08-20 13:49:50 -07:00
Jose Bañez
dd6653eb1f fix(connector): #5178 Add error handling and logging for empty answer text in Loopio Connector (#5179)
* fix(connector): #5178 Add error handling and logging for empty answer text in LoopioConnector

* fix(connector): onyx-dot-app#5178:  Improve handling of empty answer text in LoopioConnector

---------

Co-authored-by: Jose Bañez <jose@4gclinical.com>
2025-08-20 09:14:08 -07:00
Richard Guan
db457ef432 fix(admin): [DAN-2202] Remove users from invited users after accept (#5214)
* .

* .

* .

* .

* .

* .

* .

---------

Co-authored-by: Richard Guan <richardguan@Richards-MacBook-Pro.local>
Co-authored-by: Richard Guan <richardguan@Mac.attlocal.net>
2025-08-20 03:55:02 +00:00
Richard Guan
de7fe939b2 . (#5212)
Co-authored-by: Richard Guan <richardguan@Richards-MBP.lan>
2025-08-20 02:36:44 +00:00
Chris Weaver
38114d9542 fix: PDF file upload (#5218)
* Fix / improve file upload

* Address cubic comment
2025-08-19 15:16:08 -07:00
Justin Tahara
32f20f2e2e feat(infra): Add WAF implementation (#5213) (#5217)
* feat(infra): Add WAF implementation

* Addressing greptile comments

* Additional removal of unnecessary code
2025-08-19 13:01:40 -07:00
Justin Tahara
3dd27099f7 feat(infra): Add WAF implementation (#5213)
* feat(infra): Add WAF implementation

* Addressing greptile comments

* Additional removal of unnecessary code
2025-08-18 17:45:50 -07:00
Cameron
91c4d43a80 Move @types packages to devDependencies (#5210) 2025-08-18 14:34:09 -07:00
SubashMohan
a63ba1bb03 fix: sharepoint group not found error and url with apostrophe (#5208)
* fix: handle ClientRequestException in SharePoint permission utils and connector

* feat: enhance SharePoint permission utilities with logging and URL handling

* greptile typo fix

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* enhance group sync handling for public groups

---------

Co-authored-by: Wenxi <wenxi@onyx.app>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-08-18 17:12:59 +00:00
Evan Lohn
7b6189e74c corrected routing (#5202) 2025-08-18 16:07:28 +00:00
Evan Lohn
ba423e5773 fix: model server concurrency (#5206)
* fix: model server race cond

* fix async

* different approach
2025-08-18 16:07:16 +00:00
SubashMohan
fe029eccae chore: add SharePoint sync environment variables to integration test (#5197)
* chore: add SharePoint sync environment variables to integration test workflows

* fix cubic comments

* test: skip SharePoint permission tests for non-enterprise

* test: update SharePoint permission tests to skip for non-enterprise environments
2025-08-18 03:21:04 +00:00
Wenxi
ea72af7698 fix sharepoint tests (#5209) 2025-08-17 22:25:47 +00:00
Wenxi
17abf85533 fix unpaused user files (#5205) 2025-08-16 01:39:16 +00:00
Wenxi
3bd162acb9 fix: sharepoint tests and indexing logic (#5204)
* don't index onedrive personal sites in sharepoint

* fix sharepoint tests and indexing behavior

* remove print
2025-08-15 18:19:42 -07:00
Evan Lohn
664ce441eb generous timeout between docfetching finishing and docprocessing starting (#5201) 2025-08-15 15:43:01 -07:00
Wenxi
6863fbee54 fix: validate sharepoint connector with validate_connector_settings (#5199)
* validate sharepoint connector with validate_connector_settings

* fix test

* fix tests
2025-08-15 00:38:31 +00:00
Justin Tahara
bb98088b80 fix(infra): Fix Helm Chart Test (#5198) 2025-08-14 23:28:17 +00:00
Justin Tahara
ce8cb1112a feat(infra): Adding new AWS Terraform Template Code (#5194)
* feat(infra): Adding new AWS Terraform Template Code

* Addressing greptile comments

* Applying some updates after the cubic reviews as well

* Adding one detail

* Removing unused var

* Addressing more cubic comments
2025-08-14 16:47:15 -07:00
Nils
a605bd4ca4 feat: make sharepoint documents and sharepoint pages optional (#5183)
* feat: make sharepoint documents and sharepoint pages optional

* fix: address review feedback for PR #5183

* fix: exclude personal sites from sharepoint connector

---------

Co-authored-by: Nils Kleinrahm <nils.kleinrahm@pledoc.de>
2025-08-14 15:17:23 -07:00
Dominic Feliton
0e8b5af619 fix(connector): user file helm start cmd + legacy file connector incompatibility (#5195)
* Fix user file helm start cmd + legacy file connector incompatibility

* typo

* remove unnecessary logic

* undo

* make recommended changes

* keep comment

* cleanup

* format

---------

Co-authored-by: Dominic Feliton <37809476+dominicfeliton@users.noreply.github.com>
2025-08-14 13:20:19 -07:00
SubashMohan
46f3af4f68 enhance file processing with content type handling (#5196) 2025-08-14 08:59:53 +00:00
Evan Lohn
2af64ebf4c fix: ensure exception strings don't get swallowed (#5192)
* ensure exception strings don't get swallowed

* just send exception code
2025-08-13 20:05:16 +00:00
Evan Lohn
0eb1824158 fix: sf connector docs (#5171)
* fix: sf connector docs

* more sf logs

* better logs and new attempt

* add fields to error temporarily

* fix sf

---------

Co-authored-by: Wenxi <wenxi@onyx.app>
2025-08-13 17:52:32 +00:00
Chris Weaver
e0a9a6fb66 feat: okta profile tool (#5184)
* Initial Okta profile tool

* Improve

* Fix

* Improve

* Improve

* Address EL comments
2025-08-13 09:57:31 -07:00
Wenxi
fe194076c2 make default personas hideable (#5190) 2025-08-13 01:12:51 +00:00
Wenxi
55dc24fd27 fix: seeded total doc count (#5188)
* fix seeded total doc count

* fix seeded total doc count
2025-08-13 00:19:06 +00:00
Evan Lohn
da02962a67 fix: thread safe approach to docprocessing logging (#5185)
* thread safe approach to docprocessing logging

* unify approaches

* reset
2025-08-12 02:25:47 +00:00
SubashMohan
9bc62cc803 feat: sharepoint perm sync (#5033)
* sharepoint perm sync first draft

* feat: Implement SharePoint permission synchronization

* mypy fix

* remove commented code

* bot comments fixes and job failure fixes

* introduce generic way to upload certificates in credentials

* mypy fix

* add checkpoiting to sharepoint connector

* add sharepoint integration tests

* Refactor SharePoint connector to derive tenant domain from verified domains and remove direct tenant domain input from credentials

* address review comments

* add permission sync to site pages

* mypy fix

* fix tests error

* fix tests and address comments

* Update file extraction behavior in SharePoint connector to continue processing on unprocessable files
2025-08-11 16:59:16 +00:00
Evan Lohn
bf6705a9a5 fix: max tokens param (#5174)
* max tokens param

* fix unit test

* fix unit test
2025-08-11 09:57:44 -07:00
Rei Meguro
df2fef3383 fix: removal of old tags + is_list differentiation (#5147)
* initial migration

* getting metadata from tags

* complete migration

* migration override for cloud

* fix: more robust structured tag gen

* tag and indexing update

* fix: move is_list to tags

* migration rebase

* test cases + bugfix on unique constraint

* fix logging
2025-08-10 22:39:33 +00:00
SubashMohan
8cec3448d7 fix: restrict user file access to current user only (#5177)
* fix: restrict user file access to current user only

* fix: enhance user file access control for recent folder
2025-08-10 19:00:18 +00:00
Justin Tahara
b81687995e fix(infra): Removing invalid helm version (#5176) 2025-08-08 18:40:55 -07:00
Justin Tahara
87c2253451 fix(infra): Update github workflow to not tag latest (#5172)
* fix(infra): Update github workflow to not tag latest

* Cleaned up the code a bit
2025-08-08 23:23:55 +00:00
Wenxi
297c2957b4 add gpt 5 display names (#5175) 2025-08-08 16:58:47 -07:00
Wenxi
bacee0d09d fix: sanitize slack payload before logging (#5167)
* sanitize slack payload before logging

* nit
2025-08-08 02:10:00 +00:00
Evan Lohn
297720c132 refactor: file processing (#5136)
* file processing refactor

* mypy

* CW comments

* address CW
2025-08-08 00:34:35 +00:00
Evan Lohn
bd4bd00cef feat: office parsing markitdown (#5115)
* switch to markitdown untested

* passing tests

* reset file

* dotenv version

* docs

* add test file

* add doc

* fix integration test
2025-08-07 23:26:02 +00:00
Chris Weaver
07c482f727 Make starter messages visible on smaller screens (#5170) 2025-08-07 16:49:18 -07:00
Wenxi
cf193dee29 feat: support gpt5 models (#5169)
* support gpt5 models

* gpt5mini visible
2025-08-07 12:35:46 -07:00
Evan Lohn
1b47fa2700 fix: remove erroneous error case and add valid error (#5163)
* fix: remove erroneous error case and add valid error

* also address docfetching-docprocessing limbo
2025-08-07 18:17:00 +00:00
Wenxi Onyx
e1a305d18a mask llm api key from logs 2025-08-07 00:01:29 -07:00
Evan Lohn
e2233d22c9 feat: salesforce custom query (#5158)
* WIP merged approach untested

* tested custom configs

* JT comments

* fix unit test

* CW comments

* fix unit test
2025-08-07 02:37:23 +00:00
Justin Tahara
20d1175312 feat(infra): Bump Vespa Helm Version (#5161)
* feat(infra): Bump Vespa Helm Version

* Adding the Chart.lock file
2025-08-06 19:06:18 -07:00
justin-tahara
7117774287 Revert that change. Let's do this properly 2025-08-06 18:54:21 -07:00
justin-tahara
77f2660bb2 feat(infra): Update Vespa Helm Chart Version 2025-08-06 18:53:02 -07:00
Wenxi
1b2f4f3b87 fix: slash command slackbot to respond in private msg (#5151)
* fix slash command slackbot to respond in private msg

* rename confusing variable. fix slash message response in DMs
2025-08-05 19:03:38 -07:00
Evan Lohn
d85b55a9d2 no more scheduled stalling (#5154) 2025-08-05 20:17:44 +00:00
Justin Tahara
e2bae5a2d9 fix(infra): Adding helm directory (#5156)
* feat(infra): Adding helm directory

* one more fix
2025-08-05 14:11:57 -07:00
Justin Tahara
cc9c76c4fb feat(infra): Release Charts on Github Pages (#5155) 2025-08-05 14:03:28 -07:00
Chris Weaver
258e08abcd feat: add customization via env vars for curator role (#5150)
* Add customization via env vars for curator role

* Simplify

* Simplify more

* Address comments
2025-08-05 09:58:36 -07:00
Evan Lohn
67047e42a7 fix: preserve error traces (#5152) 2025-08-05 09:44:55 -07:00
SubashMohan
146628e734 fix unsupported character error in minio migration (#5145)
* fix unsupported character error in minio migration

* slash fix
2025-08-04 12:42:07 -07:00
Wenxi
c1d4b08132 fix: minio file names (#5138)
* nit var clarity

* maintain file names in connector config for display

* remove unused util

* migration draft

* optional file names to not break existing instances

* backwards compatible

* backwards compatible

* migration logging

* update file ocnn tests

* unncessary none

* mypy + explanatory comments
2025-08-01 20:31:29 +00:00
Justin Tahara
f3f47d0709 feat(infra): Creating new helm chart action workflow (#5137)
* feat(infra) Creating new helm chart action workflow

* Adding the steps

* Adding in dependencies

* One more debug

* Adding a new step to install helm
2025-08-01 09:26:58 -07:00
Justin Tahara
fe26a1bfcc feat(infra): Codeowner for Helm directory (#5139) 2025-07-31 23:05:46 +00:00
Wenxi
554cd0f891 fix: accept multiple zip types and fallback to extension (#5135)
* accept multiple zip types and fallback to extension

* move zip check to util

* mypy nit
2025-07-30 22:21:16 +00:00
Raunak Bhagat
f87d3e9849 fix: Make ungrounded types have a default name when sending to the frontend (#5133)
* Update names in map-comprehension

* Make default name for ungrounded types public

* Return the default name for ungrounded entity-types

* Update backend/onyx/db/entities.py

Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>

---------

Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-07-30 20:46:30 +00:00
Rei Meguro
72cdada893 edit link to custom actions (#5129) 2025-07-30 15:08:39 +00:00
SubashMohan
c442ebaff6 Feature/GitHub permission sync (#4996)
* github perm sync initial draft

* introduce github  doc sync and perm sync

* remove specific start time check

* Refactor GitHub connector to use SlimCheckpointOutputWrapper for improved document handling

* Update GitHub sync frequency defaults from 30 minutes to 5 minutes

* Add stop signal handling and progress reporting in GitHub document sync

* Refactor tests for Confluence and Google Drive connectors to use a mock fetch function for document access

* change the doc_sync approach

* add static typing for ocument columns and where clause

* remove prefix logic in connector runner

* mypy fix

* code review changes

* mypy fix

* fix review comments

* add sort order

* Implement merge heads migration for Alembic and update Confluence and Google Drive test

* github unit tests fix

* delete merge head and rebase the docmetadata field migration

---------

Co-authored-by: Subash <subash@onyx.app>
2025-07-30 02:42:18 +00:00
Justin Tahara
56f16d107e feat(infra): Update helm version after new feature (#5120) 2025-07-29 16:31:35 -07:00
Justin Tahara
0157ae099a [Vespa] Update to optimized configuration pt.2 (#5113) 2025-07-28 20:42:31 +00:00
justin-tahara
565fb42457 Let's do this properly 2025-07-28 10:42:31 -07:00
justin-tahara
a50a8b4a12 [Vespa] Update to optimized configuration 2025-07-28 10:38:48 -07:00
Evan Lohn
4baf4e7d96 feat: pruning freq (#5097)
* pruning frequency increase

* add logs
2025-07-26 22:29:43 +00:00
Wenxi
8b7ab2eb66 onyx metadata minio fix + permissive unstructured fail (#5085) 2025-07-25 21:26:02 +00:00
Evan Lohn
1f75f3633e fix: sidebar ranges (#5084) 2025-07-25 19:46:47 +00:00
Evan Lohn
650884d76a fix: preserve error traces (#5083) 2025-07-25 18:56:11 +00:00
Wenxi
8722bdb414 typo (#5082) 2025-07-25 18:26:21 +00:00
Evan Lohn
71037678c3 attempt to fix parsing of tricky template files (#5080) 2025-07-25 02:18:35 +00:00
Chris Weaver
68de1015e1 feat: support aspx files (#5068)
* Support aspx files

* Add fetching of site pages

* Improve

* Small enhancement

* more improvements

* Improvements

* Fix tests
2025-07-24 19:19:24 -07:00
Evan Lohn
e2b3a6e144 fix: drive external links (#5079) 2025-07-24 17:42:12 -07:00
Evan Lohn
4f04b09efa add library to fall back to for tokenizing (#5078) 2025-07-24 11:15:07 -07:00
SubashMohan
5c4f44d258 fix: sharepoint lg files issue (#5065)
* add SharePoint file size threshold check

* Implement retry logic for SharePoint queries to handle rate limiting and server error

* mypy fix

* add content none check

* remove unreachable code from retry logic in sharepoint connector
2025-07-24 14:26:01 +00:00
Evan Lohn
19652ad60e attempt fix for broken excel files (#5071) 2025-07-24 01:21:13 +00:00
Evan Lohn
70c96b6ab3 fix: remove locks from indexing callback (#5070) 2025-07-23 23:05:35 +00:00
Raunak Bhagat
65076b916f refactor: Update location of sidebar (#5067)
* Use props instead of inline type def

* Add new AppProvider

* Remove unused component file

* Move `sessionSidebar` to be inside of `components` instead of `app/chat`

* Change name of `sessionSidebar` to `sidebar`

* Remove `AppModeProvider`

* Fix bug in how the cookies were set
2025-07-23 21:59:34 +00:00
PaulHLiatrio
06bc0e51db fix: adjust template variable from .Chart.AppVersion to .Values.global.version to match versioning pattern. (#5069) 2025-07-23 14:54:32 -07:00
Devin
508b456b40 fix: explicit api_server dependency on minio in docker compose files (#5066) 2025-07-23 13:37:42 -07:00
Evan Lohn
bf1e2a2661 feat: avoid full rerun (#5063)
* fix: remove extra group sync

* second extra task

* minor improvement for non-checkpointed connectors
2025-07-23 18:01:23 +00:00
Evan Lohn
991d5e4203 fix: regen api key (#5064) 2025-07-23 03:36:51 +00:00
Evan Lohn
d21f012b04 fix: remove extra group sync (#5061)
* fix: remove extra group sync

* second extra task
2025-07-22 23:24:42 +00:00
Wenxi
86b7beab01 fix: too many internet chunks (#5060)
* minor internet search env vars

* add limit to internet search chunks

* note

* nits
2025-07-22 23:11:10 +00:00
Evan Lohn
b4eaa81d8b handle empty doc batches (#5058) 2025-07-22 22:35:59 +00:00
Evan Lohn
ff2a4c8723 fix: time discrepancy (#5056)
* fix time discrepancy

* remove log

* remove log
2025-07-22 22:19:02 +00:00
Raunak Bhagat
51027fd259 fix: Make pr-labeler run on edits too 2025-07-22 15:04:37 -07:00
Raunak Bhagat
7e3fd2b12a refactor: Update the error message that is logged when PR title fails Conventional Commits regex (#5062) 2025-07-22 14:46:22 -07:00
Chris Weaver
d2fef6f0b7 Tiny launch.json template improvement (#5055) 2025-07-22 11:15:44 -07:00
Evan Lohn
bd06147d26 feat: connector indexing decoupling (#4893)
* WIP

* renamed and moved tasks (WIP)

* minio migration

* bug fixes and finally add document batch storage

* WIP: can suceed but status is error

* WIP

* import fixes

* working v1 of decoupled

* catastrophe handling

* refactor

* remove unused db session in prep for new approach

* renaming and docstrings (untested)

* renames

* WIP with no more indexing fences

* robustness improvements

* clean up rebase

* migration and salesforce rate limits

* minor tweaks

* test fix

* connector pausing behavior

* correct checkpoint resumption logic

* cleanups in docfetching

* add heartbeat file

* update template jsonc

* deployment fixes

* fix vespa httpx pool

* error handling

* cosmetic fixes

* dumb

* logging improvements and non checkpointed connector fixes

* didnt save

* misc fixes

* fix import

* fix deletion of old files

* add in attempt prefix

* fix attempt prefix

* tiny log improvement

* minor changes

* fixed resumption behavior

* passing int tests

* fix unit test

* fixed unit tests

* trying timeout bump to see if int tests pass

* trying timeout bump to see if int tests pass

* fix autodiscovery

* helm chart fixes

* helm and logging
2025-07-22 03:33:25 +00:00
Raunak Bhagat
1f3cc9ed6e Make from_.user optional (use "Unknown User") if not found (#5051) 2025-07-21 17:50:28 -07:00
Raunak Bhagat
6086d9e51a feat: Updated KG admin page (#5044)
* Update KG admin UI

* Styling changes

* More changes

* Make edits auto-save

* Add more stylings / transitions

* Fix opacity

* Separate out modal into new component

* Revert backend changes

* Update styling

* Add convenience / styling changes to date-picker

* More styling / functional updates to kg admin-page

* Avoid reducing opacity of active-toggle

* Update backend APIs for new KG admin page

* More updates of styling for kg-admin page

* Remove nullability

* Remove console log

* Remove unused imports

* Change type of `children` variable

* Update web/src/app/admin/kg/interfaces.ts

Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>

* Update web/src/components/CollapsibleCard.tsx

Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>

* Remove null

* Update web/src/components/CollapsibleCard.tsx

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Force non-null

* Fix failing test

---------

Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-07-21 15:37:27 -07:00
Raunak Bhagat
e0de24f64e Remove empty tooltip (#5050) 2025-07-21 12:45:48 -07:00
Rei Meguro
08b6b1f8b3 feat: Search and Answer Quality Test Script (#4974)
* aefads

* search quality tests improvement

Co-authored-by: wenxi-onyx <wenxi@onyx.app>

* nits

* refactor: config refactor

* document context + skip genai fix

* feat: answer eval

* more error messages

* mypy ragas

* mypy

* small fixes

* feat: more metrics

* fix

* feat: grab content

* typing

* feat: lazy updates

* mypy

* all at front

* feat: answer correctness

* use api key so it works with auth enabled

* update readme

* feat: auto add path

* feat: rate limit

* fix: readme + remove rerank all

* fix: raise exception immediately

* docs: improved clarity

* feat: federated handling

* fix: mypy

* nits

---------

Co-authored-by: wenxi-onyx <wenxi@onyx.app>
2025-07-19 01:51:51 +00:00
joachim-danswer
afed1a4b37 feat: KG improvements (#5048)
* improvements

* drop views if SQL fails

* mypy fix
2025-07-18 16:15:11 -07:00
Chris Weaver
bca18cacdf fix: improve assistant fetching efficiency (#5047)
* Improve assistant fetching efficiency

* More fix

* Fix weird build stuff

* Improve
2025-07-18 14:16:10 -07:00
Chris Weaver
335db91803 fix: improve check for indexing status (#5042)
* Improve check_for_indexing + check_for_vespa_sync_task

* Remove unused

* Fix

* Simplify query

* Add more logging

* Address bot comments

* Increase # of tasks generated since we're not going cc-pair by cc-pair

* Only index 50 user files at a time
2025-07-17 23:52:51 -07:00
Chris Weaver
67c488ff1f Improve support for non-default postgres schemas (#5046) 2025-07-17 23:51:39 -07:00
Wenxi
deb7f13962 remove chat session necessity from send message simple api (#5040) 2025-07-17 23:23:46 +00:00
Raunak Bhagat
e2d3d65c60 fix: Move around group-sync tests (since they require docker services to be running) (#5041)
* Move around tests

* Add missing fixtures + change directory structure up some more

* Add env variables
2025-07-17 22:41:31 +00:00
Raunak Bhagat
b78a6834f5 fix: Have document show up before message starts streaming back (#5006)
* Have document show up before message starts streaming back

* Add docs
2025-07-17 10:17:57 -07:00
Raunak Bhagat
4abe90aa2c fix: Fix Confluence pagination (#5035)
* Re-implement pagination

* Add note

* Fix invalid integration test configs

* Fix other failing test

* Edit failing test

* Revert test

* Revert pagination size

* Add comment on yielding style

* Use fixture instead of manually initializing sql-engine

* Fix failing tests

* Move code back and copy-paste
2025-07-17 14:02:29 +00:00
Raunak Bhagat
de9568844b Add PR labeller job (#4611) 2025-07-16 18:28:18 -07:00
Evan Lohn
34268f9806 fix bug in index swap (#5036) 2025-07-16 23:09:17 +00:00
Chris Weaver
ed75678837 Add suggested helm resource limits (#5032)
* Add resource suggestions for helm

* Adjust README

* fix

* fix lint
2025-07-15 15:52:16 -07:00
Chris Weaver
3bb58a3dd3 Persona simplification r2 (#5031)
* Revert "Revert "Reduce amount of stuff we fetch on `/persona` (#4988)" (#5024)"

This reverts commit f7ed7cd3cd.

* Enhancements / fix re-render

* re-arrange

* greptile
2025-07-15 14:51:40 -07:00
Chris Weaver
4b02feef31 Add option to disable my documents (#5020)
* Add option to disable my documents

* cleanup
2025-07-14 23:16:14 -07:00
Chris Weaver
6a4d49f02e More pruning logging (#5027) 2025-07-14 12:55:12 -07:00
Chris Weaver
d1736187d3 Fix full tenant sync (#5026) 2025-07-14 10:56:40 -07:00
Wenxi
0e79b96091 Feature/revised internet search (#4994)
* remove unused pruning config

* add env vars

* internet search date time toggle

* revised internet search supporting multiple providers

* env var

* simplify retries and fix mypy issues

* greptile nits

* more mypy

* please mypy

* mypy final straw

* cursor vs. mypy

* simplify fields from provider results

* type-safe prompt, enum nit, provider enums, indexingdoc processing change

---------

Co-authored-by: Wenxi Onyx <wenxi-onyx@Wenxis-MacBook-Pro.local>
2025-07-14 10:24:03 -07:00
Raunak Bhagat
ae302d473d Fix imap tests 2025-07-14 09:50:33 -07:00
Raunak Bhagat
feca4fda78 feat: Add frontend for email connector (#5008)
* Add basic structure for frontend email connector

* Update names of credentials-json keys

* Fix up configurations workflow

* Edit logic on how `mail_client` is used

- imaplib.IMAP4_SSL is supposed to be treated as an ephemeral object

* Edit helper name and add docs

* Fix invalid mailbox selection error

* Implement greptile suggestions

* Make recipients optional and add sender to primary-owners

* Add sender to external-access too; perform dedupe-ing of emails

* Simplify logic
2025-07-14 09:43:36 -07:00
Chris Weaver
f7ed7cd3cd Revert "Reduce amount of stuff we fetch on /persona (#4988)" (#5024)
This reverts commit adf48de652.
2025-07-14 09:20:50 -07:00
Chris Weaver
8377ab3ef2 Send over less data for document sets (#5018)
* Send over less data for document sets

* Fix type errors

* Fix tests

* Fixes

* Don't change packages
2025-07-13 22:47:05 +00:00
Chris Weaver
95c23bf870 Add full sync endpoint (#5019)
* Add full sync endpoint

* Update backend/ee/onyx/server/tenants/billing_api.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Update backend/ee/onyx/server/tenants/billing_api.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* fix

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-07-13 13:59:19 -07:00
Chris Weaver
e49fb8f56d Fix pruning (#5017)
* Use better last_pruned time for never pruned connectors

* improved pruning req / refresh freq selections

* Small tweak

* Update web/src/lib/connectors/connectors.tsx

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-07-12 14:46:00 -07:00
Chris Weaver
adf48de652 Reduce amount of stuff we fetch on /persona (#4988)
* claude stuff

* Send over less Assistant data

* more

* Fix build

* Fix mypy

* fix

* small tweak

* Address EL cmments

* fix

* Fix build
2025-07-12 14:15:31 -07:00
Wenxi Onyx
bca2500438 personas no longer overwrite when same name 2025-07-12 10:57:01 -07:00
Raunak Bhagat
89f925662f feat: Add ability to specify vertex-ai model location (#4955)
* Make constant a global

* Add ability to specify vertex location

* Add period

* Add a hardcoding path to the frontend

* Add docs

* Add default value to `CustomConfigKey`

* Consume default value from custom-config-key on frontend

* Use markdown renderer instead

* Update description
2025-07-11 16:16:12 -07:00
Chris Weaver
b64c6d5d40 Skip federated connectors when document sets are specified (#5015) 2025-07-11 15:49:13 -07:00
Raunak Bhagat
36c63950a6 fix: More small IMAP backend fixes (#5014)
* Make recipients an optional header and add IMAP to recognized connectors

* Add sender to external-access; perform dedupe-ing of emails
2025-07-11 20:06:28 +00:00
Raunak Bhagat
3f31340e6f feat: Add support for Confluence Macros (#5001)
* Remove macro stylings from HTML tree

* Add params

* Handle multiple cases of `ac:structured-macro` being found.

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-07-11 00:34:49 +00:00
Raunak Bhagat
6ac2258c2e Fixes for imap backend (#5011) 2025-07-10 23:58:28 +00:00
Weves
b4d3b43e8a Add more error handling for drive group sync 2025-07-10 18:33:43 -07:00
Rei Meguro
ca281b71e3 add missing slack scope 2025-07-10 17:37:57 -07:00
Wenxi
9bd5a1de7a check file size first and clarify processing logic (#4985)
* check file size first and clarify processing logic

* basic gdrive extraction clariy

* typo

---------

Co-authored-by: Wenxi Onyx <wenxi-onyx@Wenxis-MacBook-Pro.local>
2025-07-10 00:48:36 +00:00
Wenxi Onyx
d3c5a4fba0 add docx fallback 2025-07-09 17:46:15 -07:00
Chris Weaver
f50006ee63 Stop fetching channel info to make pages load faster (#5005) 2025-07-09 16:45:45 -07:00
Evan Lohn
e0092024af add minIO to README 2025-07-09 16:36:18 -07:00
Evan Lohn
675ef524b0 add minio to contributing instructions 2025-07-09 16:33:56 -07:00
Evan Lohn
240367c775 skip id migration based on env var 2025-07-09 13:37:54 -07:00
Chris Weaver
f0ed063860 Allow curators to create public connectors / document sets (#4972)
* Allow curators to create public connectors / document sets

* Address EL comments
2025-07-09 11:38:56 -07:00
Rei Meguro
bcf0ef0c87 feat: original query + better slack expansion 2025-07-09 09:23:03 -07:00
Rei Meguro
0c7a245a46 Revert "feat: original query + better slack expansion"
This reverts commit 583d82433a.
2025-07-09 20:15:15 +09:00
Rei Meguro
583d82433a feat: original query + better slack expansion 2025-07-09 20:11:24 +09:00
Chris Weaver
391e710b6e Slack federated search ux (#4969)
* slack_search.py

* rem

* fix: get elements

* feat: better stack message processing

* fix: mypy

* fix: url parsing

* refactor: federated search

* feat: proper chunking + source filters

* highlighting + source check

* feat: forced section insertion

* feat: multi slack api queries

* slack query expansion

* feat: max slack queries env

* limit slack search to avoid overloading the search

* Initial draft

* more

* simpify

* Improve modal

* Fix oauth flow

* Fully working versino

* More nicities

* Improved cascade delete

* document set for fed connector UI

* Fix source filters + improve document set selection

* Improve callback modal

* fix: logging error + showing connectors in admin page user settings

* better log

* Fix mypy

* small rei comment

* Fix pydantic

* Improvements to modals

* feat: distributed pruning

* random fix

* greptile

* Encrypt token

* respect source filters + revert llm pruning ordering

* greptile nit

* feat: thread as context in slack search

* feat: slack doc ordering

* small improvements

* rebase

* Small improvements

* Fix web build

* try fix build

* Move to seaprate model file

* Use default_factory

* remove unused model

---------

Co-authored-by: Rei Meguro <36625832+Orbital-Web@users.noreply.github.com>
2025-07-08 21:35:51 -07:00
Raunak Bhagat
004e56a91b feat: IMAP connector (#4987)
* Implement fetching; still need to work on document parsing

* Add basic skeleton of parsing email bodies

* Add id field

* Add email body parsing

* Implement checkpointed imap-connector

* Add testing logic for basic iteration

* Add logic to get different header if "to" isn't present

- possible in mailing-list workflows

* Add ability to index specific mailboxes

* Add breaking when indexing has been fully exhausted

* Sanitize all mailbox names + add space between stripped strings after parsing

* Add multi-recipient parsing

* Change around semantic-identifier and title

* Add imap tests

* Add recipients and content assertions to tests

* Add envvars to github actions workflow file

* Remove encoding header

* Update logic to not immediately establish connection upon init of `ImapConnector`

* Add start and end datetime filtering + edit when connection is established / how login is done

* Remove content-type header

* Add note about guards

* Change default parameters to be `None` instead of `[]`

* Address comment on PR

* Implement more PR suggestions

* More PR suggestions

* Implement more PR suggestions

* Change up login/logout flow (PR suggestion)

* Move port number to be envvar

* Make globals variants in enum instead (PR suggestion)

* Fix more documentation related suggestions on PR

* Have the imap connector implement `CheckpointedConnectorWithPermSync` instead

* Add helper for loading all docs with permission syncing
2025-07-08 23:58:22 +00:00
Evan Lohn
103300798f Bugfix/drive doc ids3 (#4998)
* fix migration

* fix migration2

* cursor based pages

* correct vespa URL

* fix visit api index name

* use correct endpoint and query
2025-07-07 18:23:00 +00:00
Evan Lohn
8349d6f0ea Bugfix/drive doc ids (#4990)
* fixed id extraction in drive connector

* WIP migration

* full migration script

* migration works single tenant without duplicates

* tested single tenant with duplicate docs

* migrations and frontend

* tested mutlitenant

* fix connector tests

* make tests pass
2025-07-06 01:59:12 +00:00
Emerson Gomes
cd63bf6da9 Re-adding .epub file support (#4989)
.epub files apparently were forgotten and were not allowed for upload in the frontend.
2025-07-05 07:48:55 -07:00
Rei Meguro
5f03e85195 fireflies metadata update (#4993)
* fireflies metadata

* str
2025-07-04 18:39:41 -07:00
Raunak Bhagat
cbdbfcab5e fix: Fix bug with incorrect model icon being shown (#4986)
* Fix bug with incorrect model icon being shown

* Update web/src/app/chat/input/LLMPopover.tsx

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Update web/src/app/chat/input/LLMPopover.tsx

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Update web/src/app/chat/input/LLMPopover.tsx

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Update web/src/app/chat/input/LLMPopover.tsx

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Add visibility to filtering

* Update the model names which are shown in the popup

* Fix incorrect llm updating bug

* Fix bug in which the provider name would be used instead

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-07-04 01:39:50 +00:00
SubashMohan
6918611287 remove check for folder assistant before uploading (#4975)
Co-authored-by: Subash <subash@onyx.app>
2025-07-03 09:25:03 -07:00
Chris Weaver
b0639add8f Fix migration (#4982) 2025-07-03 09:18:57 -07:00
Evan Lohn
7af10308d7 drive service account shared fixes (#4977)
* drive service account shared fixes

* oops

* ily greptile

* scrollable index attempt errors

* tentatively correct index errors page, needs testing

* mypy

* black

* better bounds in practice

* remove random failures

* remove console log

* CW
2025-07-02 16:56:32 -07:00
Rei Meguro
5e14f23507 mypy fix 2025-07-01 23:00:33 -07:00
Raunak Bhagat
0bf3a5c609 Add type ignore for dynamic sqlalchemy class (#4979) 2025-07-01 18:14:35 -07:00
Emerson Gomes
82724826ce Remove hardcoded image extraction flag for PDFs
PDFs currently always have their images extracted.
This will make use of the "Enable Image Extraction and Analysis" workspace configuration instead.
2025-07-01 13:57:36 -07:00
Wenxi
f9e061926a account for category prefix added by user (#4976)
Co-authored-by: Wenxi Onyx <wenxi-onyx@Wenxis-MacBook-Pro.local>
2025-07-01 10:39:46 -07:00
Chris Weaver
8afd07ff7a Small gdrive perm sync enhancement (#4973)
* Small gdrive perm sync enhancement

* Small enhancement
2025-07-01 09:33:45 -07:00
Evan Lohn
6523a38255 search speedup (#4971) 2025-07-01 01:41:27 +00:00
Yuhong Sun
264878a1c9 Onyx Metadata Header for File Connector (#4968) 2025-06-29 16:09:06 -07:00
Weves
e480946f8a Reduce frequency of heavy checks on primary for cloud 2025-06-28 17:56:34 -07:00
Evan Lohn
be25b1efbd perm sync validation framework (#4958)
* perm synce validation framework

* frontend fixes

* validate perm sync when getting runner

* attempt to fix integration tests

* added new file

* oops

* skipping salesforce test due to creds

* add todo
2025-06-28 19:57:54 +00:00
Chris Weaver
204493439b Move onyx_list_tenants.py to make sure it's in the image (#4966)
* Move onyx_list_tenants.py to make sure it's in the image

* Improve
2025-06-28 13:18:14 -07:00
Weves
106c685afb Remove CONCURRENTLY from migrations 2025-06-28 11:59:59 -07:00
Raunak Bhagat
809122fec3 fix: Fix bug in which emails would be fetched during initial indexing (#4959)
* Add new convenience method

* Fix bug in which emails would be fetched for initial indexing

* Improve tests for MS Teams connector

* Fix test_gdrive_perm_sync_with_real_data patching

* Protect against incorrect truthiness

---------

Co-authored-by: Weves <chrisweaver101@gmail.com>
2025-06-27 22:05:50 -07:00
Chris Weaver
c8741d8e9c Improve mt migration process (#4960)
* Improve MT migration process

* improve MT migrations

* Improve parallel migration

* Add additional options to env.py

* Improve script

* Remove script

* Simplify

* Address greptile comment

* Fix st migration

* fix run_alembic_migrations
2025-06-27 17:31:22 -07:00
Weves
885f01e6a7 Fix test_gdrive_perm_sync_with_real_data patching 2025-06-27 16:34:37 -07:00
Rei Meguro
3180a13cf1 source fix (#4956) 2025-06-27 13:20:42 -07:00
Rei Meguro
630ac31355 KG vespa error handling + separating relationship transfer & vespa updates (#4954)
* feat: move vespa at end in try block

* simplify query

* mypy

* added order by just in case for consistent pagination

* liveness probe

* kg_p check for both extraction and clustering

* fix: better vespa logging
2025-06-26 22:05:57 -07:00
Chris Weaver
80de62f47d Improve drive group sync (#4952)
* Improve drive group sync

* Improve group syncing approach

* Fix github action

* Improve tests

* address greptile
2025-06-26 20:14:35 -07:00
Raunak Bhagat
c75d42aa99 perf: Improve performance of MS Teams permission-syncing logic (#4953)
* Add function stubs for Teams

* Implement more boilerplate code

* Change structure of helper functions

* Implement teams perms for the initial index

* Make private functions start with underscore

* Implement slim_doc retrieval and fix up doc_sync

* Simplify how doc-sync is done

* Refactor jira doc-sync

* Make locally used function start with an underscore

* Update backend/ee/onyx/configs/app_configs.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Add docstring to helper function

* Update tests

* Add an expected failure

* Address comment on PR

* Skip expert-info if user does not have a display-name

* Add doc comments

* Fix error in generic_doc_sync

* Move callback invocation to earlier in the loop

* Update tests to include proper list of user emails

* Update logic to grab user emails as well

* Only fetch expert-info if channel is not public

* Pull expert-info creation outside of loop

* Remove unnecessary call to `iter`

* Switch from `dataclass` to `BaseModel`

* Simplify boolean logic

* Simplify logic for determining if channel is public

* Remove unnecessary channel membership-type

* Add log-warns

* Only perform another API fetch if email is not present

* Address comments on PR

* Add message on assertion failure

* Address typo

* Make exception message more descriptive

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-06-27 01:41:01 +00:00
Raunak Bhagat
e1766bca55 feat: MS Teams permission syncing (#4934)
* Add function stubs for Teams

* Implement more boilerplate code

* Change structure of helper functions

* Implement teams perms for the initial index

* Make private functions start with underscore

* Implement slim_doc retrieval and fix up doc_sync

* Simplify how doc-sync is done

* Refactor jira doc-sync

* Make locally used function start with an underscore

* Update backend/ee/onyx/configs/app_configs.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Add docstring to helper function

* Update tests

* Add an expected failure

* Address comment on PR

* Skip expert-info if user does not have a display-name

* Add doc comments

* Fix error in generic_doc_sync

* Move callback invocation to earlier in the loop

* Update tests to include proper list of user emails

* Update logic to grab user emails as well

* Only fetch expert-info if channel is not public

* Pull expert-info creation outside of loop

* Remove unnecessary call to `iter`

* Switch from `dataclass` to `BaseModel`

* Simplify boolean logic

* Simplify logic for determining if channel is public

* Remove unnecessary channel membership-type

* Add log-warns

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-06-26 22:36:09 +00:00
Rei Meguro
211102f5f0 kg cleanup + reintroducing deep extraction & classification (#4949)
* kg cleanup

* more cleanup

* fix: copy over _get_classification_content_from_call_chunks for content formatting

* added back deep extraction logic

* feat: making deep extraction and clustering work

* nit
2025-06-26 14:46:50 -07:00
Weves
c46cc4666f Fix query history 2 2025-06-25 21:35:53 -07:00
joachim-danswer
0b2536b82b expand definition of public 2025-06-25 20:01:09 -07:00
Rei Meguro
600a86f11d Add creator to linear (#4948)
* add creator to linear

* fix: mypy
2025-06-25 18:19:36 -07:00
Rei Meguro
4d97a03935 KG Attribute Overhaul + Processing Tests (#4933)
* feat: extract email

* title

* feat: new type definition

* working

* test and bugfix

* fix: set docid

* fix: mypy

* feat: show implied entities too

* fix import + migration

* fix: added random delay for vespa

* fix: mypy

* mypy again...

* fix: nit

* fix: mypy

* SOLUTION!

* fix

* cleanup

* fix: transfer

* nit

---------

Co-authored-by: joachim-danswer <joachim@danswer.ai>
2025-06-25 05:06:12 +00:00
Raunak Bhagat
5d7169f244 Implement JIRA permission syncing (#4899) 2025-06-24 23:59:26 +00:00
Wenxi
df9329009c curator bug fixes (#4941)
* curator bug fixes

* basic users default to my files

* fix admin param + move delete button

* fix trashcan admin only

---------

Co-authored-by: Wenxi Onyx <wenxi-onyx@Wenxis-MacBook-Pro.local>
2025-06-24 21:33:47 +00:00
Arun Philip
e74a0398dc Update Docker Compose restart policy to unless-stopped
Changed the restart policy to unless-stopped to ensure containers
automatically restart after failures or reboots but allow manual stop
without immediate restart.

This is preferable over always because it prevents containers from
restarting automatically after a manual stop, enabling controlled
shutdowns and maintenance without unintended restarts.
2025-06-24 13:27:50 -07:00
SubashMohan
94c5822cb7 Add MinIO configuration to env template and update restart script for MinIO container (#4944)
Co-authored-by: Subash <subash@onyx.app>
2025-06-24 17:21:16 +00:00
joachim-danswer
dedac55098 KG extraction without vespa queries (#4940)
* no vespa in extraction

* prompt/flow improvements

* EL comments

* nit

* Updated get_session_with_current_tenant import

---------

Co-authored-by: Rei Meguro <36625832+Orbital-Web@users.noreply.github.com>
2025-06-24 15:02:50 +00:00
Chris Weaver
2bbab5cefe Handle very long file names (#4939)
* Handle very long file names

* Add logging

* Enhancements

* EL comments
2025-06-23 19:22:02 -07:00
joachim-danswer
4bef718fad fix kg db proxy (#4942) 2025-06-23 18:27:59 -07:00
Chris Weaver
e7376e9dc2 Add support for db proxy (#4932)
* Split up engine file

* Switch to schema_translate_map

* Fix mass serach/replace

* Remove unused

* Fix mypy

* Fix

* Add back __init__.py

* kg fix for new session management

Adding "<tenant_id>" in front of all views.

* additional kg fix

* better handling

* improve naming

---------

Co-authored-by: joachim-danswer <joachim@danswer.ai>
2025-06-23 17:19:07 -07:00
Raunak Bhagat
8d5136fe8b Fix error in which curator sidebars were hitting kg-exposed endpoint 2025-06-23 17:07:11 -07:00
joachim-danswer
3272050975 docker dev and prod template (#4936)
* docker dev and prod template

* more dev files
2025-06-23 21:43:42 +00:00
Weves
1960714042 Fix query history 2025-06-23 14:32:14 -07:00
Weves
5bddb2632e Fix parallel tool calls 2025-06-23 09:50:44 -07:00
Raunak Bhagat
5cd055dab8 Add minor type-checking fixes (#4916) 2025-06-23 13:34:40 +00:00
Raunak Bhagat
fa32b7f21e Update ruff and remove ruff-formating from pr checks (#4914) 2025-06-23 05:34:34 -07:00
Rei Meguro
37f7227000 fix: too many vespa request fix (#4931) 2025-06-22 14:31:42 -07:00
Chris Weaver
c1f9a9d122 Hubspot connector enhancements (#4927)
* Enhance hubspot connector

* Add companies, deals, and tickets

* improve typing

* Add HUBSPOT_ACCESS_TOKEN to connector tests

* Fix prettier

* Fix mypy

* Address JR comments
2025-06-22 13:54:04 -07:00
Rei Meguro
045b7cc7e2 feat: comma separated citations (#4923)
* feat: comam separated citations

* nit

* fix

* fix: comment
2025-06-21 22:51:32 +00:00
joachim-danswer
970e07a93b Forcing vespa language 2025-06-21 16:12:13 -07:00
joachim-danswer
d463a3f213 KG Updates (#4925)
* updates

 - no classification if deep extraction is False
 - separate names for views in LLM generation
 - better prompts
 - any relationship type provided to LLM that relates to identified entities

* CW feedback/comment update
2025-06-21 20:16:39 +00:00
Wenxi
4ba44c5e48 Fix no subject gmail docs (#4922)
Co-authored-by: Wenxi Onyx <wenxi-onyx@Wenxis-MacBook-Pro.local>
2025-06-20 23:22:49 +00:00
Chris Weaver
6f8176092e S3 like file store (#4897)
* Move to an S3-like file store

* Add non-mocked test

* Add S3 tests

* Improve migration / add auto-running tests

* Refactor

* Fix mypy

* Small fixes

* Improve migration to handle downgrades

* fix file store tests

* Fix file store tests again

* Fix file store tests again

* Fix mypy

* Fix default values

* Add MinIO to other compose files

* Working helm w/ minio

* Fix test

* Address greptile comments

* Harden migration

* Fix README

* Fix it

* Address more greptile comments

* Fix it

* Rebase

* Handle multi-tenant case

* Fix mypy

* Fix test

* fix test

* Improve migration

* Fix test
2025-06-20 14:22:05 -07:00
Wenxi
198ec417ba fix gemini model names + add vertex claude sonnet 4 (#4920)
* fix gemini model names + add vertex claude sonnet 4

* few more models

---------

Co-authored-by: Wenxi Onyx <wenxi-onyx@Wenxis-MacBook-Pro.local>
2025-06-20 18:18:36 +00:00
Wenxi
fbdf7798cf GCS metadata processing (#4879)
* GCS metadata processing

* Unprocessable files should still be indexed to be searched by title

* Moved re-used logic to utils. Combined file metadata PR with GCS metadata changes

* Added OnyxMetadata type, adjusted timestamp naming consistency, clarified timestamp logic

* Use BaseModel

---------

Co-authored-by: Wenxi Onyx <wenxi-onyx@Wenxis-MacBook-Pro.local>
2025-06-20 16:11:38 +00:00
Weves
7bd9c856aa Really add psql to api-server 2025-06-19 18:50:17 -07:00
Rei Meguro
948c719d73 fix (#4915) 2025-06-19 23:06:34 +00:00
Weves
42572479cb Don't load prompts if not necessary 2025-06-19 16:56:33 -07:00
Suvodhoy Sinha
accd363d3f fix(discourse-connector): handle redirect issue with categoryId rewriting page number (#4780) 2025-06-19 16:21:09 -07:00
Rei Meguro
8cf754a8b6 Kg config refactor (#4902)
* refactor: kg_config

* feat: reworked migrations

* nit

* fix: test

* rebase alembic migration

* feat: bypass cache

* fix: mypy

* fix: processing when kg disabled

* feat: celery rework

* fix: grammar

* fix: only do kg commands for KG Beta

* fix: keep config on downgrade

* fix: nit
2025-06-19 20:25:56 +00:00
Raunak Bhagat
bf79220ac0 build: Remove ruff (#4912)
* Update ruff version

* Update format command

* Update pyproject.toml

* Remove line-length

* Remove ruff in general
2025-06-18 19:12:59 -07:00
Weves
4c9dc14e65 ADd slackbot to helm 2025-06-18 11:04:25 -07:00
Weves
f8621f7ea9 Add psql to backend containers 2025-06-17 21:15:40 -07:00
trial-danswer
e0e08427b9 Feature/connector creation feedback (#4644)
* Loading on connector creation

* Dangling connectors cleaned up. Fixed loading modal.

* Dangling connector deletion happens immediately at timeout. Swapped loading modal to spinner for consistency

* Removed redundant delete func
2025-06-17 18:32:37 -07:00
Evan Lohn
169df994da tiny connector logging tweaks (#4908) 2025-06-17 22:13:40 +00:00
Evan Lohn
d83eaf2efb fail loudly when error should be propagated (#4903) 2025-06-17 22:12:19 +00:00
joachim-danswer
4e1e30f751 KG - Entity-Only Path (#4898)
* Create Entity-Only path for simple entity-focussed queries. Plus
other fixes.

* fix: use env var

* mypy fix

* fix: mypy

---------

Co-authored-by: Rei Meguro <36625832+Orbital-Web@users.noreply.github.com>
2025-06-17 22:10:29 +00:00
Raunak Bhagat
561f8c9c53 fix: Implement time-filtering for MS Teams document fetching (#4906)
* Add delta-time filtering

* Remove unused variables

* Update retry logic

* Remove variable change (inside of overriden function)

* Add back helpful variables

* Add missing assignment to variable

* Reorder classes in order to avoid using quotes

* Compress f-strings

* Address PR comment

* Implement pagination
2025-06-17 21:06:04 +00:00
SubashMohan
f625a4d0a7 feat: Add support for Assume Role authentication in S3 (#4907)
Co-authored-by: Subash <subash@onyx.app>
2025-06-17 20:56:44 +00:00
rkuo-danswer
746d4b6d3c Bugfix/salesforce correctness 3 (#4598)
* refactor salesforce sqlite db access

* more refactoring

* refactor again

* refactor again

* rename object

* add finalizer to ensure db connection is always closed

* avoid unnecessarily nesting connections and commit regularly when possible

* remove db usage from csv download

* dead code

* hide deprecation warning in ddtrace

* remove unused param

* local testing WIP

* stuff for pytest-dotenv

* autodetect filter types instead of assuming last modified always works (it doesn't)
Move filtering responsibility up instead of making utility calls excessively stateful

* fix how changed parent id's are yielded

* remove slow part of test

* clean up comments

* small refactor

* more refactor

* add normalize test

* checkpoint and comments

* add helper function

* fix gitignore

* add gitignore

* update pyproject

* delta updates

* remove comments

* fix time import

* fix set init

* add salesforce env vars

* cleanup

* more cleanup

* filtered item is unbound here

* typo

* fix suffix check

* fix empty type query

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-06-17 18:39:22 +00:00
Evan Lohn
fdd48c6588 fix db connection assertion (#4905) 2025-06-17 03:42:55 +00:00
Rei Meguro
23a04f7b9c Kg Subtype Rework (#4892)
* feat: vespa schema update

* fix: vespa multiple entities/relations yql logic

* fix: mypy

* fix: comments

* fix: kgchunkformat

* fix: reset vespa fix

* feat: vespa schema update

* feat: modify entity type and attribute value extraction

* feat: modify entity type and attribute value extraction

* feat: removed entity class and subtype from db

* slightly formatting

* feat: subtype narrowed normalization

* fix: mypy

* nits

* fix: rebase error fix

* fix: null handling

* rename for clarity

* fix: reverse order downgrade

* fix: nit

* rebase leftovers
2025-06-17 01:17:35 +00:00
Chris Weaver
b7b0dde7aa Remove non-helm kubernetes deployment option (#4904)
* Remove non-helm kubernetes deployment option

* Improve Vespa default set up

* Make nginx LoadBalancer

* Add version in values.yaml

* Fix lint

* Fix typo
2025-06-16 18:27:23 -07:00
Wenxi
c40b78c7e9 Bugfix/honor disable default slack config (#4891)
* Honor disable default config & improve UI clarity

* Disable default will also disable DMs

---------

Co-authored-by: Wenxi Onyx <wenxi-onyx@Wenxis-MacBook-Pro.local>
2025-06-16 22:24:51 +00:00
Raunak Bhagat
33c0133cc7 feat: Knowledge graph full-stack implementation (#4790)
* db setup

* transfer 1 - incomplete

* more adjustments

* relationship table + query update

* temp view creation

* restructuring

* nits

* updates

* separate read_only engine

* extraction revamp

* focus on metadata relatonships 1

* dev

* migration downgrade fix

* rebase migration change

* a3+

* progress

* base

* new extraction

* progress

* fixed KG extraction

* nits

* updates

* simplifications & cleanup

* fixes

* updates

* more feature flag checks

* fixes

* extraction process fix

* read-only user creation as part of setup

* fix for missing entity attributes

* kg read-only user creation as part of migration

* typo

* EL initial comments

* initial Account/SF Connector chnges

* SF Connector update

 - include account information

* base w/ salesforce

* evan updates + quite a bit more

* kg-filtered search

* EL changes pt 2

* migrations and env vars

* quick migration fix

* migration update

* post_rebase fixes

* mypy fixes

* test fixes

* test fix

* test fix

* read_only pool + misc

* nf

* env vars

* test improvements

* salesforce fix

* test update

* small changes

* small adjustments

* SF Connector fix & kg_stage removal for one table

* mypy fix

* small fixes

* EL + RK (pt 1) comments

* nit

* setting updated

* Salesforce test update

* EL comments

* read-only user replacement & cleanup

* SQL View fix

* converting entity type-name separators

* sql view group ownership

* view fix

* SQL tweak

* dealing with docs that were skipped by indexing

* increased error handling

* more error handling

* Output formatting fix

* kg-incremental-reindexing

* 0-doc found improvement

* celery

* migration correction

* timeout adjustments

* nit

* Updated migration

* Entity Normalization for KG Dev 1 (#4746)

* feat: trigrams column

* fix: reranking and db

* feat: v1

* fix: convert to orm

* feat: parallel

* fix: default to id_name

* fix: renamed semantic_id and semantic_id_trigrams

* fix: scalar subquery

* fix: tuning + redundancy

* fix: threshold

* fix: typo

* fix: shorten names

* wip

* fix: reverted

* feat: config

* feat: works but it was dumb

* feat: clustering works

* fix: mypy

* normalization <-> language awareness for SQL generation

* small type fixes

---------

Co-authored-by: joachim-danswer <joachim@danswer.ai>

* mypy

* typo and dead code

* kg_time_fencing

* feat: remove temp views on migration downgrade

* remove functions and triggers for now

* rebase adjustments

* EL code review results

* quick fix + trigger/funcs for single tenant

* fix: typo, mypy, dead code

* fix: autoflake

* small updatesd

* nit

* fix: typo

* early + faster view creation

* Extension creation in MT migration

* nit changes to default ETs

* Incremental Clustering and KG Refactor V1 (#4784)

Optimized/restructured incremental clustering. New pipeline actually that moves vespa updates to clustering.
Also, celery configuration has been updated.
---------

Co-authored-by: joachim-danswer <joachim@danswer.ai>

* Move file

* Fix all prior imports

* Clean sidebar items logic; add kg page

* Add kg_processing celery background task

* prompt tweak & ET extraction reset

* more general hierarchical structure

* feat: better vespa reset logic

* Add basic knowledge graph configuration

* Add configurations for KG entity-type

* prompt optimization and entity replacemants

* small prompt changes

* Implement backend APIs

* KG Refactor V2 (#4814)

Clustering & Extraction improvements & various nits 

Co-authored-by: joachim-danswer <joachim@danswer.ai>

* add connector-level coverage days

* Update APIs to be more frontend ergonomic

* Add simple test

* Make config optional in test

* fix: nit

* initial  EL responses

* refactor: helper functions for formatting

* fix: more helper fns & comments

* fix: comment code that's been implemented elsewhere

* Add entity-types APIs

* Hook up frontend to backend

* Finish hookup up entity-types to backend

* Update ordering of entity-types and fix form submitting

* Add backend API to get kg-exposed

* Add kg-exposed to sidebar

* Fix path

* Use existing values, even if kg-enabled is false

* Update what initial values are used

* Add skeleton for kg resetting

* Add return type

* Add default entity-type population when fetching entity-types

* Remove circular deps

* Minor fixes to logic

* Edit logic for default entity-types population

* Add re-index API + skeleton

* Update verbiage for KG

* Remove templatization in favour of function

* Address comments on PR

* Pull call out into its own binding

* Remove re-index API and revert implement of reset back to stub

* Fix circular import error

* Remove 'reindex' button

* Edit how the empty vendor name list is handled

* Edit how exposed is processed

* Redirect if navigated to `/admin/kg` and kg is not exposed

* Address comments on PR

* reset + entity type table display & updating updates

* Update fetching entity-types

* Make KG entity types refresh when reset

* Edit verbiage of reset button

* Update package-lock.json file

* Protect against overflowing

* Re-implement refreshing table after reset

* Edit message when nothing is shown.

* UI enhancements

* small fixes

* remove form validation?

* fix

* nit

* nit

* nit

* nit

* fix configure max coverage days

* EL comments for JR

* refactor: moved functions where they belong to fix circular import

* feat: intuitive coverage days

* feat: intuitive coverage days

* fix: safe date picker

* fix: startdate

* evan fixes

* fix: evan comment on enable/disable

* fix: style

* fix: ui issues

* fix: ui issues for reset too

* fix: tests

* fix: kg entity is not enabled

* fix: entity type reload on enable

---------

Co-authored-by: joachim-danswer <joachim@danswer.ai>
Co-authored-by: Rei Meguro <36625832+Orbital-Web@users.noreply.github.com>
2025-06-16 15:51:11 +00:00
Shahar Mazor
cca5bc13dc Make password validation configurable (#4869) 2025-06-14 14:31:05 -07:00
Wenxi
d5ecaea8e7 new script for hard deleting sessions (#4883)
Co-authored-by: Wenxi Onyx <wenxi-onyx@Wenxis-MacBook-Pro.local>
2025-06-14 01:06:47 -07:00
joachim-danswer
b6d3c38ca9 Prep KG on-demand indexing through celery (#4874)
* filter updates

* nits

* nit

* moving to celery

* RK discussion updates

* fix of postgres reset logic

* greptile comments

* RK comments

* fix

* change num_chunks

* further hardening

* nit

* added logging

* fix: mypy and argument to function

* feat: log so we know when rs finishes

* nits

* nit

---------

Co-authored-by: Rei Meguro <36625832+Orbital-Web@users.noreply.github.com>
2025-06-13 18:00:02 +00:00
Rei Meguro
b5fc1b4323 fix kg (#4881) 2025-06-12 15:44:14 -07:00
rkuo-danswer
a1a9c42b0b bump disk size (#4882)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-06-12 20:19:49 +00:00
Raunak Bhagat
e689e143e5 cleanup: Edit logic for default entity-types population (#4876)
* Edit logic for default entity-types population

* Remove templatization in favour of function

* Address comments on PR

* Pull call out into its own binding

* Address comments on PR
2025-06-12 08:46:32 -07:00
joachim-danswer
a7a168d934 Dual search pipeline for non-tool-calling LLMs (#4872)
Added dual pipeline also for non-tool-calling LLMs. 
A helper function was created.
2025-06-11 17:43:44 -07:00
joachim-danswer
69f47fc3e3 kg_update (#4858)
Improving Vespa chunk retrieval

Co-authored-by: Weves <chrisweaver101@gmail.com>
2025-06-11 17:40:20 -07:00
Evan Lohn
8a87140b4f JR comments 2025-06-11 15:51:11 -07:00
Evan Lohn
53db0ddc4d skip large empty excel files 2025-06-11 15:51:11 -07:00
Rei Meguro
087085403f fix: kg answer streaming (#4877)
* fix: answer streaming

* fix: env vars

* fix: remove duplicate
2025-06-11 15:47:32 -07:00
Chris Weaver
c040b1cb47 Switch to chonkie from llamaindex chunker (#4838)
* Switch to chonkie from llamaindex chunker

* Remove un-intended changes

* Order requirements

* Upgrade chonkie version
2025-06-11 14:12:52 -07:00
Raunak Bhagat
1f4d0716b9 Remove invocation of parallel_yield (was causing problems) 2025-06-11 12:19:05 -07:00
SubashMohan
aa4993873f feat: add configurable image model name and update dependencies (#4873)
Co-authored-by: Subash <subash@onyx.app>
2025-06-11 16:17:11 +00:00
Raunak Bhagat
ce031c4394 Change how replies are processed (#4870) 2025-06-11 02:14:34 +00:00
Rei Meguro
d4dadb0dda feat: default kg entity types (#4851)
* feat: default entity types

* refactor: cleaned duplicate code

* fix: delete unused fn

* refactor: dictionary instead of model for better type safety

* fix: mypy
2025-06-10 18:19:41 -07:00
Raunak Bhagat
0ded5813cd fix: Add rate-limiting to Teams API request (#4854)
* Add rate-limiting to Teams API request

* Add comment for rate-limiting

* Implement rate-limiting for office365 library.

* Remove hardcoded value

* Fix nits on PR
2025-06-10 21:12:06 +00:00
Evan Lohn
83137a19fb remove lru cache (#4865)
* remove lru cache

* fix types issue

* mypy
2025-06-10 19:40:07 +00:00
Zhipeng He
be66f8dbeb add: add Qwen icon in LLM list and update provider icon mapping (#4625) 2025-06-10 12:36:18 -07:00
SubashMohan
70baecb402 Enhancement/gpt4o image gen support (#4859)
* initial model switching changes

* Update image generation output format and revise prompt handling

* Add validation for output format in ImageGenerationTool and implement tests

---------

Co-authored-by: Subash <subash@onyx.app>
2025-06-10 08:52:21 -07:00
Chris Weaver
c27ba6bad4 Add perm sync to indexing for google drive (#4842)
* Add perm sync to indexing for google drive

* Applying changes elsewhere

* Turn on EE for perm sync slack tests

* Add new load_from_checkpoint_with_perm_sync

* Adjust way perm sync configs are represented

* Adjust run_indexing to handle perm sync on first run

* Add missing file

* Add sync on index for slack

* Add test + fixes

* Update permission

* Fix connector tests

* skip perm sync test if running MIT tests

* Address EL comments
2025-06-10 02:36:04 +00:00
Evan Lohn
61fda6ec58 drive smaller checkpoints v1 (#4849)
* drive smaller checkpoints v1

* v2

* text encoding fix
2025-06-09 23:35:12 +00:00
Evan Lohn
2c93841eaa errors have correct file id (#4818) 2025-06-09 22:36:35 +00:00
Weves
879db3391b Enable embedding parallelism 2025-06-09 15:36:36 -07:00
Suvodhoy Sinha
6dff3d41fa fix(discourse): Remove early break that was limiting topics to batch size 2025-06-09 15:20:43 -07:00
Raunak Bhagat
e399eeb014 fix: Query History Export (#4841)
* Move task registration to earlier in the API

* Remove unnecessary check
2025-06-09 10:10:53 -07:00
joachim-danswer
b133e8fcf0 enforce sub_question ordering for chat_message (#4848) 2025-06-09 02:37:35 +00:00
SubashMohan
ba9b24a477 Enhance credential management with multi-auth support and improved validation (#4846)
Co-authored-by: Subash <subash@onyx.app>
2025-06-09 00:53:18 +00:00
Rei Meguro
cc7fb625a6 KG autofill metadata (#4834)
* feat: autofill metadata

* fix: typo

* fix: enum

* fix: nit
2025-06-08 21:56:50 +00:00
Rei Meguro
2b812b7d7d Kg batch clustering (#4847)
* super genius kg_entity parent migration

* feat: batched clustering

* fix: nit
2025-06-08 21:16:10 +00:00
joachim-danswer
c5adbe4180 Knowledge Graph v1 (#4626)
* db setup

* transfer 1 - incomplete

* more adjustments

* relationship table + query update

* temp view creation

* restructuring

* nits

* updates

* separate read_only engine

* extraction revamp

* focus on metadata relatonships 1

* dev

* migration downgrade fix

* rebase migration change

* a3+

* progress

* base

* new extraction

* progress

* fixed KG extraction

* nits

* updates

* simplifications & cleanup

* fixes

* updates

* more feature flag checks

* fixes

* extraction process fix

* read-only user creation as part of setup

* fix for missing entity attributes

* kg read-only user creation as part of migration

* typo

* EL initial comments

* initial Account/SF Connector chnges

* SF Connector update

 - include account information

* base w/ salesforce

* evan updates + quite a bit more

* kg-filtered search

* EL changes pt 2

* migrations and env vars

* quick migration fix

* migration update

* post_rebase fixes

* mypy fixes

* test fixes

* test fix

* test fix

* read_only pool + misc

* nf

* env vars

* test improvements

* salesforce fix

* test update

* small changes

* small adjustments

* SF Connector fix & kg_stage removal for one table

* mypy fix

* small fixes

* EL + RK (pt 1) comments

* nit

* setting updated

* Salesforce test update

* EL comments

* read-only user replacement & cleanup

* SQL View fix

* converting entity type-name separators

* sql view group ownership

* view fix

* SQL tweak

* dealing with docs that were skipped by indexing

* increased error handling

* more error handling

* Output formatting fix

* kg-incremental-reindexing

* 0-doc found improvement

* celery

* migration correction

* timeout adjustments

* nit

* Updated migration

* Entity Normalization for KG Dev 1 (#4746)

* feat: trigrams column

* fix: reranking and db

* feat: v1

* fix: convert to orm

* feat: parallel

* fix: default to id_name

* fix: renamed semantic_id and semantic_id_trigrams

* fix: scalar subquery

* fix: tuning + redundancy

* fix: threshold

* fix: typo

* fix: shorten names

* wip

* fix: reverted

* feat: config

* feat: works but it was dumb

* feat: clustering works

* fix: mypy

* normalization <-> language awareness for SQL generation

* small type fixes

---------

Co-authored-by: joachim-danswer <joachim@danswer.ai>

* mypy

* typo and dead code

* kg_time_fencing

* feat: remove temp views on migration downgrade

* remove functions and triggers for now

* rebase adjustments

* EL code review results

* quick fix + trigger/funcs for single tenant

* fix: typo, mypy, dead code

* fix: autoflake

* small updatesd

* nit

* fix: typo

* early + faster view creation

* Extension creation in MT migration

* nit changes to default ETs

* Incremental Clustering and KG Refactor V1 (#4784)

Optimized/restructured incremental clustering. New pipeline actually that moves vespa updates to clustering.
Also, celery configuration has been updated.
---------

Co-authored-by: joachim-danswer <joachim@danswer.ai>

* prompt tweak & ET extraction reset

* more general hierarchical structure

* feat: better vespa reset logic

* prompt optimization and entity replacemants

* small prompt changes

* KG Refactor V2 (#4814)

Clustering & Extraction improvements & various nits 

Co-authored-by: joachim-danswer <joachim@danswer.ai>

* add connector-level coverage days

* fix: nit

* initial  EL responses

* refactor: helper functions for formatting

* fix: more helper fns & comments

* fix: comment code that's been implemented elsewhere

* fix: tenant_id missing arg

* fix: removed debugging stuff

* fix: moved kg_interactions db query to helper fn

* fix: tenant_id

* fix: tenant_id & removed outdated helper fn

* fix always set entity class

* fix: typo

* fix alembic heads

* fix: celery logging

* fix: migrations fix

* fix: multi tenant permissions

* fix: temp connector fix

* fix: downgrade

* Fix upgrade migration

* fix: tenant for normalization

* added additional acl

* stray EL comments

* fix: connector test

* fix mypy

* fix: temporary connector test fix

* fix: jira connector test

* nit

* small nits

* fix: black

* fix: mypy

* fix: mypy

---------

Co-authored-by: Rei Meguro <36625832+Orbital-Web@users.noreply.github.com>
2025-06-07 23:14:20 +00:00
Wenxi
21dc3a2456 Restart script clarity (#4839)
* Add error clarity to restart containers script

* erroneous cleanup on exit

* fix when starting containers for the first time

---------

Co-authored-by: Wenxi Onyx <wenxi-onyx@Wenxis-MacBook-Pro.local>
2025-06-06 09:11:58 -07:00
Wenxi
9631f373f0 Restart script clarity (#4837)
* Add error clarity to restart containers script

* erroneous cleanup on exit

* space

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

---------

Co-authored-by: Wenxi Onyx <wenxi-onyx@Wenxis-MacBook-Pro.local>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-06-05 19:24:06 -07:00
rexjohannes
cbd4d46fa5 remove Hagen from CONTRIBUTING.md (#4778)
* remove Hagen from CONTRIBUTING.md

* fix slack invite url

* fix second slack invite
2025-06-05 18:59:34 -07:00
Wenxi
dc4b9bc003 Fixed indexing when no sites are specified (#4822)
* Fixed indexing when no sites are specificed

* Added test for Sharepoint all sites index

* Accounted for paginated results.

* Typing

* Typing

---------

Co-authored-by: Wenxi Onyx <wenxi-onyx@Wenxis-MacBook-Pro.local>
2025-06-05 23:25:20 +00:00
Chris Weaver
affb9e6941 Extend the onyx_vespa_schemas.py script (#4835) 2025-06-05 22:47:32 +00:00
Chris Weaver
dc542fd7fa Enable default quantization (#4815)
* Adjust migration

* update default in form

* Add cloud indices for bfloat16

* Update backend/shared_configs/configs.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Update vespa schema gen script

* Move embedding configs

* Remove unused imports

* remove import from shared configs

* Remove unused model

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-06-05 14:02:08 -07:00
rkuo-danswer
85eeb21b77 add slack percentage progress (#4809)
* add percentage progress

* range checking

* formatting

* for new channels, skip them if the most recent messages are all from bots

* comments

* bypass bot channels

* code review

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-06-05 17:52:46 +00:00
Rei Meguro
4bb3ee03a0 Update GitHub Connector metadata (#4769)
* feat: updated github metadata

* feat: nullity check

* feat: more metadata

* feat: userinfo

* test: connector test + more metadata

* feat: num files changed

* feat str

* feat: list of str
2025-06-04 18:33:14 +00:00
Maciej Bryński
1bb23d6837 Upgrade asyncpg for Python 3.12 (#4699) 2025-06-04 11:44:52 -07:00
joachim-danswer
f447359815 bump up agent timeouts across the board (#4821) 2025-06-04 14:36:46 +00:00
Weves
851e0b05f2 Small tweak to user invite flow 2025-06-04 08:09:33 -07:00
Chris Weaver
094cc940a4 Small embedding model cleanups (#4820)
* Small embedding model cleanups

* fix

* address greptile

* fix build
2025-06-04 00:10:44 +00:00
rkuo-danswer
51be9000bb Feature/vespa bump (#4819)
* bump cloudformation

* update kubernetes

* bump helm chart

* bump docker compose

* update chart.lock

* ai accident!

* bump vespa helm chart for fix

* increase timeout

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-06-04 00:03:01 +00:00
joachim-danswer
80ecdb711d New metadata for Jira for KG (#4785)
* new metadata components

* nits & tests
2025-06-03 20:12:56 +00:00
Chris Weaver
a599176bbf Improve reasoning detection (#4817)
* Improve reasoning detection

* Address greptile comments

* Fix mypy
2025-06-03 20:01:12 +00:00
rkuo-danswer
e0341b4c8a bumping docker push action version (#4816)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-06-03 12:47:01 -07:00
CaptainJeff
4c93fd448f fix: updating gemini models (#4806)
Co-authored-by: Jeffrey Drakos <jeffreydrakos@Jeffreys-MacBook-Pro-2.local>
2025-06-03 11:16:42 -07:00
Chris Weaver
84d916e210 Fix hard delete of agentic chats (#4803)
* Fix hard delete of agentic chats

* Update backend/tests/integration/tests/chat/test_chat_deletion.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Address Greptile comments

* fix tests

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-06-03 11:14:11 -07:00
Weves
f57ed2a8dd Adjust script 2025-06-02 18:39:00 -07:00
trial-danswer
713889babf [Connectors][Script] Resume Paused Connectors (#4798)
* [Connectors][Script] Resume Paused Connectors

* Addressing comment
2025-06-02 18:34:00 -07:00
Weves
58c641d8ec Remove ordering-only flow 2025-06-02 18:29:42 -07:00
Weves
94985e24c6 Adjust user file access 2025-06-02 17:28:49 -07:00
Evan Lohn
4c71a5f5ff drive perm sync logs + misc deployment improvements (#4788)
* some logs

* give postgress more memory

* give postgress more memory

* give postgress more memory

* revert

* give postgress more memory

* bump external access limit

* vespa timeout

* deployment consistency

* bump vespa version

* skip upgrade check

* retry permission by ids

* logs

* fix temp docx file issue

* fix drive file deduping

* RK comments

* mypy

* aggregate logs
2025-06-01 23:36:57 +00:00
rkuo-danswer
b19e3a500b try fixing slack bot (#4792)
* try fixing slack bot

* add logging

* just use if

* safe msg get

* .close isn't async

* enforce block list size limit

* various fixes and notes

* don't use self

* switch to punkt_tab

* fix return condition

* synchronize waiting, use non thread local redis locks

* fix log format, make collection copy more explicit for readability

* fix some logging

* unnecessary function

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-31 00:39:14 +00:00
Chris Weaver
267fe027f5 Fix failed docs table (#4800)
* Fix initial LLM provider set up

* Fix IndexAttemptErrorsModal pagination
2025-05-30 22:19:52 +00:00
Evan Lohn
0d4d8c0d64 jira daylight savings handling (#4797) 2025-05-30 19:13:38 +00:00
Chris Weaver
6f9d8c0cff Simplify passing in of file IDs for filtering (#4791)
* Simplify passing in of file IDs for filtering

* Address RK comments
2025-05-30 05:08:21 +00:00
Weves
5031096a2b Fix frozen add token rate limit migration 2025-05-29 22:22:36 -07:00
rkuo-danswer
797e113000 add a comment (#4789)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-29 14:11:19 -07:00
Raunak Bhagat
edc2892785 fix: Remove "Refining Answer" popup (#4783)
* Clean up logic

* Remove dead code

* Remove "Refining Answer" prompt
2025-05-29 19:55:38 +00:00
rkuo-danswer
ef4d5dcec3 new slack rate limiting approach (#4779)
* fix slack rate limit retry handler for groups

* trying to mitigate memory usage during csv download

* Revert "trying to mitigate memory usage during csv download"

This reverts commit 48262eacf6.

* integrated approach to rate limiting

* code review

* try no redis setting

* add pytest-dotenv

* add more debugging

* added comments

* add more stats

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-29 19:49:32 +00:00
Evan Lohn
0b5e3e5ee4 skip excel files that openpyxl fails on (#4787) 2025-05-29 18:09:46 +00:00
SubashMohan
f5afb3621e connector filter bug fix (#4771)
* connector filter bug fix

* refactor: use ValidStatuses type for last status filter

---------

Co-authored-by: Subash <subash@onyx.app>
2025-05-29 15:17:04 +00:00
rkuo-danswer
9f72826143 Bugfix/slack bot debugging (#4782)
* adding some logging

* better var name

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-28 18:43:11 +00:00
rkuo-danswer
ab7a4184df Feature/helm k8s probes 2 (#4766)
* add probes

* lint fixes

* add beat probes

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-28 05:20:24 +00:00
rkuo-danswer
16a14bac89 Feature/tenant reporting 2 (#4750)
* add more info

* fix headers

* add filename as param (merge)

* db manager entry in launch template

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-27 23:24:47 +00:00
Raunak Bhagat
baaf31513c fix: Create new grouping for CRM connectors (#4776)
* Create new grouping for CRM connectors
* Edit spacing
2025-05-27 06:51:34 -07:00
Rei Meguro
0b01d7f848 refactor: stream_llm_answer (#4772)
* refactor: stream_llm_answer

* fix: lambda

* fix: mypy, docstring
2025-05-26 22:29:33 +00:00
rkuo-danswer
23ff3476bc print sanitized api key to help troubleshoot (#4764)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-24 22:27:37 +00:00
Chris Weaver
0c7ba8e2ac Fix/add back search with files (#4767)
* Allow search w/ user files

* more

* More

* Fix

* Improve prompt

* Combine user files + regular uploaded files
2025-05-24 15:44:39 -07:00
Evan Lohn
dad99cbec7 v1 refresh drive creds during perm sync (#4768) 2025-05-23 23:01:26 +00:00
Chris Weaver
3e78c2f087 Fix POSTGRES_IDLE_SESSIONS_TIMEOUT (#4765) 2025-05-23 14:55:23 -07:00
rkuo-danswer
e822afdcfa add probes (#4762)
* add probes

* lint fixes

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-23 02:24:54 +00:00
rkuo-danswer
b824951c89 add probe signals for beat (#4760)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-23 01:41:11 +00:00
Evan Lohn
ca20e527fc fix tool calling for bedrock claude models (#4761)
* fix tool calling for bedrock claude models

* unit test

* fix unit test
2025-05-23 01:13:18 +00:00
rkuo-danswer
c8e65cce1e add k8s probes (#4752)
* add file signals to celery workers

* improve probe script

* cancel tref

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-22 20:21:59 +00:00
rkuo-danswer
6c349687da improve impersonation logging slightly (#4758)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-22 11:17:27 -07:00
Raunak Bhagat
3b64793d4b Update listener passing (#4751) 2025-05-22 01:31:20 +00:00
Rei Meguro
9dbe12cea8 Feat: Search Eval Testing Overhaul (provide ground truth, categorize query, etc.) (#4739)
* fix: autoflake & import order

* docs: readme

* fix: mypy

* feat: eval

* docs: readme

* fix: oops forgot to remove comment

* fix: typo

* fix: rename var

* updated default config

* fix: config issue

* oops

* fix: black

* fix: eval and config

* feat: non tool calling query mod
2025-05-21 19:25:10 +00:00
rkuo-danswer
e78637d632 mitigate memory usage during csv download (#4745)
* mitigate memory usage during csv download

* more gc tweaks

* missed some small changes

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-21 00:44:27 +00:00
Evan Lohn
cac03c07f7 v1 answer refactor (#4721)
* v1 answer refactor

* fix tests

* good catch, tests

* more cleanup
2025-05-20 23:34:27 +00:00
Raunak Bhagat
95dabfaa18 fix: Add back Teams' replies processing (#4744)
* Add replies to document construction and edit tests

* Update tests

* Add replies processing to teams

* Fix test

* Add try-except block around potential failure

* Update entity-id during ConnectorFailure raise
2025-05-20 22:55:28 +00:00
rkuo-danswer
e92c418e0f Feature/openapi (#4710)
* starting openapi support

* fix app / app_fn

* send gitignore

* dedupe function names

* add readme

* modify gitignore

* update launch template

* fix unused path param

* fix mypy

* local tests pass

* first pass at making integration tests work

* fixes

* fix script path

* set python path

* try full path

* fix output dir

* fix integration test

* more build fixes

* add generated directory

* use the config

* add a comment

* add

* modify tsconfig.json

* fix index linting bugs

* tons of lint fixes

* new gitignore

* remove generated dir

* add tasks template

* check for undefined explicitly

* fix hooks.ts

* refactor destructureValue

* improve readme

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-20 21:33:18 +00:00
Chris Weaver
0593d045bf Fix ext_perm_user sign-up for non multi-tenant (#4743)
* OAuth w/ external user fix

* Apply to basic auth as well
2025-05-20 20:17:01 +00:00
rkuo-danswer
fff701b0bb fix slack rate limit retry handler for groups (#4742)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-20 18:54:27 +00:00
rkuo-danswer
0087a32d8b database isn't a var! (#4741)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-20 11:02:53 -07:00
rkuo-danswer
06312e485c make sure the permission client uses the proper retry handler (#4737)
* make sure the permission client uses the proper retry handler

* fix client

---------

Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2025-05-19 21:07:00 -07:00
Evan Lohn
e0f5b95cfc full drive perm sync 2025-05-19 21:06:43 -07:00
Chris Weaver
10bc072b4b Improve drive group sync (#4736)
* Improve drive group sync

* Fix mypy
2025-05-20 02:39:27 +00:00
Evan Lohn
b60884d3af don't fail on fake files (#4735)
* don't fail on fake files

* solve at the source

* oops

* oops2
2025-05-19 23:09:34 +00:00
Chris Weaver
95ae6d300c Fix slack bot kubernetes template (#4734)
* Fix slack path for kubernetes files

* Add env variables
2025-05-19 21:25:57 +00:00
Evan Lohn
b76e4754bf anthropic fix (#4733)
* anthropic fix

* naming
2025-05-19 20:34:29 +00:00
rkuo-danswer
b1085039ca fix nltk punkt (#4732)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-19 20:29:02 +00:00
Rei Meguro
d64f479c9f feat: error handling & optimization (#4722) 2025-05-19 20:27:22 +00:00
Raunak Bhagat
fd735c9a3f perf: Change query-exporting to use generators instead of expanding fully into memory (#4729)
* Change query-exporting to use generators instead of expanding fully into memory

* Fix pagination logic

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Add type annotation

* Add early break if list of chat_sessions is empty

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-05-19 20:09:45 +00:00
rkuo-danswer
2282f6a42e fix restart (#4726)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-17 17:02:27 +00:00
Rei Meguro
0262002883 fix: continue button (#4724) 2025-05-16 15:43:37 -07:00
Chris Weaver
01ca9dc85d Fix OAuth w/ ext_perm_user for multi-tenant (#4723)
* Fix OAuth w/ ext_perm_user for multi-tenant

* Improve comment
2025-05-16 14:44:01 -07:00
Weves
0735a98284 Fix import ordering 2025-05-16 14:43:50 -07:00
Emerson Gomes
8d2e170fc4 Use LiteLLM DB for determining model tool capability (#4698)
* Bump LiteLLM

* Use LiteLLM DB for determining model tool capability instead of using hardcoded list

* Make function defaults explicit
2025-05-16 13:31:39 -07:00
SubashMohan
f3e2795e69 Highlight active link in AdminSidebar based on current pathname (#4719)
* Highlight active link in AdminSidebar based on current pathname

* Refactor AdminSidebar to declare pathname variable earlier

---------

Co-authored-by: Subash <subash@onyx.app>
2025-05-16 04:55:28 +00:00
Rei Meguro
30d9ce1310 feat: search quality eval (#4720)
* fix: import order

* test examples

* fix: import

* wip: reranker based eval

* fix: import order

* feat: adjuted score

* fix: mypy

* fix: suggestions

* sorry cvs, you must go

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* fix: mypy

* fix: suggestions

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-05-15 23:44:33 +00:00
Evan Lohn
2af2b7f130 fix connector tests and drive indexing (#4715)
* fix connector tests and drive indexing

* fix other test

* fix checkpoint data bug
2025-05-15 19:15:46 +00:00
SubashMohan
9d41820363 UI fixes (#4709)
Co-authored-by: Subash <subash@onyx.app>
2025-05-15 05:46:51 +00:00
rkuo-danswer
a44f289aed restructure to signal activity while processing (#4712)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-15 05:23:11 +00:00
SubashMohan
9c078b3acf Implement pagination for retrieving spots in HighspotClient (#4705)
Co-authored-by: Subash <subash@onyx.app>
2025-05-15 00:32:12 +00:00
Rei Meguro
349f2c6ed6 Bugfix/usage report UUID (#4703)
* feat: replace user id with username in user report

* feat: pagelink arrow disable

* fix: import order

* fix: removed things we're not doing
2025-05-14 22:27:01 +00:00
rkuo-danswer
0dc851a1cf use existing session user if it matches the email (#4706)
Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2025-05-14 22:18:56 +00:00
Raunak Bhagat
f27fe068e8 Add env variables (#4711) 2025-05-14 19:39:29 +00:00
Evan Lohn
f836cff935 reset to prs on next checkpoint (#4704)
* reset to prs on next checkpoint

* github time fix
2025-05-14 18:47:38 +00:00
Raunak Bhagat
312e3b92bc perf: Implement checkpointing for Teams Connector. (#4601)
* Add basic foundation for teams checkpointing classes

* Fix slack connector main entrypoint

* Saving changes

* Finish teams checkpointing impl

* Remove commented out code

* Remove more unused code

* Move code around

* Add threadpool to process requests in parallel

* Fix mypy errors / warnings

* Move test import to main function only

* Address nits on PR

* Remove unnecessary check prior to entering while-loop

* Remove print statement

* Change exception message

* Address more nits

* Use indexing instead of destructuring

* Add back invocation of `run_with_timeout` instead of a direct call

* Revert slack testing code

* Move early return to before second API call

* Pull fetch to team outside of loop

* Address nits on PR

* Add back client-side filtering

* Updated connector to return after a team's indexing is finished

* Add type ignore

* Implement proper datetime range fetching

* Address comment on PR

* Rename function

* Change exception type when no team with the given id was found

* Address nit on PR

* Add comment on why `page_loaded` is needed to be specified explicitly

* Remove duplicated calls to fetching channels

* Use helper function for thread-based yielding instead of manual logic

* Move datetime filtering to message-level instead

* Address more comments on PR

* Add new utility function for yielding sections

* Add additional utility function

* Add teams tests

* Edit error message

* Address nits on PR

* Promote url-prefix to be a class level constant

* Fix mypy error

* Remove start/end parameters from function that doesn't use them anymore; move around comments

* Address more nits on PR

* Add comment
2025-05-14 04:30:57 +00:00
Evan Lohn
0cc0964231 Perf/drive finer checkpoints (#4702)
* celery and drive fixes

* some initial nits

* skip weird files

* safer extension check

* fix drive
2025-05-14 03:15:29 +00:00
Chris Weaver
b82278e685 Fix heavy import (#4701) 2025-05-13 23:04:16 +00:00
Richard Kuo (Onyx)
daa1746b4a just readme fixes 2025-05-13 09:56:07 -07:00
rkuo-danswer
d8068f0a68 Feature/helm separate workers (#4679)
* add test

* try breaking out background workers

* fix helm lint complaints

* rename disabled files more

* try different folder structure

* fix beat selector

* vespa setup should break on success

* improved instructions for basic helm chart testing

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-13 02:23:32 +00:00
Chris Weaver
d91f776c2d Fix initial checkpoint save (#4697)
* Fix initial checkpoint save

* Improve comment

* Another small fix
2025-05-13 01:59:07 +00:00
Chris Weaver
a01135581f Small GitHub enhancements (#4696)
* Small github enhancements

* Fix manual run

* Address EL comments
2025-05-13 01:14:16 +00:00
rkuo-danswer
392b87fb4f Bugfix/limit permission size (#4695)
* add utility function

* add utility functions to DocExternalAccess

* refactor db access out of individual celery tasks and put it directly into the heavy task

* code review and remove leftovers

* fix circular imports

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-13 00:46:31 +00:00
Evan Lohn
551a05aef0 light worker discovers beat task (#4694)
* light worker discovers beat task

* v2: put in right place
2025-05-12 21:20:18 +00:00
rkuo-danswer
6b9d0b5af9 ensure we don't tag 'latest' with cloud images (#4688)
* ensure we don't tag 'latest' with cloud images

* add docker login to trivy

* fix tag names

* flavor latest false (no auto latest tags)

* fix typo

* only run the appropriate workflow for web

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2025-05-12 17:23:01 +00:00
Chris Weaver
b8f3ad3e5d Fix/remove ee fe (#4690)
* Remove ee imports from FE

* Remove ee imports from FE

* Style
2025-05-12 02:31:04 +00:00
Chris Weaver
b19515e25d Fix window_start (#4689)
* Fix window_start

* Add comment
2025-05-12 00:11:20 +00:00
Chris Weaver
913f7cc7d4 Fix/remove ee from mit (#4682)
* Remove some ee imports

* more

* Remove all ee imports

* Fix

* Autodiscover

* fix

* Fix typing

* More celery task stuff

* Fix import
2025-05-11 22:09:50 +00:00
rkuo-danswer
84566debab set field size limit (#4683)
* set field size limit

* don't use sys.maxsize

---------

Co-authored-by: Richard Kuo <rkuo@rkuo.com>
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-09 22:46:13 +00:00
rkuo-danswer
1a8b7abd00 add test (#4676)
* add test

* comment

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-09 21:38:51 +00:00
Evan Lohn
4c0423f27b fix github cursor pagination infinite loop (#4673)
* fix infinite loop

* unit test for infinite loop issue

* mypy version

* more logging

* unbound locals
2025-05-09 21:35:37 +00:00
rkuo-danswer
7965fd9cbb run testing (#4681)
* run testing

* need to break on success

* add a readme

* raise vespa to 6GB

* allow test to retry

* add 20 attempts

* put memory limits back to normal

* restore chart testing on changes only

* increase retries to 40

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-09 11:49:43 -07:00
Chris Weaver
91831f4d07 Fix user count (#4677)
* Fix user count

* Add helper + fix async function as well

* fix mypy

* Address RK comment
2025-05-08 17:19:40 -07:00
Chris Weaver
1dd98a87cc Try to reduce memory usage on group sync (#4678) 2025-05-08 22:53:53 +00:00
rkuo-danswer
0dd65cc839 enterprise settings needs to 403 on tenant id absence (#4675)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-08 18:32:12 +00:00
Chris Weaver
519aeb6a1f Drive perm sync enhancement (#4672)
* Enhance drive perm sync

* add tests

* more stuff

* fixes

* Fix

* Speed up

* Add missing file

* Address EL comments

* Add ondelete=CASCADE

* Improve comment
2025-05-08 03:12:41 +00:00
Evan Lohn
0eab6ab935 fix drive slowness (#4668)
* fix slowness

* no more silent failing for users

* nits

* no silly info transfer
2025-05-07 22:48:08 +00:00
Evan Lohn
ee09cb95af fixes foreign key violation (#4670)
* fixes foreign key violation

* nit
2025-05-07 18:27:32 +00:00
Evan Lohn
8a9a66947e make 404s skippable (#4671) 2025-05-07 18:04:35 +00:00
Raunak Bhagat
d744c0dab4 fix: Fix error in which channel names would not have the leading "#" removed (#4664)
* Fix failing entrypoint into slack connector

* Pre-filter channel names upon instantiation of slack connector class

* Add decrypt script

* Add slack connector tests

* Fix mypy errors on decrypt.py

* Add property to SlackConnector class

* Add some basic tests

* Move location of tests

* Change name of env token

* Add secrets for Slack

* Add more parameterized cases

* Change env variable name

* Change names

* Update channel names

* Edit tests

* Modify tests

* Only import type in __main__

* Fix tests to actually test connectors

* Pass parameter to fixture directly
2025-05-07 04:55:21 +00:00
Chris Weaver
70df685709 Non default schema fix (#4667)
* Use correct postgres schema

* Remove raw Session() use

* Refactor + add test

* Fix comment
2025-05-06 20:35:59 -07:00
Chris Weaver
f85ef78238 Add more logging for confluence perm-sync + handle case where permiss… (#4586)
* Add more logging for confluence perm-sync + handle case where permissions are removed from the access token

* Make required permissions are explicit

* more

* Add slim fetch limit + mark all cc pairs of source type as successful upon group sync

* Add to dev compose

* Small teams fix

* Add file

* Add single limit pagination for confluence

* Restrict to server only

* more logging

* cleanup

* Cleanup

* Remove CONFLUENCE_CONNECTOR_SLIM_FETCH_LIMIT

* Handle teams error

* Fix ut

* Remove db dependency from confluence_doc_sync

* move stuff back to debug
2025-05-06 18:35:14 +00:00
Evan Lohn
2d7e48d8e8 possible mangling fix (#4666)
* possible mangling fix

* fixed nextUrl setting

* global bad
2025-05-06 15:51:39 +00:00
rkuo-danswer
8231328dc6 restore caching and fix up some prefixing (#4649)
* restore caching and fix up some prefixing

* try backend matrix build and fix artifact names

* need id

* add backslashes to be consistent

* fix no-cache

* leave docker tags to the meta action

* need checkout in merge

* add comment

* move spammy logs to debug status

* bunch of no-cache updates

* prefix

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-05 16:43:29 +00:00
Chris Weaver
7763e2fa23 Fix non-default schema in KV store (#4655)
* Fix non-default schema in KV store

* Fix custom schema
2025-05-04 22:19:35 +00:00
Chris Weaver
6085bff12d Fix test / display models (#4657)
* Fix test / display models

* Address greptile comments

* Increase wait time

* Increase overall timeout

* Move stuff to utils file

* Updates
2025-05-04 14:04:03 -07:00
Weves
97d60a89ae Add LRU cache to get_model_map 2025-05-03 17:43:58 -07:00
Raunak Bhagat
79b981075e perf: Optimize query history exporting process (#4602)
* Update mode to be a default parameter in `FileStore.read`

* Move query history exporting process to be a background job instead

* Move hardcoded report-file-naming to a common utility function

* Add type annotations

* Update download component

* Implement button to re-ping and download CSV file; fix up some backend file-checking logic

* De-indent logic (w/ early return)

* Return different error codes dependings on the type of task status

* Add more resistant failure retrying mechanisms

* Remove default parameter in helper function

* Use popup for error messaging

* Update return code

* Update web/src/app/ee/admin/performance/query-history/DownloadAsCSV.tsx

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Add type to useState call

* Update backend/ee/onyx/server/query_history/api.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Update backend/onyx/file_store/file_store.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Update backend/ee/onyx/background/celery/apps/primary.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Move rerender call to after check

* Run formatter

* Add type conversions back (smh greptile)

* Remove duplicated call to save_file

* Move non-fallible logic out of try-except block

* Pass date-ranges into API call

* Convert to ISO strings before passing it into the API call

* Add API to list all tasks

* Create new pydantic model to represent tasks to return instead

* Change helper to only fetch query-history tasks

* Use `shared_tasks` instead of old method

* Address more comments from PR; consolidate how task name is generated

* Mark task as failed if any exception is raised

* Change the task object which is returned back to the FE

* Add a table to display previously generated query-history-csv's

* Add timestamps to task; delete tasks as soon as file finishes processing

* Raise exception if start_time is not present

* Convert hard-coded string to constant

* Add "Generated At" field to table

* Return task list in sorted order (based off of start-time)

* Implement pagination

* Remove unused props and cleanup tailwind classes

* Change the name of kickoff button

* Redesign how previous query exports are viewed

* Make button a constant width even when contents change

* Remove timezone information before comparing

* Decrease interval time for re-pinging API

* Add timezone to start-time creation

* Add a refreshInterval for getting updated task status

* Add new background queue

* Edit small verbiage and remove error popup when max-retries is hit

* Change up heavy worker to recognize new task in new module

* Ensure `celery_app` is imported

* Change how `celery_app` is imported and defined

* Update comment on why `celery_app` must be imported

* Add basic skeleton for new beat task to cleanup any dead / failed query-history-export tasks

* Move cleanup task to different worker / queue

* Implement cleanup task

* Add return type

* Address comment on PR

* Remove delimiter from prefix

* Change name of function to be more descriptive

* Remove delimiter from prefix constant

* Move function invocation closer to usage location

* Move imports to top of file

* Move variable up a scope due to undefined error

* Remove dangling if-statement

* Make function more pure-functional

* Remove redefinition

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-05-03 00:16:35 +00:00
Evan Lohn
113876b276 id not set in checkpoint FINAL (#4656)
* it will never happen again.

* fix perm sync issue

* fix perm sync issue2

* ensure member emails map is populated

* other fix for perm sync

* address CW comments

* nit
2025-05-03 00:10:21 +00:00
rkuo-danswer
5c3820b39f Bugfix/slack timeout (#4652)
* don't log all channels

* print number of channels

* sanitize indexing exception messages

* harden vespa index swap

* use constants and fix list generation

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-02 18:24:45 +00:00
Evan Lohn
55e4465782 orphan tag cleanup optimization (#4651)
* move orphan tag cleanup to final cleanup section of associated tparent tasks

* naming
2025-05-02 17:22:59 +00:00
Evan Lohn
6d9693dc51 drive file deduping (#4648)
* drive file deduping

* switched to version that does not require thread safety

* thanks greptile

* CW comments
2025-05-02 10:58:16 -07:00
Weves
75fa10cead fix highspot 2025-05-01 14:34:35 -07:00
Richard Kuo (Onyx)
0497bfdf78 fix double entry 2025-05-01 11:26:57 -07:00
rkuo-danswer
0db2ad2132 memory optimize task generation for connector deletion (#4645)
* memory optimize task generation for connector deletion

* test

* fix up integration test docker file

* more no-cache

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-01 10:47:26 -07:00
Chris Weaver
49cd38fb2d Update README.md 2025-05-01 09:58:33 -07:00
Raunak Bhagat
bd36b2ad6d Remove cursor-help for tooltip (#4643) 2025-05-01 09:38:13 -07:00
Evan Lohn
6436b60763 github cursor pagination (#4642)
* v1 of cursor pagination

* mypy

* unit tests

* CW comments
2025-04-30 19:09:20 -07:00
Raunak Bhagat
a6cc1c84dc Add padding to bottom of pages (#4641) 2025-04-30 22:34:18 +00:00
Weves
8515f4b57a Highspot cleanup 2025-04-30 14:57:36 -07:00
joachim-danswer
f68b74ff4a disable Agent Search refinement by default (#4638)
- created env variable  AGENT_ALLOW_REFINEMENT  with default "". Must be set to true to enable Refinement.
 - added an environment variable for the upper limit of docs that can be sent to verification
2025-04-30 19:51:08 +00:00
rkuo-danswer
e254fdc066 add sendgrid as option (#4639)
* add sendgrid as option

* code review

* mypy

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-30 07:33:15 +00:00
Raunak Bhagat
f26de37878 Remove info hoverable (#4637) 2025-04-30 03:16:14 +00:00
rkuo-danswer
94de23fe87 Bugfix/chat images 2 (#4630)
* don't hardcode -1

* extra spaces

* fix binary data in blurb

* add note to binary handling

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-30 01:29:10 +00:00
Chris Weaver
dd242c9926 Fix race condition with archived channels (#4635) 2025-04-29 23:35:40 +00:00
Chris Weaver
47767c1666 Small improvements to checkpoint pickup logic (#4634) 2025-04-29 21:23:54 +00:00
Chris Weaver
9be3da2357 Fix gitlab (#4629)
* Fix gitlab

* Add back assert
2025-04-28 17:42:49 -07:00
Evan Lohn
8961a3cc72 page token for drive group sync (#4627) 2025-04-28 19:06:06 +00:00
Chris Weaver
47b9e7aa62 Fix teams (#4628)
* Fix teams

* Use get_all

* Add comment
2025-04-28 11:53:22 -07:00
Evan Lohn
eebfa5be18 Confluence server api time fix (#4589)
* tolerance of confluence api weirdness

* remove checkpointing

* remove skipping logic from checkpointing

* add back checkpointing

* switch confluence checkpointing to be based on page starts

* address CW comments and fix unit tests

* some mitigations of bad confluence api

* new checkpointing approach and testing fixes

* fix test

* CW comments
2025-04-28 06:06:29 +00:00
Chris Weaver
5047d256b4 Add support for restrictions w/o any access (#4624)
* Add support for restrictions w/o any access

* Fix
2025-04-28 03:09:36 +00:00
Evan Lohn
5db676967f no more duplicate files during folder indexing (#4579)
* no more duplicate files during folder indexing

* cleanup checkpoint after a shared folder has been finished

* cleanup

* lint
2025-04-28 01:01:20 +00:00
Chris Weaver
ea0664e203 Fix LLM API key (#4623)
* Fix LLM API key

* Remove unused import

* Update web/src/app/admin/configuration/llm/LLMProviderUpdateForm.tsx

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-04-27 23:10:36 +00:00
Weves
bbd0874200 fix 2025-04-27 14:37:53 -07:00
rkuo-danswer
c6d100b415 Bugfix/chat images (#4618)
* keep chatfiletype as image instead of user_knowledge

* improve continue message

* fix to image handling

* greptile code review

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-27 20:34:30 +00:00
Evan Lohn
5ca7a7def9 fix migration and add test (#4615) 2025-04-25 21:27:59 +00:00
Chris Weaver
92b5e1adf4 Add support for overriding user list (#4616)
* Add support for overriding user list

* Fix

* Add typing

* pythonify
2025-04-25 15:15:23 -07:00
Chris Weaver
23c6e0f3bf Single source of truth for image capability (#4612)
* Single source of truth for image capability

* Update web/src/app/admin/assistants/AssistantEditor.tsx

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Fix tests

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-04-25 20:37:16 +00:00
Chris Weaver
ad76e6ac9e Adjust confluence perm sync frequency (#4613)
* Adjust confluence perm sync frequency

* Fiux comment
2025-04-25 19:36:10 +00:00
Evan Lohn
151aabea73 specific user emails for drive connector (#4608)
* specific user emails for drive connector

* fix drive connector tests

* fix connector tests
2025-04-25 18:49:20 +00:00
Chris Weaver
d711680069 Add e2e test for assistant creation/edit (#4597)
* Add e2e test for assistant creation/edit

* Skip initial full reset to have seeded connector
2025-04-25 13:21:34 -07:00
Evan Lohn
9835d55ecb transfer old fileds to new config 2025-04-25 12:25:20 -07:00
Raunak Bhagat
69c539df6e fix: Create migration to re-introduce display_model_names (#4600)
* Fix migration

* Fix migration to take care of various nullability cases

* Address comments on PR

* Rename variables to be more descriptive

* Make helpers private

* Fix select statement

* Add comments to explain the involved logic

* Saving changes

* Finish script to revalidate `display_model_names`

* Address comments on PR by greptile

* Add missing columns

* Pull difference operator out into binding

* Add deletion prior to re-insertion

* Use map from shared llm-provider file instead

* Use helper function instead of copying code

* Remove delete and convert into an update statement

* Use pydantic for ModelConfigurations

* Update to do nothing on-conflict rather than update

* Address nits on PR

* Add default visible model(s) for bedrock

* Perform an update on conflict instead of doing nothing
2025-04-25 10:44:13 -07:00
pablonyx
df67ca18d8 My docs cleanup (#4519)
* update

* improved my docs

* nit

* nit

* k

* push changes

* update

* looking good

* k

* fix preprocessing

* try a fix

* k

* update

* nit

* k

* quick nits

* Cleanup / fixes

* Fixes

* Fix build

* fix

* fix quality checks

---------

Co-authored-by: Weves <chrisweaver101@gmail.com>
2025-04-25 05:20:33 +00:00
Chris Weaver
115cfb6ae9 Fix tool choice (#4596)
* Fix tool choice

* fix
2025-04-24 21:51:14 -07:00
rkuo-danswer
672f3a1c34 fix provisioning and don't spawn tasks which could result in a race condition (#4604)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-25 02:41:05 +00:00
Raunak Bhagat
13b71f559f fix: Fix migration issue in which display-model-names were not being appropriately set (#4594)
* Fix migration

* Fix migration to take care of various nullability cases

* Address comments on PR

* Rename variables to be more descriptive

* Make helpers private

* Fix select statement

* Add comments to explain the involved logic

* Add helpers for viewing visible model names

* Fix logic for missing model + display-model names in migration
2025-04-24 21:26:33 +00:00
Evan Lohn
2981b7a425 linear dupe docs fix (#4607) 2025-04-24 21:00:21 +00:00
rkuo-danswer
37adf31a3b fix priority on vespa metadata sync (#4603)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-24 19:50:49 +00:00
Raunak Bhagat
5d59850a17 Fix slack formatting bug (#4587) 2025-04-24 17:18:54 +00:00
Evan Lohn
91d6b739a4 ensure drive id set in checkpoint (#4595)
* ensure drive id set in checkpoint

* asserts gone

* address CW
2025-04-24 01:20:13 +00:00
rkuo-danswer
c83ee06062 Feature/salesforce correctness 2 (#4506)
* refactor salesforce sqlite db access

* more refactoring

* refactor again

* refactor again

* rename object

* add finalizer to ensure db connection is always closed

* avoid unnecessarily nesting connections and commit regularly when possible

* remove db usage from csv download

* dead code

* hide deprecation warning in ddtrace

* remove unused param

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-24 01:05:52 +00:00
Raunak Bhagat
c93cebe1ab fix: Add minor fixes to how model configurations are displayed (#4593)
* Add minor fixes to how model configurations are interacted with

* Remove azure entry
2025-04-23 21:42:02 +00:00
rkuo-danswer
ea1d3c1eda Feature/db script (#4574)
* debug script + slight refactor of db class

* better comments

* move setup logger

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2025-04-23 20:00:35 +00:00
rkuo-danswer
c9a609b7d8 Bugfix/slack bot channel config (#4585)
* friendlier handling of slack channel retrieval

* retry on downgrade_postgres deadlock

* fix comment

* text

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-23 20:00:03 +00:00
rkuo-danswer
07f04e35ec Bugfix/alembic sqlengine (#4592)
* need sqlengine to work

* add comments

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-23 19:21:34 +00:00
joachim-danswer
d8b050026d removal of keyword 1st phase 2025-04-22 20:29:57 -07:00
Raunak Bhagat
c76dc2ea2c fix: Fix the add_model_configuration migration by removing duplicate model-names during insertion (#4588)
* Convert the model_names and display_model_names into a set instead

* Update backend/alembic/versions/7a70b7664e37_add_model_configuration_table.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-04-22 18:59:31 -07:00
rkuo-danswer
5e11c635d9 wrong logger imported in a lot of wrong places (#4582)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-22 23:34:02 +00:00
joachim-danswer
669b668463 updated logging and basic search expansion procedure 2025-04-22 11:58:02 -07:00
Raunak Bhagat
85fa083717 fix: Return default value instead of throwing error (#4575)
* Return default value instead of throwing error

* Add default parameter

* Move logic around

* Use dummy value for max_input_tokens in testing flow

* Remove unnecessary assignment
2025-04-22 17:33:36 +00:00
Chris Weaver
420d2614d4 Fix assistants forms (#4578)
* Fix assistant num chunk setting

* test

* Fix test

* Update web/src/app/assistants/mine/AssistantModal.tsx

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Update web/src/app/assistants/mine/AssistantModal.tsx

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-04-22 16:09:03 +00:00
Raunak Bhagat
e3218d358d feat: Add assistant name to UI (#4569)
* Add assistant name to UI

* Fix tailwind styling class
2025-04-22 09:35:35 -07:00
Weves
ae632b5fab Fix missing Connector Configuration 2025-04-21 18:50:25 -07:00
rkuo-danswer
0d4c600852 out of process retry for multitenant test reset (#4566)
* tool to generate vespa schema variations for our cloud

* extraneous assign

* use a real templating system instead of search/replace

* fix float

* maybe this should be double

* remove redundant var

* template the other files

* try a spawned process

* move the wrapper

* fix args

* increase timeout

* run multitenant reset operations out of process as well

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2025-04-21 23:30:18 +00:00
Evan Lohn
eb569bf79d add emails to retry with on 403 (#4565)
* add emails to retry with on 403

* attempted fix for connector test

* CW comments

* connector test fix

* test fixes and continue on 403

* fix tests

* fix tests

* fix concurrency tests

* fix integration tests with llmprovider eager loading
2025-04-21 23:27:31 +00:00
Chris Weaver
f3d5303d93 Fix slack bot feedback (#4573)
* Fix slack bot feedback

* Fix

* Make safe
2025-04-21 15:54:48 -07:00
Raunak Bhagat
b97628070e feat: Add ability to specify max input token limit for custom LLM providers (#4510)
* Add multi text array field

* Add multiple values to model configuration for a custom LLM provider

* Fix reference to old field name

* Add migration

* Update all instances of model_names / display_model_names to use new schema migration

* Update background task

* Update endpoints to not throw errors

* Add test

* Update backend/alembic/versions/7a70b7664e37_add_models_configuration_table.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Update backend/onyx/background/celery/tasks/llm_model_update/tasks.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Fix list comprehension nits

* Update web/src/components/admin/connectors/Field.tsx

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Update web/src/app/admin/configuration/llm/interfaces.ts

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Implement greptile recommendations

* Update backend/onyx/db/llm.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Update backend/onyx/server/manage/llm/api.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Update backend/onyx/background/celery/tasks/llm_model_update/tasks.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Update backend/onyx/db/llm.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Fix more greptile suggestions

* Run formatter again

* Update backend/onyx/db/models.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Add relationship to `LLMProvider` and `ModelConfigurations` classes

* Use sqlalchemy ORM relationships instead of manually populating fields

* Upgrade migration

* Update interface

* Remove all instances of model_names and display_model_names from backend

* Add more tests and fix bugs

* Run prettier

* Add types

* Update migration to perform data transformation

* Ensure native llm providers don't have custom max input tokens

* Start updating frontend logic to support custom max input tokens

* Pass max input tokens to LLM class (to be passed into `litellm.completion` call later)

* Add ModelConfigurationField component for custom llm providers

* Edit spacing and styling of model configuration matrix

* Fix error message displaying bug

* Edit opacity of `FiX` field for first index

* Change opacity back

* Change roundness

* Address comments on PR

* Perform fetching of `max_input_tokens` at the beginning of the callgraph and rope it throughout the entire callstack

* Change `add` to `execute`

* Move `max_input_tokens` into `LLMConfig`

* Fix bug with error messages not being cleared

* Change field used to fetch LLMProvider

* Fix model-configuration UI

* Address comments

* Remove circular import

* Fix failing tests in GH

* Fix failing tests

* Use `isSubset` instead of equality to determine native vs custom LLM Provider

* Remove unused import

* Make responses always display max_input_tokens

* Fix api endpoint to hit

* Update types in web application

* Update object field

* Fix more type errors

* Fix failing llm provider tests

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-04-21 04:30:21 -07:00
pablonyx
72d3a7ff21 Frontend testing (#4500)
* add o3 + o4 mini

* k

* see which ones fail

* attempt

* k

* k

* llm ordering passing

* all tests passing

* quick bump

* Revert "add o3 + o4 mini"

This reverts commit 4cfa1984ec.

* k

* k
2025-04-20 23:29:47 +00:00
rkuo-danswer
2111eccf07 Feature/vespa jinja (#4558)
* tool to generate vespa schema variations for our cloud

* extraneous assign

* use a real templating system instead of search/replace

* fix float

* maybe this should be double

* remove redundant var

* template the other files

* try a spawned process

* move the wrapper

* fix args

* increase timeout

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2025-04-20 22:28:55 +00:00
Chris Weaver
87478c5ca6 Parallelize connector tests (#4563)
* Parallelize connector tests

* Use --dist loadfile

* Add slow test logging
2025-04-19 18:10:50 -07:00
evan-danswer
dc62d83a06 File connector tests (#4561)
* danswer to onyx plus tests for file connector

* actually add test
2025-04-19 15:54:30 -07:00
evan-danswer
5681df9095 address getting attachments forever (#4562)
* address getting attachments forever

* fix unit tests
2025-04-19 15:53:27 -07:00
Chris Weaver
6666300f37 Fix flakey web test (#4551)
* Fix flakey web test

* Increase wait time

* Another attempt to fix

* Simplify + add new test

* Fix web tests
2025-04-19 15:12:11 -07:00
Chris Weaver
7f99c54527 Small improvements to connector UI (#4559)
* Small improvements to connector UI

* Update web/src/app/admin/connector/[ccPairId]/IndexingAttemptsTable.tsx

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Fix last_permission_sync

* Handle cases where a source doesn't need group sync

* fix

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-04-19 19:14:05 +00:00
Chris Weaver
4b8ef4b151 Update README.md 2025-04-18 18:29:56 -07:00
rkuo-danswer
e5e0944049 tool to generate vespa schema variations for our cloud (#4556)
* tool to generate vespa schema variations for our cloud

* extraneous assign

* float, not double

* back to double

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-18 20:47:17 +00:00
pablonyx
356336a842 add o3 + o4 mini (#4555) 2025-04-18 20:42:35 +00:00
rkuo-danswer
5bc059881e ping with keep alive (#4550)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-18 18:44:07 +00:00
rkuo-danswer
fa80842afe Bugfix/harden activity timeout (#4545)
* add some hardening

* add info memory logging

* fix last_observed

* remove log spam

* properly cache last activity details

* default values

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-18 02:28:22 +00:00
rkuo-danswer
a8a5a82251 slightly better slack logging (#4554)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-17 18:45:48 -07:00
evan-danswer
953a4e3793 v1 file connector with metadata (#4552) 2025-04-17 23:02:34 +00:00
rkuo-danswer
04ebde7838 refactor a mega function for readability and make sure to increment r… (#4542)
* refactor a mega function for readability and make sure to increment retry_count on exception so that we don't infinitely loop

* improve session and page level context handling

* don't use pydantic for the session context

* we don't need retry success

* move playwright handling into the session context

* need to break on ok

* return doc from scrape

* fix comment

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-17 06:43:30 +00:00
Chris Weaver
6df1c6c72f Pull in more fields for Jira (#4547)
* Pull in more fields for Jira

* Fix tests

* Fix

* more fix

* Fix

* Fix S3 test

* fix
2025-04-17 01:52:50 +00:00
Raunak Bhagat
fe94bdf936 fix: Fix duplicate kwarg issue when calling litellm.main.completion (#4533)
* Fix duplicate kwarg issue

* Change how vertex_credentials are passed

* Modify temporary dict instead

* Change string to a global constant

* Add extra condition to if-check during population of map
2025-04-16 19:29:53 -07:00
rkuo-danswer
2a9fd9342e small improvement to checking for image attachments (#4543)
* small improvement to checking for image attachments

* better comments

* check centralized list of types instead of hardcoding them in the connector

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-17 00:34:22 +00:00
pablonyx
597ad806e3 Skip image files for S3 (#4535)
* skip image files

* process images s3

* tests

* k

* update

* nit

* update
2025-04-16 23:41:00 +00:00
evan-danswer
5acae2dc80 fix re-processing of previously seen docs Confluence (#4544)
* fix re-processing of previously seen docs

* performance
2025-04-16 23:16:21 +00:00
pablonyx
99455db26c add 4.1 (#4540) 2025-04-16 15:34:01 -07:00
pablonyx
0d12e96362 Fix bug with saml validation (#4522)
* fix bug with saml validation

* k
2025-04-16 19:35:58 +00:00
Chris Weaver
7e7b6e08ff Fix confluence perm sync ancestry (#4536)
* Fix confluence perm sync ancestry

* Address EL comments

* add test for special case

* remove print

* Fix test
2025-04-16 03:02:54 +00:00
Raunak Bhagat
1dd32ebfce Remove alert upon submission (#4537) 2025-04-15 19:12:12 -07:00
Weves
c3ffaa19a4 Small no-letsencrypt improvement 2025-04-15 18:29:07 -07:00
pablonyx
f4ea7e62a7 Miscellaneous cleanup (#4516)
* stricter typing

* k
2025-04-15 23:35:13 +00:00
rkuo-danswer
2ac41c3719 Feature/celery beat watchdog (#4534)
* upgrade celery to release version

* make the watchdog script more reusable

* use constant

* code review

* catch interrupt

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-15 22:05:37 +00:00
evan-danswer
a8cba7abae extra logging for uncommon permissions cases (#4532)
* extra logging for uncommon permissions cases

* address CW comments
2025-04-15 18:56:17 +00:00
evan-danswer
ae9f8c3071 checkpointed confluence (#4473)
* checkpointed confluence

* confluence checkpointing tested

* fixed integration tests

* attempt to fix connector test flakiness

* fix rebase
2025-04-14 23:59:53 +00:00
evan-danswer
742041d97a fix font for dark mode (#4527) 2025-04-14 22:43:03 +00:00
pablonyx
187b93275d k (#4525) 2025-04-14 22:29:47 +00:00
Weves
ca2aeac2cc Fix black 2025-04-14 15:53:09 -07:00
ThomaciousD
f7543c6285 Fix #3764: Dynamically handle default branch in GitLab connector 2025-04-14 15:52:10 -07:00
pablonyx
1430a18d44 cohere validation logic update (#4523) 2025-04-14 21:49:22 +00:00
rkuo-danswer
7c4487585d rollback properly on exception (#4073)
* rollback properly on exception

* rollback on exception

* don't continue if we can't set the search path

* cleaner handling via context manager

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-04-14 21:48:35 +00:00
pablonyx
e572ce95e7 Shore up multi tenant tests (#4484)
* update

* fix

* finalize`

* remove unnecessary prints

* fix

* k
2025-04-14 18:34:57 +00:00
evan-danswer
68c6c1f4f8 refactor to use stricter typing (#4513)
* refactor to use stricter typing

* older version of ruff
2025-04-14 17:23:07 +00:00
Chris Weaver
a5edc8aa0f Fix default log level (#4501)
* Fix default log level

* fix
2025-04-14 16:40:11 +00:00
evan-danswer
a377f6ffb6 Unify document deduping (#4520)
* minor cleanup

* cleanup doc deduping and add unit tests
2025-04-14 16:33:00 +00:00
Weves
72ce2f75cc Add env var to docker compose file 2025-04-13 23:14:06 -07:00
joachim-danswer
2683207a24 Expanded basic search (#4517)
* initial working version

* ranking profile

* modification for keyword/instruction retrieval

* mypy fixes

* EL comments

* added env var (True for now)

* flipped default to False

* mypy & final EL/CW comments + import issue
2025-04-13 23:13:01 -07:00
Chris Weaver
e3aab8e85e Improve index attempt display (#4511) 2025-04-13 15:57:47 -07:00
pablonyx
65fd8b90a8 add image indexing tests (#4477)
* address file path

* k

* update

* update

* nit- fix typing

* k

* should path

* in a good state

* k

* k

* clean up file

* update

* update

* k

* k

* k
2025-04-11 22:16:37 +00:00
Chris Weaver
6eaa774051 Confluence timeout fix? (#4509) 2025-04-11 20:06:27 +00:00
evan-danswer
60da282dd1 ensure individual search tool runs do not affect each other (#4503)
* ensure individual search tool runs do not affect each other

* small bug fixes

* nit
2025-04-11 17:24:57 +00:00
rkuo-danswer
493e5386ec Bugfix/salesforce correctness (#4497)
* refactor salesforce sqlite db access

* more refactoring

* refactor again

* refactor again

* rename object

* add finalizer to ensure db connection is always closed

* avoid unnecessarily nesting connections and commit regularly when possible

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-11 08:41:22 +00:00
rkuo-danswer
bc74bcae3a updating more packages (#4502)
* updating more packages

* mypy fixes

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-10 20:53:36 -07:00
evan-danswer
e51e4b33b6 fix max 10 drives issue (#4505) 2025-04-11 02:22:38 +00:00
rkuo-danswer
1d7d5e1809 fix scheduler init (#4504) 2025-04-10 18:21:47 -07:00
Patrick Weston
4a6998b7e3 If an assistant limits knowledge, don't let a user override it in the Sets filter 2025-04-10 11:56:00 -07:00
Weves
6d48b9b4fd fix drive permission sync 2025-04-10 10:41:40 -07:00
Weves
86680cd45b Fix google drive group sync 2025-04-10 10:41:40 -07:00
rkuo-danswer
77e60b9812 remove try update in init ... we really don't need the init to access the db or do any work. (#4498)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-10 10:15:28 -07:00
rkuo-danswer
24184024bb Bugfix/dependency updates (#4482)
* bump fastapi and starlette

* bumping llama index and nltk and associated deps

* bump to fix python-multipart

* bump aiohttp

* update package lock for examples/widget

* bump black

* sentencesplitter has changed namespaces

* fix reorder import check, fix missing passlib

* update package-lock.json

* black formatter updated

* reformatted again

* change to black compatible reorder

* change to black compatible reorder-python-imports fork

* fix pytest dependency

* black format again

* we don't need cdk.txt. update packages to be consistent across all packages

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2025-04-10 08:23:02 +00:00
evan-danswer
e79134eaa0 don't yield expected auth errors (#4494)
* don't yield expected auth errors

* only catch 403s
2025-04-10 01:53:02 +00:00
evan-danswer
b5be1fb948 important clarity comment (#4492) 2025-04-10 01:28:34 +00:00
evan-danswer
1718b8f677 fix claude bug (#4493)
* fix claude bug

* fixed tests
2025-04-10 00:59:18 +00:00
rkuo-danswer
3fc8027e73 pass through various id's and log them in the model server for better… (#4485)
* pass through various id's and log them in the model server for better tracking

* fix test

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-10 00:40:57 +00:00
pablonyx
caa9b106e4 k (#4487) 2025-04-10 00:19:47 +00:00
Chris Weaver
89688f0cef Fix naming of volume (#4491) 2025-04-09 23:46:10 +00:00
Raunak Bhagat
eeab3f06ec fix: Remove advanced options toggle if enterprise features are not enabled (#4489)
* Only show advanced options for custom llm providers *if* the paid features are enabled

* Change variable name
2025-04-09 20:42:20 +00:00
rkuo-danswer
15c74224ad xfail bedrock test (#4490)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-09 14:44:02 -07:00
Raunak Bhagat
2da26c16a9 Edit .gitignore file to add zed editor configurations (#4483) 2025-04-08 23:10:42 +00:00
pablonyx
8db80a6bb1 Add latency metrics (#4472)
* k

* update

* Update chat_backend.py

nit

---------

Co-authored-by: evan-danswer <evan@danswer.ai>
2025-04-08 21:23:26 +00:00
rkuo-danswer
9b6c7625fd Bugfix/cloud checkpoint cleanup (#4478)
* use send_task to be consistent

* add pidbox monitoring task

* add logging so we can track the task execution

* log the idletime of the pidbox

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-08 19:47:07 +00:00
Chris Weaver
634d990cb8 Fix startup w/ seed_db (#4481) 2025-04-08 19:46:41 +00:00
pablonyx
5792261a4f Minor doc set fix (#4480)
* update

* update

* update

* k
2025-04-08 19:14:56 +00:00
Chris Weaver
71839e723f Add stuff to better avoid bot-detection in web connector (#4479)
* Add stuff to better avoid bot-detection in web connector

* Switch to exception log
2025-04-08 12:31:30 -07:00
evan-danswer
10f1ac5da1 use persona info when creating tool args (#4397)
* use persona info when creating tool args

* fixed unit test

* include system message

* fix unit test

* nit
2025-04-08 02:55:36 +00:00
Weves
1f80ed11d9 Fix black 2025-04-07 20:33:15 -07:00
Emerson Gomes
ba80191f5b Handle exception for token cost calculation (#4474)
The code for token cost calculation fails when using a LiteLLM proxy due to mismatch with the provider naming. For now, just handle this exception and assume cost 0 when that happens instead of breaking the flow - A more precise, LiteLLM proxy based cost calculation (relying in the `/model/info`) LiteLLM Proxy method will be needed
2025-04-07 20:30:50 -07:00
Raunak Bhagat
206daa6903 feat: Vertex AI support (#4458)
* Add gemini well-known-llm-provider

* Edit styling of anonymous function

* Remove space

* Edit how advanced options are displayed

* Add VertexAI to acceptable llm providers

* Add new `FileUploadFormField` component

* Edit FileUpload component

* Clean up logic for displaying native llm providers; add support for more complex `CustomConfigKey` types

* Fix minor nits in web app

* Add ability to pass vertex credentials to `litellm`

* Remove unused prop

* Change name of enum value

* Add back ability to change form based on first time configurations

* Create new Error with string instead of throwing raw string

* Add more Gemini models

* Edit mappings for Gemini models

* Edit comment

* Rearrange llm models

* Run black formatter

* Remove complex configurations during first time registration

* Fix nit

* Update llm provider name

* Edit temporary formik field to also have the filename

* Run reformatter

* Reorder commits

* Add advanced configurations for enabled LLM Providers
2025-04-08 00:56:47 +00:00
evan-danswer
17562f9b8f Id not set in checkpoint2 (#4468)
* unconditionally set completion

* drive connector improvements

* fixing broader typing issue

* fix tests, CW comments

* actual test fix
2025-04-07 17:00:42 -07:00
evan-danswer
9c73099241 Drive smart chip indexing (#4459)
* WIP

* WIP almost done, but realized we can just do basic retrieval

* rebased and added scripts

* improved approach to extracting smart chips

* remove files from previous branch

* fix connector tests

* fix test
2025-04-07 21:52:45 +00:00
Emerson Gomes
88d4a65e7b Fix hardcoded temperature 2025-04-07 13:56:11 -07:00
Weves
614d0f8d72 Add more options to dev compose file 2025-04-07 10:03:34 -07:00
SubashMohan
157da24504 update test expectations for Highspot connector (#4464) 2025-04-07 05:12:22 +00:00
Evan Lohn
989dab51b9 unconditionally set completion 2025-04-06 22:39:42 -07:00
Weves
6a13401172 Small tweaks to thinking 2025-04-06 15:59:00 -07:00
rkuo-danswer
bb73bb224a slack permission tests are enterprise only (#4463)
* slack permission tests are enterprise only

* xfail highspot connector

* test is broken

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-06 15:23:17 -07:00
Richard Kuo (Onyx)
4dc382b571 Revert "slack permission tests are enterprise only"
This reverts commit 056a83493f.
2025-04-06 14:24:42 -07:00
Richard Kuo (Onyx)
056a83493f slack permission tests are enterprise only 2025-04-06 14:24:25 -07:00
Chris Weaver
aadd4f212a Adjust pg engine intialization (#4408)
* Adjust pg engine intialization

* Fix mypy

* Rename var

* fix typo

* Fix tests
2025-04-06 12:44:49 -07:00
Ferdinand Loesch
8b05f98d54 Thinking mode UI. (#4370)
* Update web connector implementation and fix line length issues

* Update configurations and fix connector issues

* Update Slack connector

* Update connectors and add jira_test_env to gitignore, removing sensitive information

* Restore checkpointing functionality and remove sensitive information

* Fix agent mode to properly handle thinking tokens

* up

* Enhance ThinkingBox component with improved content handling and animations. Added support for partial thinking tokens, refined scrolling behavior, and updated CSS for better visual feedback during thinking states.

* Create clean branch with frontend thinking mode changes only

* Update ThinkingBox component to include new props for completion and streaming states. Refactor smooth scrolling logic into a dedicated function for improved readability. Add new entry to .gitignore for jira_test_env.

* Remove autoCollapse prop from AIMessage component for improved flexibility in message display.

* Update thinking tokens handling in chat utils

* Remove unused cleanThinkingContent import from Messages component to streamline code.

---------

Co-authored-by: ferdinand loesch <f.loesch@sportradar.com>
Co-authored-by: EC2 Default User <ec2-user@ip-10-73-128-233.eu-central-1.compute.internal>
Co-authored-by: Your Name <you@example.com>
Co-authored-by: Chris Weaver <25087905+Weves@users.noreply.github.com>
2025-04-05 17:31:02 -07:00
Weves
1c16c4ea3d Adjusting default search assistant 2025-04-05 16:00:47 -07:00
Weves
cf6ff3ce4a Fix run-nginx 2025-04-05 16:00:10 -07:00
Weves
86d9f5d9dd Update resource limits 2025-04-05 16:00:10 -07:00
pablonyx
09450010cd refresh token limit (#4456) 2025-04-05 01:27:57 +00:00
pablonyx
0acd50b75d docx bugfix 2025-04-04 18:20:31 -07:00
pablonyx
c3c9a0e57c Docx parsing (#4455)
* looks okay

* k

* k

* k

* update values

* k

* quick fix
2025-04-04 23:36:43 +00:00
pablonyx
ef978aea97 Additional ACL Tests + Slackbot fix (#4430)
* try turning drive perm sync on

* try passing in env var

* add some logs

* Update pr-integration-tests.yml

* revert "Update pr-integration-tests.yml"

This reverts commit 76a44adbfe.

* Revert "add some logs"

This reverts commit ab9e6bcfb1.

* Revert "try passing in env var"

This reverts commit 9c0b6162ea.

* Revert "try turning drive perm sync on"

This reverts commit 2d35f61f42.

* try slack connector

* k

* update

* remove logs

* remove more logs

* nit

* k

* k

* address nits

* run test with additional logs

* Revert "run test with additional logs"

This reverts commit 1397a2c4a0.

* Revert "address nits"

This reverts commit d5e24b019d.
2025-04-04 22:00:17 +00:00
rkuo-danswer
15ab0586df handle gong api race condition (#4457)
* working around a gong race condition in their api

* add back gong basic test

* formatting

* add the call index

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-04 19:33:47 +00:00
rkuo-danswer
839c8611b7 Bugfix/salesforce (#4335)
* add some gc

* small refactoring for temp directories

* WIP

* add some gc collects and size calculations

* un-xfail

* fix salesforce test

* loose check for number of docs

* adjust test again

* cleanup

* nuke directory param, remove using sqlite db to cache email / id mappings

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-04 16:21:34 +00:00
joachim-danswer
68f9f157a6 Adding research topics for better search context (#4448)
* research topics addition

* allow for question to overwrite research area
2025-04-04 09:53:39 -07:00
SubashMohan
9dd56a5c80 Enhance Highspot connector with error handling and add unit tests (#4454)
* Enhance Highspot connector with error handling and add unit tests for poll_source functionality

* Fix file extension validation logic to allow either plain text or document format
2025-04-04 09:53:16 -07:00
pablonyx
842a73a242 Mock connector fix (#4446) 2025-04-04 09:26:10 -07:00
Weves
c04c1ea31b Fix onyx_config.jsonl 2025-04-03 22:44:56 -07:00
Chris Weaver
2380c2266c Infra and Deployment for ECS Fargate (#4449)
* Infra and Deployment for ECS Fargate
---------

Co-authored-by: jpb80 <jordan.buttkevitz@gmail.com>
2025-04-03 22:43:56 -07:00
pablonyx
b02af9b280 Div Con (#4442)
* base setup

* Improvements + time boxing

* time box fix

* mypy fix

* EL Comments

* CW comments

* date awareness

---------

Co-authored-by: joachim-danswer <joachim@danswer.ai>
2025-04-04 00:52:00 +00:00
rkuo-danswer
42938dcf62 Bugfix/gong tweaks (#4444)
* gong debugging

* add retries via class level session, add debugging

* add gong connector test

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-03 22:22:45 +00:00
pablonyx
93886f0e2c Assistant Prompt length + client side (#4433) 2025-04-03 11:26:53 -07:00
rkuo-danswer
8c3a953b7a add prometheus metrics endpoints via helper package (#4436)
* add prometheus metrics endpoints via helper package

* model server specific requirements

* mark as public endpoint

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-03 16:52:05 +00:00
evan-danswer
54b883d0ca fix large docs selected in chat pruning (#4412)
* fix large docs selected in chat pruning

* better approach to length restriction

* comments

* comments

* fix unit tests and minor pruning bug

* remove prints
2025-04-03 15:48:10 +00:00
pablonyx
91faac5447 minor fix (#4435) 2025-04-03 15:00:27 +00:00
Chris Weaver
1d8f9fc39d Fix weird re-index state (#4439)
* Fix weird re-index state

* Address rkuo's comments
2025-04-03 02:16:34 +00:00
Weves
9390de21e5 More logging on confluence space permissions 2025-04-02 20:01:38 -07:00
rkuo-danswer
3a33433fc9 unit tests for chunk censoring (#4434)
* unit tests for chunk censoring

* type hints for mypy

* pytestification

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-03 01:28:54 +00:00
Chris Weaver
c4865d57b1 Fix tons of users w/o drive access causing timeouts (#4437) 2025-04-03 00:01:05 +00:00
rkuo-danswer
81d04db08f Feature/request id middleware 2 (#4427)
* stubbing out request id

* passthru or create request id's in api and model server

* add onyx request id

* get request id logging into uvicorn

* no logs

* change prefixes

* fix comment

* docker image needs specific shared files

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-02 22:30:03 +00:00
rkuo-danswer
d50a17db21 add filter unit tests (#4421)
* add filter unit tests

* fix tests

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-02 20:26:25 +00:00
pablonyx
dc5a1e8fd0 add more flexible vision support check (#4429) 2025-04-02 18:11:33 +00:00
pablonyx
c0b3681650 update (#4428) 2025-04-02 18:09:44 +00:00
Chris Weaver
7ec04484d4 Another fix for Salesforce perm sync (#4432)
* Another fix for Salesforce perm sync

* typing
2025-04-02 11:08:40 -07:00
Weves
1cf966ecc1 Fix Salesforce perm sync 2025-04-02 10:47:26 -07:00
rkuo-danswer
8a8526dbbb harden join function (#4424)
* harden join function

* remove log spam

* use time.monotonic

* add pid logging

* client only celery app

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-02 01:04:00 -07:00
Weves
be20586ba1 Add retries for confluence calls 2025-04-01 23:00:37 -07:00
Weves
a314462d1e Fix migrations 2025-04-01 21:48:32 -07:00
rkuo-danswer
155f53c3d7 Revert "Add user invitation test (#4161)" (#4422)
This reverts commit 806de92feb.

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-01 19:55:04 -07:00
pablonyx
7c027df186 Fix cc pair doc deletion (#4420) 2025-04-01 18:44:15 -07:00
pablonyx
0a5db96026 update (#4415) 2025-04-02 00:42:42 +00:00
joachim-danswer
daef985b02 Simpler approach (#4414) 2025-04-01 16:52:59 -07:00
Weves
b7ece296e0 Additional logging to salesforce perm sync 2025-04-01 16:19:50 -07:00
Richard Kuo (Onyx)
d7063e0a1d expose acl link feature in onyx_vespa 2025-04-01 16:19:50 -07:00
pablonyx
ee073f6d30 Tracking things (#4352) 2025-04-01 16:19:50 -07:00
Raunak Bhagat
2e524816a0 Regen (#4409)
* Edit styling of regeneration dropdown

* Finish regeneration style changes

* Remove invalid props

* Update web/src/app/chat/input/ChatInputBar.tsx

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Remove unused variables

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-04-01 16:19:50 -07:00
pablonyx
47ef0c8658 Still delete cookies (#4404) 2025-04-01 16:19:50 -07:00
pablonyx
806de92feb Add user invitation test (#4161) 2025-04-01 16:19:50 -07:00
pablonyx
da39f32fea Validate advanced fields + proper yup assurances for lists (#4399) 2025-04-01 16:19:50 -07:00
pablonyx
2a87837ce1 Very minor auth standardization (#4400) 2025-04-01 16:19:50 -07:00
pablonyx
7491cdd0f0 Update migration (#4410) 2025-04-01 16:19:50 -07:00
SubashMohan
aabd698295 refactor tests for Highspot connector to use mocking for API key retrieval (#4346) 2025-04-01 16:19:50 -07:00
Weves
4b725e4d1a Init engine in slackbot 2025-04-01 16:19:50 -07:00
rkuo-danswer
34d2d92fa8 also set permission upsert to medium priority (#4405)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-01 16:19:50 -07:00
pablonyx
3a3b2a2f8d add user files (#4152) 2025-04-01 16:19:44 -07:00
rkuo-danswer
ccd372cc4a Bugfix/slack rate limiting (#4386)
* use slack's built in rate limit handler for the bot

* WIP

* fix the slack rate limit handler

* change default to 8

* cleanup

* try catch int conversion just in case

* linearize this logic better

* code review comments

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-03-31 21:00:26 +00:00
evan-danswer
ea30f1de1e minor improvement to fireflies connector (#4383)
* minor improvement to fireflies connector

* reduce time diff
2025-03-31 20:00:52 +00:00
evan-danswer
a7130681d9 ensure bedrock model contains API key (#4396)
* ensure bedrock model contains API key

* fix storing bug
2025-03-31 19:58:53 +00:00
pablonyx
04911db715 fix slashes (#4259) 2025-03-31 18:08:17 +00:00
rkuo-danswer
feae7d0cc4 disambiguate job name from ee version (#4403)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-03-31 11:48:28 -07:00
pablonyx
ac19c64b3c temporary fix for auth (#4402) 2025-03-31 11:10:41 -07:00
pablonyx
03d5c30fd2 fix (#4372) 2025-03-31 17:25:21 +00:00
joachim-danswer
e988c13e1d Additional logging for the path from Search Results to LLM Context (#4387)
* added logging

* nit

* nit
2025-03-31 00:38:43 +00:00
pablonyx
dc18d53133 Improve multi tenant anonymous user interaction (#3857)
* cleaner handling

* k

* k

* address nits

* fix typing
2025-03-31 00:33:32 +00:00
evan-danswer
a1cef389aa fallback to ignoring unicode chars when huggingface tokenizer fails (#4394) 2025-03-30 23:45:20 +00:00
pablonyx
db8d6ce538 formatting (#4316) 2025-03-30 23:43:17 +00:00
pablonyx
e8370dcb24 Update refresh conditional (#4375)
* update refresh conditional

* k
2025-03-30 17:28:35 -07:00
pablonyx
9951fe13ba Fix image input processing without LLMs (#4390)
* quick fix

* quick fix

* Revert "quick fix"

This reverts commit 906b29bd9b.

* nit
2025-03-30 19:28:49 +00:00
evan-danswer
56f8ab927b Contextual Retrieval (#4029)
* contextual rag implementation

* WIP

* indexing test fix

* workaround for chunking errors, WIP on fixing massive memory cost

* mypy and test fixes

* reformatting

* fixed rebase
2025-03-30 18:49:09 +00:00
rkuo-danswer
cb5bbd3812 Feature/mit integration tests (#4299)
* new mit integration test template

* edit

* fix problem with ACL type tags and MIT testing for test_connector_deletion

* fix test_connector_deletion_for_overlapping_connectors

* disable some enterprise only tests in MIT version

* disable a bunch of user group / curator tests in MIT version

* wire off more tests

* typo fix

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2025-03-30 02:41:08 +00:00
Yuhong Sun
742d29e504 Remove BETA 2025-03-29 15:38:46 -07:00
SubashMohan
ecc155d082 fix: ensure base_url ends with a trailing slash (#4388) 2025-03-29 14:34:30 -07:00
pablonyx
0857e4809d fix background color 2025-03-28 16:33:30 -07:00
Chris Weaver
22e00a1f5c Fix duplicate docs (#4378)
* Initial

* Fix duplicate docs

* Add tests

* Switch to list comprehension

* Fix test
2025-03-28 22:25:26 +00:00
Chris Weaver
0d0588a0c1 Remove OnyxContext (#4376)
* Remove OnyxContext

* Fix UT

* Fix tests v2
2025-03-28 12:39:51 -07:00
rkuo-danswer
aab777f844 Bugfix/acl prefix (#4377)
* fix acl prefixing

* increase timeout a tad

* block access to init'ing DocumentAccess directly, fix test to work with ee/MIT

* fix env var checks

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-03-28 05:52:35 +00:00
pablonyx
babbe7689a k (#4380) 2025-03-28 02:23:45 +00:00
evan-danswer
a123661c92 fixed shared folder issue (#4371)
* fixed shared folder issue

* fix existing tests

* default allow files shared with me for service account
2025-03-27 23:39:52 +00:00
pablonyx
c554889baf Fix actions link (#4374) 2025-03-27 16:39:35 -07:00
rkuo-danswer
f08fa878a6 refactor file extension checking and add test for blob s3 (#4369)
* refactor file extension checking and add test for blob s3

* code review

* fix checking ext

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-03-27 18:57:44 +00:00
pablonyx
d307534781 add some debug logging (#4328) 2025-03-27 11:49:32 -07:00
rkuo-danswer
6f54791910 adjust some vars in real time (#4365)
* adjust some vars in real time

* some sanity checking

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-03-27 17:30:08 +00:00
pablonyx
0d5497bb6b Add multi-tenant user invitation flow test (#4360) 2025-03-27 09:53:15 -07:00
Chris Weaver
7648627503 Save all logs + add log persistence to most Onyx-owned containers (#4368)
* Save all logs + add log persistence to most Onyx-owned containers

* Separate volumes for each container

* Small fixes
2025-03-26 22:25:39 -07:00
pablonyx
927554d5ca slight robustification (#4367) 2025-03-27 03:23:36 +00:00
pablonyx
7dcec6caf5 Fix session touching (#4363)
* fix session touching

* Revert "fix session touching"

This reverts commit c473d5c9a2.

* Revert "Revert "fix session touching""

This reverts commit 26a71d40b6.

* update

* quick nit
2025-03-27 01:18:46 +00:00
rkuo-danswer
036648146d possible fix for confluence query filter (#4280)
* possible fix for confluence query filter

* nuke the attachment filter query ... it doesn't work!

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-03-27 00:35:14 +00:00
rkuo-danswer
2aa4697ac8 permission sync runs so often that it starves out other tasks if run at high priority (#4364)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-03-27 00:22:53 +00:00
rkuo-danswer
bc9b4e4f45 use slack's built in rate limit handler for the bot (#4362)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-03-26 21:55:04 +00:00
evan-danswer
178a64f298 fix issue with drive connector service account indexing (#4356)
* fix issue with drive connector service account indexing

* correct checkpoint resumption

* final set of fixes

* nit

* fix typing

* logging and CW comments

* nit
2025-03-26 20:54:26 +00:00
pablonyx
c79f1edf1d add a flush (#4361) 2025-03-26 14:40:52 -07:00
pablonyx
7c8e23aa54 Fix saml conversion from ext_perm -> basic (#4343)
* fix saml conversion from ext_perm -> basic

* quick nit

* minor fix

* finalize

* update

* quick fix
2025-03-26 20:36:51 +00:00
pablonyx
d37b427d52 fix email flow (#4339) 2025-03-26 18:59:12 +00:00
pablonyx
a65fefd226 test fix 2025-03-26 12:43:38 -07:00
rkuo-danswer
bb09bde519 Bugfix/google drive size threshold 2 (#4355) 2025-03-26 12:06:36 -07:00
Tim Rosenblatt
0f6cf0fc58 Fixes docker logs helper text in run-nginx.sh (#3678)
The docker container name is slightly wrong, and this commit fixes it.
2025-03-26 09:03:35 -07:00
pablonyx
fed06b592d Auto refresh credentials (#4268)
* Auto refresh credentials

* remove dupes

* clean up + tests

* k

* quick nit

* add brief comment

* misc typing
2025-03-26 01:53:31 +00:00
pablonyx
8d92a1524e fix invitation on cloud (#4351)
* fix invitation on cloud

* k
2025-03-26 01:25:17 +00:00
pablonyx
ecfea9f5ed Email formatting devices (#4353)
* update email formatting

* k

* update

* k

* nit
2025-03-25 21:42:32 +00:00
rkuo-danswer
b269f1ba06 fix broken function call (#4354)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-03-25 21:07:31 +00:00
pablonyx
30c878efa5 Quick fix (#4341)
* quick fix

* Revert "quick fix"

This reverts commit f113616276.

* smaller chnage
2025-03-25 18:39:55 +00:00
pablonyx
2024776c19 Respect contextvars when parallelizing for Google Drive (#4291)
* k

* k

* fix typing
2025-03-25 17:40:12 +00:00
pablonyx
431316929c k (#4336) 2025-03-25 17:00:35 +00:00
pablonyx
c5b9c6e308 update (#4344) 2025-03-25 16:56:23 +00:00
pablonyx
73dd188b3f update (#4338) 2025-03-25 16:55:25 +00:00
evan-danswer
79b061abbc Daylight savings time handling (#4345)
* confluence timezone improvements

* confluence timezone improvements
2025-03-25 16:11:30 +00:00
rkuo-danswer
552f1ead4f use correct namespace in redis for certain keys (#4340)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-03-25 04:10:31 +00:00
evan-danswer
17925b49e8 typing fix (#4342)
* typing fix

* changed type hint to help future coders
2025-03-25 01:01:13 +00:00
rkuo-danswer
55fb5c3ca5 add size threshold for google drive (#4329)
* add size threshold for google drive

* greptile nits

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-03-24 04:09:28 +00:00
evan-danswer
99546e4a4d zendesk checkpointed connector (#4311)
* zendesk v1

* logic fix

* zendesk testing

* add unit tests

* zendesk caching

* CW comments

* fix unit tests
2025-03-23 20:43:13 +00:00
pablonyx
c25d56f4a5 Improved drive flow UX (#4331)
* wip

* k

* looking good

* clenaed up

* quick nit
2025-03-23 19:21:03 +00:00
Chris Weaver
35f3f4f120 Small slack bot fixes (#4333) 2025-03-22 23:22:17 +00:00
Weves
25b69a8aca Adjust spammy log 2025-03-22 14:52:09 -07:00
pablonyx
1b7d710b2a Fix links from file metadata (#4324)
* quick fix

* clarify comment

* fix file metadata

* k
2025-03-22 18:21:47 +00:00
pablonyx
ae3d3db3f4 Update slack bot listing endpoint (#4325)
* update slack bot listing endpoint

* nit
2025-03-22 18:21:31 +00:00
evan-danswer
fb79a9e700 Checkpointed GitHub connector (#4307)
* WIP github checkpointing

* first draft of github checkpointing

* nit

* CW comments

* github basic connector test

* connector test env var

* secrets cant start with GITHUB_

* unit tests and bug fix

* connector failures

* address CW comments

* validation fix

* validation fix

* remove prints

* fixed tests

* 100 items per page
2025-03-22 01:48:05 +00:00
rkuo-danswer
587ba11bbc alembic script logging fixes (#4322)
* log fixing

* fix typos

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-03-22 00:50:58 +00:00
pablonyx
fce81ebb60 Minor ux nits (#4327)
* k

* quick fix
2025-03-21 21:50:56 +00:00
Chris Weaver
61facfb0a8 Fix slack connector (#4326) 2025-03-21 21:30:03 +00:00
Chris Weaver
52b96854a2 Handle move errors (#4317)
* Handle move errors

* Make a warning
2025-03-21 11:11:12 -07:00
Chris Weaver
d123713c00 Fix GPU status request in sync flow (#4318)
* Fix GPU status request in sync flow

* tweak

* Fix test

* Fix more tests
2025-03-21 11:11:00 -07:00
Chris Weaver
775c847f82 Reduce drive retries (#4312)
* Reduce drive retries

* timestamp format fix

---------

Co-authored-by: Evan Lohn <evan@danswer.ai>
2025-03-21 00:23:55 +00:00
rkuo-danswer
6d330131fd wire off image downloading for confluence and gdrive if not enabled i… (#4305)
* wire off image downloading for confluence and gdrive if not enabled in settings

* fix partial func

* fix confluence basic test

* add test for skipping/allowing images

* review comments

* skip allow images test

* mock function using the db

* mock at the proper level

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-03-20 23:10:28 +00:00
Chris Weaver
0292ca2445 Add option to control # of slack threads (#4310) 2025-03-20 16:56:05 +00:00
Weves
15dd1e72ca Remove slack channel validation 2025-03-20 08:34:54 -07:00
Weves
91c9be37c0 Fix loader 2025-03-20 08:30:46 -07:00
Weves
2a01c854a0 Fix cases where the bot is disabled 2025-03-20 08:30:46 -07:00
rkuo-danswer
85ebadc8eb sanitize llm keys and handle updates properly (#4270)
* sanitize llm keys and handle updates properly

* fix llm provider testing

* fix test

* mypy

* fix default model editing

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2025-03-20 01:13:02 +00:00
Chris Weaver
5dda53eec3 Notion improvement (#4306)
* Notion connector improvements

* Enable recursive index by default

* Small tweak
2025-03-19 23:16:05 +00:00
Chris Weaver
72bf427cc2 Address invalid connector state (#4304)
* Address invalid connector state

* Fixes

* Address mypy

* Address RK comment
2025-03-19 21:15:06 +00:00
Chris Weaver
f421c6010b Checkpointed Jira connector (#4286)
* Checkpointed Jira connector

* nit

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* typing improvements and test fixes

* cleaner typing

* remove default because it is from the future

* mypy

* Address EL comments

---------

Co-authored-by: evan-danswer <evan@danswer.ai>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-03-19 20:41:01 +00:00
rkuo-danswer
0b87549f35 Feature/email whitelabeling (#4260)
* work in progress

* work in progress

* WIP

* refactor, use inline attachment for image (base64 encoding doesn't work)

* pretty sure this belongs behind a multi_tenant check

* code review / refactor

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-03-19 13:08:44 -07:00
evan-danswer
06624a988d Gdrive checkpointed connector (#4262)
* WIP rebased

* style

* WIP, testing theory

* fix type issue

* fixed filtering bug

* fix silliness

* correct serialization and validation of threadsafedict

* concurrent drive access

* nits

* nit

* oauth bug fix

* testing fix

* fix slim retrieval

* fix integration tests

* fix testing change

* CW comments

* nit

* guarantee completion stage existence

* fix default values
2025-03-19 18:49:35 +00:00
Chris Weaver
ae774105e3 Fix slack connector creation (#4303)
* Make it fail fast + succeed validation if rate limiting is happening

* Add logging + reduce spam
2025-03-19 18:26:49 +00:00
evan-danswer
4dafc3aa6d Update README.md 2025-03-18 21:14:05 -07:00
evan-danswer
5d7d471823 Update README.md
fix bullet points
2025-03-18 19:34:08 -07:00
Weves
61366df34c Add execute permission 2025-03-18 12:03:32 -07:00
Chris Weaver
1a444245f6 Memory tracking script (#4297)
* Add simple container-level memory tracking script
2025-03-18 12:00:09 -07:00
rkuo-danswer
c32d234491 xfail highspot connector tests (#4296)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-03-18 11:47:17 -07:00
pablonyx
07b68436cf use ONYX_CLOUD_CELERY_TASK_PREFIX for pre provisioning (#4293) 2025-03-18 17:34:22 +00:00
Chris Weaver
293d1a4476 Add process-level memory monitoring (#4294)
* Add process-level memory monitoring

* Switch to every 5 minutes
2025-03-17 22:39:52 -07:00
SubashMohan
ba514aaaa2 Highspot connector (#4277) 2025-03-17 08:36:02 -07:00
Arun Philip
f45798b5dd add overflow-auto to show all content in Modal (#4140) 2025-03-15 11:56:19 -07:00
Weves
64ff5df083 Fix basic auth for non-ee 2025-03-14 11:40:17 -07:00
rkuo-danswer
cf1b7e7a93 add proper boolean validation to field (#4283)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-03-14 03:38:25 +00:00
Chris Weaver
63692a6bd3 Fix perm sync memory usage (#4282)
* Fix slack perm sync memory usage

* Make perm syncing run in batches rather than fetching everything

* Update backend/ee/onyx/external_permissions/slack/doc_sync.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Update backend/ee/onyx/external_permissions/slack/doc_sync.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Loud error on slack doc sync missing permissions

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-03-14 02:26:22 +00:00
evan-danswer
934700b928 better drive url cleaning (#4247)
* better drive url cleaning

* nit

* address JR comments
2025-03-13 21:16:24 +00:00
Chris Weaver
b1a7cff9e0 Enable claude 3.7 (#4279) 2025-03-13 18:33:06 +00:00
joachim-danswer
463340b8a1 Reduce ranking scores for short chunks without actual information (#4098)
* remove title for slack

* initial working code

* simplification

* improvements

* name change to information_content_model

* avoid boost_score > 1.0

* nit

* EL comments and improvements

Improvements:
  - proper import of information content model from cache or HF
  - warm up for information content model

Other:
  - EL PR review comments

* nit

* requirements version update

* fixed docker file

* new home for model_server configs

* default off

* small updates

* YS comments - pt 1

* renaming to chunk_boost & chunk table def

* saving and deleting chunk stats in new table

* saving and updating chunk stats

* improved dict score update

* create columns for individual boost factors

* RK comments

* Update migration

* manual import reordering
2025-03-13 17:35:45 +00:00
rkuo-danswer
ba82888e1e change max workers to 2 for the moment (#4278)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-03-13 09:58:24 -07:00
rkuo-danswer
39465d3104 change default build info in dockerfile's to something more obviously source only (#4275)
Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2025-03-13 09:42:10 -07:00
rkuo-danswer
b4ecc870b9 safe handling for mediaType in confluence connector in all places (#4269)
Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-03-13 06:09:19 +00:00
rkuo-danswer
a2ac9f02fb unique constraint here doesn't work (#4271)
Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-03-12 16:25:27 -07:00
pablonyx
f87e559cc4 Separate out indexing-time image analysis into new phase (#4228)
* Separate out indexing-time image analysis into new phase

* looking good

* k

* k
2025-03-12 22:26:05 +00:00
pablonyx
5883336d5e Support image indexing customization (#4261)
* working well

* k

* ready to go

* k

* minor nits

* k

* quick fix

* k

* k
2025-03-12 20:03:45 +00:00
pablonyx
0153ff6b51 Improved logout flow (#4258)
* improved app provider modals

* improved logout flow

* k

* updates

* add docstring
2025-03-12 19:19:39 +00:00
pablonyx
2f8f0f01be Tenants on standby (#4218)
* add tenants on standby feature

* k

* fix alembic

* k

* k
2025-03-12 18:25:30 +00:00
pablonyx
a9e5ae2f11 Fix slash mystery (#4263) 2025-03-12 10:03:21 -07:00
Chris Weaver
997f40500d Add support for sandboxed salesforce (#4252) 2025-03-12 00:21:24 +00:00
rkuo-danswer
a918a84e7b fix oauth downloading and size limits in confluence (#4249)
* fix oauth downloading and size limits in confluence

* bump black to get past corrupt hash

* try working around another corrupt package

* fix raw_bytes

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2025-03-11 23:57:47 +00:00
rkuo-danswer
090f3fe817 handle conflicts on lowercasing emails (#4255)
Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-03-11 21:25:50 +00:00
pablonyx
4e70f99214 Fix slack links (#4254)
* fix slack links

* updates

* k

* nit improvements
2025-03-11 19:58:15 +00:00
pablonyx
ecbd4eb1ad add basic user invite flow (#4253) 2025-03-11 19:02:51 +00:00
pablonyx
f94d335d12 Do not show modals to non-multitenant users (#4256) 2025-03-11 11:53:13 -07:00
pablonyx
59a388ce0a fix tests 2025-03-11 11:12:35 -07:00
rkuo-danswer
9cd3cbb978 fix versions (#4250)
Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2025-03-10 23:50:07 -07:00
pablonyx
ab1b6b487e descrease model server logspam (#4166) 2025-03-10 18:29:27 +00:00
Chris Weaver
6ead9510a4 Small notion tweaks (#4244)
* Small notion tweaks

* Add comment
2025-03-10 15:51:12 +00:00
Chris Weaver
965f9e98bf Eliminate extremely long log line for large checkpointds (#4236)
* Eliminate extremely long log line for large checkpointds

* address greptile
2025-03-10 15:50:50 +00:00
rkuo-danswer
426883bbf5 Feature/agentic buffered (#4231)
* rename agent test script to prevent pytest autodiscovery

* first cut

* fix log message

* fix up typing

* add a sample test

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-03-10 15:48:42 +00:00
rkuo-danswer
6ca400ced9 Bugfix/delete document tags slow (#4232)
* Add Missing Date and Message-ID Headers to Ensure Email Delivery

* fix issue Performance issue during connector deletion #4191

* fix ruff

* bump to rebuild PR

---------

Co-authored-by: ThomaciousD <2194608+ThomaciousD@users.noreply.github.com>
Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-03-10 03:07:30 +00:00
Weves
104c4b9f4d small modal improvement 2025-03-09 20:54:53 -07:00
pablonyx
8b5e8bd5b9 k (#4240) 2025-03-10 03:06:13 +00:00
Weves
7f7621d7c0 SMall gitbook tweaks 2025-03-09 14:46:44 -07:00
pablonyx
06dcc28d05 Improved login experience (#4178)
* functional initial auth modal

* k

* k

* k

* looking good

* k

* k

* k

* k

* update

* k

* k

* misc bunch

* improvements

* k

* address comments

* k

* nit

* update

* k
2025-03-09 01:06:20 +00:00
pablonyx
18df63dfd9 Fix local background jobs (#4241) 2025-03-08 14:47:56 -08:00
Chris Weaver
0d3c72acbf Add basic memory logging (#4234)
* Add basic memory logging

* Small tweaks

* Switch to monotonic
2025-03-08 03:49:47 +00:00
rkuo-danswer
9217243e3e Bugfix/query history notes (#4204)
* early work in progress

* rename utility script

* move actual data seeding to a shareable function

* add test

* make the test pass with the fix

* fix comment

* slight improvements and notes to query history and seeding

* update test

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-03-07 19:52:30 +00:00
rkuo-danswer
61ccba82a9 light worker needs to discover some indexing tasks (#4209)
* light worker needs to discover some indexing tasks

* fix formatting

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-03-07 11:52:09 -08:00
Weves
9e8eba23c3 Fix frozen model issue 2025-03-07 09:05:43 -08:00
evan-danswer
0c29743538 use max_tokens to do better rate limit handling (#4224)
* use max_tokens to do better rate limit handling

* fix unti tests

* address greptile comment, thanks greptile
2025-03-06 18:12:05 -08:00
pablonyx
08b2421947 fix 2025-03-06 17:30:31 -08:00
pablonyx
ed518563db minor typing update 2025-03-06 17:02:39 -08:00
pablonyx
a32f7dc936 Fix Connector tests (confluence) (#4221) 2025-03-06 17:00:01 -08:00
rkuo-danswer
798e10c52f revert to always building model server (#4213)
* revert to always building model server

* fix just in case

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-03-06 23:49:45 +00:00
pablonyx
bf4983e35a Ensure consistent UX (#4222)
* ux consistent

* nit

* Update web/src/app/admin/configuration/llm/interfaces.ts

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-03-06 23:13:32 +00:00
evan-danswer
b7da91e3ae improved basic search latency (#4186)
* improved basic search latency

* address PR comments + minor cleanup
2025-03-06 22:22:59 +00:00
Weves
29382656fc Stop trying a million times for the user validity check 2025-03-06 15:35:49 -08:00
pablonyx
7d6db8d500 Comma separated list for Github repos (#4199) 2025-03-06 14:46:57 -08:00
Chris Weaver
a7a374dc81 Confluence fixes (#4220)
* Confluence fixes

* Small tweak

* Address greptile comments
2025-03-06 20:57:07 +00:00
rkuo-danswer
facc8cc2fa add scope needed for permission sync (#4198)
Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-03-06 20:03:38 +00:00
rkuo-danswer
2c0af0a0ca Feature/helm updates (#4201)
* add ingress for api and web

* helm setup docs

* add letsencrypt. close blocks

* use pathType ImplementationSpecific as Prefix is deprecated

* fix backend labels. configure nginx routes. update annotations

* fix linting

---------

Co-authored-by: Sajjad Anwar <sajjadkm@gmail.com>
Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-03-06 19:48:20 +00:00
pablonyx
bfbc1cd954 k (#4172) 2025-03-06 18:55:12 +00:00
pablonyx
626da583aa Fix gated tenants (#4177)
* fix

* mypy .
2025-03-06 18:07:15 +00:00
pablonyx
92faca139d Fix extra tenant mystery (#4197)
* fix extra tenant mystery

* nit
2025-03-06 18:06:49 +00:00
pablonyx
cec05c5ee9 Revert "k"
This reverts commit 687122911d.
2025-03-06 09:38:31 -08:00
Richard Kuo (Danswer)
eaf054ef06 oauth router went missing? 2025-03-05 15:50:23 -08:00
pablonyx
a7a1a24658 minor nit 2025-03-05 15:35:02 -08:00
pablonyx
687122911d k 2025-03-05 15:27:14 -08:00
pablonyx
40953bd4fe Workspace configs (#4202) 2025-03-05 12:28:44 -08:00
rkuo-danswer
a7acc07e79 fix usage report pagination (#4183)
* early work in progress

* rename utility script

* move actual data seeding to a shareable function

* add test

* make the test pass with the fix

* fix comment

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-03-05 19:13:51 +00:00
pablonyx
b6e9e65bb8 * Replaces Amazon and Anthropic Icons with version better suitable fo… (#4190)
* * Replaces Amazon and Anthropic Icons with version better suitable for both Dark and  Light modes;
* Adds icon for DeepSeek;
* Simplify logic on icon selection;
* Adds entries for Phi-4, Claude 3.7, Ministral and Gemini 2.0 models

* nit

* k

* k

---------

Co-authored-by: Emerson Gomes <emerson.gomes@thalesgroup.com>
2025-03-05 17:57:39 +00:00
pablonyx
20f2b9b2bb Add image support for search (#4090)
* add support for image search

* quick fix up

* k

* k

* k

* k

* nit

* quick fix for connector tests
2025-03-05 17:44:18 +00:00
Chris Weaver
f731beca1f Add ONYX_QUERY_HISTORY_TYPE to the dev compose files (#4196) 2025-03-05 17:34:55 +00:00
Weves
fe246aecbb Attempt to address tool happy claude 2025-03-05 09:47:27 -08:00
pablonyx
50ad066712 Better filtering (#4185)
* k

* k

* k

* k

* k
2025-03-05 04:35:50 +00:00
rkuo-danswer
870b59a1cc Bugfix/vertex crash (#4181)
* Update text embedding model to version 005 and enhance embedding retrieval process

* re

* Fix formatting issues

* Add support for Bedrock reranking provider and AWS credentials handling

* fix: improve AWS key format validation and error messages

* Fix vertex embedding model crash

* feat: add environment template for local development setup

* Add display name for Claude 3.7 Sonnet model

* Add display names for Gemini 2.0 models and update Claude 3.7 Sonnet entry

* Fix ruff errors by ensuring lines are within 130 characters

* revert to currently default onyx browser settings

* add / fix boto requirements

---------

Co-authored-by: ferdinand loesch <f.loesch@sportradar.com>
Co-authored-by: Ferdinand Loesch <ferdinandloesch@me.com>
Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-03-05 01:59:46 +00:00
pablonyx
5c896cb0f7 add minor fixes (#4170) 2025-03-04 20:29:28 +00:00
pablonyx
184b30643d Nit: logging adjustments (#4182) 2025-03-04 11:39:53 -08:00
pablonyx
ae585fd84c Delete all chats (#4171)
* nit

* k
2025-03-04 10:00:08 -08:00
rkuo-danswer
61e8f371b9 fix blowing up the entire task on exception and trying to reuse an in… (#4179)
* fix blowing up the entire task on exception and trying to reuse an invalid db session

* list comprehension

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-03-04 00:57:27 +00:00
rkuo-danswer
33cc4be492 Bugfix/GitHub validation (#4173)
* fixing unexpected errors disabling connectors

* rename UnexpectedError to UnexpectedValidationError

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-03-04 00:09:49 +00:00
joachim-danswer
117c8c0d78 Enable ephemeral message responses by Onyx Slack Bots (#4142)
A new setting 'is_ephemeral' has been added to the Slack channel configurations. 

Key features/effects:

  - if is_ephemeral is set for standard channel (and a Search Assistant is chosen):
     - the answer is only shown to user as an ephemeral message
     - the user has access to his private documents for a search (as the answer is only shown to them) 
     - the user has the ability to share the answer with the channel or keep private
     - a recipient list cannot be defined if the channel is set up as ephemeral
 
  - if is_ephemeral is set and DM with bot:
    - the user has access to private docs in searches
    - the message is not sent as ephemeral, as it is a 1:1 discussion with bot

 - if is_ephemeral is not set but recipient list is set:
    - the user search does *not* have access to their private documents as the information goes to the recipient list team members, and they may have different access rights

 - Overall:
     - Unless the channel is set to is_ephemeral or it is a direct conversation with the Bot, only public docs are accessible  
     - The ACL is never bypassed, also not in cases where the admin explicitly attached a document set to the bot config.
2025-03-03 15:02:21 -08:00
rkuo-danswer
9bb8cdfff1 fix web connector tests to handle new deduping (#4175)
Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-03-03 20:54:20 +00:00
Weves
a52d0d29be Small tweak to NumberInput 2025-03-03 11:20:53 -08:00
Chris Weaver
f25e1e80f6 Add option to not re-index (#4157)
* Add option to not re-index

* Add quantizaton / dimensionality override support

* Fix build / ut
2025-03-03 10:54:11 -08:00
Yuhong Sun
39fd6919ad Fix web scrolling 2025-03-03 09:00:05 -08:00
Yuhong Sun
7f0653d173 Handling of #! sites (#4169) 2025-03-03 08:18:44 -08:00
SubashMohan
e9905a398b Enhance iframe content extraction and add thresholds for JavaScript disabled scenarios (#4167) 2025-03-02 19:29:10 -08:00
Brad Slavin
3ed44e8bae Update Unstructured documentation URL to new location (#4168) 2025-03-02 19:16:38 -08:00
pablonyx
64158a5bdf silence_logs (#4165) 2025-03-02 19:00:59 +00:00
pablonyx
afb2393596 fix dark mode index attempt failure (#4163) 2025-03-02 01:23:16 +00:00
pablonyx
d473c4e876 Fix curator default persona editing (#4158)
* k

* k
2025-03-02 00:40:14 +00:00
pablonyx
692058092f fix typo 2025-03-01 13:00:07 -08:00
pablonyx
e88325aad6 bump version (#4164) 2025-03-01 01:58:45 +00:00
pablonyx
7490250e91 Fix user group edge case (#4159)
* fix user group

* k
2025-02-28 23:55:21 +00:00
pablonyx
e5369fcef8 Update warning copy (#4160)
* k

* k

* quick nit
2025-02-28 23:46:21 +00:00
Yuhong Sun
b0f00953bc Add CODEOWNERS 2025-02-28 13:57:33 -08:00
rkuo-danswer
f6a75c86c6 Bugfix/emit background error (#4156)
* print the test name when it runs

* type hints

* can't reuse session after an exception

* better logging

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-28 18:35:24 +00:00
pablonyx
ed9989282f nit- update casing enforcement on frontend 2025-02-28 10:09:06 -08:00
pablonyx
e80a0f2716 Improved google connector flow (#4155)
* fix handling

* k

* k

* fix function

* k

* k
2025-02-28 05:13:39 +00:00
rkuo-danswer
909403a648 Feature/confluence oauth (#3477)
* first cut at slack oauth flow

* fix usage of hooks

* fix button spacing

* add additional error logging

* no dev redirect

* early cut at google drive oauth

* second pass

* switch to production uri's

* try handling oauth_interactive differently

* pass through client id and secret if uploaded

* fix call

* fix test

* temporarily disable check for testing

* Revert "temporarily disable check for testing"

This reverts commit 4b5a022a5f.

* support visibility in test

* missed file

* first cut at confluence oauth

* work in progress

* work in progress

* work in progress

* work in progress

* work in progress

* first cut at distributed locking

* WIP to make test work

* add some dev mode affordances and gate usage of redis behind dynamic credentials

* mypy and credentials provider fixes

* WIP

* fix created at

* fix setting initialValue on everything

* remove debugging, fix ??? some TextFormField issues

* npm fixes

* comment cleanup

* fix comments

* pin the size of the card section

* more review fixes

* more fixes

---------

Co-authored-by: Richard Kuo <rkuo@rkuo.com>
Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-28 03:48:51 +00:00
pablonyx
cd84b65011 quick fix (#4154) 2025-02-28 02:03:34 +00:00
pablonyx
413f21cec0 Filter assistants fix (#4153)
* k

* quick nit

* minor assistant filtering fix
2025-02-28 02:03:21 +00:00
pablonyx
eb369384a7 Log server side auth error + slackbot pagination fix (#4149) 2025-02-27 18:05:28 -08:00
pablonyx
0a24dbc52c k# Please enter the commit message for your changes. Lines starting (#4144) 2025-02-27 23:34:20 +00:00
pablonyx
a7ba0da8cc Lowercase multi tenant email mapping (#4141) 2025-02-27 15:33:40 -08:00
Richard Kuo (Danswer)
aaced6d551 scan images 2025-02-27 15:25:29 -08:00
Richard Kuo (Danswer)
4c230f92ea trivy test 2025-02-27 15:05:03 -08:00
Richard Kuo (Danswer)
07d75b04d1 enable trivy scan 2025-02-27 14:22:44 -08:00
evan-danswer
a8d10750c1 fix propagation of is_agentic (#4150) 2025-02-27 11:56:51 -08:00
pablonyx
85e3ed57f1 Order chat sessions by time updated, not created (#4143)
* order chat sessions by time updated, not created

* quick update

* k
2025-02-27 17:35:42 +00:00
pablonyx
e10cc8ccdb Multi tenant user google auth fix (#4145) 2025-02-27 10:35:38 -08:00
pablonyx
7018bc974b Better looking errors (#4050)
* add error handling

* fix

* k
2025-02-27 04:58:25 +00:00
pablonyx
9c9075d71d Minor improvements to provisioning (#4109)
* quick fix

* k

* nit
2025-02-27 04:57:31 +00:00
pablonyx
338e084062 Improved tenant handling for slack bot (#4099) 2025-02-27 04:06:26 +00:00
pablonyx
2f64031f5c Improved tenant handling for slack bot1 (#4104) 2025-02-27 03:40:50 +00:00
pablonyx
abb74f2eaa Improved chat search (#4137)
* functional + fast

* k

* adapt

* k

* nit

* k

* k

* fix typing

* k
2025-02-27 02:27:45 +00:00
pablonyx
a3e3d83b7e Improve viewable assistant logic (#4125)
* k

* quick fix

* k
2025-02-27 01:24:39 +00:00
pablonyx
4dc88ca037 debug playwright failure case 2025-02-26 17:32:26 -08:00
rkuo-danswer
11e7e1c4d6 log processed tenant count (#4139)
Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-26 17:26:48 -08:00
pablonyx
f2d74ce540 Address Auth Edge Case (#4138) 2025-02-26 17:24:23 -08:00
rkuo-danswer
25389c5120 first cut at anonymizing query history (#4123)
Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2025-02-26 21:32:01 +00:00
pablonyx
ad0721ecd8 update (#4086) 2025-02-26 18:12:07 +00:00
pablonyx
426a8842ae Markdown copying / html formatting (#4120)
* k

* delete unnecessary util
2025-02-26 04:56:38 +00:00
pablonyx
a98dcbc7de Update tenant logic (#4122)
* k

* k

* k

* quick nit

* nit
2025-02-26 03:53:46 +00:00
pablonyx
6f389dc100 Improve lengthy chats (#4126)
* remove scroll

* working well

* nit

* k

* nit
2025-02-26 03:22:21 +00:00
pablonyx
d56177958f fix email headers (#4100) 2025-02-26 03:12:30 +00:00
Kaveen Jayamanna
0e42ae9024 Content of .xlsl are not properly read during indexing. (#4035) 2025-02-25 21:10:47 -08:00
Weves
ce2b4de245 temp remove 2025-02-25 20:46:55 -08:00
Chris Weaver
a515aa78d2 Fix confluence test (#4130) 2025-02-26 03:03:54 +00:00
Weves
23073d91b9 reduce number of chars to index for search 2025-02-25 19:27:50 -08:00
Chris Weaver
f767b1f476 Fix confluence permission syncing at scale (#4129)
* Fix confluence permission syncing at scale

* Remove line

* Better log message

* Adjust log
2025-02-25 19:22:52 -08:00
pablonyx
9ffc8cb2c4 k 2025-02-25 18:15:49 -08:00
pablonyx
98bfb58147 Handle bad slack configurations– multi tenant (#4118)
* k

* quick nit

* k

* k
2025-02-25 22:22:54 +00:00
evan-danswer
6ce810e957 faster indexing status at scale plus minor cleanups (#4081)
* faster indexing status at scale plus minor cleanups

* mypy

* address chris comments

* remove extra prints
2025-02-25 21:22:26 +00:00
pablonyx
07b0b57b31 (nit) bump timeout 2025-02-25 14:10:30 -08:00
pablonyx
118cdd7701 Chat search (#4113)
* add chat search

* don't add the bible

* base functional

* k

* k

* functioning

* functioning well

* functioning well

* k

* delete bible

* quick cleanup

* quick cleanup

* k

* fixed frontend hooks

* delete bible

* nit

* nit

* nit

* fix build

* k

* improved debouncing

* address comments

* fix alembic

* k
2025-02-25 20:49:46 +00:00
rkuo-danswer
ac83b4c365 validate connector deletion (#4108)
* validate connector deletion

* fixes

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-25 20:35:21 +00:00
pablonyx
fa408ff447 add 3.7 (#4116) 2025-02-25 12:41:40 -08:00
rkuo-danswer
4aa8eb8b75 fix scrolling test (#4117)
Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2025-02-25 10:23:04 -08:00
rkuo-danswer
60bd9271f7 Bugfix/model tests (#4092)
* trying out a fix

* add ability to manually run model tests

* add log dump

* check status code, not text?

* just the model server

* add port mapping to host

* pass through more api keys

* add azure tests

* fix litellm env vars

* fix env vars in github workflow

* temp disable litellm test

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-25 04:53:51 +00:00
Weves
5d58a5e3ea Add ability to index all of Github 2025-02-24 18:56:36 -08:00
Chris Weaver
a99dd05533 Add option to index all Jira projects (#4106)
* Add option to index all Jira projects

* Fix test

* Fix web build

* Address comment
2025-02-25 02:07:00 +00:00
pablonyx
0dce67094e Prettier formatting for bedrock (#4111)
* k

* k
2025-02-25 02:05:29 +00:00
pablonyx
ffd14435a4 Text overflow logic (#4051)
* proper components

* k

* k

* k
2025-02-25 01:05:22 +00:00
rkuo-danswer
c9a3b45ad4 more aggressive handling of tasks blocking deletion (#4093)
* more aggressive handling of tasks blocking deletion

* comment updated

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-24 22:41:13 +00:00
pablonyx
7d40676398 Heavy task improvements, logging, and validation (#4058) 2025-02-24 13:48:53 -08:00
rkuo-danswer
b9e79e5db3 tighten up logs (#4076)
Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-24 19:23:00 +00:00
rkuo-danswer
558bbe16e4 Bugfix/termination cleanup (#4077)
* move activity timeout cleanup to the function exit

* fix excessive logging

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-24 19:21:55 +00:00
evan-danswer
076619ce2c make Settings model match db (#4087) 2025-02-24 19:04:36 +00:00
pablonyx
1263e21eb5 k (#4102) 2025-02-24 17:44:18 +00:00
pablonyx
f0c13b6558 fix starter message editing (#4101) 2025-02-24 01:01:01 +00:00
evan-danswer
a7125662f1 Fix gpt o-series code block formatting (#4089)
* prompt addition for gpt o-series to encourage markdown formatting of code blocks

* fix to match https://simonwillison.net/tags/markdown/

* chris comment

* chris comment
2025-02-24 00:59:48 +00:00
evan-danswer
4a4e4a6c50 thread utils respect contextvars (#4074)
* thread utils respect contextvars now

* address pablo comments

* removed tenant id from places it was already being passed

* fix rate limit check and pablo comment
2025-02-24 00:43:21 +00:00
pablonyx
1f2af373e1 improve scroll (#4096) 2025-02-23 19:20:07 +00:00
Weves
bdaa293ae4 Fix nginx for prod compose file 2025-02-21 16:57:54 -08:00
pablonyx
5a131f4547 Fix integration tests (#4059) 2025-02-21 15:56:11 -08:00
rkuo-danswer
ffb7d5b85b enable manual testing for model server (#4003)
* trying out a fix

* add ability to manually run model tests

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-21 14:00:32 -08:00
rkuo-danswer
fe8a5d671a don't spam the logs with texts on auth errors (#4085)
* don't spam the logs with texts on auth errors

* refactor the logging a bit

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-21 13:40:07 -08:00
Yuhong Sun
6de53ebf60 README Touchup (#4088) 2025-02-21 13:31:07 -08:00
rkuo-danswer
61d536c782 tool fixes (#4075) 2025-02-21 12:30:33 -08:00
Chris Weaver
e1ff9086a4 Fix LLM selection (#4078) 2025-02-21 11:32:57 -08:00
evan-danswer
ba21bacbbf coerce useLanggraph to boolean (#4084)
* coerce useLanggraph to boolean
2025-02-21 09:43:46 -08:00
pablonyx
158bccc3fc Default on for non-ee (#4083) 2025-02-21 09:11:45 -08:00
Weves
599b7705c2 Fix gitbook connector issues 2025-02-20 15:29:11 -08:00
rkuo-danswer
4958a5355d try more efficient query (#4047) 2025-02-20 12:58:50 -08:00
Chris Weaver
c4b8519381 Add support for sending email invites for single tenant users (#4065) 2025-02-19 21:05:23 -08:00
rkuo-danswer
8b4413694a fix usage of tenant_id (#4062)
Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-19 17:50:58 -08:00
pablonyx
57cf7d9fac default agent search on 2025-02-19 17:21:26 -08:00
Chris Weaver
ad4efb5f20 Pin xmlsec version + improve SAML flow (#4054)
* Pin xmlsec version

* testing

* test nginx conf change

* Pass through more

* Cleanup + remove DOMAIN across the board
2025-02-19 16:02:05 -08:00
evan-danswer
e304ec4ab6 Agent search history displayed answer (#4052) 2025-02-19 15:52:16 -08:00
joachim-danswer
1690dc45ba timout bumps (#4057) 2025-02-19 15:51:45 -08:00
pablonyx
7582ba1640 Fix streaming (#4055) 2025-02-19 15:23:40 -08:00
pablonyx
99fc546943 Miscellaneous indexing fixes (#4042) 2025-02-19 11:34:49 -08:00
pablonyx
353c185856 Update error class (#4006) 2025-02-19 10:52:23 -08:00
pablonyx
7c96b7f24e minor alembic nit 2025-02-19 10:47:33 -08:00
pablonyx
31524a3eff add connector validation (#4016) 2025-02-19 10:46:06 -08:00
rkuo-danswer
c9f618798e support scrolling before scraping (#4040)
* support scrolling before scraping

* fix mypy

* install playwright deps

---------

Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2025-02-19 17:54:58 +00:00
rkuo-danswer
11f6b44625 Feature/indexing hard timeout 3 (#3980)
* WIP

* implement hard timeout

* fix callbacks

* put back the timeout

* missed a file

* fixes

* try installing playwright deps

* Revert "try installing playwright deps"

This reverts commit 4217427568.

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2025-02-19 04:12:13 +00:00
pablonyx
e82a25f49e Non-SMTP password reset (#4031)
* update

* validate

* k

* minor cleanup

* nit

* finalize

* k

* fix tests

* fix tests

* fix tests
2025-02-19 02:02:28 +00:00
Weves
5a9ec61446 Don't pass thorugh parallel_tool_calls for o-family models 2025-02-18 18:57:05 -08:00
pablonyx
9635522de8 Admin default (#4032)
* clean up

* minor cleanup

* building

* update agnetic message look

* k

* fix alembic history
2025-02-18 18:31:54 -08:00
Yuhong Sun
630bdf71a3 Update README (#4044) 2025-02-18 18:31:28 -08:00
pablonyx
47fd4fa233 Strict Tenant ID Enforcement (#3871)
* strict tenant id enforcement

* k

* k

* nit

* merge

* nit

* k
2025-02-19 00:52:56 +00:00
Weves
2013beb9e0 Adjust behavior when display_model_names is null 2025-02-18 16:19:08 -08:00
pablonyx
466276161c Quick link fix (#4039) 2025-02-18 16:18:41 -08:00
rkuo-danswer
c934892c68 add index to document__tag.tag_id (#4038)
Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2025-02-18 19:51:36 +00:00
joachim-danswer
1daa3a663d timout bumps (#4037) 2025-02-18 18:26:29 +00:00
Chris Weaver
7324273233 Small confluence group sync tweaks (#4033) 2025-02-18 07:05:41 +00:00
evan-danswer
2b2ba5478c new is_agentic flag for chatmessages (#4026)
* new is_agentic flag for chatmessages

* added cancelled error to db

* added cancelled error to returned message
2025-02-18 04:20:33 +00:00
pablonyx
045a41d929 Add default slack bot disabling (#3935)
* add slack bot disabling

* update

* k

* minor
2025-02-18 04:08:33 +00:00
pablonyx
e3bc7cc747 improve validation schema (#3984) 2025-02-18 03:18:23 +00:00
evan-danswer
0826b035a2 Update README.md (#3908)
* Update README.md

help future integration test runners

* Update README.md

* Update README.md

---------

Co-authored-by: pablonyx <pablo@danswer.ai>
2025-02-18 03:08:47 +00:00
pablonyx
cf0e3d1ff4 fix main 2025-02-17 18:23:15 -08:00
evan-danswer
10c81f75e2 consistent refined answer improvement (#4027) 2025-02-17 21:02:03 +00:00
evan-danswer
5ca898bde2 Force use tool overrides (#4024)
* initial rename + timeout bump

* querry override
2025-02-17 21:01:24 +00:00
pablonyx
58b252727f UX (#4014) 2025-02-17 13:21:43 -08:00
joachim-danswer
86bd121806 no reranking if local model w/o GPU for Agent Search (#4011)
* no reranking if locql model w/o GPU

* more efficient gpu status calling

* fix unit tests

---------

Co-authored-by: Evan Lohn <evan@danswer.ai>
2025-02-17 14:13:24 +00:00
evan-danswer
9324f426c0 added timeouts for agent llm calls (#4019)
* added timeouts for agent llm calls

* timing suggestions in agent config

* improved timeout that actually exits early

* added new global timeout and connection timeout distinction

* fixed error raising bug and made entity extraction recoverable

* warnings and refactor

* mypy

---------

Co-authored-by: joachim-danswer <joachim@danswer.ai>
2025-02-17 07:02:19 +00:00
joachim-danswer
20d3efc86e By default, use primary LLM for initial & refined answer (#4012)
* By default, use primary LLM for initial & refined answer

Use of new env variable

* simplification
2025-02-16 23:20:07 +00:00
pablonyx
ec0e55fd39 Seeding count issue (#4009)
* k

* k

* quick nit

* nit
2025-02-16 20:49:25 +00:00
pablonyx
e441c899af Playwright + Chromatic update (#4015) 2025-02-16 13:03:45 -08:00
Chris Weaver
f1fc8ac19b Connector checkpointing (#3876)
* wip checkpointing/continue on failure

more stuff for checkpointing

Basic implementation

FE stuff

More checkpointing/failure handling

rebase

rebase

initial scaffolding for IT

IT to test checkpointing

Cleanup

cleanup

Fix it

Rebase

Add todo

Fix actions IT

Test more

Pagination + fixes + cleanup

Fix IT networking

fix it

* rebase

* Address misc comments

* Address comments

* Remove unused router

* rebase

* Fix mypy

* Fixes

* fix it

* Fix tests

* Add drop index

* Add retries

* reset lock timeout

* Try hard drop of schema

* Add timeout/retries to downgrade

* rebase

* test

* test

* test

* Close all connections

* test closing idle only

* Fix it

* fix

* try using null pool

* Test

* fix

* rebase

* log

* Fix

* apply null pool

* Fix other test

* Fix quality checks

* Test not using the fixture

* Fix ordering

* fix test

* Change pooling behavior
2025-02-16 02:34:39 +00:00
Weves
bc087fc20e Fix ruff 2025-02-15 16:35:15 -08:00
Yuhong Sun
ab8081c36b k 2025-02-15 13:42:43 -08:00
Adam Siemiginowski
f371efc916 Fix Zulip connector schema + links and enable temporal metadata (#4005) 2025-02-15 11:49:41 -08:00
pablonyx
7fd5d31dbe Minor background process log cleanup (#4010) 2025-02-15 11:03:10 -08:00
rkuo-danswer
2829e6715e Feature/propagate exceptions (#3974)
* better propagation of exceptions up the stack

* remove debug testing

* refactor the watchdog more to emit data consistently at the end of the function

* enumerate a lot more terminal statuses

* handle more codes

* improve logging

* handle "-9"

* single line exception logging

* typo/grammar

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-15 04:53:01 +00:00
Weves
bc7b4ec396 Fix typing for metadata 2025-02-14 18:19:37 -08:00
pablonyx
697f8bc1c6 Reduce background errors (#4004) 2025-02-14 17:35:26 -08:00
evan-danswer
3ba65214b8 bump version and fix related issues (#3996) 2025-02-14 19:57:12 +00:00
joachim-danswer
6687d5d499 major Agent Search Updates (#3994) 2025-02-14 19:40:21 +00:00
pablonyx
ec78f78f3c k (#3999) 2025-02-14 02:33:42 +00:00
rkuo-danswer
ed253e469a add nano and vim to base image (#3995)
Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-14 02:27:24 +00:00
pablodanswer
e3aafd95af k 2025-02-13 18:34:05 -08:00
Weves
3a704f1950 Add new vars to github action 2025-02-13 18:33:17 -08:00
Weves
2bf8a7aee5 Misc improvements 2025-02-13 18:33:17 -08:00
Weves
c2f3302aa0 Fix mypy 2025-02-13 18:33:17 -08:00
neo773
7f4d1f27a0 Gitbook connector (#3991)
* add parser

* add tests
2025-02-13 17:58:05 -08:00
pablonyx
b70db15622 Bugfix Vespa Deletion Script (#3998) 2025-02-13 17:26:04 -08:00
pablonyx
e9492ce9ec minor read replica fix (#3997) 2025-02-13 17:11:45 -08:00
pablodanswer
35574369ed update cloud build to use public stripe key 2025-02-13 16:55:56 -08:00
pablonyx
eff433bdc5 Reduce errors in workers (#3962) 2025-02-13 15:59:44 -08:00
pablonyx
3260d793d1 Billing fixes (#3976) 2025-02-13 15:59:10 -08:00
Yuhong Sun
1a7aca06b9 Fix Agent Slowness (#3979) 2025-02-13 15:54:34 -08:00
pablonyx
c6434db7eb Add delete all for tenants in Vespa (#3970) 2025-02-13 14:33:49 -08:00
joachim-danswer
667b9e04c5 updated rerank function arguments (#3988) 2025-02-13 14:13:14 -08:00
rkuo-danswer
29c84d7707 xfail this test (#3992)
Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-13 14:09:15 -08:00
pablonyx
17c915b11b Improved email formatting (#3985)
* prettier emails

* k

* remove mislieading comment

* minor typing
2025-02-13 21:11:57 +00:00
rkuo-danswer
95ca592d6d fix title check (#3993)
Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-13 13:14:55 -08:00
Yuhong Sun
e39a27fd6b Hope this actually skips the model server builds now (#3987) 2025-02-13 11:48:25 -08:00
rkuo-danswer
26d3c952c6 Bugfix/jira connector test 2 (#3986)
* fix jira connector test

* typo fix

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-13 10:21:54 -08:00
rkuo-danswer
53683e2f3c fix jira connector test (#3983)
Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-13 09:41:45 -08:00
rkuo-danswer
0c0113a481 ignore result when using send_task on lightweight tasks (#3978)
* ignore result when using send_task on lightweight tasks

* fix ignore_result

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2025-02-13 03:22:13 -08:00
Chris Weaver
c0f381e471 Add background errors ability (#3982) 2025-02-13 00:44:55 -08:00
rkuo-danswer
5ed83f1148 no thread local locks in callbacks and raise permission sync timeout … (#3977)
* no thread local locks in callbacks and raise permission sync timeout by a lot based on empirical log observations

* more fixes

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-12 22:31:01 -08:00
pablonyx
9db7b67a6c Minor misc ux improvements (#3966)
* minor misc ux

* nit

* k

* quick nit

* k
2025-02-13 04:43:11 +00:00
Yuhong Sun
2850048c6b Jira add key to semantic id (#3981) 2025-02-12 20:04:47 -08:00
rkuo-danswer
61058e5fcd merge monitoring with kickoff tasks (#3953)
* move indexing

* all monitor work moved

* reacquire lock more

* remove monitor task completely

* fix import

* fix pruning finalization

* no multiplier on system/cloud tasks

* monitor queues every 30 seconds in the cloud

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-13 02:35:41 +00:00
Yuhong Sun
c87261cda7 Fix edge case with run functions in parallel 2025-02-12 17:57:39 -08:00
pablonyx
e030b0a6fc Address (#3955) 2025-02-12 13:53:13 -08:00
Yuhong Sun
61136975ad Don't build model server every night (#3973) 2025-02-12 13:08:05 -08:00
Weves
0c74bbf9ed Clean illegal chars in metadata 2025-02-12 11:49:16 -08:00
pablonyx
12b2126e69 Update assistants visibility, minor UX, .. (#3965)
* update assistant logic

* quick nit

* k

* fix "featured" logic

* Small tweaks

* k

---------

Co-authored-by: Weves <chrisweaver101@gmail.com>
2025-02-12 00:43:20 +00:00
Chris Weaver
037943c6ff Support share/view IDs for Airtable (#3967) 2025-02-11 16:19:38 -08:00
pablonyx
f9485b1325 Ensure sidepanel defaults sidebar off (#3844)
* ensure sidepanel defaults sidepanel off

* address comment

* reformat

* initial visible
2025-02-11 22:22:56 +00:00
rkuo-danswer
552a0630fe Merge pull request #3948 from onyx-dot-app/feature/beat_rtvar
refactoring and update multiplier in real time
2025-02-11 14:05:14 -08:00
Richard Kuo (Danswer)
5bf520d8b8 comments 2025-02-11 14:04:49 -08:00
Weves
7dc5a77946 Improve starter message splitting 2025-02-11 11:10:13 -08:00
rkuo-danswer
03abd4a1bc Merge pull request #3938 from onyx-dot-app/feature/model_server_logs
improve gpu detection functions and logging in model server
2025-02-11 09:43:25 -08:00
Richard Kuo (Danswer)
16d6d708f6 update logging 2025-02-11 09:15:39 -08:00
Richard Kuo
9740ed32b5 fix reading redis values as floats 2025-02-10 20:48:55 -08:00
rkuo-danswer
b56877cc2e Bugfix/dedupe ids (#3952)
* dedupe make_private_persona and update test

* add comment

* comments, and just have duplicate user id's for the test instead of modifying edit

* found the magic word

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-11 02:27:55 +00:00
pablodanswer
da5c83a96d k 2025-02-10 17:45:00 -08:00
Weves
818225c60e Fix starter message overflow 2025-02-10 17:17:31 -08:00
Weves
d78a1fe9c6 Fix for red background 2025-02-10 16:36:26 -08:00
Weves
05b3e594b5 Increase timeout for reasoning models + make o1 available by default 2025-02-10 16:11:01 -08:00
Richard Kuo (Danswer)
5a4d007cf9 comments 2025-02-10 15:03:59 -08:00
pablonyx
3b25a2dd84 Ux improvements (#3947)
* black history sidebar

* misc improvements

* minor misc ux improvemnts

* quick nit

* add nits

* quick nit
2025-02-10 12:18:41 -08:00
pablonyx
baee4c5f22 Multi tenant specific error page (#3928)
Multi tenant specific error page
2025-02-10 11:51:29 -08:00
Richard Kuo (Danswer)
5e32f9d922 refactoring and update multiplier in real time 2025-02-10 11:20:38 -08:00
pablonyx
1454e7e07d New ux dark (#3944) 2025-02-09 21:14:32 -08:00
rkuo-danswer
6848337445 add validation for pruning/group sync etc (#3882)
* add validation for pruning

* fix missing class

* get external group sync validation working

* backport fix for pruning check

* fix pruning

* log the payload id

* remove scan_iter from pruning

* missed removed scan_iter, also remove other scan_iters and replace with sscan_iter of the lookup table

* external group sync needs active signal. h

* log the payload id when the task starts

* log the payload id in more places

* use the replica

* increase primary pool and slow down beat

* scale sql pool based on concurrency

* fix concurrency

* add debugging for external group sync and tenant

* remove debugging and fix payload id

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-10 03:12:21 +00:00
pablonyx
519fbd897e Add Dark Mode (#3936)
* k

* intermediate unification

* many changes

* update dark mode configs

* updates

* decent state

* functional

* mostly clean

* updaet model selector

* finalize

* calendar update

* additional styling

* nit

* k

* update colors

* push change

* k

* update

* k

* update

* address additions

* quick nit
2025-02-09 23:09:40 +00:00
evan-danswer
217569104b added context type for when internet search tool is used (#3930) 2025-02-08 20:44:38 -08:00
rkuo-danswer
4c184bb7f0 Bugfix/slack stop 2 (#3916)
* use callback in slim doc functions

* more callbacks

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-08 23:45:41 +00:00
rkuo-danswer
a222fae7c8 Bugfix/beat templates (#3754)
* WIP

* migrate most beat tasks to fan out strategy

* fix kwargs

* migrate EE tasks

* lock on the task_name level

* typo fix

* transform beat tasks for cloud

* cloud multiplier is only for cloud tasks

* bumpity

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-08 06:57:57 +00:00
pablonyx
94788cda53 Update display (#3934)
* update display

* quick nit
2025-02-08 02:07:47 +00:00
Richard Kuo (Danswer)
fb931ee4de fixes 2025-02-07 17:28:17 -08:00
Richard Kuo (Danswer)
bc2c56dfb6 improve gpu detection functions and logging in model server 2025-02-07 16:59:02 -08:00
rkuo-danswer
ae37f01f62 event driven indexing/docset/usergroup triggers (#3918)
* WIP

* trigger indexing immediately when the ccpair is created

* add some logging and indexing trigger to the mock-credential endpoint

* better comments

* fix integration test

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-07 22:53:51 +00:00
pablodanswer
ef31e14518 remove debug logs for integration tests 2025-02-07 10:46:24 -08:00
evan-danswer
9b0cba367e small linear connector improvements (#3929)
* small linear connector improvements

* add todo for url handling
2025-02-07 01:31:49 +00:00
pablonyx
48ac690a70 Multi tenant tests (#3919)
* ensure fail on multi tenant successfully

* attempted fix

* udpate ingration tests

* minor update

* improve

* improve workflow

* fix migrations

* many more logs

* quick fix

* improve

* fix typo

* quick nit

* attempted fix

* very minor clean up
2025-02-07 01:24:00 +00:00
pablodanswer
bfa4fbd691 minor delay 2025-02-06 16:28:38 -08:00
rkuo-danswer
58fdc86d41 fix chromatic save/upload (#3927)
* try adding back some params

* raise timeout

* update chromatic version

* fix typo

* use chromatic imports

* update gitignore

* slim down the config file

* update readme

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-06 22:02:14 +00:00
pablonyx
6ff452a2e1 Update popup + misc standardization (#3906)
* pop

* various minor improvements

* improvement

* finalize

* update
2025-02-06 21:22:06 +00:00
pablonyx
e9b892301b Improvements to Redis + Vespa debugging
Improvements to Redis + Vespa debugging
2025-02-06 13:30:32 -08:00
pablodanswer
a202e2bf9d Improvements to Redis + Vespa debugging 2025-02-06 13:30:06 -08:00
pablonyx
3bc4e0d12f Very minor robustification (#3926)
* very minor robustification

* robust
2025-02-06 19:55:38 +00:00
trial-danswer
2fc41cd5df Helm Chart Fixes (#3900)
* initial commit for helm chart refactoring

* Continue refactoring helm. I was able to use helm to deploy all of the apps to a cluster in aws. The bottleneck was setting up PVC dynamic provisioning.

* use default storage class

* Fix linter errors

* Fix broken helm test

* update

* Helm chart fixes

* remove reference to ebsstorage

* Fix linter errors

---------

Co-authored-by: jpb80 <jordan.buttkevitz@gmail.com>
2025-02-06 10:41:09 -08:00
pablodanswer
8c42ff2ff8 slackbot configuration fix 2025-02-06 09:36:58 -08:00
rkuo-danswer
6ccb3f085a select only doc_id (#3920)
* select only doc_id

* select more doc ids

* fix user group

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-06 07:00:40 +00:00
pablonyx
a0a1b431be Various UX improvements
Various improvements
2025-02-05 21:13:22 -08:00
pablodanswer
f137fc78a6 various UX improvements 2025-02-05 21:12:55 -08:00
pablonyx
396f096dda Allows for Slackbots that do not have search enabled
Allow no search
2025-02-05 19:20:20 -08:00
pablodanswer
e04b2d6ff3 Allows for Slackbots that do not have search enabled 2025-02-05 19:19:50 -08:00
pablonyx
cbd8b094bd Minor misc docset updates
Minor misc docset updates
2025-02-05 19:14:32 -08:00
pablodanswer
5c7487e91f ensure tests pass 2025-02-05 17:02:49 -08:00
pablodanswer
477f8eeb68 minor update 2025-02-05 16:53:04 -08:00
pablodanswer
737e37170d minor updates 2025-02-05 16:53:02 -08:00
Yuhong Sun
c58a7ef819 Slackbot to know its name (#3917) 2025-02-05 16:39:42 -08:00
rkuo-danswer
bd08e6d787 alert if revisions are null or query fails (#3910)
* alert if revisions are null or query fails

* comment

* mypy

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-05 23:45:38 +00:00
rkuo-danswer
47e6192b99 fix bug in validation logic (#3915)
* fix bug in validation logic

* test

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-05 22:49:18 +00:00
pablonyx
d1e9760b92 Enforce Slack Channel Default Config
Enforce Slack Channel Default Config
2025-02-05 14:28:03 -08:00
pablodanswer
7153cb09f1 add default slack channel config 2025-02-05 14:26:26 -08:00
evan-danswer
29f5f4edfa fixed citations when sections selected (#3914)
* removed some dead code and fixed citations when a search request is made with sections selected

* fix black formatting issue
2025-02-05 22:16:07 +00:00
pablonyx
b469a7eff4 Put components in components directory + remove unused shortcut commands (#3909) 2025-02-05 14:29:29 -08:00
pablonyx
78153e5012 Merge pull request #3913 from onyx-dot-app/very_minor_ux
remove unused border
2025-02-05 11:57:41 -08:00
pablodanswer
b1ee1efecb remove minor border issue 2025-02-05 11:57:03 -08:00
Sam Warner
526932a7f6 fix chat image upload double read 2025-02-05 09:52:51 -08:00
Weves
6889152d81 Fix issue causing file connector to fail 2025-02-04 22:19:04 -08:00
pablonyx
4affc259a6 Password reset tenant (#3895)
* nots

* functional

* minor naming cleanup

* nit

* update constant

* k
2025-02-05 03:17:11 +00:00
pablonyx
0ec065f1fb Set GPT 4o as default and add O3 mini (#3899)
* quick update to models

* add reqs

* update version
2025-02-05 03:06:05 +00:00
Weves
8eb4320f76 Support not pausing connectors on initialization failure 2025-02-04 19:32:55 -08:00
Weves
1c12ab31f9 Fix extra __init__ file + allow adding API keys to user groups 2025-02-04 17:21:06 -08:00
Yuhong Sun
49fd76b336 Tool Call Error Display (#3897) 2025-02-04 16:12:50 -08:00
rkuo-danswer
5854b39dd4 Merge pull request #3893 from onyx-dot-app/mypy_random
Mypy random fixes
2025-02-04 16:02:18 -08:00
rkuo-danswer
c0271a948a Merge pull request #3856 from onyx-dot-app/feature/no_scan_iter
lessen usage of scan_iter
2025-02-04 15:57:03 -08:00
Richard Kuo (Danswer)
aff4ee5ebf commented code 2025-02-04 15:56:18 -08:00
Richard Kuo (Danswer)
675d2f3539 Merge branch 'main' of https://github.com/onyx-dot-app/onyx into feature/no_scan_iter 2025-02-04 15:55:42 -08:00
rkuo-danswer
2974b57ef4 Merge pull request #3898 from onyx-dot-app/bugfix/temporary_xfail
xfail test until fixed
2025-02-04 15:54:44 -08:00
Richard Kuo (Danswer)
679bdd5e04 xfail test until fixed 2025-02-04 15:53:45 -08:00
Yuhong Sun
e6cb47fcb8 Prompt 2025-02-04 14:42:18 -08:00
Yuhong Sun
a514818e13 Citations 2025-02-04 14:34:44 -08:00
Yuhong Sun
89021cde90 Citation Prompt 2025-02-04 14:17:23 -08:00
Chris Weaver
32ecc282a2 Update README.md
Fix Cal link in README
2025-02-04 13:11:46 -08:00
Yuhong Sun
59b1d4673f Updating some Prompts (#3894) 2025-02-04 12:23:15 -08:00
pablodanswer
ec0c655c8d misc improvement 2025-02-04 12:06:11 -08:00
pablodanswer
42a0f45a96 update 2025-02-04 12:06:11 -08:00
pablodanswer
125e5eaab1 various mypy improvements 2025-02-04 12:06:10 -08:00
Richard Kuo (Danswer)
f2dab9ba89 Merge branch 'main' of https://github.com/onyx-dot-app/onyx into feature/no_scan_iter 2025-02-04 12:01:57 -08:00
Richard Kuo
02a068a68b multiplier from 8 to 4 2025-02-03 23:59:36 -08:00
evan-danswer
91f0650071 Merge pull request #3749 from onyx-dot-app/agent-search-feature
Agent search
2025-02-03 21:31:46 -08:00
pablodanswer
b97819189b push various minor updates 2025-02-03 21:23:45 -08:00
Evan Lohn
b928201397 fixed rebase issue and some cleanup 2025-02-03 20:49:45 -08:00
Yuhong Sun
b500c914b0 cleanup 2025-02-03 20:10:51 -08:00
Yuhong Sun
4b0d22fae3 prompts 2025-02-03 20:10:51 -08:00
joachim-danswer
b46c09ac6c EL comments 2025-02-03 20:10:51 -08:00
joachim-danswer
3ce8923086 fix for citation update 2025-02-03 20:10:51 -08:00
joachim-danswer
7ac6d3ed50 logging level changes 2025-02-03 20:10:51 -08:00
joachim-danswer
3cd057d7a2 LangGraph comments 2025-02-03 20:10:51 -08:00
joachim-danswer
4834ee6223 new citation format 2025-02-03 20:10:51 -08:00
pablodanswer
cb85be41b1 add proper citation handling 2025-02-03 20:10:51 -08:00
joachim-danswer
eb227c0acc nit update 2025-02-03 20:10:51 -08:00
joachim-danswer
25a57e2292 add title and meta-data to doc 2025-02-03 20:10:51 -08:00
pablodanswer
3f3b04a4ee update width 2025-02-03 20:10:51 -08:00
Evan Lohn
3f6de7968a prompt improvements for wekaer models 2025-02-03 20:10:51 -08:00
pablodanswer
024207e2d9 update 2025-02-03 20:10:51 -08:00
Yuhong Sun
8f7db9212c k 2025-02-03 20:10:51 -08:00
pablodanswer
b1e9e03aa4 nit 2025-02-03 20:10:51 -08:00
pablodanswer
87a53d6d80 quick update 2025-02-03 20:10:51 -08:00
Yuhong Sun
59c65a4192 prompts 2025-02-03 20:10:51 -08:00
pablodanswer
c984c6c7f2 add pro search disable 2025-02-03 20:10:51 -08:00
Yuhong Sun
9a3ce504bc beta 2025-02-03 20:10:51 -08:00
Yuhong Sun
16265d27f5 k 2025-02-03 20:10:51 -08:00
Yuhong Sun
570fe43efb log level changes 2025-02-03 20:10:51 -08:00
Yuhong Sun
506a9f1b94 Yuhong 2025-02-03 20:10:51 -08:00
Yuhong Sun
a067b32467 Partial Prompt Updates (#3880) 2025-02-03 20:10:51 -08:00
pablodanswer
9b6e51b4fe k 2025-02-03 20:10:51 -08:00
joachim-danswer
e23dd0a3fa renames + fix of refined answer generation prompt 2025-02-03 20:10:51 -08:00
Evan Lohn
71304e4228 always persist in agent search 2025-02-03 20:10:51 -08:00
Evan Lohn
2adeaaeded loading object into model instead of json 2025-02-03 20:10:51 -08:00
Evan Lohn
a96728ff4d prompt piece optimizations 2025-02-03 20:10:51 -08:00
pablodanswer
eaffdee0dc broadly fixed minus some issues 2025-02-03 20:10:51 -08:00
pablodanswer
feaa3b653f fix misc issues 2025-02-03 20:10:51 -08:00
joachim-danswer
9438f9df05 removal of sone unused states/models 2025-02-03 20:10:51 -08:00
joachim-danswer
b90e0834a5 major renaming 2025-02-03 20:10:51 -08:00
Evan Lohn
29440f5482 alembic heads, basic citations, search pipeline state 2025-02-03 20:10:51 -08:00
Evan Lohn
5a95a5c9fd large number of PR comments addressed 2025-02-03 20:10:51 -08:00
Evan Lohn
118e8afbef reworked config to have logical structure 2025-02-03 20:10:51 -08:00
joachim-danswer
8342168658 initial variable renaming 2025-02-03 20:10:51 -08:00
joachim-danswer
d5661baf98 history summary fix
- adjusted prompt
 - adjusted citation removal
 - length cutoff by words, not characters
2025-02-03 20:10:51 -08:00
joachim-danswer
95fcc0019c history summary update 2025-02-03 20:10:51 -08:00
joachim-danswer
0ccd83e809 deep_search_a and agent_a_config renaming 2025-02-03 20:10:51 -08:00
joachim-danswer
732861a940 rename of documents to verified_reranked_documents 2025-02-03 20:10:51 -08:00
joachim-danswer
d53dd1e356 cited_docs -> cited_documents 2025-02-03 20:10:51 -08:00
joachim-danswer
1a2760edee improved logging through agent_state plus some default fixes 2025-02-03 20:10:51 -08:00
joachim-danswer
23ae4547ca default values of number of strings and other things 2025-02-03 20:10:51 -08:00
Evan Lohn
385b344a43 addressed TODOs 2025-02-03 20:10:51 -08:00
Evan Lohn
a340529de3 sync streaming impl 2025-02-03 20:10:51 -08:00
joachim-danswer
4a0b2a6c09 additional naming fixes 2025-02-03 20:10:51 -08:00
joachim-danswer
756a1cbf8f answer_refined_question_subgraphs 2025-02-03 20:10:51 -08:00
joachim-danswer
8af4f1da8e more renaming 2025-02-03 20:10:51 -08:00
Evan Lohn
4b82440915 finished rebase and fixed issues 2025-02-03 20:10:51 -08:00
Evan Lohn
bb6d55783e addressing PR comments 2025-02-03 20:10:51 -08:00
Evan Lohn
2b8cd63b34 main nodes renaming 2025-02-03 20:10:51 -08:00
joachim-danswer
b0c3098693 more renaming and consolidation 2025-02-03 20:10:51 -08:00
joachim-danswer
2517aa39b2 more renamings 2025-02-03 20:10:51 -08:00
joachim-danswer
ceaaa05af0 renamings and consolidation of formatting nodes in orig question retrieval 2025-02-03 20:10:51 -08:00
joachim-danswer
3b13380051 k 2025-02-03 20:10:51 -08:00
joachim-danswer
ef6e6f9556 more renaming 2025-02-03 20:10:51 -08:00
joachim-danswer
0a6808c4c1 rename initial_sub_question_creation 2025-02-03 20:10:51 -08:00
Evan Lohn
6442c56d82 remaining small find replace fix 2025-02-03 20:10:51 -08:00
Evan Lohn
e191e514b9 fixed find and replace issue 2025-02-03 20:10:51 -08:00
Evan Lohn
f33a2ffb01 node renaming 2025-02-03 20:10:51 -08:00
joachim-danswer
0578c31522 rename retrieval & consolidate_sub_answers (initial and refinement) 2025-02-03 20:10:51 -08:00
joachim-danswer
8cbdc6d8fe fix for refinement renaming 2025-02-03 20:10:51 -08:00
joachim-danswer
60fb06da4e rename initial_answer_generation pt 2 2025-02-03 20:10:51 -08:00
joachim-danswer
55ed6e2294 rename initial_answer_generation 2025-02-03 20:10:50 -08:00
joachim-danswer
42780d5f97 rename of individual_sub_answer_generation 2025-02-03 20:10:50 -08:00
Evan Lohn
f050d281fd refininement->refinement 2025-02-03 20:10:50 -08:00
joachim-danswer
3ca4d532b4 renamed directories, prompts, and small citation fix 2025-02-03 20:10:50 -08:00
pablodanswer
e3e855c526 potential question fix 2025-02-03 20:10:50 -08:00
pablodanswer
23bf50b90a address doc 2025-02-03 20:10:50 -08:00
Yuhong Sun
c43c2320e7 Tiny nits 2025-02-03 20:10:50 -08:00
Evan Lohn
01e6e9a2ba fixed errors on import 2025-02-03 20:10:50 -08:00
Evan Lohn
bd3b1943c4 WIP PR comments 2025-02-03 20:10:50 -08:00
Evan Lohn
1dbf561db0 fix revision to match internal alembic state 2025-02-03 20:10:50 -08:00
Evan Lohn
a43a6627eb fix revision to match internal alembic state 2025-02-03 20:10:50 -08:00
Evan Lohn
5bff8bc8ce collapsed db migrations post-rebase (added missing file) 2025-02-03 20:10:50 -08:00
Evan Lohn
7879ba6a77 collapsed db migrations post-rebase 2025-02-03 20:10:50 -08:00
pablodanswer
a63b341913 latex update 2025-02-03 20:10:50 -08:00
pablodanswer
c062097b2a post rebase fix 2025-02-03 20:10:50 -08:00
Evan Lohn
48e42af8e7 fix rebase issue 2025-02-03 20:10:50 -08:00
Evan Lohn
6c7f8eaefb first pass at dead code deletion 2025-02-03 20:10:50 -08:00
joachim-danswer
3d99ad7bc4 var initialization 2025-02-03 20:10:50 -08:00
joachim-danswer
8fea571f6e k 2025-02-03 20:10:50 -08:00
joachim-danswer
d70bbcc2ce k 2025-02-03 20:10:50 -08:00
joachim-danswer
73769c6cae k 2025-02-03 20:10:50 -08:00
joachim-danswer
7e98936c58 Enrichment prompts, prompt improvements, dispatch logging & reinsert empty tool response 2025-02-03 20:10:50 -08:00
joachim-danswer
4e17fc06ff variable renaming 2025-02-03 20:10:50 -08:00
joachim-danswer
ff4df6f3bf fix for merge error (#3814) 2025-02-03 20:10:50 -08:00
joachim-danswer
91b929d466 graph directory renamings 2025-02-03 20:10:50 -08:00
joachim-danswer
6bef5ca7a4 persona_prompt improvements 2025-02-03 20:10:50 -08:00
joachim-danswer
4817fa0bd1 average dispatch time collection for sub-answers 2025-02-03 20:10:50 -08:00
joachim-danswer
da4a086398 added total time to logging 2025-02-03 20:10:50 -08:00
joachim-danswer
69e8c5f0fc agent default changes/restructuring 2025-02-03 20:10:50 -08:00
joachim-danswer
12d1186888 increased logging 2025-02-03 20:10:50 -08:00
joachim-danswer
325892a21c cleanup of refined answer generation 2025-02-03 20:10:50 -08:00
joachim-danswer
18d92559b5 application of content limitation ion refined answer as well 2025-02-03 20:10:50 -08:00
joachim-danswer
f2aeeb7b3c Optimizations: docs for context & history
- summarize history if long
- introduced cited_docs from SQ as those must be provided to answer generations
- limit number of docs

TODO: same for refined flow
2025-02-03 20:10:50 -08:00
Evan Lohn
110c9f7e1b nit 2025-02-03 20:10:50 -08:00
Evan Lohn
1a22af4f27 AgentPromptConfig in Answer class 2025-02-03 20:10:50 -08:00
Evan Lohn
efa32a8c04 use reranking settings and persona during preprocessing in reranker 2025-02-03 20:10:50 -08:00
Evan Lohn
9bad12968f removed unused files 2025-02-03 20:10:50 -08:00
Evan Lohn
f1d96343a9 always send search response 2025-02-03 20:10:50 -08:00
Evan Lohn
0496ec3bb8 remove debug 2025-02-03 20:10:50 -08:00
pablodanswer
568f927b9b improve regeneration state 2025-02-03 20:10:50 -08:00
pablodanswer
f842e15d64 nit 2025-02-03 20:10:50 -08:00
pablodanswer
3a07093663 improved timing 2025-02-03 20:10:50 -08:00
Evan Lohn
1fe966d0f7 increased timeout to get rid of asyncio logger errors 2025-02-03 20:10:50 -08:00
joachim-danswer
812172f1bd addressing nits of EL 2025-02-03 20:10:50 -08:00
joachim-danswer
9e9bd440f4 updated answer_comparison prompt + small cleanup 2025-02-03 20:10:50 -08:00
joachim-danswer
7487b15522 refined search + question answering as sub-graphs 2025-02-03 20:10:50 -08:00
joachim-danswer
de5ce8a613 sub-graphs for initial question/search 2025-02-03 20:10:50 -08:00
joachim-danswer
8c9577aa95 refined search + question answering as sub-graphs 2025-02-03 20:10:50 -08:00
pablodanswer
4baf3dc484 minor update 2025-02-03 20:10:50 -08:00
pablodanswer
50ef5115e7 k 2025-02-03 20:10:50 -08:00
pablodanswer
a2247363af update switching logic 2025-02-03 20:10:50 -08:00
pablodanswer
a0af8ee91c fix toggling edge case 2025-02-03 20:10:50 -08:00
pablodanswer
25f6543443 update bool 2025-02-03 20:10:50 -08:00
pablodanswer
d52a0b96ac various improvements 2025-02-03 20:10:50 -08:00
pablodanswer
f14b282f0f quick nit 2025-02-03 20:10:50 -08:00
Evan Lohn
7d494cd65e allowed empty Search Tool for non-agentic search 2025-02-03 20:10:50 -08:00
pablodanswer
139374966f minor update - doc ordering 2025-02-03 20:10:50 -08:00
pablodanswer
bf06710215 k 2025-02-03 20:10:50 -08:00
pablodanswer
d4e0d0db05 quick nit 2025-02-03 20:10:50 -08:00
pablodanswer
f96a3ee29a k 2025-02-03 20:10:50 -08:00
joachim-danswer
3bf6b77319 Replaced additional limit with variable 2025-02-03 20:10:50 -08:00
joachim-danswer
3b3b0c8a87 Addressing EL's comments
- created vars for a couple of agent settings
 - moved agent configs
 - created a search function
2025-02-03 20:10:50 -08:00
joachim-danswer
aa8cb44a33 taking out Extraction for now 2025-02-03 20:10:50 -08:00
joachim-danswer
fc60fd0322 earlier entity extraction & sharper generation prompts 2025-02-03 20:10:50 -08:00
joachim-danswer
46402a97c7 tmp: force agent search 2025-02-03 20:10:50 -08:00
Evan Lohn
5bf6a47948 skip reranking for <=1 doc 2025-02-03 20:10:50 -08:00
Evan Lohn
2d8486bac4 stop infos when done streaming answers 2025-02-03 20:10:50 -08:00
Evan Lohn
eea6f2749a make field nullable 2025-02-03 20:10:50 -08:00
Evan Lohn
5e9b2e41ae persisting refined answer improvement 2025-02-03 20:10:50 -08:00
Evan Lohn
2bbe20edc3 address JR comments 2025-02-03 20:10:50 -08:00
Evan Lohn
db2004542e fixed chat tests 2025-02-03 20:10:50 -08:00
Evan Lohn
ddbfc65ad0 implemented top-level tool calling + force search 2025-02-03 20:10:50 -08:00
Evan Lohn
982040c792 WIP, but working basic search using initial tool choice node 2025-02-03 20:10:50 -08:00
pablodanswer
4b0a4a2741 k 2025-02-03 20:10:50 -08:00
pablodanswer
28ba01b361 updated + functional 2025-02-03 20:10:50 -08:00
pablodanswer
d32d1c6079 update- reorg 2025-02-03 20:10:50 -08:00
pablodanswer
dd494d2daa k 2025-02-03 20:10:50 -08:00
pablodanswer
eb6dbf49a1 build fix 2025-02-03 20:10:50 -08:00
joachim-danswer
e5fa411092 EL comments addressed 2025-02-03 20:10:50 -08:00
joachim-danswer
1ced8924b3 loser verification prompt 2025-02-03 20:10:50 -08:00
joachim-danswer
3c3900fac6 turning off initial search pre route decision 2025-02-03 20:10:50 -08:00
joachim-danswer
3b298e19bc change of sub-question answer if no docs recovered 2025-02-03 20:10:50 -08:00
joachim-danswer
71eafe04a8 various fixes from Yuhong's list 2025-02-03 20:10:50 -08:00
Yuhong Sun
80d248e02d Copy changes 2025-02-03 20:10:50 -08:00
Evan Lohn
2032fb10da removed print statements, fixed pass through handling 2025-02-03 20:10:50 -08:00
Evan Lohn
ca1f176c61 fixed basic flow citations and second test 2025-02-03 20:10:50 -08:00
Evan Lohn
3ced9bc28b fix for early cancellation test; solves issue with tasks being destroyed while pending 2025-02-03 20:10:50 -08:00
pablodanswer
deea9c8c3c add agent search frontend 2025-02-03 20:10:47 -08:00
Evan Lohn
4e47c81ed8 fix alembic history 2025-02-03 20:07:57 -08:00
joachim-danswer
00cee71c18 streaming + saving of search docs of no verified ones available
- sub-questions only
2025-02-03 20:07:57 -08:00
Evan Lohn
470c4d15dd reworked history messages in agent config 2025-02-03 20:07:57 -08:00
Evan Lohn
50bacc03b3 missed files from prev commit 2025-02-03 20:07:57 -08:00
Evan Lohn
dd260140b2 basic search restructure: WIP on fixing tests 2025-02-03 20:07:57 -08:00
joachim-danswer
8aa82be12a prompts that even further motivates to cite docs over sub-q's 2025-02-03 20:07:57 -08:00
joachim-danswer
b7f9e431a5 pydantic for LangGraph + changed ERT extraction flow 2025-02-03 20:07:57 -08:00
joachim-danswer
b9bd2ea4e2 history added to agent flow 2025-02-03 20:07:57 -08:00
pablodanswer
e4c93bed8b minor fixes to branch 2025-02-03 20:07:57 -08:00
Evan Lohn
4fd6e36c2f second clean commit 2025-02-03 20:07:57 -08:00
Richard Kuo (Danswer)
6f018d75ee use replica, remove some commented code 2025-02-03 10:10:05 -08:00
Richard Kuo (Danswer)
fd947aadea slow down to 8 again 2025-02-03 00:32:23 -08:00
Richard Kuo (Danswer)
3a950721b9 get rid of some more scan_iter 2025-02-02 01:14:10 -08:00
Richard Kuo (Danswer)
bbee2865e9 Merge branch 'main' of https://github.com/onyx-dot-app/onyx into feature/no_scan_iter 2025-02-01 10:46:38 -08:00
Richard Kuo (Danswer)
d3cf18160e lower CLOUD_BEAT_SCHEDULE_MULTIPLIER to 4 2025-01-31 16:13:13 -08:00
Richard Kuo (Danswer)
618e4addd8 better signal names 2025-01-31 13:25:27 -08:00
Richard Kuo (Danswer)
69f16cc972 dont add to the lookup table if it already exists 2025-01-31 13:23:52 -08:00
Richard Kuo (Danswer)
2676d40065 mereging 2025-01-31 12:14:24 -08:00
Richard Kuo (Danswer)
b64545c7c7 build a lookup table every so often to handle cloud migration 2025-01-31 12:12:52 -08:00
Richard Kuo (Danswer)
5232aeacad Merge branch 'main' of https://github.com/onyx-dot-app/onyx into feature/no_scan_iter
# Conflicts:
#	backend/onyx/background/celery/tasks/vespa/tasks.py
#	backend/onyx/redis/redis_connector_doc_perm_sync.py
2025-01-31 10:38:10 -08:00
Richard Kuo (Danswer)
30e8fb12e4 remove commented code 2025-01-30 15:34:00 -08:00
Richard Kuo (Danswer)
d8578bc1cb first full cut 2025-01-30 15:21:52 -08:00
Richard Kuo (Danswer)
7ccfe85ee5 WIP 2025-01-29 22:52:21 -08:00
2602 changed files with 285061 additions and 89452 deletions

3
.github/CODEOWNERS vendored Normal file
View File

@@ -0,0 +1,3 @@
* @onyx-dot-app/onyx-core-team
# Helm charts Owners
/helm/ @justin-tahara

43
.github/actionlint.yml vendored Normal file
View File

@@ -0,0 +1,43 @@
self-hosted-runner:
# Labels of self-hosted runner in array of strings.
labels:
- extras=ecr-cache
- extras=s3-cache
- hdd=256
- runs-on
- runner=1cpu-linux-arm64
- runner=1cpu-linux-x64
- runner=2cpu-linux-arm64
- runner=2cpu-linux-x64
- runner=4cpu-linux-arm64
- runner=4cpu-linux-x64
- runner=8cpu-linux-arm64
- runner=8cpu-linux-x64
- runner=16cpu-linux-arm64
- runner=16cpu-linux-x64
- ubuntu-slim # Currently in public preview
- volume=40gb
- volume=50gb
# Configuration variables in array of strings defined in your repository or
# organization. `null` means disabling configuration variables check.
# Empty array means no configuration variable is allowed.
config-variables: null
# Configuration for file paths. The keys are glob patterns to match to file
# paths relative to the repository root. The values are the configurations for
# the file paths. Note that the path separator is always '/'.
# The following configurations are available.
#
# "ignore" is an array of regular expression patterns. Matched error messages
# are ignored. This is similar to the "-ignore" command line option.
paths:
# Glob pattern relative to the repository root for matching files. The path separator is always '/'.
# This example configures any YAML file under the '.github/workflows/' directory.
.github/workflows/**/*.{yml,yaml}:
# TODO: These are real and should be fixed eventually.
ignore:
- 'shellcheck reported issue in this script: SC2038:.+'
- 'shellcheck reported issue in this script: SC2046:.+'
- 'shellcheck reported issue in this script: SC2086:.+'
- 'shellcheck reported issue in this script: SC2193:.+'

View File

@@ -1,109 +0,0 @@
name: 'Build and Push Docker Image with Retry'
description: 'Attempts to build and push a Docker image, with a retry on failure'
inputs:
context:
description: 'Build context'
required: true
file:
description: 'Dockerfile location'
required: true
platforms:
description: 'Target platforms'
required: true
pull:
description: 'Always attempt to pull a newer version of the image'
required: false
default: 'true'
push:
description: 'Push the image to registry'
required: false
default: 'true'
load:
description: 'Load the image into Docker daemon'
required: false
default: 'true'
tags:
description: 'Image tags'
required: true
cache-from:
description: 'Cache sources'
required: false
cache-to:
description: 'Cache destinations'
required: false
retry-wait-time:
description: 'Time to wait before attempt 2 in seconds'
required: false
default: '60'
retry-wait-time-2:
description: 'Time to wait before attempt 3 in seconds'
required: false
default: '120'
runs:
using: "composite"
steps:
- name: Build and push Docker image (Attempt 1 of 3)
id: buildx1
uses: docker/build-push-action@v6
continue-on-error: true
with:
context: ${{ inputs.context }}
file: ${{ inputs.file }}
platforms: ${{ inputs.platforms }}
pull: ${{ inputs.pull }}
push: ${{ inputs.push }}
load: ${{ inputs.load }}
tags: ${{ inputs.tags }}
cache-from: ${{ inputs.cache-from }}
cache-to: ${{ inputs.cache-to }}
- name: Wait before attempt 2
if: steps.buildx1.outcome != 'success'
run: |
echo "First attempt failed. Waiting ${{ inputs.retry-wait-time }} seconds before retry..."
sleep ${{ inputs.retry-wait-time }}
shell: bash
- name: Build and push Docker image (Attempt 2 of 3)
id: buildx2
if: steps.buildx1.outcome != 'success'
uses: docker/build-push-action@v6
with:
context: ${{ inputs.context }}
file: ${{ inputs.file }}
platforms: ${{ inputs.platforms }}
pull: ${{ inputs.pull }}
push: ${{ inputs.push }}
load: ${{ inputs.load }}
tags: ${{ inputs.tags }}
cache-from: ${{ inputs.cache-from }}
cache-to: ${{ inputs.cache-to }}
- name: Wait before attempt 3
if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success'
run: |
echo "Second attempt failed. Waiting ${{ inputs.retry-wait-time-2 }} seconds before retry..."
sleep ${{ inputs.retry-wait-time-2 }}
shell: bash
- name: Build and push Docker image (Attempt 3 of 3)
id: buildx3
if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success'
uses: docker/build-push-action@v6
with:
context: ${{ inputs.context }}
file: ${{ inputs.file }}
platforms: ${{ inputs.platforms }}
pull: ${{ inputs.pull }}
push: ${{ inputs.push }}
load: ${{ inputs.load }}
tags: ${{ inputs.tags }}
cache-from: ${{ inputs.cache-from }}
cache-to: ${{ inputs.cache-to }}
- name: Report failure
if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success' && steps.buildx3.outcome != 'success'
run: |
echo "All attempts failed. Possible transient infrastucture issues? Try again later or inspect logs for details."
shell: bash

View File

@@ -0,0 +1,17 @@
name: "Setup Playwright"
description: "Sets up Playwright and system deps (assumes Python and Playwright are installed)"
runs:
using: "composite"
steps:
- name: Cache playwright cache
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
with:
path: ~/.cache/ms-playwright
key: ${{ runner.os }}-${{ runner.arch }}-playwright-${{ hashFiles('backend/requirements/default.txt') }}
restore-keys: |
${{ runner.os }}-${{ runner.arch }}-playwright-
- name: Install playwright
shell: bash
run: |
playwright install chromium --with-deps

View File

@@ -0,0 +1,72 @@
name: "Setup Python and Install Dependencies"
description: "Sets up Python with uv and installs deps"
inputs:
requirements:
description: "Newline-separated list of requirement files to install (relative to repo root)"
required: true
runs:
using: "composite"
steps:
- name: Setup uv
uses: astral-sh/setup-uv@caf0cab7a618c569241d31dcd442f54681755d39 # ratchet:astral-sh/setup-uv@v3
# TODO: Enable caching once there is a uv.lock file checked in.
# with:
# enable-cache: true
- name: Compute requirements hash
id: req-hash
shell: bash
env:
REQUIREMENTS: ${{ inputs.requirements }}
run: |
# Hash the contents of the specified requirement files
hash=""
while IFS= read -r req; do
if [ -n "$req" ] && [ -f "$req" ]; then
hash="$hash$(sha256sum "$req")"
fi
done <<< "$REQUIREMENTS"
echo "hash=$(echo "$hash" | sha256sum | cut -d' ' -f1)" >> "$GITHUB_OUTPUT"
- name: Cache uv cache directory
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
with:
path: ~/.cache/uv
key: ${{ runner.os }}-uv-${{ steps.req-hash.outputs.hash }}
restore-keys: |
${{ runner.os }}-uv-
- name: Setup Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # ratchet:actions/setup-python@v5
with:
python-version: "3.11"
- name: Create virtual environment
shell: bash
env:
VENV_DIR: ${{ runner.temp }}/venv
run: | # zizmor: ignore[github-env]
uv venv "$VENV_DIR"
# Validate path before adding to GITHUB_PATH to prevent code injection
if [ -d "$VENV_DIR/bin" ]; then
realpath "$VENV_DIR/bin" >> "$GITHUB_PATH"
else
echo "Error: $VENV_DIR/bin does not exist"
exit 1
fi
- name: Install Python dependencies with uv
shell: bash
env:
REQUIREMENTS: ${{ inputs.requirements }}
run: |
# Build the uv pip install command with each requirement file as array elements
cmd=("uv" "pip" "install")
while IFS= read -r req; do
# Skip empty lines
if [ -n "$req" ]; then
cmd+=("-r" "$req")
fi
done <<< "$REQUIREMENTS"
echo "Running: ${cmd[*]}"
"${cmd[@]}"

102
.github/actions/slack-notify/action.yml vendored Normal file
View File

@@ -0,0 +1,102 @@
name: "Slack Notify on Failure"
description: "Sends a Slack notification when a workflow fails"
inputs:
webhook-url:
description: "Slack webhook URL (can also use SLACK_WEBHOOK_URL env var)"
required: false
failed-jobs:
description: "List of failed job names (newline-separated)"
required: false
title:
description: "Title for the notification"
required: false
default: "🚨 Workflow Failed"
ref-name:
description: "Git ref name (tag/branch)"
required: false
runs:
using: "composite"
steps:
- name: Send Slack notification
shell: bash
env:
SLACK_WEBHOOK_URL: ${{ inputs.webhook-url }}
FAILED_JOBS: ${{ inputs.failed-jobs }}
TITLE: ${{ inputs.title }}
REF_NAME: ${{ inputs.ref-name }}
REPO: ${{ github.repository }}
WORKFLOW: ${{ github.workflow }}
RUN_NUMBER: ${{ github.run_number }}
RUN_ID: ${{ github.run_id }}
SERVER_URL: ${{ github.server_url }}
GITHUB_REF_NAME: ${{ github.ref_name }}
run: |
if [ -z "$SLACK_WEBHOOK_URL" ]; then
echo "webhook-url input or SLACK_WEBHOOK_URL env var is not set, skipping notification"
exit 0
fi
# Build workflow URL
WORKFLOW_URL="${SERVER_URL}/${REPO}/actions/runs/${RUN_ID}"
# Use ref_name from input or fall back to github.ref_name
if [ -z "$REF_NAME" ]; then
REF_NAME="$GITHUB_REF_NAME"
fi
# Escape JSON special characters
escape_json() {
local input="$1"
# Escape backslashes first (but preserve \n sequences)
# Protect \n sequences temporarily
input=$(printf '%s' "$input" | sed 's/\\n/\x01NL\x01/g')
# Escape remaining backslashes
input=$(printf '%s' "$input" | sed 's/\\/\\\\/g')
# Restore \n sequences (single backslash, will be correct in JSON)
input=$(printf '%s' "$input" | sed 's/\x01NL\x01/\\n/g')
# Escape quotes
printf '%s' "$input" | sed 's/"/\\"/g'
}
REF_NAME_ESC=$(escape_json "$REF_NAME")
FAILED_JOBS_ESC=$(escape_json "$FAILED_JOBS")
WORKFLOW_URL_ESC=$(escape_json "$WORKFLOW_URL")
TITLE_ESC=$(escape_json "$TITLE")
# Build JSON payload piece by piece
# Note: FAILED_JOBS_ESC already contains \n sequences that should remain as \n in JSON
PAYLOAD="{"
PAYLOAD="${PAYLOAD}\"text\":\"${TITLE_ESC}\","
PAYLOAD="${PAYLOAD}\"blocks\":[{"
PAYLOAD="${PAYLOAD}\"type\":\"header\","
PAYLOAD="${PAYLOAD}\"text\":{\"type\":\"plain_text\",\"text\":\"${TITLE_ESC}\"}"
PAYLOAD="${PAYLOAD}},{"
PAYLOAD="${PAYLOAD}\"type\":\"section\","
PAYLOAD="${PAYLOAD}\"fields\":["
if [ -n "$REF_NAME" ]; then
PAYLOAD="${PAYLOAD}{\"type\":\"mrkdwn\",\"text\":\"*Ref:*\\n${REF_NAME_ESC}\"},"
fi
PAYLOAD="${PAYLOAD}{\"type\":\"mrkdwn\",\"text\":\"*Run ID:*\\n#${RUN_NUMBER}\"}"
PAYLOAD="${PAYLOAD}]"
PAYLOAD="${PAYLOAD}}"
if [ -n "$FAILED_JOBS" ]; then
PAYLOAD="${PAYLOAD},{"
PAYLOAD="${PAYLOAD}\"type\":\"section\","
PAYLOAD="${PAYLOAD}\"text\":{\"type\":\"mrkdwn\",\"text\":\"*Failed Jobs:*\\n${FAILED_JOBS_ESC}\"}"
PAYLOAD="${PAYLOAD}}"
fi
PAYLOAD="${PAYLOAD},{"
PAYLOAD="${PAYLOAD}\"type\":\"actions\","
PAYLOAD="${PAYLOAD}\"elements\":[{"
PAYLOAD="${PAYLOAD}\"type\":\"button\","
PAYLOAD="${PAYLOAD}\"text\":{\"type\":\"plain_text\",\"text\":\"View Workflow Run\"},"
PAYLOAD="${PAYLOAD}\"url\":\"${WORKFLOW_URL_ESC}\""
PAYLOAD="${PAYLOAD}}]"
PAYLOAD="${PAYLOAD}}"
PAYLOAD="${PAYLOAD}]"
PAYLOAD="${PAYLOAD}}"
curl -X POST -H 'Content-type: application/json' \
--data "$PAYLOAD" \
"$SLACK_WEBHOOK_URL"

24
.github/dependabot.yml vendored Normal file
View File

@@ -0,0 +1,24 @@
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly"
cooldown:
default-days: 7
open-pull-requests-limit: 3
assignees:
- "jmelahman"
labels:
- "dependabot:actions"
- package-ecosystem: "pip"
directory: "/backend"
schedule:
interval: "weekly"
cooldown:
default-days: 7
open-pull-requests-limit: 3
assignees:
- "jmelahman"
labels:
- "dependabot:python"

View File

@@ -6,9 +6,6 @@
[Describe the tests you ran to verify your changes]
## Backporting (check the box to trigger backport action)
## Additional Options
Note: You have to check that the action passes, otherwise resolve the conflicts manually and tag the patches.
- [ ] This PR should be backported (make sure to check that the backport attempt succeeds)
- [ ] [Optional] Override Linear Check

1
.github/runs-on.yml vendored Normal file
View File

@@ -0,0 +1 @@
_extend: .github-private

View File

@@ -0,0 +1,33 @@
name: Check Lazy Imports
concurrency:
group: Check-Lazy-Imports-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
pull_request:
branches:
- main
- 'release/**'
permissions:
contents: read
jobs:
check-lazy-imports:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Set up Python
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
with:
python-version: '3.11'
- name: Check lazy imports
run: python3 backend/scripts/check_lazy_imports.py

1068
.github/workflows/deployment.yml vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1,63 +0,0 @@
name: Build and Push Backend Image on Tag
on:
push:
tags:
- "*"
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
jobs:
build-and-push:
# TODO: investigate a matrix build like the web container
# See https://runs-on.com/runners/linux/
runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}"]
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Install build-essential
run: |
sudo apt-get update
sudo apt-get install -y build-essential
- name: Backend Image Docker Build and Push
uses: docker/build-push-action@v5
with:
context: ./backend
file: ./backend/Dockerfile
platforms: linux/amd64,linux/arm64
push: true
tags: |
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
build-args: |
ONYX_VERSION=${{ github.ref_name }}
# trivy has their own rate limiting issues causing this action to flake
# we worked around it by hardcoding to different db repos in env
# can re-enable when they figure it out
# https://github.com/aquasecurity/trivy/discussions/7538
# https://github.com/aquasecurity/trivy-action/issues/389
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
env:
TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
with:
# To run locally: trivy image --severity HIGH,CRITICAL onyxdotapp/onyx-backend
image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
severity: "CRITICAL,HIGH"
trivyignores: ./backend/.trivyignore

View File

@@ -1,139 +0,0 @@
name: Build and Push Cloud Web Image on Tag
# Identical to the web container build, but with correct image tag and build args
on:
push:
tags:
- "*"
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
jobs:
build:
runs-on:
- runs-on
- runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
- run-id=${{ github.run_id }}
- tag=platform-${{ matrix.platform }}
strategy:
fail-fast: false
matrix:
platform:
- linux/amd64
- linux/arm64
steps:
- name: Prepare
run: |
platform=${{ matrix.platform }}
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
- name: Checkout
uses: actions/checkout@v4
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
tags: |
type=raw,value=${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
type=raw,value=${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push by digest
id: build
uses: docker/build-push-action@v5
with:
context: ./web
file: ./web/Dockerfile
platforms: ${{ matrix.platform }}
push: true
build-args: |
ONYX_VERSION=${{ github.ref_name }}
NEXT_PUBLIC_CLOUD_ENABLED=true
NEXT_PUBLIC_POSTHOG_KEY=${{ secrets.POSTHOG_KEY }}
NEXT_PUBLIC_POSTHOG_HOST=${{ secrets.POSTHOG_HOST }}
NEXT_PUBLIC_SENTRY_DSN=${{ secrets.SENTRY_DSN }}
NEXT_PUBLIC_GTM_ENABLED=true
NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
NODE_OPTIONS=--max-old-space-size=8192
# needed due to weird interactions with the builds for different platforms
no-cache: true
labels: ${{ steps.meta.outputs.labels }}
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
- name: Export digest
run: |
mkdir -p /tmp/digests
digest="${{ steps.build.outputs.digest }}"
touch "/tmp/digests/${digest#sha256:}"
- name: Upload digest
uses: actions/upload-artifact@v4
with:
name: digests-${{ env.PLATFORM_PAIR }}
path: /tmp/digests/*
if-no-files-found: error
retention-days: 1
merge:
runs-on: ubuntu-latest
needs:
- build
steps:
- name: Download digests
uses: actions/download-artifact@v4
with:
path: /tmp/digests
pattern: digests-*
merge-multiple: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Create manifest list and push
working-directory: /tmp/digests
run: |
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
$(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
- name: Inspect image
run: |
docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}
# trivy has their own rate limiting issues causing this action to flake
# we worked around it by hardcoding to different db repos in env
# can re-enable when they figure it out
# https://github.com/aquasecurity/trivy/discussions/7538
# https://github.com/aquasecurity/trivy-action/issues/389
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
env:
TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
with:
image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
severity: "CRITICAL,HIGH"

View File

@@ -1,123 +0,0 @@
name: Build and Push Model Server Image on Tag
on:
push:
tags:
- "*"
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
DOCKER_BUILDKIT: 1
BUILDKIT_PROGRESS: plain
jobs:
build-amd64:
runs-on:
[runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-amd64"]
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: System Info
run: |
df -h
free -h
docker system prune -af --volumes
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
driver-opts: |
image=moby/buildkit:latest
network=host
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and Push AMD64
uses: docker/build-push-action@v5
with:
context: ./backend
file: ./backend/Dockerfile.model_server
platforms: linux/amd64
push: true
tags: ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64
build-args: |
DANSWER_VERSION=${{ github.ref_name }}
outputs: type=registry
provenance: false
build-arm64:
runs-on:
[runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-arm64"]
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: System Info
run: |
df -h
free -h
docker system prune -af --volumes
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
driver-opts: |
image=moby/buildkit:latest
network=host
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and Push ARM64
uses: docker/build-push-action@v5
with:
context: ./backend
file: ./backend/Dockerfile.model_server
platforms: linux/arm64
push: true
tags: ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
build-args: |
DANSWER_VERSION=${{ github.ref_name }}
outputs: type=registry
provenance: false
merge-and-scan:
needs: [build-amd64, build-arm64]
runs-on: ubuntu-latest
steps:
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Create and Push Multi-arch Manifest
run: |
docker buildx create --use
docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }} \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
if [[ "${{ env.LATEST_TAG }}" == "true" ]]; then
docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:latest \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
fi
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
env:
TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
with:
image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
severity: "CRITICAL,HIGH"
timeout: "10m"

View File

@@ -1,133 +0,0 @@
name: Build and Push Web Image on Tag
on:
push:
tags:
- "*"
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server
LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
jobs:
build:
runs-on:
- runs-on
- runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
- run-id=${{ github.run_id }}
- tag=platform-${{ matrix.platform }}
strategy:
fail-fast: false
matrix:
platform:
- linux/amd64
- linux/arm64
steps:
- name: Prepare
run: |
platform=${{ matrix.platform }}
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
- name: Checkout
uses: actions/checkout@v4
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
tags: |
type=raw,value=${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
type=raw,value=${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push by digest
id: build
uses: docker/build-push-action@v5
with:
context: ./web
file: ./web/Dockerfile
platforms: ${{ matrix.platform }}
push: true
build-args: |
ONYX_VERSION=${{ github.ref_name }}
NODE_OPTIONS=--max-old-space-size=8192
# needed due to weird interactions with the builds for different platforms
no-cache: true
labels: ${{ steps.meta.outputs.labels }}
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
- name: Export digest
run: |
mkdir -p /tmp/digests
digest="${{ steps.build.outputs.digest }}"
touch "/tmp/digests/${digest#sha256:}"
- name: Upload digest
uses: actions/upload-artifact@v4
with:
name: digests-${{ env.PLATFORM_PAIR }}
path: /tmp/digests/*
if-no-files-found: error
retention-days: 1
merge:
runs-on: ubuntu-latest
needs:
- build
steps:
- name: Download digests
uses: actions/download-artifact@v4
with:
path: /tmp/digests
pattern: digests-*
merge-multiple: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Create manifest list and push
working-directory: /tmp/digests
run: |
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
$(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
- name: Inspect image
run: |
docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}
# trivy has their own rate limiting issues causing this action to flake
# we worked around it by hardcoding to different db repos in env
# can re-enable when they figure it out
# https://github.com/aquasecurity/trivy/discussions/7538
# https://github.com/aquasecurity/trivy-action/issues/389
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
env:
TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
with:
image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
severity: "CRITICAL,HIGH"

51
.github/workflows/docker-tag-beta.yml vendored Normal file
View File

@@ -0,0 +1,51 @@
# This workflow is set up to be manually triggered via the GitHub Action tab.
# Given a version, it will tag those backend and webserver images as "beta".
name: Tag Beta Version
on:
workflow_dispatch:
inputs:
version:
description: "The version (ie v1.0.0-beta.0) to tag as beta"
required: true
permissions:
contents: read
jobs:
tag:
# See https://runs-on.com/runners/linux/
# use a lower powered instance since this just does i/o to docker hub
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-tag"]
timeout-minutes: 45
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Enable Docker CLI experimental features
run: echo "DOCKER_CLI_EXPERIMENTAL=enabled" >> $GITHUB_ENV
- name: Pull, Tag and Push Web Server Image
env:
VERSION: ${{ github.event.inputs.version }}
run: |
docker buildx imagetools create -t onyxdotapp/onyx-web-server:beta onyxdotapp/onyx-web-server:${VERSION}
- name: Pull, Tag and Push API Server Image
env:
VERSION: ${{ github.event.inputs.version }}
run: |
docker buildx imagetools create -t onyxdotapp/onyx-backend:beta onyxdotapp/onyx-backend:${VERSION}
- name: Pull, Tag and Push Model Server Image
env:
VERSION: ${{ github.event.inputs.version }}
run: |
docker buildx imagetools create -t onyxdotapp/onyx-model-server:beta onyxdotapp/onyx-model-server:${VERSION}

View File

@@ -10,17 +10,21 @@ on:
description: "The version (ie v0.0.1) to tag as latest"
required: true
permissions:
contents: read
jobs:
tag:
# See https://runs-on.com/runners/linux/
# use a lower powered instance since this just does i/o to docker hub
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-tag"]
timeout-minutes: 45
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v1
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -29,9 +33,19 @@ jobs:
run: echo "DOCKER_CLI_EXPERIMENTAL=enabled" >> $GITHUB_ENV
- name: Pull, Tag and Push Web Server Image
env:
VERSION: ${{ github.event.inputs.version }}
run: |
docker buildx imagetools create -t onyxdotapp/onyx-web-server:latest onyxdotapp/onyx-web-server:${{ github.event.inputs.version }}
docker buildx imagetools create -t onyxdotapp/onyx-web-server:latest onyxdotapp/onyx-web-server:${VERSION}
- name: Pull, Tag and Push API Server Image
env:
VERSION: ${{ github.event.inputs.version }}
run: |
docker buildx imagetools create -t onyxdotapp/onyx-backend:latest onyxdotapp/onyx-backend:${{ github.event.inputs.version }}
docker buildx imagetools create -t onyxdotapp/onyx-backend:latest onyxdotapp/onyx-backend:${VERSION}
- name: Pull, Tag and Push Model Server Image
env:
VERSION: ${{ github.event.inputs.version }}
run: |
docker buildx imagetools create -t onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:${VERSION}

View File

@@ -0,0 +1,55 @@
name: Release Onyx Helm Charts
on:
push:
branches:
- main
permissions: write-all
jobs:
release:
permissions:
contents: write
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- name: Checkout
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false
- name: Install Helm CLI
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # ratchet:azure/setup-helm@v4
with:
version: v3.12.1
- name: Add required Helm repositories
run: |
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo add onyx-vespa https://onyx-dot-app.github.io/vespa-helm-charts
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/
helm repo add code-interpreter https://onyx-dot-app.github.io/code-interpreter/
helm repo update
- name: Build chart dependencies
run: |
set -euo pipefail
for chart_dir in deployment/helm/charts/*; do
if [ -f "$chart_dir/Chart.yaml" ]; then
echo "Building dependencies for $chart_dir"
helm dependency build "$chart_dir"
fi
done
- name: Publish Helm charts to gh-pages
uses: stefanprodan/helm-gh-pages@0ad2bb377311d61ac04ad9eb6f252fb68e207260 # ratchet:stefanprodan/helm-gh-pages@v1.7.0
with:
token: ${{ secrets.GITHUB_TOKEN }}
charts_dir: deployment/helm/charts
branch: gh-pages
commit_username: ${{ github.actor }}
commit_email: ${{ github.actor }}@users.noreply.github.com

View File

@@ -1,171 +0,0 @@
# This workflow is intended to be manually triggered via the GitHub Action tab.
# Given a hotfix branch, it will attempt to open a PR to all release branches and
# by default auto merge them
name: Hotfix release branches
on:
workflow_dispatch:
inputs:
hotfix_commit:
description: "Hotfix commit hash"
required: true
hotfix_suffix:
description: "Hotfix branch suffix (e.g. hotfix/v0.8-{suffix})"
required: true
release_branch_pattern:
description: "Release branch pattern (regex)"
required: true
default: "release/.*"
auto_merge:
description: "Automatically merge the hotfix PRs"
required: true
type: choice
default: "true"
options:
- true
- false
jobs:
hotfix_release_branches:
permissions: write-all
# See https://runs-on.com/runners/linux/
# use a lower powered instance since this just does i/o to docker hub
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
steps:
# needs RKUO_DEPLOY_KEY for write access to merge PR's
- name: Checkout Repository
uses: actions/checkout@v4
with:
ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"
fetch-depth: 0
- name: Set up Git user
run: |
git config user.name "Richard Kuo [bot]"
git config user.email "rkuo[bot]@onyx.app"
- name: Fetch All Branches
run: |
git fetch --all --prune
- name: Verify Hotfix Commit Exists
run: |
git rev-parse --verify "${{ github.event.inputs.hotfix_commit }}" || { echo "Commit not found: ${{ github.event.inputs.hotfix_commit }}"; exit 1; }
- name: Get Release Branches
id: get_release_branches
run: |
BRANCHES=$(git branch -r | grep -E "${{ github.event.inputs.release_branch_pattern }}" | sed 's|origin/||' | tr -d ' ')
if [ -z "$BRANCHES" ]; then
echo "No release branches found matching pattern '${{ github.event.inputs.release_branch_pattern }}'."
exit 1
fi
echo "Found release branches:"
echo "$BRANCHES"
# Join the branches into a single line separated by commas
BRANCHES_JOINED=$(echo "$BRANCHES" | tr '\n' ',' | sed 's/,$//')
# Set the branches as an output
echo "branches=$BRANCHES_JOINED" >> $GITHUB_OUTPUT
# notes on all the vagaries of wiring up automated PR's
# https://github.com/peter-evans/create-pull-request/blob/main/docs/concepts-guidelines.md#triggering-further-workflow-runs
# we must use a custom token for GH_TOKEN to trigger the subsequent PR checks
- name: Create and Merge Pull Requests to Matching Release Branches
env:
HOTFIX_COMMIT: ${{ github.event.inputs.hotfix_commit }}
HOTFIX_SUFFIX: ${{ github.event.inputs.hotfix_suffix }}
AUTO_MERGE: ${{ github.event.inputs.auto_merge }}
GH_TOKEN: ${{ secrets.RKUO_PERSONAL_ACCESS_TOKEN }}
run: |
# Get the branches from the previous step
BRANCHES="${{ steps.get_release_branches.outputs.branches }}"
# Convert BRANCHES to an array
IFS=$',' read -ra BRANCH_ARRAY <<< "$BRANCHES"
# Loop through each release branch and create and merge a PR
for RELEASE_BRANCH in "${BRANCH_ARRAY[@]}"; do
echo "Processing $RELEASE_BRANCH..."
# Parse out the release version by removing "release/" from the branch name
RELEASE_VERSION=${RELEASE_BRANCH#release/}
echo "Release version parsed: $RELEASE_VERSION"
HOTFIX_BRANCH="hotfix/${RELEASE_VERSION}-${HOTFIX_SUFFIX}"
echo "Creating PR from $HOTFIX_BRANCH to $RELEASE_BRANCH"
# Checkout the release branch
echo "Checking out $RELEASE_BRANCH"
git checkout "$RELEASE_BRANCH"
# Create the new hotfix branch
if git rev-parse --verify "$HOTFIX_BRANCH" >/dev/null 2>&1; then
echo "Hotfix branch $HOTFIX_BRANCH already exists. Skipping branch creation."
else
echo "Branching $RELEASE_BRANCH to $HOTFIX_BRANCH"
git checkout -b "$HOTFIX_BRANCH"
fi
# Check if the hotfix commit is a merge commit
if git rev-list --merges -n 1 "$HOTFIX_COMMIT" >/dev/null 2>&1; then
# -m 1 uses the target branch as the base (which is what we want)
echo "Hotfix commit $HOTFIX_COMMIT is a merge commit, using -m 1 for cherry-pick"
CHERRY_PICK_CMD="git cherry-pick -m 1 $HOTFIX_COMMIT"
else
CHERRY_PICK_CMD="git cherry-pick $HOTFIX_COMMIT"
fi
# Perform the cherry-pick
echo "Executing: $CHERRY_PICK_CMD"
eval "$CHERRY_PICK_CMD"
if [ $? -ne 0 ]; then
echo "Cherry-pick failed for $HOTFIX_COMMIT on $HOTFIX_BRANCH. Aborting..."
git cherry-pick --abort
continue
fi
# Push the hotfix branch to the remote
echo "Pushing $HOTFIX_BRANCH..."
git push origin "$HOTFIX_BRANCH"
echo "Hotfix branch $HOTFIX_BRANCH created and pushed."
# Check if PR already exists
EXISTING_PR=$(gh pr list --head "$HOTFIX_BRANCH" --base "$RELEASE_BRANCH" --state open --json number --jq '.[0].number')
if [ -n "$EXISTING_PR" ]; then
echo "An open PR already exists: #$EXISTING_PR. Skipping..."
continue
fi
# Create a new PR and capture the output
PR_OUTPUT=$(gh pr create --title "Merge $HOTFIX_BRANCH into $RELEASE_BRANCH" \
--body "Automated PR to merge \`$HOTFIX_BRANCH\` into \`$RELEASE_BRANCH\`." \
--head "$HOTFIX_BRANCH" --base "$RELEASE_BRANCH")
# Extract the URL from the output
PR_URL=$(echo "$PR_OUTPUT" | grep -Eo 'https://github.com/[^ ]+')
echo "Pull request created: $PR_URL"
# Extract PR number from URL
PR_NUMBER=$(basename "$PR_URL")
echo "Pull request created: $PR_NUMBER"
if [ "$AUTO_MERGE" == "true" ]; then
echo "Attempting to merge pull request #$PR_NUMBER"
# Attempt to merge the PR
gh pr merge "$PR_NUMBER" --merge --auto --delete-branch
if [ $? -eq 0 ]; then
echo "Pull request #$PR_NUMBER merged successfully."
else
# Optionally, handle the error or continue
echo "Failed to merge pull request #$PR_NUMBER."
fi
fi
done

View File

@@ -7,12 +7,13 @@ permissions:
# contents: write # only for delete-branch option
issues: write
pull-requests: write
jobs:
stale:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- uses: actions/stale@v9
- uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # ratchet:actions/stale@v10
with:
stale-issue-message: 'This issue is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'
stale-pr-message: 'This PR is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'
@@ -20,4 +21,3 @@ jobs:
close-pr-message: 'This PR was closed because it has been stalled for 90 days with no activity.'
days-before-stale: 75
# days-before-close: 90 # uncomment after we test stale behavior

View File

@@ -15,19 +15,25 @@ on:
permissions:
actions: read
contents: read
security-events: write
jobs:
scan-licenses:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"]
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}-scan-licenses"]
timeout-minutes: 45
permissions:
actions: read
contents: read
security-events: write
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
with:
python-version: '3.11'
cache: 'pip'
@@ -35,7 +41,7 @@ jobs:
backend/requirements/default.txt
backend/requirements/dev.txt
backend/requirements/model_server.txt
- name: Get explicit and transitive dependencies
run: |
python -m pip install --upgrade pip
@@ -43,34 +49,103 @@ jobs:
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
pip freeze > requirements-all.txt
- name: Check python
id: license_check_report
uses: pilosus/action-pip-license-checker@v2
uses: pilosus/action-pip-license-checker@e909b0226ff49d3235c99c4585bc617f49fff16a # ratchet:pilosus/action-pip-license-checker@v3
with:
requirements: 'requirements-all.txt'
fail: 'Copyleft'
exclude: '(?i)^(pylint|aio[-_]*).*'
- name: Print report
if: ${{ always() }}
run: echo "${{ steps.license_check_report.outputs.report }}"
if: always()
env:
REPORT: ${{ steps.license_check_report.outputs.report }}
run: echo "$REPORT"
- name: Install npm dependencies
working-directory: ./web
run: npm ci
- name: Run Trivy vulnerability scanner in repo mode
uses: aquasecurity/trivy-action@0.28.0
with:
scan-type: fs
scanners: license
format: table
# format: sarif
# output: trivy-results.sarif
severity: HIGH,CRITICAL
# - name: Upload Trivy scan results to GitHub Security tab
# uses: github/codeql-action/upload-sarif@v3
# be careful enabling the sarif and upload as it may spam the security tab
# with a huge amount of items. Work out the issues before enabling upload.
# - name: Run Trivy vulnerability scanner in repo mode
# if: always()
# uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # ratchet:aquasecurity/trivy-action@0.33.1
# with:
# sarif_file: trivy-results.sarif
# scan-type: fs
# scan-ref: .
# scanners: license
# format: table
# severity: HIGH,CRITICAL
# # format: sarif
# # output: trivy-results.sarif
#
# # - name: Upload Trivy scan results to GitHub Security tab
# # uses: github/codeql-action/upload-sarif@v3
# # with:
# # sarif_file: trivy-results.sarif
scan-trivy:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}-scan-trivy"]
timeout-minutes: 45
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
# Backend
- name: Pull backend docker image
run: docker pull onyxdotapp/onyx-backend:latest
- name: Run Trivy vulnerability scanner on backend
uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # ratchet:aquasecurity/trivy-action@0.33.1
env:
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
with:
image-ref: onyxdotapp/onyx-backend:latest
scanners: license
severity: HIGH,CRITICAL
vuln-type: library
exit-code: 0 # Set to 1 if we want a failed scan to fail the workflow
# Web server
- name: Pull web server docker image
run: docker pull onyxdotapp/onyx-web-server:latest
- name: Run Trivy vulnerability scanner on web server
uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # ratchet:aquasecurity/trivy-action@0.33.1
env:
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
with:
image-ref: onyxdotapp/onyx-web-server:latest
scanners: license
severity: HIGH,CRITICAL
vuln-type: library
exit-code: 0
# Model server
- name: Pull model server docker image
run: docker pull onyxdotapp/onyx-model-server:latest
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # ratchet:aquasecurity/trivy-action@0.33.1
env:
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
with:
image-ref: onyxdotapp/onyx-model-server:latest
scanners: license
severity: HIGH,CRITICAL
vuln-type: library
exit-code: 0

View File

@@ -1,124 +0,0 @@
name: Backport on Merge
# Note this workflow does not trigger the builds, be sure to manually tag the branches to trigger the builds
on:
pull_request:
types: [closed] # Later we check for merge so only PRs that go in can get backported
permissions:
contents: write
actions: write
jobs:
backport:
if: github.event.pull_request.merged == true
runs-on: ubuntu-latest
env:
GITHUB_TOKEN: ${{ secrets.YUHONG_GH_ACTIONS }}
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"
fetch-depth: 0
- name: Set up Git user
run: |
git config user.name "Richard Kuo [bot]"
git config user.email "rkuo[bot]@onyx.app"
git fetch --prune
- name: Check for Backport Checkbox
id: checkbox-check
run: |
PR_BODY="${{ github.event.pull_request.body }}"
if [[ "$PR_BODY" == *"[x] This PR should be backported"* ]]; then
echo "backport=true" >> $GITHUB_OUTPUT
else
echo "backport=false" >> $GITHUB_OUTPUT
fi
- name: List and sort release branches
id: list-branches
run: |
git fetch --all --tags
BRANCHES=$(git for-each-ref --format='%(refname:short)' refs/remotes/origin/release/* | sed 's|origin/release/||' | sort -Vr)
BETA=$(echo "$BRANCHES" | head -n 1)
STABLE=$(echo "$BRANCHES" | head -n 2 | tail -n 1)
echo "beta=release/$BETA" >> $GITHUB_OUTPUT
echo "stable=release/$STABLE" >> $GITHUB_OUTPUT
# Fetch latest tags for beta and stable
LATEST_BETA_TAG=$(git tag -l "v[0-9]*.[0-9]*.[0-9]*-beta.[0-9]*" | grep -E "^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$" | grep -v -- "-cloud" | sort -Vr | head -n 1)
LATEST_STABLE_TAG=$(git tag -l "v[0-9]*.[0-9]*.[0-9]*" | grep -E "^v[0-9]+\.[0-9]+\.[0-9]+$" | sort -Vr | head -n 1)
# Handle case where no beta tags exist
if [[ -z "$LATEST_BETA_TAG" ]]; then
NEW_BETA_TAG="v1.0.0-beta.1"
else
NEW_BETA_TAG=$(echo $LATEST_BETA_TAG | awk -F '[.-]' '{print $1 "." $2 "." $3 "-beta." ($NF+1)}')
fi
# Increment latest stable tag
NEW_STABLE_TAG=$(echo $LATEST_STABLE_TAG | awk -F '.' '{print $1 "." $2 "." ($3+1)}')
echo "latest_beta_tag=$LATEST_BETA_TAG" >> $GITHUB_OUTPUT
echo "latest_stable_tag=$LATEST_STABLE_TAG" >> $GITHUB_OUTPUT
echo "new_beta_tag=$NEW_BETA_TAG" >> $GITHUB_OUTPUT
echo "new_stable_tag=$NEW_STABLE_TAG" >> $GITHUB_OUTPUT
- name: Echo branch and tag information
run: |
echo "Beta branch: ${{ steps.list-branches.outputs.beta }}"
echo "Stable branch: ${{ steps.list-branches.outputs.stable }}"
echo "Latest beta tag: ${{ steps.list-branches.outputs.latest_beta_tag }}"
echo "Latest stable tag: ${{ steps.list-branches.outputs.latest_stable_tag }}"
echo "New beta tag: ${{ steps.list-branches.outputs.new_beta_tag }}"
echo "New stable tag: ${{ steps.list-branches.outputs.new_stable_tag }}"
- name: Trigger Backport
if: steps.checkbox-check.outputs.backport == 'true'
run: |
set -e
echo "Backporting to beta ${{ steps.list-branches.outputs.beta }} and stable ${{ steps.list-branches.outputs.stable }}"
# Echo the merge commit SHA
echo "Merge commit SHA: ${{ github.event.pull_request.merge_commit_sha }}"
# Fetch all history for all branches and tags
git fetch --prune
# Reset and prepare the beta branch
git checkout ${{ steps.list-branches.outputs.beta }}
echo "Last 5 commits on beta branch:"
git log -n 5 --pretty=format:"%H"
echo "" # Newline for formatting
# Cherry-pick the merge commit from the merged PR
git cherry-pick -m 1 ${{ github.event.pull_request.merge_commit_sha }} || {
echo "Cherry-pick to beta failed due to conflicts."
exit 1
}
# Create new beta branch/tag
git tag ${{ steps.list-branches.outputs.new_beta_tag }}
# Push the changes and tag to the beta branch using PAT
git push origin ${{ steps.list-branches.outputs.beta }}
git push origin ${{ steps.list-branches.outputs.new_beta_tag }}
# Reset and prepare the stable branch
git checkout ${{ steps.list-branches.outputs.stable }}
echo "Last 5 commits on stable branch:"
git log -n 5 --pretty=format:"%H"
echo "" # Newline for formatting
# Cherry-pick the merge commit from the merged PR
git cherry-pick -m 1 ${{ github.event.pull_request.merge_commit_sha }} || {
echo "Cherry-pick to stable failed due to conflicts."
exit 1
}
# Create new stable branch/tag
git tag ${{ steps.list-branches.outputs.new_stable_tag }}
# Push the changes and tag to the stable branch using PAT
git push origin ${{ steps.list-branches.outputs.stable }}
git push origin ${{ steps.list-branches.outputs.new_stable_tag }}

View File

@@ -1,240 +0,0 @@
name: Run Chromatic Tests
concurrency:
group: Run-Chromatic-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on: push
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
GEN_AI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
MOCK_LLM_RESPONSE: true
jobs:
playwright-tests:
name: Playwright Tests
# See https://runs-on.com/runners/linux/
runs-on:
[
runs-on,
runner=32cpu-linux-x64,
disk=large,
"run-id=${{ github.run_id }}",
]
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
cache: "pip"
cache-dependency-path: |
backend/requirements/default.txt
backend/requirements/dev.txt
backend/requirements/model_server.txt
- run: |
python -m pip install --upgrade pip
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
- name: Setup node
uses: actions/setup-node@v4
with:
node-version: 22
- name: Install node dependencies
working-directory: ./web
run: npm ci
- name: Install playwright browsers
working-directory: ./web
run: npx playwright install --with-deps
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
# tag every docker image with "test" so that we can spin up the correct set
# of images during testing
# we use the runs-on cache for docker builds
# in conjunction with runs-on runners, it has better speed and unlimited caching
# https://runs-on.com/caching/s3-cache-for-github-actions/
# https://runs-on.com/caching/docker/
# https://github.com/moby/buildkit#s3-cache-experimental
# images are built and run locally for testing purposes. Not pushed.
- name: Build Web Docker image
uses: ./.github/actions/custom-build-and-push
with:
context: ./web
file: ./web/Dockerfile
platforms: linux/amd64
tags: onyxdotapp/onyx-web-server:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/web-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/web-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
- name: Build Backend Docker image
uses: ./.github/actions/custom-build-and-push
with:
context: ./backend
file: ./backend/Dockerfile
platforms: linux/amd64
tags: onyxdotapp/onyx-backend:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
- name: Build Model Server Docker image
uses: ./.github/actions/custom-build-and-push
with:
context: ./backend
file: ./backend/Dockerfile.model_server
platforms: linux/amd64
tags: onyxdotapp/onyx-model-server:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
- name: Start Docker containers
run: |
cd deployment/docker_compose
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
AUTH_TYPE=basic \
GEN_AI_API_KEY=${{ secrets.OPENAI_API_KEY }} \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
docker compose -f docker-compose.dev.yml -p danswer-stack up -d
id: start_docker
- name: Wait for service to be ready
run: |
echo "Starting wait-for-service script..."
docker logs -f danswer-stack-api_server-1 &
start_time=$(date +%s)
timeout=300 # 5 minutes in seconds
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. Service did not become ready in 5 minutes."
exit 1
fi
# Use curl with error handling to ignore specific exit code 56
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
if [ "$response" = "200" ]; then
echo "Service is ready!"
break
elif [ "$response" = "curl_error" ]; then
echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
else
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
fi
sleep 5
done
echo "Finished waiting for service."
- name: Run pytest playwright test init
working-directory: ./backend
env:
PYTEST_IGNORE_SKIP: true
run: pytest -s tests/integration/tests/playwright/test_playwright.py
- name: Run Playwright tests
working-directory: ./web
run: npx playwright test
- uses: actions/upload-artifact@v4
if: always()
with:
# Chromatic automatically defaults to the test-results directory.
# Replace with the path to your custom directory and adjust the CHROMATIC_ARCHIVE_LOCATION environment variable accordingly.
name: test-results
path: ./web/test-results
retention-days: 30
# save before stopping the containers so the logs can be captured
- name: Save Docker logs
if: success() || failure()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack logs > docker-compose.log
mv docker-compose.log ${{ github.workspace }}/docker-compose.log
- name: Upload logs
if: success() || failure()
uses: actions/upload-artifact@v4
with:
name: docker-logs
path: ${{ github.workspace }}/docker-compose.log
- name: Stop Docker containers
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack down -v
chromatic-tests:
name: Chromatic Tests
needs: playwright-tests
runs-on:
[
runs-on,
runner=32cpu-linux-x64,
disk=large,
"run-id=${{ github.run_id }}",
]
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Setup node
uses: actions/setup-node@v4
with:
node-version: 22
- name: Install node dependencies
working-directory: ./web
run: npm ci
- name: Download Playwright test results
uses: actions/download-artifact@v4
with:
name: test-results
path: ./web/test-results
- name: Run Chromatic
uses: chromaui/action@latest
with:
playwright: true
projectToken: ${{ secrets.CHROMATIC_PROJECT_TOKEN }}
workingDir: ./web
env:
CHROMATIC_ARCHIVE_LOCATION: ./test-results

View File

@@ -0,0 +1,175 @@
name: External Dependency Unit Tests
concurrency:
group: External-Dependency-Unit-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
pull_request:
branches: [main]
push:
tags:
- "v*.*.*"
permissions:
contents: read
env:
# AWS credentials for S3-specific test
S3_AWS_ACCESS_KEY_ID_FOR_TEST: ${{ secrets.S3_AWS_ACCESS_KEY_ID }}
S3_AWS_SECRET_ACCESS_KEY_FOR_TEST: ${{ secrets.S3_AWS_SECRET_ACCESS_KEY }}
# MinIO
S3_ENDPOINT_URL: "http://localhost:9004"
S3_AWS_ACCESS_KEY_ID: "minioadmin"
S3_AWS_SECRET_ACCESS_KEY: "minioadmin"
# Confluence
CONFLUENCE_TEST_SPACE_URL: ${{ vars.CONFLUENCE_TEST_SPACE_URL }}
CONFLUENCE_TEST_SPACE: ${{ vars.CONFLUENCE_TEST_SPACE }}
CONFLUENCE_TEST_PAGE_ID: ${{ secrets.CONFLUENCE_TEST_PAGE_ID }}
CONFLUENCE_USER_NAME: ${{ vars.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}
# Jira
JIRA_ADMIN_API_TOKEN: ${{ secrets.JIRA_ADMIN_API_TOKEN }}
# LLMs
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
# Code Interpreter
# TODO: debug why this is failing and enable
CODE_INTERPRETER_BASE_URL: http://localhost:8000
jobs:
discover-test-dirs:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 45
outputs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Discover test directories
id: set-matrix
run: |
# Find all subdirectories in backend/tests/external_dependency_unit
dirs=$(find backend/tests/external_dependency_unit -mindepth 1 -maxdepth 1 -type d -exec basename {} \; | sort | jq -R -s -c 'split("\n")[:-1]')
echo "test-dirs=$dirs" >> $GITHUB_OUTPUT
external-dependency-unit-tests:
needs: discover-test-dirs
# Use larger runner with more resources for Vespa
runs-on:
- runs-on
- runner=2cpu-linux-arm64
- ${{ format('run-id={0}-external-dependency-unit-tests-job-{1}', github.run_id, strategy['job-index']) }}
- extras=s3-cache
timeout-minutes: 45
strategy:
fail-fast: false
matrix:
test-dir: ${{ fromJson(needs.discover-test-dirs.outputs.test-dirs) }}
env:
PYTHONPATH: ./backend
MODEL_SERVER_HOST: "disabled"
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
with:
requirements: |
backend/requirements/default.txt
backend/requirements/dev.txt
backend/requirements/ee.txt
- name: Setup Playwright
uses: ./.github/actions/setup-playwright
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Create .env file for Docker Compose
run: |
cat <<EOF > deployment/docker_compose/.env
CODE_INTERPRETER_BETA_ENABLED=true
EOF
- name: Set up Standard Dependencies
run: |
cd deployment/docker_compose
docker compose \
-f docker-compose.yml \
-f docker-compose.dev.yml \
up -d \
minio \
relational_db \
cache \
index \
code-interpreter
- name: Run migrations
run: |
cd backend
# Run migrations to head
alembic upgrade head
alembic heads --verbose
- name: Run Tests for ${{ matrix.test-dir }}
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
env:
TEST_DIR: ${{ matrix.test-dir }}
run: |
py.test \
--durations=8 \
-o junit_family=xunit2 \
-xv \
--ff \
backend/tests/external_dependency_unit/${TEST_DIR}
- name: Collect Docker logs on failure
if: failure()
run: |
mkdir -p docker-logs
cd deployment/docker_compose
# Get list of running containers
containers=$(docker compose -f docker-compose.yml -f docker-compose.dev.yml ps -q)
# Collect logs from each container
for container in $containers; do
container_name=$(docker inspect --format='{{.Name}}' $container | sed 's/^\///')
echo "Collecting logs from $container_name..."
docker logs $container > ../../docker-logs/${container_name}.log 2>&1
done
cd ../..
echo "Docker logs collected in docker-logs directory"
- name: Upload Docker logs
if: failure()
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v5
with:
name: docker-logs-${{ matrix.test-dir }}
path: docker-logs/
retention-days: 7

View File

@@ -1,42 +1,60 @@
name: Helm - Lint and Test Charts
concurrency:
group: Helm-Lint-and-Test-Charts-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
pull_request:
branches: [ main ]
push:
tags:
- "v*.*.*"
workflow_dispatch: # Allows manual triggering
permissions:
contents: read
jobs:
helm-chart-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,hdd=256,"run-id=${{ github.run_id }}"]
runs-on: [runs-on,runner=8cpu-linux-x64,hdd=256,"run-id=${{ github.run_id }}-helm-chart-check"]
timeout-minutes: 45
# fetch-depth 0 is required for helm/chart-testing-action
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false
- name: Set up Helm
uses: azure/setup-helm@v4.2.0
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # ratchet:azure/setup-helm@v4.3.1
with:
version: v3.17.0
version: v3.19.0
- name: Set up chart-testing
uses: helm/chart-testing-action@v2.7.0
uses: helm/chart-testing-action@6ec842c01de15ebb84c8627d2744a0c2f2755c9f # ratchet:helm/chart-testing-action@v2.8.0
# even though we specify chart-dirs in ct.yaml, it isn't used by ct for the list-changed command...
- name: Run chart-testing (list-changed)
id: list-changed
env:
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
run: |
echo "default_branch: ${{ github.event.repository.default_branch }}"
changed=$(ct list-changed --remote origin --target-branch ${{ github.event.repository.default_branch }} --chart-dirs deployment/helm/charts)
echo "default_branch: ${DEFAULT_BRANCH}"
changed=$(ct list-changed --remote origin --target-branch ${DEFAULT_BRANCH} --chart-dirs deployment/helm/charts)
echo "list-changed output: $changed"
if [[ -n "$changed" ]]; then
echo "changed=true" >> "$GITHUB_OUTPUT"
fi
# uncomment to force run chart-testing
# - name: Force run chart-testing (list-changed)
# id: list-changed
# run: echo "changed=true" >> $GITHUB_OUTPUT
# lint all charts if any changes were detected
- name: Run chart-testing (lint)
if: steps.list-changed.outputs.changed == 'true'
@@ -46,11 +64,179 @@ jobs:
- name: Create kind cluster
if: steps.list-changed.outputs.changed == 'true'
uses: helm/kind-action@v1.12.0
uses: helm/kind-action@92086f6be054225fa813e0a4b13787fc9088faab # ratchet:helm/kind-action@v1.13.0
- name: Run chart-testing (install)
- name: Pre-install cluster status check
if: steps.list-changed.outputs.changed == 'true'
run: ct install --all --helm-extra-set-args="--set=nginx.enabled=false" --debug --config ct.yaml
# the following would install only changed charts, but we only have one chart so
run: |
echo "=== Pre-install Cluster Status ==="
kubectl get nodes -o wide
kubectl get pods --all-namespaces
kubectl get storageclass
- name: Add Helm repositories and update
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Adding Helm repositories ==="
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo add vespa https://onyx-dot-app.github.io/vespa-helm-charts
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/
helm repo add code-interpreter https://onyx-dot-app.github.io/code-interpreter/
helm repo update
- name: Install Redis operator
if: steps.list-changed.outputs.changed == 'true'
shell: bash
run: |
echo "=== Installing redis-operator CRDs ==="
helm upgrade --install redis-operator ot-container-kit/redis-operator \
--namespace redis-operator --create-namespace --wait --timeout 300s
- name: Pre-pull required images
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Pre-pulling required images to avoid timeout ==="
KIND_CLUSTER=$(kubectl config current-context | sed 's/kind-//')
echo "Kind cluster: $KIND_CLUSTER"
IMAGES=(
"ghcr.io/cloudnative-pg/cloudnative-pg:1.27.0"
"quay.io/opstree/redis:v7.0.15"
"docker.io/onyxdotapp/onyx-web-server:latest"
)
for image in "${IMAGES[@]}"; do
echo "Pre-pulling $image"
if docker pull "$image"; then
kind load docker-image "$image" --name "$KIND_CLUSTER" || echo "Failed to load $image into kind"
else
echo "Failed to pull $image"
fi
done
echo "=== Images loaded into Kind cluster ==="
docker exec "$KIND_CLUSTER"-control-plane crictl images | grep -E "(cloudnative-pg|redis|onyx)" || echo "Some images may still be loading..."
- name: Validate chart dependencies
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Validating chart dependencies ==="
cd deployment/helm/charts/onyx
helm dependency update
helm lint .
- name: Run chart-testing (install) with enhanced monitoring
timeout-minutes: 25
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Starting chart installation with monitoring ==="
# Function to monitor cluster state
monitor_cluster() {
while true; do
echo "=== Cluster Status Check at $(date) ==="
# Only show non-running pods to reduce noise
NON_RUNNING_PODS=$(kubectl get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded --no-headers 2>/dev/null | wc -l)
if [ "$NON_RUNNING_PODS" -gt 0 ]; then
echo "Non-running pods:"
kubectl get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded
else
echo "All pods running successfully"
fi
# Only show recent events if there are issues
RECENT_EVENTS=$(kubectl get events --sort-by=.lastTimestamp --all-namespaces --field-selector=type!=Normal 2>/dev/null | tail -5)
if [ -n "$RECENT_EVENTS" ]; then
echo "Recent warnings/errors:"
echo "$RECENT_EVENTS"
fi
sleep 60
done
}
# Start monitoring in background
monitor_cluster &
MONITOR_PID=$!
# Set up cleanup
cleanup() {
echo "=== Cleaning up monitoring process ==="
kill $MONITOR_PID 2>/dev/null || true
echo "=== Final cluster state ==="
kubectl get pods --all-namespaces
kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -20
}
# Trap cleanup on exit
trap cleanup EXIT
# Run the actual installation with detailed logging
echo "=== Starting ct install ==="
set +e
ct install --all \
--helm-extra-set-args="\
--set=nginx.enabled=false \
--set=minio.enabled=false \
--set=vespa.enabled=false \
--set=slackbot.enabled=false \
--set=postgresql.enabled=true \
--set=postgresql.nameOverride=cloudnative-pg \
--set=postgresql.cluster.storage.storageClass=standard \
--set=redis.enabled=true \
--set=redis.storageSpec.volumeClaimTemplate.spec.storageClassName=standard \
--set=webserver.replicaCount=1 \
--set=api.replicaCount=0 \
--set=inferenceCapability.replicaCount=0 \
--set=indexCapability.replicaCount=0 \
--set=celery_beat.replicaCount=0 \
--set=celery_worker_heavy.replicaCount=0 \
--set=celery_worker_docfetching.replicaCount=0 \
--set=celery_worker_docprocessing.replicaCount=0 \
--set=celery_worker_light.replicaCount=0 \
--set=celery_worker_monitoring.replicaCount=0 \
--set=celery_worker_primary.replicaCount=0 \
--set=celery_worker_user_file_processing.replicaCount=0 \
--set=celery_worker_user_files_indexing.replicaCount=0" \
--helm-extra-args="--timeout 900s --debug" \
--debug --config ct.yaml
CT_EXIT=$?
set -e
if [[ $CT_EXIT -ne 0 ]]; then
echo "ct install failed with exit code $CT_EXIT"
exit $CT_EXIT
else
echo "=== Installation completed successfully ==="
fi
kubectl get pods --all-namespaces
- name: Post-install verification
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Post-install verification ==="
kubectl get pods --all-namespaces
kubectl get services --all-namespaces
# Only show issues if they exist
kubectl describe pods --all-namespaces | grep -A 5 -B 2 "Failed\|Error\|Warning" || echo "No pod issues found"
- name: Cleanup on failure
if: failure() && steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Cleanup on failure ==="
echo "=== Final cluster state ==="
kubectl get pods --all-namespaces
kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -10
echo "=== Pod descriptions for debugging ==="
kubectl describe pods --all-namespaces | grep -A 10 -B 3 "Failed\|Error\|Warning\|Pending" || echo "No problematic pods found"
echo "=== Recent logs for debugging ==="
kubectl logs --all-namespaces --tail=50 | grep -i "error\|timeout\|failed\|pull" || echo "No error logs found"
echo "=== Helm releases ==="
helm list --all-namespaces
# the following would install only changed charts, but we only have one chart so
# don't worry about that for now
# run: ct install --target-branch ${{ github.event.repository.default_branch }}

View File

@@ -9,234 +9,562 @@ on:
branches:
- main
- "release/**"
push:
tags:
- "v*.*.*"
permissions:
contents: read
env:
# Test Environment Variables
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
CONFLUENCE_TEST_SPACE_URL: ${{ vars.CONFLUENCE_TEST_SPACE_URL }}
CONFLUENCE_USER_NAME: ${{ vars.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}
JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
JIRA_API_TOKEN_SCOPED: ${{ secrets.JIRA_API_TOKEN_SCOPED }}
PERM_SYNC_SHAREPOINT_CLIENT_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_CLIENT_ID }}
PERM_SYNC_SHAREPOINT_PRIVATE_KEY: ${{ secrets.PERM_SYNC_SHAREPOINT_PRIVATE_KEY }}
PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD: ${{ secrets.PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD }}
PERM_SYNC_SHAREPOINT_DIRECTORY_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_DIRECTORY_ID }}
EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
jobs:
integration-tests:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on, runner=32cpu-linux-x64, "run-id=${{ github.run_id }}"]
discover-test-dirs:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 45
outputs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Discover test directories
id: set-matrix
run: |
# Find all leaf-level directories in both test directories
tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)
connector_dirs=$(find backend/tests/integration/connector_job_tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)
# Create JSON array with directory info
all_dirs=""
for dir in $tests_dirs; do
all_dirs="$all_dirs{\"path\":\"tests/$dir\",\"name\":\"tests-$dir\"},"
done
for dir in $connector_dirs; do
all_dirs="$all_dirs{\"path\":\"connector_job_tests/$dir\",\"name\":\"connector-$dir\"},"
done
# Remove trailing comma and wrap in array
all_dirs="[${all_dirs%,}]"
echo "test-dirs=$all_dirs" >> $GITHUB_OUTPUT
build-backend-image:
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
# tag every docker image with "test" so that we can spin up the correct set
# of images during testing
# We don't need to build the Web Docker image since it's not yet used
# in the integration tests. We have a separate action to verify that it builds
# successfully.
- name: Pull Web Docker image
run: |
docker pull onyxdotapp/onyx-web-server:latest
docker tag onyxdotapp/onyx-web-server:latest onyxdotapp/onyx-web-server:test
# we use the runs-on cache for docker builds
# in conjunction with runs-on runners, it has better speed and unlimited caching
# https://runs-on.com/caching/s3-cache-for-github-actions/
# https://runs-on.com/caching/docker/
# https://github.com/moby/buildkit#s3-cache-experimental
# images are built and run locally for testing purposes. Not pushed.
- name: Build Backend Docker image
uses: ./.github/actions/custom-build-and-push
- name: Build and push Backend Docker image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile
platforms: linux/amd64
tags: onyxdotapp/onyx-backend:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
push: true
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }}
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache
type=registry,ref=onyxdotapp/onyx-backend:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
- name: Build Model Server Docker image
uses: ./.github/actions/custom-build-and-push
build-model-server-image:
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push Model Server Docker image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile.model_server
platforms: linux/amd64
tags: onyxdotapp/onyx-model-server:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
push: true
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }}
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
type=registry,ref=onyxdotapp/onyx-model-server:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
- name: Build integration test Docker image
uses: ./.github/actions/custom-build-and-push
build-integration-image:
runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-build-integration-image", "extras=ecr-cache"]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
context: ./backend
file: ./backend/tests/integration/Dockerfile
platforms: linux/amd64
tags: onyxdotapp/onyx-integration:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
persist-credentials: false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Build and push integration test image with Docker Bake
env:
INTEGRATION_REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
TAG: integration-test-${{ github.run_id }}
CACHE_SUFFIX: ${{ steps.format-branch.outputs.cache-suffix }}
HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
run: |
cd backend && docker buildx bake --push \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA} \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX} \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache \
--set backend.cache-from=type=registry,ref=onyxdotapp/onyx-backend:latest \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA},mode=max \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX},mode=max \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache,mode=max \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA} \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX} \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA},mode=max \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX},mode=max \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache,mode=max \
integration
integration-tests:
needs:
[
discover-test-dirs,
build-backend-image,
build-model-server-image,
build-integration-image,
]
runs-on:
- runs-on
- runner=4cpu-linux-arm64
- ${{ format('run-id={0}-integration-tests-job-{1}', github.run_id, strategy['job-index']) }}
- extras=ecr-cache
timeout-minutes: 45
strategy:
fail-fast: false
matrix:
test-dir: ${{ fromJson(needs.discover-test-dirs.outputs.test-dirs) }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
# NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
# NOTE: don't need web server for integration tests
- name: Start Docker containers
env:
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
RUN_ID: ${{ github.run_id }}
run: |
cd deployment/docker_compose
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
AUTH_TYPE=basic \
POSTGRES_POOL_PRE_PING=true \
POSTGRES_USE_NULL_POOL=true \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID} \
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID} \
INTEGRATION_TESTS_MODE=true \
CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001 \
MCP_SERVER_ENABLED=true \
docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
relational_db \
index \
cache \
minio \
api_server \
inference_model_server \
indexing_model_server \
mcp_server \
background \
-d
id: start_docker
- name: Wait for services to be ready
run: |
echo "Starting wait-for-service script..."
wait_for_service() {
local url=$1
local label=$2
local timeout=${3:-300} # default 5 minutes
local start_time
start_time=$(date +%s)
while true; do
local current_time
current_time=$(date +%s)
local elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. ${label} did not become ready in $timeout seconds."
exit 1
fi
local response
response=$(curl -s -o /dev/null -w "%{http_code}" "$url" || echo "curl_error")
if [ "$response" = "200" ]; then
echo "${label} is ready!"
break
elif [ "$response" = "curl_error" ]; then
echo "Curl encountered an error while checking ${label}. Retrying in 5 seconds..."
else
echo "${label} not ready yet (HTTP status $response). Retrying in 5 seconds..."
fi
sleep 5
done
}
wait_for_service "http://localhost:8080/health" "API server"
test_dir="${{ matrix.test-dir.path }}"
if [ "$test_dir" = "tests/mcp" ]; then
wait_for_service "http://localhost:8090/health" "MCP server"
else
echo "Skipping MCP server wait for non-MCP suite: $test_dir"
fi
echo "Finished waiting for services."
- name: Start Mock Services
run: |
cd backend/tests/integration/mock_services
docker compose -f docker-compose.mock-it-services.yml \
-p mock-it-services-stack up -d
- name: Run Integration Tests for ${{ matrix.test-dir.name }}
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
timeout_minutes: 20
max_attempts: 3
retry_wait_seconds: 10
command: |
echo "Running integration tests for ${{ matrix.test-dir.path }}..."
docker run --rm --network onyx_default \
--name test-runner \
-e POSTGRES_HOST=relational_db \
-e POSTGRES_USER=postgres \
-e POSTGRES_PASSWORD=password \
-e POSTGRES_DB=postgres \
-e DB_READONLY_USER=db_readonly_user \
-e DB_READONLY_PASSWORD=password \
-e POSTGRES_POOL_PRE_PING=true \
-e POSTGRES_USE_NULL_POOL=true \
-e VESPA_HOST=index \
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
-e MCP_SERVER_HOST=mcp_server \
-e MCP_SERVER_PORT=8090 \
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
-e EXA_API_KEY=${EXA_API_KEY} \
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
-e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
-e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
-e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
-e CONFLUENCE_ACCESS_TOKEN_SCOPED=${CONFLUENCE_ACCESS_TOKEN_SCOPED} \
-e JIRA_BASE_URL=${JIRA_BASE_URL} \
-e JIRA_USER_EMAIL=${JIRA_USER_EMAIL} \
-e JIRA_API_TOKEN=${JIRA_API_TOKEN} \
-e JIRA_API_TOKEN_SCOPED=${JIRA_API_TOKEN_SCOPED} \
-e PERM_SYNC_SHAREPOINT_CLIENT_ID=${PERM_SYNC_SHAREPOINT_CLIENT_ID} \
-e PERM_SYNC_SHAREPOINT_PRIVATE_KEY="${PERM_SYNC_SHAREPOINT_PRIVATE_KEY}" \
-e PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD=${PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD} \
-e PERM_SYNC_SHAREPOINT_DIRECTORY_ID=${PERM_SYNC_SHAREPOINT_DIRECTORY_ID} \
-e TEST_WEB_HOSTNAME=test-runner \
-e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
-e MOCK_CONNECTOR_SERVER_PORT=8001 \
${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \
/app/tests/integration/${{ matrix.test-dir.path }}
# ------------------------------------------------------------
# Always gather logs BEFORE "down":
- name: Dump API server logs
if: always()
run: |
cd deployment/docker_compose
docker compose logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true
- name: Dump all-container logs (optional)
if: always()
run: |
cd deployment/docker_compose
docker compose logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
- name: Upload logs
if: always()
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs-${{ matrix.test-dir.name }}
path: ${{ github.workspace }}/docker-compose.log
# ------------------------------------------------------------
multitenant-tests:
needs:
[
build-backend-image,
build-model-server-image,
build-integration-image,
]
runs-on: [runs-on, runner=8cpu-linux-arm64, "run-id=${{ github.run_id }}-multitenant-tests", "extras=ecr-cache"]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
# Start containers for multi-tenant tests
- name: Start Docker containers for multi-tenant tests
env:
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
RUN_ID: ${{ github.run_id }}
run: |
cd deployment/docker_compose
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
MULTI_TENANT=true \
AUTH_TYPE=basic \
AUTH_TYPE=cloud \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
docker compose -f docker-compose.dev.yml -p danswer-stack up -d
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID} \
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID} \
DEV_MODE=true \
MCP_SERVER_ENABLED=true \
docker compose -f docker-compose.multitenant-dev.yml up \
relational_db \
index \
cache \
minio \
api_server \
inference_model_server \
indexing_model_server \
mcp_server \
background \
-d
id: start_docker_multi_tenant
# In practice, `cloud` Auth type would require OAUTH credentials to be set.
- name: Run Multi-Tenant Integration Tests
- name: Wait for service to be ready (multi-tenant)
run: |
echo "Running integration tests..."
docker run --rm --network danswer-stack_default \
--name test-runner \
-e POSTGRES_HOST=relational_db \
-e POSTGRES_USER=postgres \
-e POSTGRES_PASSWORD=password \
-e POSTGRES_DB=postgres \
-e VESPA_HOST=index \
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
-e TEST_WEB_HOSTNAME=test-runner \
-e AUTH_TYPE=cloud \
-e MULTI_TENANT=true \
onyxdotapp/onyx-integration:test \
/app/tests/integration/multitenant_tests
continue-on-error: true
id: run_multitenant_tests
- name: Check multi-tenant test results
run: |
if [ ${{ steps.run_tests.outcome }} == 'failure' ]; then
echo "Integration tests failed. Exiting with error."
exit 1
else
echo "All integration tests passed successfully."
fi
- name: Stop multi-tenant Docker containers
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack down -v
- name: Start Docker containers
run: |
cd deployment/docker_compose
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
AUTH_TYPE=basic \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
docker compose -f docker-compose.dev.yml -p danswer-stack up -d
id: start_docker
- name: Wait for service to be ready
run: |
echo "Starting wait-for-service script..."
docker logs -f danswer-stack-api_server-1 &
echo "Starting wait-for-service script for multi-tenant..."
docker logs -f onyx-api_server-1 &
start_time=$(date +%s)
timeout=300 # 5 minutes in seconds
timeout=300
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. Service did not become ready in 5 minutes."
exit 1
fi
# Use curl with error handling to ignore specific exit code 56
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
if [ "$response" = "200" ]; then
echo "Service is ready!"
break
elif [ "$response" = "curl_error" ]; then
echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
echo "Curl encountered an error; retrying..."
else
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
echo "Service not ready yet (HTTP $response). Retrying in 5 seconds..."
fi
sleep 5
done
echo "Finished waiting for service."
- name: Run Standard Integration Tests
- name: Run Multi-Tenant Integration Tests
env:
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
RUN_ID: ${{ github.run_id }}
run: |
echo "Running integration tests..."
docker run --rm --network danswer-stack_default \
echo "Running multi-tenant integration tests..."
docker run --rm --network onyx_default \
--name test-runner \
-e POSTGRES_HOST=relational_db \
-e POSTGRES_USER=postgres \
-e POSTGRES_PASSWORD=password \
-e DB_READONLY_USER=db_readonly_user \
-e DB_READONLY_PASSWORD=password \
-e POSTGRES_DB=postgres \
-e POSTGRES_USE_NULL_POOL=true \
-e VESPA_HOST=index \
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
-e MCP_SERVER_HOST=mcp_server \
-e MCP_SERVER_PORT=8090 \
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
-e EXA_API_KEY=${EXA_API_KEY} \
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
-e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
-e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
-e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
-e TEST_WEB_HOSTNAME=test-runner \
onyxdotapp/onyx-integration:test \
/app/tests/integration/tests \
/app/tests/integration/connector_job_tests
continue-on-error: true
id: run_tests
-e AUTH_TYPE=cloud \
-e MULTI_TENANT=true \
-e SKIP_RESET=true \
-e REQUIRE_EMAIL_VERIFICATION=false \
-e DISABLE_TELEMETRY=true \
-e DEV_MODE=true \
${ECR_CACHE}:integration-test-${RUN_ID} \
/app/tests/integration/multitenant_tests
- name: Check test results
run: |
if [ ${{ steps.run_tests.outcome }} == 'failure' ]; then
echo "Integration tests failed. Exiting with error."
exit 1
else
echo "All integration tests passed successfully."
fi
# save before stopping the containers so the logs can be captured
- name: Save Docker logs
if: success() || failure()
- name: Dump API server logs (multi-tenant)
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack logs > docker-compose.log
mv docker-compose.log ${{ github.workspace }}/docker-compose.log
docker compose -f docker-compose.multitenant-dev.yml logs --no-color api_server > $GITHUB_WORKSPACE/api_server_multitenant.log || true
- name: Stop Docker containers
- name: Dump all-container logs (multi-tenant)
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack down -v
docker compose -f docker-compose.multitenant-dev.yml logs --no-color > $GITHUB_WORKSPACE/docker-compose-multitenant.log || true
- name: Upload logs
if: success() || failure()
uses: actions/upload-artifact@v4
- name: Upload logs (multi-tenant)
if: always()
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-logs
path: ${{ github.workspace }}/docker-compose.log
name: docker-all-logs-multitenant
path: ${{ github.workspace }}/docker-compose-multitenant.log
- name: Stop Docker containers
- name: Stop multi-tenant Docker containers
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack down -v
docker compose -f docker-compose.multitenant-dev.yml down -v
required:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 45
needs: [integration-tests, multitenant-tests]
if: ${{ always() }}
steps:
- name: Check job status
if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped') }}
run: exit 1

44
.github/workflows/pr-jest-tests.yml vendored Normal file
View File

@@ -0,0 +1,44 @@
name: Run Jest Tests
concurrency:
group: Run-Jest-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
push:
permissions:
contents: read
jobs:
jest-tests:
name: Jest Tests
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Setup node
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
with:
node-version: 22
cache: "npm"
cache-dependency-path: ./web/package-lock.json
- name: Install node dependencies
working-directory: ./web
run: npm ci
- name: Run Jest tests
working-directory: ./web
run: npm test -- --ci --coverage --maxWorkers=50%
- name: Upload coverage reports
if: always()
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: jest-coverage-${{ github.run_id }}
path: ./web/coverage
retention-days: 7

38
.github/workflows/pr-labeler.yml vendored Normal file
View File

@@ -0,0 +1,38 @@
name: PR Labeler
on:
pull_request:
branches:
- main
types:
- opened
- reopened
- synchronize
- edited
permissions:
contents: read
jobs:
validate_pr_title:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- name: Check PR title for Conventional Commits
env:
PR_TITLE: ${{ github.event.pull_request.title }}
run: |
echo "PR Title: $PR_TITLE"
if [[ ! "$PR_TITLE" =~ ^(feat|fix|docs|test|ci|refactor|perf|chore|revert|build)(\(.+\))?:\ .+ ]]; then
echo "::error::❌ Your PR title does not follow the Conventional Commits format.
This check ensures that all pull requests use clear, consistent titles that help automate changelogs and improve project history.
Please update your PR title to follow the Conventional Commits style.
Here is a link to a blog explaining the reason why we've included the Conventional Commits style into our PR titles: https://xfuture-blog.com/working-with-conventional-commits
**Here are some examples of valid PR titles:**
- feat: add user authentication
- fix(login): handle null password error
- docs(readme): update installation instructions"
exit 1
fi

View File

@@ -1,12 +1,19 @@
name: Ensure PR references Linear
concurrency:
group: Ensure-PR-references-Linear-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
pull_request:
types: [opened, edited, reopened, synchronize]
permissions:
contents: read
jobs:
linear-check:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- name: Check PR body for Linear link or override
env:

View File

@@ -0,0 +1,428 @@
name: Run MIT Integration Tests v2
concurrency:
group: Run-MIT-Integration-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
types: [checks_requested]
push:
tags:
- "v*.*.*"
permissions:
contents: read
env:
# Test Environment Variables
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
CONFLUENCE_TEST_SPACE_URL: ${{ vars.CONFLUENCE_TEST_SPACE_URL }}
CONFLUENCE_USER_NAME: ${{ vars.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}
JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
JIRA_API_TOKEN_SCOPED: ${{ secrets.JIRA_API_TOKEN_SCOPED }}
PERM_SYNC_SHAREPOINT_CLIENT_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_CLIENT_ID }}
PERM_SYNC_SHAREPOINT_PRIVATE_KEY: ${{ secrets.PERM_SYNC_SHAREPOINT_PRIVATE_KEY }}
PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD: ${{ secrets.PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD }}
PERM_SYNC_SHAREPOINT_DIRECTORY_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_DIRECTORY_ID }}
jobs:
discover-test-dirs:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 45
outputs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Discover test directories
id: set-matrix
run: |
# Find all leaf-level directories in both test directories
tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)
connector_dirs=$(find backend/tests/integration/connector_job_tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)
# Create JSON array with directory info
all_dirs=""
for dir in $tests_dirs; do
all_dirs="$all_dirs{\"path\":\"tests/$dir\",\"name\":\"tests-$dir\"},"
done
for dir in $connector_dirs; do
all_dirs="$all_dirs{\"path\":\"connector_job_tests/$dir\",\"name\":\"connector-$dir\"},"
done
# Remove trailing comma and wrap in array
all_dirs="[${all_dirs%,}]"
echo "test-dirs=$all_dirs" >> $GITHUB_OUTPUT
build-backend-image:
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push Backend Docker image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile
push: true
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }}
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache
type=registry,ref=onyxdotapp/onyx-backend:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
build-model-server-image:
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push Model Server Docker image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile.model_server
push: true
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }}
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
type=registry,ref=onyxdotapp/onyx-model-server:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
build-integration-image:
runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-build-integration-image", "extras=ecr-cache"]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push integration test image with Docker Bake
env:
INTEGRATION_REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
TAG: integration-test-${{ github.run_id }}
CACHE_SUFFIX: ${{ steps.format-branch.outputs.cache-suffix }}
HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
run: |
cd backend && docker buildx bake --push \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA} \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX} \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache \
--set backend.cache-from=type=registry,ref=onyxdotapp/onyx-backend:latest \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA},mode=max \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX},mode=max \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache,mode=max \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA} \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX} \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA},mode=max \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX},mode=max \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache,mode=max \
integration
integration-tests-mit:
needs:
[
discover-test-dirs,
build-backend-image,
build-model-server-image,
build-integration-image,
]
runs-on:
- runs-on
- runner=4cpu-linux-arm64
- ${{ format('run-id={0}-integration-tests-mit-job-{1}', github.run_id, strategy['job-index']) }}
- extras=ecr-cache
timeout-minutes: 45
strategy:
fail-fast: false
matrix:
test-dir: ${{ fromJson(needs.discover-test-dirs.outputs.test-dirs) }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
# NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
# NOTE: don't need web server for integration tests
- name: Start Docker containers
env:
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
RUN_ID: ${{ github.run_id }}
run: |
cd deployment/docker_compose
AUTH_TYPE=basic \
POSTGRES_POOL_PRE_PING=true \
POSTGRES_USE_NULL_POOL=true \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID} \
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID} \
INTEGRATION_TESTS_MODE=true \
MCP_SERVER_ENABLED=true \
docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
relational_db \
index \
cache \
minio \
api_server \
inference_model_server \
indexing_model_server \
mcp_server \
background \
-d
id: start_docker
- name: Wait for services to be ready
run: |
echo "Starting wait-for-service script..."
wait_for_service() {
local url=$1
local label=$2
local timeout=${3:-300} # default 5 minutes
local start_time
start_time=$(date +%s)
while true; do
local current_time
current_time=$(date +%s)
local elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. ${label} did not become ready in $timeout seconds."
exit 1
fi
local response
response=$(curl -s -o /dev/null -w "%{http_code}" "$url" || echo "curl_error")
if [ "$response" = "200" ]; then
echo "${label} is ready!"
break
elif [ "$response" = "curl_error" ]; then
echo "Curl encountered an error while checking ${label}. Retrying in 5 seconds..."
else
echo "${label} not ready yet (HTTP status $response). Retrying in 5 seconds..."
fi
sleep 5
done
}
wait_for_service "http://localhost:8080/health" "API server"
test_dir="${{ matrix.test-dir.path }}"
if [ "$test_dir" = "tests/mcp" ]; then
wait_for_service "http://localhost:8090/health" "MCP server"
else
echo "Skipping MCP server wait for non-MCP suite: $test_dir"
fi
echo "Finished waiting for services."
- name: Start Mock Services
run: |
cd backend/tests/integration/mock_services
docker compose -f docker-compose.mock-it-services.yml \
-p mock-it-services-stack up -d
# NOTE: Use pre-ping/null to reduce flakiness due to dropped connections
- name: Run Integration Tests for ${{ matrix.test-dir.name }}
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
timeout_minutes: 20
max_attempts: 3
retry_wait_seconds: 10
command: |
echo "Running integration tests for ${{ matrix.test-dir.path }}..."
docker run --rm --network onyx_default \
--name test-runner \
-e POSTGRES_HOST=relational_db \
-e POSTGRES_USER=postgres \
-e POSTGRES_PASSWORD=password \
-e POSTGRES_DB=postgres \
-e DB_READONLY_USER=db_readonly_user \
-e DB_READONLY_PASSWORD=password \
-e POSTGRES_POOL_PRE_PING=true \
-e POSTGRES_USE_NULL_POOL=true \
-e VESPA_HOST=index \
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
-e MCP_SERVER_HOST=mcp_server \
-e MCP_SERVER_PORT=8090 \
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
-e EXA_API_KEY=${EXA_API_KEY} \
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
-e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
-e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
-e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
-e CONFLUENCE_ACCESS_TOKEN_SCOPED=${CONFLUENCE_ACCESS_TOKEN_SCOPED} \
-e JIRA_BASE_URL=${JIRA_BASE_URL} \
-e JIRA_USER_EMAIL=${JIRA_USER_EMAIL} \
-e JIRA_API_TOKEN=${JIRA_API_TOKEN} \
-e JIRA_API_TOKEN_SCOPED=${JIRA_API_TOKEN_SCOPED} \
-e PERM_SYNC_SHAREPOINT_CLIENT_ID=${PERM_SYNC_SHAREPOINT_CLIENT_ID} \
-e PERM_SYNC_SHAREPOINT_PRIVATE_KEY="${PERM_SYNC_SHAREPOINT_PRIVATE_KEY}" \
-e PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD=${PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD} \
-e PERM_SYNC_SHAREPOINT_DIRECTORY_ID=${PERM_SYNC_SHAREPOINT_DIRECTORY_ID} \
-e TEST_WEB_HOSTNAME=test-runner \
-e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
-e MOCK_CONNECTOR_SERVER_PORT=8001 \
${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \
/app/tests/integration/${{ matrix.test-dir.path }}
# ------------------------------------------------------------
# Always gather logs BEFORE "down":
- name: Dump API server logs
if: always()
run: |
cd deployment/docker_compose
docker compose logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true
- name: Dump all-container logs (optional)
if: always()
run: |
cd deployment/docker_compose
docker compose logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
- name: Upload logs
if: always()
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs-${{ matrix.test-dir.name }}
path: ${{ github.workspace }}/docker-compose.log
# ------------------------------------------------------------
required:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 45
needs: [integration-tests-mit]
if: ${{ always() }}
steps:
- name: Check job status
if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped') }}
run: exit 1

View File

@@ -0,0 +1,494 @@
name: Run Playwright Tests
concurrency:
group: Run-Playwright-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
push:
permissions:
contents: read
env:
# Test Environment Variables
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
GEN_AI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
# for federated slack tests
SLACK_CLIENT_ID: ${{ secrets.SLACK_CLIENT_ID }}
SLACK_CLIENT_SECRET: ${{ secrets.SLACK_CLIENT_SECRET }}
# for MCP Oauth tests
MCP_OAUTH_CLIENT_ID: ${{ secrets.MCP_OAUTH_CLIENT_ID }}
MCP_OAUTH_CLIENT_SECRET: ${{ secrets.MCP_OAUTH_CLIENT_SECRET }}
MCP_OAUTH_ISSUER: ${{ secrets.MCP_OAUTH_ISSUER }}
MCP_OAUTH_JWKS_URI: ${{ secrets.MCP_OAUTH_JWKS_URI }}
MCP_OAUTH_USERNAME: ${{ vars.MCP_OAUTH_USERNAME }}
MCP_OAUTH_PASSWORD: ${{ secrets.MCP_OAUTH_PASSWORD }}
# for MCP API Key tests
MCP_API_KEY: test-api-key-12345
MCP_API_KEY_TEST_PORT: 8005
MCP_API_KEY_TEST_URL: http://host.docker.internal:8005/mcp
MCP_API_KEY_SERVER_HOST: 0.0.0.0
MCP_API_KEY_SERVER_PUBLIC_HOST: host.docker.internal
MOCK_LLM_RESPONSE: true
MCP_TEST_SERVER_PORT: 8004
MCP_TEST_SERVER_URL: http://host.docker.internal:8004/mcp
MCP_TEST_SERVER_PUBLIC_URL: http://host.docker.internal:8004/mcp
MCP_TEST_SERVER_BIND_HOST: 0.0.0.0
MCP_TEST_SERVER_PUBLIC_HOST: host.docker.internal
MCP_SERVER_HOST: 0.0.0.0
MCP_SERVER_PUBLIC_HOST: host.docker.internal
MCP_SERVER_PUBLIC_URL: http://host.docker.internal:8004/mcp
jobs:
build-web-image:
runs-on: [runs-on, runner=4cpu-linux-arm64, "run-id=${{ github.run_id }}-build-web-image", "extras=ecr-cache"]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push Web Docker image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./web
file: ./web/Dockerfile
platforms: linux/arm64
tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-${{ github.run_id }}
push: true
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache
type=registry,ref=onyxdotapp/onyx-web-server:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
build-backend-image:
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push Backend Docker image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile
platforms: linux/arm64
tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-${{ github.run_id }}
push: true
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache
type=registry,ref=onyxdotapp/onyx-backend:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
build-model-server-image:
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push Model Server Docker image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile.model_server
platforms: linux/arm64
tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-${{ github.run_id }}
push: true
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
type=registry,ref=onyxdotapp/onyx-model-server:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
playwright-tests:
needs: [build-web-image, build-backend-image, build-model-server-image]
name: Playwright Tests (${{ matrix.project }})
runs-on:
- runs-on
- runner=8cpu-linux-arm64
- "run-id=${{ github.run_id }}-playwright-tests-${{ matrix.project }}"
- "extras=ecr-cache"
- volume=50gb
timeout-minutes: 45
strategy:
fail-fast: false
matrix:
project: [admin, no-auth, exclusive]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false
- name: Setup node
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
with:
node-version: 22
cache: 'npm'
cache-dependency-path: ./web/package-lock.json
- name: Install node dependencies
working-directory: ./web
run: npm ci
- name: Cache playwright cache
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
with:
path: ~/.cache/ms-playwright
key: ${{ runner.os }}-playwright-npm-${{ hashFiles('web/package-lock.json') }}
restore-keys: |
${{ runner.os }}-playwright-npm-
- name: Install playwright browsers
working-directory: ./web
run: npx playwright install --with-deps
- name: Create .env file for Docker Compose
env:
OPENAI_API_KEY_VALUE: ${{ env.OPENAI_API_KEY }}
EXA_API_KEY_VALUE: ${{ env.EXA_API_KEY }}
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
RUN_ID: ${{ github.run_id }}
run: |
cat <<EOF > deployment/docker_compose/.env
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
AUTH_TYPE=basic
GEN_AI_API_KEY=${OPENAI_API_KEY_VALUE}
EXA_API_KEY=${EXA_API_KEY_VALUE}
REQUIRE_EMAIL_VERIFICATION=false
DISABLE_TELEMETRY=true
ONYX_BACKEND_IMAGE=${ECR_CACHE}:playwright-test-backend-${RUN_ID}
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:playwright-test-model-server-${RUN_ID}
ONYX_WEB_SERVER_IMAGE=${ECR_CACHE}:playwright-test-web-${RUN_ID}
EOF
if [ "${{ matrix.project }}" = "no-auth" ]; then
echo "PLAYWRIGHT_FORCE_EMPTY_LLM_PROVIDERS=true" >> deployment/docker_compose/.env
fi
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Start Docker containers
run: |
cd deployment/docker_compose
docker compose -f docker-compose.yml -f docker-compose.dev.yml -f docker-compose.mcp-oauth-test.yml -f docker-compose.mcp-api-key-test.yml up -d
id: start_docker
- name: Wait for service to be ready
run: |
echo "Starting wait-for-service script..."
docker logs -f onyx-api_server-1 &
start_time=$(date +%s)
timeout=300 # 5 minutes in seconds
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. Service did not become ready in 5 minutes."
exit 1
fi
# Use curl with error handling to ignore specific exit code 56
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
if [ "$response" = "200" ]; then
echo "Service is ready!"
break
elif [ "$response" = "curl_error" ]; then
echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
else
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
fi
sleep 5
done
echo "Finished waiting for service."
- name: Wait for MCP OAuth mock server
run: |
echo "Waiting for MCP OAuth mock server on port ${MCP_TEST_SERVER_PORT:-8004}..."
start_time=$(date +%s)
timeout=120
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. MCP OAuth mock server did not become ready in ${timeout}s."
exit 1
fi
if curl -sf "http://localhost:${MCP_TEST_SERVER_PORT:-8004}/healthz" > /dev/null; then
echo "MCP OAuth mock server is ready!"
break
fi
sleep 3
done
- name: Wait for MCP API Key mock server
run: |
echo "Waiting for MCP API Key mock server on port ${MCP_API_KEY_TEST_PORT:-8005}..."
start_time=$(date +%s)
timeout=120
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. MCP API Key mock server did not become ready in ${timeout}s."
exit 1
fi
if curl -sf "http://localhost:${MCP_API_KEY_TEST_PORT:-8005}/healthz" > /dev/null; then
echo "MCP API Key mock server is ready!"
break
fi
sleep 3
done
- name: Wait for web server to be ready
run: |
echo "Waiting for web server on port 3000..."
start_time=$(date +%s)
timeout=120
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. Web server did not become ready in ${timeout}s."
exit 1
fi
if curl -sf "http://localhost:3000/api/health" > /dev/null 2>&1 || \
curl -sf "http://localhost:3000/" > /dev/null 2>&1; then
echo "Web server is ready!"
break
fi
echo "Web server not ready yet. Retrying in 3 seconds..."
sleep 3
done
- name: Run Playwright tests
working-directory: ./web
env:
PROJECT: ${{ matrix.project }}
run: |
# Create test-results directory to ensure it exists for artifact upload
mkdir -p test-results
if [ "${PROJECT}" = "no-auth" ]; then
export PLAYWRIGHT_FORCE_EMPTY_LLM_PROVIDERS=true
fi
npx playwright test --project ${PROJECT}
- uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
if: always()
with:
# Includes test results and trace.zip files
name: playwright-test-results-${{ matrix.project }}-${{ github.run_id }}
path: ./web/test-results/
retention-days: 30
# save before stopping the containers so the logs can be captured
- name: Save Docker logs
if: success() || failure()
env:
WORKSPACE: ${{ github.workspace }}
run: |
cd deployment/docker_compose
docker compose logs > docker-compose.log
mv docker-compose.log ${WORKSPACE}/docker-compose.log
- name: Upload logs
if: success() || failure()
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-logs-${{ matrix.project }}-${{ github.run_id }}
path: ${{ github.workspace }}/docker-compose.log
playwright-required:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 45
needs: [playwright-tests]
if: ${{ always() }}
steps:
- name: Check job status
if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped') }}
run: exit 1
# NOTE: Chromatic UI diff testing is currently disabled.
# We are using Playwright for local and CI testing without visual regression checks.
# Chromatic may be reintroduced in the future for UI diff testing if needed.
# chromatic-tests:
# name: Chromatic Tests
# needs: playwright-tests
# runs-on:
# [
# runs-on,
# runner=32cpu-linux-x64,
# disk=large,
# "run-id=${{ github.run_id }}",
# ]
# steps:
# - name: Checkout code
# uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
# with:
# fetch-depth: 0
# - name: Setup node
# uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
# with:
# node-version: 22
# - name: Install node dependencies
# working-directory: ./web
# run: npm ci
# - name: Download Playwright test results
# uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # ratchet:actions/download-artifact@v4
# with:
# name: test-results
# path: ./web/test-results
# - name: Run Chromatic
# uses: chromaui/action@latest
# with:
# playwright: true
# projectToken: ${{ secrets.CHROMATIC_PROJECT_TOKEN }}
# workingDir: ./web
# env:
# CHROMATIC_ARCHIVE_LOCATION: ./test-results

View File

@@ -1,4 +1,7 @@
name: Python Checks
concurrency:
group: Python-Checks-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
@@ -6,47 +9,97 @@ on:
branches:
- main
- 'release/**'
push:
tags:
- "v*.*.*"
permissions:
contents: read
jobs:
validate-requirements:
runs-on: ubuntu-slim
timeout-minutes: 45
steps:
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Setup uv
uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # ratchet:astral-sh/setup-uv@v7.1.4
# TODO: Enable caching once there is a uv.lock file checked in.
# with:
# enable-cache: true
- name: Validate requirements lock files
run: ./backend/scripts/compile_requirements.py --check
mypy-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
# Note: Mypy seems quite optimized for x64 compared to arm64.
# Similarly, mypy is single-threaded and incremental, so 2cpu is sufficient.
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-mypy-check", "extras=s3-cache"]
timeout-minutes: 45
steps:
- name: Checkout code
uses: actions/checkout@v4
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.11'
cache: 'pip'
cache-dependency-path: |
backend/requirements/default.txt
backend/requirements/dev.txt
backend/requirements/model_server.txt
- run: |
python -m pip install --upgrade pip
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
with:
requirements: |
backend/requirements/default.txt
backend/requirements/dev.txt
backend/requirements/model_server.txt
backend/requirements/ee.txt
- name: Run MyPy
run: |
cd backend
mypy .
- name: Generate OpenAPI schema
shell: bash
working-directory: backend
env:
PYTHONPATH: "."
run: |
python scripts/onyx_openapi_schema.py --filename generated/openapi.json
- name: Run ruff
run: |
cd backend
ruff .
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Check import order with reorder-python-imports
run: |
cd backend
find ./danswer -name "*.py" | xargs reorder-python-imports --py311-plus
- name: Generate OpenAPI Python client
shell: bash
run: |
docker run --rm \
-v "${{ github.workspace }}/backend/generated:/local" \
openapitools/openapi-generator-cli generate \
-i /local/openapi.json \
-g python \
-o /local/onyx_openapi_client \
--package-name onyx_openapi_client \
--skip-validate-spec \
--openapi-normalizer "SIMPLIFY_ONEOF_ANYOF=true,SET_OAS3_NULLABLE=true"
- name: Check code formatting with Black
run: |
cd backend
black --check .
- name: Cache mypy cache
if: ${{ vars.DISABLE_MYPY_CACHE != 'true' }}
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
with:
path: backend/.mypy_cache
key: mypy-${{ runner.os }}-${{ hashFiles('**/*.py', '**/*.pyi', 'backend/pyproject.toml') }}
restore-keys: |
mypy-${{ runner.os }}-
- name: Run MyPy
working-directory: ./backend
env:
MYPY_FORCE_COLOR: 1
TERM: xterm-256color
run: mypy .

View File

@@ -1,87 +1,236 @@
name: Connector Tests
concurrency:
group: Connector-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
pull_request:
branches: [main]
push:
tags:
- "v*.*.*"
schedule:
# This cron expression runs the job daily at 16:00 UTC (9am PT)
- cron: "0 16 * * *"
permissions:
contents: read
env:
# AWS
AWS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS: ${{ secrets.AWS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS }}
AWS_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS: ${{ secrets.AWS_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS }}
# Cloudflare R2
R2_ACCOUNT_ID_DAILY_CONNECTOR_TESTS: ${{ vars.R2_ACCOUNT_ID_DAILY_CONNECTOR_TESTS }}
R2_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS: ${{ secrets.R2_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS }}
R2_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS: ${{ secrets.R2_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS }}
# Google Cloud Storage
GCS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS: ${{ secrets.GCS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS }}
GCS_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS: ${{ secrets.GCS_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS }}
# Confluence
CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
CONFLUENCE_TEST_SPACE: ${{ secrets.CONFLUENCE_TEST_SPACE }}
CONFLUENCE_IS_CLOUD: ${{ secrets.CONFLUENCE_IS_CLOUD }}
CONFLUENCE_TEST_SPACE_URL: ${{ vars.CONFLUENCE_TEST_SPACE_URL }}
CONFLUENCE_TEST_SPACE: ${{ vars.CONFLUENCE_TEST_SPACE }}
CONFLUENCE_TEST_PAGE_ID: ${{ secrets.CONFLUENCE_TEST_PAGE_ID }}
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
CONFLUENCE_USER_NAME: ${{ vars.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}
# Jira
JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
JIRA_API_TOKEN_SCOPED: ${{ secrets.JIRA_API_TOKEN_SCOPED }}
# Gong
GONG_ACCESS_KEY: ${{ secrets.GONG_ACCESS_KEY }}
GONG_ACCESS_KEY_SECRET: ${{ secrets.GONG_ACCESS_KEY_SECRET }}
# Google
GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR: ${{ secrets.GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR }}
GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1: ${{ secrets.GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1 }}
GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR }}
GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR: ${{ secrets.GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR }}
GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR }}
# Slab
SLAB_BOT_TOKEN: ${{ secrets.SLAB_BOT_TOKEN }}
# Zendesk
ZENDESK_SUBDOMAIN: ${{ secrets.ZENDESK_SUBDOMAIN }}
ZENDESK_EMAIL: ${{ secrets.ZENDESK_EMAIL }}
ZENDESK_TOKEN: ${{ secrets.ZENDESK_TOKEN }}
# Salesforce
SF_USERNAME: ${{ secrets.SF_USERNAME }}
SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
SF_SECURITY_TOKEN: ${{ secrets.SF_SECURITY_TOKEN }}
# Hubspot
HUBSPOT_ACCESS_TOKEN: ${{ secrets.HUBSPOT_ACCESS_TOKEN }}
# IMAP
IMAP_HOST: ${{ vars.IMAP_HOST }}
IMAP_USERNAME: ${{ vars.IMAP_USERNAME }}
IMAP_PASSWORD: ${{ secrets.IMAP_PASSWORD }}
IMAP_MAILBOXES: ${{ vars.IMAP_MAILBOXES }}
# Airtable
AIRTABLE_TEST_BASE_ID: ${{ secrets.AIRTABLE_TEST_BASE_ID }}
AIRTABLE_TEST_TABLE_ID: ${{ secrets.AIRTABLE_TEST_TABLE_ID }}
AIRTABLE_TEST_TABLE_NAME: ${{ secrets.AIRTABLE_TEST_TABLE_NAME }}
AIRTABLE_TEST_BASE_ID: ${{ vars.AIRTABLE_TEST_BASE_ID }}
AIRTABLE_TEST_TABLE_ID: ${{ vars.AIRTABLE_TEST_TABLE_ID }}
AIRTABLE_TEST_TABLE_NAME: ${{ vars.AIRTABLE_TEST_TABLE_NAME }}
AIRTABLE_ACCESS_TOKEN: ${{ secrets.AIRTABLE_ACCESS_TOKEN }}
# Sharepoint
SHAREPOINT_CLIENT_ID: ${{ secrets.SHAREPOINT_CLIENT_ID }}
SHAREPOINT_CLIENT_ID: ${{ vars.SHAREPOINT_CLIENT_ID }}
SHAREPOINT_CLIENT_SECRET: ${{ secrets.SHAREPOINT_CLIENT_SECRET }}
SHAREPOINT_CLIENT_DIRECTORY_ID: ${{ secrets.SHAREPOINT_CLIENT_DIRECTORY_ID }}
SHAREPOINT_SITE: ${{ secrets.SHAREPOINT_SITE }}
SHAREPOINT_CLIENT_DIRECTORY_ID: ${{ vars.SHAREPOINT_CLIENT_DIRECTORY_ID }}
SHAREPOINT_SITE: ${{ vars.SHAREPOINT_SITE }}
# Github
ACCESS_TOKEN_GITHUB: ${{ secrets.ACCESS_TOKEN_GITHUB }}
# Gitlab
GITLAB_ACCESS_TOKEN: ${{ secrets.GITLAB_ACCESS_TOKEN }}
# Gitbook
GITBOOK_SPACE_ID: ${{ secrets.GITBOOK_SPACE_ID }}
GITBOOK_API_KEY: ${{ secrets.GITBOOK_API_KEY }}
# Notion
NOTION_INTEGRATION_TOKEN: ${{ secrets.NOTION_INTEGRATION_TOKEN }}
# Highspot
HIGHSPOT_KEY: ${{ secrets.HIGHSPOT_KEY }}
HIGHSPOT_SECRET: ${{ secrets.HIGHSPOT_SECRET }}
# Slack
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
# Teams
TEAMS_APPLICATION_ID: ${{ secrets.TEAMS_APPLICATION_ID }}
TEAMS_DIRECTORY_ID: ${{ secrets.TEAMS_DIRECTORY_ID }}
TEAMS_SECRET: ${{ secrets.TEAMS_SECRET }}
# Bitbucket
BITBUCKET_WORKSPACE: ${{ secrets.BITBUCKET_WORKSPACE }}
BITBUCKET_REPOSITORIES: ${{ secrets.BITBUCKET_REPOSITORIES }}
BITBUCKET_PROJECTS: ${{ secrets.BITBUCKET_PROJECTS }}
BITBUCKET_EMAIL: ${{ vars.BITBUCKET_EMAIL }}
BITBUCKET_API_TOKEN: ${{ secrets.BITBUCKET_API_TOKEN }}
# Fireflies
FIREFLIES_API_KEY: ${{ secrets.FIREFLIES_API_KEY }}
jobs:
connectors-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-connectors-check", "extras=s3-cache"]
timeout-minutes: 45
env:
PYTHONPATH: ./backend
steps:
- name: Checkout code
uses: actions/checkout@v4
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Set up Python
uses: actions/setup-python@v5
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
python-version: "3.11"
cache: "pip"
cache-dependency-path: |
persist-credentials: false
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
with:
requirements: |
backend/requirements/default.txt
backend/requirements/dev.txt
- name: Install Dependencies
run: |
python -m pip install --upgrade pip
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
- name: Setup Playwright
uses: ./.github/actions/setup-playwright
- name: Run Tests
- name: Detect Connector changes
id: changes
uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # ratchet:dorny/paths-filter@v3
with:
filters: |
hubspot:
- 'backend/onyx/connectors/hubspot/**'
- 'backend/tests/daily/connectors/hubspot/**'
salesforce:
- 'backend/onyx/connectors/salesforce/**'
- 'backend/tests/daily/connectors/salesforce/**'
github:
- 'backend/onyx/connectors/github/**'
- 'backend/tests/daily/connectors/github/**'
file_processing:
- 'backend/onyx/file_processing/**'
- name: Run Tests (excluding HubSpot, Salesforce, and GitHub)
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
run: py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/connectors
run: |
py.test \
-n 8 \
--dist loadfile \
--durations=8 \
-o junit_family=xunit2 \
-xv \
--ff \
backend/tests/daily/connectors \
--ignore backend/tests/daily/connectors/hubspot \
--ignore backend/tests/daily/connectors/salesforce \
--ignore backend/tests/daily/connectors/github
- name: Run HubSpot Connector Tests
if: ${{ github.event_name == 'schedule' || steps.changes.outputs.hubspot == 'true' || steps.changes.outputs.file_processing == 'true' }}
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
run: |
py.test \
-n 8 \
--dist loadfile \
--durations=8 \
-o junit_family=xunit2 \
-xv \
--ff \
backend/tests/daily/connectors/hubspot
- name: Run Salesforce Connector Tests
if: ${{ github.event_name == 'schedule' || steps.changes.outputs.salesforce == 'true' || steps.changes.outputs.file_processing == 'true' }}
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
run: |
py.test \
-n 8 \
--dist loadfile \
--durations=8 \
-o junit_family=xunit2 \
-xv \
--ff \
backend/tests/daily/connectors/salesforce
- name: Run GitHub Connector Tests
if: ${{ github.event_name == 'schedule' || steps.changes.outputs.github == 'true' || steps.changes.outputs.file_processing == 'true' }}
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
run: |
py.test \
-n 8 \
--dist loadfile \
--durations=8 \
-o junit_family=xunit2 \
-xv \
--ff \
backend/tests/daily/connectors/github
- name: Alert on Failure
if: failure() && github.event_name == 'schedule'
env:
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO: ${{ github.repository }}
RUN_ID: ${{ github.run_id }}
run: |
curl -X POST \
-H 'Content-type: application/json' \
--data '{"text":"Scheduled Connector Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
--data "{\"text\":\"Scheduled Connector Tests failed! Check the run at: https://github.com/${REPO}/actions/runs/${RUN_ID}\"}" \
$SLACK_WEBHOOK

View File

@@ -1,33 +1,67 @@
name: Connector Tests
name: Model Server Tests
on:
schedule:
# This cron expression runs the job daily at 16:00 UTC (9am PT)
- cron: "0 16 * * *"
workflow_dispatch:
inputs:
branch:
description: 'Branch to run the workflow on'
required: false
default: 'main'
permissions:
contents: read
env:
# Bedrock
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_REGION_NAME: ${{ secrets.AWS_REGION_NAME }}
AWS_REGION_NAME: ${{ vars.AWS_REGION_NAME }}
# OpenAI
# API keys for testing
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
LITELLM_API_KEY: ${{ secrets.LITELLM_API_KEY }}
LITELLM_API_URL: ${{ secrets.LITELLM_API_URL }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
AZURE_API_URL: ${{ vars.AZURE_API_URL }}
jobs:
model-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}-model-check"]
timeout-minutes: 45
env:
PYTHONPATH: ./backend
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
# tag every docker image with "test" so that we can spin up the correct set
# of images during testing
# We don't need to build the Web Docker image since it's not yet used
# in the integration tests. We have a separate action to verify that it builds
# successfully.
- name: Pull Model Server Docker image
run: |
docker pull onyxdotapp/onyx-model-server:latest
docker tag onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:test
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
with:
python-version: "3.11"
cache: "pip"
@@ -41,6 +75,49 @@ jobs:
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
- name: Start Docker containers
run: |
cd deployment/docker_compose
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
AUTH_TYPE=basic \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
docker compose -f docker-compose.model-server-test.yml up -d indexing_model_server
id: start_docker
- name: Wait for service to be ready
run: |
echo "Starting wait-for-service script..."
start_time=$(date +%s)
timeout=300 # 5 minutes in seconds
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. Service did not become ready in 5 minutes."
exit 1
fi
# Use curl with error handling to ignore specific exit code 56
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:9000/api/health || echo "curl_error")
if [ "$response" = "200" ]; then
echo "Service is ready!"
break
elif [ "$response" = "curl_error" ]; then
echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
else
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
fi
sleep 5
done
echo "Finished waiting for service."
- name: Run Tests
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
run: |
@@ -51,8 +128,23 @@ jobs:
if: failure() && github.event_name == 'schedule'
env:
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO: ${{ github.repository }}
RUN_ID: ${{ github.run_id }}
run: |
curl -X POST \
-H 'Content-type: application/json' \
--data '{"text":"Scheduled Model Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
--data "{\"text\":\"Scheduled Model Tests failed! Check the run at: https://github.com/${REPO}/actions/runs/${RUN_ID}\"}" \
$SLACK_WEBHOOK
- name: Dump all-container logs (optional)
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.model-server-test.yml logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
- name: Upload logs
if: always()
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs
path: ${{ github.workspace }}/docker-compose.log

View File

@@ -1,4 +1,7 @@
name: Python Unit Tests
concurrency:
group: Python-Unit-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
@@ -6,34 +9,43 @@ on:
branches:
- main
- 'release/**'
push:
tags:
- "v*.*.*"
permissions:
contents: read
jobs:
backend-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-backend-check"]
timeout-minutes: 45
env:
PYTHONPATH: ./backend
REDIS_CLOUD_PYTEST_PASSWORD: ${{ secrets.REDIS_CLOUD_PYTEST_PASSWORD }}
steps:
- name: Checkout code
uses: actions/checkout@v4
SF_USERNAME: ${{ secrets.SF_USERNAME }}
SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
SF_SECURITY_TOKEN: ${{ secrets.SF_SECURITY_TOKEN }}
- name: Set up Python
uses: actions/setup-python@v5
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
python-version: '3.11'
cache: 'pip'
cache-dependency-path: |
persist-credentials: false
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
with:
requirements: |
backend/requirements/default.txt
backend/requirements/dev.txt
- name: Install Dependencies
run: |
python -m pip install --upgrade pip
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
backend/requirements/model_server.txt
backend/requirements/ee.txt
- name: Run Tests
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"

View File

@@ -6,18 +6,51 @@ concurrency:
on:
merge_group:
pull_request: null
push:
tags:
- "v*.*.*"
permissions:
contents: read
jobs:
quality-checks:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
runs-on:
[
runs-on,
runner=1cpu-linux-arm64,
"run-id=${{ github.run_id }}-quality-checks",
]
timeout-minutes: 45
steps:
- uses: actions/checkout@v4
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
- uses: actions/setup-python@v5
persist-credentials: false
- uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
with:
python-version: "3.11"
- uses: pre-commit/action@v3.0.1
- name: Setup Terraform
uses: hashicorp/setup-terraform@b9cd54a3c349d3f38e8881555d616ced269862dd # ratchet:hashicorp/setup-terraform@v3
- name: Setup node
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
with: # zizmor: ignore[cache-poisoning]
node-version: 22
cache: "npm"
cache-dependency-path: ./web/package-lock.json
- name: Install node dependencies
working-directory: ./web
run: npm ci
- uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # ratchet:pre-commit/action@v3.0.1
env:
# uv-run is mypy's id and mypy is covered by the Python Checks which caches dependencies better.
SKIP: uv-run
with:
extra_args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || '' }}
- name: Check Actions
uses: giner/check-actions@28d366c7cbbe235f9624a88aa31a628167eee28c # ratchet:giner/check-actions@v1.0.1
with:
check_permissions: false
check_versions: false

40
.github/workflows/release-devtools.yml vendored Normal file
View File

@@ -0,0 +1,40 @@
name: Release Devtools
on:
push:
tags:
- "ods/v*.*.*"
jobs:
pypi:
runs-on: ubuntu-latest
environment:
name: release-devtools
permissions:
id-token: write
timeout-minutes: 10
strategy:
matrix:
os-arch:
- {goos: "linux", goarch: "amd64"}
- {goos: "linux", goarch: "arm64"}
- {goos: "windows", goarch: "amd64"}
- {goos: "windows", goarch: "arm64"}
- {goos: "darwin", goarch: "amd64"}
- {goos: "darwin", goarch: "arm64"}
- {goos: "", goarch: ""}
steps:
- uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
fetch-depth: 0
- uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # ratchet:astral-sh/setup-uv@v7
with:
enable-cache: false
- run: |
GOOS="${{ matrix.os-arch.goos }}" \
GOARCH="${{ matrix.os-arch.goarch }}" \
uv build --wheel
working-directory: tools/ods
- run: uv publish
working-directory: tools/ods

49
.github/workflows/sync_foss.yml vendored Normal file
View File

@@ -0,0 +1,49 @@
name: Sync FOSS Repo
on:
schedule:
# Run daily at 3am PT (11am UTC during PST)
- cron: '0 11 * * *'
workflow_dispatch:
jobs:
sync-foss:
runs-on: ubuntu-latest
timeout-minutes: 45
permissions:
contents: read
steps:
- name: Checkout main Onyx repo
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false
- name: Install git-filter-repo
run: |
sudo apt-get update && sudo apt-get install -y git-filter-repo
- name: Configure SSH for deploy key
env:
FOSS_REPO_DEPLOY_KEY: ${{ secrets.FOSS_REPO_DEPLOY_KEY }}
run: |
mkdir -p ~/.ssh
echo "$FOSS_REPO_DEPLOY_KEY" > ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
ssh-keyscan github.com >> ~/.ssh/known_hosts
- name: Set Git config
run: |
git config --global user.name "onyx-bot"
git config --global user.email "bot@onyx.app"
- name: Build FOSS version
run: bash backend/scripts/make_foss_repo.sh
- name: Push to FOSS repo
env:
FOSS_REPO_URL: git@github.com:onyx-dot-app/onyx-foss.git
run: |
cd /tmp/foss_repo
git remote add public "$FOSS_REPO_URL"
git push --force public main

View File

@@ -3,27 +3,30 @@ name: Nightly Tag Push
on:
schedule:
- cron: "0 10 * * *" # Runs every day at 2 AM PST / 3 AM PDT / 10 AM UTC
workflow_dispatch:
permissions:
contents: write # Allows pushing tags to the repository
jobs:
create-and-push-tag:
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
runs-on: ubuntu-slim
timeout-minutes: 45
steps:
# actions using GITHUB_TOKEN cannot trigger another workflow, but we do want this to trigger docker pushes
# see https://github.com/orgs/community/discussions/27028#discussioncomment-3254367 for the workaround we
# implement here which needs an actual user's deploy key
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"
ssh-key: "${{ secrets.DEPLOY_KEY }}"
persist-credentials: true
- name: Set up Git user
run: |
git config user.name "Richard Kuo [bot]"
git config user.email "rkuo[bot]@onyx.app"
git config user.name "Onyx Bot [bot]"
git config user.email "onyx-bot[bot]@onyx.app"
- name: Check for existing nightly tag
id: check_tag
@@ -51,3 +54,12 @@ jobs:
run: |
TAG_NAME="nightly-latest-$(date +'%Y%m%d')"
git push origin $TAG_NAME
- name: Send Slack notification
if: failure()
uses: ./.github/actions/slack-notify
with:
webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
title: "🚨 Nightly Tag Push Failed"
ref-name: ${{ github.ref_name }}
failed-jobs: "create-and-push-tag"

38
.github/workflows/zizmor.yml vendored Normal file
View File

@@ -0,0 +1,38 @@
name: Run Zizmor
on:
push:
branches: ["main"]
pull_request:
branches: ["**"]
permissions: {}
jobs:
zizmor:
name: zizmor
runs-on: ubuntu-slim
timeout-minutes: 45
permissions:
security-events: write # needed for SARIF uploads
steps:
- name: Checkout repository
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6.0.0
with:
persist-credentials: false
- name: Install the latest version of uv
uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # ratchet:astral-sh/setup-uv@v7.1.4
with:
enable-cache: false
- name: Run zizmor
run: uv run --no-sync --with zizmor zizmor --format=sarif . > results.sarif
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Upload SARIF file
uses: github/codeql-action/upload-sarif@ba454b8ab46733eb6145342877cd148270bb77ab # ratchet:github/codeql-action/upload-sarif@codeql-bundle-v2.23.5
with:
sarif_file: results.sarif
category: zizmor

51
.gitignore vendored
View File

@@ -1,10 +1,55 @@
.env
# editors
.vscode
.zed
.cursor
# macos
.DS_store
# python
.venv
.mypy_cache
.idea
# testing
/web/test-results/
backend/onyx/agent_search/main/test_data.json
backend/tests/regression/answer_quality/test_data.json
backend/tests/regression/search_quality/eval-*
backend/tests/regression/search_quality/search_eval_config.yaml
backend/tests/regression/search_quality/*.json
backend/onyx/evals/data/
backend/onyx/evals/one_off/*.json
*.log
# secret files
.env
jira_test_env
settings.json
# others
/deployment/data/nginx/app.conf
.vscode/
/deployment/data/nginx/mcp.conf.inc
/deployment/data/nginx/mcp_upstream.conf.inc
*.sw?
/backend/tests/regression/answer_quality/search_test_config.yaml
/web/test-results/
*.egg-info
# Claude
AGENTS.md
CLAUDE.md
# Local .terraform directories
**/.terraform/*
# Local .tfstate files
*.tfstate
*.tfstate.*
# Local .terraform.lock.hcl file
.terraform.lock.hcl
node_modules
# MCP configs
.playwright-mcp

View File

@@ -1,12 +1,40 @@
default_install_hook_types:
- pre-commit
- post-checkout
- post-merge
- post-rewrite
repos:
- repo: https://github.com/astral-sh/uv-pre-commit
rev: 569ddf04117761eb74cef7afb5143bbb96fcdfbb # frozen: 0.9.15
hooks:
- id: uv-sync
# NOTE: This takes ~6s on a single, large module which is prohibitively slow.
# - id: uv-run
# name: mypy
# args: ["mypy"]
# pass_filenames: true
# files: ^backend/.*\.py$
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: 3e8a8703264a2f4a69428a0aa4dcb512790b2c8c # frozen: v6.0.0
hooks:
- id: check-yaml
files: ^.github/
- repo: https://github.com/rhysd/actionlint
rev: a443f344ff32813837fa49f7aa6cbc478d770e62 # frozen: v1.7.9
hooks:
- id: actionlint
- repo: https://github.com/psf/black
rev: 23.3.0
rev: 8a737e727ac5ab2f1d4cf5876720ed276dc8dc4b # frozen: 25.1.0
hooks:
- id: black
language_version: python3.11
- repo: https://github.com/asottile/reorder_python_imports
rev: v3.9.0
# this is a fork which keeps compatibility with black
- repo: https://github.com/wimglenn/reorder-python-imports-black
rev: f55cd27f90f0cf0ee775002c2383ce1c7820013d # frozen: v3.14.0
hooks:
- id: reorder-python-imports
args: ['--py311-plus', '--application-directories=backend/']
@@ -18,48 +46,57 @@ repos:
# These settings will remove unused imports with side effects
# Note: The repo currently does not and should not have imports with side effects
- repo: https://github.com/PyCQA/autoflake
rev: v2.2.0
rev: 0544741e2b4a22b472d9d93e37d4ea9153820bb1 # frozen: v2.3.1
hooks:
- id: autoflake
args: [ '--remove-all-unused-imports', '--remove-unused-variables', '--in-place' , '--recursive']
- repo: https://github.com/golangci/golangci-lint
rev: e6ebea0145f385056bce15041d3244c0e5e15848 # frozen: v2.7.0
hooks:
- id: golangci-lint
entry: bash -c "find tools/ -name go.mod -print0 | xargs -0 -I{} bash -c 'cd \"$(dirname {})\" && golangci-lint run ./...'"
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.0.286
rev: 971923581912ef60a6b70dbf0c3e9a39563c9d47 # frozen: v0.11.4
hooks:
- id: ruff
- repo: https://github.com/pre-commit/mirrors-prettier
rev: v3.1.0
rev: ffb6a759a979008c0e6dff86e39f4745a2d9eac4 # frozen: v3.1.0
hooks:
- id: prettier
types_or: [html, css, javascript, ts, tsx]
additional_dependencies:
- prettier
language_version: system
# We would like to have a mypy pre-commit hook, but due to the fact that
# pre-commit runs in it's own isolated environment, we would need to install
# and keep in sync all dependencies so mypy has access to the appropriate type
# stubs. This does not seem worth it at the moment, so for now we will stick to
# having mypy run via Github Actions / manually by contributors
# - repo: https://github.com/pre-commit/mirrors-mypy
# rev: v1.1.1
# hooks:
# - id: mypy
# exclude: ^tests/
# # below are needed for type stubs since pre-commit runs in it's own
# # isolated environment. Unfortunately, this needs to be kept in sync
# # with requirements/dev.txt + requirements/default.txt
# additional_dependencies: [
# alembic==1.10.4,
# types-beautifulsoup4==4.12.0.3,
# types-html5lib==1.1.11.13,
# types-oauthlib==3.2.0.9,
# types-psycopg2==2.9.21.10,
# types-python-dateutil==2.8.19.13,
# types-regex==2023.3.23.1,
# types-requests==2.28.11.17,
# types-retry==0.9.9.3,
# types-urllib3==1.26.25.11
# ]
# # TODO: add back once errors are addressed
# # args: [--strict]
- repo: https://github.com/sirwart/ripsecrets
rev: 7d94620933e79b8acaa0cd9e60e9864b07673d86 # frozen: v0.1.11
hooks:
- id: ripsecrets
args:
- --additional-pattern
- ^sk-[A-Za-z0-9_\-]{20,}$
- repo: local
hooks:
- id: terraform-fmt
name: terraform fmt
entry: terraform fmt -recursive
language: system
pass_filenames: false
files: \.tf$
- id: check-lazy-imports
name: Check lazy imports
entry: python3 backend/scripts/check_lazy_imports.py
language: system
files: ^backend/(?!\.venv/).*\.py$
- id: typescript-check
name: TypeScript type check
entry: bash -c 'cd web && npm run types:check'
language: system
pass_filenames: false
files: ^web/.*\.(ts|tsx)$

View File

@@ -1,16 +1,13 @@
# Copy this file to .env in the .vscode folder
# Fill in the <REPLACE THIS> values as needed, it is recommended to set the GEN_AI_API_KEY value to avoid having to set up an LLM in the UI
# Also check out danswer/backend/scripts/restart_containers.sh for a script to restart the containers which Danswer relies on outside of VSCode/Cursor processes
# Also check out onyx/backend/scripts/restart_containers.sh for a script to restart the containers which Onyx relies on outside of VSCode/Cursor processes
# For local dev, often user Authentication is not needed
AUTH_TYPE=disabled
# Skip warm up for dev
SKIP_WARM_UP=True
# Always keep these on for Dev
# Logs all model prompts to stdout
LOG_DANSWER_MODEL_INTERACTIONS=True
# Logs model prompts, reasoning, and answer to stdout
LOG_ONYX_MODEL_INTERACTIONS=True
# More verbose logging
LOG_LEVEL=debug
@@ -23,6 +20,9 @@ DISABLE_LLM_DOC_RELEVANCE=False
# Useful if you want to toggle auth on/off (google_oauth/OIDC specifically)
OAUTH_CLIENT_ID=<REPLACE THIS>
OAUTH_CLIENT_SECRET=<REPLACE THIS>
OPENID_CONFIG_URL=<REPLACE THIS>
SAML_CONF_DIR=/<ABSOLUTE PATH TO ONYX>/onyx/backend/ee/onyx/configs/saml_config
# Generally not useful for dev, we don't generally want to set up an SMTP server for dev
REQUIRE_EMAIL_VERIFICATION=False
@@ -34,21 +34,32 @@ OPENAI_API_KEY=<REPLACE THIS>
GEN_AI_MODEL_VERSION=gpt-4o
FAST_GEN_AI_MODEL_VERSION=gpt-4o
# For Danswer Slack Bot, overrides the UI values so no need to set this up via UI every time
# Only needed if using DanswerBot
#DANSWER_BOT_SLACK_APP_TOKEN=<REPLACE THIS>
#DANSWER_BOT_SLACK_BOT_TOKEN=<REPLACE THIS>
# Python stuff
PYTHONPATH=../backend
PYTHONUNBUFFERED=1
# Internet Search
BING_API_KEY=<REPLACE THIS>
# Enable the full set of Danswer Enterprise Edition features
# NOTE: DO NOT ENABLE THIS UNLESS YOU HAVE A PAID ENTERPRISE LICENSE (or if you are using this for local testing/development)
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=False
# S3 File Store Configuration (MinIO for local development)
S3_ENDPOINT_URL=http://localhost:9004
S3_FILE_STORE_BUCKET_NAME=onyx-file-store-bucket
S3_AWS_ACCESS_KEY_ID=minioadmin
S3_AWS_SECRET_ACCESS_KEY=minioadmin
# Show extra/uncommon connectors
SHOW_EXTRA_CONNECTORS=True
# Local langsmith tracing
LANGSMITH_TRACING="true"
LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
LANGSMITH_API_KEY=<REPLACE_THIS>
LANGSMITH_PROJECT=<REPLACE_THIS>
# Local Confluence OAuth testing
# OAUTH_CONFLUENCE_CLOUD_CLIENT_ID=<REPLACE_THIS>
# OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET=<REPLACE_THIS>
# NEXT_PUBLIC_TEST_ENV=True

File diff suppressed because it is too large Load Diff

101
.vscode/tasks.template.jsonc vendored Normal file
View File

@@ -0,0 +1,101 @@
{
"version": "2.0.0",
"tasks": [
{
"type": "austin",
"label": "Profile celery beat",
"envFile": "${workspaceFolder}/.env",
"options": {
"cwd": "${workspaceFolder}/backend"
},
"command": [
"sudo",
"-E"
],
"args": [
"celery",
"-A",
"onyx.background.celery.versioned_apps.beat",
"beat",
"--loglevel=INFO"
]
},
{
"type": "shell",
"label": "Generate Onyx OpenAPI Python client",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.env",
"options": {
"cwd": "${workspaceFolder}/backend"
},
"command": [
"openapi-generator"
],
"args": [
"generate",
"-i",
"generated/openapi.json",
"-g",
"python",
"-o",
"generated/onyx_openapi_client",
"--package-name",
"onyx_openapi_client",
]
},
{
"type": "shell",
"label": "Generate Typescript Fetch client (openapi-generator)",
"envFile": "${workspaceFolder}/.env",
"options": {
"cwd": "${workspaceFolder}"
},
"command": [
"openapi-generator"
],
"args": [
"generate",
"-i",
"backend/generated/openapi.json",
"-g",
"typescript-fetch",
"-o",
"${workspaceFolder}/web/src/lib/generated/onyx_api",
"--additional-properties=disallowAdditionalPropertiesIfNotPresent=false,legacyDiscriminatorBehavior=false,supportsES6=true",
]
},
{
"type": "shell",
"label": "Generate TypeScript Client (openapi-ts)",
"envFile": "${workspaceFolder}/.env",
"options": {
"cwd": "${workspaceFolder}/web"
},
"command": [
"npx"
],
"args": [
"openapi-typescript",
"../backend/generated/openapi.json",
"--output",
"./src/lib/generated/onyx-schema.ts",
]
},
{
"type": "shell",
"label": "Generate TypeScript Client (orval)",
"envFile": "${workspaceFolder}/.env",
"options": {
"cwd": "${workspaceFolder}/web"
},
"command": [
"npx"
],
"args": [
"orval",
"--config",
"orval.config.js",
]
}
]
}

327
AGENTS.md.template Normal file
View File

@@ -0,0 +1,327 @@
# AGENTS.md
This file provides guidance to Codex when working with code in this repository.
## KEY NOTES
- If you run into any missing python dependency errors, try running your command with `source backend/.venv/bin/activate` \
to assume the python venv.
- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
- If using `playwright` to explore the frontend, you can usually log in with username `a@test.com` and password
`a`. The app can be accessed at `http://localhost:3000`.
- You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
make sure we see logs coming out from the relevant service.
- To connect to the Postgres database, use: `docker exec -it onyx-relational_db-1 psql -U postgres -c "<SQL>"`
- When making calls to the backend, always go through the frontend. E.g. make a call to `http://localhost:3000/api/persona` not `http://localhost:8080/api/persona`
- Put ALL db operations under the `backend/onyx/db` / `backend/ee/onyx/db` directories. Don't run queries
outside of those directories.
## Project Overview
**Onyx** (formerly Danswer) is an open-source Gen-AI and Enterprise Search platform that connects to company documents, apps, and people. It features a modular architecture with both Community Edition (MIT licensed) and Enterprise Edition offerings.
### Background Workers (Celery)
Onyx uses Celery for asynchronous task processing with multiple specialized workers:
#### Worker Types
1. **Primary Worker** (`celery_app.py`)
- Coordinates core background tasks and system-wide operations
- Handles connector management, document sync, pruning, and periodic checks
- Runs with 4 threads concurrency
- Tasks: connector deletion, vespa sync, pruning, LLM model updates, user file sync
2. **Docfetching Worker** (`docfetching`)
- Fetches documents from external data sources (connectors)
- Spawns docprocessing tasks for each document batch
- Implements watchdog monitoring for stuck connectors
- Configurable concurrency (default from env)
3. **Docprocessing Worker** (`docprocessing`)
- Processes fetched documents through the indexing pipeline:
- Upserts documents to PostgreSQL
- Chunks documents and adds contextual information
- Embeds chunks via model server
- Writes chunks to Vespa vector database
- Updates document metadata
- Configurable concurrency (default from env)
4. **Light Worker** (`light`)
- Handles lightweight, fast operations
- Tasks: vespa operations, document permissions sync, external group sync
- Higher concurrency for quick tasks
5. **Heavy Worker** (`heavy`)
- Handles resource-intensive operations
- Primary task: document pruning operations
- Runs with 4 threads concurrency
6. **KG Processing Worker** (`kg_processing`)
- Handles Knowledge Graph processing and clustering
- Builds relationships between documents
- Runs clustering algorithms
- Configurable concurrency
7. **Monitoring Worker** (`monitoring`)
- System health monitoring and metrics collection
- Monitors Celery queues, process memory, and system status
- Single thread (monitoring doesn't need parallelism)
- Cloud-specific monitoring tasks
8. **User File Processing Worker** (`user_file_processing`)
- Processes user-uploaded files
- Handles user file indexing and project synchronization
- Configurable concurrency
9. **Beat Worker** (`beat`)
- Celery's scheduler for periodic tasks
- Uses DynamicTenantScheduler for multi-tenant support
- Schedules tasks like:
- Indexing checks (every 15 seconds)
- Connector deletion checks (every 20 seconds)
- Vespa sync checks (every 20 seconds)
- Pruning checks (every 20 seconds)
- KG processing (every 60 seconds)
- Monitoring tasks (every 5 minutes)
- Cleanup tasks (hourly)
#### Worker Deployment Modes
Onyx supports two deployment modes for background workers, controlled by the `USE_LIGHTWEIGHT_BACKGROUND_WORKER` environment variable:
**Lightweight Mode** (default, `USE_LIGHTWEIGHT_BACKGROUND_WORKER=true`):
- Runs a single consolidated `background` worker that handles all background tasks:
- Pruning operations (from `heavy` worker)
- Knowledge graph processing (from `kg_processing` worker)
- Monitoring tasks (from `monitoring` worker)
- User file processing (from `user_file_processing` worker)
- Lower resource footprint (single worker process)
- Suitable for smaller deployments or development environments
- Default concurrency: 6 threads
**Standard Mode** (`USE_LIGHTWEIGHT_BACKGROUND_WORKER=false`):
- Runs separate specialized workers as documented above (heavy, kg_processing, monitoring, user_file_processing)
- Better isolation and scalability
- Can scale individual workers independently based on workload
- Suitable for production deployments with higher load
The deployment mode affects:
- **Backend**: Worker processes spawned by supervisord or dev scripts
- **Helm**: Which Kubernetes deployments are created
- **Dev Environment**: Which workers `dev_run_background_jobs.py` spawns
#### Key Features
- **Thread-based Workers**: All workers use thread pools (not processes) for stability
- **Tenant Awareness**: Multi-tenant support with per-tenant task isolation. There is a
middleware layer that automatically finds the appropriate tenant ID when sending tasks
via Celery Beat.
- **Task Prioritization**: High, Medium, Low priority queues
- **Monitoring**: Built-in heartbeat and liveness checking
- **Failure Handling**: Automatic retry and failure recovery mechanisms
- **Redis Coordination**: Inter-process communication via Redis
- **PostgreSQL State**: Task state and metadata stored in PostgreSQL
#### Important Notes
**Defining Tasks**:
- Always use `@shared_task` rather than `@celery_app`
- Put tasks under `background/celery/tasks/` or `ee/background/celery/tasks`
**Defining APIs**:
When creating new FastAPI APIs, do NOT use the `response_model` field. Instead, just type the
function.
**Testing Updates**:
If you make any updates to a celery worker and you want to test these changes, you will need
to ask me to restart the celery worker. There is no auto-restart on code-change mechanism.
### Code Quality
```bash
# Install and run pre-commit hooks
pre-commit install
pre-commit run --all-files
```
NOTE: Always make sure everything is strictly typed (both in Python and Typescript).
## Architecture Overview
### Technology Stack
- **Backend**: Python 3.11, FastAPI, SQLAlchemy, Alembic, Celery
- **Frontend**: Next.js 15+, React 18, TypeScript, Tailwind CSS
- **Database**: PostgreSQL with Redis caching
- **Search**: Vespa vector database
- **Auth**: OAuth2, SAML, multi-provider support
- **AI/ML**: LangChain, LiteLLM, multiple embedding models
### Directory Structure
```
backend/
├── onyx/
│ ├── auth/ # Authentication & authorization
│ ├── chat/ # Chat functionality & LLM interactions
│ ├── connectors/ # Data source connectors
│ ├── db/ # Database models & operations
│ ├── document_index/ # Vespa integration
│ ├── federated_connectors/ # External search connectors
│ ├── llm/ # LLM provider integrations
│ └── server/ # API endpoints & routers
├── ee/ # Enterprise Edition features
├── alembic/ # Database migrations
└── tests/ # Test suites
web/
├── src/app/ # Next.js app router pages
├── src/components/ # Reusable React components
└── src/lib/ # Utilities & business logic
```
## Database & Migrations
### Running Migrations
```bash
# Standard migrations
alembic upgrade head
# Multi-tenant (Enterprise)
alembic -n schema_private upgrade head
```
### Creating Migrations
```bash
# Create migration
alembic revision -m "description"
# Multi-tenant migration
alembic -n schema_private revision -m "description"
```
Write the migration manually and place it in the file that alembic creates when running the above command.
## Testing Strategy
There are 4 main types of tests within Onyx:
### Unit Tests
These should not assume any Onyx/external services are available to be called.
Interactions with the outside world should be mocked using `unittest.mock`. Generally, only
write these for complex, isolated modules e.g. `citation_processing.py`.
To run them:
```bash
python -m dotenv -f .vscode/.env run -- pytest -xv backend/tests/unit
```
### External Dependency Unit Tests
These tests assume that all external dependencies of Onyx are available and callable (e.g. Postgres, Redis,
MinIO/S3, Vespa are running + OpenAI can be called + any request to the internet is fine + etc.).
However, the actual Onyx containers are not running and with these tests we call the function to test directly.
We can also mock components/calls at will.
The goal with these tests are to minimize mocking while giving some flexibility to mock things that are flakey,
need strictly controlled behavior, or need to have their internal behavior validated (e.g. verify a function is called
with certain args, something that would be impossible with proper integration tests).
A great example of this type of test is `backend/tests/external_dependency_unit/connectors/confluence/test_confluence_group_sync.py`.
To run them:
```bash
python -m dotenv -f .vscode/.env run -- pytest backend/tests/external_dependency_unit
```
### Integration Tests
Standard integration tests. Every test in `backend/tests/integration` runs against a real Onyx deployment. We cannot
mock anything in these tests. Prefer writing integration tests (or External Dependency Unit Tests if mocking/internal
verification is necessary) over any other type of test.
Tests are parallelized at a directory level.
When writing integration tests, make sure to check the root `conftest.py` for useful fixtures + the `backend/tests/integration/common_utils` directory for utilities. Prefer (if one exists), calling the appropriate Manager
class in the utils over directly calling the APIs with a library like `requests`. Prefer using fixtures rather than
calling the utilities directly (e.g. do NOT create admin users with
`admin_user = UserManager.create(name="admin_user")`, instead use the `admin_user` fixture).
A great example of this type of test is `backend/tests/integration/dev_apis/test_simple_chat_api.py`.
To run them:
```bash
python -m dotenv -f .vscode/.env run -- pytest backend/tests/integration
```
### Playwright (E2E) Tests
These tests are an even more complete version of the Integration Tests mentioned above. Has all services of Onyx
running, *including* the Web Server.
Use these tests for anything that requires significant frontend <-> backend coordination.
Tests are located at `web/tests/e2e`. Tests are written in TypeScript.
To run them:
```bash
npx playwright test <TEST_NAME>
```
## Logs
When (1) writing integration tests or (2) doing live tests (e.g. curl / playwright) you can get access
to logs via the `backend/log/<service_name>_debug.log` file. All Onyx services (api_server, web_server, celery_X)
will be tailing their logs to this file.
## Security Considerations
- Never commit API keys or secrets to repository
- Use encrypted credential storage for connector credentials
- Follow RBAC patterns for new features
- Implement proper input validation with Pydantic models
- Use parameterized queries to prevent SQL injection
## AI/LLM Integration
- Multiple LLM providers supported via LiteLLM
- Configurable models per feature (chat, search, embeddings)
- Streaming support for real-time responses
- Token management and rate limiting
- Custom prompts and agent actions
## UI/UX Patterns
- Tailwind CSS with design system in `web/src/components/ui/`
- Radix UI and Headless UI for accessible components
- SWR for data fetching and caching
- Form validation with react-hook-form
- Error handling with popup notifications
## Creating a Plan
When creating a plan in the `plans` directory, make sure to include at least these elements:
**Issues to Address**
What the change is meant to do.
**Important Notes**
Things you come across in your research that are important to the implementation.
**Implementation strategy**
How you are going to make the changes happen. High level approach.
**Tests**
What unit (use rarely), external dependency unit, integration, and playwright tests you plan to write to
verify the correct behavior. Don't overtest. Usually, a given change only needs one type of test.
Do NOT include these: *Timeline*, *Rollback plan*
This is a minimal list - feel free to include more. Do NOT write code as part of your plan.
Keep it high level. You can reference certain files or functions though.
Before writing your plan, make sure to do research. Explore the relevant sections in the codebase.

332
CLAUDE.md.template Normal file
View File

@@ -0,0 +1,332 @@
# CLAUDE.md
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## KEY NOTES
- If you run into any missing python dependency errors, try running your command with `source backend/.venv/bin/activate` \
to assume the python venv.
- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
- If using `playwright` to explore the frontend, you can usually log in with username `a@test.com` and password
`a`. The app can be accessed at `http://localhost:3000`.
- You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
make sure we see logs coming out from the relevant service.
- To connect to the Postgres database, use: `docker exec -it onyx-relational_db-1 psql -U postgres -c "<SQL>"`
- When making calls to the backend, always go through the frontend. E.g. make a call to `http://localhost:3000/api/persona` not `http://localhost:8080/api/persona`
- Put ALL db operations under the `backend/onyx/db` / `backend/ee/onyx/db` directories. Don't run queries
outside of those directories.
## Project Overview
**Onyx** (formerly Danswer) is an open-source Gen-AI and Enterprise Search platform that connects to company documents, apps, and people. It features a modular architecture with both Community Edition (MIT licensed) and Enterprise Edition offerings.
### Background Workers (Celery)
Onyx uses Celery for asynchronous task processing with multiple specialized workers:
#### Worker Types
1. **Primary Worker** (`celery_app.py`)
- Coordinates core background tasks and system-wide operations
- Handles connector management, document sync, pruning, and periodic checks
- Runs with 4 threads concurrency
- Tasks: connector deletion, vespa sync, pruning, LLM model updates, user file sync
2. **Docfetching Worker** (`docfetching`)
- Fetches documents from external data sources (connectors)
- Spawns docprocessing tasks for each document batch
- Implements watchdog monitoring for stuck connectors
- Configurable concurrency (default from env)
3. **Docprocessing Worker** (`docprocessing`)
- Processes fetched documents through the indexing pipeline:
- Upserts documents to PostgreSQL
- Chunks documents and adds contextual information
- Embeds chunks via model server
- Writes chunks to Vespa vector database
- Updates document metadata
- Configurable concurrency (default from env)
4. **Light Worker** (`light`)
- Handles lightweight, fast operations
- Tasks: vespa operations, document permissions sync, external group sync
- Higher concurrency for quick tasks
5. **Heavy Worker** (`heavy`)
- Handles resource-intensive operations
- Primary task: document pruning operations
- Runs with 4 threads concurrency
6. **KG Processing Worker** (`kg_processing`)
- Handles Knowledge Graph processing and clustering
- Builds relationships between documents
- Runs clustering algorithms
- Configurable concurrency
7. **Monitoring Worker** (`monitoring`)
- System health monitoring and metrics collection
- Monitors Celery queues, process memory, and system status
- Single thread (monitoring doesn't need parallelism)
- Cloud-specific monitoring tasks
8. **User File Processing Worker** (`user_file_processing`)
- Processes user-uploaded files
- Handles user file indexing and project synchronization
- Configurable concurrency
9. **Beat Worker** (`beat`)
- Celery's scheduler for periodic tasks
- Uses DynamicTenantScheduler for multi-tenant support
- Schedules tasks like:
- Indexing checks (every 15 seconds)
- Connector deletion checks (every 20 seconds)
- Vespa sync checks (every 20 seconds)
- Pruning checks (every 20 seconds)
- KG processing (every 60 seconds)
- Monitoring tasks (every 5 minutes)
- Cleanup tasks (hourly)
#### Worker Deployment Modes
Onyx supports two deployment modes for background workers, controlled by the `USE_LIGHTWEIGHT_BACKGROUND_WORKER` environment variable:
**Lightweight Mode** (default, `USE_LIGHTWEIGHT_BACKGROUND_WORKER=true`):
- Runs a single consolidated `background` worker that handles all background tasks:
- Light worker tasks (Vespa operations, permissions sync, deletion)
- Document processing (indexing pipeline)
- Document fetching (connector data retrieval)
- Pruning operations (from `heavy` worker)
- Knowledge graph processing (from `kg_processing` worker)
- Monitoring tasks (from `monitoring` worker)
- User file processing (from `user_file_processing` worker)
- Lower resource footprint (fewer worker processes)
- Suitable for smaller deployments or development environments
- Default concurrency: 20 threads (increased to handle combined workload)
**Standard Mode** (`USE_LIGHTWEIGHT_BACKGROUND_WORKER=false`):
- Runs separate specialized workers as documented above (light, docprocessing, docfetching, heavy, kg_processing, monitoring, user_file_processing)
- Better isolation and scalability
- Can scale individual workers independently based on workload
- Suitable for production deployments with higher load
The deployment mode affects:
- **Backend**: Worker processes spawned by supervisord or dev scripts
- **Helm**: Which Kubernetes deployments are created
- **Dev Environment**: Which workers `dev_run_background_jobs.py` spawns
#### Key Features
- **Thread-based Workers**: All workers use thread pools (not processes) for stability
- **Tenant Awareness**: Multi-tenant support with per-tenant task isolation. There is a
middleware layer that automatically finds the appropriate tenant ID when sending tasks
via Celery Beat.
- **Task Prioritization**: High, Medium, Low priority queues
- **Monitoring**: Built-in heartbeat and liveness checking
- **Failure Handling**: Automatic retry and failure recovery mechanisms
- **Redis Coordination**: Inter-process communication via Redis
- **PostgreSQL State**: Task state and metadata stored in PostgreSQL
#### Important Notes
**Defining Tasks**:
- Always use `@shared_task` rather than `@celery_app`
- Put tasks under `background/celery/tasks/` or `ee/background/celery/tasks`
**Defining APIs**:
When creating new FastAPI APIs, do NOT use the `response_model` field. Instead, just type the
function.
**Testing Updates**:
If you make any updates to a celery worker and you want to test these changes, you will need
to ask me to restart the celery worker. There is no auto-restart on code-change mechanism.
### Code Quality
```bash
# Install and run pre-commit hooks
pre-commit install
pre-commit run --all-files
```
NOTE: Always make sure everything is strictly typed (both in Python and Typescript).
## Architecture Overview
### Technology Stack
- **Backend**: Python 3.11, FastAPI, SQLAlchemy, Alembic, Celery
- **Frontend**: Next.js 15+, React 18, TypeScript, Tailwind CSS
- **Database**: PostgreSQL with Redis caching
- **Search**: Vespa vector database
- **Auth**: OAuth2, SAML, multi-provider support
- **AI/ML**: LangChain, LiteLLM, multiple embedding models
### Directory Structure
```
backend/
├── onyx/
│ ├── auth/ # Authentication & authorization
│ ├── chat/ # Chat functionality & LLM interactions
│ ├── connectors/ # Data source connectors
│ ├── db/ # Database models & operations
│ ├── document_index/ # Vespa integration
│ ├── federated_connectors/ # External search connectors
│ ├── llm/ # LLM provider integrations
│ └── server/ # API endpoints & routers
├── ee/ # Enterprise Edition features
├── alembic/ # Database migrations
└── tests/ # Test suites
web/
├── src/app/ # Next.js app router pages
├── src/components/ # Reusable React components
└── src/lib/ # Utilities & business logic
```
## Database & Migrations
### Running Migrations
```bash
# Standard migrations
alembic upgrade head
# Multi-tenant (Enterprise)
alembic -n schema_private upgrade head
```
### Creating Migrations
```bash
# Create migration
alembic revision -m "description"
# Multi-tenant migration
alembic -n schema_private revision -m "description"
```
Write the migration manually and place it in the file that alembic creates when running the above command.
## Testing Strategy
First, you must activate the virtual environment with `source .venv/bin/activate`.
There are 4 main types of tests within Onyx:
### Unit Tests
These should not assume any Onyx/external services are available to be called.
Interactions with the outside world should be mocked using `unittest.mock`. Generally, only
write these for complex, isolated modules e.g. `citation_processing.py`.
To run them:
```bash
pytest -xv backend/tests/unit
```
### External Dependency Unit Tests
These tests assume that all external dependencies of Onyx are available and callable (e.g. Postgres, Redis,
MinIO/S3, Vespa are running + OpenAI can be called + any request to the internet is fine + etc.).
However, the actual Onyx containers are not running and with these tests we call the function to test directly.
We can also mock components/calls at will.
The goal with these tests are to minimize mocking while giving some flexibility to mock things that are flakey,
need strictly controlled behavior, or need to have their internal behavior validated (e.g. verify a function is called
with certain args, something that would be impossible with proper integration tests).
A great example of this type of test is `backend/tests/external_dependency_unit/connectors/confluence/test_confluence_group_sync.py`.
To run them:
```bash
python -m dotenv -f .vscode/.env run -- pytest backend/tests/external_dependency_unit
```
### Integration Tests
Standard integration tests. Every test in `backend/tests/integration` runs against a real Onyx deployment. We cannot
mock anything in these tests. Prefer writing integration tests (or External Dependency Unit Tests if mocking/internal
verification is necessary) over any other type of test.
Tests are parallelized at a directory level.
When writing integration tests, make sure to check the root `conftest.py` for useful fixtures + the `backend/tests/integration/common_utils` directory for utilities. Prefer (if one exists), calling the appropriate Manager
class in the utils over directly calling the APIs with a library like `requests`. Prefer using fixtures rather than
calling the utilities directly (e.g. do NOT create admin users with
`admin_user = UserManager.create(name="admin_user")`, instead use the `admin_user` fixture).
A great example of this type of test is `backend/tests/integration/dev_apis/test_simple_chat_api.py`.
To run them:
```bash
python -m dotenv -f .vscode/.env run -- pytest backend/tests/integration
```
### Playwright (E2E) Tests
These tests are an even more complete version of the Integration Tests mentioned above. Has all services of Onyx
running, *including* the Web Server.
Use these tests for anything that requires significant frontend <-> backend coordination.
Tests are located at `web/tests/e2e`. Tests are written in TypeScript.
To run them:
```bash
npx playwright test <TEST_NAME>
```
## Logs
When (1) writing integration tests or (2) doing live tests (e.g. curl / playwright) you can get access
to logs via the `backend/log/<service_name>_debug.log` file. All Onyx services (api_server, web_server, celery_X)
will be tailing their logs to this file.
## Security Considerations
- Never commit API keys or secrets to repository
- Use encrypted credential storage for connector credentials
- Follow RBAC patterns for new features
- Implement proper input validation with Pydantic models
- Use parameterized queries to prevent SQL injection
## AI/LLM Integration
- Multiple LLM providers supported via LiteLLM
- Configurable models per feature (chat, search, embeddings)
- Streaming support for real-time responses
- Token management and rate limiting
- Custom prompts and agent actions
## UI/UX Patterns
- Tailwind CSS with design system in `web/src/components/ui/`
- Radix UI and Headless UI for accessible components
- SWR for data fetching and caching
- Form validation with react-hook-form
- Error handling with popup notifications
## Creating a Plan
When creating a plan in the `plans` directory, make sure to include at least these elements:
**Issues to Address**
What the change is meant to do.
**Important Notes**
Things you come across in your research that are important to the implementation.
**Implementation strategy**
How you are going to make the changes happen. High level approach.
**Tests**
What unit (use rarely), external dependency unit, integration, and playwright tests you plan to write to
verify the correct behavior. Don't overtest. Usually, a given change only needs one type of test.
Do NOT include these: *Timeline*, *Rollback plan*
This is a minimal list - feel free to include more. Do NOT write code as part of your plan.
Keep it high level. You can reference certain files or functions though.
Before writing your plan, make sure to do research. Explore the relevant sections in the codebase.

View File

@@ -1,4 +1,4 @@
<!-- DANSWER_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/CONTRIBUTING.md"} -->
<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/CONTRIBUTING.md"} -->
# Contributing to Onyx
@@ -12,9 +12,8 @@ As an open source project in a rapidly changing space, we welcome all contributi
The [GitHub Issues](https://github.com/onyx-dot-app/onyx/issues) page is a great place to start for contribution ideas.
To ensure that your contribution is aligned with the project's direction, please reach out to Hagen (or any other maintainer) on the Onyx team
via [Slack](https://join.slack.com/t/onyx-dot-app/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA) /
[Discord](https://discord.gg/TDJ59cGV2X) or [email](mailto:founders@onyx.app).
To ensure that your contribution is aligned with the project's direction, please reach out to any maintainer on the Onyx team
via [Discord](https://discord.gg/4NA5SbzrWb) or [email](mailto:hello@onyx.app).
Issues that have been explicitly approved by the maintainers (aligned with the direction of the project)
will be marked with the `approved by maintainers` label.
@@ -28,8 +27,7 @@ Your input is vital to making sure that Onyx moves in the right direction.
Before starting on implementation, please raise a GitHub issue.
Also, always feel free to message the founders (Chris Weaver / Yuhong Sun) on
[Slack](https://join.slack.com/t/onyx-dot-app/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA) /
[Discord](https://discord.gg/TDJ59cGV2X) directly about anything at all.
[Discord](https://discord.gg/4NA5SbzrWb) directly about anything at all.
### Contributing Code
@@ -46,9 +44,7 @@ Our goal is to make contributing as easy as possible. If you run into any issues
That way we can help future contributors and users can avoid the same issue.
We also have support channels and generally interesting discussions on our
[Slack](https://join.slack.com/t/onyx-dot-app/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA)
and
[Discord](https://discord.gg/TDJ59cGV2X).
[Discord](https://discord.gg/4NA5SbzrWb).
We would love to see you there!
@@ -59,6 +55,7 @@ Onyx being a fully functional app, relies on some external software, specificall
- [Postgres](https://www.postgresql.org/) (Relational DB)
- [Vespa](https://vespa.ai/) (Vector DB/Search Engine)
- [Redis](https://redis.io/) (Cache)
- [MinIO](https://min.io/) (File Store)
- [Nginx](https://nginx.org/) (Not needed for development flows generally)
> **Note:**
@@ -83,10 +80,6 @@ python -m venv .venv
source .venv/bin/activate
```
> **Note:**
> This virtual environment MUST NOT be set up WITHIN the onyx directory if you plan on using mypy within certain IDEs.
> For simplicity, we recommend setting up the virtual environment outside of the onyx directory.
_For Windows, activate the virtual environment using Command Prompt:_
```bash
@@ -102,10 +95,21 @@ If using PowerShell, the command slightly differs:
Install the required python dependencies:
```bash
pip install -r onyx/backend/requirements/default.txt
pip install -r onyx/backend/requirements/dev.txt
pip install -r onyx/backend/requirements/ee.txt
pip install -r onyx/backend/requirements/model_server.txt
pip install -r backend/requirements/combined.txt
```
or
```bash
pip install -r backend/requirements/default.txt
pip install -r backend/requirements/dev.txt
pip install -r backend/requirements/ee.txt
pip install -r backend/requirements/model_server.txt
```
Fix vscode/cursor auto-imports:
```bash
pip install -e .
```
Install Playwright for Python (headless browser required by the Web Connector)
@@ -120,8 +124,15 @@ You may have to deactivate and reactivate your virtualenv for `playwright` to ap
#### Frontend: Node dependencies
Install [Node.js and npm](https://docs.npmjs.com/downloading-and-installing-node-js-and-npm) for the frontend.
Once the above is done, navigate to `onyx/web` run:
Onyx uses Node v22.20.0. We highly recommend you use [Node Version Manager (nvm)](https://github.com/nvm-sh/nvm)
to manage your Node installations. Once installed, you can run
```bash
nvm install 22 && nvm use 22
node -v # verify your active version
```
Navigate to `onyx/web` and run:
```bash
npm i
@@ -132,8 +143,6 @@ npm i
### Backend
For the backend, you'll need to setup pre-commit hooks (black / reorder-python-imports).
First, install pre-commit (if you don't have it already) following the instructions
[here](https://pre-commit.com/#installation).
With the virtual environment active, install the pre-commit library with:
@@ -153,15 +162,17 @@ To run the mypy checks manually, run `python -m mypy .` from the `onyx/backend`
### Web
We use `prettier` for formatting. The desired version (2.8.8) will be installed via a `npm i` from the `onyx/web` directory.
We use `prettier` for formatting. The desired version will be installed via a `npm i` from the `onyx/web` directory.
To run the formatter, use `npx prettier --write .` from the `onyx/web` directory.
Please double check that prettier passes before creating a pull request.
Pre-commit will also run prettier automatically on files you've recently touched. If re-formatted, your commit will fail.
Re-stage your changes and commit again.
# Running the application for development
## Developing using VSCode Debugger (recommended)
We highly recommend using VSCode debugger for development.
**We highly recommend using VSCode debugger for development.**
See [CONTRIBUTING_VSCODE.md](./CONTRIBUTING_VSCODE.md) for more details.
Otherwise, you can follow the instructions below to run the application for development.
@@ -171,10 +182,10 @@ Otherwise, you can follow the instructions below to run the application for deve
You will need Docker installed to run these containers.
First navigate to `onyx/deployment/docker_compose`, then start up Postgres/Vespa/Redis with:
First navigate to `onyx/deployment/docker_compose`, then start up Postgres/Vespa/Redis/MinIO with:
```bash
docker compose -f docker-compose.dev.yml -p onyx-stack up -d index relational_db cache
docker compose up -d index relational_db cache minio
```
(index refers to Vespa, relational_db refers to Postgres, and cache refers to Redis)
@@ -256,7 +267,7 @@ You can run the full Onyx application stack from pre-built images including all
Navigate to `onyx/deployment/docker_compose` and run:
```bash
docker compose -f docker-compose.dev.yml -p onyx-stack up -d
docker compose up -d
```
After Docker pulls and starts these containers, navigate to `http://localhost:3000` to use Onyx.
@@ -264,7 +275,7 @@ After Docker pulls and starts these containers, navigate to `http://localhost:30
If you want to make changes to Onyx and run those changes in Docker, you can also build a local version of the Onyx container images that incorporates your changes like so:
```bash
docker compose -f docker-compose.dev.yml -p onyx-stack up -d --build
docker compose up -d --build
```

View File

@@ -5,7 +5,7 @@ This guide explains how to set up and use VSCode's debugging capabilities with t
## Initial Setup
1. **Environment Setup**:
- Copy `.vscode/.env.template` to `.vscode/.env`
- Copy `.vscode/env_template.txt` to `.vscode/.env`
- Fill in the necessary environment variables in `.vscode/.env`
2. **launch.json**:
- Copy `.vscode/launch.template.jsonc` to `.vscode/launch.json`
@@ -17,10 +17,12 @@ Before starting, make sure the Docker Daemon is running.
1. Open the Debug view in VSCode (Cmd+Shift+D on macOS)
2. From the dropdown at the top, select "Clear and Restart External Volumes and Containers" and press the green play button
3. From the dropdown at the top, select "Run All Onyx Services" and press the green play button
4. CD into web, run "npm i" followed by npm run dev.
5. Now, you can navigate to onyx in your browser (default is http://localhost:3000) and start using the app
6. You can set breakpoints by clicking to the left of line numbers to help debug while the app is running
7. Use the debug toolbar to step through code, inspect variables, etc.
4. Now, you can navigate to onyx in your browser (default is http://localhost:3000) and start using the app
5. You can set breakpoints by clicking to the left of line numbers to help debug while the app is running
6. Use the debug toolbar to step through code, inspect variables, etc.
Note: Clear and Restart External Volumes and Containers will reset your postgres and Vespa (relational-db and index).
Only run this if you are okay with wiping your data.
## Features

177
README.md
View File

@@ -1,135 +1,108 @@
<!-- DANSWER_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/README.md"} -->
<a name="readme-top"></a>
<h2 align="center">
<a href="https://www.onyx.app/"> <img width="50%" src="https://github.com/onyx-dot-app/onyx/blob/logo/OnyxLogoCropped.jpg?raw=true)" /></a>
<a href="https://www.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme"> <img width="50%" src="https://github.com/onyx-dot-app/onyx/blob/logo/OnyxLogoCropped.jpg?raw=true" /></a>
</h2>
<p align="center">
<p align="center">Open Source Gen-AI + Enterprise Search.</p>
<p align="center">Open Source AI Platform</p>
<p align="center">
<a href="https://docs.onyx.app/" target="_blank">
<img src="https://img.shields.io/badge/docs-view-blue" alt="Documentation">
</a>
<a href="https://join.slack.com/t/onyx-dot-app/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA" target="_blank">
<img src="https://img.shields.io/badge/slack-join-blue.svg?logo=slack" alt="Slack">
</a>
<a href="https://discord.gg/TDJ59cGV2X" target="_blank">
<img src="https://img.shields.io/badge/discord-join-blue.svg?logo=discord&logoColor=white" alt="Discord">
</a>
<a href="https://github.com/onyx-dot-app/onyx/blob/main/README.md" target="_blank">
<img src="https://img.shields.io/static/v1?label=license&message=MIT&color=blue" alt="License">
</a>
<a href="https://discord.gg/TDJ59cGV2X" target="_blank">
<img src="https://img.shields.io/badge/discord-join-blue.svg?logo=discord&logoColor=white" alt="Discord" />
</a>
<a href="https://docs.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme" target="_blank">
<img src="https://img.shields.io/badge/docs-view-blue" alt="Documentation" />
</a>
<a href="https://www.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme" target="_blank">
<img src="https://img.shields.io/website?url=https://www.onyx.app&up_message=visit&up_color=blue" alt="Documentation" />
</a>
<a href="https://github.com/onyx-dot-app/onyx/blob/main/LICENSE" target="_blank">
<img src="https://img.shields.io/static/v1?label=license&message=MIT&color=blue" alt="License" />
</a>
</p>
<strong>[Onyx](https://www.onyx.app/)</strong> (formerly Danswer) is the AI Assistant connected to your company's docs, apps, and people.
Onyx provides a Chat interface and plugs into any LLM of your choice. Onyx can be deployed anywhere and for any
scale - on a laptop, on-premise, or to cloud. Since you own the deployment, your user data and chats are fully in your
own control. Onyx is dual Licensed with most of it under MIT license and designed to be modular and easily extensible. The system also comes fully ready
for production usage with user authentication, role management (admin/basic users), chat persistence, and a UI for
configuring AI Assistants.
<p align="center">
<a href="https://trendshift.io/repositories/12516" target="_blank">
<img src="https://trendshift.io/api/badge/repositories/12516" alt="onyx-dot-app/onyx | Trendshift" style="width: 250px; height: 55px;" />
</a>
</p>
Onyx also serves as a Enterprise Search across all common workplace tools such as Slack, Google Drive, Confluence, etc.
By combining LLMs and team specific knowledge, Onyx becomes a subject matter expert for the team. Imagine ChatGPT if
it had access to your team's unique knowledge! It enables questions such as "A customer wants feature X, is this already
supported?" or "Where's the pull request for feature Y?"
<h3>Usage</h3>
**[Onyx](https://www.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme)** is a feature-rich, self-hostable Chat UI that works with any LLM. It is easy to deploy and can run in a completely airgapped environment.
Onyx Web App:
Onyx comes loaded with advanced features like Agents, Web Search, RAG, MCP, Deep Research, Connectors to 40+ knowledge sources, and more.
https://github.com/onyx-dot-app/onyx/assets/32520769/563be14c-9304-47b5-bf0a-9049c2b6f410
> [!TIP]
> Run Onyx with one command (or see deployment section below):
> ```
> curl -fsSL https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/docker_compose/install.sh > install.sh && chmod +x install.sh && ./install.sh
> ```
Or, plug Onyx into your existing Slack workflows (more integrations to come 😁):
****
https://github.com/onyx-dot-app/onyx/assets/25087905/3e19739b-d178-4371-9a38-011430bdec1b
![Onyx Chat Silent Demo](https://github.com/onyx-dot-app/onyx/releases/download/v0.21.1/OnyxChatSilentDemo.gif)
For more details on the Admin UI to manage connectors and users, check out our
<strong><a href="https://www.youtube.com/watch?v=geNzY1nbCnU">Full Video Demo</a></strong>!
## Deployment
Onyx can easily be run locally (even on a laptop) or deployed on a virtual machine with a single
`docker compose` command. Checkout our [docs](https://docs.onyx.app/quickstart) to learn more.
## ⭐ Features
- **🤖 Custom Agents:** Build AI Agents with unique instructions, knowledge and actions.
- **🌍 Web Search:** Browse the web with Google PSE, Exa, and Serper as well as an in-house scraper or Firecrawl.
- **🔍 RAG:** Best in class hybrid-search + knowledge graph for uploaded files and ingested documents from connectors.
- **🔄 Connectors:** Pull knowledge, metadata, and access information from over 40 applications.
- **🔬 Deep Research:** Get in depth answers with an agentic multi-step search.
- **▶️ Actions & MCP:** Give AI Agents the ability to interact with external systems.
- **💻 Code Interpreter:** Execute code to analyze data, render graphs and create files.
- **🎨 Image Generation:** Generate images based on user prompts.
- **👥 Collaboration:** Chat sharing, feedback gathering, user management, usage analytics, and more.
We also have built-in support for deployment on Kubernetes. Files for that can be found [here](https://github.com/onyx-dot-app/onyx/tree/main/deployment/kubernetes).
Onyx works with all LLMs (like OpenAI, Anthropic, Gemini, etc.) and self-hosted LLMs (like Ollama, vLLM, etc.)
To learn more about the features, check out our [documentation](https://docs.onyx.app/welcome?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme)!
## 🚀 Deployment
Onyx supports deployments in Docker, Kubernetes, Terraform, along with guides for major cloud providers.
See guides below:
- [Docker](https://docs.onyx.app/deployment/local/docker?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme) or [Quickstart](https://docs.onyx.app/deployment/getting_started/quickstart?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme) (best for most users)
- [Kubernetes](https://docs.onyx.app/deployment/local/kubernetes?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme) (best for large teams)
- [Terraform](https://docs.onyx.app/deployment/local/terraform?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme) (best for teams already using Terraform)
- Cloud specific guides (best if specifically using [AWS EKS](https://docs.onyx.app/deployment/cloud/aws/eks?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme), [Azure VMs](https://docs.onyx.app/deployment/cloud/azure?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme), etc.)
> [!TIP]
> **To try Onyx for free without deploying, check out [Onyx Cloud](https://cloud.onyx.app/signup?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme)**.
## 🔍 Other Notable Benefits
Onyx is built for teams of all sizes, from individual users to the largest global enterprises.
- **Enterprise Search**: far more than simple RAG, Onyx has custom indexing and retrieval that remains performant and accurate for scales of up to tens of millions of documents.
- **Security**: SSO (OIDC/SAML/OAuth2), RBAC, encryption of credentials, etc.
- **Management UI**: different user roles such as basic, curator, and admin.
- **Document Permissioning**: mirrors user access from external apps for RAG use cases.
## 💃 Main Features
- Chat UI with the ability to select documents to chat with.
- Create custom AI Assistants with different prompts and backing knowledge sets.
- Connect Onyx with LLM of your choice (self-host for a fully airgapped solution).
- Document Search + AI Answers for natural language queries.
- Connectors to all common workplace tools like Google Drive, Confluence, Slack, etc.
- Slack integration to get answers and search results directly in Slack.
## 🚧 Roadmap
To see ongoing and upcoming projects, check out our [roadmap](https://github.com/orgs/onyx-dot-app/projects/2)!
- Chat/Prompt sharing with specific teammates and user groups.
- Multimodal model support, chat with images, video etc.
- Choosing between LLMs and parameters during chat session.
- Tool calling and agent configurations options.
- Organizational understanding and ability to locate and suggest experts from your team.
## Other Notable Benefits of Onyx
- User Authentication with document level access management.
- Best in class Hybrid Search across all sources (BM-25 + prefix aware embedding models).
- Admin Dashboard to configure connectors, document-sets, access, etc.
- Custom deep learning models + learn from user feedback.
- Easy deployment and ability to host Onyx anywhere of your choosing.
## 🔌 Connectors
Efficiently pulls the latest changes from:
- Slack
- GitHub
- Google Drive
- Confluence
- Jira
- Zendesk
- Gmail
- Notion
- Gong
- Slab
- Linear
- Productboard
- Guru
- Bookstack
- Document360
- Sharepoint
- Hubspot
- Local Files
- Websites
- And more ...
## 📚 Editions
## 📚 Licensing
There are two editions of Onyx:
- Onyx Community Edition (CE) is available freely under the MIT Expat license. This version has ALL the core features discussed above. This is the version of Onyx you will get if you follow the Deployment guide above.
- Onyx Enterprise Edition (EE) includes extra features that are primarily useful for larger organizations. Specifically, this includes:
- Single Sign-On (SSO), with support for both SAML and OIDC
- Role-based access control
- Document permission inheritance from connected sources
- Usage analytics and query history accessible to admins
- Whitelabeling
- API key authentication
- Encryption of secrets
- And many more! Checkout [our website](https://www.onyx.app/) for the latest.
- Onyx Community Edition (CE) is available freely under the MIT license.
- Onyx Enterprise Edition (EE) includes extra features that are primarily useful for larger organizations.
For feature details, check out [our website](https://www.onyx.app/pricing?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme).
## 👪 Community
Join our open source community on **[Discord](https://discord.gg/TDJ59cGV2X)**!
To try the Onyx Enterprise Edition:
1. Checkout our [Cloud product](https://cloud.onyx.app/signup).
2. For self-hosting, contact us at [founders@onyx.app](mailto:founders@onyx.app) or book a call with us on our [Cal](https://cal.com/team/danswer/founders).
## 💡 Contributing
Looking to contribute? Please check out the [Contribution Guide](CONTRIBUTING.md) for more details.
## ⭐Star History
[![Star History Chart](https://api.star-history.com/svg?repos=onyx-dot-app/onyx&type=Date)](https://star-history.com/#onyx-dot-app/onyx&Date)

4
backend/.gitignore vendored
View File

@@ -9,4 +9,6 @@ api_keys.py
vespa-app.zip
dynamic_config_storage/
celerybeat-schedule*
onyx/connectors/salesforce/data/
onyx/connectors/salesforce/data/
.test.env
/generated

View File

@@ -7,15 +7,20 @@ have a contract or agreement with DanswerAI, you are not permitted to use the En
Edition features outside of personal development or testing purposes. Please reach out to \
founders@onyx.app for more information. Please visit https://github.com/onyx-dot-app/onyx"
# Default ONYX_VERSION, typically overriden during builds by GitHub Actions.
ARG ONYX_VERSION=0.8-dev
# DO_NOT_TRACK is used to disable telemetry for Unstructured
ENV ONYX_VERSION=${ONYX_VERSION} \
DANSWER_RUNNING_IN_DOCKER="true" \
DO_NOT_TRACK="true"
ENV DANSWER_RUNNING_IN_DOCKER="true" \
DO_NOT_TRACK="true" \
PLAYWRIGHT_BROWSERS_PATH="/app/.cache/ms-playwright"
# Create non-root user for security best practices
RUN groupadd -g 1001 onyx && \
useradd -u 1001 -g onyx -m -s /bin/bash onyx && \
mkdir -p /var/log/onyx && \
chmod 755 /var/log/onyx && \
chown onyx:onyx /var/log/onyx
COPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/
RUN echo "ONYX_VERSION: ${ONYX_VERSION}"
# Install system dependencies
# cmake needed for psycopg (postgres)
# libpq-dev needed for psycopg (postgres)
@@ -28,14 +33,16 @@ RUN apt-get update && \
curl \
zip \
ca-certificates \
libgnutls30=3.7.9-2+deb12u3 \
libblkid1=2.38.1-5+deb12u1 \
libmount1=2.38.1-5+deb12u1 \
libsmartcols1=2.38.1-5+deb12u1 \
libuuid1=2.38.1-5+deb12u1 \
libgnutls30 \
libblkid1 \
libmount1 \
libsmartcols1 \
libuuid1 \
libxmlsec1-dev \
pkg-config \
gcc && \
gcc \
nano \
vim && \
rm -rf /var/lib/apt/lists/* && \
apt-get clean
@@ -45,22 +52,20 @@ RUN apt-get update && \
# Remove py which is pulled in by retry, py is not needed and is a CVE
COPY ./requirements/default.txt /tmp/requirements.txt
COPY ./requirements/ee.txt /tmp/ee-requirements.txt
RUN pip install --no-cache-dir --upgrade \
--retries 5 \
--timeout 30 \
RUN uv pip install --system --no-cache-dir --upgrade \
-r /tmp/requirements.txt \
-r /tmp/ee-requirements.txt && \
pip uninstall -y py && \
playwright install chromium && \
playwright install-deps chromium && \
ln -s /usr/local/bin/supervisord /usr/bin/supervisord
# Cleanup for CVEs and size reduction
# https://github.com/tornadoweb/tornado/issues/3107
# xserver-common and xvfb included by playwright installation but not needed after
# perl-base is part of the base Python Debian image but not needed for Onyx functionality
# perl-base could only be removed with --allow-remove-essential
RUN apt-get update && \
chown -R onyx:onyx /app && \
ln -s /usr/local/bin/supervisord /usr/bin/supervisord && \
# Cleanup for CVEs and size reduction
# https://github.com/tornadoweb/tornado/issues/3107
# xserver-common and xvfb included by playwright installation but not needed after
# perl-base is part of the base Python Debian image but not needed for Onyx functionality
# perl-base could only be removed with --allow-remove-essential
apt-get update && \
apt-get remove -y --allow-remove-essential \
perl-base \
xserver-common \
@@ -70,12 +75,16 @@ RUN apt-get update && \
libxmlsec1-dev \
pkg-config \
gcc && \
apt-get install -y libxmlsec1-openssl && \
# Install here to avoid some packages being cleaned up above
apt-get install -y \
libxmlsec1-openssl \
# Install postgresql-client for easy manual tests
postgresql-client && \
apt-get autoremove -y && \
rm -rf /var/lib/apt/lists/* && \
rm -rf ~/.cache/uv /tmp/*.txt && \
rm -f /usr/local/lib/python3.11/site-packages/tornado/test/test.key
# Pre-downloading models for setups with limited egress
RUN python -c "from tokenizers import Tokenizer; \
Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')"
@@ -83,32 +92,44 @@ Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')"
# Pre-downloading NLTK for setups with limited egress
RUN python -c "import nltk; \
nltk.download('stopwords', quiet=True); \
nltk.download('punkt', quiet=True);"
nltk.download('punkt_tab', quiet=True);"
# nltk.download('wordnet', quiet=True); introduce this back if lemmatization is needed
# Pre-downloading tiktoken for setups with limited egress
RUN python -c "import tiktoken; \
tiktoken.get_encoding('cl100k_base')"
# Set up application files
WORKDIR /app
# Enterprise Version Files
COPY ./ee /app/ee
COPY --chown=onyx:onyx ./ee /app/ee
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
# Set up application files
COPY ./onyx /app/onyx
COPY ./shared_configs /app/shared_configs
COPY ./alembic /app/alembic
COPY ./alembic_tenants /app/alembic_tenants
COPY ./alembic.ini /app/alembic.ini
COPY --chown=onyx:onyx ./onyx /app/onyx
COPY --chown=onyx:onyx ./shared_configs /app/shared_configs
COPY --chown=onyx:onyx ./alembic /app/alembic
COPY --chown=onyx:onyx ./alembic_tenants /app/alembic_tenants
COPY --chown=onyx:onyx ./alembic.ini /app/alembic.ini
COPY supervisord.conf /usr/etc/supervisord.conf
COPY --chown=onyx:onyx ./static /app/static
# Escape hatch
COPY ./scripts/force_delete_connector_by_id.py /app/scripts/force_delete_connector_by_id.py
# Escape hatch scripts
COPY --chown=onyx:onyx ./scripts/debugging /app/scripts/debugging
COPY --chown=onyx:onyx ./scripts/force_delete_connector_by_id.py /app/scripts/force_delete_connector_by_id.py
COPY --chown=onyx:onyx ./scripts/supervisord_entrypoint.sh /app/scripts/supervisord_entrypoint.sh
RUN chmod +x /app/scripts/supervisord_entrypoint.sh
# Put logo in assets
COPY ./assets /app/assets
COPY --chown=onyx:onyx ./assets /app/assets
ENV PYTHONPATH=/app
# Default ONYX_VERSION, typically overriden during builds by GitHub Actions.
ARG ONYX_VERSION=0.0.0-dev
ENV ONYX_VERSION=${ONYX_VERSION}
# Default command which does nothing
# This container is used by api server and background which specify their own CMD
CMD ["tail", "-f", "/dev/null"]

View File

@@ -1,4 +1,42 @@
FROM python:3.11.7-slim-bookworm
# Base stage with dependencies
FROM python:3.11.7-slim-bookworm AS base
ENV DANSWER_RUNNING_IN_DOCKER="true" \
HF_HOME=/app/.cache/huggingface
COPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/
RUN mkdir -p /app/.cache/huggingface
COPY ./requirements/model_server.txt /tmp/requirements.txt
RUN uv pip install --system --no-cache-dir --upgrade \
-r /tmp/requirements.txt && \
rm -rf ~/.cache/uv /tmp/*.txt
# Stage for downloading tokenizers
FROM base AS tokenizers
RUN python -c "from transformers import AutoTokenizer; \
AutoTokenizer.from_pretrained('distilbert-base-uncased'); \
AutoTokenizer.from_pretrained('mixedbread-ai/mxbai-rerank-xsmall-v1');"
# Stage for downloading Onyx models
FROM base AS onyx-models
RUN python -c "from huggingface_hub import snapshot_download; \
snapshot_download(repo_id='onyx-dot-app/hybrid-intent-token-classifier'); \
snapshot_download(repo_id='onyx-dot-app/information-content-model');"
# Stage for downloading embedding and reranking models
FROM base AS embedding-models
RUN python -c "from huggingface_hub import snapshot_download; \
snapshot_download('nomic-ai/nomic-embed-text-v1'); \
snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1');"
# Initialize SentenceTransformer to cache the custom architecture
RUN python -c "from sentence_transformers import SentenceTransformer; \
SentenceTransformer(model_name_or_path='nomic-ai/nomic-embed-text-v1', trust_remote_code=True);"
# Final stage - combine all downloads
FROM base AS final
LABEL com.danswer.maintainer="founders@onyx.app"
LABEL com.danswer.description="This image is for the Onyx model server which runs all of the \
@@ -6,45 +44,25 @@ AI models for Onyx. This container and all the code is MIT Licensed and free for
You can find it at https://hub.docker.com/r/onyx/onyx-model-server. For more details, \
visit https://github.com/onyx-dot-app/onyx."
# Default ONYX_VERSION, typically overriden during builds by GitHub Actions.
ARG ONYX_VERSION=0.8-dev
ENV ONYX_VERSION=${ONYX_VERSION} \
DANSWER_RUNNING_IN_DOCKER="true"
# Create non-root user for security best practices
RUN groupadd -g 1001 onyx && \
useradd -u 1001 -g onyx -m -s /bin/bash onyx && \
mkdir -p /var/log/onyx && \
chmod 755 /var/log/onyx && \
chown onyx:onyx /var/log/onyx
RUN echo "ONYX_VERSION: ${ONYX_VERSION}"
COPY ./requirements/model_server.txt /tmp/requirements.txt
RUN pip install --no-cache-dir --upgrade \
--retries 5 \
--timeout 30 \
-r /tmp/requirements.txt
RUN apt-get remove -y --allow-remove-essential perl-base && \
apt-get autoremove -y
# Pre-downloading models for setups with limited egress
# Download tokenizers, distilbert for the Onyx model
# Download model weights
# Run Nomic to pull in the custom architecture and have it cached locally
RUN python -c "from transformers import AutoTokenizer; \
AutoTokenizer.from_pretrained('distilbert-base-uncased'); \
AutoTokenizer.from_pretrained('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
from huggingface_hub import snapshot_download; \
snapshot_download(repo_id='danswer/hybrid-intent-token-classifier', revision='v1.0.3'); \
snapshot_download('nomic-ai/nomic-embed-text-v1'); \
snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
from sentence_transformers import SentenceTransformer; \
SentenceTransformer(model_name_or_path='nomic-ai/nomic-embed-text-v1', trust_remote_code=True);"
# In case the user has volumes mounted to /root/.cache/huggingface that they've downloaded while
# running Onyx, don't overwrite it with the built in cache folder
RUN mv /root/.cache/huggingface /root/.cache/temp_huggingface
# In case the user has volumes mounted to /app/.cache/huggingface that they've downloaded while
# running Onyx, move the current contents of the cache folder to a temporary location to ensure
# it's preserved in order to combine with the user's cache contents
COPY --chown=onyx:onyx --from=tokenizers /app/.cache/huggingface /app/.cache/temp_huggingface
COPY --chown=onyx:onyx --from=onyx-models /app/.cache/huggingface /app/.cache/temp_huggingface
COPY --chown=onyx:onyx --from=embedding-models /app/.cache/huggingface /app/.cache/temp_huggingface
WORKDIR /app
# Utils used by model server
COPY ./onyx/utils/logger.py /app/onyx/utils/logger.py
COPY ./onyx/utils/middleware.py /app/onyx/utils/middleware.py
# Place to fetch version information
COPY ./onyx/__init__.py /app/onyx/__init__.py
@@ -57,4 +75,8 @@ COPY ./model_server /app/model_server
ENV PYTHONPATH=/app
# Default ONYX_VERSION, typically overriden during builds by GitHub Actions.
ARG ONYX_VERSION=0.0.0-dev
ENV ONYX_VERSION=${ONYX_VERSION}
CMD ["uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000"]

View File

@@ -84,7 +84,7 @@ keys = console
keys = generic
[logger_root]
level = WARN
level = INFO
handlers = console
qualname =

View File

@@ -1,4 +1,4 @@
<!-- DANSWER_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/backend/alembic/README.md"} -->
<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/backend/alembic/README.md"} -->
# Alembic DB Migrations
@@ -20,3 +20,44 @@ To run all un-applied migrations:
To undo migrations:
`alembic downgrade -X`
where X is the number of migrations you want to undo from the current state
### Multi-tenant migrations
For multi-tenant deployments, you can use additional options:
**Upgrade all tenants:**
```bash
alembic -x upgrade_all_tenants=true upgrade head
```
**Upgrade specific schemas:**
```bash
# Single schema
alembic -x schemas=tenant_12345678-1234-1234-1234-123456789012 upgrade head
# Multiple schemas (comma-separated)
alembic -x schemas=tenant_12345678-1234-1234-1234-123456789012,public,another_tenant upgrade head
```
**Upgrade tenants within an alphabetical range:**
```bash
# Upgrade tenants 100-200 when sorted alphabetically (positions 100 to 200)
alembic -x upgrade_all_tenants=true -x tenant_range_start=100 -x tenant_range_end=200 upgrade head
# Upgrade tenants starting from position 1000 alphabetically
alembic -x upgrade_all_tenants=true -x tenant_range_start=1000 upgrade head
# Upgrade first 500 tenants alphabetically
alembic -x upgrade_all_tenants=true -x tenant_range_end=500 upgrade head
```
**Continue on error (for batch operations):**
```bash
alembic -x upgrade_all_tenants=true -x continue=true upgrade head
```
The tenant range filtering works by:
1. Sorting tenant IDs alphabetically
2. Using 1-based position numbers (1st, 2nd, 3rd tenant, etc.)
3. Filtering to the specified range of positions
4. Non-tenant schemas (like 'public') are always included

View File

@@ -1,12 +1,12 @@
from typing import Any, Literal
from onyx.db.engine import get_iam_auth_token
from onyx.db.engine.iam_auth import get_iam_auth_token
from onyx.configs.app_configs import USE_IAM_AUTH
from onyx.configs.app_configs import POSTGRES_HOST
from onyx.configs.app_configs import POSTGRES_PORT
from onyx.configs.app_configs import POSTGRES_USER
from onyx.configs.app_configs import AWS_REGION_NAME
from onyx.db.engine import build_connection_string
from onyx.db.engine import get_all_tenant_ids
from onyx.db.engine.sql_engine import build_connection_string
from onyx.db.engine.tenant_utils import get_all_tenant_ids
from sqlalchemy import event
from sqlalchemy import pool
from sqlalchemy import text
@@ -21,9 +21,17 @@ from alembic import context
from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy.sql.schema import SchemaItem
from onyx.configs.constants import SSL_CERT_FILE
from shared_configs.configs import MULTI_TENANT, POSTGRES_DEFAULT_SCHEMA
from shared_configs.configs import (
MULTI_TENANT,
POSTGRES_DEFAULT_SCHEMA,
TENANT_ID_PREFIX,
)
from onyx.db.models import Base
from celery.backends.database.session import ResultModelBase # type: ignore
from onyx.db.engine.sql_engine import SqlEngine
# Make sure in alembic.ini [logger_root] level=INFO is set or most logging will be
# hidden! (defaults to level=WARN)
# Alembic Config object
config = context.config
@@ -36,6 +44,7 @@ if config.config_file_name is not None and config.attributes.get(
target_metadata = [Base.metadata, ResultModelBase.metadata]
EXCLUDE_TABLES = {"kombu_queue", "kombu_message"}
logger = logging.getLogger(__name__)
ssl_context: ssl.SSLContext | None = None
@@ -64,36 +73,154 @@ def include_object(
return True
def get_schema_options() -> tuple[str, bool, bool]:
def filter_tenants_by_range(
tenant_ids: list[str], start_range: int | None = None, end_range: int | None = None
) -> list[str]:
"""
Filter tenant IDs by alphabetical position range.
Args:
tenant_ids: List of tenant IDs to filter
start_range: Starting position in alphabetically sorted list (1-based, inclusive)
end_range: Ending position in alphabetically sorted list (1-based, inclusive)
Returns:
Filtered list of tenant IDs in their original order
"""
if start_range is None and end_range is None:
return tenant_ids
# Separate tenant IDs from non-tenant schemas
tenant_schemas = [tid for tid in tenant_ids if tid.startswith(TENANT_ID_PREFIX)]
non_tenant_schemas = [
tid for tid in tenant_ids if not tid.startswith(TENANT_ID_PREFIX)
]
# Sort tenant schemas alphabetically.
# NOTE: can cause missed schemas if a schema is created in between workers
# fetching of all tenant IDs. We accept this risk for now. Just re-running
# the migration will fix the issue.
sorted_tenant_schemas = sorted(tenant_schemas)
# Apply range filtering (0-based indexing)
start_idx = start_range if start_range is not None else 0
end_idx = end_range if end_range is not None else len(sorted_tenant_schemas)
# Ensure indices are within bounds
start_idx = max(0, start_idx)
end_idx = min(len(sorted_tenant_schemas), end_idx)
# Get the filtered tenant schemas
filtered_tenant_schemas = sorted_tenant_schemas[start_idx:end_idx]
# Combine with non-tenant schemas and preserve original order
filtered_tenants = []
for tenant_id in tenant_ids:
if tenant_id in filtered_tenant_schemas or tenant_id in non_tenant_schemas:
filtered_tenants.append(tenant_id)
return filtered_tenants
def get_schema_options() -> (
tuple[bool, bool, bool, int | None, int | None, list[str] | None]
):
x_args_raw = context.get_x_argument()
x_args = {}
for arg in x_args_raw:
for pair in arg.split(","):
if "=" in pair:
key, value = pair.split("=", 1)
x_args[key.strip()] = value.strip()
schema_name = x_args.get("schema", POSTGRES_DEFAULT_SCHEMA)
if "=" in arg:
key, value = arg.split("=", 1)
x_args[key.strip()] = value.strip()
else:
raise ValueError(f"Invalid argument: {arg}")
create_schema = x_args.get("create_schema", "true").lower() == "true"
upgrade_all_tenants = x_args.get("upgrade_all_tenants", "false").lower() == "true"
if (
MULTI_TENANT
and schema_name == POSTGRES_DEFAULT_SCHEMA
and not upgrade_all_tenants
):
# continue on error with individual tenant
# only applies to online migrations
continue_on_error = x_args.get("continue", "false").lower() == "true"
# Tenant range filtering
tenant_range_start = None
tenant_range_end = None
if "tenant_range_start" in x_args:
try:
tenant_range_start = int(x_args["tenant_range_start"])
except ValueError:
raise ValueError(
f"Invalid tenant_range_start value: {x_args['tenant_range_start']}. Must be an integer."
)
if "tenant_range_end" in x_args:
try:
tenant_range_end = int(x_args["tenant_range_end"])
except ValueError:
raise ValueError(
f"Invalid tenant_range_end value: {x_args['tenant_range_end']}. Must be an integer."
)
# Validate range
if tenant_range_start is not None and tenant_range_end is not None:
if tenant_range_start > tenant_range_end:
raise ValueError(
f"tenant_range_start ({tenant_range_start}) cannot be greater than tenant_range_end ({tenant_range_end})"
)
# Specific schema names filtering (replaces both schema_name and the old tenant_ids approach)
schemas = None
if "schemas" in x_args:
schema_names_str = x_args["schemas"].strip()
if schema_names_str:
# Split by comma and strip whitespace
schemas = [
name.strip() for name in schema_names_str.split(",") if name.strip()
]
if schemas:
logger.info(f"Specific schema names specified: {schemas}")
# Validate that only one method is used at a time
range_filtering = tenant_range_start is not None or tenant_range_end is not None
specific_filtering = schemas is not None and len(schemas) > 0
if range_filtering and specific_filtering:
raise ValueError(
"Cannot run default migrations in public schema when multi-tenancy is enabled. "
"Please specify a tenant-specific schema."
"Cannot use both tenant range filtering (tenant_range_start/tenant_range_end) "
"and specific schema filtering (schemas) at the same time. "
"Please use only one filtering method."
)
return schema_name, create_schema, upgrade_all_tenants
if upgrade_all_tenants and specific_filtering:
raise ValueError(
"Cannot use both upgrade_all_tenants=true and schemas at the same time. "
"Use either upgrade_all_tenants=true for all tenants, or schemas for specific schemas."
)
# If any filtering parameters are specified, we're not doing the default single schema migration
if range_filtering:
upgrade_all_tenants = True
# Validate multi-tenant requirements
if MULTI_TENANT and not upgrade_all_tenants and not specific_filtering:
raise ValueError(
"In multi-tenant mode, you must specify either upgrade_all_tenants=true "
"or provide schemas. Cannot run default migration."
)
return (
create_schema,
upgrade_all_tenants,
continue_on_error,
tenant_range_start,
tenant_range_end,
schemas,
)
def do_run_migrations(
connection: Connection, schema_name: str, create_schema: bool
) -> None:
logger.info(f"About to migrate schema: {schema_name}")
if create_schema:
connection.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{schema_name}"'))
connection.execute(text("COMMIT"))
@@ -134,7 +261,20 @@ def provide_iam_token_for_alembic(
async def run_async_migrations() -> None:
schema_name, create_schema, upgrade_all_tenants = get_schema_options()
(
create_schema,
upgrade_all_tenants,
continue_on_error,
tenant_range_start,
tenant_range_end,
schemas,
) = get_schema_options()
if not schemas and not MULTI_TENANT:
schemas = [POSTGRES_DEFAULT_SCHEMA]
# without init_engine, subsequent engine calls fail hard intentionally
SqlEngine.init_engine(pool_size=20, max_overflow=5)
engine = create_async_engine(
build_connection_string(),
@@ -149,11 +289,18 @@ async def run_async_migrations() -> None:
) -> None:
provide_iam_token_for_alembic(dialect, conn_rec, cargs, cparams)
if upgrade_all_tenants:
tenant_schemas = get_all_tenant_ids()
for schema in tenant_schemas:
if schemas:
# Use specific schema names directly without fetching all tenants
logger.info(f"Migrating specific schema names: {schemas}")
i_schema = 0
num_schemas = len(schemas)
for schema in schemas:
i_schema += 1
logger.info(
f"Migrating schema: index={i_schema} num_schemas={num_schemas} schema={schema}"
)
try:
logger.info(f"Migrating schema: {schema}")
async with engine.connect() as connection:
await connection.run_sync(
do_run_migrations,
@@ -162,28 +309,108 @@ async def run_async_migrations() -> None:
)
except Exception as e:
logger.error(f"Error migrating schema {schema}: {e}")
raise
if not continue_on_error:
logger.error("--continue=true is not set, raising exception!")
raise
logger.warning("--continue=true is set, continuing to next schema.")
elif upgrade_all_tenants:
tenant_schemas = get_all_tenant_ids()
filtered_tenant_schemas = filter_tenants_by_range(
tenant_schemas, tenant_range_start, tenant_range_end
)
if tenant_range_start is not None or tenant_range_end is not None:
logger.info(
f"Filtering tenants by range: start={tenant_range_start}, end={tenant_range_end}"
)
logger.info(
f"Total tenants: {len(tenant_schemas)}, Filtered tenants: {len(filtered_tenant_schemas)}"
)
i_tenant = 0
num_tenants = len(filtered_tenant_schemas)
for schema in filtered_tenant_schemas:
i_tenant += 1
logger.info(
f"Migrating schema: index={i_tenant} num_tenants={num_tenants} schema={schema}"
)
try:
async with engine.connect() as connection:
await connection.run_sync(
do_run_migrations,
schema_name=schema,
create_schema=create_schema,
)
except Exception as e:
logger.error(f"Error migrating schema {schema}: {e}")
if not continue_on_error:
logger.error("--continue=true is not set, raising exception!")
raise
logger.warning("--continue=true is set, continuing to next schema.")
else:
try:
logger.info(f"Migrating schema: {schema_name}")
async with engine.connect() as connection:
await connection.run_sync(
do_run_migrations,
schema_name=schema_name,
create_schema=create_schema,
)
except Exception as e:
logger.error(f"Error migrating schema {schema_name}: {e}")
raise
# This should not happen in the new design since we require either
# upgrade_all_tenants=true or schemas in multi-tenant mode
# and for non-multi-tenant mode, we should use schemas with the default schema
raise ValueError(
"No migration target specified. Use either upgrade_all_tenants=true for all tenants "
"or schemas for specific schemas."
)
await engine.dispose()
def run_migrations_offline() -> None:
schema_name, _, upgrade_all_tenants = get_schema_options()
"""
NOTE(rkuo): This generates a sql script that can be used to migrate the database ...
instead of migrating the db live via an open connection
Not clear on when this would be used by us or if it even works.
If it is offline, then why are there calls to the db engine?
This doesn't really get used when we migrate in the cloud."""
logger.info("run_migrations_offline starting.")
# without init_engine, subsequent engine calls fail hard intentionally
SqlEngine.init_engine(pool_size=20, max_overflow=5)
(
create_schema,
upgrade_all_tenants,
continue_on_error,
tenant_range_start,
tenant_range_end,
schemas,
) = get_schema_options()
url = build_connection_string()
if upgrade_all_tenants:
if schemas:
# Use specific schema names directly without fetching all tenants
logger.info(f"Migrating specific schema names: {schemas}")
for schema in schemas:
logger.info(f"Migrating schema: {schema}")
context.configure(
url=url,
target_metadata=target_metadata, # type: ignore
literal_binds=True,
include_object=include_object,
version_table_schema=schema,
include_schemas=True,
script_location=config.get_main_option("script_location"),
dialect_opts={"paramstyle": "named"},
)
with context.begin_transaction():
context.run_migrations()
elif upgrade_all_tenants:
engine = create_async_engine(url)
if USE_IAM_AUTH:
@@ -197,7 +424,19 @@ def run_migrations_offline() -> None:
tenant_schemas = get_all_tenant_ids()
engine.sync_engine.dispose()
for schema in tenant_schemas:
filtered_tenant_schemas = filter_tenants_by_range(
tenant_schemas, tenant_range_start, tenant_range_end
)
if tenant_range_start is not None or tenant_range_end is not None:
logger.info(
f"Filtering tenants by range: start={tenant_range_start}, end={tenant_range_end}"
)
logger.info(
f"Total tenants: {len(tenant_schemas)}, Filtered tenants: {len(filtered_tenant_schemas)}"
)
for schema in filtered_tenant_schemas:
logger.info(f"Migrating schema: {schema}")
context.configure(
url=url,
@@ -213,23 +452,15 @@ def run_migrations_offline() -> None:
with context.begin_transaction():
context.run_migrations()
else:
logger.info(f"Migrating schema: {schema_name}")
context.configure(
url=url,
target_metadata=target_metadata, # type: ignore
literal_binds=True,
include_object=include_object,
version_table_schema=schema_name,
include_schemas=True,
script_location=config.get_main_option("script_location"),
dialect_opts={"paramstyle": "named"},
# This should not happen in the new design
raise ValueError(
"No migration target specified. Use either upgrade_all_tenants=true for all tenants "
"or schemas for specific schemas."
)
with context.begin_transaction():
context.run_migrations()
def run_migrations_online() -> None:
logger.info("run_migrations_online starting.")
asyncio.run(run_async_migrations())

View File

@@ -5,6 +5,7 @@ Revises: 6fc7886d665d
Create Date: 2025-01-14 12:14:00.814390
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -0,0 +1,121 @@
"""rework-kg-config
Revision ID: 03bf8be6b53a
Revises: 65bc6e0f8500
Create Date: 2025-06-16 10:52:34.815335
"""
import json
from datetime import datetime
from datetime import timedelta
from sqlalchemy.dialects import postgresql
from sqlalchemy import text
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "03bf8be6b53a"
down_revision = "65bc6e0f8500"
branch_labels = None
depends_on = None
def upgrade() -> None:
# get current config
current_configs = (
op.get_bind()
.execute(text("SELECT kg_variable_name, kg_variable_values FROM kg_config"))
.all()
)
current_config_dict = {
config.kg_variable_name: (
config.kg_variable_values[0]
if config.kg_variable_name
not in ("KG_VENDOR_DOMAINS", "KG_IGNORE_EMAIL_DOMAINS")
else config.kg_variable_values
)
for config in current_configs
if config.kg_variable_values
}
# not using the KGConfigSettings model here in case it changes in the future
kg_config_settings = json.dumps(
{
"KG_EXPOSED": current_config_dict.get("KG_EXPOSED", False),
"KG_ENABLED": current_config_dict.get("KG_ENABLED", False),
"KG_VENDOR": current_config_dict.get("KG_VENDOR", None),
"KG_VENDOR_DOMAINS": current_config_dict.get("KG_VENDOR_DOMAINS", []),
"KG_IGNORE_EMAIL_DOMAINS": current_config_dict.get(
"KG_IGNORE_EMAIL_DOMAINS", []
),
"KG_COVERAGE_START": current_config_dict.get(
"KG_COVERAGE_START",
(datetime.now() - timedelta(days=90)).strftime("%Y-%m-%d"),
),
"KG_MAX_COVERAGE_DAYS": current_config_dict.get("KG_MAX_COVERAGE_DAYS", 90),
"KG_MAX_PARENT_RECURSION_DEPTH": current_config_dict.get(
"KG_MAX_PARENT_RECURSION_DEPTH", 2
),
"KG_BETA_PERSONA_ID": current_config_dict.get("KG_BETA_PERSONA_ID", None),
}
)
op.execute(
f"INSERT INTO key_value_store (key, value) VALUES ('kg_config', '{kg_config_settings}')"
)
# drop kg config table
op.drop_table("kg_config")
def downgrade() -> None:
# get current config
current_config_dict = {
"KG_EXPOSED": False,
"KG_ENABLED": False,
"KG_VENDOR": [],
"KG_VENDOR_DOMAINS": [],
"KG_IGNORE_EMAIL_DOMAINS": [],
"KG_COVERAGE_START": (datetime.now() - timedelta(days=90)).strftime("%Y-%m-%d"),
"KG_MAX_COVERAGE_DAYS": 90,
"KG_MAX_PARENT_RECURSION_DEPTH": 2,
}
current_configs = (
op.get_bind()
.execute(text("SELECT value FROM key_value_store WHERE key = 'kg_config'"))
.one_or_none()
)
if current_configs is not None:
current_config_dict.update(current_configs[0])
insert_values = [
{
"kg_variable_name": name,
"kg_variable_values": (
[str(val).lower() if isinstance(val, bool) else str(val)]
if not isinstance(val, list)
else val
),
}
for name, val in current_config_dict.items()
]
op.create_table(
"kg_config",
sa.Column("id", sa.Integer(), primary_key=True, nullable=False, index=True),
sa.Column("kg_variable_name", sa.String(), nullable=False, index=True),
sa.Column("kg_variable_values", postgresql.ARRAY(sa.String()), nullable=False),
sa.UniqueConstraint("kg_variable_name", name="uq_kg_config_variable_name"),
)
op.bulk_insert(
sa.table(
"kg_config",
sa.column("kg_variable_name", sa.String),
sa.column("kg_variable_values", postgresql.ARRAY(sa.String)),
),
insert_values,
)
op.execute("DELETE FROM key_value_store WHERE key = 'kg_config'")

View File

@@ -0,0 +1,153 @@
"""add permission sync attempt tables
Revision ID: 03d710ccf29c
Revises: 96a5702df6aa
Create Date: 2025-09-11 13:30:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "03d710ccf29c" # Generate a new unique ID
down_revision = "96a5702df6aa"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Create the permission sync status enum
permission_sync_status_enum = sa.Enum(
"not_started",
"in_progress",
"success",
"canceled",
"failed",
"completed_with_errors",
name="permissionsyncstatus",
native_enum=False,
)
# Create doc_permission_sync_attempt table
op.create_table(
"doc_permission_sync_attempt",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("connector_credential_pair_id", sa.Integer(), nullable=False),
sa.Column("status", permission_sync_status_enum, nullable=False),
sa.Column("total_docs_synced", sa.Integer(), nullable=True),
sa.Column("docs_with_permission_errors", sa.Integer(), nullable=True),
sa.Column("error_message", sa.Text(), nullable=True),
sa.Column(
"time_created",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column("time_started", sa.DateTime(timezone=True), nullable=True),
sa.Column("time_finished", sa.DateTime(timezone=True), nullable=True),
sa.ForeignKeyConstraint(
["connector_credential_pair_id"],
["connector_credential_pair.id"],
),
sa.PrimaryKeyConstraint("id"),
)
# Create indexes for doc_permission_sync_attempt
op.create_index(
"ix_doc_permission_sync_attempt_time_created",
"doc_permission_sync_attempt",
["time_created"],
unique=False,
)
op.create_index(
"ix_permission_sync_attempt_latest_for_cc_pair",
"doc_permission_sync_attempt",
["connector_credential_pair_id", "time_created"],
unique=False,
)
op.create_index(
"ix_permission_sync_attempt_status_time",
"doc_permission_sync_attempt",
["status", sa.text("time_finished DESC")],
unique=False,
)
# Create external_group_permission_sync_attempt table
# connector_credential_pair_id is nullable - group syncs can be global (e.g., Confluence)
op.create_table(
"external_group_permission_sync_attempt",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("connector_credential_pair_id", sa.Integer(), nullable=True),
sa.Column("status", permission_sync_status_enum, nullable=False),
sa.Column("total_users_processed", sa.Integer(), nullable=True),
sa.Column("total_groups_processed", sa.Integer(), nullable=True),
sa.Column("total_group_memberships_synced", sa.Integer(), nullable=True),
sa.Column("error_message", sa.Text(), nullable=True),
sa.Column(
"time_created",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column("time_started", sa.DateTime(timezone=True), nullable=True),
sa.Column("time_finished", sa.DateTime(timezone=True), nullable=True),
sa.ForeignKeyConstraint(
["connector_credential_pair_id"],
["connector_credential_pair.id"],
),
sa.PrimaryKeyConstraint("id"),
)
# Create indexes for external_group_permission_sync_attempt
op.create_index(
"ix_external_group_permission_sync_attempt_time_created",
"external_group_permission_sync_attempt",
["time_created"],
unique=False,
)
op.create_index(
"ix_group_sync_attempt_cc_pair_time",
"external_group_permission_sync_attempt",
["connector_credential_pair_id", "time_created"],
unique=False,
)
op.create_index(
"ix_group_sync_attempt_status_time",
"external_group_permission_sync_attempt",
["status", sa.text("time_finished DESC")],
unique=False,
)
def downgrade() -> None:
# Drop indexes
op.drop_index(
"ix_group_sync_attempt_status_time",
table_name="external_group_permission_sync_attempt",
)
op.drop_index(
"ix_group_sync_attempt_cc_pair_time",
table_name="external_group_permission_sync_attempt",
)
op.drop_index(
"ix_external_group_permission_sync_attempt_time_created",
table_name="external_group_permission_sync_attempt",
)
op.drop_index(
"ix_permission_sync_attempt_status_time",
table_name="doc_permission_sync_attempt",
)
op.drop_index(
"ix_permission_sync_attempt_latest_for_cc_pair",
table_name="doc_permission_sync_attempt",
)
op.drop_index(
"ix_doc_permission_sync_attempt_time_created",
table_name="doc_permission_sync_attempt",
)
# Drop tables
op.drop_table("external_group_permission_sync_attempt")
op.drop_table("doc_permission_sync_attempt")

View File

@@ -0,0 +1,72 @@
"""add federated connector tables
Revision ID: 0816326d83aa
Revises: 12635f6655b7
Create Date: 2025-06-29 14:09:45.109518
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "0816326d83aa"
down_revision = "12635f6655b7"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Create federated_connector table
op.create_table(
"federated_connector",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("source", sa.String(), nullable=False),
sa.Column("credentials", sa.LargeBinary(), nullable=False),
sa.PrimaryKeyConstraint("id"),
)
# Create federated_connector_oauth_token table
op.create_table(
"federated_connector_oauth_token",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("federated_connector_id", sa.Integer(), nullable=False),
sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column("token", sa.LargeBinary(), nullable=False),
sa.Column("expires_at", sa.DateTime(), nullable=True),
sa.ForeignKeyConstraint(
["federated_connector_id"], ["federated_connector.id"], ondelete="CASCADE"
),
sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
sa.PrimaryKeyConstraint("id"),
)
# Create federated_connector__document_set table
op.create_table(
"federated_connector__document_set",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("federated_connector_id", sa.Integer(), nullable=False),
sa.Column("document_set_id", sa.Integer(), nullable=False),
sa.Column("entities", postgresql.JSONB(), nullable=False),
sa.ForeignKeyConstraint(
["federated_connector_id"], ["federated_connector.id"], ondelete="CASCADE"
),
sa.ForeignKeyConstraint(
["document_set_id"], ["document_set.id"], ondelete="CASCADE"
),
sa.PrimaryKeyConstraint("id"),
sa.UniqueConstraint(
"federated_connector_id",
"document_set_id",
name="uq_federated_connector_document_set",
),
)
def downgrade() -> None:
# Drop tables in reverse order due to foreign key dependencies
op.drop_table("federated_connector__document_set")
op.drop_table("federated_connector_oauth_token")
op.drop_table("federated_connector")

View File

@@ -5,6 +5,7 @@ Revises: 8a87bd6ec550
Create Date: 2024-07-23 11:12:39.462397
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -0,0 +1,33 @@
"""add theme_preference to user
Revision ID: 09995b8811eb
Revises: 3d1cca026fe8
Create Date: 2025-10-24 08:58:50.246949
"""
from alembic import op
import sqlalchemy as sa
from onyx.db.enums import ThemePreference
# revision identifiers, used by Alembic.
revision = "09995b8811eb"
down_revision = "3d1cca026fe8"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"user",
sa.Column(
"theme_preference",
sa.Enum(ThemePreference, native_enum=False),
nullable=True,
),
)
def downgrade() -> None:
op.drop_column("user", "theme_preference")

View File

@@ -5,6 +5,7 @@ Revises: 5f4b8568a221
Create Date: 2024-03-02 23:23:49.960309
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

View File

@@ -5,6 +5,7 @@ Revises: 570282d33c49
Create Date: 2024-05-05 19:30:34.317972
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.sql import table

View File

@@ -0,0 +1,389 @@
"""Migration 2: User file data preparation and backfill
Revision ID: 0cd424f32b1d
Revises: 9b66d3156fc6
Create Date: 2025-09-22 09:44:42.727034
This migration populates the new columns added in migration 1.
It prepares data for the UUID transition and relationship migration.
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy import text
import logging
logger = logging.getLogger("alembic.runtime.migration")
# revision identifiers, used by Alembic.
revision = "0cd424f32b1d"
down_revision = "9b66d3156fc6"
branch_labels = None
depends_on = None
def upgrade() -> None:
"""Populate new columns with data."""
bind = op.get_bind()
inspector = sa.inspect(bind)
# === Step 1: Populate user_file.new_id ===
user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
has_new_id = "new_id" in user_file_columns
if has_new_id:
logger.info("Populating user_file.new_id with UUIDs...")
# Count rows needing UUIDs
null_count = bind.execute(
text("SELECT COUNT(*) FROM user_file WHERE new_id IS NULL")
).scalar_one()
if null_count > 0:
logger.info(f"Generating UUIDs for {null_count} user_file records...")
# Populate in batches to avoid long locks
batch_size = 10000
total_updated = 0
while True:
result = bind.execute(
text(
"""
UPDATE user_file
SET new_id = gen_random_uuid()
WHERE new_id IS NULL
AND id IN (
SELECT id FROM user_file
WHERE new_id IS NULL
LIMIT :batch_size
)
"""
),
{"batch_size": batch_size},
)
updated = result.rowcount
total_updated += updated
if updated < batch_size:
break
logger.info(f" Updated {total_updated}/{null_count} records...")
logger.info(f"Generated UUIDs for {total_updated} user_file records")
# Verify all records have UUIDs
remaining_null = bind.execute(
text("SELECT COUNT(*) FROM user_file WHERE new_id IS NULL")
).scalar_one()
if remaining_null > 0:
raise Exception(
f"Failed to populate all user_file.new_id values ({remaining_null} NULL)"
)
# Lock down the column
op.alter_column("user_file", "new_id", nullable=False)
op.alter_column("user_file", "new_id", server_default=None)
logger.info("Locked down user_file.new_id column")
# === Step 2: Populate persona__user_file.user_file_id_uuid ===
persona_user_file_columns = [
col["name"] for col in inspector.get_columns("persona__user_file")
]
if has_new_id and "user_file_id_uuid" in persona_user_file_columns:
logger.info("Populating persona__user_file.user_file_id_uuid...")
# Count rows needing update
null_count = bind.execute(
text(
"""
SELECT COUNT(*) FROM persona__user_file
WHERE user_file_id IS NOT NULL AND user_file_id_uuid IS NULL
"""
)
).scalar_one()
if null_count > 0:
logger.info(f"Updating {null_count} persona__user_file records...")
# Update in batches
batch_size = 10000
total_updated = 0
while True:
result = bind.execute(
text(
"""
UPDATE persona__user_file p
SET user_file_id_uuid = uf.new_id
FROM user_file uf
WHERE p.user_file_id = uf.id
AND p.user_file_id_uuid IS NULL
AND p.persona_id IN (
SELECT persona_id
FROM persona__user_file
WHERE user_file_id_uuid IS NULL
LIMIT :batch_size
)
"""
),
{"batch_size": batch_size},
)
updated = result.rowcount
total_updated += updated
if updated < batch_size:
break
logger.info(f" Updated {total_updated}/{null_count} records...")
logger.info(f"Updated {total_updated} persona__user_file records")
# Verify all records are populated
remaining_null = bind.execute(
text(
"""
SELECT COUNT(*) FROM persona__user_file
WHERE user_file_id IS NOT NULL AND user_file_id_uuid IS NULL
"""
)
).scalar_one()
if remaining_null > 0:
raise Exception(
f"Failed to populate all persona__user_file.user_file_id_uuid values ({remaining_null} NULL)"
)
op.alter_column("persona__user_file", "user_file_id_uuid", nullable=False)
logger.info("Locked down persona__user_file.user_file_id_uuid column")
# === Step 3: Create user_project records from chat_folder ===
if "chat_folder" in inspector.get_table_names():
logger.info("Creating user_project records from chat_folder...")
result = bind.execute(
text(
"""
INSERT INTO user_project (user_id, name)
SELECT cf.user_id, cf.name
FROM chat_folder cf
WHERE NOT EXISTS (
SELECT 1
FROM user_project up
WHERE up.user_id = cf.user_id AND up.name = cf.name
)
"""
)
)
logger.info(f"Created {result.rowcount} user_project records from chat_folder")
# === Step 4: Populate chat_session.project_id ===
chat_session_columns = [
col["name"] for col in inspector.get_columns("chat_session")
]
if "folder_id" in chat_session_columns and "project_id" in chat_session_columns:
logger.info("Populating chat_session.project_id...")
# Count sessions needing update
null_count = bind.execute(
text(
"""
SELECT COUNT(*) FROM chat_session
WHERE project_id IS NULL AND folder_id IS NOT NULL
"""
)
).scalar_one()
if null_count > 0:
logger.info(f"Updating {null_count} chat_session records...")
result = bind.execute(
text(
"""
UPDATE chat_session cs
SET project_id = up.id
FROM chat_folder cf
JOIN user_project up ON up.user_id = cf.user_id AND up.name = cf.name
WHERE cs.folder_id = cf.id AND cs.project_id IS NULL
"""
)
)
logger.info(f"Updated {result.rowcount} chat_session records")
# Verify all records are populated
remaining_null = bind.execute(
text(
"""
SELECT COUNT(*) FROM chat_session
WHERE project_id IS NULL AND folder_id IS NOT NULL
"""
)
).scalar_one()
if remaining_null > 0:
logger.warning(
f"Warning: {remaining_null} chat_session records could not be mapped to projects"
)
# === Step 5: Update plaintext FileRecord IDs/display names to UUID scheme ===
# Prior to UUID migration, plaintext cache files were stored with file_id like 'plain_text_<int_id>'.
# After migration, we use 'plaintext_<uuid>' (note the name change to 'plaintext_').
# This step remaps existing FileRecord rows to the new naming while preserving object_key/bucket.
logger.info("Updating plaintext FileRecord ids and display names to UUID scheme...")
# Count legacy plaintext records that can be mapped to UUID user_file ids
count_query = text(
"""
SELECT COUNT(*)
FROM file_record fr
JOIN user_file uf ON fr.file_id = CONCAT('plaintext_', uf.id::text)
WHERE LOWER(fr.file_origin::text) = 'plaintext_cache'
"""
)
legacy_count = bind.execute(count_query).scalar_one()
if legacy_count and legacy_count > 0:
logger.info(f"Found {legacy_count} legacy plaintext file records to update")
# Update display_name first for readability (safe regardless of rename)
bind.execute(
text(
"""
UPDATE file_record fr
SET display_name = CONCAT('Plaintext for user file ', uf.new_id::text)
FROM user_file uf
WHERE LOWER(fr.file_origin::text) = 'plaintext_cache'
AND fr.file_id = CONCAT('plaintext_', uf.id::text)
"""
)
)
# Remap file_id from 'plaintext_<int>' -> 'plaintext_<uuid>' using transitional new_id
# Use a single UPDATE ... WHERE file_id LIKE 'plain_text_%'
# and ensure it aligns to existing user_file ids to avoid renaming unrelated rows
result = bind.execute(
text(
"""
UPDATE file_record fr
SET file_id = CONCAT('plaintext_', uf.new_id::text)
FROM user_file uf
WHERE LOWER(fr.file_origin::text) = 'plaintext_cache'
AND fr.file_id = CONCAT('plaintext_', uf.id::text)
"""
)
)
logger.info(
f"Updated {result.rowcount} plaintext file_record ids to UUID scheme"
)
# === Step 6: Ensure document_id_migrated default TRUE and backfill existing FALSE ===
# New records should default to migrated=True so the migration task won't run for them.
# Existing rows that had a legacy document_id should be marked as not migrated to be processed.
# Backfill existing records: if document_id is not null, set to FALSE
bind.execute(
text(
"""
UPDATE user_file
SET document_id_migrated = FALSE
WHERE document_id IS NOT NULL
"""
)
)
# === Step 7: Backfill user_file.status from index_attempt ===
logger.info("Backfilling user_file.status from index_attempt...")
# Update user_file status based on latest index attempt
# Using CTEs instead of temp tables for asyncpg compatibility
result = bind.execute(
text(
"""
WITH latest_attempt AS (
SELECT DISTINCT ON (ia.connector_credential_pair_id)
ia.connector_credential_pair_id,
ia.status
FROM index_attempt ia
ORDER BY ia.connector_credential_pair_id, ia.time_updated DESC
),
uf_to_ccp AS (
SELECT DISTINCT uf.id AS uf_id, ccp.id AS cc_pair_id
FROM user_file uf
JOIN document_by_connector_credential_pair dcc
ON dcc.id = REPLACE(uf.document_id, 'USER_FILE_CONNECTOR__', 'FILE_CONNECTOR__')
JOIN connector_credential_pair ccp
ON ccp.connector_id = dcc.connector_id
AND ccp.credential_id = dcc.credential_id
)
UPDATE user_file uf
SET status = CASE
WHEN la.status IN ('NOT_STARTED', 'IN_PROGRESS') THEN 'PROCESSING'
WHEN la.status = 'SUCCESS' THEN 'COMPLETED'
ELSE 'FAILED'
END
FROM uf_to_ccp ufc
LEFT JOIN latest_attempt la
ON la.connector_credential_pair_id = ufc.cc_pair_id
WHERE uf.id = ufc.uf_id
AND uf.status = 'PROCESSING'
"""
)
)
logger.info(f"Updated status for {result.rowcount} user_file records")
logger.info("Migration 2 (data preparation) completed successfully")
def downgrade() -> None:
"""Reset populated data to allow clean downgrade of schema."""
bind = op.get_bind()
inspector = sa.inspect(bind)
logger.info("Starting downgrade of data preparation...")
# Reset user_file columns to allow nulls before data removal
if "user_file" in inspector.get_table_names():
columns = [col["name"] for col in inspector.get_columns("user_file")]
if "new_id" in columns:
op.alter_column(
"user_file",
"new_id",
nullable=True,
server_default=sa.text("gen_random_uuid()"),
)
# Optionally clear the data
# bind.execute(text("UPDATE user_file SET new_id = NULL"))
logger.info("Reset user_file.new_id to nullable")
# Reset persona__user_file.user_file_id_uuid
if "persona__user_file" in inspector.get_table_names():
columns = [col["name"] for col in inspector.get_columns("persona__user_file")]
if "user_file_id_uuid" in columns:
op.alter_column("persona__user_file", "user_file_id_uuid", nullable=True)
# Optionally clear the data
# bind.execute(text("UPDATE persona__user_file SET user_file_id_uuid = NULL"))
logger.info("Reset persona__user_file.user_file_id_uuid to nullable")
# Note: We don't delete user_project records or reset chat_session.project_id
# as these might be in use and can be handled by the schema downgrade
# Reset user_file.status to default
if "user_file" in inspector.get_table_names():
columns = [col["name"] for col in inspector.get_columns("user_file")]
if "status" in columns:
bind.execute(text("UPDATE user_file SET status = 'PROCESSING'"))
logger.info("Reset user_file.status to default")
logger.info("Downgrade completed successfully")

View File

@@ -5,6 +5,7 @@ Revises: 52a219fb5233
Create Date: 2024-09-10 15:03:48.233926
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 369644546676
Create Date: 2025-01-10 14:01:14.067144
"""
from alembic import op
# revision identifiers, used by Alembic.

View File

@@ -0,0 +1,596 @@
"""drive-canonical-ids
Revision ID: 12635f6655b7
Revises: 58c50ef19f08
Create Date: 2025-06-20 14:44:54.241159
"""
from alembic import op
import sqlalchemy as sa
from urllib.parse import urlparse, urlunparse
from httpx import HTTPStatusError
import httpx
from onyx.document_index.factory import get_default_document_index
from onyx.db.search_settings import SearchSettings
from onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client
from onyx.document_index.vespa.shared_utils.utils import (
replace_invalid_doc_id_characters,
)
from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
from onyx.utils.logger import setup_logger
import os
logger = setup_logger()
# revision identifiers, used by Alembic.
revision = "12635f6655b7"
down_revision = "58c50ef19f08"
branch_labels = None
depends_on = None
SKIP_CANON_DRIVE_IDS = os.environ.get("SKIP_CANON_DRIVE_IDS", "true").lower() == "true"
def active_search_settings() -> tuple[SearchSettings, SearchSettings | None]:
result = op.get_bind().execute(
sa.text(
"""
SELECT * FROM search_settings WHERE status = 'PRESENT' ORDER BY id DESC LIMIT 1
"""
)
)
search_settings_fetch = result.fetchall()
search_settings = (
SearchSettings(**search_settings_fetch[0]._asdict())
if search_settings_fetch
else None
)
result2 = op.get_bind().execute(
sa.text(
"""
SELECT * FROM search_settings WHERE status = 'FUTURE' ORDER BY id DESC LIMIT 1
"""
)
)
search_settings_future_fetch = result2.fetchall()
search_settings_future = (
SearchSettings(**search_settings_future_fetch[0]._asdict())
if search_settings_future_fetch
else None
)
if not isinstance(search_settings, SearchSettings):
raise RuntimeError(
"current search settings is of type " + str(type(search_settings))
)
if (
not isinstance(search_settings_future, SearchSettings)
and search_settings_future is not None
):
raise RuntimeError(
"future search settings is of type " + str(type(search_settings_future))
)
return search_settings, search_settings_future
def normalize_google_drive_url(url: str) -> str:
"""Remove query parameters from Google Drive URLs to create canonical document IDs.
NOTE: copied from drive doc_conversion.py
"""
parsed_url = urlparse(url)
parsed_url = parsed_url._replace(query="")
spl_path = parsed_url.path.split("/")
if spl_path and (spl_path[-1] in ["edit", "view", "preview"]):
spl_path.pop()
parsed_url = parsed_url._replace(path="/".join(spl_path))
# Remove query parameters and reconstruct URL
return urlunparse(parsed_url)
def get_google_drive_documents_from_database() -> list[dict]:
"""Get all Google Drive documents from the database."""
bind = op.get_bind()
result = bind.execute(
sa.text(
"""
SELECT d.id
FROM document d
JOIN document_by_connector_credential_pair dcc ON d.id = dcc.id
JOIN connector_credential_pair cc ON dcc.connector_id = cc.connector_id
AND dcc.credential_id = cc.credential_id
JOIN connector c ON cc.connector_id = c.id
WHERE c.source = 'GOOGLE_DRIVE'
"""
)
)
documents = []
for row in result:
documents.append({"document_id": row.id})
return documents
def update_document_id_in_database(
old_doc_id: str, new_doc_id: str, index_name: str
) -> None:
"""Update document IDs in all relevant database tables using copy-and-swap approach."""
bind = op.get_bind()
# print(f"Updating database tables for document {old_doc_id} -> {new_doc_id}")
# Check if new document ID already exists
result = bind.execute(
sa.text("SELECT COUNT(*) FROM document WHERE id = :new_id"),
{"new_id": new_doc_id},
)
row = result.fetchone()
if row and row[0] > 0:
# print(f"Document with ID {new_doc_id} already exists, deleting old one")
delete_document_from_db(old_doc_id, index_name)
return
# Step 1: Create a new document row with the new ID (copy all fields from old row)
# Use a conservative approach to handle columns that might not exist in all installations
try:
bind.execute(
sa.text(
"""
INSERT INTO document (id, from_ingestion_api, boost, hidden, semantic_id,
link, doc_updated_at, primary_owners, secondary_owners,
external_user_emails, external_user_group_ids, is_public,
chunk_count, last_modified, last_synced, kg_stage, kg_processing_time)
SELECT :new_id, from_ingestion_api, boost, hidden, semantic_id,
link, doc_updated_at, primary_owners, secondary_owners,
external_user_emails, external_user_group_ids, is_public,
chunk_count, last_modified, last_synced, kg_stage, kg_processing_time
FROM document
WHERE id = :old_id
"""
),
{"new_id": new_doc_id, "old_id": old_doc_id},
)
# print(f"Successfully updated database tables for document {old_doc_id} -> {new_doc_id}")
except Exception as e:
# If the full INSERT fails, try a more basic version with only core columns
logger.warning(f"Full INSERT failed, trying basic version: {e}")
bind.execute(
sa.text(
"""
INSERT INTO document (id, from_ingestion_api, boost, hidden, semantic_id,
link, doc_updated_at, primary_owners, secondary_owners)
SELECT :new_id, from_ingestion_api, boost, hidden, semantic_id,
link, doc_updated_at, primary_owners, secondary_owners
FROM document
WHERE id = :old_id
"""
),
{"new_id": new_doc_id, "old_id": old_doc_id},
)
# Step 2: Update all foreign key references to point to the new ID
# Update document_by_connector_credential_pair table
bind.execute(
sa.text(
"UPDATE document_by_connector_credential_pair SET id = :new_id WHERE id = :old_id"
),
{"new_id": new_doc_id, "old_id": old_doc_id},
)
# print(f"Successfully updated document_by_connector_credential_pair table for document {old_doc_id} -> {new_doc_id}")
# Update search_doc table (stores search results for chat replay)
# This is critical for agent functionality
bind.execute(
sa.text(
"UPDATE search_doc SET document_id = :new_id WHERE document_id = :old_id"
),
{"new_id": new_doc_id, "old_id": old_doc_id},
)
# print(f"Successfully updated search_doc table for document {old_doc_id} -> {new_doc_id}")
# Update document_retrieval_feedback table (user feedback on documents)
bind.execute(
sa.text(
"UPDATE document_retrieval_feedback SET document_id = :new_id WHERE document_id = :old_id"
),
{"new_id": new_doc_id, "old_id": old_doc_id},
)
# print(f"Successfully updated document_retrieval_feedback table for document {old_doc_id} -> {new_doc_id}")
# Update document__tag table (document-tag relationships)
bind.execute(
sa.text(
"UPDATE document__tag SET document_id = :new_id WHERE document_id = :old_id"
),
{"new_id": new_doc_id, "old_id": old_doc_id},
)
# print(f"Successfully updated document__tag table for document {old_doc_id} -> {new_doc_id}")
# Update user_file table (user uploaded files linked to documents)
bind.execute(
sa.text(
"UPDATE user_file SET document_id = :new_id WHERE document_id = :old_id"
),
{"new_id": new_doc_id, "old_id": old_doc_id},
)
# print(f"Successfully updated user_file table for document {old_doc_id} -> {new_doc_id}")
# Update KG and chunk_stats tables (these may not exist in all installations)
try:
# Update kg_entity table
bind.execute(
sa.text(
"UPDATE kg_entity SET document_id = :new_id WHERE document_id = :old_id"
),
{"new_id": new_doc_id, "old_id": old_doc_id},
)
# print(f"Successfully updated kg_entity table for document {old_doc_id} -> {new_doc_id}")
# Update kg_entity_extraction_staging table
bind.execute(
sa.text(
"UPDATE kg_entity_extraction_staging SET document_id = :new_id WHERE document_id = :old_id"
),
{"new_id": new_doc_id, "old_id": old_doc_id},
)
# print(f"Successfully updated kg_entity_extraction_staging table for document {old_doc_id} -> {new_doc_id}")
# Update kg_relationship table
bind.execute(
sa.text(
"UPDATE kg_relationship SET source_document = :new_id WHERE source_document = :old_id"
),
{"new_id": new_doc_id, "old_id": old_doc_id},
)
# print(f"Successfully updated kg_relationship table for document {old_doc_id} -> {new_doc_id}")
# Update kg_relationship_extraction_staging table
bind.execute(
sa.text(
"UPDATE kg_relationship_extraction_staging SET source_document = :new_id WHERE source_document = :old_id"
),
{"new_id": new_doc_id, "old_id": old_doc_id},
)
# print(f"Successfully updated kg_relationship_extraction_staging table for document {old_doc_id} -> {new_doc_id}")
# Update chunk_stats table
bind.execute(
sa.text(
"UPDATE chunk_stats SET document_id = :new_id WHERE document_id = :old_id"
),
{"new_id": new_doc_id, "old_id": old_doc_id},
)
# print(f"Successfully updated chunk_stats table for document {old_doc_id} -> {new_doc_id}")
# Update chunk_stats ID field which includes document_id
bind.execute(
sa.text(
"""
UPDATE chunk_stats
SET id = REPLACE(id, :old_id, :new_id)
WHERE id LIKE :old_id_pattern
"""
),
{
"new_id": new_doc_id,
"old_id": old_doc_id,
"old_id_pattern": f"{old_doc_id}__%",
},
)
# print(f"Successfully updated chunk_stats ID field for document {old_doc_id} -> {new_doc_id}")
except Exception as e:
logger.warning(f"Some KG/chunk tables may not exist or failed to update: {e}")
# Step 3: Delete the old document row (this should now be safe since all FKs point to new row)
bind.execute(
sa.text("DELETE FROM document WHERE id = :old_id"), {"old_id": old_doc_id}
)
# print(f"Successfully deleted document {old_doc_id} from database")
def _visit_chunks(
*,
http_client: httpx.Client,
index_name: str,
selection: str,
continuation: str | None = None,
) -> tuple[list[dict], str | None]:
"""Helper that calls the /document/v1 visit API once and returns (docs, next_token)."""
# Use the same URL as the document API, but with visit-specific params
base_url = DOCUMENT_ID_ENDPOINT.format(index_name=index_name)
params: dict[str, str] = {
"selection": selection,
"wantedDocumentCount": "1000",
}
if continuation:
params["continuation"] = continuation
# print(f"Visiting chunks for selection '{selection}' with params {params}")
resp = http_client.get(base_url, params=params, timeout=None)
# print(f"Visited chunks for document {selection}")
resp.raise_for_status()
payload = resp.json()
return payload.get("documents", []), payload.get("continuation")
def delete_document_chunks_from_vespa(index_name: str, doc_id: str) -> None:
"""Delete all chunks for *doc_id* from Vespa using continuation-token paging (no offset)."""
total_deleted = 0
# Use exact match instead of contains - Document Selector Language doesn't support contains
selection = f'{index_name}.document_id=="{doc_id}"'
with get_vespa_http_client() as http_client:
continuation: str | None = None
while True:
docs, continuation = _visit_chunks(
http_client=http_client,
index_name=index_name,
selection=selection,
continuation=continuation,
)
if not docs:
break
for doc in docs:
vespa_full_id = doc.get("id")
if not vespa_full_id:
continue
vespa_doc_uuid = vespa_full_id.split("::")[-1]
delete_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{vespa_doc_uuid}"
try:
resp = http_client.delete(delete_url)
resp.raise_for_status()
total_deleted += 1
except Exception as e:
print(f"Failed to delete chunk {vespa_doc_uuid}: {e}")
if not continuation:
break
def update_document_id_in_vespa(
index_name: str, old_doc_id: str, new_doc_id: str
) -> None:
"""Update all chunks' document_id field from *old_doc_id* to *new_doc_id* using continuation paging."""
clean_new_doc_id = replace_invalid_doc_id_characters(new_doc_id)
# Use exact match instead of contains - Document Selector Language doesn't support contains
selection = f'{index_name}.document_id=="{old_doc_id}"'
with get_vespa_http_client() as http_client:
continuation: str | None = None
while True:
# print(f"Visiting chunks for document {old_doc_id} -> {new_doc_id}")
docs, continuation = _visit_chunks(
http_client=http_client,
index_name=index_name,
selection=selection,
continuation=continuation,
)
if not docs:
break
for doc in docs:
vespa_full_id = doc.get("id")
if not vespa_full_id:
continue
vespa_doc_uuid = vespa_full_id.split("::")[-1]
vespa_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{vespa_doc_uuid}"
update_request = {
"fields": {"document_id": {"assign": clean_new_doc_id}}
}
try:
resp = http_client.put(vespa_url, json=update_request)
resp.raise_for_status()
except Exception as e:
print(f"Failed to update chunk {vespa_doc_uuid}: {e}")
raise
if not continuation:
break
def delete_document_from_db(current_doc_id: str, index_name: str) -> None:
# Delete all foreign key references first, then delete the document
try:
bind = op.get_bind()
# Delete from agent-related tables first (order matters due to foreign keys)
# Delete from agent__sub_query__search_doc first since it references search_doc
bind.execute(
sa.text(
"""
DELETE FROM agent__sub_query__search_doc
WHERE search_doc_id IN (
SELECT id FROM search_doc WHERE document_id = :doc_id
)
"""
),
{"doc_id": current_doc_id},
)
# Delete from chat_message__search_doc
bind.execute(
sa.text(
"""
DELETE FROM chat_message__search_doc
WHERE search_doc_id IN (
SELECT id FROM search_doc WHERE document_id = :doc_id
)
"""
),
{"doc_id": current_doc_id},
)
# Now we can safely delete from search_doc
bind.execute(
sa.text("DELETE FROM search_doc WHERE document_id = :doc_id"),
{"doc_id": current_doc_id},
)
# Delete from document_by_connector_credential_pair
bind.execute(
sa.text(
"DELETE FROM document_by_connector_credential_pair WHERE id = :doc_id"
),
{"doc_id": current_doc_id},
)
# Delete from other tables that reference this document
bind.execute(
sa.text(
"DELETE FROM document_retrieval_feedback WHERE document_id = :doc_id"
),
{"doc_id": current_doc_id},
)
bind.execute(
sa.text("DELETE FROM document__tag WHERE document_id = :doc_id"),
{"doc_id": current_doc_id},
)
bind.execute(
sa.text("DELETE FROM user_file WHERE document_id = :doc_id"),
{"doc_id": current_doc_id},
)
# Delete from KG tables if they exist
try:
bind.execute(
sa.text("DELETE FROM kg_entity WHERE document_id = :doc_id"),
{"doc_id": current_doc_id},
)
bind.execute(
sa.text(
"DELETE FROM kg_entity_extraction_staging WHERE document_id = :doc_id"
),
{"doc_id": current_doc_id},
)
bind.execute(
sa.text("DELETE FROM kg_relationship WHERE source_document = :doc_id"),
{"doc_id": current_doc_id},
)
bind.execute(
sa.text(
"DELETE FROM kg_relationship_extraction_staging WHERE source_document = :doc_id"
),
{"doc_id": current_doc_id},
)
bind.execute(
sa.text("DELETE FROM chunk_stats WHERE document_id = :doc_id"),
{"doc_id": current_doc_id},
)
bind.execute(
sa.text("DELETE FROM chunk_stats WHERE id LIKE :doc_id_pattern"),
{"doc_id_pattern": f"{current_doc_id}__%"},
)
except Exception as e:
logger.warning(
f"Some KG/chunk tables may not exist or failed to delete from: {e}"
)
# Finally delete the document itself
bind.execute(
sa.text("DELETE FROM document WHERE id = :doc_id"),
{"doc_id": current_doc_id},
)
# Delete chunks from vespa
delete_document_chunks_from_vespa(index_name, current_doc_id)
except Exception as e:
print(f"Failed to delete duplicate document {current_doc_id}: {e}")
# Continue with other documents instead of failing the entire migration
def upgrade() -> None:
if SKIP_CANON_DRIVE_IDS:
return
current_search_settings, future_search_settings = active_search_settings()
document_index = get_default_document_index(
current_search_settings,
future_search_settings,
)
# Get the index name
if hasattr(document_index, "index_name"):
index_name = document_index.index_name
else:
# Default index name if we can't get it from the document_index
index_name = "danswer_index"
# Get all Google Drive documents from the database (this is faster and more reliable)
gdrive_documents = get_google_drive_documents_from_database()
if not gdrive_documents:
return
# Track normalized document IDs to detect duplicates
all_normalized_doc_ids = set()
updated_count = 0
for doc_info in gdrive_documents:
current_doc_id = doc_info["document_id"]
normalized_doc_id = normalize_google_drive_url(current_doc_id)
print(f"Processing document {current_doc_id} -> {normalized_doc_id}")
# Check for duplicates
if normalized_doc_id in all_normalized_doc_ids:
# print(f"Deleting duplicate document {current_doc_id}")
delete_document_from_db(current_doc_id, index_name)
continue
all_normalized_doc_ids.add(normalized_doc_id)
# If the document ID already doesn't have query parameters, skip it
if current_doc_id == normalized_doc_id:
# print(f"Skipping document {current_doc_id} -> {normalized_doc_id} because it already has no query parameters")
continue
try:
# Update both database and Vespa in order
# Database first to ensure consistency
update_document_id_in_database(
current_doc_id, normalized_doc_id, index_name
)
# For Vespa, we can now use the original document IDs since we're using contains matching
update_document_id_in_vespa(index_name, current_doc_id, normalized_doc_id)
updated_count += 1
# print(f"Finished updating document {current_doc_id} -> {normalized_doc_id}")
except Exception as e:
print(f"Failed to update document {current_doc_id}: {e}")
if isinstance(e, HTTPStatusError):
print(f"HTTPStatusError: {e}")
print(f"Response: {e.response.text}")
print(f"Status: {e.response.status_code}")
print(f"Headers: {e.response.headers}")
print(f"Request: {e.request.url}")
print(f"Request headers: {e.request.headers}")
# Note: Rollback is complex with copy-and-swap approach since the old document is already deleted
# In case of failure, manual intervention may be required
# Continue with other documents instead of failing the entire migration
continue
logger.info(f"Migration complete. Updated {updated_count} Google Drive documents")
def downgrade() -> None:
# this is a one way migration, so no downgrade.
# It wouldn't make sense to store the extra query parameters
# and duplicate documents to allow a reversal.
pass

View File

@@ -5,6 +5,7 @@ Revises: 77d07dffae64
Create Date: 2023-11-11 20:51:24.228999
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -0,0 +1,261 @@
"""Migration 3: User file relationship migration
Revision ID: 16c37a30adf2
Revises: 0cd424f32b1d
Create Date: 2025-09-22 09:47:34.175596
This migration converts folder-based relationships to project-based relationships.
It migrates persona__user_folder to persona__user_file and populates project__user_file.
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy import text
import logging
logger = logging.getLogger("alembic.runtime.migration")
# revision identifiers, used by Alembic.
revision = "16c37a30adf2"
down_revision = "0cd424f32b1d"
branch_labels = None
depends_on = None
def upgrade() -> None:
"""Migrate folder-based relationships to project-based relationships."""
bind = op.get_bind()
inspector = sa.inspect(bind)
# === Step 1: Migrate persona__user_folder to persona__user_file ===
table_names = inspector.get_table_names()
if "persona__user_folder" in table_names and "user_file" in table_names:
user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
has_new_id = "new_id" in user_file_columns
if has_new_id and "folder_id" in user_file_columns:
logger.info(
"Migrating persona__user_folder relationships to persona__user_file..."
)
# Count relationships to migrate (asyncpg-compatible)
count_query = text(
"""
SELECT COUNT(*)
FROM (
SELECT DISTINCT puf.persona_id, uf.id
FROM persona__user_folder puf
JOIN user_file uf ON uf.folder_id = puf.user_folder_id
WHERE NOT EXISTS (
SELECT 1
FROM persona__user_file p2
WHERE p2.persona_id = puf.persona_id
AND p2.user_file_id = uf.id
)
) AS distinct_pairs
"""
)
to_migrate = bind.execute(count_query).scalar_one()
if to_migrate > 0:
logger.info(f"Creating {to_migrate} persona-file relationships...")
# Migrate in batches to avoid memory issues
batch_size = 10000
total_inserted = 0
while True:
# Insert batch directly using subquery (asyncpg compatible)
result = bind.execute(
text(
"""
INSERT INTO persona__user_file (persona_id, user_file_id, user_file_id_uuid)
SELECT DISTINCT puf.persona_id, uf.id as file_id, uf.new_id
FROM persona__user_folder puf
JOIN user_file uf ON uf.folder_id = puf.user_folder_id
WHERE NOT EXISTS (
SELECT 1
FROM persona__user_file p2
WHERE p2.persona_id = puf.persona_id
AND p2.user_file_id = uf.id
)
LIMIT :batch_size
"""
),
{"batch_size": batch_size},
)
inserted = result.rowcount
total_inserted += inserted
if inserted < batch_size:
break
logger.info(
f" Migrated {total_inserted}/{to_migrate} relationships..."
)
logger.info(
f"Created {total_inserted} persona__user_file relationships"
)
# === Step 2: Add foreign key for chat_session.project_id ===
chat_session_fks = inspector.get_foreign_keys("chat_session")
fk_exists = any(
fk["name"] == "fk_chat_session_project_id" for fk in chat_session_fks
)
if not fk_exists:
logger.info("Adding foreign key constraint for chat_session.project_id...")
op.create_foreign_key(
"fk_chat_session_project_id",
"chat_session",
"user_project",
["project_id"],
["id"],
)
logger.info("Added foreign key constraint")
# === Step 3: Populate project__user_file from user_file.folder_id ===
user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
has_new_id = "new_id" in user_file_columns
if has_new_id and "folder_id" in user_file_columns:
logger.info("Populating project__user_file from folder relationships...")
# Count relationships to create
count_query = text(
"""
SELECT COUNT(*)
FROM user_file uf
WHERE uf.folder_id IS NOT NULL
AND NOT EXISTS (
SELECT 1
FROM project__user_file puf
WHERE puf.project_id = uf.folder_id
AND puf.user_file_id = uf.new_id
)
"""
)
to_create = bind.execute(count_query).scalar_one()
if to_create > 0:
logger.info(f"Creating {to_create} project-file relationships...")
# Insert in batches
batch_size = 10000
total_inserted = 0
while True:
result = bind.execute(
text(
"""
INSERT INTO project__user_file (project_id, user_file_id)
SELECT uf.folder_id, uf.new_id
FROM user_file uf
WHERE uf.folder_id IS NOT NULL
AND NOT EXISTS (
SELECT 1
FROM project__user_file puf
WHERE puf.project_id = uf.folder_id
AND puf.user_file_id = uf.new_id
)
LIMIT :batch_size
ON CONFLICT (project_id, user_file_id) DO NOTHING
"""
),
{"batch_size": batch_size},
)
inserted = result.rowcount
total_inserted += inserted
if inserted < batch_size:
break
logger.info(f" Created {total_inserted}/{to_create} relationships...")
logger.info(f"Created {total_inserted} project__user_file relationships")
# === Step 4: Create index on chat_session.project_id ===
try:
indexes = [ix.get("name") for ix in inspector.get_indexes("chat_session")]
except Exception:
indexes = []
if "ix_chat_session_project_id" not in indexes:
logger.info("Creating index on chat_session.project_id...")
op.create_index(
"ix_chat_session_project_id", "chat_session", ["project_id"], unique=False
)
logger.info("Created index")
logger.info("Migration 3 (relationship migration) completed successfully")
def downgrade() -> None:
"""Remove migrated relationships and constraints."""
bind = op.get_bind()
inspector = sa.inspect(bind)
logger.info("Starting downgrade of relationship migration...")
# Drop index on chat_session.project_id
try:
indexes = [ix.get("name") for ix in inspector.get_indexes("chat_session")]
if "ix_chat_session_project_id" in indexes:
op.drop_index("ix_chat_session_project_id", "chat_session")
logger.info("Dropped index on chat_session.project_id")
except Exception:
pass
# Drop foreign key constraint
try:
chat_session_fks = inspector.get_foreign_keys("chat_session")
fk_exists = any(
fk["name"] == "fk_chat_session_project_id" for fk in chat_session_fks
)
if fk_exists:
op.drop_constraint(
"fk_chat_session_project_id", "chat_session", type_="foreignkey"
)
logger.info("Dropped foreign key constraint on chat_session.project_id")
except Exception:
pass
# Clear project__user_file relationships (but keep the table for migration 1 to handle)
if "project__user_file" in inspector.get_table_names():
result = bind.execute(text("DELETE FROM project__user_file"))
logger.info(f"Cleared {result.rowcount} records from project__user_file")
# Remove migrated persona__user_file relationships
# Only remove those that came from folder relationships
if all(
table in inspector.get_table_names()
for table in ["persona__user_file", "persona__user_folder", "user_file"]
):
user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
if "folder_id" in user_file_columns:
result = bind.execute(
text(
"""
DELETE FROM persona__user_file puf
WHERE EXISTS (
SELECT 1
FROM user_file uf
JOIN persona__user_folder puf2
ON puf2.user_folder_id = uf.folder_id
WHERE puf.persona_id = puf2.persona_id
AND puf.user_file_id = uf.id
)
"""
)
)
logger.info(
f"Removed {result.rowcount} migrated persona__user_file relationships"
)
logger.info("Downgrade completed successfully")

View File

@@ -5,6 +5,7 @@ Revises: e50154680a5c
Create Date: 2024-03-19 15:30:44.425436
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

View File

@@ -5,6 +5,7 @@ Revises: 4ee1287bd26a
Create Date: 2024-11-21 11:49:04.488677
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

View File

@@ -0,0 +1,28 @@
"""Add indexes to document__tag
Revision ID: 1a03d2c2856b
Revises: 9c00a2bccb83
Create Date: 2025-02-18 10:45:13.957807
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "1a03d2c2856b"
down_revision = "9c00a2bccb83"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.create_index(
op.f("ix_document__tag_tag_id"),
"document__tag",
["tag_id"],
unique=False,
)
def downgrade() -> None:
op.drop_index(op.f("ix_document__tag_tag_id"), table_name="document__tag")

View File

@@ -5,6 +5,7 @@ Revises: 6756efa39ada
Create Date: 2024-10-15 19:26:44.071259
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

View File

@@ -5,6 +5,7 @@ Revises: 35e6853a51d5
Create Date: 2024-09-18 11:48:59.418726
"""
from alembic import op

View File

@@ -0,0 +1,89 @@
"""add internet search and content provider tables
Revision ID: 1f2a3b4c5d6e
Revises: 9drpiiw74ljy
Create Date: 2025-11-10 19:45:00.000000
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "1f2a3b4c5d6e"
down_revision = "9drpiiw74ljy"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"internet_search_provider",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column("name", sa.String(), nullable=False, unique=True),
sa.Column("provider_type", sa.String(), nullable=False),
sa.Column("api_key", sa.LargeBinary(), nullable=True),
sa.Column("config", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
sa.Column(
"is_active", sa.Boolean(), nullable=False, server_default=sa.text("false")
),
sa.Column(
"time_created",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
sa.Column(
"time_updated",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
)
op.create_index(
"ix_internet_search_provider_is_active",
"internet_search_provider",
["is_active"],
)
op.create_table(
"internet_content_provider",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column("name", sa.String(), nullable=False, unique=True),
sa.Column("provider_type", sa.String(), nullable=False),
sa.Column("api_key", sa.LargeBinary(), nullable=True),
sa.Column("config", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
sa.Column(
"is_active", sa.Boolean(), nullable=False, server_default=sa.text("false")
),
sa.Column(
"time_created",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
sa.Column(
"time_updated",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
)
op.create_index(
"ix_internet_content_provider_is_active",
"internet_content_provider",
["is_active"],
)
def downgrade() -> None:
op.drop_index(
"ix_internet_content_provider_is_active", table_name="internet_content_provider"
)
op.drop_table("internet_content_provider")
op.drop_index(
"ix_internet_search_provider_is_active", table_name="internet_search_provider"
)
op.drop_table("internet_search_provider")

View File

@@ -5,6 +5,7 @@ Revises: 5fc1f54cc252
Create Date: 2024-08-10 11:13:36.070790
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -0,0 +1,45 @@
"""Add foreign key to user__external_user_group_id
Revision ID: 238b84885828
Revises: a7688ab35c45
Create Date: 2025-05-19 17:15:33.424584
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "238b84885828"
down_revision = "a7688ab35c45"
branch_labels = None
depends_on = None
def upgrade() -> None:
# First, clean up any entries that don't have a valid cc_pair_id
op.execute(
"""
DELETE FROM user__external_user_group_id
WHERE cc_pair_id NOT IN (SELECT id FROM connector_credential_pair)
"""
)
# Add foreign key constraint with cascade delete
op.create_foreign_key(
"fk_user__external_user_group_id_cc_pair_id",
"user__external_user_group_id",
"connector_credential_pair",
["cc_pair_id"],
["id"],
ondelete="CASCADE",
)
def downgrade() -> None:
# Drop the foreign key constraint
op.drop_constraint(
"fk_user__external_user_group_id_cc_pair_id",
"user__external_user_group_id",
type_="foreignkey",
)

View File

@@ -5,6 +5,7 @@ Revises: bc9771dccadf
Create Date: 2024-06-27 16:04:51.480437
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 6d387b3196c2
Create Date: 2023-05-05 15:49:35.716016
"""
import fastapi_users_db_sqlalchemy
import sqlalchemy as sa
from alembic import op

View File

@@ -5,6 +5,7 @@ Revises: 2daa494a0851
Create Date: 2024-11-12 13:23:29.858995
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

View File

@@ -5,6 +5,7 @@ Revises: 2666d766cb9b
Create Date: 2023-05-24 18:45:17.244495
"""
import fastapi_users_db_sqlalchemy
import sqlalchemy as sa
from alembic import op
@@ -143,27 +144,34 @@ def upgrade() -> None:
def downgrade() -> None:
op.execute("TRUNCATE TABLE index_attempt")
op.add_column(
"index_attempt",
sa.Column("input_type", sa.VARCHAR(), autoincrement=False, nullable=False),
)
op.add_column(
"index_attempt",
sa.Column("source", sa.VARCHAR(), autoincrement=False, nullable=False),
)
op.add_column(
"index_attempt",
sa.Column(
"connector_specific_config",
postgresql.JSONB(astext_type=sa.Text()),
autoincrement=False,
nullable=False,
),
)
# Check if the constraint exists before dropping
conn = op.get_bind()
inspector = sa.inspect(conn)
existing_columns = {col["name"] for col in inspector.get_columns("index_attempt")}
if "input_type" not in existing_columns:
op.add_column(
"index_attempt",
sa.Column("input_type", sa.VARCHAR(), autoincrement=False, nullable=False),
)
if "source" not in existing_columns:
op.add_column(
"index_attempt",
sa.Column("source", sa.VARCHAR(), autoincrement=False, nullable=False),
)
if "connector_specific_config" not in existing_columns:
op.add_column(
"index_attempt",
sa.Column(
"connector_specific_config",
postgresql.JSONB(astext_type=sa.Text()),
autoincrement=False,
nullable=False,
),
)
# Check if the constraint exists before dropping
constraints = inspector.get_foreign_keys("index_attempt")
if any(
@@ -182,8 +190,12 @@ def downgrade() -> None:
"fk_index_attempt_connector_id", "index_attempt", type_="foreignkey"
)
op.drop_column("index_attempt", "credential_id")
op.drop_column("index_attempt", "connector_id")
op.drop_table("connector_credential_pair")
op.drop_table("credential")
op.drop_table("connector")
if "credential_id" in existing_columns:
op.drop_column("index_attempt", "credential_id")
if "connector_id" in existing_columns:
op.drop_column("index_attempt", "connector_id")
op.execute("DROP TABLE IF EXISTS connector_credential_pair CASCADE")
op.execute("DROP TABLE IF EXISTS credential CASCADE")
op.execute("DROP TABLE IF EXISTS connector CASCADE")

View File

@@ -5,6 +5,7 @@ Revises: c0aab6edb6dd
Create Date: 2025-01-04 11:39:43.268612
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -0,0 +1,72 @@
"""add switchover_type field and remove background_reindex_enabled
Revision ID: 2acdef638fc2
Revises: a4f23d6b71c8
Create Date: 2025-01-XX XX:XX:XX.XXXXXX
"""
from alembic import op
import sqlalchemy as sa
from onyx.db.enums import SwitchoverType
# revision identifiers, used by Alembic.
revision = "2acdef638fc2"
down_revision = "a4f23d6b71c8"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Add switchover_type column with default value of REINDEX
op.add_column(
"search_settings",
sa.Column(
"switchover_type",
sa.Enum(SwitchoverType, native_enum=False),
nullable=False,
server_default=SwitchoverType.REINDEX.value,
),
)
# Migrate existing data: set switchover_type based on background_reindex_enabled
# REINDEX where background_reindex_enabled=True, INSTANT where False
op.execute(
"""
UPDATE search_settings
SET switchover_type = CASE
WHEN background_reindex_enabled = true THEN 'REINDEX'
ELSE 'INSTANT'
END
"""
)
# Remove the background_reindex_enabled column (replaced by switchover_type)
op.drop_column("search_settings", "background_reindex_enabled")
def downgrade() -> None:
# Re-add the background_reindex_enabled column with default value of True
op.add_column(
"search_settings",
sa.Column(
"background_reindex_enabled",
sa.Boolean(),
nullable=False,
server_default="true",
),
)
# Set background_reindex_enabled based on switchover_type
op.execute(
"""
UPDATE search_settings
SET background_reindex_enabled = CASE
WHEN switchover_type = 'INSTANT' THEN false
ELSE true
END
"""
)
# Remove the switchover_type column
op.drop_column("search_settings", "switchover_type")

View File

@@ -0,0 +1,228 @@
"""Migration 6: User file schema cleanup
Revision ID: 2b75d0a8ffcb
Revises: 3a78dba1080a
Create Date: 2025-09-22 10:09:26.375377
This migration removes legacy columns and tables after data migration is complete.
It should only be run after verifying all data has been successfully migrated.
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy import text
import logging
import fastapi_users_db_sqlalchemy
logger = logging.getLogger("alembic.runtime.migration")
# revision identifiers, used by Alembic.
revision = "2b75d0a8ffcb"
down_revision = "3a78dba1080a"
branch_labels = None
depends_on = None
def upgrade() -> None:
"""Remove legacy columns and tables."""
bind = op.get_bind()
inspector = sa.inspect(bind)
logger.info("Starting schema cleanup...")
# === Step 1: Verify data migration is complete ===
logger.info("Verifying data migration completion...")
# Check if any chat sessions still have folder_id references
chat_session_columns = [
col["name"] for col in inspector.get_columns("chat_session")
]
if "folder_id" in chat_session_columns:
orphaned_count = bind.execute(
text(
"""
SELECT COUNT(*) FROM chat_session
WHERE folder_id IS NOT NULL AND project_id IS NULL
"""
)
).scalar_one()
if orphaned_count > 0:
logger.warning(
f"WARNING: {orphaned_count} chat_session records still have "
f"folder_id without project_id. Proceeding anyway."
)
# === Step 2: Drop chat_session.folder_id ===
if "folder_id" in chat_session_columns:
logger.info("Dropping chat_session.folder_id...")
# Drop foreign key constraint first
op.execute(
"ALTER TABLE chat_session DROP CONSTRAINT IF EXISTS chat_session_chat_folder_fk"
)
op.execute(
"ALTER TABLE chat_session DROP CONSTRAINT IF EXISTS chat_session_folder_fk"
)
# Drop the column
op.drop_column("chat_session", "folder_id")
logger.info("Dropped chat_session.folder_id")
# === Step 3: Drop persona__user_folder table ===
if "persona__user_folder" in inspector.get_table_names():
logger.info("Dropping persona__user_folder table...")
# Check for any remaining data
remaining = bind.execute(
text("SELECT COUNT(*) FROM persona__user_folder")
).scalar_one()
if remaining > 0:
logger.warning(
f"WARNING: Dropping persona__user_folder with {remaining} records"
)
op.drop_table("persona__user_folder")
logger.info("Dropped persona__user_folder table")
# === Step 4: Drop chat_folder table ===
if "chat_folder" in inspector.get_table_names():
logger.info("Dropping chat_folder table...")
# Check for any remaining data
remaining = bind.execute(text("SELECT COUNT(*) FROM chat_folder")).scalar_one()
if remaining > 0:
logger.warning(f"WARNING: Dropping chat_folder with {remaining} records")
op.drop_table("chat_folder")
logger.info("Dropped chat_folder table")
# === Step 5: Drop user_file legacy columns ===
user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
# Drop folder_id
if "folder_id" in user_file_columns:
logger.info("Dropping user_file.folder_id...")
op.drop_column("user_file", "folder_id")
logger.info("Dropped user_file.folder_id")
# Drop cc_pair_id (already handled in migration 5, but be sure)
if "cc_pair_id" in user_file_columns:
logger.info("Dropping user_file.cc_pair_id...")
# Drop any remaining foreign key constraints
bind.execute(
text(
"""
DO $$
DECLARE r RECORD;
BEGIN
FOR r IN (
SELECT conname
FROM pg_constraint c
JOIN pg_class t ON c.conrelid = t.oid
WHERE c.contype = 'f'
AND t.relname = 'user_file'
AND EXISTS (
SELECT 1 FROM pg_attribute a
WHERE a.attrelid = t.oid
AND a.attname = 'cc_pair_id'
)
) LOOP
EXECUTE format('ALTER TABLE user_file DROP CONSTRAINT IF EXISTS %I', r.conname);
END LOOP;
END$$;
"""
)
)
op.drop_column("user_file", "cc_pair_id")
logger.info("Dropped user_file.cc_pair_id")
# === Step 6: Clean up any remaining constraints ===
logger.info("Cleaning up remaining constraints...")
# Drop any unique constraints on removed columns
op.execute(
"ALTER TABLE user_file DROP CONSTRAINT IF EXISTS user_file_cc_pair_id_key"
)
logger.info("Migration 6 (schema cleanup) completed successfully")
logger.info("Legacy schema has been fully removed")
def downgrade() -> None:
"""Recreate dropped columns and tables (structure only, no data)."""
bind = op.get_bind()
inspector = sa.inspect(bind)
logger.warning("Downgrading schema cleanup - recreating structure only, no data!")
# Recreate user_file columns
if "user_file" in inspector.get_table_names():
columns = [col["name"] for col in inspector.get_columns("user_file")]
if "cc_pair_id" not in columns:
op.add_column(
"user_file", sa.Column("cc_pair_id", sa.Integer(), nullable=True)
)
if "folder_id" not in columns:
op.add_column(
"user_file", sa.Column("folder_id", sa.Integer(), nullable=True)
)
# Recreate persona__user_folder table
if "persona__user_folder" not in inspector.get_table_names():
op.create_table(
"persona__user_folder",
sa.Column("persona_id", sa.Integer(), nullable=False),
sa.Column("user_folder_id", sa.Integer(), nullable=False),
sa.PrimaryKeyConstraint("persona_id", "user_folder_id"),
sa.ForeignKeyConstraint(["persona_id"], ["persona.id"]),
sa.ForeignKeyConstraint(["user_folder_id"], ["user_project.id"]),
)
# Recreate chat_folder table and related structures
if "chat_folder" not in inspector.get_table_names():
op.create_table(
"chat_folder",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column(
"user_id",
fastapi_users_db_sqlalchemy.generics.GUID(),
nullable=True,
),
sa.Column("name", sa.String(), nullable=True),
sa.Column("display_priority", sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(
["user_id"],
["user.id"],
name="chat_folder_user_id_fkey",
),
sa.PrimaryKeyConstraint("id"),
)
# Add folder_id back to chat_session
if "chat_session" in inspector.get_table_names():
columns = [col["name"] for col in inspector.get_columns("chat_session")]
if "folder_id" not in columns:
op.add_column(
"chat_session", sa.Column("folder_id", sa.Integer(), nullable=True)
)
# Add foreign key if chat_folder exists
if "chat_folder" in inspector.get_table_names():
op.create_foreign_key(
"chat_session_chat_folder_fk",
"chat_session",
"chat_folder",
["folder_id"],
["id"],
)
logger.info("Downgrade completed - structure recreated but data is lost")

View File

@@ -0,0 +1,33 @@
"""set built in to default
Revision ID: 2cdeff6d8c93
Revises: f5437cc136c5
Create Date: 2025-02-11 14:57:51.308775
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "2cdeff6d8c93"
down_revision = "f5437cc136c5"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Prior to this migration / point in the codebase history,
# built in personas were implicitly treated as default personas (with no option to change this)
# This migration makes that explicit
op.execute(
"""
UPDATE persona
SET is_default_persona = TRUE
WHERE builtin_persona = TRUE
"""
)
def downgrade() -> None:
pass

View File

@@ -5,6 +5,7 @@ Revises: 4b08d97e175a
Create Date: 2024-08-21 19:15:15.762948
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: c0fd6e4da83a
Create Date: 2024-11-11 10:57:22.991157
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 33ea50e88f24
Create Date: 2025-01-31 10:30:27.289646
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -0,0 +1,115 @@
"""add_indexing_coordination
Revision ID: 2f95e36923e6
Revises: 0816326d83aa
Create Date: 2025-07-10 16:17:57.762182
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "2f95e36923e6"
down_revision = "0816326d83aa"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Add database-based coordination fields (replacing Redis fencing)
op.add_column(
"index_attempt", sa.Column("celery_task_id", sa.String(), nullable=True)
)
op.add_column(
"index_attempt",
sa.Column(
"cancellation_requested",
sa.Boolean(),
nullable=False,
server_default="false",
),
)
# Add batch coordination fields (replacing FileStore state)
op.add_column(
"index_attempt", sa.Column("total_batches", sa.Integer(), nullable=True)
)
op.add_column(
"index_attempt",
sa.Column(
"completed_batches", sa.Integer(), nullable=False, server_default="0"
),
)
op.add_column(
"index_attempt",
sa.Column(
"total_failures_batch_level",
sa.Integer(),
nullable=False,
server_default="0",
),
)
op.add_column(
"index_attempt",
sa.Column("total_chunks", sa.Integer(), nullable=False, server_default="0"),
)
# Progress tracking for stall detection
op.add_column(
"index_attempt",
sa.Column("last_progress_time", sa.DateTime(timezone=True), nullable=True),
)
op.add_column(
"index_attempt",
sa.Column(
"last_batches_completed_count",
sa.Integer(),
nullable=False,
server_default="0",
),
)
# Heartbeat tracking for worker liveness detection
op.add_column(
"index_attempt",
sa.Column(
"heartbeat_counter", sa.Integer(), nullable=False, server_default="0"
),
)
op.add_column(
"index_attempt",
sa.Column(
"last_heartbeat_value", sa.Integer(), nullable=False, server_default="0"
),
)
op.add_column(
"index_attempt",
sa.Column("last_heartbeat_time", sa.DateTime(timezone=True), nullable=True),
)
# Add index for coordination queries
op.create_index(
"ix_index_attempt_active_coordination",
"index_attempt",
["connector_credential_pair_id", "search_settings_id", "status"],
)
def downgrade() -> None:
# Remove the new index
op.drop_index("ix_index_attempt_active_coordination", table_name="index_attempt")
# Remove the new columns
op.drop_column("index_attempt", "last_batches_completed_count")
op.drop_column("index_attempt", "last_progress_time")
op.drop_column("index_attempt", "last_heartbeat_time")
op.drop_column("index_attempt", "last_heartbeat_value")
op.drop_column("index_attempt", "heartbeat_counter")
op.drop_column("index_attempt", "total_chunks")
op.drop_column("index_attempt", "total_failures_batch_level")
op.drop_column("index_attempt", "completed_batches")
op.drop_column("index_attempt", "total_batches")
op.drop_column("index_attempt", "cancellation_requested")
op.drop_column("index_attempt", "celery_task_id")

View File

@@ -5,6 +5,7 @@ Revises: 7f99be1cb9f5
Create Date: 2023-10-16 23:21:01.283424
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 91ffac7e65b3
Create Date: 2024-07-24 21:29:31.784562
"""
import random
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 5b29123cd710
Create Date: 2024-11-01 12:51:01.535003
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: a6df6b88ef81
Create Date: 2025-01-29 10:54:22.141765
"""
from alembic import op

View File

@@ -5,6 +5,7 @@ Revises: ee3f4b47fad5
Create Date: 2024-08-15 22:37:08.397052
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 91a0a4d62b14
Create Date: 2024-09-20 21:24:04.891018
"""
from alembic import op

View File

@@ -5,6 +5,7 @@ Revises: c99d76fcd298
Create Date: 2024-09-13 13:20:32.885317
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

View File

@@ -5,6 +5,7 @@ Revises: 2955778aa44c
Create Date: 2025-01-08 15:38:17.224380
"""
from alembic import op
from sqlalchemy import text

View File

@@ -0,0 +1,136 @@
"""update_kg_trigger_functions
Revision ID: 36e9220ab794
Revises: c9e2cd766c29
Create Date: 2025-06-22 17:33:25.833733
"""
from alembic import op
from sqlalchemy.orm import Session
from sqlalchemy import text
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
# revision identifiers, used by Alembic.
revision = "36e9220ab794"
down_revision = "c9e2cd766c29"
branch_labels = None
depends_on = None
def _get_tenant_contextvar(session: Session) -> str:
"""Get the current schema for the migration"""
current_tenant = session.execute(text("SELECT current_schema()")).scalar()
if isinstance(current_tenant, str):
return current_tenant
else:
raise ValueError("Current tenant is not a string")
def upgrade() -> None:
bind = op.get_bind()
session = Session(bind=bind)
# Create kg_entity trigger to update kg_entity.name and its trigrams
tenant_id = _get_tenant_contextvar(session)
alphanum_pattern = r"[^a-z0-9]+"
truncate_length = 1000
function = "update_kg_entity_name"
op.execute(
text(
f"""
CREATE OR REPLACE FUNCTION "{tenant_id}".{function}()
RETURNS TRIGGER AS $$
DECLARE
name text;
cleaned_name text;
BEGIN
-- Set name to semantic_id if document_id is not NULL
IF NEW.document_id IS NOT NULL THEN
SELECT lower(semantic_id) INTO name
FROM "{tenant_id}".document
WHERE id = NEW.document_id;
ELSE
name = lower(NEW.name);
END IF;
-- Clean name and truncate if too long
cleaned_name = regexp_replace(
name,
'{alphanum_pattern}', '', 'g'
);
IF length(cleaned_name) > {truncate_length} THEN
cleaned_name = left(cleaned_name, {truncate_length});
END IF;
-- Set name and name trigrams
NEW.name = name;
NEW.name_trigrams = {POSTGRES_DEFAULT_SCHEMA}.show_trgm(cleaned_name);
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
"""
)
)
trigger = f"{function}_trigger"
op.execute(f'DROP TRIGGER IF EXISTS {trigger} ON "{tenant_id}".kg_entity')
op.execute(
f"""
CREATE TRIGGER {trigger}
BEFORE INSERT OR UPDATE OF name
ON "{tenant_id}".kg_entity
FOR EACH ROW
EXECUTE FUNCTION "{tenant_id}".{function}();
"""
)
# Create kg_entity trigger to update kg_entity.name and its trigrams
function = "update_kg_entity_name_from_doc"
op.execute(
text(
f"""
CREATE OR REPLACE FUNCTION "{tenant_id}".{function}()
RETURNS TRIGGER AS $$
DECLARE
doc_name text;
cleaned_name text;
BEGIN
doc_name = lower(NEW.semantic_id);
-- Clean name and truncate if too long
cleaned_name = regexp_replace(
doc_name,
'{alphanum_pattern}', '', 'g'
);
IF length(cleaned_name) > {truncate_length} THEN
cleaned_name = left(cleaned_name, {truncate_length});
END IF;
-- Set name and name trigrams for all entities referencing this document
UPDATE "{tenant_id}".kg_entity
SET
name = doc_name,
name_trigrams = {POSTGRES_DEFAULT_SCHEMA}.show_trgm(cleaned_name)
WHERE document_id = NEW.id;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
"""
)
)
trigger = f"{function}_trigger"
op.execute(f'DROP TRIGGER IF EXISTS {trigger} ON "{tenant_id}".document')
op.execute(
f"""
CREATE TRIGGER {trigger}
AFTER UPDATE OF semantic_id
ON "{tenant_id}".document
FOR EACH ROW
EXECUTE FUNCTION "{tenant_id}".{function}();
"""
)
def downgrade() -> None:
pass

View File

@@ -0,0 +1,52 @@
"""add chunk stats table
Revision ID: 3781a5eb12cb
Revises: df46c75b714e
Create Date: 2025-03-10 10:02:30.586666
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "3781a5eb12cb"
down_revision = "df46c75b714e"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"chunk_stats",
sa.Column("id", sa.String(), primary_key=True, index=True),
sa.Column(
"document_id",
sa.String(),
sa.ForeignKey("document.id"),
nullable=False,
index=True,
),
sa.Column("chunk_in_doc_id", sa.Integer(), nullable=False),
sa.Column("information_content_boost", sa.Float(), nullable=True),
sa.Column(
"last_modified",
sa.DateTime(timezone=True),
nullable=False,
index=True,
server_default=sa.func.now(),
),
sa.Column("last_synced", sa.DateTime(timezone=True), nullable=True, index=True),
sa.UniqueConstraint(
"document_id", "chunk_in_doc_id", name="uq_chunk_stats_doc_chunk"
),
)
op.create_index(
"ix_chunk_sync_status", "chunk_stats", ["last_modified", "last_synced"]
)
def downgrade() -> None:
op.drop_index("ix_chunk_sync_status", table_name="chunk_stats")
op.drop_table("chunk_stats")

Some files were not shown because too many files have changed in this diff Show More