Compare commits

..

191 Commits

Author SHA1 Message Date
Richard Kuo (Onyx)
bda264f1d6 set some default resources 2025-05-13 13:45:41 -07:00
Richard Kuo (Onyx)
daa1746b4a just readme fixes 2025-05-13 09:56:07 -07:00
rkuo-danswer
d8068f0a68 Feature/helm separate workers (#4679)
* add test

* try breaking out background workers

* fix helm lint complaints

* rename disabled files more

* try different folder structure

* fix beat selector

* vespa setup should break on success

* improved instructions for basic helm chart testing

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-13 02:23:32 +00:00
Chris Weaver
d91f776c2d Fix initial checkpoint save (#4697)
* Fix initial checkpoint save

* Improve comment

* Another small fix
2025-05-13 01:59:07 +00:00
Chris Weaver
a01135581f Small GitHub enhancements (#4696)
* Small github enhancements

* Fix manual run

* Address EL comments
2025-05-13 01:14:16 +00:00
rkuo-danswer
392b87fb4f Bugfix/limit permission size (#4695)
* add utility function

* add utility functions to DocExternalAccess

* refactor db access out of individual celery tasks and put it directly into the heavy task

* code review and remove leftovers

* fix circular imports

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-13 00:46:31 +00:00
Evan Lohn
551a05aef0 light worker discovers beat task (#4694)
* light worker discovers beat task

* v2: put in right place
2025-05-12 21:20:18 +00:00
rkuo-danswer
6b9d0b5af9 ensure we don't tag 'latest' with cloud images (#4688)
* ensure we don't tag 'latest' with cloud images

* add docker login to trivy

* fix tag names

* flavor latest false (no auto latest tags)

* fix typo

* only run the appropriate workflow for web

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2025-05-12 17:23:01 +00:00
Chris Weaver
b8f3ad3e5d Fix/remove ee fe (#4690)
* Remove ee imports from FE

* Remove ee imports from FE

* Style
2025-05-12 02:31:04 +00:00
Chris Weaver
b19515e25d Fix window_start (#4689)
* Fix window_start

* Add comment
2025-05-12 00:11:20 +00:00
Chris Weaver
913f7cc7d4 Fix/remove ee from mit (#4682)
* Remove some ee imports

* more

* Remove all ee imports

* Fix

* Autodiscover

* fix

* Fix typing

* More celery task stuff

* Fix import
2025-05-11 22:09:50 +00:00
rkuo-danswer
84566debab set field size limit (#4683)
* set field size limit

* don't use sys.maxsize

---------

Co-authored-by: Richard Kuo <rkuo@rkuo.com>
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-09 22:46:13 +00:00
rkuo-danswer
1a8b7abd00 add test (#4676)
* add test

* comment

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-09 21:38:51 +00:00
Evan Lohn
4c0423f27b fix github cursor pagination infinite loop (#4673)
* fix infinite loop

* unit test for infinite loop issue

* mypy version

* more logging

* unbound locals
2025-05-09 21:35:37 +00:00
rkuo-danswer
7965fd9cbb run testing (#4681)
* run testing

* need to break on success

* add a readme

* raise vespa to 6GB

* allow test to retry

* add 20 attempts

* put memory limits back to normal

* restore chart testing on changes only

* increase retries to 40

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-09 11:49:43 -07:00
Chris Weaver
91831f4d07 Fix user count (#4677)
* Fix user count

* Add helper + fix async function as well

* fix mypy

* Address RK comment
2025-05-08 17:19:40 -07:00
Chris Weaver
1dd98a87cc Try to reduce memory usage on group sync (#4678) 2025-05-08 22:53:53 +00:00
rkuo-danswer
0dd65cc839 enterprise settings needs to 403 on tenant id absence (#4675)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-08 18:32:12 +00:00
Chris Weaver
519aeb6a1f Drive perm sync enhancement (#4672)
* Enhance drive perm sync

* add tests

* more stuff

* fixes

* Fix

* Speed up

* Add missing file

* Address EL comments

* Add ondelete=CASCADE

* Improve comment
2025-05-08 03:12:41 +00:00
Evan Lohn
0eab6ab935 fix drive slowness (#4668)
* fix slowness

* no more silent failing for users

* nits

* no silly info transfer
2025-05-07 22:48:08 +00:00
Evan Lohn
ee09cb95af fixes foreign key violation (#4670)
* fixes foreign key violation

* nit
2025-05-07 18:27:32 +00:00
Evan Lohn
8a9a66947e make 404s skippable (#4671) 2025-05-07 18:04:35 +00:00
Raunak Bhagat
d744c0dab4 fix: Fix error in which channel names would not have the leading "#" removed (#4664)
* Fix failing entrypoint into slack connector

* Pre-filter channel names upon instantiation of slack connector class

* Add decrypt script

* Add slack connector tests

* Fix mypy errors on decrypt.py

* Add property to SlackConnector class

* Add some basic tests

* Move location of tests

* Change name of env token

* Add secrets for Slack

* Add more parameterized cases

* Change env variable name

* Change names

* Update channel names

* Edit tests

* Modify tests

* Only import type in __main__

* Fix tests to actually test connectors

* Pass parameter to fixture directly
2025-05-07 04:55:21 +00:00
Chris Weaver
70df685709 Non default schema fix (#4667)
* Use correct postgres schema

* Remove raw Session() use

* Refactor + add test

* Fix comment
2025-05-06 20:35:59 -07:00
Chris Weaver
f85ef78238 Add more logging for confluence perm-sync + handle case where permiss… (#4586)
* Add more logging for confluence perm-sync + handle case where permissions are removed from the access token

* Make required permissions are explicit

* more

* Add slim fetch limit + mark all cc pairs of source type as successful upon group sync

* Add to dev compose

* Small teams fix

* Add file

* Add single limit pagination for confluence

* Restrict to server only

* more logging

* cleanup

* Cleanup

* Remove CONFLUENCE_CONNECTOR_SLIM_FETCH_LIMIT

* Handle teams error

* Fix ut

* Remove db dependency from confluence_doc_sync

* move stuff back to debug
2025-05-06 18:35:14 +00:00
Evan Lohn
2d7e48d8e8 possible mangling fix (#4666)
* possible mangling fix

* fixed nextUrl setting

* global bad
2025-05-06 15:51:39 +00:00
rkuo-danswer
8231328dc6 restore caching and fix up some prefixing (#4649)
* restore caching and fix up some prefixing

* try backend matrix build and fix artifact names

* need id

* add backslashes to be consistent

* fix no-cache

* leave docker tags to the meta action

* need checkout in merge

* add comment

* move spammy logs to debug status

* bunch of no-cache updates

* prefix

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-05 16:43:29 +00:00
Chris Weaver
7763e2fa23 Fix non-default schema in KV store (#4655)
* Fix non-default schema in KV store

* Fix custom schema
2025-05-04 22:19:35 +00:00
Chris Weaver
6085bff12d Fix test / display models (#4657)
* Fix test / display models

* Address greptile comments

* Increase wait time

* Increase overall timeout

* Move stuff to utils file

* Updates
2025-05-04 14:04:03 -07:00
Weves
97d60a89ae Add LRU cache to get_model_map 2025-05-03 17:43:58 -07:00
Raunak Bhagat
79b981075e perf: Optimize query history exporting process (#4602)
* Update mode to be a default parameter in `FileStore.read`

* Move query history exporting process to be a background job instead

* Move hardcoded report-file-naming to a common utility function

* Add type annotations

* Update download component

* Implement button to re-ping and download CSV file; fix up some backend file-checking logic

* De-indent logic (w/ early return)

* Return different error codes dependings on the type of task status

* Add more resistant failure retrying mechanisms

* Remove default parameter in helper function

* Use popup for error messaging

* Update return code

* Update web/src/app/ee/admin/performance/query-history/DownloadAsCSV.tsx

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Add type to useState call

* Update backend/ee/onyx/server/query_history/api.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Update backend/onyx/file_store/file_store.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Update backend/ee/onyx/background/celery/apps/primary.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Move rerender call to after check

* Run formatter

* Add type conversions back (smh greptile)

* Remove duplicated call to save_file

* Move non-fallible logic out of try-except block

* Pass date-ranges into API call

* Convert to ISO strings before passing it into the API call

* Add API to list all tasks

* Create new pydantic model to represent tasks to return instead

* Change helper to only fetch query-history tasks

* Use `shared_tasks` instead of old method

* Address more comments from PR; consolidate how task name is generated

* Mark task as failed if any exception is raised

* Change the task object which is returned back to the FE

* Add a table to display previously generated query-history-csv's

* Add timestamps to task; delete tasks as soon as file finishes processing

* Raise exception if start_time is not present

* Convert hard-coded string to constant

* Add "Generated At" field to table

* Return task list in sorted order (based off of start-time)

* Implement pagination

* Remove unused props and cleanup tailwind classes

* Change the name of kickoff button

* Redesign how previous query exports are viewed

* Make button a constant width even when contents change

* Remove timezone information before comparing

* Decrease interval time for re-pinging API

* Add timezone to start-time creation

* Add a refreshInterval for getting updated task status

* Add new background queue

* Edit small verbiage and remove error popup when max-retries is hit

* Change up heavy worker to recognize new task in new module

* Ensure `celery_app` is imported

* Change how `celery_app` is imported and defined

* Update comment on why `celery_app` must be imported

* Add basic skeleton for new beat task to cleanup any dead / failed query-history-export tasks

* Move cleanup task to different worker / queue

* Implement cleanup task

* Add return type

* Address comment on PR

* Remove delimiter from prefix

* Change name of function to be more descriptive

* Remove delimiter from prefix constant

* Move function invocation closer to usage location

* Move imports to top of file

* Move variable up a scope due to undefined error

* Remove dangling if-statement

* Make function more pure-functional

* Remove redefinition

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-05-03 00:16:35 +00:00
Evan Lohn
113876b276 id not set in checkpoint FINAL (#4656)
* it will never happen again.

* fix perm sync issue

* fix perm sync issue2

* ensure member emails map is populated

* other fix for perm sync

* address CW comments

* nit
2025-05-03 00:10:21 +00:00
rkuo-danswer
5c3820b39f Bugfix/slack timeout (#4652)
* don't log all channels

* print number of channels

* sanitize indexing exception messages

* harden vespa index swap

* use constants and fix list generation

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-02 18:24:45 +00:00
Evan Lohn
55e4465782 orphan tag cleanup optimization (#4651)
* move orphan tag cleanup to final cleanup section of associated tparent tasks

* naming
2025-05-02 17:22:59 +00:00
Evan Lohn
6d9693dc51 drive file deduping (#4648)
* drive file deduping

* switched to version that does not require thread safety

* thanks greptile

* CW comments
2025-05-02 10:58:16 -07:00
Weves
75fa10cead fix highspot 2025-05-01 14:34:35 -07:00
Richard Kuo (Onyx)
0497bfdf78 fix double entry 2025-05-01 11:26:57 -07:00
rkuo-danswer
0db2ad2132 memory optimize task generation for connector deletion (#4645)
* memory optimize task generation for connector deletion

* test

* fix up integration test docker file

* more no-cache

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-05-01 10:47:26 -07:00
Chris Weaver
49cd38fb2d Update README.md 2025-05-01 09:58:33 -07:00
Raunak Bhagat
bd36b2ad6d Remove cursor-help for tooltip (#4643) 2025-05-01 09:38:13 -07:00
Evan Lohn
6436b60763 github cursor pagination (#4642)
* v1 of cursor pagination

* mypy

* unit tests

* CW comments
2025-04-30 19:09:20 -07:00
Raunak Bhagat
a6cc1c84dc Add padding to bottom of pages (#4641) 2025-04-30 22:34:18 +00:00
Weves
8515f4b57a Highspot cleanup 2025-04-30 14:57:36 -07:00
joachim-danswer
f68b74ff4a disable Agent Search refinement by default (#4638)
- created env variable  AGENT_ALLOW_REFINEMENT  with default "". Must be set to true to enable Refinement.
 - added an environment variable for the upper limit of docs that can be sent to verification
2025-04-30 19:51:08 +00:00
rkuo-danswer
e254fdc066 add sendgrid as option (#4639)
* add sendgrid as option

* code review

* mypy

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-30 07:33:15 +00:00
Raunak Bhagat
f26de37878 Remove info hoverable (#4637) 2025-04-30 03:16:14 +00:00
rkuo-danswer
94de23fe87 Bugfix/chat images 2 (#4630)
* don't hardcode -1

* extra spaces

* fix binary data in blurb

* add note to binary handling

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-30 01:29:10 +00:00
Chris Weaver
dd242c9926 Fix race condition with archived channels (#4635) 2025-04-29 23:35:40 +00:00
Chris Weaver
47767c1666 Small improvements to checkpoint pickup logic (#4634) 2025-04-29 21:23:54 +00:00
Chris Weaver
9be3da2357 Fix gitlab (#4629)
* Fix gitlab

* Add back assert
2025-04-28 17:42:49 -07:00
Evan Lohn
8961a3cc72 page token for drive group sync (#4627) 2025-04-28 19:06:06 +00:00
Chris Weaver
47b9e7aa62 Fix teams (#4628)
* Fix teams

* Use get_all

* Add comment
2025-04-28 11:53:22 -07:00
Evan Lohn
eebfa5be18 Confluence server api time fix (#4589)
* tolerance of confluence api weirdness

* remove checkpointing

* remove skipping logic from checkpointing

* add back checkpointing

* switch confluence checkpointing to be based on page starts

* address CW comments and fix unit tests

* some mitigations of bad confluence api

* new checkpointing approach and testing fixes

* fix test

* CW comments
2025-04-28 06:06:29 +00:00
Chris Weaver
5047d256b4 Add support for restrictions w/o any access (#4624)
* Add support for restrictions w/o any access

* Fix
2025-04-28 03:09:36 +00:00
Evan Lohn
5db676967f no more duplicate files during folder indexing (#4579)
* no more duplicate files during folder indexing

* cleanup checkpoint after a shared folder has been finished

* cleanup

* lint
2025-04-28 01:01:20 +00:00
Chris Weaver
ea0664e203 Fix LLM API key (#4623)
* Fix LLM API key

* Remove unused import

* Update web/src/app/admin/configuration/llm/LLMProviderUpdateForm.tsx

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-04-27 23:10:36 +00:00
Weves
bbd0874200 fix 2025-04-27 14:37:53 -07:00
rkuo-danswer
c6d100b415 Bugfix/chat images (#4618)
* keep chatfiletype as image instead of user_knowledge

* improve continue message

* fix to image handling

* greptile code review

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-27 20:34:30 +00:00
Evan Lohn
5ca7a7def9 fix migration and add test (#4615) 2025-04-25 21:27:59 +00:00
Chris Weaver
92b5e1adf4 Add support for overriding user list (#4616)
* Add support for overriding user list

* Fix

* Add typing

* pythonify
2025-04-25 15:15:23 -07:00
Chris Weaver
23c6e0f3bf Single source of truth for image capability (#4612)
* Single source of truth for image capability

* Update web/src/app/admin/assistants/AssistantEditor.tsx

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Fix tests

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-04-25 20:37:16 +00:00
Chris Weaver
ad76e6ac9e Adjust confluence perm sync frequency (#4613)
* Adjust confluence perm sync frequency

* Fiux comment
2025-04-25 19:36:10 +00:00
Evan Lohn
151aabea73 specific user emails for drive connector (#4608)
* specific user emails for drive connector

* fix drive connector tests

* fix connector tests
2025-04-25 18:49:20 +00:00
Chris Weaver
d711680069 Add e2e test for assistant creation/edit (#4597)
* Add e2e test for assistant creation/edit

* Skip initial full reset to have seeded connector
2025-04-25 13:21:34 -07:00
Evan Lohn
9835d55ecb transfer old fileds to new config 2025-04-25 12:25:20 -07:00
Raunak Bhagat
69c539df6e fix: Create migration to re-introduce display_model_names (#4600)
* Fix migration

* Fix migration to take care of various nullability cases

* Address comments on PR

* Rename variables to be more descriptive

* Make helpers private

* Fix select statement

* Add comments to explain the involved logic

* Saving changes

* Finish script to revalidate `display_model_names`

* Address comments on PR by greptile

* Add missing columns

* Pull difference operator out into binding

* Add deletion prior to re-insertion

* Use map from shared llm-provider file instead

* Use helper function instead of copying code

* Remove delete and convert into an update statement

* Use pydantic for ModelConfigurations

* Update to do nothing on-conflict rather than update

* Address nits on PR

* Add default visible model(s) for bedrock

* Perform an update on conflict instead of doing nothing
2025-04-25 10:44:13 -07:00
pablonyx
df67ca18d8 My docs cleanup (#4519)
* update

* improved my docs

* nit

* nit

* k

* push changes

* update

* looking good

* k

* fix preprocessing

* try a fix

* k

* update

* nit

* k

* quick nits

* Cleanup / fixes

* Fixes

* Fix build

* fix

* fix quality checks

---------

Co-authored-by: Weves <chrisweaver101@gmail.com>
2025-04-25 05:20:33 +00:00
Chris Weaver
115cfb6ae9 Fix tool choice (#4596)
* Fix tool choice

* fix
2025-04-24 21:51:14 -07:00
rkuo-danswer
672f3a1c34 fix provisioning and don't spawn tasks which could result in a race condition (#4604)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-25 02:41:05 +00:00
Raunak Bhagat
13b71f559f fix: Fix migration issue in which display-model-names were not being appropriately set (#4594)
* Fix migration

* Fix migration to take care of various nullability cases

* Address comments on PR

* Rename variables to be more descriptive

* Make helpers private

* Fix select statement

* Add comments to explain the involved logic

* Add helpers for viewing visible model names

* Fix logic for missing model + display-model names in migration
2025-04-24 21:26:33 +00:00
Evan Lohn
2981b7a425 linear dupe docs fix (#4607) 2025-04-24 21:00:21 +00:00
rkuo-danswer
37adf31a3b fix priority on vespa metadata sync (#4603)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-24 19:50:49 +00:00
Raunak Bhagat
5d59850a17 Fix slack formatting bug (#4587) 2025-04-24 17:18:54 +00:00
Evan Lohn
91d6b739a4 ensure drive id set in checkpoint (#4595)
* ensure drive id set in checkpoint

* asserts gone

* address CW
2025-04-24 01:20:13 +00:00
rkuo-danswer
c83ee06062 Feature/salesforce correctness 2 (#4506)
* refactor salesforce sqlite db access

* more refactoring

* refactor again

* refactor again

* rename object

* add finalizer to ensure db connection is always closed

* avoid unnecessarily nesting connections and commit regularly when possible

* remove db usage from csv download

* dead code

* hide deprecation warning in ddtrace

* remove unused param

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-24 01:05:52 +00:00
Raunak Bhagat
c93cebe1ab fix: Add minor fixes to how model configurations are displayed (#4593)
* Add minor fixes to how model configurations are interacted with

* Remove azure entry
2025-04-23 21:42:02 +00:00
rkuo-danswer
ea1d3c1eda Feature/db script (#4574)
* debug script + slight refactor of db class

* better comments

* move setup logger

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2025-04-23 20:00:35 +00:00
rkuo-danswer
c9a609b7d8 Bugfix/slack bot channel config (#4585)
* friendlier handling of slack channel retrieval

* retry on downgrade_postgres deadlock

* fix comment

* text

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-23 20:00:03 +00:00
rkuo-danswer
07f04e35ec Bugfix/alembic sqlengine (#4592)
* need sqlengine to work

* add comments

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-23 19:21:34 +00:00
joachim-danswer
d8b050026d removal of keyword 1st phase 2025-04-22 20:29:57 -07:00
Raunak Bhagat
c76dc2ea2c fix: Fix the add_model_configuration migration by removing duplicate model-names during insertion (#4588)
* Convert the model_names and display_model_names into a set instead

* Update backend/alembic/versions/7a70b7664e37_add_model_configuration_table.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-04-22 18:59:31 -07:00
rkuo-danswer
5e11c635d9 wrong logger imported in a lot of wrong places (#4582)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-22 23:34:02 +00:00
joachim-danswer
669b668463 updated logging and basic search expansion procedure 2025-04-22 11:58:02 -07:00
Raunak Bhagat
85fa083717 fix: Return default value instead of throwing error (#4575)
* Return default value instead of throwing error

* Add default parameter

* Move logic around

* Use dummy value for max_input_tokens in testing flow

* Remove unnecessary assignment
2025-04-22 17:33:36 +00:00
Chris Weaver
420d2614d4 Fix assistants forms (#4578)
* Fix assistant num chunk setting

* test

* Fix test

* Update web/src/app/assistants/mine/AssistantModal.tsx

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Update web/src/app/assistants/mine/AssistantModal.tsx

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-04-22 16:09:03 +00:00
Raunak Bhagat
e3218d358d feat: Add assistant name to UI (#4569)
* Add assistant name to UI

* Fix tailwind styling class
2025-04-22 09:35:35 -07:00
Weves
ae632b5fab Fix missing Connector Configuration 2025-04-21 18:50:25 -07:00
rkuo-danswer
0d4c600852 out of process retry for multitenant test reset (#4566)
* tool to generate vespa schema variations for our cloud

* extraneous assign

* use a real templating system instead of search/replace

* fix float

* maybe this should be double

* remove redundant var

* template the other files

* try a spawned process

* move the wrapper

* fix args

* increase timeout

* run multitenant reset operations out of process as well

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2025-04-21 23:30:18 +00:00
Evan Lohn
eb569bf79d add emails to retry with on 403 (#4565)
* add emails to retry with on 403

* attempted fix for connector test

* CW comments

* connector test fix

* test fixes and continue on 403

* fix tests

* fix tests

* fix concurrency tests

* fix integration tests with llmprovider eager loading
2025-04-21 23:27:31 +00:00
Chris Weaver
f3d5303d93 Fix slack bot feedback (#4573)
* Fix slack bot feedback

* Fix

* Make safe
2025-04-21 15:54:48 -07:00
Raunak Bhagat
b97628070e feat: Add ability to specify max input token limit for custom LLM providers (#4510)
* Add multi text array field

* Add multiple values to model configuration for a custom LLM provider

* Fix reference to old field name

* Add migration

* Update all instances of model_names / display_model_names to use new schema migration

* Update background task

* Update endpoints to not throw errors

* Add test

* Update backend/alembic/versions/7a70b7664e37_add_models_configuration_table.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Update backend/onyx/background/celery/tasks/llm_model_update/tasks.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Fix list comprehension nits

* Update web/src/components/admin/connectors/Field.tsx

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Update web/src/app/admin/configuration/llm/interfaces.ts

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Implement greptile recommendations

* Update backend/onyx/db/llm.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Update backend/onyx/server/manage/llm/api.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Update backend/onyx/background/celery/tasks/llm_model_update/tasks.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Update backend/onyx/db/llm.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Fix more greptile suggestions

* Run formatter again

* Update backend/onyx/db/models.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Add relationship to `LLMProvider` and `ModelConfigurations` classes

* Use sqlalchemy ORM relationships instead of manually populating fields

* Upgrade migration

* Update interface

* Remove all instances of model_names and display_model_names from backend

* Add more tests and fix bugs

* Run prettier

* Add types

* Update migration to perform data transformation

* Ensure native llm providers don't have custom max input tokens

* Start updating frontend logic to support custom max input tokens

* Pass max input tokens to LLM class (to be passed into `litellm.completion` call later)

* Add ModelConfigurationField component for custom llm providers

* Edit spacing and styling of model configuration matrix

* Fix error message displaying bug

* Edit opacity of `FiX` field for first index

* Change opacity back

* Change roundness

* Address comments on PR

* Perform fetching of `max_input_tokens` at the beginning of the callgraph and rope it throughout the entire callstack

* Change `add` to `execute`

* Move `max_input_tokens` into `LLMConfig`

* Fix bug with error messages not being cleared

* Change field used to fetch LLMProvider

* Fix model-configuration UI

* Address comments

* Remove circular import

* Fix failing tests in GH

* Fix failing tests

* Use `isSubset` instead of equality to determine native vs custom LLM Provider

* Remove unused import

* Make responses always display max_input_tokens

* Fix api endpoint to hit

* Update types in web application

* Update object field

* Fix more type errors

* Fix failing llm provider tests

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-04-21 04:30:21 -07:00
pablonyx
72d3a7ff21 Frontend testing (#4500)
* add o3 + o4 mini

* k

* see which ones fail

* attempt

* k

* k

* llm ordering passing

* all tests passing

* quick bump

* Revert "add o3 + o4 mini"

This reverts commit 4cfa1984ec.

* k

* k
2025-04-20 23:29:47 +00:00
rkuo-danswer
2111eccf07 Feature/vespa jinja (#4558)
* tool to generate vespa schema variations for our cloud

* extraneous assign

* use a real templating system instead of search/replace

* fix float

* maybe this should be double

* remove redundant var

* template the other files

* try a spawned process

* move the wrapper

* fix args

* increase timeout

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2025-04-20 22:28:55 +00:00
Chris Weaver
87478c5ca6 Parallelize connector tests (#4563)
* Parallelize connector tests

* Use --dist loadfile

* Add slow test logging
2025-04-19 18:10:50 -07:00
evan-danswer
dc62d83a06 File connector tests (#4561)
* danswer to onyx plus tests for file connector

* actually add test
2025-04-19 15:54:30 -07:00
evan-danswer
5681df9095 address getting attachments forever (#4562)
* address getting attachments forever

* fix unit tests
2025-04-19 15:53:27 -07:00
Chris Weaver
6666300f37 Fix flakey web test (#4551)
* Fix flakey web test

* Increase wait time

* Another attempt to fix

* Simplify + add new test

* Fix web tests
2025-04-19 15:12:11 -07:00
Chris Weaver
7f99c54527 Small improvements to connector UI (#4559)
* Small improvements to connector UI

* Update web/src/app/admin/connector/[ccPairId]/IndexingAttemptsTable.tsx

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Fix last_permission_sync

* Handle cases where a source doesn't need group sync

* fix

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-04-19 19:14:05 +00:00
Chris Weaver
4b8ef4b151 Update README.md 2025-04-18 18:29:56 -07:00
rkuo-danswer
e5e0944049 tool to generate vespa schema variations for our cloud (#4556)
* tool to generate vespa schema variations for our cloud

* extraneous assign

* float, not double

* back to double

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-18 20:47:17 +00:00
pablonyx
356336a842 add o3 + o4 mini (#4555) 2025-04-18 20:42:35 +00:00
rkuo-danswer
5bc059881e ping with keep alive (#4550)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-18 18:44:07 +00:00
rkuo-danswer
fa80842afe Bugfix/harden activity timeout (#4545)
* add some hardening

* add info memory logging

* fix last_observed

* remove log spam

* properly cache last activity details

* default values

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-18 02:28:22 +00:00
rkuo-danswer
a8a5a82251 slightly better slack logging (#4554)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-17 18:45:48 -07:00
evan-danswer
953a4e3793 v1 file connector with metadata (#4552) 2025-04-17 23:02:34 +00:00
rkuo-danswer
04ebde7838 refactor a mega function for readability and make sure to increment r… (#4542)
* refactor a mega function for readability and make sure to increment retry_count on exception so that we don't infinitely loop

* improve session and page level context handling

* don't use pydantic for the session context

* we don't need retry success

* move playwright handling into the session context

* need to break on ok

* return doc from scrape

* fix comment

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-17 06:43:30 +00:00
Chris Weaver
6df1c6c72f Pull in more fields for Jira (#4547)
* Pull in more fields for Jira

* Fix tests

* Fix

* more fix

* Fix

* Fix S3 test

* fix
2025-04-17 01:52:50 +00:00
Raunak Bhagat
fe94bdf936 fix: Fix duplicate kwarg issue when calling litellm.main.completion (#4533)
* Fix duplicate kwarg issue

* Change how vertex_credentials are passed

* Modify temporary dict instead

* Change string to a global constant

* Add extra condition to if-check during population of map
2025-04-16 19:29:53 -07:00
rkuo-danswer
2a9fd9342e small improvement to checking for image attachments (#4543)
* small improvement to checking for image attachments

* better comments

* check centralized list of types instead of hardcoding them in the connector

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-17 00:34:22 +00:00
pablonyx
597ad806e3 Skip image files for S3 (#4535)
* skip image files

* process images s3

* tests

* k

* update

* nit

* update
2025-04-16 23:41:00 +00:00
evan-danswer
5acae2dc80 fix re-processing of previously seen docs Confluence (#4544)
* fix re-processing of previously seen docs

* performance
2025-04-16 23:16:21 +00:00
pablonyx
99455db26c add 4.1 (#4540) 2025-04-16 15:34:01 -07:00
pablonyx
0d12e96362 Fix bug with saml validation (#4522)
* fix bug with saml validation

* k
2025-04-16 19:35:58 +00:00
Chris Weaver
7e7b6e08ff Fix confluence perm sync ancestry (#4536)
* Fix confluence perm sync ancestry

* Address EL comments

* add test for special case

* remove print

* Fix test
2025-04-16 03:02:54 +00:00
Raunak Bhagat
1dd32ebfce Remove alert upon submission (#4537) 2025-04-15 19:12:12 -07:00
Weves
c3ffaa19a4 Small no-letsencrypt improvement 2025-04-15 18:29:07 -07:00
pablonyx
f4ea7e62a7 Miscellaneous cleanup (#4516)
* stricter typing

* k
2025-04-15 23:35:13 +00:00
rkuo-danswer
2ac41c3719 Feature/celery beat watchdog (#4534)
* upgrade celery to release version

* make the watchdog script more reusable

* use constant

* code review

* catch interrupt

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-15 22:05:37 +00:00
evan-danswer
a8cba7abae extra logging for uncommon permissions cases (#4532)
* extra logging for uncommon permissions cases

* address CW comments
2025-04-15 18:56:17 +00:00
evan-danswer
ae9f8c3071 checkpointed confluence (#4473)
* checkpointed confluence

* confluence checkpointing tested

* fixed integration tests

* attempt to fix connector test flakiness

* fix rebase
2025-04-14 23:59:53 +00:00
evan-danswer
742041d97a fix font for dark mode (#4527) 2025-04-14 22:43:03 +00:00
pablonyx
187b93275d k (#4525) 2025-04-14 22:29:47 +00:00
Weves
ca2aeac2cc Fix black 2025-04-14 15:53:09 -07:00
ThomaciousD
f7543c6285 Fix #3764: Dynamically handle default branch in GitLab connector 2025-04-14 15:52:10 -07:00
pablonyx
1430a18d44 cohere validation logic update (#4523) 2025-04-14 21:49:22 +00:00
rkuo-danswer
7c4487585d rollback properly on exception (#4073)
* rollback properly on exception

* rollback on exception

* don't continue if we can't set the search path

* cleaner handling via context manager

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-04-14 21:48:35 +00:00
pablonyx
e572ce95e7 Shore up multi tenant tests (#4484)
* update

* fix

* finalize`

* remove unnecessary prints

* fix

* k
2025-04-14 18:34:57 +00:00
evan-danswer
68c6c1f4f8 refactor to use stricter typing (#4513)
* refactor to use stricter typing

* older version of ruff
2025-04-14 17:23:07 +00:00
Chris Weaver
a5edc8aa0f Fix default log level (#4501)
* Fix default log level

* fix
2025-04-14 16:40:11 +00:00
evan-danswer
a377f6ffb6 Unify document deduping (#4520)
* minor cleanup

* cleanup doc deduping and add unit tests
2025-04-14 16:33:00 +00:00
Weves
72ce2f75cc Add env var to docker compose file 2025-04-13 23:14:06 -07:00
joachim-danswer
2683207a24 Expanded basic search (#4517)
* initial working version

* ranking profile

* modification for keyword/instruction retrieval

* mypy fixes

* EL comments

* added env var (True for now)

* flipped default to False

* mypy & final EL/CW comments + import issue
2025-04-13 23:13:01 -07:00
Chris Weaver
e3aab8e85e Improve index attempt display (#4511) 2025-04-13 15:57:47 -07:00
pablonyx
65fd8b90a8 add image indexing tests (#4477)
* address file path

* k

* update

* update

* nit- fix typing

* k

* should path

* in a good state

* k

* k

* clean up file

* update

* update

* k

* k

* k
2025-04-11 22:16:37 +00:00
Chris Weaver
6eaa774051 Confluence timeout fix? (#4509) 2025-04-11 20:06:27 +00:00
evan-danswer
60da282dd1 ensure individual search tool runs do not affect each other (#4503)
* ensure individual search tool runs do not affect each other

* small bug fixes

* nit
2025-04-11 17:24:57 +00:00
rkuo-danswer
493e5386ec Bugfix/salesforce correctness (#4497)
* refactor salesforce sqlite db access

* more refactoring

* refactor again

* refactor again

* rename object

* add finalizer to ensure db connection is always closed

* avoid unnecessarily nesting connections and commit regularly when possible

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-11 08:41:22 +00:00
rkuo-danswer
bc74bcae3a updating more packages (#4502)
* updating more packages

* mypy fixes

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-10 20:53:36 -07:00
evan-danswer
e51e4b33b6 fix max 10 drives issue (#4505) 2025-04-11 02:22:38 +00:00
rkuo-danswer
1d7d5e1809 fix scheduler init (#4504) 2025-04-10 18:21:47 -07:00
Patrick Weston
4a6998b7e3 If an assistant limits knowledge, don't let a user override it in the Sets filter 2025-04-10 11:56:00 -07:00
Weves
6d48b9b4fd fix drive permission sync 2025-04-10 10:41:40 -07:00
Weves
86680cd45b Fix google drive group sync 2025-04-10 10:41:40 -07:00
rkuo-danswer
77e60b9812 remove try update in init ... we really don't need the init to access the db or do any work. (#4498)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-10 10:15:28 -07:00
rkuo-danswer
24184024bb Bugfix/dependency updates (#4482)
* bump fastapi and starlette

* bumping llama index and nltk and associated deps

* bump to fix python-multipart

* bump aiohttp

* update package lock for examples/widget

* bump black

* sentencesplitter has changed namespaces

* fix reorder import check, fix missing passlib

* update package-lock.json

* black formatter updated

* reformatted again

* change to black compatible reorder

* change to black compatible reorder-python-imports fork

* fix pytest dependency

* black format again

* we don't need cdk.txt. update packages to be consistent across all packages

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2025-04-10 08:23:02 +00:00
evan-danswer
e79134eaa0 don't yield expected auth errors (#4494)
* don't yield expected auth errors

* only catch 403s
2025-04-10 01:53:02 +00:00
evan-danswer
b5be1fb948 important clarity comment (#4492) 2025-04-10 01:28:34 +00:00
evan-danswer
1718b8f677 fix claude bug (#4493)
* fix claude bug

* fixed tests
2025-04-10 00:59:18 +00:00
rkuo-danswer
3fc8027e73 pass through various id's and log them in the model server for better… (#4485)
* pass through various id's and log them in the model server for better tracking

* fix test

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-10 00:40:57 +00:00
pablonyx
caa9b106e4 k (#4487) 2025-04-10 00:19:47 +00:00
Chris Weaver
89688f0cef Fix naming of volume (#4491) 2025-04-09 23:46:10 +00:00
Raunak Bhagat
eeab3f06ec fix: Remove advanced options toggle if enterprise features are not enabled (#4489)
* Only show advanced options for custom llm providers *if* the paid features are enabled

* Change variable name
2025-04-09 20:42:20 +00:00
rkuo-danswer
15c74224ad xfail bedrock test (#4490)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-09 14:44:02 -07:00
Raunak Bhagat
2da26c16a9 Edit .gitignore file to add zed editor configurations (#4483) 2025-04-08 23:10:42 +00:00
pablonyx
8db80a6bb1 Add latency metrics (#4472)
* k

* update

* Update chat_backend.py

nit

---------

Co-authored-by: evan-danswer <evan@danswer.ai>
2025-04-08 21:23:26 +00:00
rkuo-danswer
9b6c7625fd Bugfix/cloud checkpoint cleanup (#4478)
* use send_task to be consistent

* add pidbox monitoring task

* add logging so we can track the task execution

* log the idletime of the pidbox

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-08 19:47:07 +00:00
Chris Weaver
634d990cb8 Fix startup w/ seed_db (#4481) 2025-04-08 19:46:41 +00:00
pablonyx
5792261a4f Minor doc set fix (#4480)
* update

* update

* update

* k
2025-04-08 19:14:56 +00:00
Chris Weaver
71839e723f Add stuff to better avoid bot-detection in web connector (#4479)
* Add stuff to better avoid bot-detection in web connector

* Switch to exception log
2025-04-08 12:31:30 -07:00
evan-danswer
10f1ac5da1 use persona info when creating tool args (#4397)
* use persona info when creating tool args

* fixed unit test

* include system message

* fix unit test

* nit
2025-04-08 02:55:36 +00:00
Weves
1f80ed11d9 Fix black 2025-04-07 20:33:15 -07:00
Emerson Gomes
ba80191f5b Handle exception for token cost calculation (#4474)
The code for token cost calculation fails when using a LiteLLM proxy due to mismatch with the provider naming. For now, just handle this exception and assume cost 0 when that happens instead of breaking the flow - A more precise, LiteLLM proxy based cost calculation (relying in the `/model/info`) LiteLLM Proxy method will be needed
2025-04-07 20:30:50 -07:00
Raunak Bhagat
206daa6903 feat: Vertex AI support (#4458)
* Add gemini well-known-llm-provider

* Edit styling of anonymous function

* Remove space

* Edit how advanced options are displayed

* Add VertexAI to acceptable llm providers

* Add new `FileUploadFormField` component

* Edit FileUpload component

* Clean up logic for displaying native llm providers; add support for more complex `CustomConfigKey` types

* Fix minor nits in web app

* Add ability to pass vertex credentials to `litellm`

* Remove unused prop

* Change name of enum value

* Add back ability to change form based on first time configurations

* Create new Error with string instead of throwing raw string

* Add more Gemini models

* Edit mappings for Gemini models

* Edit comment

* Rearrange llm models

* Run black formatter

* Remove complex configurations during first time registration

* Fix nit

* Update llm provider name

* Edit temporary formik field to also have the filename

* Run reformatter

* Reorder commits

* Add advanced configurations for enabled LLM Providers
2025-04-08 00:56:47 +00:00
evan-danswer
17562f9b8f Id not set in checkpoint2 (#4468)
* unconditionally set completion

* drive connector improvements

* fixing broader typing issue

* fix tests, CW comments

* actual test fix
2025-04-07 17:00:42 -07:00
evan-danswer
9c73099241 Drive smart chip indexing (#4459)
* WIP

* WIP almost done, but realized we can just do basic retrieval

* rebased and added scripts

* improved approach to extracting smart chips

* remove files from previous branch

* fix connector tests

* fix test
2025-04-07 21:52:45 +00:00
Emerson Gomes
88d4a65e7b Fix hardcoded temperature 2025-04-07 13:56:11 -07:00
Weves
614d0f8d72 Add more options to dev compose file 2025-04-07 10:03:34 -07:00
SubashMohan
157da24504 update test expectations for Highspot connector (#4464) 2025-04-07 05:12:22 +00:00
Evan Lohn
989dab51b9 unconditionally set completion 2025-04-06 22:39:42 -07:00
Weves
6a13401172 Small tweaks to thinking 2025-04-06 15:59:00 -07:00
rkuo-danswer
bb73bb224a slack permission tests are enterprise only (#4463)
* slack permission tests are enterprise only

* xfail highspot connector

* test is broken

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-06 15:23:17 -07:00
Richard Kuo (Onyx)
4dc382b571 Revert "slack permission tests are enterprise only"
This reverts commit 056a83493f.
2025-04-06 14:24:42 -07:00
Richard Kuo (Onyx)
056a83493f slack permission tests are enterprise only 2025-04-06 14:24:25 -07:00
Chris Weaver
aadd4f212a Adjust pg engine intialization (#4408)
* Adjust pg engine intialization

* Fix mypy

* Rename var

* fix typo

* Fix tests
2025-04-06 12:44:49 -07:00
Ferdinand Loesch
8b05f98d54 Thinking mode UI. (#4370)
* Update web connector implementation and fix line length issues

* Update configurations and fix connector issues

* Update Slack connector

* Update connectors and add jira_test_env to gitignore, removing sensitive information

* Restore checkpointing functionality and remove sensitive information

* Fix agent mode to properly handle thinking tokens

* up

* Enhance ThinkingBox component with improved content handling and animations. Added support for partial thinking tokens, refined scrolling behavior, and updated CSS for better visual feedback during thinking states.

* Create clean branch with frontend thinking mode changes only

* Update ThinkingBox component to include new props for completion and streaming states. Refactor smooth scrolling logic into a dedicated function for improved readability. Add new entry to .gitignore for jira_test_env.

* Remove autoCollapse prop from AIMessage component for improved flexibility in message display.

* Update thinking tokens handling in chat utils

* Remove unused cleanThinkingContent import from Messages component to streamline code.

---------

Co-authored-by: ferdinand loesch <f.loesch@sportradar.com>
Co-authored-by: EC2 Default User <ec2-user@ip-10-73-128-233.eu-central-1.compute.internal>
Co-authored-by: Your Name <you@example.com>
Co-authored-by: Chris Weaver <25087905+Weves@users.noreply.github.com>
2025-04-05 17:31:02 -07:00
Weves
1c16c4ea3d Adjusting default search assistant 2025-04-05 16:00:47 -07:00
Weves
cf6ff3ce4a Fix run-nginx 2025-04-05 16:00:10 -07:00
Weves
86d9f5d9dd Update resource limits 2025-04-05 16:00:10 -07:00
pablonyx
09450010cd refresh token limit (#4456) 2025-04-05 01:27:57 +00:00
pablonyx
0acd50b75d docx bugfix 2025-04-04 18:20:31 -07:00
pablonyx
c3c9a0e57c Docx parsing (#4455)
* looks okay

* k

* k

* k

* update values

* k

* quick fix
2025-04-04 23:36:43 +00:00
pablonyx
ef978aea97 Additional ACL Tests + Slackbot fix (#4430)
* try turning drive perm sync on

* try passing in env var

* add some logs

* Update pr-integration-tests.yml

* revert "Update pr-integration-tests.yml"

This reverts commit 76a44adbfe.

* Revert "add some logs"

This reverts commit ab9e6bcfb1.

* Revert "try passing in env var"

This reverts commit 9c0b6162ea.

* Revert "try turning drive perm sync on"

This reverts commit 2d35f61f42.

* try slack connector

* k

* update

* remove logs

* remove more logs

* nit

* k

* k

* address nits

* run test with additional logs

* Revert "run test with additional logs"

This reverts commit 1397a2c4a0.

* Revert "address nits"

This reverts commit d5e24b019d.
2025-04-04 22:00:17 +00:00
rkuo-danswer
15ab0586df handle gong api race condition (#4457)
* working around a gong race condition in their api

* add back gong basic test

* formatting

* add the call index

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-04 19:33:47 +00:00
rkuo-danswer
839c8611b7 Bugfix/salesforce (#4335)
* add some gc

* small refactoring for temp directories

* WIP

* add some gc collects and size calculations

* un-xfail

* fix salesforce test

* loose check for number of docs

* adjust test again

* cleanup

* nuke directory param, remove using sqlite db to cache email / id mappings

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-04 16:21:34 +00:00
joachim-danswer
68f9f157a6 Adding research topics for better search context (#4448)
* research topics addition

* allow for question to overwrite research area
2025-04-04 09:53:39 -07:00
SubashMohan
9dd56a5c80 Enhance Highspot connector with error handling and add unit tests (#4454)
* Enhance Highspot connector with error handling and add unit tests for poll_source functionality

* Fix file extension validation logic to allow either plain text or document format
2025-04-04 09:53:16 -07:00
pablonyx
842a73a242 Mock connector fix (#4446) 2025-04-04 09:26:10 -07:00
Weves
c04c1ea31b Fix onyx_config.jsonl 2025-04-03 22:44:56 -07:00
Chris Weaver
2380c2266c Infra and Deployment for ECS Fargate (#4449)
* Infra and Deployment for ECS Fargate
---------

Co-authored-by: jpb80 <jordan.buttkevitz@gmail.com>
2025-04-03 22:43:56 -07:00
pablonyx
b02af9b280 Div Con (#4442)
* base setup

* Improvements + time boxing

* time box fix

* mypy fix

* EL Comments

* CW comments

* date awareness

---------

Co-authored-by: joachim-danswer <joachim@danswer.ai>
2025-04-04 00:52:00 +00:00
rkuo-danswer
42938dcf62 Bugfix/gong tweaks (#4444)
* gong debugging

* add retries via class level session, add debugging

* add gong connector test

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-04-03 22:22:45 +00:00
867 changed files with 26877 additions and 9144 deletions

View File

@@ -25,6 +25,10 @@ inputs:
tags:
description: 'Image tags'
required: true
no-cache:
description: 'Read from cache'
required: false
default: 'false'
cache-from:
description: 'Cache sources'
required: false
@@ -55,6 +59,7 @@ runs:
push: ${{ inputs.push }}
load: ${{ inputs.load }}
tags: ${{ inputs.tags }}
no-cache: ${{ inputs.no-cache }}
cache-from: ${{ inputs.cache-from }}
cache-to: ${{ inputs.cache-to }}
@@ -77,6 +82,7 @@ runs:
push: ${{ inputs.push }}
load: ${{ inputs.load }}
tags: ${{ inputs.tags }}
no-cache: ${{ inputs.no-cache }}
cache-from: ${{ inputs.cache-from }}
cache-to: ${{ inputs.cache-to }}
@@ -99,6 +105,7 @@ runs:
push: ${{ inputs.push }}
load: ${{ inputs.load }}
tags: ${{ inputs.tags }}
no-cache: ${{ inputs.no-cache }}
cache-from: ${{ inputs.cache-from }}
cache-to: ${{ inputs.cache-to }}

View File

@@ -7,18 +7,47 @@ on:
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}
# don't tag cloud images with "latest"
LATEST_TAG: ${{ contains(github.ref_name, 'latest') && !contains(github.ref_name, 'cloud') }}
jobs:
build-and-push:
# TODO: investigate a matrix build like the web container
# See https://runs-on.com/runners/linux/
runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}"]
runs-on:
- runs-on
- runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
- run-id=${{ github.run_id }}
- tag=platform-${{ matrix.platform }}
strategy:
fail-fast: false
matrix:
platform:
- linux/amd64
- linux/arm64
steps:
- name: Prepare
run: |
platform=${{ matrix.platform }}
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
- name: Checkout code
uses: actions/checkout@v4
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -34,18 +63,80 @@ jobs:
sudo apt-get install -y build-essential
- name: Backend Image Docker Build and Push
id: build
uses: docker/build-push-action@v5
with:
context: ./backend
file: ./backend/Dockerfile
platforms: linux/amd64,linux/arm64
platforms: ${{ matrix.platform }}
push: true
tags: |
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
build-args: |
ONYX_VERSION=${{ github.ref_name }}
labels: ${{ steps.meta.outputs.labels }}
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
- name: Export digest
run: |
mkdir -p /tmp/digests
digest="${{ steps.build.outputs.digest }}"
touch "/tmp/digests/${digest#sha256:}"
- name: Upload digest
uses: actions/upload-artifact@v4
with:
name: backend-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
path: /tmp/digests/*
if-no-files-found: error
retention-days: 1
merge:
runs-on: ubuntu-latest
needs:
- build-and-push
steps:
# Needed for trivyignore
- name: Checkout
uses: actions/checkout@v4
- name: Download digests
uses: actions/download-artifact@v4
with:
path: /tmp/digests
pattern: backend-digests-*-${{ github.run_id }}
merge-multiple: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Create manifest list and push
working-directory: /tmp/digests
run: |
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
$(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
- name: Inspect image
run: |
docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}
# trivy has their own rate limiting issues causing this action to flake
# we worked around it by hardcoding to different db repos in env
# can re-enable when they figure it out
@@ -56,6 +147,8 @@ jobs:
env:
TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
TRIVY_USERNAME: ${{ secrets.DOCKER_USERNAME }}
TRIVY_PASSWORD: ${{ secrets.DOCKER_TOKEN }}
with:
# To run locally: trivy image --severity HIGH,CRITICAL onyxdotapp/onyx-backend
image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}

View File

@@ -4,12 +4,12 @@ name: Build and Push Cloud Web Image on Tag
on:
push:
tags:
- "*"
- "*cloud*"
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
DEPLOYMENT: cloud
jobs:
build:
runs-on:
@@ -38,9 +38,10 @@ jobs:
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
type=raw,value=${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
type=raw,value=${{ github.ref_name }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -70,10 +71,12 @@ jobs:
NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
NODE_OPTIONS=--max-old-space-size=8192
# needed due to weird interactions with the builds for different platforms
no-cache: true
labels: ${{ steps.meta.outputs.labels }}
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/cloudweb-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/cloudweb-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
# no-cache needed due to weird interactions with the builds for different platforms
# NOTE(rkuo): this may not be true any more with the proper cache prefixing by architecture - currently testing with it off
- name: Export digest
run: |
@@ -84,7 +87,7 @@ jobs:
- name: Upload digest
uses: actions/upload-artifact@v4
with:
name: digests-${{ env.PLATFORM_PAIR }}
name: cloudweb-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
path: /tmp/digests/*
if-no-files-found: error
retention-days: 1
@@ -98,7 +101,7 @@ jobs:
uses: actions/download-artifact@v4
with:
path: /tmp/digests
pattern: digests-*
pattern: cloudweb-digests-*-${{ github.run_id }}
merge-multiple: true
- name: Set up Docker Buildx
@@ -109,6 +112,10 @@ jobs:
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
- name: Login to Docker Hub
uses: docker/login-action@v3
@@ -136,6 +143,8 @@ jobs:
env:
TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
TRIVY_USERNAME: ${{ secrets.DOCKER_USERNAME }}
TRIVY_PASSWORD: ${{ secrets.DOCKER_TOKEN }}
with:
image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
severity: "CRITICAL,HIGH"

View File

@@ -7,10 +7,13 @@ on:
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
DOCKER_BUILDKIT: 1
BUILDKIT_PROGRESS: plain
DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}
# don't tag cloud images with "latest"
LATEST_TAG: ${{ contains(github.ref_name, 'latest') && !contains(github.ref_name, 'cloud') }}
jobs:
# Bypassing this for now as the idea of not building is glitching
@@ -51,6 +54,8 @@ jobs:
if: needs.check_model_server_changes.outputs.changed == 'true'
runs-on:
[runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-amd64"]
env:
PLATFORM_PAIR: linux-amd64
steps:
- name: Checkout code
uses: actions/checkout@v4
@@ -86,12 +91,17 @@ jobs:
DANSWER_VERSION=${{ github.ref_name }}
outputs: type=registry
provenance: false
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
# no-cache: true
build-arm64:
needs: [check_model_server_changes]
if: needs.check_model_server_changes.outputs.changed == 'true'
runs-on:
[runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-arm64"]
env:
PLATFORM_PAIR: linux-arm64
steps:
- name: Checkout code
uses: actions/checkout@v4
@@ -127,6 +137,8 @@ jobs:
DANSWER_VERSION=${{ github.ref_name }}
outputs: type=registry
provenance: false
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
merge-and-scan:
needs: [build-amd64, build-arm64, check_model_server_changes]
@@ -156,6 +168,8 @@ jobs:
env:
TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
TRIVY_USERNAME: ${{ secrets.DOCKER_USERNAME }}
TRIVY_PASSWORD: ${{ secrets.DOCKER_TOKEN }}
with:
image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
severity: "CRITICAL,HIGH"

View File

@@ -8,9 +8,25 @@ on:
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server
LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
DEPLOYMENT: standalone
jobs:
precheck:
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
outputs:
should-run: ${{ steps.set-output.outputs.should-run }}
steps:
- name: Check if tag contains "cloud"
id: set-output
run: |
if [[ "${{ github.ref_name }}" == *cloud* ]]; then
echo "should-run=false" >> "$GITHUB_OUTPUT"
else
echo "should-run=true" >> "$GITHUB_OUTPUT"
fi
build:
needs: precheck
if: needs.precheck.outputs.should-run == 'true'
runs-on:
- runs-on
- runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
@@ -37,9 +53,11 @@ jobs:
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
type=raw,value=${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -62,11 +80,13 @@ jobs:
ONYX_VERSION=${{ github.ref_name }}
NODE_OPTIONS=--max-old-space-size=8192
# needed due to weird interactions with the builds for different platforms
no-cache: true
labels: ${{ steps.meta.outputs.labels }}
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/web-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/web-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
# no-cache needed due to weird interactions with the builds for different platforms
# NOTE(rkuo): this may not be true any more with the proper cache prefixing by architecture - currently testing with it off
- name: Export digest
run: |
mkdir -p /tmp/digests
@@ -76,21 +96,22 @@ jobs:
- name: Upload digest
uses: actions/upload-artifact@v4
with:
name: digests-${{ env.PLATFORM_PAIR }}
name: web-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
path: /tmp/digests/*
if-no-files-found: error
retention-days: 1
merge:
runs-on: ubuntu-latest
needs:
- build
if: needs.precheck.outputs.should-run == 'true'
runs-on: ubuntu-latest
steps:
- name: Download digests
uses: actions/download-artifact@v4
with:
path: /tmp/digests
pattern: digests-*
pattern: web-digests-*-${{ github.run_id }}
merge-multiple: true
- name: Set up Docker Buildx
@@ -101,6 +122,11 @@ jobs:
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
- name: Login to Docker Hub
uses: docker/login-action@v3
@@ -128,6 +154,8 @@ jobs:
env:
TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
TRIVY_USERNAME: ${{ secrets.DOCKER_USERNAME }}
TRIVY_PASSWORD: ${{ secrets.DOCKER_TOKEN }}
with:
image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
severity: "CRITICAL,HIGH"

View File

@@ -37,6 +37,11 @@ jobs:
echo "changed=true" >> "$GITHUB_OUTPUT"
fi
# uncomment to force run chart-testing
# - name: Force run chart-testing (list-changed)
# id: list-changed
# run: echo "changed=true" >> $GITHUB_OUTPUT
# lint all charts if any changes were detected
- name: Run chart-testing (lint)
if: steps.list-changed.outputs.changed == 'true'

View File

@@ -16,6 +16,7 @@ env:
CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
PLATFORM_PAIR: linux-amd64
jobs:
integration-tests:
@@ -61,8 +62,8 @@ jobs:
tags: onyxdotapp/onyx-backend:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
- name: Build Model Server Docker image
uses: ./.github/actions/custom-build-and-push
@@ -73,8 +74,8 @@ jobs:
tags: onyxdotapp/onyx-model-server:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
- name: Build integration test Docker image
uses: ./.github/actions/custom-build-and-push
@@ -85,8 +86,8 @@ jobs:
tags: onyxdotapp/onyx-integration:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
# Start containers for multi-tenant tests
- name: Start Docker containers for multi-tenant tests
@@ -158,6 +159,7 @@ jobs:
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
INTEGRATION_TESTS_MODE=true \
CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001 \
docker compose -f docker-compose.dev.yml -p onyx-stack up -d
id: start_docker

View File

@@ -16,7 +16,7 @@ env:
CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
PLATFORM_PAIR: linux-amd64
jobs:
integration-tests-mit:
# See https://runs-on.com/runners/linux/
@@ -61,8 +61,8 @@ jobs:
tags: onyxdotapp/onyx-backend:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
cache-from: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
- name: Build Model Server Docker image
uses: ./.github/actions/custom-build-and-push
@@ -73,8 +73,8 @@ jobs:
tags: onyxdotapp/onyx-model-server:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
cache-from: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
- name: Build integration test Docker image
uses: ./.github/actions/custom-build-and-push
@@ -85,8 +85,8 @@ jobs:
tags: onyxdotapp/onyx-integration:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
cache-from: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/integration-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/integration-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
# NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
- name: Start Docker containers

View File

@@ -10,6 +10,7 @@ env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
GEN_AI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
MOCK_LLM_RESPONSE: true
PYTEST_PLAYWRIGHT_SKIP_INITIAL_RESET: true
jobs:
playwright-tests:

View File

@@ -44,7 +44,7 @@ jobs:
- name: Check import order with reorder-python-imports
run: |
cd backend
find ./danswer -name "*.py" | xargs reorder-python-imports --py311-plus
find ./onyx -name "*.py" | xargs reorder-python-imports --py311-plus
- name: Check code formatting with Black
run: |

View File

@@ -12,7 +12,7 @@ env:
# AWS
AWS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS: ${{ secrets.AWS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS }}
AWS_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS: ${{ secrets.AWS_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS }}
# Confluence
CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
CONFLUENCE_TEST_SPACE: ${{ secrets.CONFLUENCE_TEST_SPACE }}
@@ -20,46 +20,67 @@ env:
CONFLUENCE_TEST_PAGE_ID: ${{ secrets.CONFLUENCE_TEST_PAGE_ID }}
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
# Jira
JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
# Gong
GONG_ACCESS_KEY: ${{ secrets.GONG_ACCESS_KEY }}
GONG_ACCESS_KEY_SECRET: ${{ secrets.GONG_ACCESS_KEY_SECRET }}
# Google
GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR: ${{ secrets.GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR }}
GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1: ${{ secrets.GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1 }}
GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR }}
GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR: ${{ secrets.GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR }}
GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR }}
# Slab
SLAB_BOT_TOKEN: ${{ secrets.SLAB_BOT_TOKEN }}
# Zendesk
ZENDESK_SUBDOMAIN: ${{ secrets.ZENDESK_SUBDOMAIN }}
ZENDESK_EMAIL: ${{ secrets.ZENDESK_EMAIL }}
ZENDESK_TOKEN: ${{ secrets.ZENDESK_TOKEN }}
# Salesforce
SF_USERNAME: ${{ secrets.SF_USERNAME }}
SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
SF_SECURITY_TOKEN: ${{ secrets.SF_SECURITY_TOKEN }}
# Airtable
AIRTABLE_TEST_BASE_ID: ${{ secrets.AIRTABLE_TEST_BASE_ID }}
AIRTABLE_TEST_TABLE_ID: ${{ secrets.AIRTABLE_TEST_TABLE_ID }}
AIRTABLE_TEST_TABLE_NAME: ${{ secrets.AIRTABLE_TEST_TABLE_NAME }}
AIRTABLE_ACCESS_TOKEN: ${{ secrets.AIRTABLE_ACCESS_TOKEN }}
# Sharepoint
SHAREPOINT_CLIENT_ID: ${{ secrets.SHAREPOINT_CLIENT_ID }}
SHAREPOINT_CLIENT_SECRET: ${{ secrets.SHAREPOINT_CLIENT_SECRET }}
SHAREPOINT_CLIENT_DIRECTORY_ID: ${{ secrets.SHAREPOINT_CLIENT_DIRECTORY_ID }}
SHAREPOINT_SITE: ${{ secrets.SHAREPOINT_SITE }}
# Github
ACCESS_TOKEN_GITHUB: ${{ secrets.ACCESS_TOKEN_GITHUB }}
# Gitlab
GITLAB_ACCESS_TOKEN: ${{ secrets.GITLAB_ACCESS_TOKEN }}
# Gitbook
GITBOOK_SPACE_ID: ${{ secrets.GITBOOK_SPACE_ID }}
GITBOOK_API_KEY: ${{ secrets.GITBOOK_API_KEY }}
# Notion
NOTION_INTEGRATION_TOKEN: ${{ secrets.NOTION_INTEGRATION_TOKEN }}
# Highspot
HIGHSPOT_KEY: ${{ secrets.HIGHSPOT_KEY }}
HIGHSPOT_SECRET: ${{ secrets.HIGHSPOT_SECRET }}
# Slack
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
jobs:
connectors-check:
# See https://runs-on.com/runners/linux/
@@ -91,7 +112,15 @@ jobs:
- name: Run Tests
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
run: py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/connectors
run: |
py.test \
-n 8 \
--dist loadfile \
--durations=8 \
-o junit_family=xunit2 \
-xv \
--ff \
backend/tests/daily/connectors
- name: Alert on Failure
if: failure() && github.event_name == 'schedule'

23
.gitignore vendored
View File

@@ -1,12 +1,25 @@
.env
# editors
.vscode
.zed
# macos
.DS_store
# python
.venv
.mypy_cache
.idea
/deployment/data/nginx/app.conf
.vscode/
*.sw?
/backend/tests/regression/answer_quality/search_test_config.yaml
# testing
/web/test-results/
backend/onyx/agent_search/main/test_data.json
backend/tests/regression/answer_quality/test_data.json
# secret files
.env
jira_test_env
# others
/deployment/data/nginx/app.conf
*.sw?
/backend/tests/regression/answer_quality/search_test_config.yaml

View File

@@ -1,12 +1,13 @@
repos:
- repo: https://github.com/psf/black
rev: 23.3.0
rev: 25.1.0
hooks:
- id: black
language_version: python3.11
- repo: https://github.com/asottile/reorder_python_imports
rev: v3.9.0
# this is a fork which keeps compatibility with black
- repo: https://github.com/wimglenn/reorder-python-imports-black
rev: v3.14.0
hooks:
- id: reorder-python-imports
args: ['--py311-plus', '--application-directories=backend/']
@@ -18,14 +19,14 @@ repos:
# These settings will remove unused imports with side effects
# Note: The repo currently does not and should not have imports with side effects
- repo: https://github.com/PyCQA/autoflake
rev: v2.2.0
rev: v2.3.1
hooks:
- id: autoflake
args: [ '--remove-all-unused-imports', '--remove-unused-variables', '--in-place' , '--recursive']
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.0.286
rev: v0.11.4
hooks:
- id: ruff
- repo: https://github.com/pre-commit/mirrors-prettier

View File

@@ -1,4 +1,4 @@
<!-- DANSWER_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/CONTRIBUTING.md"} -->
<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/CONTRIBUTING.md"} -->
# Contributing to Onyx

View File

@@ -1,4 +1,4 @@
<!-- DANSWER_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/README.md"} -->
<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/README.md"} -->
<a name="readme-top"></a>
@@ -13,7 +13,7 @@
<a href="https://docs.onyx.app/" target="_blank">
<img src="https://img.shields.io/badge/docs-view-blue" alt="Documentation">
</a>
<a href="https://join.slack.com/t/onyx-dot-app/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA" target="_blank">
<a href="https://join.slack.com/t/onyx-dot-app/shared_invite/zt-34lu4m7xg-TsKGO6h8PDvR5W27zTdyhA" target="_blank">
<img src="https://img.shields.io/badge/slack-join-blue.svg?logo=slack" alt="Slack">
</a>
<a href="https://discord.gg/TDJ59cGV2X" target="_blank">

4
backend/.gitignore vendored
View File

@@ -9,4 +9,6 @@ api_keys.py
vespa-app.zip
dynamic_config_storage/
celerybeat-schedule*
onyx/connectors/salesforce/data/
onyx/connectors/salesforce/data/
.test.env

View File

@@ -1,4 +1,4 @@
<!-- DANSWER_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/backend/alembic/README.md"} -->
<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/backend/alembic/README.md"} -->
# Alembic DB Migrations

View File

@@ -24,6 +24,7 @@ from onyx.configs.constants import SSL_CERT_FILE
from shared_configs.configs import MULTI_TENANT, POSTGRES_DEFAULT_SCHEMA
from onyx.db.models import Base
from celery.backends.database.session import ResultModelBase # type: ignore
from onyx.db.engine import SqlEngine
# Make sure in alembic.ini [logger_root] level=INFO is set or most logging will be
# hidden! (defaults to level=WARN)
@@ -147,6 +148,9 @@ async def run_async_migrations() -> None:
continue_on_error,
) = get_schema_options()
# without init_engine, subsequent engine calls fail hard intentionally
SqlEngine.init_engine(pool_size=20, max_overflow=5)
engine = create_async_engine(
build_connection_string(),
poolclass=pool.NullPool,
@@ -180,10 +184,10 @@ async def run_async_migrations() -> None:
except Exception as e:
logger.error(f"Error migrating schema {schema}: {e}")
if not continue_on_error:
logger.error("--continue is not set, raising exception!")
logger.error("--continue=true is not set, raising exception!")
raise
logger.warning("--continue is set, continuing to next schema.")
logger.warning("--continue=true is set, continuing to next schema.")
else:
try:
@@ -202,10 +206,21 @@ async def run_async_migrations() -> None:
def run_migrations_offline() -> None:
"""This doesn't really get used when we migrate in the cloud."""
"""
NOTE(rkuo): This generates a sql script that can be used to migrate the database ...
instead of migrating the db live via an open connection
Not clear on when this would be used by us or if it even works.
If it is offline, then why are there calls to the db engine?
This doesn't really get used when we migrate in the cloud."""
logger.info("run_migrations_offline starting.")
# without init_engine, subsequent engine calls fail hard intentionally
SqlEngine.init_engine(pool_size=20, max_overflow=5)
schema_name, _, upgrade_all_tenants, continue_on_error = get_schema_options()
url = build_connection_string()

View File

@@ -5,6 +5,7 @@ Revises: 6fc7886d665d
Create Date: 2025-01-14 12:14:00.814390
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 8a87bd6ec550
Create Date: 2024-07-23 11:12:39.462397
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 5f4b8568a221
Create Date: 2024-03-02 23:23:49.960309
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

View File

@@ -5,6 +5,7 @@ Revises: 570282d33c49
Create Date: 2024-05-05 19:30:34.317972
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.sql import table

View File

@@ -5,6 +5,7 @@ Revises: 52a219fb5233
Create Date: 2024-09-10 15:03:48.233926
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 369644546676
Create Date: 2025-01-10 14:01:14.067144
"""
from alembic import op
# revision identifiers, used by Alembic.

View File

@@ -5,6 +5,7 @@ Revises: 77d07dffae64
Create Date: 2023-11-11 20:51:24.228999
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: e50154680a5c
Create Date: 2024-03-19 15:30:44.425436
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

View File

@@ -5,6 +5,7 @@ Revises: 4ee1287bd26a
Create Date: 2024-11-21 11:49:04.488677
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

View File

@@ -5,6 +5,7 @@ Revises: 9c00a2bccb83
Create Date: 2025-02-18 10:45:13.957807
"""
from alembic import op
# revision identifiers, used by Alembic.

View File

@@ -5,6 +5,7 @@ Revises: 6756efa39ada
Create Date: 2024-10-15 19:26:44.071259
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

View File

@@ -5,6 +5,7 @@ Revises: 35e6853a51d5
Create Date: 2024-09-18 11:48:59.418726
"""
from alembic import op

View File

@@ -5,6 +5,7 @@ Revises: 5fc1f54cc252
Create Date: 2024-08-10 11:13:36.070790
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: bc9771dccadf
Create Date: 2024-06-27 16:04:51.480437
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 6d387b3196c2
Create Date: 2023-05-05 15:49:35.716016
"""
import fastapi_users_db_sqlalchemy
import sqlalchemy as sa
from alembic import op

View File

@@ -5,6 +5,7 @@ Revises: 2daa494a0851
Create Date: 2024-11-12 13:23:29.858995
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

View File

@@ -5,6 +5,7 @@ Revises: 2666d766cb9b
Create Date: 2023-05-24 18:45:17.244495
"""
import fastapi_users_db_sqlalchemy
import sqlalchemy as sa
from alembic import op

View File

@@ -5,6 +5,7 @@ Revises: c0aab6edb6dd
Create Date: 2025-01-04 11:39:43.268612
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: f5437cc136c5
Create Date: 2025-02-11 14:57:51.308775
"""
from alembic import op

View File

@@ -5,6 +5,7 @@ Revises: 4b08d97e175a
Create Date: 2024-08-21 19:15:15.762948
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: c0fd6e4da83a
Create Date: 2024-11-11 10:57:22.991157
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 33ea50e88f24
Create Date: 2025-01-31 10:30:27.289646
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 7f99be1cb9f5
Create Date: 2023-10-16 23:21:01.283424
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 91ffac7e65b3
Create Date: 2024-07-24 21:29:31.784562
"""
import random
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 5b29123cd710
Create Date: 2024-11-01 12:51:01.535003
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: a6df6b88ef81
Create Date: 2025-01-29 10:54:22.141765
"""
from alembic import op

View File

@@ -5,6 +5,7 @@ Revises: ee3f4b47fad5
Create Date: 2024-08-15 22:37:08.397052
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 91a0a4d62b14
Create Date: 2024-09-20 21:24:04.891018
"""
from alembic import op

View File

@@ -5,6 +5,7 @@ Revises: c99d76fcd298
Create Date: 2024-09-13 13:20:32.885317
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

View File

@@ -5,6 +5,7 @@ Revises: 2955778aa44c
Create Date: 2025-01-08 15:38:17.224380
"""
from alembic import op
from sqlalchemy import text

View File

@@ -5,6 +5,7 @@ Revises: df46c75b714e
Create Date: 2025-03-10 10:02:30.586666
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: f1c6478c3fd8
Create Date: 2024-05-11 16:11:23.718084
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 776b3bbe9092
Create Date: 2024-03-27 19:41:29.073594
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: b7c2b63c4a03
Create Date: 2025-03-05 10:50:30.516962
"""
from alembic import op
import sqlalchemy as sa
import json

View File

@@ -5,6 +5,7 @@ Revises: e0a68a81d434
Create Date: 2023-10-05 18:47:09.582849
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 8f43500ee275
Create Date: 2025-02-26 13:07:56.217791
"""
from alembic import op

View File

@@ -5,6 +5,7 @@ Revises: 27c6ecc08586
Create Date: 2023-06-14 23:45:51.760440
"""
import sqlalchemy as sa
from alembic import op

View File

@@ -5,6 +5,7 @@ Revises: aeda5f2df4f6
Create Date: 2025-01-13 12:49:51.705235
"""
from alembic import op
import sqlalchemy as sa
import fastapi_users_db_sqlalchemy

View File

@@ -5,6 +5,7 @@ Revises: 703313b75876
Create Date: 2024-04-13 18:07:29.153817
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

View File

@@ -5,6 +5,7 @@ Revises: e1392f05e840
Create Date: 2024-08-01 12:38:54.466081
"""
from alembic import op
# revision identifiers, used by Alembic.

View File

@@ -5,6 +5,7 @@ Revises: d716b0791ddd
Create Date: 2024-06-28 20:01:05.927647
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: c18cdf4b497e
Create Date: 2024-06-18 20:46:09.095034
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 3c5e35aa9af0
Create Date: 2023-07-18 17:33:40.365034
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 9d97fecfab7f
Create Date: 2023-10-27 11:38:33.803145
"""
from alembic import op
from sqlalchemy import String

View File

@@ -5,6 +5,7 @@ Revises: f32615f71aeb
Create Date: 2024-09-23 12:58:03.894038
"""
from alembic import op
# revision identifiers, used by Alembic.

View File

@@ -5,6 +5,7 @@ Revises: e91df4e935ef
Create Date: 2024-03-20 18:53:32.461518
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises:
Create Date: 2023-05-04 00:55:32.971991
"""
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

View File

@@ -5,6 +5,7 @@ Revises: ecab2b3f1a3b
Create Date: 2024-04-11 11:05:18.414438
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: f7505c5b0284
Create Date: 2025-04-02 11:26:36.180328
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -0,0 +1,150 @@
"""Fix invalid model-configurations state
Revision ID: 47a07e1a38f1
Revises: 7a70b7664e37
Create Date: 2025-04-23 15:39:43.159504
"""
from alembic import op
from pydantic import BaseModel, ConfigDict
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from onyx.llm.llm_provider_options import (
fetch_model_names_for_provider_as_set,
fetch_visible_model_names_for_provider_as_set,
)
# revision identifiers, used by Alembic.
revision = "47a07e1a38f1"
down_revision = "7a70b7664e37"
branch_labels = None
depends_on = None
class _SimpleModelConfiguration(BaseModel):
# Configure model to read from attributes
model_config = ConfigDict(from_attributes=True)
id: int
llm_provider_id: int
name: str
is_visible: bool
max_input_tokens: int | None
def upgrade() -> None:
llm_provider_table = sa.sql.table(
"llm_provider",
sa.column("id", sa.Integer),
sa.column("provider", sa.String),
sa.column("model_names", postgresql.ARRAY(sa.String)),
sa.column("display_model_names", postgresql.ARRAY(sa.String)),
sa.column("default_model_name", sa.String),
sa.column("fast_default_model_name", sa.String),
)
model_configuration_table = sa.sql.table(
"model_configuration",
sa.column("id", sa.Integer),
sa.column("llm_provider_id", sa.Integer),
sa.column("name", sa.String),
sa.column("is_visible", sa.Boolean),
sa.column("max_input_tokens", sa.Integer),
)
connection = op.get_bind()
llm_providers = connection.execute(
sa.select(
llm_provider_table.c.id,
llm_provider_table.c.provider,
)
).fetchall()
for llm_provider in llm_providers:
llm_provider_id, provider_name = llm_provider
default_models = fetch_model_names_for_provider_as_set(provider_name)
display_models = fetch_visible_model_names_for_provider_as_set(
provider_name=provider_name
)
# if `fetch_model_names_for_provider_as_set` returns `None`, then
# that means that `provider_name` is not a well-known llm provider.
if not default_models:
continue
if not display_models:
raise RuntimeError(
"If `default_models` is non-None, `display_models` must be non-None too."
)
model_configurations = [
_SimpleModelConfiguration.model_validate(model_configuration)
for model_configuration in connection.execute(
sa.select(
model_configuration_table.c.id,
model_configuration_table.c.llm_provider_id,
model_configuration_table.c.name,
model_configuration_table.c.is_visible,
model_configuration_table.c.max_input_tokens,
).where(model_configuration_table.c.llm_provider_id == llm_provider_id)
).fetchall()
]
if model_configurations:
at_least_one_is_visible = any(
[
model_configuration.is_visible
for model_configuration in model_configurations
]
)
# If there is at least one model which is public, this is a valid state.
# Therefore, don't touch it and move on to the next one.
if at_least_one_is_visible:
continue
existing_visible_model_names: set[str] = set(
[
model_configuration.name
for model_configuration in model_configurations
if model_configuration.is_visible
]
)
difference = display_models.difference(existing_visible_model_names)
for model_name in difference:
if not model_name:
continue
insert_statement = postgresql.insert(model_configuration_table).values(
llm_provider_id=llm_provider_id,
name=model_name,
is_visible=True,
max_input_tokens=None,
)
connection.execute(
insert_statement.on_conflict_do_update(
index_elements=["llm_provider_id", "name"],
set_={"is_visible": insert_statement.excluded.is_visible},
)
)
else:
for model_name in default_models:
connection.execute(
model_configuration_table.insert().values(
llm_provider_id=llm_provider_id,
name=model_name,
is_visible=model_name in display_models,
max_input_tokens=None,
)
)
def downgrade() -> None:
pass

View File

@@ -5,6 +5,7 @@ Revises: dfbe9e93d3c7
Create Date: 2024-11-05 18:55:02.221064
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: b85f02ec1308
Create Date: 2024-06-09 14:58:19.946509
"""
from alembic import op
import fastapi_users_db_sqlalchemy
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 7477a5f5d728
Create Date: 2024-08-10 19:20:34.527559
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: d9ec13955951
Create Date: 2024-08-20 15:28:52.993827
"""
from alembic import op
# revision identifiers, used by Alembic.

View File

@@ -5,6 +5,7 @@ Revises: f1ca58b2f2ec
Create Date: 2025-01-29 07:48:46.784041
"""
import logging
from typing import cast
from alembic import op

View File

@@ -5,6 +5,7 @@ Revises: 47e5bef3a1d7
Create Date: 2024-11-06 13:15:53.302644
"""
from typing import cast
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 7da0ae5ad583
Create Date: 2023-11-27 17:23:29.668422
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: f7e58d357687
Create Date: 2024-08-28 17:40:46.077470
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.sql import func

View File

@@ -5,6 +5,7 @@ Revises: 94dc3d0236f8
Create Date: 2024-12-11 18:05:05.490737
"""
from alembic import op

View File

@@ -5,6 +5,7 @@ Revises: 61ff3651add4
Create Date: 2024-09-18 17:00:23.755399
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

View File

@@ -5,6 +5,7 @@ Revises: 7547d982db8f
Create Date: 2024-05-04 17:49:28.568109
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 800f48024ae9
Create Date: 2023-09-20 16:59:39.097177
"""
from alembic import op
import fastapi_users_db_sqlalchemy
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: d929f0c1c6af
Create Date: 2023-09-04 15:29:44.002164
"""
import fastapi_users_db_sqlalchemy
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 949b4a92a401
Create Date: 2024-10-30 19:37:59.630704
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -0,0 +1,24 @@
"""Add content type to UserFile
Revision ID: 5c448911b12f
Revises: 47a07e1a38f1
Create Date: 2025-04-25 16:59:48.182672
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "5c448911b12f"
down_revision = "47a07e1a38f1"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.add_column("user_file", sa.Column("content_type", sa.String(), nullable=True))
def downgrade() -> None:
op.drop_column("user_file", "content_type")

View File

@@ -5,6 +5,7 @@ Revises: efb35676026c
Create Date: 2024-09-13 18:52:59.256478
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: e4334d5b33ba
Create Date: 2024-10-08 15:56:07.975636
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: e6a4bbc13fe4
Create Date: 2023-08-10 21:43:09.069523
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: dbaa756c2ccf
Create Date: 2024-02-16 15:02:03.319907
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 1d6ad76d1f37
Create Date: 2024-08-06 15:35:40.278485
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 1b8206b29c5d
Create Date: 2024-09-05 13:57:11.770413
"""
import fastapi_users_db_sqlalchemy
from alembic import op

View File

@@ -5,6 +5,7 @@ Revises: 0a98909f2757
Create Date: 2024-05-07 14:54:55.493100
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 5d12a446f5c0
Create Date: 2024-10-15 17:47:44.108537
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,11 +5,6 @@ Revises: 8e1ac4f39a9f
Create Date: 2025-04-01 07:26:10.539362
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy import inspect
import datetime
# revision identifiers, used by Alembic.
revision = "6a804aeb4830"
@@ -18,99 +13,10 @@ branch_labels = None
depends_on = None
# Leaving this around only because some people might be on this migration
# originally was a duplicate of the user files migration
def upgrade() -> None:
# Check if user_file table already exists
conn = op.get_bind()
inspector = inspect(conn)
if not inspector.has_table("user_file"):
# Create user_folder table without parent_id
op.create_table(
"user_folder",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column("user_id", sa.UUID(), sa.ForeignKey("user.id"), nullable=True),
sa.Column("name", sa.String(length=255), nullable=True),
sa.Column("description", sa.String(length=255), nullable=True),
sa.Column("display_priority", sa.Integer(), nullable=True, default=0),
sa.Column(
"created_at", sa.DateTime(timezone=True), server_default=sa.func.now()
),
)
# Create user_file table with folder_id instead of parent_folder_id
op.create_table(
"user_file",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column("user_id", sa.UUID(), sa.ForeignKey("user.id"), nullable=True),
sa.Column(
"folder_id",
sa.Integer(),
sa.ForeignKey("user_folder.id"),
nullable=True,
),
sa.Column("link_url", sa.String(), nullable=True),
sa.Column("token_count", sa.Integer(), nullable=True),
sa.Column("file_type", sa.String(), nullable=True),
sa.Column("file_id", sa.String(length=255), nullable=False),
sa.Column("document_id", sa.String(length=255), nullable=False),
sa.Column("name", sa.String(length=255), nullable=False),
sa.Column(
"created_at",
sa.DateTime(),
default=datetime.datetime.utcnow,
),
sa.Column(
"cc_pair_id",
sa.Integer(),
sa.ForeignKey("connector_credential_pair.id"),
nullable=True,
unique=True,
),
)
# Create persona__user_file table
op.create_table(
"persona__user_file",
sa.Column(
"persona_id",
sa.Integer(),
sa.ForeignKey("persona.id"),
primary_key=True,
),
sa.Column(
"user_file_id",
sa.Integer(),
sa.ForeignKey("user_file.id"),
primary_key=True,
),
)
# Create persona__user_folder table
op.create_table(
"persona__user_folder",
sa.Column(
"persona_id",
sa.Integer(),
sa.ForeignKey("persona.id"),
primary_key=True,
),
sa.Column(
"user_folder_id",
sa.Integer(),
sa.ForeignKey("user_folder.id"),
primary_key=True,
),
)
op.add_column(
"connector_credential_pair",
sa.Column("is_user_file", sa.Boolean(), nullable=True, default=False),
)
# Update existing records to have is_user_file=False instead of NULL
op.execute(
"UPDATE connector_credential_pair SET is_user_file = FALSE WHERE is_user_file IS NULL"
)
pass
def downgrade() -> None:

View File

@@ -5,6 +5,7 @@ Revises: 47433d30de82
Create Date: 2023-05-05 14:40:10.242502
"""
import fastapi_users_db_sqlalchemy
import sqlalchemy as sa
from alembic import op

View File

@@ -5,6 +5,7 @@ Revises: 177de57c21c9
Create Date: 2024-11-22 11:51:29.331336
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: 3c6531f32351
Create Date: 2025-01-13 18:12:18.029112
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: fad14119fb92
Create Date: 2024-04-15 01:36:02.952809
"""
import json
from typing import cast
from alembic import op

View File

@@ -5,6 +5,7 @@ Revises: 3879338f8ba1
Create Date: 2024-05-17 17:51:41.926893
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

View File

@@ -5,6 +5,7 @@ Revises: 475fcefe8826
Create Date: 2024-04-14 21:15:28.659634
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,7 @@ Revises: ef7da92f7213
Create Date: 2024-05-02 15:18:56.573347
"""
from alembic import op
import sqlalchemy as sa
import fastapi_users_db_sqlalchemy

View File

@@ -5,6 +5,7 @@ Revises: dba7f71618f5
Create Date: 2023-09-21 10:03:21.509899
"""
from alembic import op
import sqlalchemy as sa

Some files were not shown because too many files have changed in this diff Show More