mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-02-17 07:45:47 +00:00
Compare commits
385 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8a38fdf8a5 | ||
|
|
9155d4aa21 | ||
|
|
b20591611a | ||
|
|
83e756bf05 | ||
|
|
19b485cffd | ||
|
|
f5a99053ac | ||
|
|
91f0377dd5 | ||
|
|
25522dfbb8 | ||
|
|
b0e124ec89 | ||
|
|
b699a65384 | ||
|
|
cc82d6e506 | ||
|
|
8a6db7474d | ||
|
|
fd9aea212b | ||
|
|
4aed383e49 | ||
|
|
d0ce313b1a | ||
|
|
4d32c9f5e0 | ||
|
|
158fe31b71 | ||
|
|
97cddc1dd4 | ||
|
|
c520a4ec17 | ||
|
|
9c1f8cc98c | ||
|
|
58ba8cc68a | ||
|
|
a307b0d366 | ||
|
|
e34f58e994 | ||
|
|
7f6dd2dc93 | ||
|
|
ef3daa58b3 | ||
|
|
972c33046e | ||
|
|
802248c4e4 | ||
|
|
f359c44183 | ||
|
|
bab2220091 | ||
|
|
bc35354ced | ||
|
|
742dd23fdd | ||
|
|
ea5690db81 | ||
|
|
853ca635d2 | ||
|
|
c4d2fc9492 | ||
|
|
7aa12c0a36 | ||
|
|
e74cf14401 | ||
|
|
75c42ffa9d | ||
|
|
d6fbb7affd | ||
|
|
75cee70bbb | ||
|
|
1c8b819aa2 | ||
|
|
b7cf33a4cc | ||
|
|
b06459f674 | ||
|
|
920db6b3c2 | ||
|
|
b7e4b65a74 | ||
|
|
e648e0f725 | ||
|
|
c8a3368fce | ||
|
|
f74b02ad9e | ||
|
|
65b59c4a73 | ||
|
|
b74bcd0efc | ||
|
|
8c133b3853 | ||
|
|
67554cef96 | ||
|
|
07e03f3677 | ||
|
|
33fee46d71 | ||
|
|
72f5e3d38f | ||
|
|
f89380ad87 | ||
|
|
e6f00098f2 | ||
|
|
9100afa594 | ||
|
|
93d2febf2a | ||
|
|
693286411a | ||
|
|
01a3064ca3 | ||
|
|
09a80265ee | ||
|
|
2a77481c1e | ||
|
|
6838487689 | ||
|
|
1713c24080 | ||
|
|
73b3a2525a | ||
|
|
59738d9243 | ||
|
|
c0ff9c623b | ||
|
|
c03979209a | ||
|
|
a0b7639693 | ||
|
|
e3ede3c186 | ||
|
|
092dbebdf2 | ||
|
|
838e2fe924 | ||
|
|
48e2bfa3eb | ||
|
|
2a004ad257 | ||
|
|
416c7fd75e | ||
|
|
a4372b461f | ||
|
|
7eb13db6d9 | ||
|
|
c0075d5f59 | ||
|
|
475a3afe56 | ||
|
|
bf5b8e7bae | ||
|
|
4ff28c897b | ||
|
|
ec9e9be42e | ||
|
|
af5fa8fe54 | ||
|
|
03a9e9e068 | ||
|
|
ad81c3f9eb | ||
|
|
62129f4ab9 | ||
|
|
b30d38c747 | ||
|
|
0596b57501 | ||
|
|
482b2c4204 | ||
|
|
df155835b1 | ||
|
|
fd0762a1ee | ||
|
|
bd41618dd9 | ||
|
|
5a7c6312af | ||
|
|
a477508bd7 | ||
|
|
8ac34a8433 | ||
|
|
2c51466bc3 | ||
|
|
62966bd172 | ||
|
|
a8d4482b59 | ||
|
|
dd42a45008 | ||
|
|
a368556282 | ||
|
|
679d1a5ef6 | ||
|
|
12e49cd661 | ||
|
|
1859a0ad79 | ||
|
|
9199d146be | ||
|
|
9c1208ffd6 | ||
|
|
c3387e33eb | ||
|
|
c37f633a37 | ||
|
|
6677e12e55 | ||
|
|
7175b93a4c | ||
|
|
fbbcd9646d | ||
|
|
7afc9d417c | ||
|
|
a905f2d3fb | ||
|
|
3d1994a515 | ||
|
|
7f507c7be0 | ||
|
|
c0e418d63e | ||
|
|
db49e14f12 | ||
|
|
e87d6403e8 | ||
|
|
2b6e02a775 | ||
|
|
26e1f349b9 | ||
|
|
ba83d7e6c3 | ||
|
|
f869e44497 | ||
|
|
b367a60680 | ||
|
|
98a7e8b7e2 | ||
|
|
f93752a2b3 | ||
|
|
0d20140cad | ||
|
|
bdd6dc036e | ||
|
|
27fe196df3 | ||
|
|
18dad51bf8 | ||
|
|
b6d60fb6a9 | ||
|
|
86e7975c42 | ||
|
|
bb1fb2250e | ||
|
|
8fdc3411ed | ||
|
|
d5038e8e68 | ||
|
|
bc035a78e4 | ||
|
|
9e1043b2fa | ||
|
|
107e83bf2a | ||
|
|
f5aade9f69 | ||
|
|
9b9ca43671 | ||
|
|
0c61cc3f65 | ||
|
|
553853c7f4 | ||
|
|
15a05663ca | ||
|
|
940773b9c5 | ||
|
|
a95ae6e88b | ||
|
|
369f923929 | ||
|
|
3eefbfb646 | ||
|
|
3919a2d0a2 | ||
|
|
4553e811b0 | ||
|
|
7f7389692e | ||
|
|
30147c03cf | ||
|
|
dc48ccc117 | ||
|
|
ee366c50c4 | ||
|
|
caf92a6cce | ||
|
|
259bc9d64b | ||
|
|
60664f7e5b | ||
|
|
07f55c6ae2 | ||
|
|
256ece05a6 | ||
|
|
530d6d8284 | ||
|
|
6299bc30b6 | ||
|
|
0607ea9741 | ||
|
|
3ba4bdfe78 | ||
|
|
a9165ad329 | ||
|
|
24aea2d7ce | ||
|
|
aa30008419 | ||
|
|
3605676f61 | ||
|
|
1faa9e7812 | ||
|
|
d85b702cac | ||
|
|
a724f53e5b | ||
|
|
68fcc5cb8a | ||
|
|
3eb1ca01a2 | ||
|
|
c2c3d1a722 | ||
|
|
f79a8533fb | ||
|
|
c1dce9fabd | ||
|
|
244bf82c7a | ||
|
|
188ea3faff | ||
|
|
c04f624891 | ||
|
|
43ae02a870 | ||
|
|
14123926a7 | ||
|
|
d14d1b833f | ||
|
|
ff06f10af6 | ||
|
|
5d26c1bafc | ||
|
|
dbf06c6a1b | ||
|
|
d31e83900f | ||
|
|
1ac92e6bd0 | ||
|
|
5e159c35f3 | ||
|
|
550271abd9 | ||
|
|
db3d0bfb34 | ||
|
|
860bdd3c0f | ||
|
|
3bc63b30ce | ||
|
|
78a23eeec0 | ||
|
|
096e4029ba | ||
|
|
e8da5722df | ||
|
|
a1a261f68e | ||
|
|
ac57b10240 | ||
|
|
ce35e01ce3 | ||
|
|
808f82de0b | ||
|
|
9518bd14bb | ||
|
|
54eb655634 | ||
|
|
a773c398af | ||
|
|
53131e7669 | ||
|
|
d5cb56b0e9 | ||
|
|
de6226e192 | ||
|
|
a1d502804a | ||
|
|
76fc01968b | ||
|
|
f9de82c135 | ||
|
|
db4b074938 | ||
|
|
bc5a574cf1 | ||
|
|
c14414c9be | ||
|
|
770bfcf360 | ||
|
|
67c1099f98 | ||
|
|
67eb54734f | ||
|
|
f819fdf09b | ||
|
|
b39a4a075a | ||
|
|
8a244aff0d | ||
|
|
6a74e54eda | ||
|
|
e87818c961 | ||
|
|
fbec393faa | ||
|
|
da167e93ab | ||
|
|
91c0b273bf | ||
|
|
72d1cfa36a | ||
|
|
1f45ebc818 | ||
|
|
c1428d03f5 | ||
|
|
904bcdb0fe | ||
|
|
9caf136f0e | ||
|
|
e06ad214cb | ||
|
|
fe53ae9d79 | ||
|
|
5a2796d285 | ||
|
|
aba5bee4d7 | ||
|
|
a0eaf126be | ||
|
|
28712aab1d | ||
|
|
25de38fcf7 | ||
|
|
53123e2870 | ||
|
|
fa8487a1a8 | ||
|
|
3f0bcd516d | ||
|
|
76d25ff489 | ||
|
|
f99d0285f9 | ||
|
|
988221550e | ||
|
|
6b636c1b90 | ||
|
|
1fee528d86 | ||
|
|
815ab5ebc2 | ||
|
|
c8553331ae | ||
|
|
362da495ff | ||
|
|
abb444cd85 | ||
|
|
fc7e6f798d | ||
|
|
8b39d60bca | ||
|
|
9ac8331cd3 | ||
|
|
295dc2d28c | ||
|
|
c15266227a | ||
|
|
1c54b357ee | ||
|
|
791346eca8 | ||
|
|
2d2a2452bf | ||
|
|
4d3094c09f | ||
|
|
882e5f999d | ||
|
|
c2cf3991a0 | ||
|
|
402dfdad2c | ||
|
|
ef8de62478 | ||
|
|
8101be42ea | ||
|
|
0c615cd76d | ||
|
|
421e9899b8 | ||
|
|
6379423dfc | ||
|
|
1c742e675a | ||
|
|
5c3b2320a7 | ||
|
|
198fc145fc | ||
|
|
0f84391f60 | ||
|
|
1e101f8028 | ||
|
|
7e40cbe0d1 | ||
|
|
a528dbe241 | ||
|
|
587cca4b13 | ||
|
|
990842c1cf | ||
|
|
a3a420a6de | ||
|
|
03c2e62aee | ||
|
|
b7d7c62a7c | ||
|
|
6f5c466f38 | ||
|
|
2b19b84245 | ||
|
|
16e1b45cee | ||
|
|
d9c2793341 | ||
|
|
644920f2f9 | ||
|
|
e38e96d4d1 | ||
|
|
fc84e83fdb | ||
|
|
c428ad6dfe | ||
|
|
98a7a04633 | ||
|
|
9b42a8c1f3 | ||
|
|
eacc663d5b | ||
|
|
3020ee5964 | ||
|
|
472d080239 | ||
|
|
922069bfd3 | ||
|
|
bffca81477 | ||
|
|
561b487102 | ||
|
|
cc9b14c99b | ||
|
|
de674a19e0 | ||
|
|
79114bf92c | ||
|
|
b5dccd96b3 | ||
|
|
a55cc5a537 | ||
|
|
cdf3cc444b | ||
|
|
cd3941f4b7 | ||
|
|
0182743619 | ||
|
|
0e2f596aa2 | ||
|
|
0be45676b7 | ||
|
|
30a3470001 | ||
|
|
d52fa83afa | ||
|
|
9eb5643cc3 | ||
|
|
afe34218b8 | ||
|
|
4776947dfa | ||
|
|
c4bc25f540 | ||
|
|
b77078b339 | ||
|
|
88b28a303b | ||
|
|
59d7d3905a | ||
|
|
a48fe7550a | ||
|
|
c25a99955c | ||
|
|
ac509f865a | ||
|
|
5819389ae8 | ||
|
|
eae5774cdc | ||
|
|
8fed0a8138 | ||
|
|
c04196941d | ||
|
|
19461955ed | ||
|
|
cb3152ff5c | ||
|
|
cf187e8f58 | ||
|
|
deaa3df42f | ||
|
|
d6e98bfbc8 | ||
|
|
ff58ad0b87 | ||
|
|
eb7cb02cc0 | ||
|
|
7876d8da1b | ||
|
|
8a6f83115e | ||
|
|
b7f81aed10 | ||
|
|
a415a997cf | ||
|
|
7781afd74e | ||
|
|
d0a4f4ce66 | ||
|
|
ba00de8904 | ||
|
|
91f21bb22b | ||
|
|
491f3127c5 | ||
|
|
0987fb852b | ||
|
|
5f68141335 | ||
|
|
b5793ee522 | ||
|
|
238c244fec | ||
|
|
c103a878b7 | ||
|
|
03deb064cc | ||
|
|
09062195b4 | ||
|
|
dc57a5451c | ||
|
|
781f60a5ab | ||
|
|
423961fefb | ||
|
|
324b6ceeef | ||
|
|
d9e14bf5da | ||
|
|
eb2cb1bb25 | ||
|
|
0de9f47694 | ||
|
|
2757f3936c | ||
|
|
8ba61e9123 | ||
|
|
c10d7fbc32 | ||
|
|
b6ed217781 | ||
|
|
7d20f73f71 | ||
|
|
2b306255f9 | ||
|
|
e149d08d47 | ||
|
|
e98ddb9fe6 | ||
|
|
b9a5297694 | ||
|
|
4666312df2 | ||
|
|
d4e524cd83 | ||
|
|
a719228034 | ||
|
|
2fe8b5e33a | ||
|
|
af243b0ef5 | ||
|
|
c96ac04619 | ||
|
|
e2f2950fee | ||
|
|
8b84c59d29 | ||
|
|
b718a276cf | ||
|
|
700511720f | ||
|
|
6bd1719156 | ||
|
|
c8bfe9e0a1 | ||
|
|
037bc04740 | ||
|
|
c3704d47df | ||
|
|
397a153ff6 | ||
|
|
870c432ccf | ||
|
|
c4a81a590f | ||
|
|
017c095eed | ||
|
|
ee37d21aa4 | ||
|
|
e492d88b2d | ||
|
|
3512fdcd9d | ||
|
|
3550795cab | ||
|
|
b26306d678 | ||
|
|
85140b4ba6 | ||
|
|
c241f79f97 | ||
|
|
9808dec6b7 | ||
|
|
632c74af6d | ||
|
|
79073d878c | ||
|
|
620df88c51 | ||
|
|
717f05975d | ||
|
|
d2176342c1 | ||
|
|
bb198b05e1 | ||
|
|
085013d8c3 |
43
.github/actionlint.yml
vendored
Normal file
43
.github/actionlint.yml
vendored
Normal file
@@ -0,0 +1,43 @@
|
||||
self-hosted-runner:
|
||||
# Labels of self-hosted runner in array of strings.
|
||||
labels:
|
||||
- extras=ecr-cache
|
||||
- extras=s3-cache
|
||||
- hdd=256
|
||||
- runs-on
|
||||
- runner=1cpu-linux-arm64
|
||||
- runner=1cpu-linux-x64
|
||||
- runner=2cpu-linux-arm64
|
||||
- runner=2cpu-linux-x64
|
||||
- runner=4cpu-linux-arm64
|
||||
- runner=4cpu-linux-x64
|
||||
- runner=8cpu-linux-arm64
|
||||
- runner=8cpu-linux-x64
|
||||
- runner=16cpu-linux-arm64
|
||||
- runner=16cpu-linux-x64
|
||||
- ubuntu-slim # Currently in public preview
|
||||
- volume=40gb
|
||||
- volume=50gb
|
||||
|
||||
# Configuration variables in array of strings defined in your repository or
|
||||
# organization. `null` means disabling configuration variables check.
|
||||
# Empty array means no configuration variable is allowed.
|
||||
config-variables: null
|
||||
|
||||
# Configuration for file paths. The keys are glob patterns to match to file
|
||||
# paths relative to the repository root. The values are the configurations for
|
||||
# the file paths. Note that the path separator is always '/'.
|
||||
# The following configurations are available.
|
||||
#
|
||||
# "ignore" is an array of regular expression patterns. Matched error messages
|
||||
# are ignored. This is similar to the "-ignore" command line option.
|
||||
paths:
|
||||
# Glob pattern relative to the repository root for matching files. The path separator is always '/'.
|
||||
# This example configures any YAML file under the '.github/workflows/' directory.
|
||||
.github/workflows/**/*.{yml,yaml}:
|
||||
# TODO: These are real and should be fixed eventually.
|
||||
ignore:
|
||||
- 'shellcheck reported issue in this script: SC2038:.+'
|
||||
- 'shellcheck reported issue in this script: SC2046:.+'
|
||||
- 'shellcheck reported issue in this script: SC2086:.+'
|
||||
- 'shellcheck reported issue in this script: SC2193:.+'
|
||||
135
.github/actions/custom-build-and-push/action.yml
vendored
135
.github/actions/custom-build-and-push/action.yml
vendored
@@ -1,135 +0,0 @@
|
||||
name: 'Build and Push Docker Image with Retry'
|
||||
description: 'Attempts to build and push a Docker image, with a retry on failure'
|
||||
inputs:
|
||||
context:
|
||||
description: 'Build context'
|
||||
required: true
|
||||
file:
|
||||
description: 'Dockerfile location'
|
||||
required: true
|
||||
platforms:
|
||||
description: 'Target platforms'
|
||||
required: true
|
||||
pull:
|
||||
description: 'Always attempt to pull a newer version of the image'
|
||||
required: false
|
||||
default: 'true'
|
||||
push:
|
||||
description: 'Push the image to registry'
|
||||
required: false
|
||||
default: 'true'
|
||||
load:
|
||||
description: 'Load the image into Docker daemon'
|
||||
required: false
|
||||
default: 'true'
|
||||
tags:
|
||||
description: 'Image tags'
|
||||
required: true
|
||||
no-cache:
|
||||
description: 'Read from cache'
|
||||
required: false
|
||||
default: 'false'
|
||||
cache-from:
|
||||
description: 'Cache sources'
|
||||
required: false
|
||||
cache-to:
|
||||
description: 'Cache destinations'
|
||||
required: false
|
||||
outputs:
|
||||
description: 'Output destinations'
|
||||
required: false
|
||||
provenance:
|
||||
description: 'Generate provenance attestation'
|
||||
required: false
|
||||
default: 'false'
|
||||
build-args:
|
||||
description: 'Build arguments'
|
||||
required: false
|
||||
retry-wait-time:
|
||||
description: 'Time to wait before attempt 2 in seconds'
|
||||
required: false
|
||||
default: '60'
|
||||
retry-wait-time-2:
|
||||
description: 'Time to wait before attempt 3 in seconds'
|
||||
required: false
|
||||
default: '120'
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Build and push Docker image (Attempt 1 of 3)
|
||||
id: buildx1
|
||||
uses: docker/build-push-action@v6
|
||||
continue-on-error: true
|
||||
with:
|
||||
context: ${{ inputs.context }}
|
||||
file: ${{ inputs.file }}
|
||||
platforms: ${{ inputs.platforms }}
|
||||
pull: ${{ inputs.pull }}
|
||||
push: ${{ inputs.push }}
|
||||
load: ${{ inputs.load }}
|
||||
tags: ${{ inputs.tags }}
|
||||
no-cache: ${{ inputs.no-cache }}
|
||||
cache-from: ${{ inputs.cache-from }}
|
||||
cache-to: ${{ inputs.cache-to }}
|
||||
outputs: ${{ inputs.outputs }}
|
||||
provenance: ${{ inputs.provenance }}
|
||||
build-args: ${{ inputs.build-args }}
|
||||
|
||||
- name: Wait before attempt 2
|
||||
if: steps.buildx1.outcome != 'success'
|
||||
run: |
|
||||
echo "First attempt failed. Waiting ${{ inputs.retry-wait-time }} seconds before retry..."
|
||||
sleep ${{ inputs.retry-wait-time }}
|
||||
shell: bash
|
||||
|
||||
- name: Build and push Docker image (Attempt 2 of 3)
|
||||
id: buildx2
|
||||
if: steps.buildx1.outcome != 'success'
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ${{ inputs.context }}
|
||||
file: ${{ inputs.file }}
|
||||
platforms: ${{ inputs.platforms }}
|
||||
pull: ${{ inputs.pull }}
|
||||
push: ${{ inputs.push }}
|
||||
load: ${{ inputs.load }}
|
||||
tags: ${{ inputs.tags }}
|
||||
no-cache: ${{ inputs.no-cache }}
|
||||
cache-from: ${{ inputs.cache-from }}
|
||||
cache-to: ${{ inputs.cache-to }}
|
||||
outputs: ${{ inputs.outputs }}
|
||||
provenance: ${{ inputs.provenance }}
|
||||
build-args: ${{ inputs.build-args }}
|
||||
|
||||
- name: Wait before attempt 3
|
||||
if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success'
|
||||
run: |
|
||||
echo "Second attempt failed. Waiting ${{ inputs.retry-wait-time-2 }} seconds before retry..."
|
||||
sleep ${{ inputs.retry-wait-time-2 }}
|
||||
shell: bash
|
||||
|
||||
- name: Build and push Docker image (Attempt 3 of 3)
|
||||
id: buildx3
|
||||
if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success'
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ${{ inputs.context }}
|
||||
file: ${{ inputs.file }}
|
||||
platforms: ${{ inputs.platforms }}
|
||||
pull: ${{ inputs.pull }}
|
||||
push: ${{ inputs.push }}
|
||||
load: ${{ inputs.load }}
|
||||
tags: ${{ inputs.tags }}
|
||||
no-cache: ${{ inputs.no-cache }}
|
||||
cache-from: ${{ inputs.cache-from }}
|
||||
cache-to: ${{ inputs.cache-to }}
|
||||
outputs: ${{ inputs.outputs }}
|
||||
provenance: ${{ inputs.provenance }}
|
||||
build-args: ${{ inputs.build-args }}
|
||||
|
||||
- name: Report failure
|
||||
if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success' && steps.buildx3.outcome != 'success'
|
||||
run: |
|
||||
echo "All attempts failed. Possible transient infrastucture issues? Try again later or inspect logs for details."
|
||||
shell: bash
|
||||
50
.github/actions/prepare-build/action.yml
vendored
50
.github/actions/prepare-build/action.yml
vendored
@@ -1,50 +0,0 @@
|
||||
name: "Prepare Build (OpenAPI generation)"
|
||||
description: "Sets up Python with uv, installs deps, generates OpenAPI schema and Python client, uploads artifact"
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup uv
|
||||
uses: astral-sh/setup-uv@v3
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Install Python dependencies with uv
|
||||
shell: bash
|
||||
run: |
|
||||
uv pip install --system \
|
||||
-r backend/requirements/default.txt \
|
||||
-r backend/requirements/dev.txt
|
||||
|
||||
- name: Generate OpenAPI schema
|
||||
shell: bash
|
||||
working-directory: backend
|
||||
env:
|
||||
PYTHONPATH: "."
|
||||
run: |
|
||||
python scripts/onyx_openapi_schema.py --filename generated/openapi.json
|
||||
|
||||
- name: Generate OpenAPI Python client
|
||||
shell: bash
|
||||
run: |
|
||||
docker run --rm \
|
||||
-v "${{ github.workspace }}/backend/generated:/local" \
|
||||
openapitools/openapi-generator-cli generate \
|
||||
-i /local/openapi.json \
|
||||
-g python \
|
||||
-o /local/onyx_openapi_client \
|
||||
--package-name onyx_openapi_client \
|
||||
--skip-validate-spec \
|
||||
--openapi-normalizer "SIMPLIFY_ONEOF_ANYOF=true,SET_OAS3_NULLABLE=true"
|
||||
|
||||
- name: Upload OpenAPI artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: openapi-artifacts
|
||||
path: backend/generated/
|
||||
|
||||
17
.github/actions/setup-playwright/action.yml
vendored
Normal file
17
.github/actions/setup-playwright/action.yml
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
name: "Setup Playwright"
|
||||
description: "Sets up Playwright and system deps (assumes Python and Playwright are installed)"
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Cache playwright cache
|
||||
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
|
||||
with:
|
||||
path: ~/.cache/ms-playwright
|
||||
key: ${{ runner.os }}-${{ runner.arch }}-playwright-${{ hashFiles('backend/requirements/default.txt') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-${{ runner.arch }}-playwright-
|
||||
|
||||
- name: Install playwright
|
||||
shell: bash
|
||||
run: |
|
||||
playwright install chromium --with-deps
|
||||
72
.github/actions/setup-python-and-install-dependencies/action.yml
vendored
Normal file
72
.github/actions/setup-python-and-install-dependencies/action.yml
vendored
Normal file
@@ -0,0 +1,72 @@
|
||||
name: "Setup Python and Install Dependencies"
|
||||
description: "Sets up Python with uv and installs deps"
|
||||
inputs:
|
||||
requirements:
|
||||
description: "Newline-separated list of requirement files to install (relative to repo root)"
|
||||
required: true
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Setup uv
|
||||
uses: astral-sh/setup-uv@caf0cab7a618c569241d31dcd442f54681755d39 # ratchet:astral-sh/setup-uv@v3
|
||||
# TODO: Enable caching once there is a uv.lock file checked in.
|
||||
# with:
|
||||
# enable-cache: true
|
||||
|
||||
- name: Compute requirements hash
|
||||
id: req-hash
|
||||
shell: bash
|
||||
env:
|
||||
REQUIREMENTS: ${{ inputs.requirements }}
|
||||
run: |
|
||||
# Hash the contents of the specified requirement files
|
||||
hash=""
|
||||
while IFS= read -r req; do
|
||||
if [ -n "$req" ] && [ -f "$req" ]; then
|
||||
hash="$hash$(sha256sum "$req")"
|
||||
fi
|
||||
done <<< "$REQUIREMENTS"
|
||||
echo "hash=$(echo "$hash" | sha256sum | cut -d' ' -f1)" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Cache uv cache directory
|
||||
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
|
||||
with:
|
||||
path: ~/.cache/uv
|
||||
key: ${{ runner.os }}-uv-${{ steps.req-hash.outputs.hash }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-uv-
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # ratchet:actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Create virtual environment
|
||||
shell: bash
|
||||
env:
|
||||
VENV_DIR: ${{ runner.temp }}/venv
|
||||
run: | # zizmor: ignore[github-env]
|
||||
uv venv "$VENV_DIR"
|
||||
# Validate path before adding to GITHUB_PATH to prevent code injection
|
||||
if [ -d "$VENV_DIR/bin" ]; then
|
||||
realpath "$VENV_DIR/bin" >> "$GITHUB_PATH"
|
||||
else
|
||||
echo "Error: $VENV_DIR/bin does not exist"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Install Python dependencies with uv
|
||||
shell: bash
|
||||
env:
|
||||
REQUIREMENTS: ${{ inputs.requirements }}
|
||||
run: |
|
||||
# Build the uv pip install command with each requirement file as array elements
|
||||
cmd=("uv" "pip" "install")
|
||||
while IFS= read -r req; do
|
||||
# Skip empty lines
|
||||
if [ -n "$req" ]; then
|
||||
cmd+=("-r" "$req")
|
||||
fi
|
||||
done <<< "$REQUIREMENTS"
|
||||
echo "Running: ${cmd[*]}"
|
||||
"${cmd[@]}"
|
||||
102
.github/actions/slack-notify/action.yml
vendored
Normal file
102
.github/actions/slack-notify/action.yml
vendored
Normal file
@@ -0,0 +1,102 @@
|
||||
name: "Slack Notify on Failure"
|
||||
description: "Sends a Slack notification when a workflow fails"
|
||||
inputs:
|
||||
webhook-url:
|
||||
description: "Slack webhook URL (can also use SLACK_WEBHOOK_URL env var)"
|
||||
required: false
|
||||
failed-jobs:
|
||||
description: "List of failed job names (newline-separated)"
|
||||
required: false
|
||||
title:
|
||||
description: "Title for the notification"
|
||||
required: false
|
||||
default: "🚨 Workflow Failed"
|
||||
ref-name:
|
||||
description: "Git ref name (tag/branch)"
|
||||
required: false
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Send Slack notification
|
||||
shell: bash
|
||||
env:
|
||||
SLACK_WEBHOOK_URL: ${{ inputs.webhook-url }}
|
||||
FAILED_JOBS: ${{ inputs.failed-jobs }}
|
||||
TITLE: ${{ inputs.title }}
|
||||
REF_NAME: ${{ inputs.ref-name }}
|
||||
REPO: ${{ github.repository }}
|
||||
WORKFLOW: ${{ github.workflow }}
|
||||
RUN_NUMBER: ${{ github.run_number }}
|
||||
RUN_ID: ${{ github.run_id }}
|
||||
SERVER_URL: ${{ github.server_url }}
|
||||
GITHUB_REF_NAME: ${{ github.ref_name }}
|
||||
run: |
|
||||
if [ -z "$SLACK_WEBHOOK_URL" ]; then
|
||||
echo "webhook-url input or SLACK_WEBHOOK_URL env var is not set, skipping notification"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Build workflow URL
|
||||
WORKFLOW_URL="${SERVER_URL}/${REPO}/actions/runs/${RUN_ID}"
|
||||
|
||||
# Use ref_name from input or fall back to github.ref_name
|
||||
if [ -z "$REF_NAME" ]; then
|
||||
REF_NAME="$GITHUB_REF_NAME"
|
||||
fi
|
||||
|
||||
# Escape JSON special characters
|
||||
escape_json() {
|
||||
local input="$1"
|
||||
# Escape backslashes first (but preserve \n sequences)
|
||||
# Protect \n sequences temporarily
|
||||
input=$(printf '%s' "$input" | sed 's/\\n/\x01NL\x01/g')
|
||||
# Escape remaining backslashes
|
||||
input=$(printf '%s' "$input" | sed 's/\\/\\\\/g')
|
||||
# Restore \n sequences (single backslash, will be correct in JSON)
|
||||
input=$(printf '%s' "$input" | sed 's/\x01NL\x01/\\n/g')
|
||||
# Escape quotes
|
||||
printf '%s' "$input" | sed 's/"/\\"/g'
|
||||
}
|
||||
|
||||
REF_NAME_ESC=$(escape_json "$REF_NAME")
|
||||
FAILED_JOBS_ESC=$(escape_json "$FAILED_JOBS")
|
||||
WORKFLOW_URL_ESC=$(escape_json "$WORKFLOW_URL")
|
||||
TITLE_ESC=$(escape_json "$TITLE")
|
||||
|
||||
# Build JSON payload piece by piece
|
||||
# Note: FAILED_JOBS_ESC already contains \n sequences that should remain as \n in JSON
|
||||
PAYLOAD="{"
|
||||
PAYLOAD="${PAYLOAD}\"text\":\"${TITLE_ESC}\","
|
||||
PAYLOAD="${PAYLOAD}\"blocks\":[{"
|
||||
PAYLOAD="${PAYLOAD}\"type\":\"header\","
|
||||
PAYLOAD="${PAYLOAD}\"text\":{\"type\":\"plain_text\",\"text\":\"${TITLE_ESC}\"}"
|
||||
PAYLOAD="${PAYLOAD}},{"
|
||||
PAYLOAD="${PAYLOAD}\"type\":\"section\","
|
||||
PAYLOAD="${PAYLOAD}\"fields\":["
|
||||
if [ -n "$REF_NAME" ]; then
|
||||
PAYLOAD="${PAYLOAD}{\"type\":\"mrkdwn\",\"text\":\"*Ref:*\\n${REF_NAME_ESC}\"},"
|
||||
fi
|
||||
PAYLOAD="${PAYLOAD}{\"type\":\"mrkdwn\",\"text\":\"*Run ID:*\\n#${RUN_NUMBER}\"}"
|
||||
PAYLOAD="${PAYLOAD}]"
|
||||
PAYLOAD="${PAYLOAD}}"
|
||||
if [ -n "$FAILED_JOBS" ]; then
|
||||
PAYLOAD="${PAYLOAD},{"
|
||||
PAYLOAD="${PAYLOAD}\"type\":\"section\","
|
||||
PAYLOAD="${PAYLOAD}\"text\":{\"type\":\"mrkdwn\",\"text\":\"*Failed Jobs:*\\n${FAILED_JOBS_ESC}\"}"
|
||||
PAYLOAD="${PAYLOAD}}"
|
||||
fi
|
||||
PAYLOAD="${PAYLOAD},{"
|
||||
PAYLOAD="${PAYLOAD}\"type\":\"actions\","
|
||||
PAYLOAD="${PAYLOAD}\"elements\":[{"
|
||||
PAYLOAD="${PAYLOAD}\"type\":\"button\","
|
||||
PAYLOAD="${PAYLOAD}\"text\":{\"type\":\"plain_text\",\"text\":\"View Workflow Run\"},"
|
||||
PAYLOAD="${PAYLOAD}\"url\":\"${WORKFLOW_URL_ESC}\""
|
||||
PAYLOAD="${PAYLOAD}}]"
|
||||
PAYLOAD="${PAYLOAD}}"
|
||||
PAYLOAD="${PAYLOAD}]"
|
||||
PAYLOAD="${PAYLOAD}}"
|
||||
|
||||
curl -X POST -H 'Content-type: application/json' \
|
||||
--data "$PAYLOAD" \
|
||||
"$SLACK_WEBHOOK_URL"
|
||||
|
||||
24
.github/dependabot.yml
vendored
Normal file
24
.github/dependabot.yml
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: "github-actions"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
cooldown:
|
||||
default-days: 4
|
||||
open-pull-requests-limit: 3
|
||||
assignees:
|
||||
- "jmelahman"
|
||||
labels:
|
||||
- "dependabot:actions"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/backend"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
cooldown:
|
||||
default-days: 4
|
||||
open-pull-requests-limit: 3
|
||||
assignees:
|
||||
- "jmelahman"
|
||||
labels:
|
||||
- "dependabot:python"
|
||||
1
.github/runs-on.yml
vendored
Normal file
1
.github/runs-on.yml
vendored
Normal file
@@ -0,0 +1 @@
|
||||
_extend: .github-private
|
||||
15
.github/workflows/check-lazy-imports.yml
vendored
15
.github/workflows/check-lazy-imports.yml
vendored
@@ -1,4 +1,7 @@
|
||||
name: Check Lazy Imports
|
||||
concurrency:
|
||||
group: Check-Lazy-Imports-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
on:
|
||||
merge_group:
|
||||
@@ -7,18 +10,24 @@ on:
|
||||
- main
|
||||
- 'release/**'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
check-lazy-imports:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 45
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Check lazy imports
|
||||
run: python3 backend/scripts/check_lazy_imports.py
|
||||
run: python3 backend/scripts/check_lazy_imports.py
|
||||
|
||||
1028
.github/workflows/deployment.yml
vendored
Normal file
1028
.github/workflows/deployment.yml
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,186 +0,0 @@
|
||||
name: Build and Push Backend Image on Tag
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- "*"
|
||||
|
||||
env:
|
||||
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
|
||||
DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}
|
||||
|
||||
# tag nightly builds with "edge"
|
||||
EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
|
||||
|
||||
jobs:
|
||||
build-and-push:
|
||||
# TODO: investigate a matrix build like the web container
|
||||
# See https://runs-on.com/runners/linux/
|
||||
runs-on:
|
||||
- runs-on
|
||||
- runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
|
||||
- run-id=${{ github.run_id }}
|
||||
- tag=platform-${{ matrix.platform }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
platform:
|
||||
- linux/amd64
|
||||
- linux/arm64
|
||||
|
||||
steps:
|
||||
- name: Prepare
|
||||
run: |
|
||||
platform=${{ matrix.platform }}
|
||||
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
|
||||
|
||||
- name: Check if stable release version
|
||||
id: check_version
|
||||
run: |
|
||||
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
|
||||
echo "is_stable=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "is_stable=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
latest=false
|
||||
tags: |
|
||||
type=raw,value=${{ github.ref_name }}
|
||||
type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
|
||||
type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Install build-essential
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y build-essential
|
||||
|
||||
- name: Backend Image Docker Build and Push
|
||||
id: build
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile
|
||||
platforms: ${{ matrix.platform }}
|
||||
push: true
|
||||
build-args: |
|
||||
ONYX_VERSION=${{ github.ref_name }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
|
||||
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
|
||||
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
|
||||
|
||||
- name: Export digest
|
||||
run: |
|
||||
mkdir -p /tmp/digests
|
||||
digest="${{ steps.build.outputs.digest }}"
|
||||
touch "/tmp/digests/${digest#sha256:}"
|
||||
|
||||
- name: Upload digest
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: backend-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
|
||||
path: /tmp/digests/*
|
||||
if-no-files-found: error
|
||||
retention-days: 1
|
||||
|
||||
merge:
|
||||
runs-on: ubuntu-latest
|
||||
needs:
|
||||
- build-and-push
|
||||
steps:
|
||||
# Needed for trivyignore
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Check if stable release version
|
||||
id: check_version
|
||||
run: |
|
||||
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
|
||||
echo "is_stable=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "is_stable=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Download digests
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: /tmp/digests
|
||||
pattern: backend-digests-*-${{ github.run_id }}
|
||||
merge-multiple: true
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
latest=false
|
||||
tags: |
|
||||
type=raw,value=${{ github.ref_name }}
|
||||
type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
|
||||
type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Create manifest list and push
|
||||
working-directory: /tmp/digests
|
||||
run: |
|
||||
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
|
||||
$(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
|
||||
|
||||
- name: Inspect image
|
||||
run: |
|
||||
docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}
|
||||
|
||||
# trivy has their own rate limiting issues causing this action to flake
|
||||
# we worked around it by hardcoding to different db repos in env
|
||||
# can re-enable when they figure it out
|
||||
# https://github.com/aquasecurity/trivy/discussions/7538
|
||||
# https://github.com/aquasecurity/trivy-action/issues/389
|
||||
# Security: Using pinned digest (0.65.0@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436)
|
||||
# Security: No Docker socket mount needed for remote registry scanning
|
||||
- name: Run Trivy vulnerability scanner
|
||||
uses: nick-fields/retry@v3
|
||||
with:
|
||||
timeout_minutes: 30
|
||||
max_attempts: 3
|
||||
retry_wait_seconds: 10
|
||||
command: |
|
||||
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
|
||||
-v ${{ github.workspace }}/backend/.trivyignore:/tmp/.trivyignore:ro \
|
||||
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
|
||||
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
|
||||
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
|
||||
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
|
||||
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
|
||||
image \
|
||||
--skip-version-check \
|
||||
--timeout 20m \
|
||||
--severity CRITICAL,HIGH \
|
||||
--ignorefile /tmp/.trivyignore \
|
||||
docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
|
||||
@@ -1,158 +0,0 @@
|
||||
name: Build and Push Cloud Web Image on Tag
|
||||
# Identical to the web container build, but with correct image tag and build args
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- "*cloud*"
|
||||
|
||||
env:
|
||||
REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
|
||||
DEPLOYMENT: cloud
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on:
|
||||
- runs-on
|
||||
- runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
|
||||
- run-id=${{ github.run_id }}
|
||||
- tag=platform-${{ matrix.platform }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
platform:
|
||||
- linux/amd64
|
||||
- linux/arm64
|
||||
|
||||
steps:
|
||||
- name: Prepare
|
||||
run: |
|
||||
platform=${{ matrix.platform }}
|
||||
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
latest=false
|
||||
tags: |
|
||||
type=raw,value=${{ github.ref_name }}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Build and push by digest
|
||||
id: build
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./web
|
||||
file: ./web/Dockerfile
|
||||
platforms: ${{ matrix.platform }}
|
||||
push: true
|
||||
build-args: |
|
||||
ONYX_VERSION=${{ github.ref_name }}
|
||||
NEXT_PUBLIC_CLOUD_ENABLED=true
|
||||
NEXT_PUBLIC_POSTHOG_KEY=${{ secrets.POSTHOG_KEY }}
|
||||
NEXT_PUBLIC_POSTHOG_HOST=${{ secrets.POSTHOG_HOST }}
|
||||
NEXT_PUBLIC_SENTRY_DSN=${{ secrets.SENTRY_DSN }}
|
||||
NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=${{ secrets.STRIPE_PUBLISHABLE_KEY }}
|
||||
NEXT_PUBLIC_GTM_ENABLED=true
|
||||
NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
|
||||
NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
|
||||
NODE_OPTIONS=--max-old-space-size=8192
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
|
||||
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/cloudweb-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
|
||||
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/cloudweb-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
|
||||
# no-cache needed due to weird interactions with the builds for different platforms
|
||||
# NOTE(rkuo): this may not be true any more with the proper cache prefixing by architecture - currently testing with it off
|
||||
|
||||
- name: Export digest
|
||||
run: |
|
||||
mkdir -p /tmp/digests
|
||||
digest="${{ steps.build.outputs.digest }}"
|
||||
touch "/tmp/digests/${digest#sha256:}"
|
||||
|
||||
- name: Upload digest
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: cloudweb-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
|
||||
path: /tmp/digests/*
|
||||
if-no-files-found: error
|
||||
retention-days: 1
|
||||
|
||||
merge:
|
||||
runs-on: ubuntu-latest
|
||||
needs:
|
||||
- build
|
||||
steps:
|
||||
- name: Download digests
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: /tmp/digests
|
||||
pattern: cloudweb-digests-*-${{ github.run_id }}
|
||||
merge-multiple: true
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
latest=false
|
||||
tags: |
|
||||
type=raw,value=${{ github.ref_name }}
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Create manifest list and push
|
||||
working-directory: /tmp/digests
|
||||
run: |
|
||||
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
|
||||
$(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
|
||||
|
||||
- name: Inspect image
|
||||
run: |
|
||||
docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}
|
||||
|
||||
# trivy has their own rate limiting issues causing this action to flake
|
||||
# we worked around it by hardcoding to different db repos in env
|
||||
# can re-enable when they figure it out
|
||||
# https://github.com/aquasecurity/trivy/discussions/7538
|
||||
# https://github.com/aquasecurity/trivy-action/issues/389
|
||||
- name: Run Trivy vulnerability scanner
|
||||
uses: nick-fields/retry@v3
|
||||
with:
|
||||
timeout_minutes: 30
|
||||
max_attempts: 3
|
||||
retry_wait_seconds: 10
|
||||
command: |
|
||||
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
|
||||
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
|
||||
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
|
||||
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
|
||||
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
|
||||
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
|
||||
image \
|
||||
--skip-version-check \
|
||||
--timeout 20m \
|
||||
--severity CRITICAL,HIGH \
|
||||
docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
|
||||
@@ -1,197 +0,0 @@
|
||||
name: Build and Push Model Server Image on Tag
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- "*"
|
||||
|
||||
env:
|
||||
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
|
||||
DOCKER_BUILDKIT: 1
|
||||
BUILDKIT_PROGRESS: plain
|
||||
DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}
|
||||
|
||||
# tag nightly builds with "edge"
|
||||
EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
|
||||
|
||||
jobs:
|
||||
|
||||
# Bypassing this for now as the idea of not building is glitching
|
||||
# releases and builds that depend on everything being tagged in docker
|
||||
# 1) Preliminary job to check if the changed files are relevant
|
||||
# check_model_server_changes:
|
||||
# runs-on: ubuntu-latest
|
||||
# outputs:
|
||||
# changed: ${{ steps.check.outputs.changed }}
|
||||
# steps:
|
||||
# - name: Checkout code
|
||||
# uses: actions/checkout@v4
|
||||
#
|
||||
# - name: Check if relevant files changed
|
||||
# id: check
|
||||
# run: |
|
||||
# # Default to "false"
|
||||
# echo "changed=false" >> $GITHUB_OUTPUT
|
||||
#
|
||||
# # Compare the previous commit (github.event.before) to the current one (github.sha)
|
||||
# # If any file in backend/model_server/** or backend/Dockerfile.model_server is changed,
|
||||
# # set changed=true
|
||||
# if git diff --name-only ${{ github.event.before }} ${{ github.sha }} \
|
||||
# | grep -E '^backend/model_server/|^backend/Dockerfile.model_server'; then
|
||||
# echo "changed=true" >> $GITHUB_OUTPUT
|
||||
# fi
|
||||
|
||||
check_model_server_changes:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
changed: "true"
|
||||
steps:
|
||||
- name: Bypass check and set output
|
||||
run: echo "changed=true" >> $GITHUB_OUTPUT
|
||||
|
||||
build-amd64:
|
||||
needs: [check_model_server_changes]
|
||||
if: needs.check_model_server_changes.outputs.changed == 'true'
|
||||
runs-on:
|
||||
[runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-amd64"]
|
||||
env:
|
||||
PLATFORM_PAIR: linux-amd64
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: System Info
|
||||
run: |
|
||||
df -h
|
||||
free -h
|
||||
docker system prune -af --volumes
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
with:
|
||||
driver-opts: |
|
||||
image=moby/buildkit:latest
|
||||
network=host
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Build and Push AMD64
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile.model_server
|
||||
platforms: linux/amd64
|
||||
push: true
|
||||
tags: ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64
|
||||
build-args: |
|
||||
ONYX_VERSION=${{ github.ref_name }}
|
||||
outputs: type=registry
|
||||
provenance: false
|
||||
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
|
||||
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
|
||||
# no-cache: true
|
||||
|
||||
build-arm64:
|
||||
needs: [check_model_server_changes]
|
||||
if: needs.check_model_server_changes.outputs.changed == 'true'
|
||||
runs-on:
|
||||
[runs-on, runner=8cpu-linux-arm64, "run-id=${{ github.run_id }}-arm64"]
|
||||
env:
|
||||
PLATFORM_PAIR: linux-arm64
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: System Info
|
||||
run: |
|
||||
df -h
|
||||
free -h
|
||||
docker system prune -af --volumes
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
with:
|
||||
driver-opts: |
|
||||
image=moby/buildkit:latest
|
||||
network=host
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Build and Push ARM64
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile.model_server
|
||||
platforms: linux/arm64
|
||||
push: true
|
||||
tags: ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
|
||||
build-args: |
|
||||
ONYX_VERSION=${{ github.ref_name }}
|
||||
outputs: type=registry
|
||||
provenance: false
|
||||
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
|
||||
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
|
||||
|
||||
merge-and-scan:
|
||||
needs: [build-amd64, build-arm64, check_model_server_changes]
|
||||
if: needs.check_model_server_changes.outputs.changed == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Check if stable release version
|
||||
id: check_version
|
||||
run: |
|
||||
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
|
||||
echo "is_stable=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "is_stable=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Create and Push Multi-arch Manifest
|
||||
run: |
|
||||
docker buildx create --use
|
||||
docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }} \
|
||||
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
|
||||
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
|
||||
if [[ "${{ steps.check_version.outputs.is_stable }}" == "true" ]]; then
|
||||
docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:latest \
|
||||
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
|
||||
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
|
||||
fi
|
||||
if [[ "${{ env.EDGE_TAG }}" == "true" ]]; then
|
||||
docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:edge \
|
||||
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
|
||||
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
|
||||
fi
|
||||
|
||||
- name: Run Trivy vulnerability scanner
|
||||
uses: nick-fields/retry@v3
|
||||
with:
|
||||
timeout_minutes: 30
|
||||
max_attempts: 3
|
||||
retry_wait_seconds: 10
|
||||
command: |
|
||||
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
|
||||
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
|
||||
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
|
||||
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
|
||||
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
|
||||
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
|
||||
image \
|
||||
--skip-version-check \
|
||||
--timeout 20m \
|
||||
--severity CRITICAL,HIGH \
|
||||
docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
|
||||
@@ -1,192 +0,0 @@
|
||||
name: Build and Push Web Image on Tag
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- "*"
|
||||
|
||||
env:
|
||||
REGISTRY_IMAGE: onyxdotapp/onyx-web-server
|
||||
|
||||
# tag nightly builds with "edge"
|
||||
EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
|
||||
|
||||
DEPLOYMENT: standalone
|
||||
|
||||
jobs:
|
||||
precheck:
|
||||
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
|
||||
outputs:
|
||||
should-run: ${{ steps.set-output.outputs.should-run }}
|
||||
steps:
|
||||
- name: Check if tag contains "cloud"
|
||||
id: set-output
|
||||
run: |
|
||||
if [[ "${{ github.ref_name }}" == *cloud* ]]; then
|
||||
echo "should-run=false" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "should-run=true" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
build:
|
||||
needs: precheck
|
||||
if: needs.precheck.outputs.should-run == 'true'
|
||||
runs-on:
|
||||
- runs-on
|
||||
- runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
|
||||
- run-id=${{ github.run_id }}
|
||||
- tag=platform-${{ matrix.platform }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
platform:
|
||||
- linux/amd64
|
||||
- linux/arm64
|
||||
|
||||
steps:
|
||||
- name: Prepare
|
||||
run: |
|
||||
platform=${{ matrix.platform }}
|
||||
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
|
||||
|
||||
- name: Check if stable release version
|
||||
id: check_version
|
||||
run: |
|
||||
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
|
||||
echo "is_stable=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "is_stable=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
latest=false
|
||||
tags: |
|
||||
type=raw,value=${{ github.ref_name }}
|
||||
type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
|
||||
type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Build and push by digest
|
||||
id: build
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./web
|
||||
file: ./web/Dockerfile
|
||||
platforms: ${{ matrix.platform }}
|
||||
push: true
|
||||
build-args: |
|
||||
ONYX_VERSION=${{ github.ref_name }}
|
||||
NODE_OPTIONS=--max-old-space-size=8192
|
||||
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
|
||||
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/web-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
|
||||
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/web-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
|
||||
# no-cache needed due to weird interactions with the builds for different platforms
|
||||
# NOTE(rkuo): this may not be true any more with the proper cache prefixing by architecture - currently testing with it off
|
||||
|
||||
- name: Export digest
|
||||
run: |
|
||||
mkdir -p /tmp/digests
|
||||
digest="${{ steps.build.outputs.digest }}"
|
||||
touch "/tmp/digests/${digest#sha256:}"
|
||||
|
||||
- name: Upload digest
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: web-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
|
||||
path: /tmp/digests/*
|
||||
if-no-files-found: error
|
||||
retention-days: 1
|
||||
|
||||
merge:
|
||||
needs:
|
||||
- build
|
||||
if: needs.precheck.outputs.should-run == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Check if stable release version
|
||||
id: check_version
|
||||
run: |
|
||||
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
|
||||
echo "is_stable=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "is_stable=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Download digests
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: /tmp/digests
|
||||
pattern: web-digests-*-${{ github.run_id }}
|
||||
merge-multiple: true
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
latest=false
|
||||
tags: |
|
||||
type=raw,value=${{ github.ref_name }}
|
||||
type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
|
||||
type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Create manifest list and push
|
||||
working-directory: /tmp/digests
|
||||
run: |
|
||||
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
|
||||
$(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
|
||||
|
||||
- name: Inspect image
|
||||
run: |
|
||||
docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}
|
||||
|
||||
# trivy has their own rate limiting issues causing this action to flake
|
||||
# we worked around it by hardcoding to different db repos in env
|
||||
# can re-enable when they figure it out
|
||||
# https://github.com/aquasecurity/trivy/discussions/7538
|
||||
# https://github.com/aquasecurity/trivy-action/issues/389
|
||||
- name: Run Trivy vulnerability scanner
|
||||
uses: nick-fields/retry@v3
|
||||
with:
|
||||
timeout_minutes: 30
|
||||
max_attempts: 3
|
||||
retry_wait_seconds: 10
|
||||
command: |
|
||||
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
|
||||
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
|
||||
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
|
||||
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
|
||||
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
|
||||
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
|
||||
image \
|
||||
--skip-version-check \
|
||||
--timeout 20m \
|
||||
--severity CRITICAL,HIGH \
|
||||
docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
|
||||
51
.github/workflows/docker-tag-beta.yml
vendored
Normal file
51
.github/workflows/docker-tag-beta.yml
vendored
Normal file
@@ -0,0 +1,51 @@
|
||||
# This workflow is set up to be manually triggered via the GitHub Actions tab.
|
||||
# Given a version, it will tag the web server, backend, and model server images for that version as "beta".
|
||||
|
||||
name: Tag Beta Version
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
version:
|
||||
description: "The version (ie v1.0.0-beta.0) to tag as beta"
|
||||
required: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
tag:
|
||||
# See https://runs-on.com/runners/linux/
|
||||
# use a lower powered instance since this just does i/o to docker hub
|
||||
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-tag"]
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Enable Docker CLI experimental features
|
||||
run: echo "DOCKER_CLI_EXPERIMENTAL=enabled" >> $GITHUB_ENV
|
||||
|
||||
- name: Pull, Tag and Push Web Server Image
|
||||
env:
|
||||
VERSION: ${{ github.event.inputs.version }}
|
||||
run: |
|
||||
docker buildx imagetools create -t onyxdotapp/onyx-web-server:beta onyxdotapp/onyx-web-server:${VERSION}
|
||||
|
||||
- name: Pull, Tag and Push API Server Image
|
||||
env:
|
||||
VERSION: ${{ github.event.inputs.version }}
|
||||
run: |
|
||||
docker buildx imagetools create -t onyxdotapp/onyx-backend:beta onyxdotapp/onyx-backend:${VERSION}
|
||||
|
||||
- name: Pull, Tag and Push Model Server Image
|
||||
env:
|
||||
VERSION: ${{ github.event.inputs.version }}
|
||||
run: |
|
||||
docker buildx imagetools create -t onyxdotapp/onyx-model-server:beta onyxdotapp/onyx-model-server:${VERSION}
|
||||
22
.github/workflows/docker-tag-latest.yml
vendored
22
.github/workflows/docker-tag-latest.yml
vendored
@@ -10,17 +10,21 @@ on:
|
||||
description: "The version (ie v0.0.1) to tag as latest"
|
||||
required: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
tag:
|
||||
# See https://runs-on.com/runners/linux/
|
||||
# use a lower powered instance since this just does i/o to docker hub
|
||||
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
|
||||
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-tag"]
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v1
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v1
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
@@ -29,13 +33,19 @@ jobs:
|
||||
run: echo "DOCKER_CLI_EXPERIMENTAL=enabled" >> $GITHUB_ENV
|
||||
|
||||
- name: Pull, Tag and Push Web Server Image
|
||||
env:
|
||||
VERSION: ${{ github.event.inputs.version }}
|
||||
run: |
|
||||
docker buildx imagetools create -t onyxdotapp/onyx-web-server:latest onyxdotapp/onyx-web-server:${{ github.event.inputs.version }}
|
||||
docker buildx imagetools create -t onyxdotapp/onyx-web-server:latest onyxdotapp/onyx-web-server:${VERSION}
|
||||
|
||||
- name: Pull, Tag and Push API Server Image
|
||||
env:
|
||||
VERSION: ${{ github.event.inputs.version }}
|
||||
run: |
|
||||
docker buildx imagetools create -t onyxdotapp/onyx-backend:latest onyxdotapp/onyx-backend:${{ github.event.inputs.version }}
|
||||
docker buildx imagetools create -t onyxdotapp/onyx-backend:latest onyxdotapp/onyx-backend:${VERSION}
|
||||
|
||||
- name: Pull, Tag and Push Model Server Image
|
||||
env:
|
||||
VERSION: ${{ github.event.inputs.version }}
|
||||
run: |
|
||||
docker buildx imagetools create -t onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:${{ github.event.inputs.version }}
|
||||
docker buildx imagetools create -t onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:${VERSION}
|
||||
|
||||
8
.github/workflows/helm-chart-releases.yml
vendored
8
.github/workflows/helm-chart-releases.yml
vendored
@@ -12,14 +12,16 @@ jobs:
|
||||
permissions:
|
||||
contents: write
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Install Helm CLI
|
||||
uses: azure/setup-helm@v4
|
||||
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # ratchet:azure/setup-helm@v4
|
||||
with:
|
||||
version: v3.12.1
|
||||
|
||||
@@ -43,7 +45,7 @@ jobs:
|
||||
done
|
||||
|
||||
- name: Publish Helm charts to gh-pages
|
||||
uses: stefanprodan/helm-gh-pages@v1.7.0
|
||||
uses: stefanprodan/helm-gh-pages@0ad2bb377311d61ac04ad9eb6f252fb68e207260 # ratchet:stefanprodan/helm-gh-pages@v1.7.0
|
||||
with:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
charts_dir: deployment/helm/charts
|
||||
|
||||
@@ -7,12 +7,13 @@ permissions:
|
||||
# contents: write # only for delete-branch option
|
||||
issues: write
|
||||
pull-requests: write
|
||||
|
||||
|
||||
jobs:
|
||||
stale:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- uses: actions/stale@v9
|
||||
- uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # ratchet:actions/stale@v10
|
||||
with:
|
||||
stale-issue-message: 'This issue is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'
|
||||
stale-pr-message: 'This PR is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'
|
||||
@@ -20,4 +21,3 @@ jobs:
|
||||
close-pr-message: 'This PR was closed because it has been stalled for 90 days with no activity.'
|
||||
days-before-stale: 75
|
||||
# days-before-close: 90 # uncomment after we test stale behavior
|
||||
|
||||
57
.github/workflows/nightly-scan-licenses.yml
vendored
57
.github/workflows/nightly-scan-licenses.yml
vendored
@@ -15,19 +15,25 @@ on:
|
||||
permissions:
|
||||
actions: read
|
||||
contents: read
|
||||
security-events: write
|
||||
|
||||
|
||||
jobs:
|
||||
scan-licenses:
|
||||
# See https://runs-on.com/runners/linux/
|
||||
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"]
|
||||
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}-scan-licenses"]
|
||||
timeout-minutes: 45
|
||||
permissions:
|
||||
actions: read
|
||||
contents: read
|
||||
security-events: write
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: 'pip'
|
||||
@@ -35,7 +41,7 @@ jobs:
|
||||
backend/requirements/default.txt
|
||||
backend/requirements/dev.txt
|
||||
backend/requirements/model_server.txt
|
||||
|
||||
|
||||
- name: Get explicit and transitive dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
@@ -43,28 +49,30 @@ jobs:
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
|
||||
pip freeze > requirements-all.txt
|
||||
|
||||
|
||||
- name: Check python
|
||||
id: license_check_report
|
||||
uses: pilosus/action-pip-license-checker@v2
|
||||
uses: pilosus/action-pip-license-checker@e909b0226ff49d3235c99c4585bc617f49fff16a # ratchet:pilosus/action-pip-license-checker@v3
|
||||
with:
|
||||
requirements: 'requirements-all.txt'
|
||||
fail: 'Copyleft'
|
||||
exclude: '(?i)^(pylint|aio[-_]*).*'
|
||||
|
||||
|
||||
- name: Print report
|
||||
if: always()
|
||||
run: echo "${{ steps.license_check_report.outputs.report }}"
|
||||
|
||||
env:
|
||||
REPORT: ${{ steps.license_check_report.outputs.report }}
|
||||
run: echo "$REPORT"
|
||||
|
||||
- name: Install npm dependencies
|
||||
working-directory: ./web
|
||||
run: npm ci
|
||||
|
||||
# be careful enabling the sarif and upload as it may spam the security tab
|
||||
# with a huge amount of items. Work out the issues before enabling upload.
|
||||
# with a huge amount of items. Work out the issues before enabling upload.
|
||||
# - name: Run Trivy vulnerability scanner in repo mode
|
||||
# if: always()
|
||||
# uses: aquasecurity/trivy-action@0.29.0
|
||||
# uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # ratchet:aquasecurity/trivy-action@0.33.1
|
||||
# with:
|
||||
# scan-type: fs
|
||||
# scan-ref: .
|
||||
@@ -73,7 +81,7 @@ jobs:
|
||||
# severity: HIGH,CRITICAL
|
||||
# # format: sarif
|
||||
# # output: trivy-results.sarif
|
||||
#
|
||||
#
|
||||
# # - name: Upload Trivy scan results to GitHub Security tab
|
||||
# # uses: github/codeql-action/upload-sarif@v3
|
||||
# # with:
|
||||
@@ -81,14 +89,15 @@ jobs:
|
||||
|
||||
scan-trivy:
|
||||
# See https://runs-on.com/runners/linux/
|
||||
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"]
|
||||
|
||||
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}-scan-trivy"]
|
||||
timeout-minutes: 45
|
||||
|
||||
steps:
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
@@ -98,7 +107,7 @@ jobs:
|
||||
run: docker pull onyxdotapp/onyx-backend:latest
|
||||
|
||||
- name: Run Trivy vulnerability scanner on backend
|
||||
uses: aquasecurity/trivy-action@0.29.0
|
||||
uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # ratchet:aquasecurity/trivy-action@0.33.1
|
||||
env:
|
||||
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
|
||||
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
|
||||
@@ -112,9 +121,9 @@ jobs:
|
||||
# Web server
|
||||
- name: Pull web server docker image
|
||||
run: docker pull onyxdotapp/onyx-web-server:latest
|
||||
|
||||
|
||||
- name: Run Trivy vulnerability scanner on web server
|
||||
uses: aquasecurity/trivy-action@0.29.0
|
||||
uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # ratchet:aquasecurity/trivy-action@0.33.1
|
||||
env:
|
||||
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
|
||||
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
|
||||
@@ -130,7 +139,7 @@ jobs:
|
||||
run: docker pull onyxdotapp/onyx-model-server:latest
|
||||
|
||||
- name: Run Trivy vulnerability scanner
|
||||
uses: aquasecurity/trivy-action@0.29.0
|
||||
uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # ratchet:aquasecurity/trivy-action@0.33.1
|
||||
env:
|
||||
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
|
||||
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
|
||||
@@ -139,4 +148,4 @@ jobs:
|
||||
scanners: license
|
||||
severity: HIGH,CRITICAL
|
||||
vuln-type: library
|
||||
exit-code: 0
|
||||
exit-code: 0
|
||||
|
||||
@@ -1,17 +1,25 @@
|
||||
name: External Dependency Unit Tests
|
||||
concurrency:
|
||||
group: External-Dependency-Unit-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
on:
|
||||
merge_group:
|
||||
pull_request:
|
||||
branches: [main]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
env:
|
||||
# AWS
|
||||
S3_AWS_ACCESS_KEY_ID: ${{ secrets.S3_AWS_ACCESS_KEY_ID }}
|
||||
S3_AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_AWS_SECRET_ACCESS_KEY }}
|
||||
# AWS credentials for S3-specific test
|
||||
S3_AWS_ACCESS_KEY_ID_FOR_TEST: ${{ secrets.S3_AWS_ACCESS_KEY_ID }}
|
||||
S3_AWS_SECRET_ACCESS_KEY_FOR_TEST: ${{ secrets.S3_AWS_SECRET_ACCESS_KEY }}
|
||||
|
||||
# MinIO
|
||||
S3_ENDPOINT_URL: "http://localhost:9004"
|
||||
S3_AWS_ACCESS_KEY_ID: "minioadmin"
|
||||
S3_AWS_SECRET_ACCESS_KEY: "minioadmin"
|
||||
|
||||
# Confluence
|
||||
CONFLUENCE_TEST_SPACE_URL: ${{ vars.CONFLUENCE_TEST_SPACE_URL }}
|
||||
@@ -25,15 +33,23 @@ env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
|
||||
# Code Interpreter
|
||||
# TODO: debug why this is failing and enable
|
||||
CODE_INTERPRETER_BASE_URL: http://localhost:8000
|
||||
|
||||
jobs:
|
||||
discover-test-dirs:
|
||||
runs-on: ubuntu-latest
|
||||
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
|
||||
runs-on: ubuntu-slim
|
||||
timeout-minutes: 45
|
||||
outputs:
|
||||
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Discover test directories
|
||||
id: set-matrix
|
||||
run: |
|
||||
@@ -44,8 +60,12 @@ jobs:
|
||||
external-dependency-unit-tests:
|
||||
needs: discover-test-dirs
|
||||
# Use larger runner with more resources for Vespa
|
||||
runs-on: [runs-on, runner=16cpu-linux-x64, "run-id=${{ github.run_id }}"]
|
||||
|
||||
runs-on:
|
||||
- runs-on
|
||||
- runner=2cpu-linux-arm64
|
||||
- ${{ format('run-id={0}-external-dependency-unit-tests-job-{1}', github.run_id, strategy['job-index']) }}
|
||||
- extras=s3-cache
|
||||
timeout-minutes: 45
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -56,41 +76,51 @@ jobs:
|
||||
MODEL_SERVER_HOST: "disabled"
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
python-version: "3.11"
|
||||
cache: "pip"
|
||||
cache-dependency-path: |
|
||||
persist-credentials: false
|
||||
|
||||
- name: Setup Python and Install Dependencies
|
||||
uses: ./.github/actions/setup-python-and-install-dependencies
|
||||
with:
|
||||
requirements: |
|
||||
backend/requirements/default.txt
|
||||
backend/requirements/dev.txt
|
||||
backend/requirements/ee.txt
|
||||
|
||||
- name: Install Dependencies
|
||||
- name: Setup Playwright
|
||||
uses: ./.github/actions/setup-playwright
|
||||
|
||||
# needed for pulling Vespa, Redis, Postgres, and Minio images
|
||||
# otherwise, we hit the "Unauthenticated users" limit
|
||||
# https://docs.docker.com/docker-hub/usage/
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Create .env file for Docker Compose
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
|
||||
playwright install chromium
|
||||
playwright install-deps chromium
|
||||
cat <<EOF > deployment/docker_compose/.env
|
||||
CODE_INTERPRETER_BETA_ENABLED=true
|
||||
EOF
|
||||
|
||||
- name: Set up Standard Dependencies
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d minio relational_db cache index
|
||||
|
||||
- name: Wait for services
|
||||
run: |
|
||||
echo "Waiting for services to be ready..."
|
||||
sleep 30
|
||||
|
||||
# Wait for Vespa specifically
|
||||
echo "Waiting for Vespa to be ready..."
|
||||
timeout 300 bash -c 'until curl -f -s http://localhost:8081/ApplicationStatus > /dev/null 2>&1; do echo "Vespa not ready, waiting..."; sleep 10; done' || echo "Vespa timeout - continuing anyway"
|
||||
|
||||
echo "Services should be ready now"
|
||||
docker compose \
|
||||
-f docker-compose.yml \
|
||||
-f docker-compose.dev.yml \
|
||||
up -d \
|
||||
minio \
|
||||
relational_db \
|
||||
cache \
|
||||
index \
|
||||
code-interpreter
|
||||
|
||||
- name: Run migrations
|
||||
run: |
|
||||
@@ -101,10 +131,39 @@ jobs:
|
||||
|
||||
- name: Run Tests for ${{ matrix.test-dir }}
|
||||
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
|
||||
env:
|
||||
TEST_DIR: ${{ matrix.test-dir }}
|
||||
run: |
|
||||
py.test \
|
||||
--durations=8 \
|
||||
-o junit_family=xunit2 \
|
||||
-xv \
|
||||
--ff \
|
||||
backend/tests/external_dependency_unit/${{ matrix.test-dir }}
|
||||
backend/tests/external_dependency_unit/${TEST_DIR}
|
||||
|
||||
- name: Collect Docker logs on failure
|
||||
if: failure()
|
||||
run: |
|
||||
mkdir -p docker-logs
|
||||
cd deployment/docker_compose
|
||||
|
||||
# Get list of running containers
|
||||
containers=$(docker compose -f docker-compose.yml -f docker-compose.dev.yml ps -q)
|
||||
|
||||
# Collect logs from each container
|
||||
for container in $containers; do
|
||||
container_name=$(docker inspect --format='{{.Name}}' $container | sed 's/^\///')
|
||||
echo "Collecting logs from $container_name..."
|
||||
docker logs $container > ../../docker-logs/${container_name}.log 2>&1
|
||||
done
|
||||
|
||||
cd ../..
|
||||
echo "Docker logs collected in docker-logs directory"
|
||||
|
||||
- name: Upload Docker logs
|
||||
if: failure()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: docker-logs-${{ matrix.test-dir }}
|
||||
path: docker-logs/
|
||||
retention-days: 7
|
||||
|
||||
50
.github/workflows/pr-helm-chart-testing.yml
vendored
50
.github/workflows/pr-helm-chart-testing.yml
vendored
@@ -1,37 +1,47 @@
|
||||
name: Helm - Lint and Test Charts
|
||||
concurrency:
|
||||
group: Helm-Lint-and-Test-Charts-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
on:
|
||||
merge_group:
|
||||
pull_request:
|
||||
branches: [ main ]
|
||||
workflow_dispatch: # Allows manual triggering
|
||||
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
helm-chart-check:
|
||||
# See https://runs-on.com/runners/linux/
|
||||
runs-on: [runs-on,runner=8cpu-linux-x64,hdd=256,"run-id=${{ github.run_id }}"]
|
||||
runs-on: [runs-on,runner=8cpu-linux-x64,hdd=256,"run-id=${{ github.run_id }}-helm-chart-check"]
|
||||
timeout-minutes: 45
|
||||
|
||||
# fetch-depth 0 is required for helm/chart-testing-action
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Helm
|
||||
uses: azure/setup-helm@v4.2.0
|
||||
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # ratchet:azure/setup-helm@v4.3.1
|
||||
with:
|
||||
version: v3.17.0
|
||||
|
||||
version: v3.19.0
|
||||
|
||||
- name: Set up chart-testing
|
||||
uses: helm/chart-testing-action@v2.7.0
|
||||
uses: helm/chart-testing-action@6ec842c01de15ebb84c8627d2744a0c2f2755c9f # ratchet:helm/chart-testing-action@v2.8.0
|
||||
|
||||
# even though we specify chart-dirs in ct.yaml, it isn't used by ct for the list-changed command...
|
||||
- name: Run chart-testing (list-changed)
|
||||
id: list-changed
|
||||
env:
|
||||
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
|
||||
run: |
|
||||
echo "default_branch: ${{ github.event.repository.default_branch }}"
|
||||
changed=$(ct list-changed --remote origin --target-branch ${{ github.event.repository.default_branch }} --chart-dirs deployment/helm/charts)
|
||||
echo "default_branch: ${DEFAULT_BRANCH}"
|
||||
changed=$(ct list-changed --remote origin --target-branch ${DEFAULT_BRANCH} --chart-dirs deployment/helm/charts)
|
||||
echo "list-changed output: $changed"
|
||||
if [[ -n "$changed" ]]; then
|
||||
echo "changed=true" >> "$GITHUB_OUTPUT"
|
||||
@@ -41,7 +51,7 @@ jobs:
|
||||
# - name: Force run chart-testing (list-changed)
|
||||
# id: list-changed
|
||||
# run: echo "changed=true" >> $GITHUB_OUTPUT
|
||||
|
||||
|
||||
# lint all charts if any changes were detected
|
||||
- name: Run chart-testing (lint)
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
@@ -51,7 +61,7 @@ jobs:
|
||||
|
||||
- name: Create kind cluster
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
uses: helm/kind-action@v1.12.0
|
||||
uses: helm/kind-action@92086f6be054225fa813e0a4b13787fc9088faab # ratchet:helm/kind-action@v1.13.0
|
||||
|
||||
- name: Pre-install cluster status check
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
@@ -118,7 +128,7 @@ jobs:
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
run: |
|
||||
echo "=== Starting chart installation with monitoring ==="
|
||||
|
||||
|
||||
# Function to monitor cluster state
|
||||
monitor_cluster() {
|
||||
while true; do
|
||||
@@ -140,11 +150,11 @@ jobs:
|
||||
sleep 60
|
||||
done
|
||||
}
|
||||
|
||||
|
||||
# Start monitoring in background
|
||||
monitor_cluster &
|
||||
MONITOR_PID=$!
|
||||
|
||||
|
||||
# Set up cleanup
|
||||
cleanup() {
|
||||
echo "=== Cleaning up monitoring process ==="
|
||||
@@ -153,10 +163,10 @@ jobs:
|
||||
kubectl get pods --all-namespaces
|
||||
kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -20
|
||||
}
|
||||
|
||||
|
||||
# Trap cleanup on exit
|
||||
trap cleanup EXIT
|
||||
|
||||
|
||||
# Run the actual installation with detailed logging
|
||||
echo "=== Starting ct install ==="
|
||||
set +e
|
||||
@@ -214,15 +224,15 @@ jobs:
|
||||
echo "=== Final cluster state ==="
|
||||
kubectl get pods --all-namespaces
|
||||
kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -10
|
||||
|
||||
|
||||
echo "=== Pod descriptions for debugging ==="
|
||||
kubectl describe pods --all-namespaces | grep -A 10 -B 3 "Failed\|Error\|Warning\|Pending" || echo "No problematic pods found"
|
||||
|
||||
|
||||
echo "=== Recent logs for debugging ==="
|
||||
kubectl logs --all-namespaces --tail=50 | grep -i "error\|timeout\|failed\|pull" || echo "No error logs found"
|
||||
|
||||
echo "=== Helm releases ==="
|
||||
helm list --all-namespaces
|
||||
# the following would install only changed charts, but we only have one chart so
|
||||
# the following would install only changed charts, but we only have one chart so
|
||||
# don't worry about that for now
|
||||
# run: ct install --target-branch ${{ github.event.repository.default_branch }}
|
||||
|
||||
338
.github/workflows/pr-integration-tests.yml
vendored
338
.github/workflows/pr-integration-tests.yml
vendored
@@ -10,12 +10,10 @@ on:
|
||||
- main
|
||||
- "release/**"
|
||||
|
||||
env:
|
||||
# Private Registry Configuration
|
||||
PRIVATE_REGISTRY: experimental-registry.blacksmith.sh:5000
|
||||
PRIVATE_REGISTRY_USERNAME: ${{ secrets.PRIVATE_REGISTRY_USERNAME }}
|
||||
PRIVATE_REGISTRY_PASSWORD: ${{ secrets.PRIVATE_REGISTRY_PASSWORD }}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
env:
|
||||
# Test Environment Variables
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
@@ -31,15 +29,20 @@ env:
|
||||
PERM_SYNC_SHAREPOINT_PRIVATE_KEY: ${{ secrets.PERM_SYNC_SHAREPOINT_PRIVATE_KEY }}
|
||||
PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD: ${{ secrets.PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD }}
|
||||
PERM_SYNC_SHAREPOINT_DIRECTORY_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_DIRECTORY_ID }}
|
||||
EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
|
||||
|
||||
jobs:
|
||||
discover-test-dirs:
|
||||
runs-on: blacksmith-2vcpu-ubuntu-2404-arm
|
||||
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
|
||||
runs-on: ubuntu-slim
|
||||
timeout-minutes: 45
|
||||
outputs:
|
||||
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Discover test directories
|
||||
id: set-matrix
|
||||
@@ -61,105 +64,113 @@ jobs:
|
||||
all_dirs="[${all_dirs%,}]"
|
||||
echo "test-dirs=$all_dirs" >> $GITHUB_OUTPUT
|
||||
|
||||
prepare-build:
|
||||
runs-on: blacksmith-2vcpu-ubuntu-2404-arm
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Prepare build
|
||||
uses: ./.github/actions/prepare-build
|
||||
|
||||
build-backend-image:
|
||||
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
|
||||
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Login to Private Registry
|
||||
uses: docker/login-action@v3
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
registry: ${{ env.PRIVATE_REGISTRY }}
|
||||
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
|
||||
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: useblacksmith/setup-docker-builder@v1
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
# needed for pulling Vespa, Redis, Postgres, and Minio images
|
||||
# otherwise, we hit the "Unauthenticated users" limit
|
||||
# https://docs.docker.com/docker-hub/usage/
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Build and push Backend Docker image
|
||||
uses: useblacksmith/build-push-action@v2
|
||||
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}
|
||||
push: true
|
||||
outputs: type=registry
|
||||
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }}
|
||||
cache-from: |
|
||||
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache
|
||||
type=registry,ref=onyxdotapp/onyx-backend:latest
|
||||
cache-to: |
|
||||
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache,mode=max
|
||||
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
|
||||
|
||||
|
||||
build-model-server-image:
|
||||
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
|
||||
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Login to Private Registry
|
||||
uses: docker/login-action@v3
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
registry: ${{ env.PRIVATE_REGISTRY }}
|
||||
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
|
||||
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: useblacksmith/setup-docker-builder@v1
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
# needed for pulling Vespa, Redis, Postgres, and Minio images
|
||||
# otherwise, we hit the "Unauthenticated users" limit
|
||||
# https://docs.docker.com/docker-hub/usage/
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Build and push Model Server Docker image
|
||||
uses: useblacksmith/build-push-action@v2
|
||||
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile.model_server
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}
|
||||
push: true
|
||||
outputs: type=registry
|
||||
provenance: false
|
||||
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
|
||||
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }}
|
||||
cache-from: |
|
||||
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
|
||||
type=registry,ref=onyxdotapp/onyx-model-server:latest
|
||||
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
|
||||
|
||||
|
||||
build-integration-image:
|
||||
needs: prepare-build
|
||||
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
|
||||
runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-build-integration-image", "extras=ecr-cache"]
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Login to Private Registry
|
||||
uses: docker/login-action@v3
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
registry: ${{ env.PRIVATE_REGISTRY }}
|
||||
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
|
||||
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
|
||||
|
||||
- name: Download OpenAPI artifacts
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: openapi-artifacts
|
||||
path: backend/generated/
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: useblacksmith/setup-docker-builder@v1
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
- name: Build and push integration test Docker image
|
||||
uses: useblacksmith/build-push-action@v2
|
||||
# needed for pulling openapitools/openapi-generator-cli
|
||||
# otherwise, we hit the "Unauthenticated users" limit
|
||||
# https://docs.docker.com/docker-hub/usage/
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/tests/integration/Dockerfile
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}
|
||||
push: true
|
||||
outputs: type=registry
|
||||
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Build and push integration test image with Docker Bake
|
||||
env:
|
||||
INTEGRATION_REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
|
||||
TAG: integration-test-${{ github.run_id }}
|
||||
run: |
|
||||
cd backend && docker buildx bake --push \
|
||||
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache \
|
||||
--set backend.cache-from=type=registry,ref=onyxdotapp/onyx-backend:latest \
|
||||
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache,mode=max \
|
||||
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache \
|
||||
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache,mode=max \
|
||||
integration
|
||||
|
||||
integration-tests:
|
||||
needs:
|
||||
@@ -169,7 +180,12 @@ jobs:
|
||||
build-model-server-image,
|
||||
build-integration-image,
|
||||
]
|
||||
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
|
||||
runs-on:
|
||||
- runs-on
|
||||
- runner=4cpu-linux-arm64
|
||||
- ${{ format('run-id={0}-integration-tests-job-{1}', github.run_id, strategy['job-index']) }}
|
||||
- extras=ecr-cache
|
||||
timeout-minutes: 45
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@@ -177,46 +193,27 @@ jobs:
|
||||
test-dir: ${{ fromJson(needs.discover-test-dirs.outputs.test-dirs) }}
|
||||
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Login to Private Registry
|
||||
uses: docker/login-action@v3
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
registry: ${{ env.PRIVATE_REGISTRY }}
|
||||
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
|
||||
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
|
||||
persist-credentials: false
|
||||
|
||||
# needed for pulling Vespa, Redis, Postgres, and Minio images
|
||||
# otherwise, we hit the "Unauthenticated users" limit
|
||||
# https://docs.docker.com/docker-hub/usage/
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Pull Docker images
|
||||
run: |
|
||||
# Pull all images from registry in parallel
|
||||
echo "Pulling Docker images in parallel..."
|
||||
# Pull images from private registry
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}) &
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}) &
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}) &
|
||||
|
||||
# Wait for all background jobs to complete
|
||||
wait
|
||||
echo "All Docker images pulled successfully"
|
||||
|
||||
# Re-tag to remove registry prefix for docker-compose
|
||||
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }} onyxdotapp/onyx-backend:test
|
||||
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }} onyxdotapp/onyx-model-server:test
|
||||
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }} onyxdotapp/onyx-integration:test
|
||||
|
||||
# NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
|
||||
# NOTE: don't need web server for integration tests
|
||||
- name: Start Docker containers
|
||||
env:
|
||||
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
|
||||
RUN_ID: ${{ github.run_id }}
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
|
||||
@@ -225,9 +222,11 @@ jobs:
|
||||
POSTGRES_USE_NULL_POOL=true \
|
||||
REQUIRE_EMAIL_VERIFICATION=false \
|
||||
DISABLE_TELEMETRY=true \
|
||||
IMAGE_TAG=test \
|
||||
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID} \
|
||||
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID} \
|
||||
INTEGRATION_TESTS_MODE=true \
|
||||
CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001 \
|
||||
MCP_SERVER_ENABLED=true \
|
||||
docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
|
||||
relational_db \
|
||||
index \
|
||||
@@ -236,43 +235,56 @@ jobs:
|
||||
api_server \
|
||||
inference_model_server \
|
||||
indexing_model_server \
|
||||
mcp_server \
|
||||
background \
|
||||
-d
|
||||
id: start_docker
|
||||
|
||||
- name: Wait for service to be ready
|
||||
- name: Wait for services to be ready
|
||||
run: |
|
||||
echo "Starting wait-for-service script..."
|
||||
|
||||
docker logs -f onyx-api_server-1 &
|
||||
wait_for_service() {
|
||||
local url=$1
|
||||
local label=$2
|
||||
local timeout=${3:-300} # default 5 minutes
|
||||
local start_time
|
||||
start_time=$(date +%s)
|
||||
|
||||
start_time=$(date +%s)
|
||||
timeout=300 # 5 minutes in seconds
|
||||
while true; do
|
||||
local current_time
|
||||
current_time=$(date +%s)
|
||||
local elapsed_time=$((current_time - start_time))
|
||||
|
||||
while true; do
|
||||
current_time=$(date +%s)
|
||||
elapsed_time=$((current_time - start_time))
|
||||
|
||||
if [ $elapsed_time -ge $timeout ]; then
|
||||
echo "Timeout reached. Service did not become ready in 5 minutes."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Use curl with error handling to ignore specific exit code 56
|
||||
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
|
||||
|
||||
if [ "$response" = "200" ]; then
|
||||
echo "Service is ready!"
|
||||
break
|
||||
elif [ "$response" = "curl_error" ]; then
|
||||
echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
|
||||
else
|
||||
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
|
||||
fi
|
||||
|
||||
sleep 5
|
||||
done
|
||||
echo "Finished waiting for service."
|
||||
if [ $elapsed_time -ge $timeout ]; then
|
||||
echo "Timeout reached. ${label} did not become ready in $timeout seconds."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
local response
|
||||
response=$(curl -s -o /dev/null -w "%{http_code}" "$url" || echo "curl_error")
|
||||
|
||||
if [ "$response" = "200" ]; then
|
||||
echo "${label} is ready!"
|
||||
break
|
||||
elif [ "$response" = "curl_error" ]; then
|
||||
echo "Curl encountered an error while checking ${label}. Retrying in 5 seconds..."
|
||||
else
|
||||
echo "${label} not ready yet (HTTP status $response). Retrying in 5 seconds..."
|
||||
fi
|
||||
|
||||
sleep 5
|
||||
done
|
||||
}
|
||||
|
||||
wait_for_service "http://localhost:8080/health" "API server"
|
||||
test_dir="${{ matrix.test-dir.path }}"
|
||||
if [ "$test_dir" = "tests/mcp" ]; then
|
||||
wait_for_service "http://localhost:8090/health" "MCP server"
|
||||
else
|
||||
echo "Skipping MCP server wait for non-MCP suite: $test_dir"
|
||||
fi
|
||||
echo "Finished waiting for services."
|
||||
|
||||
- name: Start Mock Services
|
||||
run: |
|
||||
@@ -281,7 +293,7 @@ jobs:
|
||||
-p mock-it-services-stack up -d
|
||||
|
||||
- name: Run Integration Tests for ${{ matrix.test-dir.name }}
|
||||
uses: nick-fields/retry@v3
|
||||
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
|
||||
with:
|
||||
timeout_minutes: 20
|
||||
max_attempts: 3
|
||||
@@ -301,7 +313,10 @@ jobs:
|
||||
-e VESPA_HOST=index \
|
||||
-e REDIS_HOST=cache \
|
||||
-e API_SERVER_HOST=api_server \
|
||||
-e MCP_SERVER_HOST=mcp_server \
|
||||
-e MCP_SERVER_PORT=8090 \
|
||||
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
|
||||
-e EXA_API_KEY=${EXA_API_KEY} \
|
||||
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
|
||||
-e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
|
||||
-e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
|
||||
@@ -318,7 +333,7 @@ jobs:
|
||||
-e TEST_WEB_HOSTNAME=test-runner \
|
||||
-e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
|
||||
-e MOCK_CONNECTOR_SERVER_PORT=8001 \
|
||||
onyxdotapp/onyx-integration:test \
|
||||
${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \
|
||||
/app/tests/integration/${{ matrix.test-dir.path }}
|
||||
|
||||
# ------------------------------------------------------------
|
||||
@@ -337,18 +352,12 @@ jobs:
|
||||
|
||||
- name: Upload logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
|
||||
with:
|
||||
name: docker-all-logs-${{ matrix.test-dir.name }}
|
||||
path: ${{ github.workspace }}/docker-compose.log
|
||||
# ------------------------------------------------------------
|
||||
|
||||
- name: Stop Docker containers
|
||||
if: always()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose down -v
|
||||
|
||||
|
||||
multitenant-tests:
|
||||
needs:
|
||||
@@ -357,36 +366,26 @@ jobs:
|
||||
build-model-server-image,
|
||||
build-integration-image,
|
||||
]
|
||||
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
|
||||
runs-on: [runs-on, runner=8cpu-linux-arm64, "run-id=${{ github.run_id }}-multitenant-tests", "extras=ecr-cache"]
|
||||
timeout-minutes: 45
|
||||
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Login to Private Registry
|
||||
uses: docker/login-action@v3
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
registry: ${{ env.PRIVATE_REGISTRY }}
|
||||
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
|
||||
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
|
||||
persist-credentials: false
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Pull Docker images
|
||||
run: |
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}) &
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}) &
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}) &
|
||||
wait
|
||||
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }} onyxdotapp/onyx-backend:test
|
||||
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }} onyxdotapp/onyx-model-server:test
|
||||
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }} onyxdotapp/onyx-integration:test
|
||||
|
||||
- name: Start Docker containers for multi-tenant tests
|
||||
env:
|
||||
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
|
||||
RUN_ID: ${{ github.run_id }}
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
|
||||
@@ -394,8 +393,10 @@ jobs:
|
||||
AUTH_TYPE=cloud \
|
||||
REQUIRE_EMAIL_VERIFICATION=false \
|
||||
DISABLE_TELEMETRY=true \
|
||||
IMAGE_TAG=test \
|
||||
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID} \
|
||||
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID} \
|
||||
DEV_MODE=true \
|
||||
MCP_SERVER_ENABLED=true \
|
||||
docker compose -f docker-compose.multitenant-dev.yml up \
|
||||
relational_db \
|
||||
index \
|
||||
@@ -404,6 +405,7 @@ jobs:
|
||||
api_server \
|
||||
inference_model_server \
|
||||
indexing_model_server \
|
||||
mcp_server \
|
||||
background \
|
||||
-d
|
||||
id: start_docker_multi_tenant
|
||||
@@ -435,6 +437,9 @@ jobs:
|
||||
echo "Finished waiting for service."
|
||||
|
||||
- name: Run Multi-Tenant Integration Tests
|
||||
env:
|
||||
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
|
||||
RUN_ID: ${{ github.run_id }}
|
||||
run: |
|
||||
echo "Running multi-tenant integration tests..."
|
||||
docker run --rm --network onyx_default \
|
||||
@@ -449,7 +454,10 @@ jobs:
|
||||
-e VESPA_HOST=index \
|
||||
-e REDIS_HOST=cache \
|
||||
-e API_SERVER_HOST=api_server \
|
||||
-e MCP_SERVER_HOST=mcp_server \
|
||||
-e MCP_SERVER_PORT=8090 \
|
||||
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
|
||||
-e EXA_API_KEY=${EXA_API_KEY} \
|
||||
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
|
||||
-e TEST_WEB_HOSTNAME=test-runner \
|
||||
-e AUTH_TYPE=cloud \
|
||||
@@ -457,9 +465,8 @@ jobs:
|
||||
-e SKIP_RESET=true \
|
||||
-e REQUIRE_EMAIL_VERIFICATION=false \
|
||||
-e DISABLE_TELEMETRY=true \
|
||||
-e IMAGE_TAG=test \
|
||||
-e DEV_MODE=true \
|
||||
onyxdotapp/onyx-integration:test \
|
||||
${ECR_CACHE}:integration-test-${RUN_ID} \
|
||||
/app/tests/integration/multitenant_tests
|
||||
|
||||
- name: Dump API server logs (multi-tenant)
|
||||
@@ -476,7 +483,7 @@ jobs:
|
||||
|
||||
- name: Upload logs (multi-tenant)
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
|
||||
with:
|
||||
name: docker-all-logs-multitenant
|
||||
path: ${{ github.workspace }}/docker-compose-multitenant.log
|
||||
@@ -487,18 +494,13 @@ jobs:
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.multitenant-dev.yml down -v
|
||||
|
||||
required:
|
||||
runs-on: blacksmith-2vcpu-ubuntu-2404-arm
|
||||
required:
|
||||
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
|
||||
runs-on: ubuntu-slim
|
||||
timeout-minutes: 45
|
||||
needs: [integration-tests, multitenant-tests]
|
||||
if: ${{ always() }}
|
||||
steps:
|
||||
- uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const needs = ${{ toJSON(needs) }};
|
||||
const failed = Object.values(needs).some(n => n.result !== 'success');
|
||||
if (failed) {
|
||||
core.setFailed('One or more upstream jobs failed or were cancelled.');
|
||||
} else {
|
||||
core.notice('All required jobs succeeded.');
|
||||
}
|
||||
- name: Check job status
|
||||
if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped') }}
|
||||
run: exit 1
|
||||
|
||||
14
.github/workflows/pr-jest-tests.yml
vendored
14
.github/workflows/pr-jest-tests.yml
vendored
@@ -5,18 +5,26 @@ concurrency:
|
||||
|
||||
on: push
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
jest-tests:
|
||||
name: Jest Tests
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Setup node
|
||||
uses: actions/setup-node@v4
|
||||
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
|
||||
with:
|
||||
node-version: 22
|
||||
cache: 'npm'
|
||||
cache-dependency-path: ./web/package-lock.json
|
||||
|
||||
- name: Install node dependencies
|
||||
working-directory: ./web
|
||||
@@ -28,7 +36,7 @@ jobs:
|
||||
|
||||
- name: Upload coverage reports
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
|
||||
with:
|
||||
name: jest-coverage-${{ github.run_id }}
|
||||
path: ./web/coverage
|
||||
|
||||
6
.github/workflows/pr-labeler.yml
vendored
6
.github/workflows/pr-labeler.yml
vendored
@@ -1,7 +1,7 @@
|
||||
name: PR Labeler
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
types:
|
||||
@@ -12,11 +12,11 @@ on:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
|
||||
jobs:
|
||||
validate_pr_title:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- name: Check PR title for Conventional Commits
|
||||
env:
|
||||
@@ -27,7 +27,7 @@ jobs:
|
||||
echo "::error::❌ Your PR title does not follow the Conventional Commits format.
|
||||
This check ensures that all pull requests use clear, consistent titles that help automate changelogs and improve project history.
|
||||
|
||||
Please update your PR title to follow the Conventional Commits style.
|
||||
Please update your PR title to follow the Conventional Commits style.
|
||||
Here is a link to a blog explaining the reason why we've included the Conventional Commits style into our PR titles: https://xfuture-blog.com/working-with-conventional-commits
|
||||
|
||||
**Here are some examples of valid PR titles:**
|
||||
|
||||
7
.github/workflows/pr-linear-check.yml
vendored
7
.github/workflows/pr-linear-check.yml
vendored
@@ -1,12 +1,19 @@
|
||||
name: Ensure PR references Linear
|
||||
concurrency:
|
||||
group: Ensure-PR-references-Linear-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, edited, reopened, synchronize]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
linear-check:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- name: Check PR body for Linear link or override
|
||||
env:
|
||||
|
||||
296
.github/workflows/pr-mit-integration-tests.yml
vendored
296
.github/workflows/pr-mit-integration-tests.yml
vendored
@@ -7,15 +7,14 @@ on:
|
||||
merge_group:
|
||||
types: [checks_requested]
|
||||
|
||||
env:
|
||||
# Private Registry Configuration
|
||||
PRIVATE_REGISTRY: experimental-registry.blacksmith.sh:5000
|
||||
PRIVATE_REGISTRY_USERNAME: ${{ secrets.PRIVATE_REGISTRY_USERNAME }}
|
||||
PRIVATE_REGISTRY_PASSWORD: ${{ secrets.PRIVATE_REGISTRY_PASSWORD }}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
env:
|
||||
# Test Environment Variables
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
|
||||
CONFLUENCE_TEST_SPACE_URL: ${{ vars.CONFLUENCE_TEST_SPACE_URL }}
|
||||
CONFLUENCE_USER_NAME: ${{ vars.CONFLUENCE_USER_NAME }}
|
||||
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
|
||||
@@ -31,12 +30,16 @@ env:
|
||||
|
||||
jobs:
|
||||
discover-test-dirs:
|
||||
runs-on: blacksmith-2vcpu-ubuntu-2404-arm
|
||||
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
|
||||
runs-on: ubuntu-slim
|
||||
timeout-minutes: 45
|
||||
outputs:
|
||||
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Discover test directories
|
||||
id: set-matrix
|
||||
@@ -58,106 +61,109 @@ jobs:
|
||||
all_dirs="[${all_dirs%,}]"
|
||||
echo "test-dirs=$all_dirs" >> $GITHUB_OUTPUT
|
||||
|
||||
prepare-build:
|
||||
runs-on: blacksmith-2vcpu-ubuntu-2404-arm
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Prepare build
|
||||
uses: ./.github/actions/prepare-build
|
||||
|
||||
build-backend-image:
|
||||
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
|
||||
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Login to Private Registry
|
||||
uses: docker/login-action@v3
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
registry: ${{ env.PRIVATE_REGISTRY }}
|
||||
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
|
||||
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: useblacksmith/setup-docker-builder@v1
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
# needed for pulling Vespa, Redis, Postgres, and Minio images
|
||||
# otherwise, we hit the "Unauthenticated users" limit
|
||||
# https://docs.docker.com/docker-hub/usage/
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Build and push Backend Docker image
|
||||
uses: useblacksmith/build-push-action@v2
|
||||
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}
|
||||
push: true
|
||||
outputs: type=registry
|
||||
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }}
|
||||
cache-from: |
|
||||
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache
|
||||
type=registry,ref=onyxdotapp/onyx-backend:latest
|
||||
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache,mode=max
|
||||
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
|
||||
|
||||
|
||||
build-model-server-image:
|
||||
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
|
||||
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Login to Private Registry
|
||||
uses: docker/login-action@v3
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
registry: ${{ env.PRIVATE_REGISTRY }}
|
||||
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
|
||||
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: useblacksmith/setup-docker-builder@v1
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
# needed for pulling Vespa, Redis, Postgres, and Minio images
|
||||
# otherwise, we hit the "Unauthenticated users" limit
|
||||
# https://docs.docker.com/docker-hub/usage/
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Build and push Model Server Docker image
|
||||
uses: useblacksmith/build-push-action@v2
|
||||
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile.model_server
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}
|
||||
push: true
|
||||
outputs: type=registry
|
||||
provenance: false
|
||||
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
|
||||
|
||||
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }}
|
||||
cache-from: |
|
||||
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
|
||||
type=registry,ref=onyxdotapp/onyx-model-server:latest
|
||||
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
|
||||
|
||||
build-integration-image:
|
||||
needs: prepare-build
|
||||
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
|
||||
runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-build-integration-image", "extras=ecr-cache"]
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Login to Private Registry
|
||||
uses: docker/login-action@v3
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
registry: ${{ env.PRIVATE_REGISTRY }}
|
||||
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
|
||||
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
|
||||
|
||||
- name: Download OpenAPI artifacts
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: openapi-artifacts
|
||||
path: backend/generated/
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: useblacksmith/setup-docker-builder@v1
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
- name: Build and push integration test Docker image
|
||||
uses: useblacksmith/build-push-action@v2
|
||||
# needed for pulling openapitools/openapi-generator-cli
|
||||
# otherwise, we hit the "Unauthenticated users" limit
|
||||
# https://docs.docker.com/docker-hub/usage/
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/tests/integration/Dockerfile
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}
|
||||
push: true
|
||||
outputs: type=registry
|
||||
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Build and push integration test image with Docker Bake
|
||||
env:
|
||||
INTEGRATION_REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
|
||||
TAG: integration-test-${{ github.run_id }}
|
||||
run: |
|
||||
cd backend && docker buildx bake --push \
|
||||
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache \
|
||||
--set backend.cache-from=type=registry,ref=onyxdotapp/onyx-backend:latest \
|
||||
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache,mode=max \
|
||||
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache \
|
||||
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache,mode=max \
|
||||
integration
|
||||
|
||||
integration-tests-mit:
|
||||
needs:
|
||||
@@ -167,8 +173,12 @@ jobs:
|
||||
build-model-server-image,
|
||||
build-integration-image,
|
||||
]
|
||||
# See https://docs.blacksmith.sh/blacksmith-runners/overview
|
||||
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
|
||||
runs-on:
|
||||
- runs-on
|
||||
- runner=4cpu-linux-arm64
|
||||
- ${{ format('run-id={0}-integration-tests-mit-job-{1}', github.run_id, strategy['job-index']) }}
|
||||
- extras=ecr-cache
|
||||
timeout-minutes: 45
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@@ -176,46 +186,27 @@ jobs:
|
||||
test-dir: ${{ fromJson(needs.discover-test-dirs.outputs.test-dirs) }}
|
||||
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Login to Private Registry
|
||||
uses: docker/login-action@v3
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
registry: ${{ env.PRIVATE_REGISTRY }}
|
||||
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
|
||||
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
|
||||
persist-credentials: false
|
||||
|
||||
# needed for pulling Vespa, Redis, Postgres, and Minio images
|
||||
# otherwise, we hit the "Unauthenticated users" limit
|
||||
# https://docs.docker.com/docker-hub/usage/
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Pull Docker images
|
||||
run: |
|
||||
# Pull all images from registry in parallel
|
||||
echo "Pulling Docker images in parallel..."
|
||||
# Pull images from private registry
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}) &
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}) &
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}) &
|
||||
|
||||
# Wait for all background jobs to complete
|
||||
wait
|
||||
echo "All Docker images pulled successfully"
|
||||
|
||||
# Re-tag to remove registry prefix for docker-compose
|
||||
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }} onyxdotapp/onyx-backend:test
|
||||
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }} onyxdotapp/onyx-model-server:test
|
||||
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }} onyxdotapp/onyx-integration:test
|
||||
|
||||
# NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
|
||||
# NOTE: don't need web server for integration tests
|
||||
- name: Start Docker containers
|
||||
env:
|
||||
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
|
||||
RUN_ID: ${{ github.run_id }}
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
AUTH_TYPE=basic \
|
||||
@@ -223,8 +214,10 @@ jobs:
|
||||
POSTGRES_USE_NULL_POOL=true \
|
||||
REQUIRE_EMAIL_VERIFICATION=false \
|
||||
DISABLE_TELEMETRY=true \
|
||||
IMAGE_TAG=test \
|
||||
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID} \
|
||||
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID} \
|
||||
INTEGRATION_TESTS_MODE=true \
|
||||
MCP_SERVER_ENABLED=true \
|
||||
docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
|
||||
relational_db \
|
||||
index \
|
||||
@@ -233,43 +226,56 @@ jobs:
|
||||
api_server \
|
||||
inference_model_server \
|
||||
indexing_model_server \
|
||||
mcp_server \
|
||||
background \
|
||||
-d
|
||||
id: start_docker
|
||||
|
||||
- name: Wait for service to be ready
|
||||
- name: Wait for services to be ready
|
||||
run: |
|
||||
echo "Starting wait-for-service script..."
|
||||
|
||||
docker logs -f onyx-api_server-1 &
|
||||
wait_for_service() {
|
||||
local url=$1
|
||||
local label=$2
|
||||
local timeout=${3:-300} # default 5 minutes
|
||||
local start_time
|
||||
start_time=$(date +%s)
|
||||
|
||||
start_time=$(date +%s)
|
||||
timeout=300 # 5 minutes in seconds
|
||||
while true; do
|
||||
local current_time
|
||||
current_time=$(date +%s)
|
||||
local elapsed_time=$((current_time - start_time))
|
||||
|
||||
while true; do
|
||||
current_time=$(date +%s)
|
||||
elapsed_time=$((current_time - start_time))
|
||||
|
||||
if [ $elapsed_time -ge $timeout ]; then
|
||||
echo "Timeout reached. Service did not become ready in 5 minutes."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Use curl with error handling to ignore specific exit code 56
|
||||
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
|
||||
|
||||
if [ "$response" = "200" ]; then
|
||||
echo "Service is ready!"
|
||||
break
|
||||
elif [ "$response" = "curl_error" ]; then
|
||||
echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
|
||||
else
|
||||
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
|
||||
fi
|
||||
|
||||
sleep 5
|
||||
done
|
||||
echo "Finished waiting for service."
|
||||
if [ $elapsed_time -ge $timeout ]; then
|
||||
echo "Timeout reached. ${label} did not become ready in $timeout seconds."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
local response
|
||||
response=$(curl -s -o /dev/null -w "%{http_code}" "$url" || echo "curl_error")
|
||||
|
||||
if [ "$response" = "200" ]; then
|
||||
echo "${label} is ready!"
|
||||
break
|
||||
elif [ "$response" = "curl_error" ]; then
|
||||
echo "Curl encountered an error while checking ${label}. Retrying in 5 seconds..."
|
||||
else
|
||||
echo "${label} not ready yet (HTTP status $response). Retrying in 5 seconds..."
|
||||
fi
|
||||
|
||||
sleep 5
|
||||
done
|
||||
}
|
||||
|
||||
wait_for_service "http://localhost:8080/health" "API server"
|
||||
test_dir="${{ matrix.test-dir.path }}"
|
||||
if [ "$test_dir" = "tests/mcp" ]; then
|
||||
wait_for_service "http://localhost:8090/health" "MCP server"
|
||||
else
|
||||
echo "Skipping MCP server wait for non-MCP suite: $test_dir"
|
||||
fi
|
||||
echo "Finished waiting for services."
|
||||
|
||||
- name: Start Mock Services
|
||||
run: |
|
||||
@@ -279,7 +285,7 @@ jobs:
|
||||
|
||||
# NOTE: Use pre-ping/null to reduce flakiness due to dropped connections
|
||||
- name: Run Integration Tests for ${{ matrix.test-dir.name }}
|
||||
uses: nick-fields/retry@v3
|
||||
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
|
||||
with:
|
||||
timeout_minutes: 20
|
||||
max_attempts: 3
|
||||
@@ -299,7 +305,10 @@ jobs:
|
||||
-e VESPA_HOST=index \
|
||||
-e REDIS_HOST=cache \
|
||||
-e API_SERVER_HOST=api_server \
|
||||
-e MCP_SERVER_HOST=mcp_server \
|
||||
-e MCP_SERVER_PORT=8090 \
|
||||
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
|
||||
-e EXA_API_KEY=${EXA_API_KEY} \
|
||||
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
|
||||
-e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
|
||||
-e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
|
||||
@@ -316,7 +325,7 @@ jobs:
|
||||
-e TEST_WEB_HOSTNAME=test-runner \
|
||||
-e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
|
||||
-e MOCK_CONNECTOR_SERVER_PORT=8001 \
|
||||
onyxdotapp/onyx-integration:test \
|
||||
${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \
|
||||
/app/tests/integration/${{ matrix.test-dir.path }}
|
||||
|
||||
# ------------------------------------------------------------
|
||||
@@ -335,31 +344,20 @@ jobs:
|
||||
|
||||
- name: Upload logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
|
||||
with:
|
||||
name: docker-all-logs-${{ matrix.test-dir.name }}
|
||||
path: ${{ github.workspace }}/docker-compose.log
|
||||
# ------------------------------------------------------------
|
||||
|
||||
- name: Stop Docker containers
|
||||
if: always()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose down -v
|
||||
|
||||
|
||||
required:
|
||||
runs-on: blacksmith-2vcpu-ubuntu-2404-arm
|
||||
required:
|
||||
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
|
||||
runs-on: ubuntu-slim
|
||||
timeout-minutes: 45
|
||||
needs: [integration-tests-mit]
|
||||
if: ${{ always() }}
|
||||
steps:
|
||||
- uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const needs = ${{ toJSON(needs) }};
|
||||
const failed = Object.values(needs).some(n => n.result !== 'success');
|
||||
if (failed) {
|
||||
core.setFailed('One or more upstream jobs failed or were cancelled.');
|
||||
} else {
|
||||
core.notice('All required jobs succeeded.');
|
||||
}
|
||||
- name: Check job status
|
||||
if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped') }}
|
||||
run: exit 1
|
||||
|
||||
441
.github/workflows/pr-playwright-tests.yml
vendored
441
.github/workflows/pr-playwright-tests.yml
vendored
@@ -5,14 +5,10 @@ concurrency:
|
||||
|
||||
on: push
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
env:
|
||||
# AWS ECR Configuration
|
||||
AWS_REGION: ${{ secrets.AWS_REGION || 'us-west-2' }}
|
||||
ECR_REGISTRY: ${{ secrets.ECR_REGISTRY }}
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_ECR }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_ECR }}
|
||||
BUILDX_NO_DEFAULT_ATTESTATIONS: 1
|
||||
|
||||
# Test Environment Variables
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
@@ -23,184 +19,222 @@ env:
|
||||
SLACK_CLIENT_ID: ${{ secrets.SLACK_CLIENT_ID }}
|
||||
SLACK_CLIENT_SECRET: ${{ secrets.SLACK_CLIENT_SECRET }}
|
||||
|
||||
# for MCP Oauth tests
|
||||
MCP_OAUTH_CLIENT_ID: ${{ secrets.MCP_OAUTH_CLIENT_ID }}
|
||||
MCP_OAUTH_CLIENT_SECRET: ${{ secrets.MCP_OAUTH_CLIENT_SECRET }}
|
||||
MCP_OAUTH_ISSUER: ${{ secrets.MCP_OAUTH_ISSUER }}
|
||||
MCP_OAUTH_JWKS_URI: ${{ secrets.MCP_OAUTH_JWKS_URI }}
|
||||
MCP_OAUTH_USERNAME: ${{ vars.MCP_OAUTH_USERNAME }}
|
||||
MCP_OAUTH_PASSWORD: ${{ secrets.MCP_OAUTH_PASSWORD }}
|
||||
|
||||
# for MCP API Key tests
|
||||
MCP_API_KEY: test-api-key-12345
|
||||
MCP_API_KEY_TEST_PORT: 8005
|
||||
MCP_API_KEY_TEST_URL: http://host.docker.internal:8005/mcp
|
||||
MCP_API_KEY_SERVER_HOST: 0.0.0.0
|
||||
MCP_API_KEY_SERVER_PUBLIC_HOST: host.docker.internal
|
||||
|
||||
MOCK_LLM_RESPONSE: true
|
||||
MCP_TEST_SERVER_PORT: 8004
|
||||
MCP_TEST_SERVER_URL: http://host.docker.internal:8004/mcp
|
||||
MCP_TEST_SERVER_PUBLIC_URL: http://host.docker.internal:8004/mcp
|
||||
MCP_TEST_SERVER_BIND_HOST: 0.0.0.0
|
||||
MCP_TEST_SERVER_PUBLIC_HOST: host.docker.internal
|
||||
MCP_SERVER_HOST: 0.0.0.0
|
||||
MCP_SERVER_PUBLIC_HOST: host.docker.internal
|
||||
MCP_SERVER_PUBLIC_URL: http://host.docker.internal:8004/mcp
|
||||
|
||||
jobs:
|
||||
build-web-image:
|
||||
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
|
||||
runs-on: [runs-on, runner=4cpu-linux-arm64, "run-id=${{ github.run_id }}-build-web-image", "extras=ecr-cache"]
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
|
||||
aws-region: ${{ env.AWS_REGION }}
|
||||
|
||||
- name: Login to Amazon ECR
|
||||
id: login-ecr
|
||||
uses: aws-actions/amazon-ecr-login@v2
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: useblacksmith/setup-docker-builder@v1
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
- name: Build and push Web Docker image
|
||||
uses: useblacksmith/build-push-action@v2
|
||||
with:
|
||||
context: ./web
|
||||
file: ./web/Dockerfile
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.ECR_REGISTRY }}/integration-test-onyx-web-server:playwright-test-${{ github.run_id }}
|
||||
provenance: false
|
||||
sbom: false
|
||||
push: true
|
||||
outputs: type=registry
|
||||
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
|
||||
|
||||
build-backend-image:
|
||||
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
|
||||
aws-region: ${{ env.AWS_REGION }}
|
||||
|
||||
- name: Login to Amazon ECR
|
||||
id: login-ecr
|
||||
uses: aws-actions/amazon-ecr-login@v2
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: useblacksmith/setup-docker-builder@v1
|
||||
|
||||
- name: Build and push Backend Docker image
|
||||
uses: useblacksmith/build-push-action@v2
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.ECR_REGISTRY }}/integration-test-onyx-backend:playwright-test-${{ github.run_id }}
|
||||
provenance: false
|
||||
sbom: false
|
||||
push: true
|
||||
outputs: type=registry
|
||||
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
|
||||
|
||||
build-model-server-image:
|
||||
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
|
||||
aws-region: ${{ env.AWS_REGION }}
|
||||
|
||||
- name: Login to Amazon ECR
|
||||
id: login-ecr
|
||||
uses: aws-actions/amazon-ecr-login@v2
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: useblacksmith/setup-docker-builder@v1
|
||||
|
||||
- name: Build and push Model Server Docker image
|
||||
uses: useblacksmith/build-push-action@v2
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile.model_server
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.ECR_REGISTRY }}/integration-test-onyx-model-server:playwright-test-${{ github.run_id }}
|
||||
provenance: false
|
||||
sbom: false
|
||||
push: true
|
||||
outputs: type=registry
|
||||
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
|
||||
|
||||
playwright-tests:
|
||||
needs: [build-web-image, build-backend-image, build-model-server-image]
|
||||
name: Playwright Tests
|
||||
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
|
||||
aws-region: ${{ env.AWS_REGION }}
|
||||
|
||||
- name: Login to Amazon ECR
|
||||
id: login-ecr
|
||||
uses: aws-actions/amazon-ecr-login@v2
|
||||
|
||||
# needed for pulling Vespa, Redis, Postgres, and Minio images
|
||||
# otherwise, we hit the "Unauthenticated users" limit
|
||||
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
|
||||
# https://docs.docker.com/docker-hub/usage/
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Pull Docker images
|
||||
run: |
|
||||
# Pull all images from ECR in parallel
|
||||
echo "Pulling Docker images in parallel..."
|
||||
(docker pull ${{ env.ECR_REGISTRY }}/integration-test-onyx-web-server:playwright-test-${{ github.run_id }}) &
|
||||
(docker pull ${{ env.ECR_REGISTRY }}/integration-test-onyx-backend:playwright-test-${{ github.run_id }}) &
|
||||
(docker pull ${{ env.ECR_REGISTRY }}/integration-test-onyx-model-server:playwright-test-${{ github.run_id }}) &
|
||||
- name: Build and push Web Docker image
|
||||
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
|
||||
with:
|
||||
context: ./web
|
||||
file: ./web/Dockerfile
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-${{ github.run_id }}
|
||||
push: true
|
||||
cache-from: |
|
||||
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache
|
||||
type=registry,ref=onyxdotapp/onyx-web-server:latest
|
||||
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache,mode=max
|
||||
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
|
||||
|
||||
# Wait for all background jobs to complete
|
||||
wait
|
||||
echo "All Docker images pulled successfully"
|
||||
build-backend-image:
|
||||
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
|
||||
# Re-tag with expected names for docker-compose
|
||||
docker tag ${{ env.ECR_REGISTRY }}/integration-test-onyx-web-server:playwright-test-${{ github.run_id }} onyxdotapp/onyx-web-server:test
|
||||
docker tag ${{ env.ECR_REGISTRY }}/integration-test-onyx-backend:playwright-test-${{ github.run_id }} onyxdotapp/onyx-backend:test
|
||||
docker tag ${{ env.ECR_REGISTRY }}/integration-test-onyx-model-server:playwright-test-${{ github.run_id }} onyxdotapp/onyx-model-server:test
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
|
||||
# https://docs.docker.com/docker-hub/usage/
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Build and push Backend Docker image
|
||||
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-${{ github.run_id }}
|
||||
push: true
|
||||
cache-from: |
|
||||
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache
|
||||
type=registry,ref=onyxdotapp/onyx-backend:latest
|
||||
cache-to: |
|
||||
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache,mode=max
|
||||
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
|
||||
|
||||
build-model-server-image:
|
||||
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
|
||||
# https://docs.docker.com/docker-hub/usage/
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Build and push Model Server Docker image
|
||||
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile.model_server
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-${{ github.run_id }}
|
||||
push: true
|
||||
cache-from: |
|
||||
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
|
||||
type=registry,ref=onyxdotapp/onyx-model-server:latest
|
||||
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
|
||||
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
|
||||
|
||||
playwright-tests:
|
||||
needs: [build-web-image, build-backend-image, build-model-server-image]
|
||||
name: Playwright Tests (${{ matrix.project }})
|
||||
runs-on:
|
||||
- runs-on
|
||||
- runner=8cpu-linux-arm64
|
||||
- "run-id=${{ github.run_id }}-playwright-tests-${{ matrix.project }}"
|
||||
- "extras=ecr-cache"
|
||||
- volume=50gb
|
||||
timeout-minutes: 45
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
project: [admin, no-auth, exclusive]
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Setup node
|
||||
uses: actions/setup-node@v4
|
||||
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
|
||||
with:
|
||||
node-version: 22
|
||||
cache: 'npm'
|
||||
cache-dependency-path: ./web/package-lock.json
|
||||
|
||||
- name: Install node dependencies
|
||||
working-directory: ./web
|
||||
run: npm ci
|
||||
|
||||
- name: Cache playwright cache
|
||||
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
|
||||
with:
|
||||
path: ~/.cache/ms-playwright
|
||||
key: ${{ runner.os }}-playwright-npm-${{ hashFiles('web/package-lock.json') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-playwright-npm-
|
||||
|
||||
- name: Install playwright browsers
|
||||
working-directory: ./web
|
||||
run: npx playwright install --with-deps
|
||||
|
||||
- name: Create .env file for Docker Compose
|
||||
env:
|
||||
OPENAI_API_KEY_VALUE: ${{ env.OPENAI_API_KEY }}
|
||||
EXA_API_KEY_VALUE: ${{ env.EXA_API_KEY }}
|
||||
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
|
||||
RUN_ID: ${{ github.run_id }}
|
||||
run: |
|
||||
cat <<EOF > deployment/docker_compose/.env
|
||||
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
|
||||
AUTH_TYPE=basic
|
||||
GEN_AI_API_KEY=${{ env.OPENAI_API_KEY }}
|
||||
EXA_API_KEY=${{ env.EXA_API_KEY }}
|
||||
GEN_AI_API_KEY=${OPENAI_API_KEY_VALUE}
|
||||
EXA_API_KEY=${EXA_API_KEY_VALUE}
|
||||
REQUIRE_EMAIL_VERIFICATION=false
|
||||
DISABLE_TELEMETRY=true
|
||||
IMAGE_TAG=test
|
||||
ONYX_BACKEND_IMAGE=${ECR_CACHE}:playwright-test-backend-${RUN_ID}
|
||||
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:playwright-test-model-server-${RUN_ID}
|
||||
ONYX_WEB_SERVER_IMAGE=${ECR_CACHE}:playwright-test-web-${RUN_ID}
|
||||
EOF
|
||||
if [ "${{ matrix.project }}" = "no-auth" ]; then
|
||||
echo "PLAYWRIGHT_FORCE_EMPTY_LLM_PROVIDERS=true" >> deployment/docker_compose/.env
|
||||
fi
|
||||
|
||||
# needed for pulling Vespa, Redis, Postgres, and Minio images
|
||||
# otherwise, we hit the "Unauthenticated users" limit
|
||||
# https://docs.docker.com/docker-hub/usage/
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Start Docker containers
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d
|
||||
docker compose -f docker-compose.yml -f docker-compose.dev.yml -f docker-compose.mcp-oauth-test.yml -f docker-compose.mcp-api-key-test.yml up -d
|
||||
id: start_docker
|
||||
|
||||
- name: Wait for service to be ready
|
||||
@@ -215,15 +249,15 @@ jobs:
|
||||
while true; do
|
||||
current_time=$(date +%s)
|
||||
elapsed_time=$((current_time - start_time))
|
||||
|
||||
|
||||
if [ $elapsed_time -ge $timeout ]; then
|
||||
echo "Timeout reached. Service did not become ready in 5 minutes."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
# Use curl with error handling to ignore specific exit code 56
|
||||
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
|
||||
|
||||
|
||||
if [ "$response" = "200" ]; then
|
||||
echo "Service is ready!"
|
||||
break
|
||||
@@ -232,45 +266,130 @@ jobs:
|
||||
else
|
||||
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
|
||||
fi
|
||||
|
||||
|
||||
sleep 5
|
||||
done
|
||||
echo "Finished waiting for service."
|
||||
|
||||
- name: Wait for MCP OAuth mock server
|
||||
run: |
|
||||
echo "Waiting for MCP OAuth mock server on port ${MCP_TEST_SERVER_PORT:-8004}..."
|
||||
start_time=$(date +%s)
|
||||
timeout=120
|
||||
|
||||
while true; do
|
||||
current_time=$(date +%s)
|
||||
elapsed_time=$((current_time - start_time))
|
||||
|
||||
if [ $elapsed_time -ge $timeout ]; then
|
||||
echo "Timeout reached. MCP OAuth mock server did not become ready in ${timeout}s."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if curl -sf "http://localhost:${MCP_TEST_SERVER_PORT:-8004}/healthz" > /dev/null; then
|
||||
echo "MCP OAuth mock server is ready!"
|
||||
break
|
||||
fi
|
||||
|
||||
sleep 3
|
||||
done
|
||||
|
||||
- name: Wait for MCP API Key mock server
|
||||
run: |
|
||||
echo "Waiting for MCP API Key mock server on port ${MCP_API_KEY_TEST_PORT:-8005}..."
|
||||
start_time=$(date +%s)
|
||||
timeout=120
|
||||
|
||||
while true; do
|
||||
current_time=$(date +%s)
|
||||
elapsed_time=$((current_time - start_time))
|
||||
|
||||
if [ $elapsed_time -ge $timeout ]; then
|
||||
echo "Timeout reached. MCP API Key mock server did not become ready in ${timeout}s."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if curl -sf "http://localhost:${MCP_API_KEY_TEST_PORT:-8005}/healthz" > /dev/null; then
|
||||
echo "MCP API Key mock server is ready!"
|
||||
break
|
||||
fi
|
||||
|
||||
sleep 3
|
||||
done
|
||||
|
||||
- name: Wait for web server to be ready
|
||||
run: |
|
||||
echo "Waiting for web server on port 3000..."
|
||||
start_time=$(date +%s)
|
||||
timeout=120
|
||||
|
||||
while true; do
|
||||
current_time=$(date +%s)
|
||||
elapsed_time=$((current_time - start_time))
|
||||
|
||||
if [ $elapsed_time -ge $timeout ]; then
|
||||
echo "Timeout reached. Web server did not become ready in ${timeout}s."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if curl -sf "http://localhost:3000/api/health" > /dev/null 2>&1 || \
|
||||
curl -sf "http://localhost:3000/" > /dev/null 2>&1; then
|
||||
echo "Web server is ready!"
|
||||
break
|
||||
fi
|
||||
|
||||
echo "Web server not ready yet. Retrying in 3 seconds..."
|
||||
sleep 3
|
||||
done
|
||||
|
||||
- name: Run Playwright tests
|
||||
working-directory: ./web
|
||||
env:
|
||||
PROJECT: ${{ matrix.project }}
|
||||
run: |
|
||||
# Create test-results directory to ensure it exists for artifact upload
|
||||
mkdir -p test-results
|
||||
npx playwright test
|
||||
if [ "${PROJECT}" = "no-auth" ]; then
|
||||
export PLAYWRIGHT_FORCE_EMPTY_LLM_PROVIDERS=true
|
||||
fi
|
||||
npx playwright test --project ${PROJECT}
|
||||
|
||||
- uses: actions/upload-artifact@v4
|
||||
- uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
|
||||
if: always()
|
||||
with:
|
||||
# Includes test results and debug screenshots
|
||||
name: playwright-test-results-${{ github.run_id }}
|
||||
path: ./web/test-results
|
||||
# Includes test results and trace.zip files
|
||||
name: playwright-test-results-${{ matrix.project }}-${{ github.run_id }}
|
||||
path: ./web/test-results/
|
||||
retention-days: 30
|
||||
|
||||
# save before stopping the containers so the logs can be captured
|
||||
- name: Save Docker logs
|
||||
if: success() || failure()
|
||||
env:
|
||||
WORKSPACE: ${{ github.workspace }}
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose logs > docker-compose.log
|
||||
mv docker-compose.log ${{ github.workspace }}/docker-compose.log
|
||||
mv docker-compose.log ${WORKSPACE}/docker-compose.log
|
||||
|
||||
- name: Upload logs
|
||||
if: success() || failure()
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
|
||||
with:
|
||||
name: docker-logs
|
||||
name: docker-logs-${{ matrix.project }}-${{ github.run_id }}
|
||||
path: ${{ github.workspace }}/docker-compose.log
|
||||
|
||||
- name: Stop Docker containers
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose down -v
|
||||
playwright-required:
|
||||
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
|
||||
runs-on: ubuntu-slim
|
||||
timeout-minutes: 45
|
||||
needs: [playwright-tests]
|
||||
if: ${{ always() }}
|
||||
steps:
|
||||
- name: Check job status
|
||||
if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped') }}
|
||||
run: exit 1
|
||||
|
||||
|
||||
# NOTE: Chromatic UI diff testing is currently disabled.
|
||||
# We are using Playwright for local and CI testing without visual regression checks.
|
||||
@@ -289,12 +408,12 @@ jobs:
|
||||
# ]
|
||||
# steps:
|
||||
# - name: Checkout code
|
||||
# uses: actions/checkout@v4
|
||||
# uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
# with:
|
||||
# fetch-depth: 0
|
||||
|
||||
# - name: Setup node
|
||||
# uses: actions/setup-node@v4
|
||||
# uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
|
||||
# with:
|
||||
# node-version: 22
|
||||
|
||||
@@ -303,7 +422,7 @@ jobs:
|
||||
# run: npm ci
|
||||
|
||||
# - name: Download Playwright test results
|
||||
# uses: actions/download-artifact@v4
|
||||
# uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # ratchet:actions/download-artifact@v4
|
||||
# with:
|
||||
# name: test-results
|
||||
# path: ./web/test-results
|
||||
|
||||
140
.github/workflows/pr-python-checks.yml
vendored
140
.github/workflows/pr-python-checks.yml
vendored
@@ -1,4 +1,7 @@
|
||||
name: Python Checks
|
||||
concurrency:
|
||||
group: Python-Checks-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
on:
|
||||
merge_group:
|
||||
@@ -7,61 +10,102 @@ on:
|
||||
- main
|
||||
- 'release/**'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
validate-requirements:
|
||||
runs-on: ubuntu-slim
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Setup uv
|
||||
uses: astral-sh/setup-uv@caf0cab7a618c569241d31dcd442f54681755d39 # ratchet:astral-sh/setup-uv@v3
|
||||
# TODO: Enable caching once there is a uv.lock file checked in.
|
||||
# with:
|
||||
# enable-cache: true
|
||||
|
||||
- name: Validate requirements lock files
|
||||
run: ./backend/scripts/compile_requirements.py --check
|
||||
|
||||
mypy-check:
|
||||
# See https://runs-on.com/runners/linux/
|
||||
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
|
||||
# Note: Mypy seems quite optimized for x64 compared to arm64.
|
||||
# Similarly, mypy is single-threaded and incremental, so 2cpu is sufficient.
|
||||
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-mypy-check", "extras=s3-cache"]
|
||||
timeout-minutes: 45
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: 'pip'
|
||||
cache-dependency-path: |
|
||||
backend/requirements/default.txt
|
||||
backend/requirements/dev.txt
|
||||
backend/requirements/model_server.txt
|
||||
- run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
|
||||
- name: Setup Python and Install Dependencies
|
||||
uses: ./.github/actions/setup-python-and-install-dependencies
|
||||
with:
|
||||
requirements: |
|
||||
backend/requirements/default.txt
|
||||
backend/requirements/dev.txt
|
||||
backend/requirements/model_server.txt
|
||||
backend/requirements/ee.txt
|
||||
|
||||
- name: Generate OpenAPI schema
|
||||
working-directory: ./backend
|
||||
env:
|
||||
PYTHONPATH: "."
|
||||
run: |
|
||||
python scripts/onyx_openapi_schema.py --filename generated/openapi.json
|
||||
- name: Generate OpenAPI schema
|
||||
shell: bash
|
||||
working-directory: backend
|
||||
env:
|
||||
PYTHONPATH: "."
|
||||
run: |
|
||||
python scripts/onyx_openapi_schema.py --filename generated/openapi.json
|
||||
|
||||
- name: Generate OpenAPI Python client
|
||||
working-directory: ./backend
|
||||
run: |
|
||||
docker run --rm \
|
||||
-v "${{ github.workspace }}/backend/generated:/local" \
|
||||
openapitools/openapi-generator-cli generate \
|
||||
-i /local/openapi.json \
|
||||
-g python \
|
||||
-o /local/onyx_openapi_client \
|
||||
--package-name onyx_openapi_client \
|
||||
--skip-validate-spec \
|
||||
--openapi-normalizer "SIMPLIFY_ONEOF_ANYOF=true,SET_OAS3_NULLABLE=true"
|
||||
|
||||
- name: Run MyPy
|
||||
run: |
|
||||
cd backend
|
||||
mypy .
|
||||
# needed for pulling openapitools/openapi-generator-cli
|
||||
# otherwise, we hit the "Unauthenticated users" limit
|
||||
# https://docs.docker.com/docker-hub/usage/
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Check import order with reorder-python-imports
|
||||
run: |
|
||||
cd backend
|
||||
find ./onyx -name "*.py" | xargs reorder-python-imports --py311-plus
|
||||
- name: Generate OpenAPI Python client
|
||||
shell: bash
|
||||
run: |
|
||||
docker run --rm \
|
||||
-v "${{ github.workspace }}/backend/generated:/local" \
|
||||
openapitools/openapi-generator-cli generate \
|
||||
-i /local/openapi.json \
|
||||
-g python \
|
||||
-o /local/onyx_openapi_client \
|
||||
--package-name onyx_openapi_client \
|
||||
--skip-validate-spec \
|
||||
--openapi-normalizer "SIMPLIFY_ONEOF_ANYOF=true,SET_OAS3_NULLABLE=true"
|
||||
|
||||
- name: Check code formatting with Black
|
||||
run: |
|
||||
cd backend
|
||||
black --check .
|
||||
- name: Cache mypy cache
|
||||
if: ${{ vars.DISABLE_MYPY_CACHE != 'true' }}
|
||||
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
|
||||
with:
|
||||
path: backend/.mypy_cache
|
||||
key: mypy-${{ runner.os }}-${{ hashFiles('**/*.py', '**/*.pyi', 'backend/pyproject.toml') }}
|
||||
restore-keys: |
|
||||
mypy-${{ runner.os }}-
|
||||
|
||||
- name: Run MyPy
|
||||
working-directory: ./backend
|
||||
env:
|
||||
MYPY_FORCE_COLOR: 1
|
||||
TERM: xterm-256color
|
||||
run: mypy .
|
||||
|
||||
- name: Check import order with reorder-python-imports
|
||||
working-directory: ./backend
|
||||
run: |
|
||||
find ./onyx -name "*.py" | xargs reorder-python-imports --py311-plus
|
||||
|
||||
- name: Check code formatting with Black
|
||||
working-directory: ./backend
|
||||
run: black --check .
|
||||
|
||||
40
.github/workflows/pr-python-connector-tests.yml
vendored
40
.github/workflows/pr-python-connector-tests.yml
vendored
@@ -1,4 +1,7 @@
|
||||
name: Connector Tests
|
||||
concurrency:
|
||||
group: Connector-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
on:
|
||||
merge_group:
|
||||
@@ -8,6 +11,9 @@ on:
|
||||
# This cron expression runs the job daily at 16:00 UTC (9am PT)
|
||||
- cron: "0 16 * * *"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
env:
|
||||
# AWS
|
||||
AWS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS: ${{ secrets.AWS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS }}
|
||||
@@ -119,35 +125,33 @@ env:
|
||||
jobs:
|
||||
connectors-check:
|
||||
# See https://runs-on.com/runners/linux/
|
||||
runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}"]
|
||||
runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-connectors-check", "extras=s3-cache"]
|
||||
timeout-minutes: 45
|
||||
|
||||
env:
|
||||
PYTHONPATH: ./backend
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
python-version: "3.11"
|
||||
cache: "pip"
|
||||
cache-dependency-path: |
|
||||
persist-credentials: false
|
||||
|
||||
- name: Setup Python and Install Dependencies
|
||||
uses: ./.github/actions/setup-python-and-install-dependencies
|
||||
with:
|
||||
requirements: |
|
||||
backend/requirements/default.txt
|
||||
backend/requirements/dev.txt
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
|
||||
playwright install chromium
|
||||
playwright install-deps chromium
|
||||
- name: Setup Playwright
|
||||
uses: ./.github/actions/setup-playwright
|
||||
|
||||
- name: Detect Connector changes
|
||||
id: changes
|
||||
uses: dorny/paths-filter@v3
|
||||
uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # ratchet:dorny/paths-filter@v3
|
||||
with:
|
||||
filters: |
|
||||
hubspot:
|
||||
@@ -220,8 +224,10 @@ jobs:
|
||||
if: failure() && github.event_name == 'schedule'
|
||||
env:
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
REPO: ${{ github.repository }}
|
||||
RUN_ID: ${{ github.run_id }}
|
||||
run: |
|
||||
curl -X POST \
|
||||
-H 'Content-type: application/json' \
|
||||
--data '{"text":"Scheduled Connector Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
|
||||
--data "{\"text\":\"Scheduled Connector Tests failed! Check the run at: https://github.com/${REPO}/actions/runs/${RUN_ID}\"}" \
|
||||
$SLACK_WEBHOOK
|
||||
|
||||
43
.github/workflows/pr-python-model-tests.yml
vendored
43
.github/workflows/pr-python-model-tests.yml
vendored
@@ -10,7 +10,10 @@ on:
|
||||
description: 'Branch to run the workflow on'
|
||||
required: false
|
||||
default: 'main'
|
||||
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
env:
|
||||
# Bedrock
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
@@ -28,17 +31,20 @@ env:
|
||||
jobs:
|
||||
model-check:
|
||||
# See https://runs-on.com/runners/linux/
|
||||
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
|
||||
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}-model-check"]
|
||||
timeout-minutes: 45
|
||||
|
||||
env:
|
||||
PYTHONPATH: ./backend
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
@@ -53,9 +59,9 @@ jobs:
|
||||
run: |
|
||||
docker pull onyxdotapp/onyx-model-server:latest
|
||||
docker tag onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:test
|
||||
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
|
||||
with:
|
||||
python-version: "3.11"
|
||||
cache: "pip"
|
||||
@@ -90,15 +96,15 @@ jobs:
|
||||
while true; do
|
||||
current_time=$(date +%s)
|
||||
elapsed_time=$((current_time - start_time))
|
||||
|
||||
|
||||
if [ $elapsed_time -ge $timeout ]; then
|
||||
echo "Timeout reached. Service did not become ready in 5 minutes."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
# Use curl with error handling to ignore specific exit code 56
|
||||
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:9000/api/health || echo "curl_error")
|
||||
|
||||
|
||||
if [ "$response" = "200" ]; then
|
||||
echo "Service is ready!"
|
||||
break
|
||||
@@ -107,11 +113,11 @@ jobs:
|
||||
else
|
||||
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
|
||||
fi
|
||||
|
||||
|
||||
sleep 5
|
||||
done
|
||||
echo "Finished waiting for service."
|
||||
|
||||
|
||||
- name: Run Tests
|
||||
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
|
||||
run: |
|
||||
@@ -122,12 +128,14 @@ jobs:
|
||||
if: failure() && github.event_name == 'schedule'
|
||||
env:
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
REPO: ${{ github.repository }}
|
||||
RUN_ID: ${{ github.run_id }}
|
||||
run: |
|
||||
curl -X POST \
|
||||
-H 'Content-type: application/json' \
|
||||
--data '{"text":"Scheduled Model Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
|
||||
--data "{\"text\":\"Scheduled Model Tests failed! Check the run at: https://github.com/${REPO}/actions/runs/${RUN_ID}\"}" \
|
||||
$SLACK_WEBHOOK
|
||||
|
||||
|
||||
- name: Dump all-container logs (optional)
|
||||
if: always()
|
||||
run: |
|
||||
@@ -136,14 +144,7 @@ jobs:
|
||||
|
||||
- name: Upload logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
|
||||
with:
|
||||
name: docker-all-logs
|
||||
path: ${{ github.workspace }}/docker-compose.log
|
||||
|
||||
- name: Stop Docker containers
|
||||
if: always()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.model-server-test.yml down -v
|
||||
|
||||
|
||||
38
.github/workflows/pr-python-tests.yml
vendored
38
.github/workflows/pr-python-tests.yml
vendored
@@ -1,4 +1,7 @@
|
||||
name: Python Unit Tests
|
||||
concurrency:
|
||||
group: Python-Unit-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
on:
|
||||
merge_group:
|
||||
@@ -7,10 +10,15 @@ on:
|
||||
- main
|
||||
- 'release/**'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
backend-check:
|
||||
# See https://runs-on.com/runners/linux/
|
||||
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
|
||||
runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-backend-check"]
|
||||
timeout-minutes: 45
|
||||
|
||||
|
||||
env:
|
||||
PYTHONPATH: ./backend
|
||||
@@ -18,27 +26,23 @@ jobs:
|
||||
SF_USERNAME: ${{ secrets.SF_USERNAME }}
|
||||
SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
|
||||
SF_SECURITY_TOKEN: ${{ secrets.SF_SECURITY_TOKEN }}
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: 'pip'
|
||||
cache-dependency-path: |
|
||||
persist-credentials: false
|
||||
|
||||
- name: Setup Python and Install Dependencies
|
||||
uses: ./.github/actions/setup-python-and-install-dependencies
|
||||
with:
|
||||
requirements: |
|
||||
backend/requirements/default.txt
|
||||
backend/requirements/dev.txt
|
||||
backend/requirements/model_server.txt
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
|
||||
backend/requirements/ee.txt
|
||||
|
||||
- name: Run Tests
|
||||
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
|
||||
|
||||
24
.github/workflows/pr-quality-checks.yml
vendored
24
.github/workflows/pr-quality-checks.yml
vendored
@@ -7,17 +7,33 @@ on:
|
||||
merge_group:
|
||||
pull_request: null
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
quality-checks:
|
||||
# See https://runs-on.com/runners/linux/
|
||||
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
|
||||
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-quality-checks"]
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
- uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- uses: actions/setup-python@v5
|
||||
persist-credentials: false
|
||||
- uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
|
||||
with:
|
||||
python-version: "3.11"
|
||||
- uses: pre-commit/action@v3.0.1
|
||||
- name: Setup Terraform
|
||||
uses: hashicorp/setup-terraform@b9cd54a3c349d3f38e8881555d616ced269862dd # ratchet:hashicorp/setup-terraform@v3
|
||||
- uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # ratchet:pre-commit/action@v3.0.1
|
||||
env:
|
||||
# uv-run is the hook id used for mypy; skip it here because mypy is already covered by the Python Checks workflow, which caches dependencies better.
|
||||
SKIP: uv-run
|
||||
with:
|
||||
extra_args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || '' }}
|
||||
- name: Check Actions
|
||||
uses: giner/check-actions@28d366c7cbbe235f9624a88aa31a628167eee28c # ratchet:giner/check-actions@v1.0.1
|
||||
with:
|
||||
check_permissions: false
|
||||
check_versions: false
|
||||
|
||||
49
.github/workflows/sync_foss.yml
vendored
Normal file
49
.github/workflows/sync_foss.yml
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
name: Sync FOSS Repo
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Run daily at 3am PT (11am UTC during PST)
|
||||
- cron: '0 11 * * *'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
sync-foss:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 45
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout main Onyx repo
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Install git-filter-repo
|
||||
run: |
|
||||
sudo apt-get update && sudo apt-get install -y git-filter-repo
|
||||
|
||||
- name: Configure SSH for deploy key
|
||||
env:
|
||||
FOSS_REPO_DEPLOY_KEY: ${{ secrets.FOSS_REPO_DEPLOY_KEY }}
|
||||
run: |
|
||||
mkdir -p ~/.ssh
|
||||
echo "$FOSS_REPO_DEPLOY_KEY" > ~/.ssh/id_ed25519
|
||||
chmod 600 ~/.ssh/id_ed25519
|
||||
ssh-keyscan github.com >> ~/.ssh/known_hosts
|
||||
|
||||
- name: Set Git config
|
||||
run: |
|
||||
git config --global user.name "onyx-bot"
|
||||
git config --global user.email "bot@onyx.app"
|
||||
|
||||
- name: Build FOSS version
|
||||
run: bash backend/scripts/make_foss_repo.sh
|
||||
|
||||
- name: Push to FOSS repo
|
||||
env:
|
||||
FOSS_REPO_URL: git@github.com:onyx-dot-app/onyx-foss.git
|
||||
run: |
|
||||
cd /tmp/foss_repo
|
||||
git remote add public "$FOSS_REPO_URL"
|
||||
git push --force public main
|
||||
25
.github/workflows/tag-nightly.yml
vendored
25
.github/workflows/tag-nightly.yml
vendored
@@ -3,30 +3,30 @@ name: Nightly Tag Push
|
||||
on:
|
||||
schedule:
|
||||
- cron: "0 10 * * *" # Runs every day at 2 AM PST / 3 AM PDT / 10 AM UTC
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: write # Allows pushing tags to the repository
|
||||
|
||||
jobs:
|
||||
create-and-push-tag:
|
||||
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
|
||||
runs-on: ubuntu-slim
|
||||
timeout-minutes: 45
|
||||
|
||||
steps:
|
||||
# actions using GITHUB_TOKEN cannot trigger another workflow, but we do want this to trigger docker pushes
|
||||
# see https://github.com/orgs/community/discussions/27028#discussioncomment-3254367 for the workaround we
|
||||
# implement here which needs an actual user's deploy key
|
||||
|
||||
# Additional NOTE: even though this is named "rkuo", the actual key is tied to the onyx repo
|
||||
# and not rkuo's personal account. It is fine to leave this key as is!
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
|
||||
with:
|
||||
ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"
|
||||
ssh-key: "${{ secrets.DEPLOY_KEY }}"
|
||||
persist-credentials: true
|
||||
|
||||
- name: Set up Git user
|
||||
run: |
|
||||
git config user.name "Richard Kuo [bot]"
|
||||
git config user.email "rkuo[bot]@onyx.app"
|
||||
git config user.name "Onyx Bot [bot]"
|
||||
git config user.email "onyx-bot[bot]@onyx.app"
|
||||
|
||||
- name: Check for existing nightly tag
|
||||
id: check_tag
|
||||
@@ -54,3 +54,12 @@ jobs:
|
||||
run: |
|
||||
TAG_NAME="nightly-latest-$(date +'%Y%m%d')"
|
||||
git push origin $TAG_NAME
|
||||
|
||||
- name: Send Slack notification
|
||||
if: failure()
|
||||
uses: ./.github/actions/slack-notify
|
||||
with:
|
||||
webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
|
||||
title: "🚨 Nightly Tag Push Failed"
|
||||
ref-name: ${{ github.ref_name }}
|
||||
failed-jobs: "create-and-push-tag"
|
||||
|
||||
36
.github/workflows/zizmor.yml
vendored
Normal file
36
.github/workflows/zizmor.yml
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
name: Run Zizmor
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: ["main"]
|
||||
pull_request:
|
||||
branches: ["**"]
|
||||
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
zizmor:
|
||||
name: zizmor
|
||||
runs-on: ubuntu-slim
|
||||
timeout-minutes: 45
|
||||
permissions:
|
||||
security-events: write # needed for SARIF uploads
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6.0.0
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Install the latest version of uv
|
||||
uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # ratchet:astral-sh/setup-uv@v7.1.3
|
||||
|
||||
- name: Run zizmor
|
||||
run: uvx zizmor==1.16.3 --format=sarif . > results.sarif
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Upload SARIF file
|
||||
uses: github/codeql-action/upload-sarif@ba454b8ab46733eb6145342877cd148270bb77ab # ratchet:github/codeql-action/upload-sarif@codeql-bundle-v2.23.5
|
||||
with:
|
||||
sarif_file: results.sarif
|
||||
category: zizmor
|
||||
13
.gitignore
vendored
13
.gitignore
vendored
@@ -1,6 +1,7 @@
|
||||
# editors
|
||||
.vscode
|
||||
.zed
|
||||
.cursor
|
||||
|
||||
# macos
|
||||
.DS_store
|
||||
@@ -18,6 +19,7 @@ backend/tests/regression/search_quality/eval-*
|
||||
backend/tests/regression/search_quality/search_eval_config.yaml
|
||||
backend/tests/regression/search_quality/*.json
|
||||
backend/onyx/evals/data/
|
||||
backend/onyx/evals/one_off/*.json
|
||||
*.log
|
||||
|
||||
# secret files
|
||||
@@ -27,10 +29,16 @@ settings.json
|
||||
|
||||
# others
|
||||
/deployment/data/nginx/app.conf
|
||||
/deployment/data/nginx/mcp.conf.inc
|
||||
/deployment/data/nginx/mcp_upstream.conf.inc
|
||||
*.sw?
|
||||
/backend/tests/regression/answer_quality/search_test_config.yaml
|
||||
*.egg-info
|
||||
|
||||
# Claude
|
||||
AGENTS.md
|
||||
CLAUDE.md
|
||||
|
||||
# Local .terraform directories
|
||||
**/.terraform/*
|
||||
|
||||
@@ -40,3 +48,8 @@ settings.json
|
||||
|
||||
# Local .terraform.lock.hcl file
|
||||
.terraform.lock.hcl
|
||||
|
||||
node_modules
|
||||
|
||||
# MCP configs
|
||||
.playwright-mcp
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
{
|
||||
"mcpServers": {
|
||||
"onyx-mcp": {
|
||||
"type": "http",
|
||||
"url": "http://localhost:8000/mcp"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,31 @@
|
||||
default_install_hook_types:
|
||||
- pre-commit
|
||||
- post-checkout
|
||||
- post-merge
|
||||
- post-rewrite
|
||||
repos:
|
||||
- repo: https://github.com/astral-sh/uv-pre-commit
|
||||
# This revision is from https://github.com/astral-sh/uv-pre-commit/pull/53
|
||||
rev: d30b4298e4fb63ce8609e29acdbcf4c9018a483c
|
||||
hooks:
|
||||
- id: uv-sync
|
||||
- id: uv-run
|
||||
name: mypy
|
||||
args: ["mypy"]
|
||||
pass_filenames: true
|
||||
files: ^backend/.*\.py$
|
||||
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v4.6.0
|
||||
hooks:
|
||||
- id: check-yaml
|
||||
files: ^.github/
|
||||
|
||||
- repo: https://github.com/rhysd/actionlint
|
||||
rev: v1.7.8
|
||||
hooks:
|
||||
- id: actionlint
|
||||
|
||||
- repo: https://github.com/psf/black
|
||||
rev: 25.1.0
|
||||
hooks:
|
||||
@@ -29,6 +56,7 @@ repos:
|
||||
rev: v0.11.4
|
||||
hooks:
|
||||
- id: ruff
|
||||
|
||||
- repo: https://github.com/pre-commit/mirrors-prettier
|
||||
rev: v3.1.0
|
||||
hooks:
|
||||
@@ -47,37 +75,15 @@ repos:
|
||||
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: terraform-fmt
|
||||
name: terraform fmt
|
||||
entry: terraform fmt -recursive
|
||||
language: system
|
||||
pass_filenames: false
|
||||
files: \.tf$
|
||||
|
||||
- id: check-lazy-imports
|
||||
name: Check lazy imports are not directly imported
|
||||
name: Check lazy imports
|
||||
entry: python3 backend/scripts/check_lazy_imports.py
|
||||
language: system
|
||||
files: ^backend/.*\.py$
|
||||
pass_filenames: false
|
||||
|
||||
# We would like to have a mypy pre-commit hook, but due to the fact that
|
||||
# pre-commit runs in it's own isolated environment, we would need to install
|
||||
# and keep in sync all dependencies so mypy has access to the appropriate type
|
||||
# stubs. This does not seem worth it at the moment, so for now we will stick to
|
||||
# having mypy run via Github Actions / manually by contributors
|
||||
# - repo: https://github.com/pre-commit/mirrors-mypy
|
||||
# rev: v1.1.1
|
||||
# hooks:
|
||||
# - id: mypy
|
||||
# exclude: ^tests/
|
||||
# # below are needed for type stubs since pre-commit runs in it's own
|
||||
# # isolated environment. Unfortunately, this needs to be kept in sync
|
||||
# # with requirements/dev.txt + requirements/default.txt
|
||||
# additional_dependencies: [
|
||||
# alembic==1.10.4,
|
||||
# types-beautifulsoup4==4.12.0.3,
|
||||
# types-html5lib==1.1.11.13,
|
||||
# types-oauthlib==3.2.0.9,
|
||||
# types-psycopg2==2.9.21.10,
|
||||
# types-python-dateutil==2.8.19.13,
|
||||
# types-regex==2023.3.23.1,
|
||||
# types-requests==2.28.11.17,
|
||||
# types-retry==0.9.9.3,
|
||||
# types-urllib3==1.26.25.11
|
||||
# ]
|
||||
# # TODO: add back once errors are addressed
|
||||
# # args: [--strict]
|
||||
files: ^backend/(?!\.venv/).*\.py$
|
||||
|
||||
13
.vscode/env_template.txt
vendored
13
.vscode/env_template.txt
vendored
@@ -1,6 +1,6 @@
|
||||
# Copy this file to .env in the .vscode folder
|
||||
# Fill in the <REPLACE THIS> values as needed, it is recommended to set the GEN_AI_API_KEY value to avoid having to set up an LLM in the UI
|
||||
# Also check out danswer/backend/scripts/restart_containers.sh for a script to restart the containers which Danswer relies on outside of VSCode/Cursor processes
|
||||
# Also check out onyx/backend/scripts/restart_containers.sh for a script to restart the containers which Onyx relies on outside of VSCode/Cursor processes
|
||||
|
||||
# For local dev, often user Authentication is not needed
|
||||
AUTH_TYPE=disabled
|
||||
@@ -37,8 +37,8 @@ OPENAI_API_KEY=<REPLACE THIS>
|
||||
GEN_AI_MODEL_VERSION=gpt-4o
|
||||
FAST_GEN_AI_MODEL_VERSION=gpt-4o
|
||||
|
||||
# For Danswer Slack Bot, overrides the UI values so no need to set this up via UI every time
|
||||
# Only needed if using DanswerBot
|
||||
# For Onyx Slack Bot, overrides the UI values so no need to set this up via UI every time
|
||||
# Only needed if using OnyxBot
|
||||
#ONYX_BOT_SLACK_APP_TOKEN=<REPLACE THIS>
|
||||
#ONYX_BOT_SLACK_BOT_TOKEN=<REPLACE THIS>
|
||||
|
||||
@@ -75,4 +75,9 @@ SHOW_EXTRA_CONNECTORS=True
|
||||
LANGSMITH_TRACING="true"
|
||||
LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
|
||||
LANGSMITH_API_KEY=<REPLACE_THIS>
|
||||
LANGSMITH_PROJECT=<REPLACE_THIS>
|
||||
LANGSMITH_PROJECT=<REPLACE_THIS>
|
||||
|
||||
# Local Confluence OAuth testing
|
||||
# OAUTH_CONFLUENCE_CLOUD_CLIENT_ID=<REPLACE_THIS>
|
||||
# OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET=<REPLACE_THIS>
|
||||
# NEXT_PUBLIC_TEST_ENV=True
|
||||
29
.vscode/launch.template.jsonc
vendored
29
.vscode/launch.template.jsonc
vendored
@@ -20,6 +20,7 @@
|
||||
"Web Server",
|
||||
"Model Server",
|
||||
"API Server",
|
||||
"MCP Server",
|
||||
"Slack Bot",
|
||||
"Celery primary",
|
||||
"Celery light",
|
||||
@@ -152,6 +153,34 @@
|
||||
},
|
||||
"consoleTitle": "Slack Bot Console"
|
||||
},
|
||||
{
|
||||
"name": "MCP Server",
|
||||
"consoleName": "MCP Server",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "uvicorn",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"MCP_SERVER_ENABLED": "true",
|
||||
"MCP_SERVER_PORT": "8090",
|
||||
"MCP_SERVER_CORS_ORIGINS": "http://localhost:*",
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1"
|
||||
},
|
||||
"args": [
|
||||
"onyx.mcp_server.api:mcp_app",
|
||||
"--reload",
|
||||
"--port",
|
||||
"8090",
|
||||
"--timeout-graceful-shutdown",
|
||||
"0"
|
||||
],
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
"consoleTitle": "MCP Server Console"
|
||||
},
|
||||
{
|
||||
"name": "Celery primary",
|
||||
"type": "debugpy",
|
||||
|
||||
@@ -194,13 +194,15 @@ alembic -n schema_private upgrade head
|
||||
|
||||
### Creating Migrations
|
||||
```bash
|
||||
# Auto-generate migration
|
||||
alembic revision --autogenerate -m "description"
|
||||
# Create migration
|
||||
alembic revision -m "description"
|
||||
|
||||
# Multi-tenant migration
|
||||
alembic -n schema_private revision --autogenerate -m "description"
|
||||
alembic -n schema_private revision -m "description"
|
||||
```
|
||||
|
||||
Write the migration manually and place it in the file that alembic creates when running the above command.
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
There are 4 main types of tests within Onyx:
|
||||
@@ -197,15 +197,19 @@ alembic -n schema_private upgrade head
|
||||
|
||||
### Creating Migrations
|
||||
```bash
|
||||
# Auto-generate migration
|
||||
alembic revision --autogenerate -m "description"
|
||||
# Create migration
|
||||
alembic revision -m "description"
|
||||
|
||||
# Multi-tenant migration
|
||||
alembic -n schema_private revision --autogenerate -m "description"
|
||||
alembic -n schema_private revision -m "description"
|
||||
```
|
||||
|
||||
Write the migration manually and place it in the file that alembic creates when running the above command.
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
First, you must activate the virtual environment with `source .venv/bin/activate`.
|
||||
|
||||
There are 4 main types of tests within Onyx:
|
||||
|
||||
### Unit Tests
|
||||
@@ -216,7 +220,7 @@ write these for complex, isolated modules e.g. `citation_processing.py`.
|
||||
To run them:
|
||||
|
||||
```bash
|
||||
python -m dotenv -f .vscode/.env run -- pytest -xv backend/tests/unit
|
||||
pytest -xv backend/tests/unit
|
||||
```
|
||||
|
||||
### External Dependency Unit Tests
|
||||
@@ -94,6 +94,12 @@ If using PowerShell, the command slightly differs:
|
||||
|
||||
Install the required python dependencies:
|
||||
|
||||
```bash
|
||||
pip install -r backend/requirements/combined.txt
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```bash
|
||||
pip install -r backend/requirements/default.txt
|
||||
pip install -r backend/requirements/dev.txt
|
||||
@@ -122,7 +128,7 @@ Onyx uses Node v22.20.0. We highly recommend you use [Node Version Manager (nvm)
|
||||
to manage your Node installations. Once installed, you can run
|
||||
|
||||
```bash
|
||||
nvm install 22 && nvm use 22`
|
||||
nvm install 22 && nvm use 22
|
||||
node -v # verify your active version
|
||||
```
|
||||
|
||||
|
||||
35
README.md
35
README.md
@@ -1,29 +1,34 @@
|
||||
<a name="readme-top"></a>
|
||||
|
||||
<h2 align="center">
|
||||
<a href="https://www.onyx.app/"> <img width="50%" src="https://github.com/onyx-dot-app/onyx/blob/logo/OnyxLogoCropped.jpg?raw=true)" /></a>
|
||||
<a href="https://www.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme"> <img width="50%" src="https://github.com/onyx-dot-app/onyx/blob/logo/OnyxLogoCropped.jpg?raw=true" /></a>
|
||||
</h2>
|
||||
|
||||
<p align="center">Open Source AI Platform</p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://discord.gg/TDJ59cGV2X" target="_blank">
|
||||
<img src="https://img.shields.io/badge/discord-join-blue.svg?logo=discord&logoColor=white" alt="Discord">
|
||||
<img src="https://img.shields.io/badge/discord-join-blue.svg?logo=discord&logoColor=white" alt="Discord" />
|
||||
</a>
|
||||
<a href="https://docs.onyx.app/" target="_blank">
|
||||
<img src="https://img.shields.io/badge/docs-view-blue" alt="Documentation">
|
||||
<a href="https://docs.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme" target="_blank">
|
||||
<img src="https://img.shields.io/badge/docs-view-blue" alt="Documentation" />
|
||||
</a>
|
||||
<a href="https://docs.onyx.app/" target="_blank">
|
||||
<img src="https://img.shields.io/website?url=https://www.onyx.app&up_message=visit&up_color=blue" alt="Documentation">
|
||||
<a href="https://www.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme" target="_blank">
|
||||
<img src="https://img.shields.io/website?url=https://www.onyx.app&up_message=visit&up_color=blue" alt="Website" />
|
||||
</a>
|
||||
<a href="https://github.com/onyx-dot-app/onyx/blob/main/LICENSE" target="_blank">
|
||||
<img src="https://img.shields.io/static/v1?label=license&message=MIT&color=blue" alt="License">
|
||||
<img src="https://img.shields.io/static/v1?label=license&message=MIT&color=blue" alt="License" />
|
||||
</a>
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://trendshift.io/repositories/12516" target="_blank">
|
||||
<img src="https://trendshift.io/api/badge/repositories/12516" alt="onyx-dot-app/onyx | Trendshift" style="width: 250px; height: 55px;" />
|
||||
</a>
|
||||
</p>
|
||||
|
||||
|
||||
**[Onyx](https://www.onyx.app/)** is a feature-rich, self-hostable Chat UI that works with any LLM. It is easy to deploy and can run in a completely airgapped environment.
|
||||
**[Onyx](https://www.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme)** is a feature-rich, self-hostable Chat UI that works with any LLM. It is easy to deploy and can run in a completely airgapped environment.
|
||||
|
||||
Onyx comes loaded with advanced features like Agents, Web Search, RAG, MCP, Deep Research, Connectors to 40+ knowledge sources, and more.
|
||||
|
||||
@@ -52,7 +57,7 @@ Onyx comes loaded with advanced features like Agents, Web Search, RAG, MCP, Deep
|
||||
|
||||
Onyx works with all LLMs (like OpenAI, Anthropic, Gemini, etc.) and self-hosted LLMs (like Ollama, vLLM, etc.)
|
||||
|
||||
To learn more about the features, check out our [documentation](https://docs.onyx.app/welcome)!
|
||||
To learn more about the features, check out our [documentation](https://docs.onyx.app/welcome?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme)!
|
||||
|
||||
|
||||
|
||||
@@ -60,13 +65,13 @@ To learn more about the features, check out our [documentation](https://docs.ony
|
||||
Onyx supports deployments in Docker, Kubernetes, Terraform, along with guides for major cloud providers.
|
||||
|
||||
See guides below:
|
||||
- [Docker](https://docs.onyx.app/deployment/local/docker) or [Quickstart](https://docs.onyx.app/deployment/getting_started/quickstart) (best for most users)
|
||||
- [Kubernetes](https://docs.onyx.app/deployment/local/kubernetes) (best for large teams)
|
||||
- [Terraform](https://docs.onyx.app/deployment/local/terraform) (best for teams already using Terraform)
|
||||
- Cloud specific guides (best if specifically using [AWS EKS](https://docs.onyx.app/deployment/cloud/aws/eks), [Azure VMs](https://docs.onyx.app/deployment/cloud/azure), etc.)
|
||||
- [Docker](https://docs.onyx.app/deployment/local/docker?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme) or [Quickstart](https://docs.onyx.app/deployment/getting_started/quickstart?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme) (best for most users)
|
||||
- [Kubernetes](https://docs.onyx.app/deployment/local/kubernetes?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme) (best for large teams)
|
||||
- [Terraform](https://docs.onyx.app/deployment/local/terraform?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme) (best for teams already using Terraform)
|
||||
- Cloud specific guides (best if specifically using [AWS EKS](https://docs.onyx.app/deployment/cloud/aws/eks?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme), [Azure VMs](https://docs.onyx.app/deployment/cloud/azure?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme), etc.)
|
||||
|
||||
> [!TIP]
|
||||
> **To try Onyx for free without deploying, check out [Onyx Cloud](https://cloud.onyx.app/signup)**.
|
||||
> **To try Onyx for free without deploying, check out [Onyx Cloud](https://cloud.onyx.app/signup?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme)**.
|
||||
|
||||
|
||||
|
||||
@@ -90,7 +95,7 @@ There are two editions of Onyx:
|
||||
|
||||
- Onyx Community Edition (CE) is available freely under the MIT license.
|
||||
- Onyx Enterprise Edition (EE) includes extra features that are primarily useful for larger organizations.
|
||||
For feature details, check out [our website](https://www.onyx.app/pricing).
|
||||
For feature details, check out [our website](https://www.onyx.app/pricing?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme).
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -7,15 +7,19 @@ have a contract or agreement with DanswerAI, you are not permitted to use the En
|
||||
Edition features outside of personal development or testing purposes. Please reach out to \
|
||||
founders@onyx.app for more information. Please visit https://github.com/onyx-dot-app/onyx"
|
||||
|
||||
# Default ONYX_VERSION, typically overriden during builds by GitHub Actions.
|
||||
ARG ONYX_VERSION=0.0.0-dev
|
||||
# DO_NOT_TRACK is used to disable telemetry for Unstructured
|
||||
ENV ONYX_VERSION=${ONYX_VERSION} \
|
||||
DANSWER_RUNNING_IN_DOCKER="true" \
|
||||
ENV DANSWER_RUNNING_IN_DOCKER="true" \
|
||||
DO_NOT_TRACK="true" \
|
||||
PLAYWRIGHT_BROWSERS_PATH="/app/.cache/ms-playwright"
|
||||
|
||||
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
|
||||
# Create non-root user for security best practices
|
||||
RUN groupadd -g 1001 onyx && \
|
||||
useradd -u 1001 -g onyx -m -s /bin/bash onyx && \
|
||||
mkdir -p /var/log/onyx && \
|
||||
chmod 755 /var/log/onyx && \
|
||||
chown onyx:onyx /var/log/onyx
|
||||
|
||||
COPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/
|
||||
|
||||
# Install system dependencies
|
||||
# cmake needed for psycopg (postgres)
|
||||
@@ -54,6 +58,7 @@ RUN uv pip install --system --no-cache-dir --upgrade \
|
||||
pip uninstall -y py && \
|
||||
playwright install chromium && \
|
||||
playwright install-deps chromium && \
|
||||
chown -R onyx:onyx /app && \
|
||||
ln -s /usr/local/bin/supervisord /usr/bin/supervisord && \
|
||||
# Cleanup for CVEs and size reduction
|
||||
# https://github.com/tornadoweb/tornado/issues/3107
|
||||
@@ -90,16 +95,13 @@ nltk.download('stopwords', quiet=True); \
|
||||
nltk.download('punkt_tab', quiet=True);"
|
||||
# nltk.download('wordnet', quiet=True); introduce this back if lemmatization is needed
|
||||
|
||||
# Pre-downloading tiktoken for setups with limited egress
|
||||
RUN python -c "import tiktoken; \
|
||||
tiktoken.get_encoding('cl100k_base')"
|
||||
|
||||
# Set up application files
|
||||
WORKDIR /app
|
||||
|
||||
# Create non-root user for security best practices
|
||||
RUN groupadd -g 1001 onyx && \
|
||||
useradd -u 1001 -g onyx -m -s /bin/bash onyx && \
|
||||
mkdir -p /var/log/onyx && \
|
||||
chmod 755 /var/log/onyx && \
|
||||
chown onyx:onyx /var/log/onyx
|
||||
|
||||
# Enterprise Version Files
|
||||
COPY --chown=onyx:onyx ./ee /app/ee
|
||||
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
|
||||
@@ -124,6 +126,10 @@ COPY --chown=onyx:onyx ./assets /app/assets
|
||||
|
||||
ENV PYTHONPATH=/app
|
||||
|
||||
# Default ONYX_VERSION, typically overridden during builds by GitHub Actions.
|
||||
ARG ONYX_VERSION=0.0.0-dev
|
||||
ENV ONYX_VERSION=${ONYX_VERSION}
|
||||
|
||||
# Default command which does nothing
|
||||
# This container is used by api server and background which specify their own CMD
|
||||
CMD ["tail", "-f", "/dev/null"]
|
||||
|
||||
@@ -1,4 +1,42 @@
|
||||
FROM python:3.11.7-slim-bookworm
|
||||
# Base stage with dependencies
|
||||
FROM python:3.11.7-slim-bookworm AS base
|
||||
|
||||
ENV DANSWER_RUNNING_IN_DOCKER="true" \
|
||||
HF_HOME=/app/.cache/huggingface
|
||||
|
||||
COPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/
|
||||
|
||||
RUN mkdir -p /app/.cache/huggingface
|
||||
|
||||
COPY ./requirements/model_server.txt /tmp/requirements.txt
|
||||
RUN uv pip install --system --no-cache-dir --upgrade \
|
||||
-r /tmp/requirements.txt && \
|
||||
rm -rf ~/.cache/uv /tmp/*.txt
|
||||
|
||||
# Stage for downloading tokenizers
|
||||
FROM base AS tokenizers
|
||||
RUN python -c "from transformers import AutoTokenizer; \
|
||||
AutoTokenizer.from_pretrained('distilbert-base-uncased'); \
|
||||
AutoTokenizer.from_pretrained('mixedbread-ai/mxbai-rerank-xsmall-v1');"
|
||||
|
||||
# Stage for downloading Onyx models
|
||||
FROM base AS onyx-models
|
||||
RUN python -c "from huggingface_hub import snapshot_download; \
|
||||
snapshot_download(repo_id='onyx-dot-app/hybrid-intent-token-classifier'); \
|
||||
snapshot_download(repo_id='onyx-dot-app/information-content-model');"
|
||||
|
||||
# Stage for downloading embedding and reranking models
|
||||
FROM base AS embedding-models
|
||||
RUN python -c "from huggingface_hub import snapshot_download; \
|
||||
snapshot_download('nomic-ai/nomic-embed-text-v1'); \
|
||||
snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1');"
|
||||
|
||||
# Initialize SentenceTransformer to cache the custom architecture
|
||||
RUN python -c "from sentence_transformers import SentenceTransformer; \
|
||||
SentenceTransformer(model_name_or_path='nomic-ai/nomic-embed-text-v1', trust_remote_code=True);"
|
||||
|
||||
# Final stage - combine all downloads
|
||||
FROM base AS final
|
||||
|
||||
LABEL com.danswer.maintainer="founders@onyx.app"
|
||||
LABEL com.danswer.description="This image is for the Onyx model server which runs all of the \
|
||||
@@ -6,65 +44,19 @@ AI models for Onyx. This container and all the code is MIT Licensed and free for
|
||||
You can find it at https://hub.docker.com/r/onyx/onyx-model-server. For more details, \
|
||||
visit https://github.com/onyx-dot-app/onyx."
|
||||
|
||||
# Default ONYX_VERSION, typically overridden during builds by GitHub Actions.
|
||||
ARG ONYX_VERSION=0.0.0-dev
|
||||
ENV ONYX_VERSION=${ONYX_VERSION} \
|
||||
DANSWER_RUNNING_IN_DOCKER="true" \
|
||||
HF_HOME=/app/.cache/huggingface
|
||||
|
||||
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
|
||||
|
||||
# Create non-root user for security best practices
|
||||
RUN mkdir -p /app && \
|
||||
groupadd -g 1001 onyx && \
|
||||
useradd -u 1001 -g onyx -m -s /bin/bash onyx && \
|
||||
chown -R onyx:onyx /app && \
|
||||
RUN groupadd -g 1001 onyx && \
|
||||
useradd -u 1001 -g onyx -m -s /bin/bash onyx && \
|
||||
mkdir -p /var/log/onyx && \
|
||||
chmod 755 /var/log/onyx && \
|
||||
chown onyx:onyx /var/log/onyx
|
||||
|
||||
# --- add toolchain needed for Rust/Python builds (fastuuid) ---
|
||||
ENV RUSTUP_HOME=/usr/local/rustup \
|
||||
CARGO_HOME=/usr/local/cargo \
|
||||
PATH=/usr/local/cargo/bin:$PATH
|
||||
|
||||
RUN set -eux; \
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
pkg-config \
|
||||
curl \
|
||||
ca-certificates \
|
||||
# Install latest stable Rust (supports Cargo.lock v4)
|
||||
&& curl -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal --default-toolchain stable \
|
||||
&& rustc --version && cargo --version \
|
||||
&& apt-get remove -y --allow-remove-essential perl-base \
|
||||
&& apt-get autoremove -y \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY ./requirements/model_server.txt /tmp/requirements.txt
|
||||
RUN uv pip install --system --no-cache-dir --upgrade \
|
||||
-r /tmp/requirements.txt && \
|
||||
rm -rf ~/.cache/uv /tmp/*.txt
|
||||
|
||||
# Pre-downloading models for setups with limited egress
|
||||
# Download tokenizers, distilbert for the Onyx model
|
||||
# Download model weights
|
||||
# Run Nomic to pull in the custom architecture and have it cached locally
|
||||
RUN python -c "from transformers import AutoTokenizer; \
|
||||
AutoTokenizer.from_pretrained('distilbert-base-uncased'); \
|
||||
AutoTokenizer.from_pretrained('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
|
||||
from huggingface_hub import snapshot_download; \
|
||||
snapshot_download(repo_id='onyx-dot-app/hybrid-intent-token-classifier'); \
|
||||
snapshot_download(repo_id='onyx-dot-app/information-content-model'); \
|
||||
snapshot_download('nomic-ai/nomic-embed-text-v1'); \
|
||||
snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
|
||||
from sentence_transformers import SentenceTransformer; \
|
||||
SentenceTransformer(model_name_or_path='nomic-ai/nomic-embed-text-v1', trust_remote_code=True);" && \
|
||||
# In case the user has volumes mounted to /app/.cache/huggingface that they've downloaded while
|
||||
# running Onyx, move the current contents of the cache folder to a temporary location to ensure
|
||||
# it's preserved in order to combine with the user's cache contents
|
||||
mv /app/.cache/huggingface /app/.cache/temp_huggingface && \
|
||||
chown -R onyx:onyx /app
|
||||
# In case the user has volumes mounted to /app/.cache/huggingface that they've downloaded while
|
||||
# running Onyx, move the current contents of the cache folder to a temporary location to ensure
|
||||
# it's preserved in order to combine with the user's cache contents
|
||||
COPY --chown=onyx:onyx --from=tokenizers /app/.cache/huggingface /app/.cache/temp_huggingface
|
||||
COPY --chown=onyx:onyx --from=onyx-models /app/.cache/huggingface /app/.cache/temp_huggingface
|
||||
COPY --chown=onyx:onyx --from=embedding-models /app/.cache/huggingface /app/.cache/temp_huggingface
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
@@ -83,4 +75,8 @@ COPY ./model_server /app/model_server
|
||||
|
||||
ENV PYTHONPATH=/app
|
||||
|
||||
# Default ONYX_VERSION, typically overridden during builds by GitHub Actions.
|
||||
ARG ONYX_VERSION=0.0.0-dev
|
||||
ENV ONYX_VERSION=${ONYX_VERSION}
|
||||
|
||||
CMD ["uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000"]
|
||||
|
||||
@@ -0,0 +1,33 @@
|
||||
"""add theme_preference to user
|
||||
|
||||
Revision ID: 09995b8811eb
|
||||
Revises: 3d1cca026fe8
|
||||
Create Date: 2025-10-24 08:58:50.246949
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from onyx.db.enums import ThemePreference
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "09995b8811eb"
|
||||
down_revision = "3d1cca026fe8"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add the nullable ``theme_preference`` column to the ``user`` table."""
    # native_enum=False: no database-level ENUM type is created for this column.
    theme_type = sa.Enum(ThemePreference, native_enum=False)
    theme_column = sa.Column("theme_preference", theme_type, nullable=True)
    op.add_column("user", theme_column)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Remove the ``theme_preference`` column from the ``user`` table."""
    op.drop_column(table_name="user", column_name="theme_preference")
|
||||
@@ -0,0 +1,89 @@
|
||||
"""add internet search and content provider tables
|
||||
|
||||
Revision ID: 1f2a3b4c5d6e
|
||||
Revises: 9drpiiw74ljy
|
||||
Create Date: 2025-11-10 19:45:00.000000
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "1f2a3b4c5d6e"
|
||||
down_revision = "9drpiiw74ljy"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Create the internet search/content provider tables and their indexes.

    Both tables share one column layout, so they are created from a single
    recipe. Each table also gets an index on its ``is_active`` column.
    """
    provider_tables = (
        ("internet_search_provider", "ix_internet_search_provider_is_active"),
        ("internet_content_provider", "ix_internet_content_provider_is_active"),
    )

    for table_name, active_index_name in provider_tables:
        # A Column object can belong to only one table, so a fresh set is
        # built on every iteration.
        provider_columns = [
            sa.Column("id", sa.Integer(), primary_key=True),
            sa.Column("name", sa.String(), nullable=False, unique=True),
            sa.Column("provider_type", sa.String(), nullable=False),
            sa.Column("api_key", sa.LargeBinary(), nullable=True),
            sa.Column(
                "config", postgresql.JSONB(astext_type=sa.Text()), nullable=True
            ),
            sa.Column(
                "is_active",
                sa.Boolean(),
                nullable=False,
                server_default=sa.text("false"),
            ),
            sa.Column(
                "time_created",
                sa.DateTime(timezone=True),
                nullable=False,
                server_default=sa.text("now()"),
            ),
            sa.Column(
                "time_updated",
                sa.DateTime(timezone=True),
                nullable=False,
                server_default=sa.text("now()"),
            ),
        ]
        op.create_table(table_name, *provider_columns)
        op.create_index(active_index_name, table_name, ["is_active"])
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop the provider indexes and tables in reverse order of creation."""
    teardown_order = (
        ("ix_internet_content_provider_is_active", "internet_content_provider"),
        ("ix_internet_search_provider_is_active", "internet_search_provider"),
    )
    for index_name, table_name in teardown_order:
        # Each index is dropped explicitly before its table.
        op.drop_index(index_name, table_name=table_name)
        op.drop_table(table_name)
|
||||
@@ -0,0 +1,72 @@
|
||||
"""add switchover_type field and remove background_reindex_enabled
|
||||
|
||||
Revision ID: 2acdef638fc2
|
||||
Revises: a4f23d6b71c8
|
||||
Create Date: 2025-01-XX XX:XX:XX.XXXXXX
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
from onyx.db.enums import SwitchoverType
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "2acdef638fc2"
|
||||
down_revision = "a4f23d6b71c8"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Replace ``background_reindex_enabled`` with ``switchover_type``.

    Adds the new non-nullable column, backfills it from the old boolean
    (``true`` becomes ``'REINDEX'``, anything else becomes ``'INSTANT'``),
    then drops the old column.
    """
    # Add switchover_type column with default value of REINDEX.
    # sa.Enum built from a Python enum class persists member *names* by
    # default, so the server default must be the member name (the same
    # 'REINDEX' literal the backfill below writes), not the member's value.
    op.add_column(
        "search_settings",
        sa.Column(
            "switchover_type",
            sa.Enum(SwitchoverType, native_enum=False),
            nullable=False,
            server_default=SwitchoverType.REINDEX.name,
        ),
    )

    # Migrate existing data: set switchover_type based on background_reindex_enabled
    # REINDEX where background_reindex_enabled=True, INSTANT where False
    op.execute(
        """
        UPDATE search_settings
        SET switchover_type = CASE
            WHEN background_reindex_enabled = true THEN 'REINDEX'
            ELSE 'INSTANT'
        END
        """
    )

    # Remove the background_reindex_enabled column (replaced by switchover_type)
    op.drop_column("search_settings", "background_reindex_enabled")
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Restore ``background_reindex_enabled`` and drop ``switchover_type``."""
    settings_table = "search_settings"

    # Bring back the boolean flag; it is non-nullable with a server default
    # of true.
    legacy_flag = sa.Column(
        "background_reindex_enabled",
        sa.Boolean(),
        nullable=False,
        server_default="true",
    )
    op.add_column(settings_table, legacy_flag)

    # Derive the flag from switchover_type: INSTANT maps to false, every
    # other value maps to true.
    backfill_sql = """
        UPDATE search_settings
        SET background_reindex_enabled = CASE
            WHEN switchover_type = 'INSTANT' THEN false
            ELSE true
        END
        """
    op.execute(backfill_sql)

    # The replacement column is no longer needed.
    op.drop_column(settings_table, "switchover_type")
|
||||
@@ -12,6 +12,7 @@ from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy import text
|
||||
import logging
|
||||
import fastapi_users_db_sqlalchemy
|
||||
|
||||
logger = logging.getLogger("alembic.runtime.migration")
|
||||
|
||||
@@ -58,6 +59,9 @@ def upgrade() -> None:
|
||||
logger.info("Dropping chat_session.folder_id...")
|
||||
|
||||
# Drop foreign key constraint first
|
||||
op.execute(
|
||||
"ALTER TABLE chat_session DROP CONSTRAINT IF EXISTS chat_session_chat_folder_fk"
|
||||
)
|
||||
op.execute(
|
||||
"ALTER TABLE chat_session DROP CONSTRAINT IF EXISTS chat_session_folder_fk"
|
||||
)
|
||||
@@ -172,20 +176,6 @@ def downgrade() -> None:
|
||||
"user_file", sa.Column("folder_id", sa.Integer(), nullable=True)
|
||||
)
|
||||
|
||||
# Recreate chat_folder table
|
||||
if "chat_folder" not in inspector.get_table_names():
|
||||
op.create_table(
|
||||
"chat_folder",
|
||||
sa.Column("id", sa.Integer(), nullable=False),
|
||||
sa.Column("user_id", sa.UUID(), nullable=False),
|
||||
sa.Column("name", sa.String(), nullable=False),
|
||||
sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
|
||||
sa.PrimaryKeyConstraint("id"),
|
||||
sa.ForeignKeyConstraint(
|
||||
["user_id"], ["user.id"], name="chat_folder_user_fk"
|
||||
),
|
||||
)
|
||||
|
||||
# Recreate persona__user_folder table
|
||||
if "persona__user_folder" not in inspector.get_table_names():
|
||||
op.create_table(
|
||||
@@ -197,6 +187,26 @@ def downgrade() -> None:
|
||||
sa.ForeignKeyConstraint(["user_folder_id"], ["user_project.id"]),
|
||||
)
|
||||
|
||||
# Recreate chat_folder table and related structures
|
||||
if "chat_folder" not in inspector.get_table_names():
|
||||
op.create_table(
|
||||
"chat_folder",
|
||||
sa.Column("id", sa.Integer(), nullable=False),
|
||||
sa.Column(
|
||||
"user_id",
|
||||
fastapi_users_db_sqlalchemy.generics.GUID(),
|
||||
nullable=True,
|
||||
),
|
||||
sa.Column("name", sa.String(), nullable=True),
|
||||
sa.Column("display_priority", sa.Integer(), nullable=False),
|
||||
sa.ForeignKeyConstraint(
|
||||
["user_id"],
|
||||
["user.id"],
|
||||
name="chat_folder_user_id_fkey",
|
||||
),
|
||||
sa.PrimaryKeyConstraint("id"),
|
||||
)
|
||||
|
||||
# Add folder_id back to chat_session
|
||||
if "chat_session" in inspector.get_table_names():
|
||||
columns = [col["name"] for col in inspector.get_columns("chat_session")]
|
||||
@@ -208,7 +218,7 @@ def downgrade() -> None:
|
||||
# Add foreign key if chat_folder exists
|
||||
if "chat_folder" in inspector.get_table_names():
|
||||
op.create_foreign_key(
|
||||
"chat_session_folder_fk",
|
||||
"chat_session_chat_folder_fk",
|
||||
"chat_session",
|
||||
"chat_folder",
|
||||
["folder_id"],
|
||||
|
||||
@@ -292,7 +292,7 @@ def downgrade() -> None:
|
||||
logger.error("CRITICAL: Downgrading data cleanup cannot restore deleted data!")
|
||||
logger.error("Data restoration requires backup files or database backup.")
|
||||
|
||||
raise NotImplementedError(
|
||||
"Downgrade of legacy data cleanup is not supported. "
|
||||
"Deleted data must be restored from backups."
|
||||
)
|
||||
# raise NotImplementedError(
|
||||
# "Downgrade of legacy data cleanup is not supported. "
|
||||
# "Deleted data must be restored from backups."
|
||||
# )
|
||||
|
||||
@@ -0,0 +1,89 @@
|
||||
"""seed_exa_provider_from_env
|
||||
|
||||
Revision ID: 3c9a65f1207f
|
||||
Revises: 1f2a3b4c5d6e
|
||||
Create Date: 2025-11-20 19:18:00.000000
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
from dotenv import load_dotenv, find_dotenv
|
||||
|
||||
from onyx.utils.encryption import encrypt_string_to_bytes
|
||||
|
||||
revision = "3c9a65f1207f"
|
||||
down_revision = "1f2a3b4c5d6e"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
EXA_PROVIDER_NAME = "Exa"
|
||||
|
||||
|
||||
def _get_internet_search_table(metadata: sa.MetaData) -> sa.Table:
    """Return an ``internet_search_provider`` table definition on *metadata*.

    The table is declared inline here rather than imported from the
    application's models.
    """

    def _timestamp(column_name):
        # Non-null, timezone-aware timestamp that defaults to now() server-side.
        return sa.Column(
            column_name,
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.text("now()"),
        )

    provider_columns = [
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("name", sa.String),
        sa.Column("provider_type", sa.String),
        sa.Column("api_key", sa.LargeBinary),
        sa.Column("config", postgresql.JSONB),
        sa.Column("is_active", sa.Boolean),
        _timestamp("time_created"),
        _timestamp("time_updated"),
    ]
    return sa.Table("internet_search_provider", metadata, *provider_columns)
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Seed an "Exa" search provider row from the ``EXA_API_KEY`` variable.

    Does nothing when the variable is unset or empty, or when a provider
    named "Exa" already exists. The new row is marked active only if no other
    provider is currently active.
    """
    # Pick up variables from a .env file, if one can be found.
    load_dotenv(find_dotenv())

    api_key = os.environ.get("EXA_API_KEY")
    if not api_key:
        return

    connection = op.get_bind()
    providers = _get_internet_search_table(sa.MetaData())

    by_name = sa.select(providers.c.id).where(
        providers.c.name == EXA_PROVIDER_NAME
    )
    if connection.execute(by_name).first():
        # Already seeded; leave the existing row untouched.
        return

    encrypted_key = encrypt_string_to_bytes(api_key)

    any_active = sa.select(providers.c.id).where(providers.c.is_active.is_(True))
    active_row = connection.execute(any_active).first()

    new_provider = {
        "name": EXA_PROVIDER_NAME,
        "provider_type": "exa",
        "api_key": encrypted_key,
        "config": None,
        "is_active": not bool(active_row),
    }
    connection.execute(providers.insert().values(**new_provider))
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Do nothing: any provider row seeded by ``upgrade`` is left in place."""
    return None
|
||||
@@ -0,0 +1,121 @@
|
||||
"""add_oauth_config_and_user_tokens
|
||||
|
||||
Revision ID: 3d1cca026fe8
|
||||
Revises: c8a93a2af083
|
||||
Create Date: 2025-10-21 13:27:34.274721
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import fastapi_users_db_sqlalchemy
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "3d1cca026fe8"
|
||||
down_revision = "c8a93a2af083"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Create the OAuth tables and link ``tool`` to ``oauth_config``.

    Creates ``oauth_config`` and ``oauth_user_token``, indexes
    ``oauth_user_token.user_id``, and adds a nullable ``oauth_config_id``
    foreign key to ``tool``.
    """

    def _timestamp(column_name):
        # Non-null, timezone-aware timestamp that defaults to now() server-side.
        return sa.Column(
            column_name,
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        )

    def _jsonb():
        return postgresql.JSONB(astext_type=sa.Text())

    # oauth_config table
    oauth_config_schema = [
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("authorization_url", sa.Text(), nullable=False),
        sa.Column("token_url", sa.Text(), nullable=False),
        sa.Column("client_id", sa.LargeBinary(), nullable=False),
        sa.Column("client_secret", sa.LargeBinary(), nullable=False),
        sa.Column("scopes", _jsonb(), nullable=True),
        sa.Column("additional_params", _jsonb(), nullable=True),
        _timestamp("created_at"),
        _timestamp("updated_at"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("name"),
    ]
    op.create_table("oauth_config", *oauth_config_schema)

    # oauth_user_token table
    user_token_schema = [
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("oauth_config_id", sa.Integer(), nullable=False),
        sa.Column(
            "user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=False,
        ),
        sa.Column("token_data", sa.LargeBinary(), nullable=False),
        _timestamp("created_at"),
        _timestamp("updated_at"),
        sa.ForeignKeyConstraint(
            ["oauth_config_id"], ["oauth_config.id"], ondelete="CASCADE"
        ),
        sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint(
            "oauth_config_id", "user_id", name="uq_oauth_user_token"
        ),
    ]
    op.create_table("oauth_user_token", *user_token_schema)

    # Index user_id for user-based token lookups. The unique constraint on
    # (oauth_config_id, user_id) already provides an index for config-based
    # lookups.
    op.create_index(
        "ix_oauth_user_token_user_id",
        "oauth_user_token",
        ["user_id"],
    )

    # Give tool a nullable reference to oauth_config.
    oauth_ref_column = sa.Column("oauth_config_id", sa.Integer(), nullable=True)
    op.add_column("tool", oauth_ref_column)
    op.create_foreign_key(
        "tool_oauth_config_fk",
        "tool",
        "oauth_config",
        ["oauth_config_id"],
        ["id"],
        ondelete="SET NULL",
    )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Undo the OAuth schema changes in reverse order of creation."""
    # Detach tool from oauth_config: drop the foreign key, then the column.
    op.drop_constraint(
        "tool_oauth_config_fk", table_name="tool", type_="foreignkey"
    )
    op.drop_column(table_name="tool", column_name="oauth_config_id")

    # Remove the user_id index, then the token table (its rows go with it).
    op.drop_index("ix_oauth_user_token_user_id", table_name="oauth_user_token")
    op.drop_table("oauth_user_token")

    # Finally remove the config table that the token table referenced.
    op.drop_table("oauth_config")
|
||||
104
backend/alembic/versions/4f8a2b3c1d9e_add_open_url_tool.py
Normal file
104
backend/alembic/versions/4f8a2b3c1d9e_add_open_url_tool.py
Normal file
@@ -0,0 +1,104 @@
|
||||
"""add_open_url_tool
|
||||
|
||||
Revision ID: 4f8a2b3c1d9e
|
||||
Revises: a852cbe15577
|
||||
Create Date: 2025-11-24 12:00:00.000000
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "4f8a2b3c1d9e"
|
||||
down_revision = "a852cbe15577"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
OPEN_URL_TOOL = {
|
||||
"name": "OpenURLTool",
|
||||
"display_name": "Open URL",
|
||||
"description": (
|
||||
"The Open URL Action allows the agent to fetch and read contents of web pages."
|
||||
),
|
||||
"in_code_tool_id": "OpenURLTool",
|
||||
"enabled": True,
|
||||
}
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Insert or refresh the OpenURLTool row and attach it to every persona.

    If a tool with the same ``in_code_tool_id`` already exists, its name,
    display name, and description are updated. Otherwise a new row is
    inserted. Each persona that is not yet linked to the tool then gets a
    ``persona__tool`` row.
    """
    connection = op.get_bind()

    find_tool = sa.text("SELECT id FROM tool WHERE in_code_tool_id = :in_code_tool_id")
    lookup_params = {"in_code_tool_id": OPEN_URL_TOOL["in_code_tool_id"]}

    tool_row = connection.execute(find_tool, lookup_params).fetchone()

    if tool_row:
        # Tool is already registered: refresh its descriptive fields.
        connection.execute(
            sa.text(
                """
                UPDATE tool
                SET name = :name,
                    display_name = :display_name,
                    description = :description
                WHERE in_code_tool_id = :in_code_tool_id
                """
            ),
            OPEN_URL_TOOL,
        )
    else:
        # Tool is missing: insert it, then read back the generated id.
        connection.execute(
            sa.text(
                """
                INSERT INTO tool (name, display_name, description, in_code_tool_id, enabled)
                VALUES (:name, :display_name, :description, :in_code_tool_id, :enabled)
                """
            ),
            OPEN_URL_TOOL,
        )
        tool_row = connection.execute(find_tool, lookup_params).fetchone()

    tool_id = tool_row[0]  # type: ignore

    # Link the tool to every existing persona that does not have it yet.
    link_exists = sa.text(
        """
                SELECT 1 FROM persona__tool
                WHERE persona_id = :persona_id AND tool_id = :tool_id
                """
    )
    create_link = sa.text(
        """
                    INSERT INTO persona__tool (persona_id, tool_id)
                    VALUES (:persona_id, :tool_id)
                    """
    )
    persona_rows = connection.execute(sa.text("SELECT id FROM persona")).fetchall()

    for persona_row in persona_rows:
        (persona_id,) = persona_row
        link_params = {"persona_id": persona_id, "tool_id": tool_id}

        if connection.execute(link_exists, link_params).fetchone():
            continue

        connection.execute(create_link, link_params)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Do nothing on downgrade.

    The tool row is deliberately kept: leaving it around is harmless, and a
    later upgrade finds the existing row instead of inserting a new one.
    """
    return None
|
||||
@@ -0,0 +1,88 @@
|
||||
"""add_personal_access_token_table
|
||||
|
||||
Revision ID: 5e1c073d48a3
|
||||
Revises: 09995b8811eb
|
||||
Create Date: 2025-10-30 17:30:24.308521
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "5e1c073d48a3"
|
||||
down_revision = "09995b8811eb"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Create the ``personal_access_token`` table and its two indexes."""
    pat_table = "personal_access_token"

    def _tz_datetime():
        return sa.DateTime(timezone=True)

    token_schema = [
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("hashed_token", sa.String(length=64), nullable=False),
        sa.Column("token_display", sa.String(), nullable=False),
        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("expires_at", _tz_datetime(), nullable=True),
        sa.Column(
            "created_at",
            _tz_datetime(),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column("last_used_at", _tz_datetime(), nullable=True),
        sa.Column(
            "is_revoked",
            sa.Boolean(),
            server_default=sa.text("false"),
            nullable=False,
        ),
        # Tokens are removed together with their owning user.
        sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("hashed_token"),
    ]
    op.create_table(pat_table, *token_schema)

    # One index on expiry, and one on (user_id, created_at DESC).
    index_specs = (
        ("ix_personal_access_token_expires_at", ["expires_at"]),
        ("ix_pat_user_created", ["user_id", sa.text("created_at DESC")]),
    )
    for index_name, index_columns in index_specs:
        op.create_index(index_name, pat_table, index_columns, unique=False)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop the ``personal_access_token`` indexes, then the table itself."""
    pat_table = "personal_access_token"

    # Indexes go first, in reverse order of creation.
    for index_name in (
        "ix_pat_user_created",
        "ix_personal_access_token_expires_at",
    ):
        op.drop_index(index_name, table_name=pat_table)

    op.drop_table(pat_table)
|
||||
@@ -0,0 +1,44 @@
|
||||
"""add_created_at_in_project_userfile
|
||||
|
||||
Revision ID: 6436661d5b65
|
||||
Revises: c7e9f4a3b2d1
|
||||
Create Date: 2025-11-24 11:50:24.536052
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "6436661d5b65"
|
||||
down_revision = "c7e9f4a3b2d1"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add ``created_at`` to ``project__user_file`` and index it per project."""
    link_table = "project__user_file"

    # Non-null timestamp with a server-side default of now().
    created_at_column = sa.Column(
        "created_at",
        sa.DateTime(timezone=True),
        server_default=sa.text("now()"),
        nullable=False,
    )
    op.add_column(link_table, created_at_column)

    # Composite index on (project_id, created_at DESC).
    index_columns = ["project_id", sa.text("created_at DESC")]
    op.create_index(
        "ix_project__user_file_project_id_created_at",
        link_table,
        index_columns,
    )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop the composite index, then the ``created_at`` column."""
    link_table = "project__user_file"

    # The index references created_at, so it is removed before the column.
    op.drop_index(
        "ix_project__user_file_project_id_created_at", table_name=link_table
    )
    op.drop_column(table_name=link_table, column_name="created_at")
|
||||
@@ -45,8 +45,23 @@ def upgrade() -> None:
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.drop_constraint(
|
||||
"chat_session_chat_folder_fk", "chat_session", type_="foreignkey"
|
||||
)
|
||||
op.drop_column("chat_session", "folder_id")
|
||||
op.drop_table("chat_folder")
|
||||
bind = op.get_bind()
|
||||
inspector = sa.inspect(bind)
|
||||
|
||||
if "chat_session" in inspector.get_table_names():
|
||||
chat_session_fks = {
|
||||
fk.get("name") for fk in inspector.get_foreign_keys("chat_session")
|
||||
}
|
||||
if "chat_session_chat_folder_fk" in chat_session_fks:
|
||||
op.drop_constraint(
|
||||
"chat_session_chat_folder_fk", "chat_session", type_="foreignkey"
|
||||
)
|
||||
|
||||
chat_session_columns = {
|
||||
col["name"] for col in inspector.get_columns("chat_session")
|
||||
}
|
||||
if "folder_id" in chat_session_columns:
|
||||
op.drop_column("chat_session", "folder_id")
|
||||
|
||||
if "chat_folder" in inspector.get_table_names():
|
||||
op.drop_table("chat_folder")
|
||||
|
||||
@@ -180,14 +180,162 @@ def downgrade() -> None:
|
||||
)
|
||||
logger.error("Only proceed if absolutely necessary and have backups.")
|
||||
|
||||
# The downgrade would need to:
|
||||
# 1. Add back integer columns
|
||||
# 2. Generate new sequential IDs
|
||||
# 3. Update all foreign key references
|
||||
# 4. Swap primary keys back
|
||||
# This is complex and risky, so we raise an error instead
|
||||
bind = op.get_bind()
|
||||
inspector = sa.inspect(bind)
|
||||
|
||||
raise NotImplementedError(
|
||||
"Downgrade of UUID primary key swap is not supported due to data loss risk. "
|
||||
"Manual intervention with data backup/restore is required."
|
||||
# Capture existing primary key definitions so we can restore them after swaps
|
||||
persona_pk = inspector.get_pk_constraint("persona__user_file") or {}
|
||||
persona_pk_name = persona_pk.get("name")
|
||||
persona_pk_cols = persona_pk.get("constrained_columns") or []
|
||||
|
||||
project_pk = inspector.get_pk_constraint("project__user_file") or {}
|
||||
project_pk_name = project_pk.get("name")
|
||||
project_pk_cols = project_pk.get("constrained_columns") or []
|
||||
|
||||
# Drop foreign keys that reference the UUID primary key
|
||||
op.drop_constraint(
|
||||
"persona__user_file_user_file_id_fkey",
|
||||
"persona__user_file",
|
||||
type_="foreignkey",
|
||||
)
|
||||
op.drop_constraint(
|
||||
"fk_project__user_file_user_file_id",
|
||||
"project__user_file",
|
||||
type_="foreignkey",
|
||||
)
|
||||
|
||||
# Drop primary keys that rely on the UUID column so we can replace it
|
||||
if persona_pk_name:
|
||||
op.drop_constraint(persona_pk_name, "persona__user_file", type_="primary")
|
||||
if project_pk_name:
|
||||
op.drop_constraint(project_pk_name, "project__user_file", type_="primary")
|
||||
|
||||
# Rebuild integer IDs on user_file using a sequence-backed column
|
||||
op.execute("CREATE SEQUENCE IF NOT EXISTS user_file_id_seq")
|
||||
op.add_column(
|
||||
"user_file",
|
||||
sa.Column(
|
||||
"id_int",
|
||||
sa.Integer(),
|
||||
server_default=sa.text("nextval('user_file_id_seq')"),
|
||||
nullable=False,
|
||||
),
|
||||
)
|
||||
op.execute("ALTER SEQUENCE user_file_id_seq OWNED BY user_file.id_int")
|
||||
|
||||
# Prepare integer foreign key columns on referencing tables
|
||||
op.add_column(
|
||||
"persona__user_file",
|
||||
sa.Column("user_file_id_int", sa.Integer(), nullable=True),
|
||||
)
|
||||
op.add_column(
|
||||
"project__user_file",
|
||||
sa.Column("user_file_id_int", sa.Integer(), nullable=True),
|
||||
)
|
||||
|
||||
# Populate the new integer foreign key columns by mapping from the UUID IDs
|
||||
op.execute(
|
||||
"""
|
||||
UPDATE persona__user_file AS p
|
||||
SET user_file_id_int = uf.id_int
|
||||
FROM user_file AS uf
|
||||
WHERE p.user_file_id = uf.id
|
||||
"""
|
||||
)
|
||||
op.execute(
|
||||
"""
|
||||
UPDATE project__user_file AS p
|
||||
SET user_file_id_int = uf.id_int
|
||||
FROM user_file AS uf
|
||||
WHERE p.user_file_id = uf.id
|
||||
"""
|
||||
)
|
||||
|
||||
op.alter_column(
|
||||
"persona__user_file",
|
||||
"user_file_id_int",
|
||||
existing_type=sa.Integer(),
|
||||
nullable=False,
|
||||
)
|
||||
op.alter_column(
|
||||
"project__user_file",
|
||||
"user_file_id_int",
|
||||
existing_type=sa.Integer(),
|
||||
nullable=False,
|
||||
)
|
||||
|
||||
# Remove the UUID foreign key columns and rename the integer replacements
|
||||
op.drop_column("persona__user_file", "user_file_id")
|
||||
op.alter_column(
|
||||
"persona__user_file",
|
||||
"user_file_id_int",
|
||||
new_column_name="user_file_id",
|
||||
existing_type=sa.Integer(),
|
||||
nullable=False,
|
||||
)
|
||||
|
||||
op.drop_column("project__user_file", "user_file_id")
|
||||
op.alter_column(
|
||||
"project__user_file",
|
||||
"user_file_id_int",
|
||||
new_column_name="user_file_id",
|
||||
existing_type=sa.Integer(),
|
||||
nullable=False,
|
||||
)
|
||||
|
||||
# Swap the user_file primary key back to the integer column
|
||||
op.drop_constraint("user_file_pkey", "user_file", type_="primary")
|
||||
op.drop_column("user_file", "id")
|
||||
op.alter_column(
|
||||
"user_file",
|
||||
"id_int",
|
||||
new_column_name="id",
|
||||
existing_type=sa.Integer(),
|
||||
)
|
||||
op.alter_column(
|
||||
"user_file",
|
||||
"id",
|
||||
existing_type=sa.Integer(),
|
||||
nullable=False,
|
||||
server_default=sa.text("nextval('user_file_id_seq')"),
|
||||
)
|
||||
op.execute("ALTER SEQUENCE user_file_id_seq OWNED BY user_file.id")
|
||||
op.execute(
|
||||
"""
|
||||
SELECT setval(
|
||||
'user_file_id_seq',
|
||||
GREATEST(COALESCE(MAX(id), 1), 1),
|
||||
MAX(id) IS NOT NULL
|
||||
)
|
||||
FROM user_file
|
||||
"""
|
||||
)
|
||||
op.create_primary_key("user_file_pkey", "user_file", ["id"])
|
||||
|
||||
# Restore primary keys on referencing tables
|
||||
if persona_pk_cols:
|
||||
op.create_primary_key(
|
||||
"persona__user_file_pkey", "persona__user_file", persona_pk_cols
|
||||
)
|
||||
if project_pk_cols:
|
||||
op.create_primary_key(
|
||||
"project__user_file_pkey",
|
||||
"project__user_file",
|
||||
project_pk_cols,
|
||||
)
|
||||
|
||||
# Recreate foreign keys pointing at the integer primary key
|
||||
op.create_foreign_key(
|
||||
"persona__user_file_user_file_id_fkey",
|
||||
"persona__user_file",
|
||||
"user_file",
|
||||
["user_file_id"],
|
||||
["id"],
|
||||
)
|
||||
op.create_foreign_key(
|
||||
"fk_project__user_file_user_file_id",
|
||||
"project__user_file",
|
||||
"user_file",
|
||||
["user_file_id"],
|
||||
["id"],
|
||||
)
|
||||
|
||||
@@ -181,12 +181,21 @@ def upgrade() -> None:
|
||||
sa.Column("user_file_id", psql.UUID(as_uuid=True), nullable=False),
|
||||
sa.PrimaryKeyConstraint("project_id", "user_file_id"),
|
||||
)
|
||||
logger.info("Created project__user_file table")
|
||||
|
||||
# Only create the index if it doesn't exist
|
||||
existing_indexes = [
|
||||
ix["name"] for ix in inspector.get_indexes("project__user_file")
|
||||
]
|
||||
if "idx_project__user_file_user_file_id" not in existing_indexes:
|
||||
op.create_index(
|
||||
"idx_project__user_file_user_file_id",
|
||||
"project__user_file",
|
||||
["user_file_id"],
|
||||
)
|
||||
logger.info("Created project__user_file table")
|
||||
logger.info(
|
||||
"Created index idx_project__user_file_user_file_id on project__user_file"
|
||||
)
|
||||
|
||||
logger.info("Migration 1 (schema additions) completed successfully")
|
||||
|
||||
@@ -201,7 +210,7 @@ def downgrade() -> None:
|
||||
|
||||
# Drop project__user_file table
|
||||
if "project__user_file" in inspector.get_table_names():
|
||||
op.drop_index("idx_project__user_file_user_file_id", "project__user_file")
|
||||
# op.drop_index("idx_project__user_file_user_file_id", "project__user_file")
|
||||
op.drop_table("project__user_file")
|
||||
logger.info("Dropped project__user_file table")
|
||||
|
||||
|
||||
@@ -0,0 +1,97 @@
|
||||
"""add config to federated_connector
|
||||
|
||||
Revision ID: 9drpiiw74ljy
|
||||
Revises: 2acdef638fc2
|
||||
Create Date: 2025-11-03 12:00:00.000000
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "9drpiiw74ljy"
|
||||
down_revision = "2acdef638fc2"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add ``federated_connector.config`` and backfill it for Slack connectors.

    The column is added only when it is not already present in the current
    schema.  The backfill UPDATE is executed on every run, whether or not the
    column had to be created.
    """
    bind = op.get_bind()

    # Probe information_schema (scoped to the current schema) for the column.
    column_lookup = sa.text(
        """
        SELECT column_name
        FROM information_schema.columns
        WHERE table_schema = current_schema()
        AND table_name = 'federated_connector'
        AND column_name = 'config'
        """
    )
    has_config_column = bind.execute(column_lookup).fetchone() is not None

    # Add config column with default empty object (only if it doesn't exist)
    if not has_config_column:
        config_column = sa.Column(
            "config", postgresql.JSONB(), nullable=False, server_default="{}"
        )
        op.add_column("federated_connector", config_column)

    # Data migration: single bulk update for all Slack connectors.
    #
    # For each FEDERATED_SLACK connector the CTE picks the lowest-id
    # federated_connector__document_set row with non-NULL entities and builds:
    #   * channels + search_all_channels=false when entities.channels is a
    #     non-empty JSON array,
    #   * search_all_channels=true otherwise,
    # and carries entities.include_dm over whenever that key is present.
    # Connectors without any such row are left untouched.
    backfill = sa.text(
        """
        WITH connector_configs AS (
            SELECT
                fc.id as connector_id,
                CASE
                    WHEN fcds.entities->'channels' IS NOT NULL
                        AND jsonb_typeof(fcds.entities->'channels') = 'array'
                        AND jsonb_array_length(fcds.entities->'channels') > 0
                    THEN
                        jsonb_build_object(
                            'channels', fcds.entities->'channels',
                            'search_all_channels', false
                        ) ||
                        CASE
                            WHEN fcds.entities->'include_dm' IS NOT NULL
                            THEN jsonb_build_object('include_dm', fcds.entities->'include_dm')
                            ELSE '{}'::jsonb
                        END
                    ELSE
                        jsonb_build_object('search_all_channels', true) ||
                        CASE
                            WHEN fcds.entities->'include_dm' IS NOT NULL
                            THEN jsonb_build_object('include_dm', fcds.entities->'include_dm')
                            ELSE '{}'::jsonb
                        END
                END as config
            FROM federated_connector fc
            LEFT JOIN LATERAL (
                SELECT entities
                FROM federated_connector__document_set
                WHERE federated_connector_id = fc.id
                AND entities IS NOT NULL
                ORDER BY id
                LIMIT 1
            ) fcds ON true
            WHERE fc.source = 'FEDERATED_SLACK'
            AND fcds.entities IS NOT NULL
        )
        UPDATE federated_connector fc
        SET config = cc.config
        FROM connector_configs cc
        WHERE fc.id = cc.connector_id
        """
    )
    bind.execute(backfill)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop the ``config`` column from ``federated_connector``."""
    op.drop_column(table_name="federated_connector", column_name="config")
|
||||
@@ -0,0 +1,61 @@
|
||||
"""add llm provider persona restrictions
|
||||
|
||||
Revision ID: a4f23d6b71c8
|
||||
Revises: 5e1c073d48a3
|
||||
Create Date: 2025-10-21 00:00:00.000000
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "a4f23d6b71c8"
|
||||
down_revision = "5e1c073d48a3"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Create the ``llm_provider__persona`` association table and its indexes.

    Both columns are foreign keys with ``ON DELETE CASCADE`` and together form
    the composite primary key.
    """
    association_table = "llm_provider__persona"

    provider_fk = sa.ForeignKeyConstraint(
        ["llm_provider_id"], ["llm_provider.id"], ondelete="CASCADE"
    )
    persona_fk = sa.ForeignKeyConstraint(
        ["persona_id"], ["persona.id"], ondelete="CASCADE"
    )
    op.create_table(
        association_table,
        sa.Column("llm_provider_id", sa.Integer(), nullable=False),
        sa.Column("persona_id", sa.Integer(), nullable=False),
        provider_fk,
        persona_fk,
        sa.PrimaryKeyConstraint("llm_provider_id", "persona_id"),
    )

    # (index name, indexed columns), created in this order.
    index_specs = (
        ("ix_llm_provider__persona_llm_provider_id", ["llm_provider_id"]),
        ("ix_llm_provider__persona_persona_id", ["persona_id"]),
        ("ix_llm_provider__persona_composite", ["persona_id", "llm_provider_id"]),
    )
    for index_name, indexed_columns in index_specs:
        op.create_index(index_name, association_table, indexed_columns)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop the ``llm_provider__persona`` indexes, then the table itself."""
    association_table = "llm_provider__persona"

    # Indexes are removed in the reverse of their creation order.
    index_names = (
        "ix_llm_provider__persona_composite",
        "ix_llm_provider__persona_persona_id",
        "ix_llm_provider__persona_llm_provider_id",
    )
    for index_name in index_names:
        op.drop_index(index_name, table_name=association_table)

    op.drop_table(association_table)
|
||||
572
backend/alembic/versions/a852cbe15577_new_chat_history.py
Normal file
572
backend/alembic/versions/a852cbe15577_new_chat_history.py
Normal file
@@ -0,0 +1,572 @@
|
||||
"""New Chat History
|
||||
|
||||
Revision ID: a852cbe15577
|
||||
Revises: 6436661d5b65
|
||||
Create Date: 2025-11-08 15:16:37.781308
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "a852cbe15577"
|
||||
down_revision = "6436661d5b65"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def _column_exists(
    conn: sa.engine.Connection, table_name: str, column_name: str
) -> bool:
    """Return True if ``table_name.column_name`` exists in the current schema."""
    row = conn.execute(
        sa.text(
            """
            SELECT 1 FROM information_schema.columns
            WHERE table_schema = current_schema()
            AND table_name = :table_name
            AND column_name = :column_name
            """
        ),
        {"table_name": table_name, "column_name": column_name},
    ).fetchone()
    return row is not None


def _column_has_type(
    conn: sa.engine.Connection, table_name: str, column_name: str, data_type: str
) -> bool:
    """Return True if the column exists in the current schema with ``data_type``."""
    row = conn.execute(
        sa.text(
            """
            SELECT data_type FROM information_schema.columns
            WHERE table_schema = current_schema()
            AND table_name = :table_name
            AND column_name = :column_name
            """
        ),
        {"table_name": table_name, "column_name": column_name},
    ).fetchone()
    return row is not None and row[0] == data_type


def _table_exists(conn: sa.engine.Connection, table_name: str) -> bool:
    """Return True if ``table_name`` exists in the current schema."""
    row = conn.execute(
        sa.text(
            """
            SELECT 1 FROM information_schema.tables
            WHERE table_schema = current_schema()
            AND table_name = :table_name
            """
        ),
        {"table_name": table_name},
    ).fetchone()
    return row is not None


def upgrade() -> None:
    """Reshape chat history storage: chat_message, tool_call and persona.

    Every step is guarded by an existence check so the migration can be
    re-run against a partially migrated database.  The checks are scoped to
    ``current_schema()``: without that filter a column or table of the same
    name in a *different* schema would satisfy the check and the step would
    be skipped (or wrongly attempted) for the schema being migrated.
    """
    # Drop research agent tables (if they exist)
    op.execute("DROP TABLE IF EXISTS research_agent_iteration_sub_step CASCADE")
    op.execute("DROP TABLE IF EXISTS research_agent_iteration CASCADE")

    # Drop agent sub query and sub question tables (if they exist)
    op.execute("DROP TABLE IF EXISTS agent__sub_query__search_doc CASCADE")
    op.execute("DROP TABLE IF EXISTS agent__sub_query CASCADE")
    op.execute("DROP TABLE IF EXISTS agent__sub_question CASCADE")

    conn = op.get_bind()

    # ------------------------------------------------------------------
    # chat_message
    # ------------------------------------------------------------------

    # Rename parent_message to parent_message_id and make it a foreign key
    # (if not already done)
    if _column_exists(conn, "chat_message", "parent_message"):
        op.alter_column(
            "chat_message", "parent_message", new_column_name="parent_message_id"
        )
        op.create_foreign_key(
            "fk_chat_message_parent_message_id",
            "chat_message",
            "chat_message",
            ["parent_message_id"],
            ["id"],
        )

    # Rename latest_child_message to latest_child_message_id and make it a
    # foreign key (if not already done)
    if _column_exists(conn, "chat_message", "latest_child_message"):
        op.alter_column(
            "chat_message",
            "latest_child_message",
            new_column_name="latest_child_message_id",
        )
        op.create_foreign_key(
            "fk_chat_message_latest_child_message_id",
            "chat_message",
            "chat_message",
            ["latest_child_message_id"],
            ["id"],
        )

    # Add reasoning_tokens column (if not exists)
    if not _column_exists(conn, "chat_message", "reasoning_tokens"):
        op.add_column(
            "chat_message", sa.Column("reasoning_tokens", sa.Text(), nullable=True)
        )

    # Drop columns no longer needed (if they exist)
    obsolete_chat_message_columns = (
        "rephrased_query",
        "alternate_assistant_id",
        "overridden_model",
        "is_agentic",
        "refined_answer_improvement",
        "research_type",
        "research_plan",
        "research_answer_purpose",
    )
    for col in obsolete_chat_message_columns:
        if _column_exists(conn, "chat_message", col):
            op.drop_column("chat_message", col)

    # ------------------------------------------------------------------
    # tool_call
    # ------------------------------------------------------------------

    # Add chat_session_id column (if not exists)
    # NOTE(review): the column is NOT NULL with no server default, which
    # PostgreSQL rejects when tool_call already contains rows - confirm the
    # table is expected to be empty at this point.
    if not _column_exists(conn, "tool_call", "chat_session_id"):
        op.add_column(
            "tool_call",
            sa.Column("chat_session_id", postgresql.UUID(as_uuid=True), nullable=False),
        )
        op.create_foreign_key(
            "fk_tool_call_chat_session_id",
            "tool_call",
            "chat_session",
            ["chat_session_id"],
            ["id"],
        )

    # Rename message_id to parent_chat_message_id and make nullable
    # (if not already done)
    if _column_exists(conn, "tool_call", "message_id"):
        op.alter_column(
            "tool_call",
            "message_id",
            new_column_name="parent_chat_message_id",
            nullable=True,
        )

    # Add parent_tool_call_id (if not exists)
    if not _column_exists(conn, "tool_call", "parent_tool_call_id"):
        op.add_column(
            "tool_call", sa.Column("parent_tool_call_id", sa.Integer(), nullable=True)
        )
        op.create_foreign_key(
            "fk_tool_call_parent_tool_call_id",
            "tool_call",
            "tool_call",
            ["parent_tool_call_id"],
            ["id"],
        )
        # NOTE(review): kept under the same guard as the column addition so a
        # re-run does not attempt to drop the constraint a second time -
        # confirm this matches the intended nesting.
        op.drop_constraint("uq_tool_call_message_id", "tool_call", type_="unique")

    # Add turn_number, tool_id (if not exists)
    for col_name in ("turn_number", "tool_id"):
        if not _column_exists(conn, "tool_call", col_name):
            op.add_column(
                "tool_call",
                sa.Column(col_name, sa.Integer(), nullable=False, server_default="0"),
            )

    # Add tool_call_id as String (if not exists)
    if not _column_exists(conn, "tool_call", "tool_call_id"):
        op.add_column(
            "tool_call",
            sa.Column("tool_call_id", sa.String(), nullable=False, server_default=""),
        )

    # Add reasoning_tokens (if not exists)
    if not _column_exists(conn, "tool_call", "reasoning_tokens"):
        op.add_column(
            "tool_call", sa.Column("reasoning_tokens", sa.Text(), nullable=True)
        )

    # Rename tool_arguments to tool_call_arguments (if not already done)
    if _column_exists(conn, "tool_call", "tool_arguments"):
        op.alter_column(
            "tool_call", "tool_arguments", new_column_name="tool_call_arguments"
        )

    # Rename tool_result to tool_call_response and change type from JSONB to
    # Text (if not already done).  When the rename already happened, only
    # convert if the column is still JSONB.
    if _column_exists(conn, "tool_call", "tool_result"):
        op.alter_column(
            "tool_call", "tool_result", new_column_name="tool_call_response"
        )
        convert_response_to_text = True
    else:
        convert_response_to_text = _column_has_type(
            conn, "tool_call", "tool_call_response", "jsonb"
        )
    if convert_response_to_text:
        op.execute(
            sa.text(
                """
                ALTER TABLE tool_call
                ALTER COLUMN tool_call_response TYPE TEXT
                USING tool_call_response::text
                """
            )
        )

    # Add tool_call_tokens (if not exists)
    if not _column_exists(conn, "tool_call", "tool_call_tokens"):
        op.add_column(
            "tool_call",
            sa.Column(
                "tool_call_tokens", sa.Integer(), nullable=False, server_default="0"
            ),
        )

    # Add generated_images column for image generation tool replay
    # (if not exists)
    if not _column_exists(conn, "tool_call", "generated_images"):
        op.add_column(
            "tool_call",
            sa.Column("generated_images", postgresql.JSONB(), nullable=True),
        )

    # Drop tool_name column (if exists)
    if _column_exists(conn, "tool_call", "tool_name"):
        op.drop_column("tool_call", "tool_name")

    # Create tool_call__search_doc association table (if not exists)
    if not _table_exists(conn, "tool_call__search_doc"):
        op.create_table(
            "tool_call__search_doc",
            sa.Column("tool_call_id", sa.Integer(), nullable=False),
            sa.Column("search_doc_id", sa.Integer(), nullable=False),
            sa.ForeignKeyConstraint(
                ["tool_call_id"], ["tool_call.id"], ondelete="CASCADE"
            ),
            sa.ForeignKeyConstraint(
                ["search_doc_id"], ["search_doc.id"], ondelete="CASCADE"
            ),
            sa.PrimaryKeyConstraint("tool_call_id", "search_doc_id"),
        )

    # ------------------------------------------------------------------
    # persona
    # ------------------------------------------------------------------

    # Add replace_base_system_prompt to persona table (if not exists)
    if not _column_exists(conn, "persona", "replace_base_system_prompt"):
        op.add_column(
            "persona",
            sa.Column(
                "replace_base_system_prompt",
                sa.Boolean(),
                nullable=False,
                server_default="false",
            ),
        )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Revert the chat-history reshaping on persona, tool_call and chat_message.

    Restores the previous column names/types, re-adds the dropped
    chat_message columns and recreates the agent / research-agent tables
    (empty).  Unlike the upgrade, these steps are not guarded by existence
    checks.
    """

    def created_at_column(name: str) -> sa.Column:
        # Fresh timestamp column per table; Column objects cannot be shared.
        return sa.Column(
            name,
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        )

    # Reverse persona changes
    op.drop_column("persona", "replace_base_system_prompt")

    # Drop tool_call__search_doc association table
    op.execute("DROP TABLE IF EXISTS tool_call__search_doc CASCADE")

    # Reverse ToolCall changes
    # NOTE(review): tool_name is re-added as NOT NULL without a default, and
    # the uq_tool_call_message_id constraint is not recreated here - confirm
    # both are acceptable for a downgrade.
    op.add_column("tool_call", sa.Column("tool_name", sa.String(), nullable=False))
    for added_column in ("tool_id", "tool_call_tokens", "generated_images"):
        op.drop_column("tool_call", added_column)

    # Change tool_call_response back to JSONB before renaming
    restore_jsonb = sa.text(
        """
        ALTER TABLE tool_call
        ALTER COLUMN tool_call_response TYPE JSONB
        USING tool_call_response::jsonb
        """
    )
    op.execute(restore_jsonb)
    op.alter_column("tool_call", "tool_call_response", new_column_name="tool_result")
    op.alter_column(
        "tool_call", "tool_call_arguments", new_column_name="tool_arguments"
    )

    for added_column in ("reasoning_tokens", "tool_call_id", "turn_number"):
        op.drop_column("tool_call", added_column)

    op.drop_constraint(
        "fk_tool_call_parent_tool_call_id", "tool_call", type_="foreignkey"
    )
    op.drop_column("tool_call", "parent_tool_call_id")

    op.alter_column(
        "tool_call",
        "parent_chat_message_id",
        new_column_name="message_id",
        nullable=False,
    )

    op.drop_constraint("fk_tool_call_chat_session_id", "tool_call", type_="foreignkey")
    op.drop_column("tool_call", "chat_session_id")

    # Re-add the chat_message columns the upgrade removed, in this order.
    restored_chat_message_columns = [
        sa.Column(
            "research_answer_purpose",
            sa.Enum("INTRO", "DEEP_DIVE", name="researchanswerpurpose"),
            nullable=True,
        ),
        sa.Column("research_plan", postgresql.JSONB(), nullable=True),
        sa.Column(
            "research_type",
            sa.Enum("SIMPLE", "DEEP", name="researchtype"),
            nullable=True,
        ),
        sa.Column("refined_answer_improvement", sa.Boolean(), nullable=True),
        sa.Column("is_agentic", sa.Boolean(), nullable=False, server_default="false"),
        sa.Column("overridden_model", sa.String(), nullable=True),
        sa.Column("alternate_assistant_id", sa.Integer(), nullable=True),
        sa.Column("rephrased_query", sa.Text(), nullable=True),
    ]
    for restored_column in restored_chat_message_columns:
        op.add_column("chat_message", restored_column)

    op.drop_column("chat_message", "reasoning_tokens")

    # Drop the self-referencing foreign keys, then restore the old names.
    op.drop_constraint(
        "fk_chat_message_latest_child_message_id", "chat_message", type_="foreignkey"
    )
    op.alter_column(
        "chat_message",
        "latest_child_message_id",
        new_column_name="latest_child_message",
    )
    op.drop_constraint(
        "fk_chat_message_parent_message_id", "chat_message", type_="foreignkey"
    )
    op.alter_column(
        "chat_message", "parent_message_id", new_column_name="parent_message"
    )

    # Recreate agent sub question and sub query tables
    op.create_table(
        "agent__sub_question",
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column("primary_question_id", sa.Integer(), nullable=False),
        sa.Column("chat_session_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("sub_question", sa.Text(), nullable=False),
        sa.Column("level", sa.Integer(), nullable=False),
        sa.Column("level_question_num", sa.Integer(), nullable=False),
        created_at_column("time_created"),
        sa.Column("sub_answer", sa.Text(), nullable=False),
        sa.Column("sub_question_doc_results", postgresql.JSONB(), nullable=False),
        sa.ForeignKeyConstraint(
            ["primary_question_id"], ["chat_message.id"], ondelete="CASCADE"
        ),
        sa.ForeignKeyConstraint(["chat_session_id"], ["chat_session.id"]),
        sa.PrimaryKeyConstraint("id"),
    )

    op.create_table(
        "agent__sub_query",
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column("parent_question_id", sa.Integer(), nullable=False),
        sa.Column("chat_session_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("sub_query", sa.Text(), nullable=False),
        created_at_column("time_created"),
        sa.ForeignKeyConstraint(
            ["parent_question_id"], ["agent__sub_question.id"], ondelete="CASCADE"
        ),
        sa.ForeignKeyConstraint(["chat_session_id"], ["chat_session.id"]),
        sa.PrimaryKeyConstraint("id"),
    )

    op.create_table(
        "agent__sub_query__search_doc",
        sa.Column("sub_query_id", sa.Integer(), nullable=False),
        sa.Column("search_doc_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["sub_query_id"], ["agent__sub_query.id"], ondelete="CASCADE"
        ),
        sa.ForeignKeyConstraint(["search_doc_id"], ["search_doc.id"]),
        sa.PrimaryKeyConstraint("sub_query_id", "search_doc_id"),
    )

    # Recreate research agent tables
    op.create_table(
        "research_agent_iteration",
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("primary_question_id", sa.Integer(), nullable=False),
        sa.Column("iteration_nr", sa.Integer(), nullable=False),
        created_at_column("created_at"),
        sa.Column("purpose", sa.String(), nullable=True),
        sa.Column("reasoning", sa.String(), nullable=True),
        sa.ForeignKeyConstraint(
            ["primary_question_id"], ["chat_message.id"], ondelete="CASCADE"
        ),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint(
            "primary_question_id",
            "iteration_nr",
            name="_research_agent_iteration_unique_constraint",
        ),
    )

    # The sub-step table references the (primary_question_id, iteration_nr)
    # pair made unique on research_agent_iteration above.
    iteration_fk = sa.ForeignKeyConstraint(
        ["primary_question_id", "iteration_nr"],
        [
            "research_agent_iteration.primary_question_id",
            "research_agent_iteration.iteration_nr",
        ],
        ondelete="CASCADE",
    )
    op.create_table(
        "research_agent_iteration_sub_step",
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("primary_question_id", sa.Integer(), nullable=False),
        sa.Column("iteration_nr", sa.Integer(), nullable=False),
        sa.Column("iteration_sub_step_nr", sa.Integer(), nullable=False),
        created_at_column("created_at"),
        sa.Column("sub_step_instructions", sa.String(), nullable=True),
        sa.Column("sub_step_tool_id", sa.Integer(), nullable=True),
        sa.Column("reasoning", sa.String(), nullable=True),
        sa.Column("sub_answer", sa.String(), nullable=True),
        sa.Column("cited_doc_results", postgresql.JSONB(), nullable=False),
        sa.Column("claims", postgresql.JSONB(), nullable=True),
        sa.Column("is_web_fetch", sa.Boolean(), nullable=True),
        sa.Column("queries", postgresql.JSONB(), nullable=True),
        sa.Column("generated_images", postgresql.JSONB(), nullable=True),
        sa.Column("additional_data", postgresql.JSONB(), nullable=True),
        iteration_fk,
        sa.ForeignKeyConstraint(["sub_step_tool_id"], ["tool.id"], ondelete="SET NULL"),
        sa.PrimaryKeyConstraint("id"),
    )
|
||||
73
backend/alembic/versions/c7e9f4a3b2d1_add_python_tool.py
Normal file
73
backend/alembic/versions/c7e9f4a3b2d1_add_python_tool.py
Normal file
@@ -0,0 +1,73 @@
|
||||
"""add_python_tool
|
||||
|
||||
Revision ID: c7e9f4a3b2d1
|
||||
Revises: 3c9a65f1207f
|
||||
Create Date: 2025-11-08 00:00:00.000000
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "c7e9f4a3b2d1"
|
||||
down_revision = "3c9a65f1207f"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add PythonTool to built-in tools"""
    bind = op.get_bind()

    insert_tool = sa.text(
        """
        INSERT INTO tool (name, display_name, description, in_code_tool_id, enabled)
        VALUES (:name, :display_name, :description, :in_code_tool_id, :enabled)
        """
    )
    tool_row = {
        "name": "PythonTool",
        # in the UI, call it `Code Interpreter` since this is a well known term for this tool
        "display_name": "Code Interpreter",
        "description": (
            "The Code Interpreter Action allows the assistant to execute "
            "Python code in a secure, isolated environment for data analysis, "
            "computation, visualization, and file processing."
        ),
        "in_code_tool_id": "PythonTool",
        "enabled": True,
    }
    bind.execute(insert_tool, tool_row)

    # needed to store files generated by the python tool
    file_ids_column = sa.Column(
        "file_ids",
        postgresql.JSONB(astext_type=sa.Text()),
        nullable=True,
    )
    op.add_column("research_agent_iteration_sub_step", file_ids_column)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Remove PythonTool from built-in tools"""
    bind = op.get_bind()

    # Delete by in_code_tool_id, the same identifier the upgrade inserted.
    delete_tool = sa.text(
        """
        DELETE FROM tool
        WHERE in_code_tool_id = :in_code_tool_id
        """
    )
    bind.execute(delete_tool, {"in_code_tool_id": "PythonTool"})

    # Remove the column that stored files generated by the python tool.
    op.drop_column("research_agent_iteration_sub_step", "file_ids")
|
||||
52
backend/docker-bake.hcl
Normal file
52
backend/docker-bake.hcl
Normal file
@@ -0,0 +1,52 @@
|
||||
// Default group: the targets listed here are the ones built when no target
// is named on the command line. "integration" is not part of it.
group "default" {
  targets = ["backend", "model-server"]
}

// Image repositories and tag; each has a default value.
variable "BACKEND_REPOSITORY" {
  default = "onyxdotapp/onyx-backend"
}

variable "MODEL_SERVER_REPOSITORY" {
  default = "onyxdotapp/onyx-model-server"
}

variable "INTEGRATION_REPOSITORY" {
  default = "onyxdotapp/onyx-integration"
}

variable "TAG" {
  default = "latest"
}

// Backend image: seeds the build cache from the registry's :latest image and
// embeds cache metadata inline in the produced image.
target "backend" {
  context    = "."
  dockerfile = "Dockerfile"

  cache-from = ["type=registry,ref=${BACKEND_REPOSITORY}:latest"]
  cache-to   = ["type=inline"]

  tags = ["${BACKEND_REPOSITORY}:${TAG}"]
}

// Model server image: same caching scheme as the backend target.
target "model-server" {
  context = "."

  dockerfile = "Dockerfile.model_server"

  cache-from = ["type=registry,ref=${MODEL_SERVER_REPOSITORY}:latest"]
  cache-to   = ["type=inline"]

  tags = ["${MODEL_SERVER_REPOSITORY}:${TAG}"]
}

// Integration-test image, layered on the result of the backend target.
target "integration" {
  context    = "."
  dockerfile = "tests/integration/Dockerfile"

  // Provide the base image via build context from the backend target
  contexts = {
    base = "target:backend"
  }

  tags = ["${INTEGRATION_REPOSITORY}:${TAG}"]
}
|
||||
@@ -124,6 +124,8 @@ SUPER_CLOUD_API_KEY = os.environ.get("SUPER_CLOUD_API_KEY", "api_key")
|
||||
POSTHOG_API_KEY = os.environ.get("POSTHOG_API_KEY") or "FooBar"
|
||||
POSTHOG_HOST = os.environ.get("POSTHOG_HOST") or "https://us.i.posthog.com"
|
||||
|
||||
MARKETING_POSTHOG_API_KEY = os.environ.get("MARKETING_POSTHOG_API_KEY")
|
||||
|
||||
HUBSPOT_TRACKING_URL = os.environ.get("HUBSPOT_TRACKING_URL")
|
||||
|
||||
GATED_TENANTS_KEY = "gated_tenants"
|
||||
|
||||
@@ -199,10 +199,7 @@ def fetch_persona_message_analytics(
|
||||
ChatMessage.chat_session_id == ChatSession.id,
|
||||
)
|
||||
.where(
|
||||
or_(
|
||||
ChatMessage.alternate_assistant_id == persona_id,
|
||||
ChatSession.persona_id == persona_id,
|
||||
),
|
||||
ChatSession.persona_id == persona_id,
|
||||
ChatMessage.time_sent >= start,
|
||||
ChatMessage.time_sent <= end,
|
||||
ChatMessage.message_type == MessageType.ASSISTANT,
|
||||
@@ -231,10 +228,7 @@ def fetch_persona_unique_users(
|
||||
ChatMessage.chat_session_id == ChatSession.id,
|
||||
)
|
||||
.where(
|
||||
or_(
|
||||
ChatMessage.alternate_assistant_id == persona_id,
|
||||
ChatSession.persona_id == persona_id,
|
||||
),
|
||||
ChatSession.persona_id == persona_id,
|
||||
ChatMessage.time_sent >= start,
|
||||
ChatMessage.time_sent <= end,
|
||||
ChatMessage.message_type == MessageType.ASSISTANT,
|
||||
@@ -265,10 +259,7 @@ def fetch_assistant_message_analytics(
|
||||
ChatMessage.chat_session_id == ChatSession.id,
|
||||
)
|
||||
.where(
|
||||
or_(
|
||||
ChatMessage.alternate_assistant_id == assistant_id,
|
||||
ChatSession.persona_id == assistant_id,
|
||||
),
|
||||
ChatSession.persona_id == assistant_id,
|
||||
ChatMessage.time_sent >= start,
|
||||
ChatMessage.time_sent <= end,
|
||||
ChatMessage.message_type == MessageType.ASSISTANT,
|
||||
@@ -299,10 +290,7 @@ def fetch_assistant_unique_users(
|
||||
ChatMessage.chat_session_id == ChatSession.id,
|
||||
)
|
||||
.where(
|
||||
or_(
|
||||
ChatMessage.alternate_assistant_id == assistant_id,
|
||||
ChatSession.persona_id == assistant_id,
|
||||
),
|
||||
ChatSession.persona_id == assistant_id,
|
||||
ChatMessage.time_sent >= start,
|
||||
ChatMessage.time_sent <= end,
|
||||
ChatMessage.message_type == MessageType.ASSISTANT,
|
||||
@@ -332,10 +320,7 @@ def fetch_assistant_unique_users_total(
|
||||
ChatMessage.chat_session_id == ChatSession.id,
|
||||
)
|
||||
.where(
|
||||
or_(
|
||||
ChatMessage.alternate_assistant_id == assistant_id,
|
||||
ChatSession.persona_id == assistant_id,
|
||||
),
|
||||
ChatSession.persona_id == assistant_id,
|
||||
ChatMessage.time_sent >= start,
|
||||
ChatMessage.time_sent <= end,
|
||||
ChatMessage.message_type == MessageType.ASSISTANT,
|
||||
|
||||
@@ -55,18 +55,7 @@ def get_empty_chat_messages_entries__paginated(
|
||||
|
||||
# Get assistant name (from session persona, or alternate if specified)
|
||||
assistant_name = None
|
||||
if message.alternate_assistant_id:
|
||||
# If there's an alternate assistant, we need to fetch it
|
||||
from onyx.db.models import Persona
|
||||
|
||||
alternate_persona = (
|
||||
db_session.query(Persona)
|
||||
.filter(Persona.id == message.alternate_assistant_id)
|
||||
.first()
|
||||
)
|
||||
if alternate_persona:
|
||||
assistant_name = alternate_persona.name
|
||||
elif chat_session.persona:
|
||||
if chat_session.persona:
|
||||
assistant_name = chat_session.persona.name
|
||||
|
||||
message_skeletons.append(
|
||||
|
||||
@@ -581,6 +581,48 @@ def update_user_curator_relationship(
|
||||
db_session.commit()
|
||||
|
||||
|
||||
def add_users_to_user_group(
|
||||
db_session: Session,
|
||||
user: User | None,
|
||||
user_group_id: int,
|
||||
user_ids: list[UUID],
|
||||
) -> UserGroup:
|
||||
db_user_group = fetch_user_group(db_session=db_session, user_group_id=user_group_id)
|
||||
if db_user_group is None:
|
||||
raise ValueError(f"UserGroup with id '{user_group_id}' not found")
|
||||
|
||||
missing_users = [
|
||||
user_id for user_id in user_ids if fetch_user_by_id(db_session, user_id) is None
|
||||
]
|
||||
if missing_users:
|
||||
raise ValueError(
|
||||
f"User(s) not found: {', '.join(str(user_id) for user_id in missing_users)}"
|
||||
)
|
||||
|
||||
_check_user_group_is_modifiable(db_user_group)
|
||||
|
||||
current_user_ids = [user.id for user in db_user_group.users]
|
||||
current_user_ids_set = set(current_user_ids)
|
||||
new_user_ids = [
|
||||
user_id for user_id in user_ids if user_id not in current_user_ids_set
|
||||
]
|
||||
|
||||
if not new_user_ids:
|
||||
return db_user_group
|
||||
|
||||
user_group_update = UserGroupUpdate(
|
||||
user_ids=current_user_ids + new_user_ids,
|
||||
cc_pair_ids=[cc_pair.id for cc_pair in db_user_group.cc_pairs],
|
||||
)
|
||||
|
||||
return update_user_group(
|
||||
db_session=db_session,
|
||||
user=user,
|
||||
user_group_id=user_group_id,
|
||||
user_group_update=user_group_update,
|
||||
)
|
||||
|
||||
|
||||
def update_user_group(
|
||||
db_session: Session,
|
||||
user: User | None,
|
||||
@@ -603,6 +645,17 @@ def update_user_group(
|
||||
added_user_ids = list(updated_user_ids - current_user_ids)
|
||||
removed_user_ids = list(current_user_ids - updated_user_ids)
|
||||
|
||||
if added_user_ids:
|
||||
missing_users = [
|
||||
user_id
|
||||
for user_id in added_user_ids
|
||||
if fetch_user_by_id(db_session, user_id) is None
|
||||
]
|
||||
if missing_users:
|
||||
raise ValueError(
|
||||
f"User(s) not found: {', '.join(str(user_id) for user_id in missing_users)}"
|
||||
)
|
||||
|
||||
# LEAVING THIS HERE FOR NOW FOR GIVING DIFFERENT ROLES
|
||||
# ACCESS TO DIFFERENT PERMISSIONS
|
||||
# if (removed_user_ids or added_user_ids) and (
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
<!-- <document type="danswer_chunk" mode="index" /> -->
|
||||
{{ document_elements }}
|
||||
</documents>
|
||||
<nodes count="60">
|
||||
<nodes count="50">
|
||||
<resources vcpu="8.0" memory="128.0Gb" architecture="arm64" storage-type="local"
|
||||
disk="475.0Gb" />
|
||||
</nodes>
|
||||
|
||||
@@ -139,19 +139,13 @@ def get_all_space_permissions(
|
||||
) -> dict[str, ExternalAccess]:
|
||||
logger.debug("Getting space permissions")
|
||||
# Gets all the spaces in the Confluence instance
|
||||
all_space_keys = []
|
||||
start = 0
|
||||
while True:
|
||||
spaces_batch = confluence_client.get_all_spaces(
|
||||
start=start, limit=REQUEST_PAGINATION_LIMIT
|
||||
all_space_keys = [
|
||||
key
|
||||
for space in confluence_client.retrieve_confluence_spaces(
|
||||
limit=REQUEST_PAGINATION_LIMIT,
|
||||
)
|
||||
for space in spaces_batch.get("results", []):
|
||||
all_space_keys.append(space.get("key"))
|
||||
|
||||
if len(spaces_batch.get("results", [])) < REQUEST_PAGINATION_LIMIT:
|
||||
break
|
||||
|
||||
start += len(spaces_batch.get("results", []))
|
||||
if (key := space.get("key"))
|
||||
]
|
||||
|
||||
# Gets the permissions for each space
|
||||
logger.debug(f"Got {len(all_space_keys)} spaces from confluence")
|
||||
|
||||
@@ -23,7 +23,7 @@ from ee.onyx.server.query_and_chat.chat_backend import (
|
||||
router as chat_router,
|
||||
)
|
||||
from ee.onyx.server.query_and_chat.query_backend import (
|
||||
basic_router as query_router,
|
||||
basic_router as ee_query_router,
|
||||
)
|
||||
from ee.onyx.server.query_history.api import router as query_history_router
|
||||
from ee.onyx.server.reporting.usage_export_api import router as usage_export_router
|
||||
@@ -48,6 +48,9 @@ from onyx.main import include_auth_router_with_prefix
|
||||
from onyx.main import include_router_with_global_prefix_prepended
|
||||
from onyx.main import lifespan as lifespan_base
|
||||
from onyx.main import use_route_function_names_as_operation_ids
|
||||
from onyx.server.query_and_chat.query_backend import (
|
||||
basic_router as query_router,
|
||||
)
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.variable_functionality import global_version
|
||||
from shared_configs.configs import MULTI_TENANT
|
||||
@@ -119,6 +122,7 @@ def get_application() -> FastAPI:
|
||||
include_router_with_global_prefix_prepended(application, query_history_router)
|
||||
# EE only backend APIs
|
||||
include_router_with_global_prefix_prepended(application, query_router)
|
||||
include_router_with_global_prefix_prepended(application, ee_query_router)
|
||||
include_router_with_global_prefix_prepended(application, chat_router)
|
||||
include_router_with_global_prefix_prepended(application, standard_answer_router)
|
||||
include_router_with_global_prefix_prepended(application, ee_oauth_router)
|
||||
|
||||
@@ -8,7 +8,7 @@ from fastapi import Request
|
||||
from fastapi import Response
|
||||
|
||||
from ee.onyx.auth.users import decode_anonymous_user_jwt_token
|
||||
from onyx.auth.api_key import extract_tenant_from_api_key_header
|
||||
from onyx.auth.utils import extract_tenant_from_auth_header
|
||||
from onyx.configs.constants import ANONYMOUS_USER_COOKIE_NAME
|
||||
from onyx.configs.constants import TENANT_ID_COOKIE_NAME
|
||||
from onyx.db.engine.sql_engine import is_valid_schema_name
|
||||
@@ -49,13 +49,13 @@ async def _get_tenant_id_from_request(
|
||||
) -> str:
|
||||
"""
|
||||
Attempt to extract tenant_id from:
|
||||
1) The API key header
|
||||
1) The API key or PAT (Personal Access Token) header
|
||||
2) The Redis-based token (stored in Cookie: fastapiusersauth)
|
||||
3) The anonymous user cookie
|
||||
Fallback: POSTGRES_DEFAULT_SCHEMA
|
||||
"""
|
||||
# Check for API key
|
||||
tenant_id = extract_tenant_from_api_key_header(request)
|
||||
# Check for API key or PAT in Authorization header
|
||||
tenant_id = extract_tenant_from_auth_header(request)
|
||||
if tenant_id is not None:
|
||||
return tenant_id
|
||||
|
||||
|
||||
@@ -76,6 +76,7 @@ class ConfluenceCloudOAuth:
|
||||
"read:confluence-content.permission%20"
|
||||
"read:confluence-user%20"
|
||||
"read:confluence-groups%20"
|
||||
"read:space:confluence%20"
|
||||
"readonly:content.attachment:confluence%20"
|
||||
"search:confluence%20"
|
||||
# granular scope
|
||||
|
||||
@@ -9,7 +9,7 @@ from ee.onyx.server.query_and_chat.models import (
|
||||
)
|
||||
from onyx.auth.users import current_user
|
||||
from onyx.chat.chat_utils import combine_message_thread
|
||||
from onyx.chat.chat_utils import create_chat_chain
|
||||
from onyx.chat.chat_utils import create_chat_history_chain
|
||||
from onyx.chat.models import ChatBasicResponse
|
||||
from onyx.chat.process_message import gather_stream
|
||||
from onyx.chat.process_message import stream_chat_message_objects
|
||||
@@ -69,7 +69,7 @@ def handle_simplified_chat_message(
|
||||
chat_session_id = chat_message_req.chat_session_id
|
||||
|
||||
try:
|
||||
parent_message, _ = create_chat_chain(
|
||||
parent_message, _ = create_chat_history_chain(
|
||||
chat_session_id=chat_session_id, db_session=db_session
|
||||
)
|
||||
except Exception:
|
||||
@@ -161,7 +161,7 @@ def handle_send_message_simple_with_history(
|
||||
persona_id=req.persona_id,
|
||||
)
|
||||
|
||||
llm, _ = get_llms_for_persona(persona=chat_session.persona)
|
||||
llm, _ = get_llms_for_persona(persona=chat_session.persona, user=user)
|
||||
|
||||
llm_tokenizer = get_tokenizer(
|
||||
model_name=llm.config.model_name,
|
||||
|
||||
@@ -6,18 +6,14 @@ from pydantic import BaseModel
|
||||
from pydantic import Field
|
||||
from pydantic import model_validator
|
||||
|
||||
from onyx.chat.models import PersonaOverrideConfig
|
||||
from onyx.chat.models import QADocsResponse
|
||||
from onyx.chat.models import ThreadMessage
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.context.search.enums import LLMEvaluationType
|
||||
from onyx.context.search.enums import SearchType
|
||||
from onyx.context.search.models import BaseFilters
|
||||
from onyx.context.search.models import BasicChunkRequest
|
||||
from onyx.context.search.models import ChunkContext
|
||||
from onyx.context.search.models import RerankingDetails
|
||||
from onyx.context.search.models import InferenceChunk
|
||||
from onyx.context.search.models import RetrievalDetails
|
||||
from onyx.server.manage.models import StandardAnswer
|
||||
from onyx.server.query_and_chat.streaming_models import CitationInfo
|
||||
from onyx.server.query_and_chat.streaming_models import SubQuestionIdentifier
|
||||
|
||||
|
||||
class StandardAnswerRequest(BaseModel):
|
||||
@@ -29,14 +25,12 @@ class StandardAnswerResponse(BaseModel):
|
||||
standard_answers: list[StandardAnswer] = Field(default_factory=list)
|
||||
|
||||
|
||||
class DocumentSearchRequest(ChunkContext):
|
||||
message: str
|
||||
search_type: SearchType
|
||||
retrieval_options: RetrievalDetails
|
||||
recency_bias_multiplier: float = 1.0
|
||||
evaluation_type: LLMEvaluationType
|
||||
# None to use system defaults for reranking
|
||||
rerank_settings: RerankingDetails | None = None
|
||||
class DocumentSearchRequest(BasicChunkRequest):
|
||||
user_selected_filters: BaseFilters | None = None
|
||||
|
||||
|
||||
class DocumentSearchResponse(BaseModel):
|
||||
top_documents: list[InferenceChunk]
|
||||
|
||||
|
||||
class BasicCreateChatMessageRequest(ChunkContext):
|
||||
@@ -96,17 +90,17 @@ class SimpleDoc(BaseModel):
|
||||
metadata: dict | None
|
||||
|
||||
|
||||
class AgentSubQuestion(SubQuestionIdentifier):
|
||||
class AgentSubQuestion(BaseModel):
|
||||
sub_question: str
|
||||
document_ids: list[str]
|
||||
|
||||
|
||||
class AgentAnswer(SubQuestionIdentifier):
|
||||
class AgentAnswer(BaseModel):
|
||||
answer: str
|
||||
answer_type: Literal["agent_sub_answer", "agent_level_answer"]
|
||||
|
||||
|
||||
class AgentSubQuery(SubQuestionIdentifier):
|
||||
class AgentSubQuery(BaseModel):
|
||||
sub_query: str
|
||||
query_id: int
|
||||
|
||||
@@ -152,45 +146,3 @@ class AgentSubQuery(SubQuestionIdentifier):
|
||||
sorted(level_question_dict.items(), key=lambda x: (x is None, x))
|
||||
)
|
||||
return sorted_dict
|
||||
|
||||
|
||||
class OneShotQARequest(ChunkContext):
|
||||
# Supports simplier APIs that don't deal with chat histories or message edits
|
||||
# Easier APIs to work with for developers
|
||||
persona_override_config: PersonaOverrideConfig | None = None
|
||||
persona_id: int | None = None
|
||||
|
||||
messages: list[ThreadMessage]
|
||||
retrieval_options: RetrievalDetails = Field(default_factory=RetrievalDetails)
|
||||
rerank_settings: RerankingDetails | None = None
|
||||
|
||||
# allows the caller to specify the exact search query they want to use
|
||||
# can be used if the message sent to the LLM / query should not be the same
|
||||
# will also disable Thread-based Rewording if specified
|
||||
query_override: str | None = None
|
||||
|
||||
# If True, skips generating an AI response to the search query
|
||||
skip_gen_ai_answer_generation: bool = False
|
||||
|
||||
# If True, uses agentic search instead of basic search
|
||||
use_agentic_search: bool = False
|
||||
|
||||
@model_validator(mode="after")
|
||||
def check_persona_fields(self) -> "OneShotQARequest":
|
||||
if self.persona_override_config is None and self.persona_id is None:
|
||||
raise ValueError("Exactly one of persona_config or persona_id must be set")
|
||||
elif self.persona_override_config is not None and (self.persona_id is not None):
|
||||
raise ValueError(
|
||||
"If persona_override_config is set, persona_id cannot be set"
|
||||
)
|
||||
return self
|
||||
|
||||
|
||||
class OneShotQAResponse(BaseModel):
|
||||
# This is built piece by piece, any of these can be None as the flow could break
|
||||
answer: str | None = None
|
||||
rephrase: str | None = None
|
||||
citations: list[CitationInfo] | None = None
|
||||
docs: QADocsResponse | None = None
|
||||
error_msg: str | None = None
|
||||
chat_message_id: int | None = None
|
||||
|
||||
@@ -1,260 +1,23 @@
|
||||
import json
|
||||
from collections.abc import Generator
|
||||
|
||||
from fastapi import APIRouter
|
||||
from fastapi import Depends
|
||||
from fastapi import HTTPException
|
||||
from fastapi.responses import StreamingResponse
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ee.onyx.onyxbot.slack.handlers.handle_standard_answers import (
|
||||
oneoff_standard_answers,
|
||||
)
|
||||
from ee.onyx.server.query_and_chat.models import DocumentSearchRequest
|
||||
from ee.onyx.server.query_and_chat.models import OneShotQARequest
|
||||
from ee.onyx.server.query_and_chat.models import OneShotQAResponse
|
||||
from ee.onyx.server.query_and_chat.models import StandardAnswerRequest
|
||||
from ee.onyx.server.query_and_chat.models import StandardAnswerResponse
|
||||
from onyx.auth.users import current_user
|
||||
from onyx.chat.chat_utils import combine_message_thread
|
||||
from onyx.chat.chat_utils import prepare_chat_message_request
|
||||
from onyx.chat.models import AnswerStream
|
||||
from onyx.chat.models import PersonaOverrideConfig
|
||||
from onyx.chat.models import QADocsResponse
|
||||
from onyx.chat.process_message import gather_stream
|
||||
from onyx.chat.process_message import stream_chat_message_objects
|
||||
from onyx.configs.onyxbot_configs import MAX_THREAD_CONTEXT_PERCENTAGE
|
||||
from onyx.context.search.models import SavedSearchDocWithContent
|
||||
from onyx.context.search.models import SearchRequest
|
||||
from onyx.context.search.pipeline import SearchPipeline
|
||||
from onyx.context.search.utils import dedupe_documents
|
||||
from onyx.context.search.utils import drop_llm_indices
|
||||
from onyx.context.search.utils import relevant_sections_to_indices
|
||||
from onyx.db.engine.sql_engine import get_session
|
||||
from onyx.db.models import Persona
|
||||
from onyx.db.models import User
|
||||
from onyx.db.persona import get_persona_by_id
|
||||
from onyx.llm.factory import get_default_llms
|
||||
from onyx.llm.factory import get_llms_for_persona
|
||||
from onyx.llm.factory import get_main_llm_from_tuple
|
||||
from onyx.natural_language_processing.utils import get_tokenizer
|
||||
from onyx.server.query_and_chat.streaming_models import CitationInfo
|
||||
from onyx.server.utils import get_json_line
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
basic_router = APIRouter(prefix="/query")
|
||||
|
||||
|
||||
class DocumentSearchResponse(BaseModel):
|
||||
top_documents: list[SavedSearchDocWithContent]
|
||||
llm_indices: list[int]
|
||||
|
||||
|
||||
@basic_router.post("/document-search")
|
||||
def handle_search_request(
|
||||
search_request: DocumentSearchRequest,
|
||||
user: User | None = Depends(current_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> DocumentSearchResponse:
|
||||
"""Simple search endpoint, does not create a new message or records in the DB"""
|
||||
query = search_request.message
|
||||
logger.notice(f"Received document search query: {query}")
|
||||
|
||||
llm, fast_llm = get_default_llms()
|
||||
|
||||
search_pipeline = SearchPipeline(
|
||||
search_request=SearchRequest(
|
||||
query=query,
|
||||
search_type=search_request.search_type,
|
||||
human_selected_filters=search_request.retrieval_options.filters,
|
||||
enable_auto_detect_filters=search_request.retrieval_options.enable_auto_detect_filters,
|
||||
persona=None, # For simplicity, default settings should be good for this search
|
||||
offset=search_request.retrieval_options.offset,
|
||||
limit=search_request.retrieval_options.limit,
|
||||
rerank_settings=search_request.rerank_settings,
|
||||
evaluation_type=search_request.evaluation_type,
|
||||
chunks_above=search_request.chunks_above,
|
||||
chunks_below=search_request.chunks_below,
|
||||
full_doc=search_request.full_doc,
|
||||
),
|
||||
user=user,
|
||||
llm=llm,
|
||||
fast_llm=fast_llm,
|
||||
skip_query_analysis=False,
|
||||
db_session=db_session,
|
||||
bypass_acl=False,
|
||||
)
|
||||
top_sections = search_pipeline.reranked_sections
|
||||
relevance_sections = search_pipeline.section_relevance
|
||||
top_docs = [
|
||||
SavedSearchDocWithContent(
|
||||
document_id=section.center_chunk.document_id,
|
||||
chunk_ind=section.center_chunk.chunk_id,
|
||||
content=section.center_chunk.content,
|
||||
semantic_identifier=section.center_chunk.semantic_identifier or "Unknown",
|
||||
link=(
|
||||
section.center_chunk.source_links.get(0)
|
||||
if section.center_chunk.source_links
|
||||
else None
|
||||
),
|
||||
blurb=section.center_chunk.blurb,
|
||||
source_type=section.center_chunk.source_type,
|
||||
boost=section.center_chunk.boost,
|
||||
hidden=section.center_chunk.hidden,
|
||||
metadata=section.center_chunk.metadata,
|
||||
score=section.center_chunk.score or 0.0,
|
||||
match_highlights=section.center_chunk.match_highlights,
|
||||
updated_at=section.center_chunk.updated_at,
|
||||
primary_owners=section.center_chunk.primary_owners,
|
||||
secondary_owners=section.center_chunk.secondary_owners,
|
||||
is_internet=False,
|
||||
db_doc_id=0,
|
||||
)
|
||||
for section in top_sections
|
||||
]
|
||||
|
||||
# Deduping happens at the last step to avoid harming quality by dropping content early on
|
||||
deduped_docs = top_docs
|
||||
dropped_inds = None
|
||||
|
||||
if search_request.retrieval_options.dedupe_docs:
|
||||
deduped_docs, dropped_inds = dedupe_documents(top_docs)
|
||||
|
||||
llm_indices = relevant_sections_to_indices(
|
||||
relevance_sections=relevance_sections, items=deduped_docs
|
||||
)
|
||||
|
||||
if dropped_inds:
|
||||
llm_indices = drop_llm_indices(
|
||||
llm_indices=llm_indices,
|
||||
search_docs=deduped_docs,
|
||||
dropped_indices=dropped_inds,
|
||||
)
|
||||
|
||||
return DocumentSearchResponse(top_documents=deduped_docs, llm_indices=llm_indices)
|
||||
|
||||
|
||||
def get_answer_stream(
|
||||
query_request: OneShotQARequest,
|
||||
user: User | None = Depends(current_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> AnswerStream:
|
||||
query = query_request.messages[0].message
|
||||
logger.notice(f"Received query for Answer API: {query}")
|
||||
|
||||
if (
|
||||
query_request.persona_override_config is None
|
||||
and query_request.persona_id is None
|
||||
):
|
||||
raise KeyError("Must provide persona ID or Persona Config")
|
||||
|
||||
persona_info: Persona | PersonaOverrideConfig | None = None
|
||||
if query_request.persona_override_config is not None:
|
||||
persona_info = query_request.persona_override_config
|
||||
elif query_request.persona_id is not None:
|
||||
persona_info = get_persona_by_id(
|
||||
persona_id=query_request.persona_id,
|
||||
user=user,
|
||||
db_session=db_session,
|
||||
is_for_edit=False,
|
||||
)
|
||||
|
||||
llm = get_main_llm_from_tuple(get_llms_for_persona(persona_info))
|
||||
|
||||
llm_tokenizer = get_tokenizer(
|
||||
model_name=llm.config.model_name,
|
||||
provider_type=llm.config.model_provider,
|
||||
)
|
||||
|
||||
max_history_tokens = int(
|
||||
llm.config.max_input_tokens * MAX_THREAD_CONTEXT_PERCENTAGE
|
||||
)
|
||||
|
||||
combined_message = combine_message_thread(
|
||||
messages=query_request.messages,
|
||||
max_tokens=max_history_tokens,
|
||||
llm_tokenizer=llm_tokenizer,
|
||||
)
|
||||
|
||||
# Also creates a new chat session
|
||||
request = prepare_chat_message_request(
|
||||
message_text=combined_message,
|
||||
user=user,
|
||||
persona_id=query_request.persona_id,
|
||||
persona_override_config=query_request.persona_override_config,
|
||||
message_ts_to_respond_to=None,
|
||||
retrieval_details=query_request.retrieval_options,
|
||||
rerank_settings=query_request.rerank_settings,
|
||||
db_session=db_session,
|
||||
use_agentic_search=query_request.use_agentic_search,
|
||||
skip_gen_ai_answer_generation=query_request.skip_gen_ai_answer_generation,
|
||||
)
|
||||
|
||||
packets = stream_chat_message_objects(
|
||||
new_msg_req=request,
|
||||
user=user,
|
||||
db_session=db_session,
|
||||
)
|
||||
|
||||
return packets
|
||||
|
||||
|
||||
@basic_router.post("/answer-with-citation")
|
||||
def get_answer_with_citation(
|
||||
request: OneShotQARequest,
|
||||
db_session: Session = Depends(get_session),
|
||||
user: User | None = Depends(current_user),
|
||||
) -> OneShotQAResponse:
|
||||
try:
|
||||
packets = get_answer_stream(request, user, db_session)
|
||||
answer = gather_stream(packets)
|
||||
|
||||
if answer.error_msg:
|
||||
raise RuntimeError(answer.error_msg)
|
||||
|
||||
return OneShotQAResponse(
|
||||
answer=answer.answer,
|
||||
chat_message_id=answer.message_id,
|
||||
error_msg=answer.error_msg,
|
||||
citations=[
|
||||
CitationInfo(citation_num=i, document_id=doc_id)
|
||||
for i, doc_id in answer.cited_documents.items()
|
||||
],
|
||||
docs=QADocsResponse(
|
||||
top_documents=answer.top_documents,
|
||||
predicted_flow=None,
|
||||
predicted_search=None,
|
||||
applied_source_filters=None,
|
||||
applied_time_cutoff=None,
|
||||
recency_bias_multiplier=0.0,
|
||||
),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error in get_answer_with_citation: {str(e)}", exc_info=True)
|
||||
raise HTTPException(status_code=500, detail="An internal server error occurred")
|
||||
|
||||
|
||||
@basic_router.post("/stream-answer-with-citation")
|
||||
def stream_answer_with_citation(
|
||||
request: OneShotQARequest,
|
||||
db_session: Session = Depends(get_session),
|
||||
user: User | None = Depends(current_user),
|
||||
) -> StreamingResponse:
|
||||
def stream_generator() -> Generator[str, None, None]:
|
||||
try:
|
||||
for packet in get_answer_stream(request, user, db_session):
|
||||
serialized = get_json_line(packet.model_dump())
|
||||
yield serialized
|
||||
except Exception as e:
|
||||
logger.exception("Error in answer streaming")
|
||||
yield json.dumps({"error": str(e)})
|
||||
|
||||
return StreamingResponse(stream_generator(), media_type="application/json")
|
||||
|
||||
|
||||
@basic_router.get("/standard-answer")
|
||||
def get_standard_answer(
|
||||
request: StandardAnswerRequest,
|
||||
|
||||
@@ -24,7 +24,7 @@ from onyx.auth.users import current_admin_user
|
||||
from onyx.auth.users import get_display_email
|
||||
from onyx.background.celery.versioned_apps.client import app as client_app
|
||||
from onyx.background.task_utils import construct_query_history_report_name
|
||||
from onyx.chat.chat_utils import create_chat_chain
|
||||
from onyx.chat.chat_utils import create_chat_history_chain
|
||||
from onyx.configs.app_configs import ONYX_QUERY_HISTORY_TYPE
|
||||
from onyx.configs.constants import FileOrigin
|
||||
from onyx.configs.constants import FileType
|
||||
@@ -123,10 +123,9 @@ def snapshot_from_chat_session(
|
||||
) -> ChatSessionSnapshot | None:
|
||||
try:
|
||||
# Older chats may not have the right structure
|
||||
last_message, messages = create_chat_chain(
|
||||
messages = create_chat_history_chain(
|
||||
chat_session_id=chat_session.id, db_session=db_session
|
||||
)
|
||||
messages.append(last_message)
|
||||
except RuntimeError:
|
||||
return None
|
||||
|
||||
|
||||
@@ -4,12 +4,14 @@ from fastapi import HTTPException
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ee.onyx.db.user_group import add_users_to_user_group
|
||||
from ee.onyx.db.user_group import fetch_user_groups
|
||||
from ee.onyx.db.user_group import fetch_user_groups_for_user
|
||||
from ee.onyx.db.user_group import insert_user_group
|
||||
from ee.onyx.db.user_group import prepare_user_group_for_deletion
|
||||
from ee.onyx.db.user_group import update_user_curator_relationship
|
||||
from ee.onyx.db.user_group import update_user_group
|
||||
from ee.onyx.server.user_group.models import AddUsersToUserGroupRequest
|
||||
from ee.onyx.server.user_group.models import SetCuratorRequest
|
||||
from ee.onyx.server.user_group.models import UserGroup
|
||||
from ee.onyx.server.user_group.models import UserGroupCreate
|
||||
@@ -79,6 +81,26 @@ def patch_user_group(
|
||||
raise HTTPException(status_code=404, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/admin/user-group/{user_group_id}/add-users")
|
||||
def add_users(
|
||||
user_group_id: int,
|
||||
add_users_request: AddUsersToUserGroupRequest,
|
||||
user: User | None = Depends(current_curator_or_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> UserGroup:
|
||||
try:
|
||||
return UserGroup.from_model(
|
||||
add_users_to_user_group(
|
||||
db_session=db_session,
|
||||
user=user,
|
||||
user_group_id=user_group_id,
|
||||
user_ids=add_users_request.user_ids,
|
||||
)
|
||||
)
|
||||
except ValueError as e:
|
||||
raise HTTPException(status_code=404, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/admin/user-group/{user_group_id}/set-curator")
|
||||
def set_user_curator(
|
||||
user_group_id: int,
|
||||
|
||||
@@ -87,6 +87,10 @@ class UserGroupUpdate(BaseModel):
|
||||
cc_pair_ids: list[int]
|
||||
|
||||
|
||||
class AddUsersToUserGroupRequest(BaseModel):
|
||||
user_ids: list[UUID]
|
||||
|
||||
|
||||
class SetCuratorRequest(BaseModel):
|
||||
user_id: UUID
|
||||
is_curator: bool
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
import json
|
||||
from typing import Any
|
||||
from urllib.parse import unquote
|
||||
|
||||
from posthog import Posthog
|
||||
|
||||
from ee.onyx.configs.app_configs import MARKETING_POSTHOG_API_KEY
|
||||
from ee.onyx.configs.app_configs import POSTHOG_API_KEY
|
||||
from ee.onyx.configs.app_configs import POSTHOG_HOST
|
||||
from onyx.utils.logger import setup_logger
|
||||
@@ -20,3 +23,80 @@ posthog = Posthog(
|
||||
debug=True,
|
||||
on_error=posthog_on_error,
|
||||
)
|
||||
|
||||
# For cross referencing between cloud and www Onyx sites
|
||||
# NOTE: These clients are separate because they are separate posthog projects.
|
||||
# We should eventually unify them into a single posthog project,
|
||||
# which would no longer require this workaround
|
||||
marketing_posthog = None
|
||||
if MARKETING_POSTHOG_API_KEY:
|
||||
marketing_posthog = Posthog(
|
||||
project_api_key=MARKETING_POSTHOG_API_KEY,
|
||||
host=POSTHOG_HOST,
|
||||
debug=True,
|
||||
on_error=posthog_on_error,
|
||||
)
|
||||
|
||||
|
||||
def capture_and_sync_with_alternate_posthog(
|
||||
alternate_distinct_id: str, event: str, properties: dict[str, Any]
|
||||
) -> None:
|
||||
"""
|
||||
Identify in both PostHog projects and capture the event in marketing.
|
||||
- Marketing keeps the marketing distinct_id (for feature flags).
|
||||
- Cloud identify uses the cloud distinct_id
|
||||
"""
|
||||
if not marketing_posthog:
|
||||
return
|
||||
|
||||
props = properties.copy()
|
||||
|
||||
try:
|
||||
marketing_posthog.identify(distinct_id=alternate_distinct_id, properties=props)
|
||||
marketing_posthog.capture(alternate_distinct_id, event, props)
|
||||
marketing_posthog.flush()
|
||||
except Exception as e:
|
||||
logger.error(f"Error capturing marketing posthog event: {e}")
|
||||
|
||||
try:
|
||||
if cloud_user_id := props.get("onyx_cloud_user_id"):
|
||||
cloud_props = props.copy()
|
||||
cloud_props.pop("onyx_cloud_user_id", None)
|
||||
|
||||
posthog.identify(
|
||||
distinct_id=cloud_user_id,
|
||||
properties=cloud_props,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error identifying cloud posthog user: {e}")
|
||||
|
||||
|
||||
def get_marketing_posthog_cookie_name() -> str | None:
|
||||
if not MARKETING_POSTHOG_API_KEY:
|
||||
return None
|
||||
return f"onyx_custom_ph_{MARKETING_POSTHOG_API_KEY}_posthog"
|
||||
|
||||
|
||||
def parse_marketing_cookie(cookie_value: str) -> dict[str, Any] | None:
|
||||
"""
|
||||
Parse the URL-encoded JSON marketing cookie.
|
||||
|
||||
Expected format (URL-encoded):
|
||||
{"distinct_id":"...", "featureFlags":{"landing_page_variant":"..."}, ...}
|
||||
|
||||
Returns:
|
||||
Dict with 'distinct_id' explicitly required and all other cookie values
|
||||
passed through as-is, or None if parsing fails or distinct_id is missing.
|
||||
"""
|
||||
try:
|
||||
decoded_cookie = unquote(cookie_value)
|
||||
cookie_data = json.loads(decoded_cookie)
|
||||
|
||||
distinct_id = cookie_data.get("distinct_id")
|
||||
if not distinct_id:
|
||||
return None
|
||||
|
||||
return cookie_data
|
||||
except (json.JSONDecodeError, KeyError, TypeError, AttributeError) as e:
|
||||
logger.warning(f"Failed to parse cookie: {e}")
|
||||
return None
|
||||
|
||||
@@ -517,7 +517,7 @@ def run_analysis(intent_req: IntentRequest) -> tuple[bool, list[str]]:
|
||||
try:
|
||||
keywords = map_keywords(model_input.input_ids[0], tokenizer, keyword_preds)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
logger.warning(
|
||||
f"Failed to extract keywords for query: {intent_req.query} due to {e}"
|
||||
)
|
||||
# Fallback to keeping all words
|
||||
|
||||
@@ -116,7 +116,7 @@ def _concurrent_embedding(
|
||||
# the model to fail to encode texts. It's pretty rare and we want to allow
|
||||
# concurrent embedding, hence we retry (the specific error is
|
||||
# "RuntimeError: Already borrowed" and occurs in the transformers library)
|
||||
logger.error(f"Error encoding texts, retrying: {e}")
|
||||
logger.warning(f"Error encoding texts, retrying: {e}")
|
||||
time.sleep(ENCODING_RETRY_DELAY)
|
||||
return model.encode(texts, normalize_embeddings=normalize_embeddings)
|
||||
|
||||
|
||||
73
backend/onyx/agents/agent_framework/message_format.py
Normal file
73
backend/onyx/agents/agent_framework/message_format.py
Normal file
@@ -0,0 +1,73 @@
|
||||
import json
|
||||
from collections.abc import Sequence
|
||||
from typing import cast
|
||||
|
||||
from langchain_core.messages import AIMessage
|
||||
from langchain_core.messages import BaseMessage
|
||||
from langchain_core.messages import FunctionMessage
|
||||
|
||||
from onyx.llm.message_types import AssistantMessage
|
||||
from onyx.llm.message_types import ChatCompletionMessage
|
||||
from onyx.llm.message_types import FunctionCall
|
||||
from onyx.llm.message_types import SystemMessage
|
||||
from onyx.llm.message_types import ToolCall
|
||||
from onyx.llm.message_types import ToolMessage
|
||||
from onyx.llm.message_types import UserMessageWithText
|
||||
|
||||
|
||||
HUMAN = "human"
|
||||
SYSTEM = "system"
|
||||
AI = "ai"
|
||||
FUNCTION = "function"
|
||||
|
||||
|
||||
def base_messages_to_chat_completion_msgs(
|
||||
msgs: Sequence[BaseMessage],
|
||||
) -> list[ChatCompletionMessage]:
|
||||
return [_base_message_to_chat_completion_msg(msg) for msg in msgs]
|
||||
|
||||
|
||||
def _base_message_to_chat_completion_msg(
|
||||
msg: BaseMessage,
|
||||
) -> ChatCompletionMessage:
|
||||
if msg.type == HUMAN:
|
||||
content = msg.content if isinstance(msg.content, str) else str(msg.content)
|
||||
user_msg: UserMessageWithText = {"role": "user", "content": content}
|
||||
return user_msg
|
||||
if msg.type == SYSTEM:
|
||||
content = msg.content if isinstance(msg.content, str) else str(msg.content)
|
||||
system_msg: SystemMessage = {"role": "system", "content": content}
|
||||
return system_msg
|
||||
if msg.type == AI:
|
||||
content = msg.content if isinstance(msg.content, str) else str(msg.content)
|
||||
assistant_msg: AssistantMessage = {
|
||||
"role": "assistant",
|
||||
"content": content,
|
||||
}
|
||||
if isinstance(msg, AIMessage) and msg.tool_calls:
|
||||
assistant_msg["tool_calls"] = [
|
||||
ToolCall(
|
||||
id=tool_call.get("id") or "",
|
||||
type="function",
|
||||
function=FunctionCall(
|
||||
name=tool_call["name"],
|
||||
arguments=json.dumps(tool_call["args"]),
|
||||
),
|
||||
)
|
||||
for tool_call in msg.tool_calls
|
||||
]
|
||||
return assistant_msg
|
||||
if msg.type == FUNCTION:
|
||||
function_message = cast(FunctionMessage, msg)
|
||||
content = (
|
||||
function_message.content
|
||||
if isinstance(function_message.content, str)
|
||||
else str(function_message.content)
|
||||
)
|
||||
tool_msg: ToolMessage = {
|
||||
"role": "tool",
|
||||
"content": content,
|
||||
"tool_call_id": function_message.name or "",
|
||||
}
|
||||
return tool_msg
|
||||
raise ValueError(f"Unexpected message type: {msg.type}")
|
||||
47
backend/onyx/agents/agent_framework/models.py
Normal file
47
backend/onyx/agents/agent_framework/models.py
Normal file
@@ -0,0 +1,47 @@
|
||||
from typing import Any
|
||||
from typing import Literal
|
||||
from typing import TypeAlias
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from onyx.llm.model_response import ModelResponseStream
|
||||
|
||||
|
||||
class ToolCallStreamItem(BaseModel):
    # Payload describing one tool call. Every field except ``type`` is
    # optional and defaults to None.

    # Identifier of the call; presumably the value a later
    # ToolCallOutputStreamItem.call_id refers to -- confirm with callers.
    call_id: str | None = None

    id: str | None = None

    # Name of the tool being called.
    name: str | None = None

    # Call arguments as a string (presumably JSON-encoded -- confirm).
    arguments: str | None = None

    # Fixed discriminator value for this item kind.
    type: Literal["function_call"] = "function_call"

    index: int | None = None
|
||||
|
||||
|
||||
class ToolCallOutputStreamItem(BaseModel):
    # Payload carrying the output of one tool call.

    # Identifier of the call this output belongs to; defaults to None.
    call_id: str | None = None

    # The tool's output. Required (no default) and untyped.
    output: Any

    # Fixed discriminator value for this item kind.
    type: Literal["function_call_output"] = "function_call_output"
|
||||
|
||||
|
||||
RunItemStreamEventDetails: TypeAlias = ToolCallStreamItem | ToolCallOutputStreamItem
|
||||
|
||||
|
||||
class RunItemStreamEvent(BaseModel):
    # Marker event that can appear in a ``StreamEvent`` stream alongside
    # raw model response chunks.

    # Which event this is; restricted to the listed literals.
    type: Literal[
        "message_start",
        "message_done",
        "reasoning_start",
        "reasoning_done",
        "tool_call",
        "tool_call_output",
    ]

    # Optional tool-call / tool-output payload; None when absent.
    details: RunItemStreamEventDetails | None = None
|
||||
|
||||
|
||||
StreamEvent: TypeAlias = ModelResponseStream | RunItemStreamEvent
|
||||
309
backend/onyx/agents/agent_framework/query.py
Normal file
309
backend/onyx/agents/agent_framework/query.py
Normal file
@@ -0,0 +1,309 @@
|
||||
# import json
|
||||
# from collections.abc import Callable
|
||||
# from collections.abc import Iterator
|
||||
# from collections.abc import Sequence
|
||||
# from dataclasses import dataclass
|
||||
# from typing import Any
|
||||
|
||||
# from onyx.agents.agent_framework.models import RunItemStreamEvent
|
||||
# from onyx.agents.agent_framework.models import StreamEvent
|
||||
# from onyx.agents.agent_framework.models import ToolCallStreamItem
|
||||
# from onyx.llm.interfaces import LanguageModelInput
|
||||
# from onyx.llm.interfaces import LLM
|
||||
# from onyx.llm.interfaces import ToolChoiceOptions
|
||||
# from onyx.llm.message_types import ChatCompletionMessage
|
||||
# from onyx.llm.message_types import ToolCall
|
||||
# from onyx.llm.model_response import ModelResponseStream
|
||||
# from onyx.tools.tool import Tool
|
||||
# from onyx.tracing.framework.create import agent_span
|
||||
# from onyx.tracing.framework.create import generation_span
|
||||
|
||||
|
||||
# @dataclass
|
||||
# class QueryResult:
|
||||
# stream: Iterator[StreamEvent]
|
||||
# new_messages_stateful: list[ChatCompletionMessage]
|
||||
|
||||
|
||||
# def _serialize_tool_output(output: Any) -> str:
|
||||
# if isinstance(output, str):
|
||||
# return output
|
||||
# try:
|
||||
# return json.dumps(output)
|
||||
# except TypeError:
|
||||
# return str(output)
|
||||
|
||||
|
||||
# def _parse_tool_calls_from_message_content(
|
||||
# content: str,
|
||||
# ) -> list[dict[str, Any]]:
|
||||
# """Parse JSON content that represents tool call instructions."""
|
||||
# try:
|
||||
# parsed_content = json.loads(content)
|
||||
# except json.JSONDecodeError:
|
||||
# return []
|
||||
|
||||
# if isinstance(parsed_content, dict):
|
||||
# candidates = [parsed_content]
|
||||
# elif isinstance(parsed_content, list):
|
||||
# candidates = [item for item in parsed_content if isinstance(item, dict)]
|
||||
# else:
|
||||
# return []
|
||||
|
||||
# tool_calls: list[dict[str, Any]] = []
|
||||
|
||||
# for candidate in candidates:
|
||||
# name = candidate.get("name")
|
||||
# arguments = candidate.get("arguments")
|
||||
|
||||
# if not isinstance(name, str) or arguments is None:
|
||||
# continue
|
||||
|
||||
# if not isinstance(arguments, dict):
|
||||
# continue
|
||||
|
||||
# call_id = candidate.get("id")
|
||||
# arguments_str = json.dumps(arguments)
|
||||
# tool_calls.append(
|
||||
# {
|
||||
# "id": call_id,
|
||||
# "name": name,
|
||||
# "arguments": arguments_str,
|
||||
# }
|
||||
# )
|
||||
|
||||
# return tool_calls
|
||||
|
||||
|
||||
# def _try_convert_content_to_tool_calls_for_non_tool_calling_llms(
|
||||
# tool_calls_in_progress: dict[int, dict[str, Any]],
|
||||
# content_parts: list[str],
|
||||
# structured_response_format: dict | None,
|
||||
# next_synthetic_tool_call_id: Callable[[], str],
|
||||
# ) -> None:
|
||||
# """Populate tool_calls_in_progress when a non-tool-calling LLM returns JSON content describing tool calls."""
|
||||
# if tool_calls_in_progress or not content_parts or structured_response_format:
|
||||
# return
|
||||
|
||||
# tool_calls_from_content = _parse_tool_calls_from_message_content(
|
||||
# "".join(content_parts)
|
||||
# )
|
||||
|
||||
# if not tool_calls_from_content:
|
||||
# return
|
||||
|
||||
# content_parts.clear()
|
||||
|
||||
# for index, tool_call_data in enumerate(tool_calls_from_content):
|
||||
# call_id = tool_call_data["id"] or next_synthetic_tool_call_id()
|
||||
# tool_calls_in_progress[index] = {
|
||||
# "id": call_id,
|
||||
# "name": tool_call_data["name"],
|
||||
# "arguments": tool_call_data["arguments"],
|
||||
# }
|
||||
|
||||
|
||||
# def _update_tool_call_with_delta(
|
||||
# tool_calls_in_progress: dict[int, dict[str, Any]],
|
||||
# tool_call_delta: Any,
|
||||
# ) -> None:
|
||||
# index = tool_call_delta.index
|
||||
|
||||
# if index not in tool_calls_in_progress:
|
||||
# tool_calls_in_progress[index] = {
|
||||
# "id": None,
|
||||
# "name": None,
|
||||
# "arguments": "",
|
||||
# }
|
||||
|
||||
# if tool_call_delta.id:
|
||||
# tool_calls_in_progress[index]["id"] = tool_call_delta.id
|
||||
|
||||
# if tool_call_delta.function:
|
||||
# if tool_call_delta.function.name:
|
||||
# tool_calls_in_progress[index]["name"] = tool_call_delta.function.name
|
||||
|
||||
# if tool_call_delta.function.arguments:
|
||||
# tool_calls_in_progress[index][
|
||||
# "arguments"
|
||||
# ] += tool_call_delta.function.arguments
|
||||
|
||||
|
||||
# def query(
|
||||
# llm_with_default_settings: LLM,
|
||||
# messages: LanguageModelInput,
|
||||
# tools: Sequence[Tool],
|
||||
# context: Any,
|
||||
# tool_choice: ToolChoiceOptions | None = None,
|
||||
# structured_response_format: dict | None = None,
|
||||
# ) -> QueryResult:
|
||||
# tool_definitions = [tool.tool_definition() for tool in tools]
|
||||
# tools_by_name = {tool.name: tool for tool in tools}
|
||||
|
||||
# new_messages_stateful: list[ChatCompletionMessage] = []
|
||||
|
||||
# current_span = agent_span(
|
||||
# name="agent_framework_query",
|
||||
# output_type="dict" if structured_response_format else "str",
|
||||
# )
|
||||
# current_span.start(mark_as_current=True)
|
||||
# current_span.span_data.tools = [t.name for t in tools]
|
||||
|
||||
# def stream_generator() -> Iterator[StreamEvent]:
|
||||
# message_started = False
|
||||
# reasoning_started = False
|
||||
|
||||
# tool_calls_in_progress: dict[int, dict[str, Any]] = {}
|
||||
|
||||
# content_parts: list[str] = []
|
||||
|
||||
# synthetic_tool_call_counter = 0
|
||||
|
||||
# def _next_synthetic_tool_call_id() -> str:
|
||||
# nonlocal synthetic_tool_call_counter
|
||||
# call_id = f"synthetic_tool_call_{synthetic_tool_call_counter}"
|
||||
# synthetic_tool_call_counter += 1
|
||||
# return call_id
|
||||
|
||||
# with generation_span( # type: ignore[misc]
|
||||
# model=llm_with_default_settings.config.model_name,
|
||||
# model_config={
|
||||
# "base_url": str(llm_with_default_settings.config.api_base or ""),
|
||||
# "model_impl": "litellm",
|
||||
# },
|
||||
# ) as span_generation:
|
||||
# # Only set input if messages is a sequence (not a string)
|
||||
# # ChatCompletionMessage TypedDicts are compatible with Mapping[str, Any] at runtime
|
||||
# if isinstance(messages, Sequence) and not isinstance(messages, str):
|
||||
# # Convert ChatCompletionMessage sequence to Sequence[Mapping[str, Any]]
|
||||
# span_generation.span_data.input = [dict(msg) for msg in messages] # type: ignore[assignment]
|
||||
# for chunk in llm_with_default_settings.stream(
|
||||
# prompt=messages,
|
||||
# tools=tool_definitions,
|
||||
# tool_choice=tool_choice,
|
||||
# structured_response_format=structured_response_format,
|
||||
# ):
|
||||
# assert isinstance(chunk, ModelResponseStream)
|
||||
# usage = getattr(chunk, "usage", None)
|
||||
# if usage:
|
||||
# span_generation.span_data.usage = {
|
||||
# "input_tokens": usage.prompt_tokens,
|
||||
# "output_tokens": usage.completion_tokens,
|
||||
# "cache_read_input_tokens": usage.cache_read_input_tokens,
|
||||
# "cache_creation_input_tokens": usage.cache_creation_input_tokens,
|
||||
# }
|
||||
|
||||
# delta = chunk.choice.delta
|
||||
# finish_reason = chunk.choice.finish_reason
|
||||
|
||||
# if delta.reasoning_content:
|
||||
# if not reasoning_started:
|
||||
# yield RunItemStreamEvent(type="reasoning_start")
|
||||
# reasoning_started = True
|
||||
|
||||
# if delta.content:
|
||||
# if reasoning_started:
|
||||
# yield RunItemStreamEvent(type="reasoning_done")
|
||||
# reasoning_started = False
|
||||
# content_parts.append(delta.content)
|
||||
# if not message_started:
|
||||
# yield RunItemStreamEvent(type="message_start")
|
||||
# message_started = True
|
||||
|
||||
# if delta.tool_calls:
|
||||
# if reasoning_started:
|
||||
# yield RunItemStreamEvent(type="reasoning_done")
|
||||
# reasoning_started = False
|
||||
# if message_started:
|
||||
# yield RunItemStreamEvent(type="message_done")
|
||||
# message_started = False
|
||||
|
||||
# for tool_call_delta in delta.tool_calls:
|
||||
# _update_tool_call_with_delta(
|
||||
# tool_calls_in_progress, tool_call_delta
|
||||
# )
|
||||
|
||||
# yield chunk
|
||||
|
||||
# if not finish_reason:
|
||||
# continue
|
||||
|
||||
# if reasoning_started:
|
||||
# yield RunItemStreamEvent(type="reasoning_done")
|
||||
# reasoning_started = False
|
||||
# if message_started:
|
||||
# yield RunItemStreamEvent(type="message_done")
|
||||
# message_started = False
|
||||
|
||||
# if tool_choice != "none":
|
||||
# _try_convert_content_to_tool_calls_for_non_tool_calling_llms(
|
||||
# tool_calls_in_progress,
|
||||
# content_parts,
|
||||
# structured_response_format,
|
||||
# _next_synthetic_tool_call_id,
|
||||
# )
|
||||
|
||||
# if content_parts:
|
||||
# new_messages_stateful.append(
|
||||
# {
|
||||
# "role": "assistant",
|
||||
# "content": "".join(content_parts),
|
||||
# }
|
||||
# )
|
||||
# span_generation.span_data.output = new_messages_stateful
|
||||
|
||||
# # Execute tool calls outside of the stream loop and generation_span
|
||||
# if tool_calls_in_progress:
|
||||
# sorted_tool_calls = sorted(tool_calls_in_progress.items())
|
||||
|
||||
# # Build tool calls for the message and execute tools
|
||||
# assistant_tool_calls: list[ToolCall] = []
|
||||
|
||||
# for _, tool_call_data in sorted_tool_calls:
|
||||
# call_id = tool_call_data["id"]
|
||||
# name = tool_call_data["name"]
|
||||
# arguments_str = tool_call_data["arguments"]
|
||||
|
||||
# if call_id is None or name is None:
|
||||
# continue
|
||||
|
||||
# assistant_tool_calls.append(
|
||||
# {
|
||||
# "id": call_id,
|
||||
# "type": "function",
|
||||
# "function": {
|
||||
# "name": name,
|
||||
# "arguments": arguments_str,
|
||||
# },
|
||||
# }
|
||||
# )
|
||||
|
||||
# yield RunItemStreamEvent(
|
||||
# type="tool_call",
|
||||
# details=ToolCallStreamItem(
|
||||
# call_id=call_id,
|
||||
# name=name,
|
||||
# arguments=arguments_str,
|
||||
# ),
|
||||
# )
|
||||
|
||||
# if name in tools_by_name:
|
||||
# tools_by_name[name]
|
||||
# json.loads(arguments_str)
|
||||
|
||||
# run_context = RunContextWrapper(context=context)
|
||||
|
||||
# TODO: Instead of executing sequentially, execute in parallel
|
||||
# In practice, it's not a must right now since we don't use parallel
|
||||
# tool calls, so kicking the can down the road for now.
|
||||
|
||||
# TODO broken for now, no need for a run_v2
|
||||
# output = tool.run_v2(run_context, **arguments)
|
||||
|
||||
# yield RunItemStreamEvent(
|
||||
# type="tool_call_output",
|
||||
# details=ToolCallOutputStreamItem(
|
||||
# call_id=call_id,
|
||||
# output=output,
|
||||
# ),
|
||||
# )
|
||||
@@ -2,15 +2,27 @@ from collections.abc import Sequence
|
||||
|
||||
from langchain.schema.messages import BaseMessage
|
||||
|
||||
from onyx.agents.agent_sdk.message_types import AgentSDKMessage
|
||||
from onyx.agents.agent_sdk.message_types import AssistantMessageWithContent
|
||||
from onyx.agents.agent_sdk.message_types import ImageContent
|
||||
from onyx.agents.agent_sdk.message_types import InputTextContent
|
||||
from onyx.agents.agent_sdk.message_types import SystemMessage
|
||||
from onyx.agents.agent_sdk.message_types import UserMessage
|
||||
|
||||
|
||||
# TODO: Currently, we only support native API input for images. For other
|
||||
# files, we process the content and share it as text in the message. In
|
||||
# the future, we might support native file uploads for other types of files.
|
||||
def base_messages_to_agent_sdk_msgs(msgs: Sequence[BaseMessage]) -> list[dict]:
|
||||
return [_base_message_to_agent_sdk_msg(msg) for msg in msgs]
|
||||
def base_messages_to_agent_sdk_msgs(
|
||||
msgs: Sequence[BaseMessage],
|
||||
is_responses_api: bool,
|
||||
) -> list[AgentSDKMessage]:
|
||||
return [_base_message_to_agent_sdk_msg(msg, is_responses_api) for msg in msgs]
|
||||
|
||||
|
||||
def _base_message_to_agent_sdk_msg(msg: BaseMessage) -> dict:
|
||||
def _base_message_to_agent_sdk_msg(
|
||||
msg: BaseMessage, is_responses_api: bool
|
||||
) -> AgentSDKMessage:
|
||||
message_type_to_agent_sdk_role = {
|
||||
"human": "user",
|
||||
"system": "system",
|
||||
@@ -20,59 +32,136 @@ def _base_message_to_agent_sdk_msg(msg: BaseMessage) -> dict:
|
||||
|
||||
# Convert content to Agent SDK format
|
||||
content = msg.content
|
||||
if isinstance(content, str):
|
||||
# Convert string to structured text format
|
||||
structured_content = [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": content,
|
||||
}
|
||||
]
|
||||
elif isinstance(content, list):
|
||||
# Content is already a list, process each item
|
||||
structured_content = []
|
||||
for item in content:
|
||||
if isinstance(item, str):
|
||||
structured_content.append(
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": item,
|
||||
}
|
||||
)
|
||||
elif isinstance(item, dict):
|
||||
# Handle different item types
|
||||
item_type = item.get("type")
|
||||
|
||||
if item_type == "text":
|
||||
# Convert text type to input_text
|
||||
structured_content.append(
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": item.get("text", ""),
|
||||
}
|
||||
)
|
||||
elif item_type == "image_url":
|
||||
# Convert image_url to input_image format
|
||||
image_url = item.get("image_url", {})
|
||||
if isinstance(image_url, dict):
|
||||
url = image_url.get("url", "")
|
||||
else:
|
||||
url = image_url
|
||||
structured_content.append(
|
||||
{
|
||||
"type": "input_image",
|
||||
"image_url": url,
|
||||
"detail": "auto",
|
||||
}
|
||||
)
|
||||
if isinstance(content, str):
|
||||
# For system/user/assistant messages, use InputTextContent
|
||||
if role in ("system", "user"):
|
||||
input_text_content: list[InputTextContent | ImageContent] = [
|
||||
InputTextContent(type="input_text", text=content)
|
||||
]
|
||||
if role == "system":
|
||||
# SystemMessage only accepts InputTextContent
|
||||
system_msg: SystemMessage = {
|
||||
"role": "system",
|
||||
"content": [InputTextContent(type="input_text", text=content)],
|
||||
}
|
||||
return system_msg
|
||||
else: # user
|
||||
user_msg: UserMessage = {
|
||||
"role": "user",
|
||||
"content": input_text_content,
|
||||
}
|
||||
return user_msg
|
||||
else: # assistant
|
||||
assistant_msg: AssistantMessageWithContent
|
||||
if is_responses_api:
|
||||
from onyx.agents.agent_sdk.message_types import OutputTextContent
|
||||
|
||||
assistant_msg = {
|
||||
"role": "assistant",
|
||||
"content": [OutputTextContent(type="output_text", text=content)],
|
||||
}
|
||||
else:
|
||||
raise ValueError(f"Unexpected item type: {type(item)}. Item: {item}")
|
||||
assistant_msg = {
|
||||
"role": "assistant",
|
||||
"content": [InputTextContent(type="input_text", text=content)],
|
||||
}
|
||||
return assistant_msg
|
||||
elif isinstance(content, list):
|
||||
# For lists, we need to process based on the role
|
||||
if role == "assistant":
|
||||
# For responses API, use OutputTextContent; otherwise use InputTextContent
|
||||
assistant_content: list[InputTextContent | OutputTextContent] = []
|
||||
|
||||
if is_responses_api:
|
||||
from onyx.agents.agent_sdk.message_types import OutputTextContent
|
||||
|
||||
for item in content:
|
||||
if isinstance(item, str):
|
||||
assistant_content.append(
|
||||
OutputTextContent(type="output_text", text=item)
|
||||
)
|
||||
elif isinstance(item, dict) and item.get("type") == "text":
|
||||
assistant_content.append(
|
||||
OutputTextContent(
|
||||
type="output_text", text=item.get("text", "")
|
||||
)
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Unexpected item type for assistant message: {type(item)}. Item: {item}"
|
||||
)
|
||||
else:
|
||||
for item in content:
|
||||
if isinstance(item, str):
|
||||
assistant_content.append(
|
||||
InputTextContent(type="input_text", text=item)
|
||||
)
|
||||
elif isinstance(item, dict) and item.get("type") == "text":
|
||||
assistant_content.append(
|
||||
InputTextContent(
|
||||
type="input_text", text=item.get("text", "")
|
||||
)
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Unexpected item type for assistant message: {type(item)}. Item: {item}"
|
||||
)
|
||||
|
||||
assistant_msg_list: AssistantMessageWithContent = {
|
||||
"role": "assistant",
|
||||
"content": assistant_content,
|
||||
}
|
||||
return assistant_msg_list
|
||||
else: # system or user - use InputTextContent
|
||||
input_content: list[InputTextContent | ImageContent] = []
|
||||
for item in content:
|
||||
if isinstance(item, str):
|
||||
input_content.append(InputTextContent(type="input_text", text=item))
|
||||
elif isinstance(item, dict):
|
||||
item_type = item.get("type")
|
||||
if item_type == "text":
|
||||
input_content.append(
|
||||
InputTextContent(
|
||||
type="input_text", text=item.get("text", "")
|
||||
)
|
||||
)
|
||||
elif item_type == "image_url":
|
||||
# Convert image_url to input_image format
|
||||
image_url = item.get("image_url", {})
|
||||
if isinstance(image_url, dict):
|
||||
url = image_url.get("url", "")
|
||||
else:
|
||||
url = image_url
|
||||
input_content.append(
|
||||
ImageContent(
|
||||
type="input_image", image_url=url, detail="auto"
|
||||
)
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unexpected item type: {item_type}")
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Unexpected item type: {type(item)}. Item: {item}"
|
||||
)
|
||||
|
||||
if role == "system":
|
||||
# SystemMessage only accepts InputTextContent (no images)
|
||||
text_only_content = [
|
||||
c for c in input_content if c["type"] == "input_text"
|
||||
]
|
||||
system_msg_list: SystemMessage = {
|
||||
"role": "system",
|
||||
"content": text_only_content, # type: ignore[typeddict-item]
|
||||
}
|
||||
return system_msg_list
|
||||
else: # user
|
||||
user_msg_list: UserMessage = {
|
||||
"role": "user",
|
||||
"content": input_content,
|
||||
}
|
||||
return user_msg_list
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Unexpected content type: {type(content)}. Content: {content}"
|
||||
)
|
||||
|
||||
return {
|
||||
"role": role,
|
||||
"content": structured_content,
|
||||
}
|
||||
|
||||
125
backend/onyx/agents/agent_sdk/message_types.py
Normal file
125
backend/onyx/agents/agent_sdk/message_types.py
Normal file
@@ -0,0 +1,125 @@
|
||||
"""Strongly typed message structures for Agent SDK messages."""
|
||||
|
||||
from typing import Literal
|
||||
from typing import NotRequired
|
||||
|
||||
from typing_extensions import TypedDict
|
||||
|
||||
|
||||
class InputTextContent(TypedDict):
    """Text content part tagged with ``type == "input_text"``."""

    type: Literal["input_text"]
    text: str
|
||||
|
||||
|
||||
class OutputTextContent(TypedDict):
    """Text content part tagged with ``type == "output_text"``."""

    type: Literal["output_text"]
    text: str
|
||||
|
||||
|
||||
TextContent = InputTextContent | OutputTextContent
|
||||
|
||||
|
||||
class ImageContent(TypedDict):
    """Image content part tagged with ``type == "input_image"``."""

    type: Literal["input_image"]
    # Location of the image, as a string.
    image_url: str
    # Detail setting, as a free-form string.
    detail: str
|
||||
|
||||
|
||||
# Tool call structures
|
||||
class ToolCallFunction(TypedDict):
    """Function part of a tool call: its name and its arguments string."""

    name: str
    arguments: str
|
||||
|
||||
|
||||
class ToolCall(TypedDict):
    """A tool call: an id, the fixed type ``"function"``, and the function."""

    id: str
    type: Literal["function"]
    function: ToolCallFunction
|
||||
|
||||
|
||||
# Message types
|
||||
class SystemMessage(TypedDict):
    """Message with role ``"system"``; content is input-text parts only."""

    role: Literal["system"]
    content: list[InputTextContent]  # System messages use input text
|
||||
|
||||
|
||||
class UserMessage(TypedDict):
    """Message with role ``"user"``; content mixes text and image parts."""

    role: Literal["user"]
    # User messages use input text or images
    content: list[InputTextContent | ImageContent]
|
||||
|
||||
|
||||
class AssistantMessageWithContent(TypedDict):
    """Message with role ``"assistant"`` that carries text content parts."""

    role: Literal["assistant"]
    # Assistant messages use output_text for responses API compatibility
    content: list[InputTextContent | OutputTextContent]
|
||||
|
||||
|
||||
class AssistantMessageWithToolCalls(TypedDict):
    """Message with role ``"assistant"`` that carries a list of tool calls."""

    role: Literal["assistant"]
    tool_calls: list[ToolCall]
|
||||
|
||||
|
||||
class AssistantMessageDuringAgentRun(TypedDict):
    """Assistant message shape that also carries an id, a status and a type."""

    role: Literal["assistant"]
    id: str
    # Assistant runtime messages receive output_text from agents SDK for
    # responses API compatibility. Content is either a list of text parts
    # or a list of tool calls.
    content: list[InputTextContent | OutputTextContent] | list[ToolCall]
    status: Literal["completed", "failed", "in_progress"]
    type: Literal["message"]
|
||||
|
||||
|
||||
class ToolMessage(TypedDict):
    """Message with role ``"tool"``: string content plus a tool call id."""

    role: Literal["tool"]
    content: str
    tool_call_id: str
|
||||
|
||||
|
||||
class FunctionCallMessage(TypedDict):
|
||||
"""Agent SDK function call message format."""
|
||||
|
||||
type: Literal["function_call"]
|
||||
id: NotRequired[str]
|
||||
call_id: str
|
||||
name: str
|
||||
arguments: str
|
||||
|
||||
|
||||
class FunctionCallOutputMessage(TypedDict):
    """Agent SDK message carrying the string output of a function call."""

    type: Literal["function_call_output"]
    call_id: str
    output: str
|
||||
|
||||
|
||||
class SummaryText(TypedDict):
    """One ``summary_text`` entry of a reasoning message's summary list."""

    text: str
    type: Literal["summary_text"]
|
||||
|
||||
|
||||
class ReasoningMessage(TypedDict):
    """Agent SDK reasoning message: an id plus a list of summary entries."""

    id: str
    type: Literal["reasoning"]
    summary: list[SummaryText]
|
||||
|
||||
|
||||
# Union type for all Agent SDK messages
|
||||
AgentSDKMessage = (
|
||||
SystemMessage
|
||||
| UserMessage
|
||||
| AssistantMessageWithContent
|
||||
| AssistantMessageWithToolCalls
|
||||
| AssistantMessageDuringAgentRun
|
||||
| ToolMessage
|
||||
| FunctionCallMessage
|
||||
| FunctionCallOutputMessage
|
||||
| ReasoningMessage
|
||||
)
|
||||
36
backend/onyx/agents/agent_sdk/monkey_patches.py
Normal file
36
backend/onyx/agents/agent_sdk/monkey_patches.py
Normal file
@@ -0,0 +1,36 @@
|
||||
from typing import Any
|
||||
|
||||
from agents.models.openai_responses import Converter as OpenAIResponsesConverter
|
||||
|
||||
|
||||
# TODO: I am very sad that I have to monkey patch this :(
|
||||
# Basically, OpenAI agents sdk doesn't convert the tool choice correctly
|
||||
# when they have a built-in tool in their framework, like they do for web_search
|
||||
# and image_generation.
|
||||
# Going to open up a thread with OpenAI agents team to see what they recommend
|
||||
# or what we can fix.
|
||||
# A discussion is warranted, but we likely want to just write our own LitellmModel for
|
||||
# the OpenAI agents SDK since they probably don't really care about Litellm and will
|
||||
# prioritize functionality for their own models.
|
||||
def monkey_patch_convert_tool_choice_to_ignore_openai_hosted_web_search() -> None:
    """Replace ``OpenAIResponsesConverter.convert_tool_choice`` with a wrapper.

    The wrapper turns the tool choices ``"web_search"`` and
    ``"image_generation"`` into plain function tool choices and hands every
    other value to the original implementation.

    Calling this more than once is a no-op: an installed wrapper is
    recognised by its ``__name__``.
    """
    current = OpenAIResponsesConverter.convert_tool_choice
    already_patched = (
        getattr(current, "__name__", "") == "_patched_convert_tool_choice"
    )
    if already_patched:
        return

    # Grab the plain function behind the classmethod so the wrapper can
    # forward ``cls`` to it explicitly.
    unpatched_impl = current.__func__  # type: ignore[attr-defined]

    # NOTE: the inner function's name is what the guard above checks for;
    # do not rename it.
    def _patched_convert_tool_choice(cls: type, tool_choice: Any) -> Any:
        # Handle OpenAI hosted tools that we have custom implementations for.
        # Without this patch, the library uses special formatting that breaks
        # our custom tools.
        # See: https://platform.openai.com/docs/api-reference/responses/create#responses_create-tool_choice-hosted_tool-type
        for hosted_name in ("web_search", "image_generation"):
            if tool_choice == hosted_name:
                return {"type": "function", "name": hosted_name}
        return unpatched_impl(cls, tool_choice)

    OpenAIResponsesConverter.convert_tool_choice = classmethod(  # type: ignore[method-assign, assignment]
        _patched_convert_tool_choice
    )
|
||||
@@ -2,6 +2,7 @@ import asyncio
|
||||
import queue
|
||||
import threading
|
||||
from collections.abc import Iterator
|
||||
from collections.abc import Sequence
|
||||
from typing import Generic
|
||||
from typing import Optional
|
||||
from typing import TypeVar
|
||||
@@ -11,6 +12,7 @@ from agents import RunResultStreaming
|
||||
from agents import TContext
|
||||
from agents.run import Runner
|
||||
|
||||
from onyx.agents.agent_sdk.message_types import AgentSDKMessage
|
||||
from onyx.utils.threadpool_concurrency import run_in_background
|
||||
|
||||
T = TypeVar("T")
|
||||
@@ -41,7 +43,7 @@ class SyncAgentStream(Generic[T]):
|
||||
self,
|
||||
*,
|
||||
agent: Agent,
|
||||
input: list[dict],
|
||||
input: Sequence[AgentSDKMessage],
|
||||
context: TContext | None = None,
|
||||
max_turns: int = 100,
|
||||
queue_maxsize: int = 0,
|
||||
|
||||
@@ -1,21 +1,21 @@
|
||||
from operator import add
|
||||
from typing import Annotated
|
||||
# from operator import add
|
||||
# from typing import Annotated
|
||||
|
||||
from pydantic import BaseModel
|
||||
# from pydantic import BaseModel
|
||||
|
||||
|
||||
class CoreState(BaseModel):
|
||||
"""
|
||||
This is the core state that is shared across all subgraphs.
|
||||
"""
|
||||
# class CoreState(BaseModel):
|
||||
# """
|
||||
# This is the core state that is shared across all subgraphs.
|
||||
# """
|
||||
|
||||
log_messages: Annotated[list[str], add] = []
|
||||
current_step_nr: int = 1
|
||||
# log_messages: Annotated[list[str], add] = []
|
||||
# current_step_nr: int = 1
|
||||
|
||||
|
||||
class SubgraphCoreState(BaseModel):
|
||||
"""
|
||||
This is the core state that is shared across all subgraphs.
|
||||
"""
|
||||
# class SubgraphCoreState(BaseModel):
|
||||
# """
|
||||
# This is the core state that is shared across all subgraphs.
|
||||
# """
|
||||
|
||||
log_messages: Annotated[list[str], add] = []
|
||||
# log_messages: Annotated[list[str], add] = []
|
||||
|
||||
@@ -1,62 +1,62 @@
|
||||
from collections.abc import Hashable
|
||||
from typing import cast
|
||||
# from collections.abc import Hashable
|
||||
# from typing import cast
|
||||
|
||||
from langchain_core.runnables.config import RunnableConfig
|
||||
from langgraph.types import Send
|
||||
# from langchain_core.runnables.config import RunnableConfig
|
||||
# from langgraph.types import Send
|
||||
|
||||
from onyx.agents.agent_search.dc_search_analysis.states import ObjectInformationInput
|
||||
from onyx.agents.agent_search.dc_search_analysis.states import (
|
||||
ObjectResearchInformationUpdate,
|
||||
)
|
||||
from onyx.agents.agent_search.dc_search_analysis.states import ObjectSourceInput
|
||||
from onyx.agents.agent_search.dc_search_analysis.states import (
|
||||
SearchSourcesObjectsUpdate,
|
||||
)
|
||||
from onyx.agents.agent_search.models import GraphConfig
|
||||
# from onyx.agents.agent_search.dc_search_analysis.states import ObjectInformationInput
|
||||
# from onyx.agents.agent_search.dc_search_analysis.states import (
|
||||
# ObjectResearchInformationUpdate,
|
||||
# )
|
||||
# from onyx.agents.agent_search.dc_search_analysis.states import ObjectSourceInput
|
||||
# from onyx.agents.agent_search.dc_search_analysis.states import (
|
||||
# SearchSourcesObjectsUpdate,
|
||||
# )
|
||||
# from onyx.agents.agent_search.models import GraphConfig
|
||||
|
||||
|
||||
def parallel_object_source_research_edge(
|
||||
state: SearchSourcesObjectsUpdate, config: RunnableConfig
|
||||
) -> list[Send | Hashable]:
|
||||
"""
|
||||
LangGraph edge to parallelize the research for an individual object and source
|
||||
"""
|
||||
# def parallel_object_source_research_edge(
|
||||
# state: SearchSourcesObjectsUpdate, config: RunnableConfig
|
||||
# ) -> list[Send | Hashable]:
|
||||
# """
|
||||
# LangGraph edge to parallelize the research for an individual object and source
|
||||
# """
|
||||
|
||||
search_objects = state.analysis_objects
|
||||
search_sources = state.analysis_sources
|
||||
# search_objects = state.analysis_objects
|
||||
# search_sources = state.analysis_sources
|
||||
|
||||
object_source_combinations = [
|
||||
(object, source) for object in search_objects for source in search_sources
|
||||
]
|
||||
# object_source_combinations = [
|
||||
# (object, source) for object in search_objects for source in search_sources
|
||||
# ]
|
||||
|
||||
return [
|
||||
Send(
|
||||
"research_object_source",
|
||||
ObjectSourceInput(
|
||||
object_source_combination=object_source_combination,
|
||||
log_messages=[],
|
||||
),
|
||||
)
|
||||
for object_source_combination in object_source_combinations
|
||||
]
|
||||
# return [
|
||||
# Send(
|
||||
# "research_object_source",
|
||||
# ObjectSourceInput(
|
||||
# object_source_combination=object_source_combination,
|
||||
# log_messages=[],
|
||||
# ),
|
||||
# )
|
||||
# for object_source_combination in object_source_combinations
|
||||
# ]
|
||||
|
||||
|
||||
def parallel_object_research_consolidation_edge(
|
||||
state: ObjectResearchInformationUpdate, config: RunnableConfig
|
||||
) -> list[Send | Hashable]:
|
||||
"""
|
||||
LangGraph edge to parallelize the research for an individual object and source
|
||||
"""
|
||||
cast(GraphConfig, config["metadata"]["config"])
|
||||
object_research_information_results = state.object_research_information_results
|
||||
# def parallel_object_research_consolidation_edge(
|
||||
# state: ObjectResearchInformationUpdate, config: RunnableConfig
|
||||
# ) -> list[Send | Hashable]:
|
||||
# """
|
||||
# LangGraph edge to parallelize the research for an individual object and source
|
||||
# """
|
||||
# cast(GraphConfig, config["metadata"]["config"])
|
||||
# object_research_information_results = state.object_research_information_results
|
||||
|
||||
return [
|
||||
Send(
|
||||
"consolidate_object_research",
|
||||
ObjectInformationInput(
|
||||
object_information=object_information,
|
||||
log_messages=[],
|
||||
),
|
||||
)
|
||||
for object_information in object_research_information_results
|
||||
]
|
||||
# return [
|
||||
# Send(
|
||||
# "consolidate_object_research",
|
||||
# ObjectInformationInput(
|
||||
# object_information=object_information,
|
||||
# log_messages=[],
|
||||
# ),
|
||||
# )
|
||||
# for object_information in object_research_information_results
|
||||
# ]
|
||||
|
||||
@@ -1,103 +1,103 @@
|
||||
from langgraph.graph import END
|
||||
from langgraph.graph import START
|
||||
from langgraph.graph import StateGraph
|
||||
# from langgraph.graph import END
|
||||
# from langgraph.graph import START
|
||||
# from langgraph.graph import StateGraph
|
||||
|
||||
from onyx.agents.agent_search.dc_search_analysis.edges import (
|
||||
parallel_object_research_consolidation_edge,
|
||||
)
|
||||
from onyx.agents.agent_search.dc_search_analysis.edges import (
|
||||
parallel_object_source_research_edge,
|
||||
)
|
||||
from onyx.agents.agent_search.dc_search_analysis.nodes.a1_search_objects import (
|
||||
search_objects,
|
||||
)
|
||||
from onyx.agents.agent_search.dc_search_analysis.nodes.a2_research_object_source import (
|
||||
research_object_source,
|
||||
)
|
||||
from onyx.agents.agent_search.dc_search_analysis.nodes.a3_structure_research_by_object import (
|
||||
structure_research_by_object,
|
||||
)
|
||||
from onyx.agents.agent_search.dc_search_analysis.nodes.a4_consolidate_object_research import (
|
||||
consolidate_object_research,
|
||||
)
|
||||
from onyx.agents.agent_search.dc_search_analysis.nodes.a5_consolidate_research import (
|
||||
consolidate_research,
|
||||
)
|
||||
from onyx.agents.agent_search.dc_search_analysis.states import MainInput
|
||||
from onyx.agents.agent_search.dc_search_analysis.states import MainState
|
||||
from onyx.utils.logger import setup_logger
|
||||
# from onyx.agents.agent_search.dc_search_analysis.edges import (
|
||||
# parallel_object_research_consolidation_edge,
|
||||
# )
|
||||
# from onyx.agents.agent_search.dc_search_analysis.edges import (
|
||||
# parallel_object_source_research_edge,
|
||||
# )
|
||||
# from onyx.agents.agent_search.dc_search_analysis.nodes.a1_search_objects import (
|
||||
# search_objects,
|
||||
# )
|
||||
# from onyx.agents.agent_search.dc_search_analysis.nodes.a2_research_object_source import (
|
||||
# research_object_source,
|
||||
# )
|
||||
# from onyx.agents.agent_search.dc_search_analysis.nodes.a3_structure_research_by_object import (
|
||||
# structure_research_by_object,
|
||||
# )
|
||||
# from onyx.agents.agent_search.dc_search_analysis.nodes.a4_consolidate_object_research import (
|
||||
# consolidate_object_research,
|
||||
# )
|
||||
# from onyx.agents.agent_search.dc_search_analysis.nodes.a5_consolidate_research import (
|
||||
# consolidate_research,
|
||||
# )
|
||||
# from onyx.agents.agent_search.dc_search_analysis.states import MainInput
|
||||
# from onyx.agents.agent_search.dc_search_analysis.states import MainState
|
||||
# from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
# logger = setup_logger()
|
||||
|
||||
test_mode = False
|
||||
# test_mode = False
|
||||
|
||||
|
||||
def divide_and_conquer_graph_builder(test_mode: bool = False) -> StateGraph:
|
||||
"""
|
||||
LangGraph graph builder for the knowledge graph search process.
|
||||
"""
|
||||
# def divide_and_conquer_graph_builder(test_mode: bool = False) -> StateGraph:
|
||||
# """
|
||||
# LangGraph graph builder for the knowledge graph search process.
|
||||
# """
|
||||
|
||||
graph = StateGraph(
|
||||
state_schema=MainState,
|
||||
input=MainInput,
|
||||
)
|
||||
# graph = StateGraph(
|
||||
# state_schema=MainState,
|
||||
# input=MainInput,
|
||||
# )
|
||||
|
||||
### Add nodes ###
|
||||
# ### Add nodes ###
|
||||
|
||||
graph.add_node(
|
||||
"search_objects",
|
||||
search_objects,
|
||||
)
|
||||
# graph.add_node(
|
||||
# "search_objects",
|
||||
# search_objects,
|
||||
# )
|
||||
|
||||
graph.add_node(
|
||||
"structure_research_by_source",
|
||||
structure_research_by_object,
|
||||
)
|
||||
# graph.add_node(
|
||||
# "structure_research_by_source",
|
||||
# structure_research_by_object,
|
||||
# )
|
||||
|
||||
graph.add_node(
|
||||
"research_object_source",
|
||||
research_object_source,
|
||||
)
|
||||
# graph.add_node(
|
||||
# "research_object_source",
|
||||
# research_object_source,
|
||||
# )
|
||||
|
||||
graph.add_node(
|
||||
"consolidate_object_research",
|
||||
consolidate_object_research,
|
||||
)
|
||||
# graph.add_node(
|
||||
# "consolidate_object_research",
|
||||
# consolidate_object_research,
|
||||
# )
|
||||
|
||||
graph.add_node(
|
||||
"consolidate_research",
|
||||
consolidate_research,
|
||||
)
|
||||
# graph.add_node(
|
||||
# "consolidate_research",
|
||||
# consolidate_research,
|
||||
# )
|
||||
|
||||
### Add edges ###
|
||||
# ### Add edges ###
|
||||
|
||||
graph.add_edge(start_key=START, end_key="search_objects")
|
||||
# graph.add_edge(start_key=START, end_key="search_objects")
|
||||
|
||||
graph.add_conditional_edges(
|
||||
source="search_objects",
|
||||
path=parallel_object_source_research_edge,
|
||||
path_map=["research_object_source"],
|
||||
)
|
||||
# graph.add_conditional_edges(
|
||||
# source="search_objects",
|
||||
# path=parallel_object_source_research_edge,
|
||||
# path_map=["research_object_source"],
|
||||
# )
|
||||
|
||||
graph.add_edge(
|
||||
start_key="research_object_source",
|
||||
end_key="structure_research_by_source",
|
||||
)
|
||||
# graph.add_edge(
|
||||
# start_key="research_object_source",
|
||||
# end_key="structure_research_by_source",
|
||||
# )
|
||||
|
||||
graph.add_conditional_edges(
|
||||
source="structure_research_by_source",
|
||||
path=parallel_object_research_consolidation_edge,
|
||||
path_map=["consolidate_object_research"],
|
||||
)
|
||||
# graph.add_conditional_edges(
|
||||
# source="structure_research_by_source",
|
||||
# path=parallel_object_research_consolidation_edge,
|
||||
# path_map=["consolidate_object_research"],
|
||||
# )
|
||||
|
||||
graph.add_edge(
|
||||
start_key="consolidate_object_research",
|
||||
end_key="consolidate_research",
|
||||
)
|
||||
# graph.add_edge(
|
||||
# start_key="consolidate_object_research",
|
||||
# end_key="consolidate_research",
|
||||
# )
|
||||
|
||||
graph.add_edge(
|
||||
start_key="consolidate_research",
|
||||
end_key=END,
|
||||
)
|
||||
# graph.add_edge(
|
||||
# start_key="consolidate_research",
|
||||
# end_key=END,
|
||||
# )
|
||||
|
||||
return graph
|
||||
# return graph
|
||||
|
||||
@@ -1,144 +1,146 @@
|
||||
from typing import cast
|
||||
# from typing import cast
|
||||
|
||||
from langchain_core.messages import HumanMessage
|
||||
from langchain_core.runnables import RunnableConfig
|
||||
from langgraph.types import StreamWriter
|
||||
# from langchain_core.messages import HumanMessage
|
||||
# from langchain_core.runnables import RunnableConfig
|
||||
# from langgraph.types import StreamWriter
|
||||
|
||||
from onyx.agents.agent_search.dc_search_analysis.ops import extract_section
|
||||
from onyx.agents.agent_search.dc_search_analysis.ops import research
|
||||
from onyx.agents.agent_search.dc_search_analysis.states import MainState
|
||||
from onyx.agents.agent_search.dc_search_analysis.states import (
|
||||
SearchSourcesObjectsUpdate,
|
||||
)
|
||||
from onyx.agents.agent_search.models import GraphConfig
|
||||
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
|
||||
trim_prompt_piece,
|
||||
)
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.prompts.agents.dc_prompts import DC_OBJECT_NO_BASE_DATA_EXTRACTION_PROMPT
|
||||
from onyx.prompts.agents.dc_prompts import DC_OBJECT_SEPARATOR
|
||||
from onyx.prompts.agents.dc_prompts import DC_OBJECT_WITH_BASE_DATA_EXTRACTION_PROMPT
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.threadpool_concurrency import run_with_timeout
|
||||
# from onyx.agents.agent_search.dc_search_analysis.ops import extract_section
|
||||
# from onyx.agents.agent_search.dc_search_analysis.ops import research
|
||||
# from onyx.agents.agent_search.dc_search_analysis.states import MainState
|
||||
# from onyx.agents.agent_search.dc_search_analysis.states import (
|
||||
# SearchSourcesObjectsUpdate,
|
||||
# )
|
||||
# from onyx.agents.agent_search.models import GraphConfig
|
||||
# from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
|
||||
# trim_prompt_piece,
|
||||
# )
|
||||
# from onyx.prompts.agents.dc_prompts import DC_OBJECT_NO_BASE_DATA_EXTRACTION_PROMPT
|
||||
# from onyx.prompts.agents.dc_prompts import DC_OBJECT_SEPARATOR
|
||||
# from onyx.prompts.agents.dc_prompts import DC_OBJECT_WITH_BASE_DATA_EXTRACTION_PROMPT
|
||||
# from onyx.secondary_llm_flows.source_filter import strings_to_document_sources
|
||||
# from onyx.utils.logger import setup_logger
|
||||
# from onyx.utils.threadpool_concurrency import run_with_timeout
|
||||
|
||||
logger = setup_logger()
|
||||
# logger = setup_logger()
|
||||
|
||||
|
||||
def search_objects(
|
||||
state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
|
||||
) -> SearchSourcesObjectsUpdate:
|
||||
"""
|
||||
LangGraph node to start the agentic search process.
|
||||
"""
|
||||
# def search_objects(
|
||||
# state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
|
||||
# ) -> SearchSourcesObjectsUpdate:
|
||||
# """
|
||||
# LangGraph node to start the agentic search process.
|
||||
# """
|
||||
|
||||
graph_config = cast(GraphConfig, config["metadata"]["config"])
|
||||
question = graph_config.inputs.prompt_builder.raw_user_query
|
||||
search_tool = graph_config.tooling.search_tool
|
||||
# graph_config = cast(GraphConfig, config["metadata"]["config"])
|
||||
# question = graph_config.inputs.prompt_builder.raw_user_query
|
||||
# search_tool = graph_config.tooling.search_tool
|
||||
|
||||
if search_tool is None or graph_config.inputs.persona is None:
|
||||
raise ValueError("Search tool and persona must be provided for DivCon search")
|
||||
# if search_tool is None or graph_config.inputs.persona is None:
|
||||
# raise ValueError("Search tool and persona must be provided for DivCon search")
|
||||
|
||||
try:
|
||||
instructions = graph_config.inputs.persona.system_prompt or ""
|
||||
# try:
|
||||
# instructions = graph_config.inputs.persona.system_prompt or ""
|
||||
|
||||
agent_1_instructions = extract_section(
|
||||
instructions, "Agent Step 1:", "Agent Step 2:"
|
||||
)
|
||||
if agent_1_instructions is None:
|
||||
raise ValueError("Agent 1 instructions not found")
|
||||
# agent_1_instructions = extract_section(
|
||||
# instructions, "Agent Step 1:", "Agent Step 2:"
|
||||
# )
|
||||
# if agent_1_instructions is None:
|
||||
# raise ValueError("Agent 1 instructions not found")
|
||||
|
||||
agent_1_base_data = extract_section(instructions, "|Start Data|", "|End Data|")
|
||||
# agent_1_base_data = extract_section(instructions, "|Start Data|", "|End Data|")
|
||||
|
||||
agent_1_task = extract_section(
|
||||
agent_1_instructions, "Task:", "Independent Research Sources:"
|
||||
)
|
||||
if agent_1_task is None:
|
||||
raise ValueError("Agent 1 task not found")
|
||||
# agent_1_task = extract_section(
|
||||
# agent_1_instructions, "Task:", "Independent Research Sources:"
|
||||
# )
|
||||
# if agent_1_task is None:
|
||||
# raise ValueError("Agent 1 task not found")
|
||||
|
||||
agent_1_independent_sources_str = extract_section(
|
||||
agent_1_instructions, "Independent Research Sources:", "Output Objective:"
|
||||
)
|
||||
if agent_1_independent_sources_str is None:
|
||||
raise ValueError("Agent 1 Independent Research Sources not found")
|
||||
# agent_1_independent_sources_str = extract_section(
|
||||
# agent_1_instructions, "Independent Research Sources:", "Output Objective:"
|
||||
# )
|
||||
# if agent_1_independent_sources_str is None:
|
||||
# raise ValueError("Agent 1 Independent Research Sources not found")
|
||||
|
||||
document_sources = [
|
||||
DocumentSource(x.strip().lower())
|
||||
for x in agent_1_independent_sources_str.split(DC_OBJECT_SEPARATOR)
|
||||
]
|
||||
# document_sources = strings_to_document_sources(
|
||||
# [
|
||||
# x.strip().lower()
|
||||
# for x in agent_1_independent_sources_str.split(DC_OBJECT_SEPARATOR)
|
||||
# ]
|
||||
# )
|
||||
|
||||
agent_1_output_objective = extract_section(
|
||||
agent_1_instructions, "Output Objective:"
|
||||
)
|
||||
if agent_1_output_objective is None:
|
||||
raise ValueError("Agent 1 output objective not found")
|
||||
# agent_1_output_objective = extract_section(
|
||||
# agent_1_instructions, "Output Objective:"
|
||||
# )
|
||||
# if agent_1_output_objective is None:
|
||||
# raise ValueError("Agent 1 output objective not found")
|
||||
|
||||
except Exception as e:
|
||||
raise ValueError(
|
||||
f"Agent 1 instructions not found or not formatted correctly: {e}"
|
||||
)
|
||||
# except Exception as e:
|
||||
# raise ValueError(
|
||||
# f"Agent 1 instructions not found or not formatted correctly: {e}"
|
||||
# )
|
||||
|
||||
# Extract objects
|
||||
# # Extract objects
|
||||
|
||||
if agent_1_base_data is None:
|
||||
# Retrieve chunks for objects
|
||||
# if agent_1_base_data is None:
|
||||
# # Retrieve chunks for objects
|
||||
|
||||
retrieved_docs = research(question, search_tool)[:10]
|
||||
# retrieved_docs = research(question, search_tool)[:10]
|
||||
|
||||
document_texts_list = []
|
||||
for doc_num, doc in enumerate(retrieved_docs):
|
||||
chunk_text = "Document " + str(doc_num) + ":\n" + doc.content
|
||||
document_texts_list.append(chunk_text)
|
||||
# document_texts_list = []
|
||||
# for doc_num, doc in enumerate(retrieved_docs):
|
||||
# chunk_text = "Document " + str(doc_num) + ":\n" + doc.content
|
||||
# document_texts_list.append(chunk_text)
|
||||
|
||||
document_texts = "\n\n".join(document_texts_list)
|
||||
# document_texts = "\n\n".join(document_texts_list)
|
||||
|
||||
dc_object_extraction_prompt = DC_OBJECT_NO_BASE_DATA_EXTRACTION_PROMPT.format(
|
||||
question=question,
|
||||
task=agent_1_task,
|
||||
document_text=document_texts,
|
||||
objects_of_interest=agent_1_output_objective,
|
||||
)
|
||||
else:
|
||||
dc_object_extraction_prompt = DC_OBJECT_WITH_BASE_DATA_EXTRACTION_PROMPT.format(
|
||||
question=question,
|
||||
task=agent_1_task,
|
||||
base_data=agent_1_base_data,
|
||||
objects_of_interest=agent_1_output_objective,
|
||||
)
|
||||
# dc_object_extraction_prompt = DC_OBJECT_NO_BASE_DATA_EXTRACTION_PROMPT.format(
|
||||
# question=question,
|
||||
# task=agent_1_task,
|
||||
# document_text=document_texts,
|
||||
# objects_of_interest=agent_1_output_objective,
|
||||
# )
|
||||
# else:
|
||||
# dc_object_extraction_prompt = DC_OBJECT_WITH_BASE_DATA_EXTRACTION_PROMPT.format(
|
||||
# question=question,
|
||||
# task=agent_1_task,
|
||||
# base_data=agent_1_base_data,
|
||||
# objects_of_interest=agent_1_output_objective,
|
||||
# )
|
||||
|
||||
msg = [
|
||||
HumanMessage(
|
||||
content=trim_prompt_piece(
|
||||
config=graph_config.tooling.primary_llm.config,
|
||||
prompt_piece=dc_object_extraction_prompt,
|
||||
reserved_str="",
|
||||
),
|
||||
)
|
||||
]
|
||||
primary_llm = graph_config.tooling.primary_llm
|
||||
# Grader
|
||||
try:
|
||||
llm_response = run_with_timeout(
|
||||
30,
|
||||
primary_llm.invoke,
|
||||
prompt=msg,
|
||||
timeout_override=30,
|
||||
max_tokens=300,
|
||||
)
|
||||
# msg = [
|
||||
# HumanMessage(
|
||||
# content=trim_prompt_piece(
|
||||
# config=graph_config.tooling.primary_llm.config,
|
||||
# prompt_piece=dc_object_extraction_prompt,
|
||||
# reserved_str="",
|
||||
# ),
|
||||
# )
|
||||
# ]
|
||||
# primary_llm = graph_config.tooling.primary_llm
|
||||
# # Grader
|
||||
# try:
|
||||
# llm_response = run_with_timeout(
|
||||
# 30,
|
||||
# primary_llm.invoke_langchain,
|
||||
# prompt=msg,
|
||||
# timeout_override=30,
|
||||
# max_tokens=300,
|
||||
# )
|
||||
|
||||
cleaned_response = (
|
||||
str(llm_response.content)
|
||||
.replace("```json\n", "")
|
||||
.replace("\n```", "")
|
||||
.replace("\n", "")
|
||||
)
|
||||
cleaned_response = cleaned_response.split("OBJECTS:")[1]
|
||||
object_list = [x.strip() for x in cleaned_response.split(";")]
|
||||
# cleaned_response = (
|
||||
# str(llm_response.content)
|
||||
# .replace("```json\n", "")
|
||||
# .replace("\n```", "")
|
||||
# .replace("\n", "")
|
||||
# )
|
||||
# cleaned_response = cleaned_response.split("OBJECTS:")[1]
|
||||
# object_list = [x.strip() for x in cleaned_response.split(";")]
|
||||
|
||||
except Exception as e:
|
||||
raise ValueError(f"Error in search_objects: {e}")
|
||||
# except Exception as e:
|
||||
# raise ValueError(f"Error in search_objects: {e}")
|
||||
|
||||
return SearchSourcesObjectsUpdate(
|
||||
analysis_objects=object_list,
|
||||
analysis_sources=document_sources,
|
||||
log_messages=["Agent 1 Task done"],
|
||||
)
|
||||
# return SearchSourcesObjectsUpdate(
|
||||
# analysis_objects=object_list,
|
||||
# analysis_sources=document_sources,
|
||||
# log_messages=["Agent 1 Task done"],
|
||||
# )
|
||||
|
||||
@@ -1,180 +1,180 @@
|
||||
from datetime import datetime
|
||||
from datetime import timedelta
|
||||
from datetime import timezone
|
||||
from typing import cast
|
||||
# from datetime import datetime
|
||||
# from datetime import timedelta
|
||||
# from datetime import timezone
|
||||
# from typing import cast
|
||||
|
||||
from langchain_core.messages import HumanMessage
|
||||
from langchain_core.runnables import RunnableConfig
|
||||
from langgraph.types import StreamWriter
|
||||
# from langchain_core.messages import HumanMessage
|
||||
# from langchain_core.runnables import RunnableConfig
|
||||
# from langgraph.types import StreamWriter
|
||||
|
||||
from onyx.agents.agent_search.dc_search_analysis.ops import extract_section
|
||||
from onyx.agents.agent_search.dc_search_analysis.ops import research
|
||||
from onyx.agents.agent_search.dc_search_analysis.states import ObjectSourceInput
|
||||
from onyx.agents.agent_search.dc_search_analysis.states import (
|
||||
ObjectSourceResearchUpdate,
|
||||
)
|
||||
from onyx.agents.agent_search.models import GraphConfig
|
||||
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
|
||||
trim_prompt_piece,
|
||||
)
|
||||
from onyx.prompts.agents.dc_prompts import DC_OBJECT_SOURCE_RESEARCH_PROMPT
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.threadpool_concurrency import run_with_timeout
|
||||
# from onyx.agents.agent_search.dc_search_analysis.ops import extract_section
|
||||
# from onyx.agents.agent_search.dc_search_analysis.ops import research
|
||||
# from onyx.agents.agent_search.dc_search_analysis.states import ObjectSourceInput
|
||||
# from onyx.agents.agent_search.dc_search_analysis.states import (
|
||||
# ObjectSourceResearchUpdate,
|
||||
# )
|
||||
# from onyx.agents.agent_search.models import GraphConfig
|
||||
# from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
|
||||
# trim_prompt_piece,
|
||||
# )
|
||||
# from onyx.prompts.agents.dc_prompts import DC_OBJECT_SOURCE_RESEARCH_PROMPT
|
||||
# from onyx.utils.logger import setup_logger
|
||||
# from onyx.utils.threadpool_concurrency import run_with_timeout
|
||||
|
||||
logger = setup_logger()
|
||||
# logger = setup_logger()
|
||||
|
||||
|
||||
def research_object_source(
|
||||
state: ObjectSourceInput,
|
||||
config: RunnableConfig,
|
||||
writer: StreamWriter = lambda _: None,
|
||||
) -> ObjectSourceResearchUpdate:
|
||||
"""
|
||||
LangGraph node to start the agentic search process.
|
||||
"""
|
||||
datetime.now()
|
||||
# def research_object_source(
|
||||
# state: ObjectSourceInput,
|
||||
# config: RunnableConfig,
|
||||
# writer: StreamWriter = lambda _: None,
|
||||
# ) -> ObjectSourceResearchUpdate:
|
||||
# """
|
||||
# LangGraph node to start the agentic search process.
|
||||
# """
|
||||
# datetime.now()
|
||||
|
||||
graph_config = cast(GraphConfig, config["metadata"]["config"])
|
||||
search_tool = graph_config.tooling.search_tool
|
||||
question = graph_config.inputs.prompt_builder.raw_user_query
|
||||
object, document_source = state.object_source_combination
|
||||
# graph_config = cast(GraphConfig, config["metadata"]["config"])
|
||||
# search_tool = graph_config.tooling.search_tool
|
||||
# question = graph_config.inputs.prompt_builder.raw_user_query
|
||||
# object, document_source = state.object_source_combination
|
||||
|
||||
if search_tool is None or graph_config.inputs.persona is None:
|
||||
raise ValueError("Search tool and persona must be provided for DivCon search")
|
||||
# if search_tool is None or graph_config.inputs.persona is None:
|
||||
# raise ValueError("Search tool and persona must be provided for DivCon search")
|
||||
|
||||
try:
|
||||
instructions = graph_config.inputs.persona.system_prompt or ""
|
||||
# try:
|
||||
# instructions = graph_config.inputs.persona.system_prompt or ""
|
||||
|
||||
agent_2_instructions = extract_section(
|
||||
instructions, "Agent Step 2:", "Agent Step 3:"
|
||||
)
|
||||
if agent_2_instructions is None:
|
||||
raise ValueError("Agent 2 instructions not found")
|
||||
# agent_2_instructions = extract_section(
|
||||
# instructions, "Agent Step 2:", "Agent Step 3:"
|
||||
# )
|
||||
# if agent_2_instructions is None:
|
||||
# raise ValueError("Agent 2 instructions not found")
|
||||
|
||||
agent_2_task = extract_section(
|
||||
agent_2_instructions, "Task:", "Independent Research Sources:"
|
||||
)
|
||||
if agent_2_task is None:
|
||||
raise ValueError("Agent 2 task not found")
|
||||
# agent_2_task = extract_section(
|
||||
# agent_2_instructions, "Task:", "Independent Research Sources:"
|
||||
# )
|
||||
# if agent_2_task is None:
|
||||
# raise ValueError("Agent 2 task not found")
|
||||
|
||||
agent_2_time_cutoff = extract_section(
|
||||
agent_2_instructions, "Time Cutoff:", "Research Topics:"
|
||||
)
|
||||
# agent_2_time_cutoff = extract_section(
|
||||
# agent_2_instructions, "Time Cutoff:", "Research Topics:"
|
||||
# )
|
||||
|
||||
agent_2_research_topics = extract_section(
|
||||
agent_2_instructions, "Research Topics:", "Output Objective"
|
||||
)
|
||||
# agent_2_research_topics = extract_section(
|
||||
# agent_2_instructions, "Research Topics:", "Output Objective"
|
||||
# )
|
||||
|
||||
agent_2_output_objective = extract_section(
|
||||
agent_2_instructions, "Output Objective:"
|
||||
)
|
||||
if agent_2_output_objective is None:
|
||||
raise ValueError("Agent 2 output objective not found")
|
||||
# agent_2_output_objective = extract_section(
|
||||
# agent_2_instructions, "Output Objective:"
|
||||
# )
|
||||
# if agent_2_output_objective is None:
|
||||
# raise ValueError("Agent 2 output objective not found")
|
||||
|
||||
except Exception:
|
||||
raise ValueError(
|
||||
"Agent 1 instructions not found or not formatted correctly: {e}"
|
||||
)
|
||||
# except Exception:
|
||||
# raise ValueError(
|
||||
# "Agent 1 instructions not found or not formatted correctly: {e}"
|
||||
# )
|
||||
|
||||
# Populate prompt
|
||||
# # Populate prompt
|
||||
|
||||
# Retrieve chunks for objects
|
||||
# # Retrieve chunks for objects
|
||||
|
||||
if agent_2_time_cutoff is not None and agent_2_time_cutoff.strip() != "":
|
||||
if agent_2_time_cutoff.strip().endswith("d"):
|
||||
try:
|
||||
days = int(agent_2_time_cutoff.strip()[:-1])
|
||||
agent_2_source_start_time = datetime.now(timezone.utc) - timedelta(
|
||||
days=days
|
||||
)
|
||||
except ValueError:
|
||||
raise ValueError(
|
||||
f"Invalid time cutoff format: {agent_2_time_cutoff}. Expected format: '<number>d'"
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Invalid time cutoff format: {agent_2_time_cutoff}. Expected format: '<number>d'"
|
||||
)
|
||||
else:
|
||||
agent_2_source_start_time = None
|
||||
# if agent_2_time_cutoff is not None and agent_2_time_cutoff.strip() != "":
|
||||
# if agent_2_time_cutoff.strip().endswith("d"):
|
||||
# try:
|
||||
# days = int(agent_2_time_cutoff.strip()[:-1])
|
||||
# agent_2_source_start_time = datetime.now(timezone.utc) - timedelta(
|
||||
# days=days
|
||||
# )
|
||||
# except ValueError:
|
||||
# raise ValueError(
|
||||
# f"Invalid time cutoff format: {agent_2_time_cutoff}. Expected format: '<number>d'"
|
||||
# )
|
||||
# else:
|
||||
# raise ValueError(
|
||||
# f"Invalid time cutoff format: {agent_2_time_cutoff}. Expected format: '<number>d'"
|
||||
# )
|
||||
# else:
|
||||
# agent_2_source_start_time = None
|
||||
|
||||
document_sources = [document_source] if document_source else None
|
||||
# document_sources = [document_source] if document_source else None
|
||||
|
||||
if len(question.strip()) > 0:
|
||||
research_area = f"{question} for {object}"
|
||||
elif agent_2_research_topics and len(agent_2_research_topics.strip()) > 0:
|
||||
research_area = f"{agent_2_research_topics} for {object}"
|
||||
else:
|
||||
research_area = object
|
||||
# if len(question.strip()) > 0:
|
||||
# research_area = f"{question} for {object}"
|
||||
# elif agent_2_research_topics and len(agent_2_research_topics.strip()) > 0:
|
||||
# research_area = f"{agent_2_research_topics} for {object}"
|
||||
# else:
|
||||
# research_area = object
|
||||
|
||||
retrieved_docs = research(
|
||||
question=research_area,
|
||||
search_tool=search_tool,
|
||||
document_sources=document_sources,
|
||||
time_cutoff=agent_2_source_start_time,
|
||||
)
|
||||
# retrieved_docs = research(
|
||||
# question=research_area,
|
||||
# search_tool=search_tool,
|
||||
# document_sources=document_sources,
|
||||
# time_cutoff=agent_2_source_start_time,
|
||||
# )
|
||||
|
||||
# Generate document text
|
||||
# # Generate document text
|
||||
|
||||
document_texts_list = []
|
||||
for doc_num, doc in enumerate(retrieved_docs):
|
||||
chunk_text = "Document " + str(doc_num) + ":\n" + doc.content
|
||||
document_texts_list.append(chunk_text)
|
||||
# document_texts_list = []
|
||||
# for doc_num, doc in enumerate(retrieved_docs):
|
||||
# chunk_text = "Document " + str(doc_num) + ":\n" + doc.content
|
||||
# document_texts_list.append(chunk_text)
|
||||
|
||||
document_texts = "\n\n".join(document_texts_list)
|
||||
# document_texts = "\n\n".join(document_texts_list)
|
||||
|
||||
# Built prompt
|
||||
# # Built prompt
|
||||
|
||||
today = datetime.now().strftime("%A, %Y-%m-%d")
|
||||
# today = datetime.now().strftime("%A, %Y-%m-%d")
|
||||
|
||||
dc_object_source_research_prompt = (
|
||||
DC_OBJECT_SOURCE_RESEARCH_PROMPT.format(
|
||||
today=today,
|
||||
question=question,
|
||||
task=agent_2_task,
|
||||
document_text=document_texts,
|
||||
format=agent_2_output_objective,
|
||||
)
|
||||
.replace("---object---", object)
|
||||
.replace("---source---", document_source.value)
|
||||
)
|
||||
# dc_object_source_research_prompt = (
|
||||
# DC_OBJECT_SOURCE_RESEARCH_PROMPT.format(
|
||||
# today=today,
|
||||
# question=question,
|
||||
# task=agent_2_task,
|
||||
# document_text=document_texts,
|
||||
# format=agent_2_output_objective,
|
||||
# )
|
||||
# .replace("---object---", object)
|
||||
# .replace("---source---", document_source.value)
|
||||
# )
|
||||
|
||||
# Run LLM
|
||||
# # Run LLM
|
||||
|
||||
msg = [
|
||||
HumanMessage(
|
||||
content=trim_prompt_piece(
|
||||
config=graph_config.tooling.primary_llm.config,
|
||||
prompt_piece=dc_object_source_research_prompt,
|
||||
reserved_str="",
|
||||
),
|
||||
)
|
||||
]
|
||||
primary_llm = graph_config.tooling.primary_llm
|
||||
# Grader
|
||||
try:
|
||||
llm_response = run_with_timeout(
|
||||
30,
|
||||
primary_llm.invoke,
|
||||
prompt=msg,
|
||||
timeout_override=30,
|
||||
max_tokens=300,
|
||||
)
|
||||
# msg = [
|
||||
# HumanMessage(
|
||||
# content=trim_prompt_piece(
|
||||
# config=graph_config.tooling.primary_llm.config,
|
||||
# prompt_piece=dc_object_source_research_prompt,
|
||||
# reserved_str="",
|
||||
# ),
|
||||
# )
|
||||
# ]
|
||||
# primary_llm = graph_config.tooling.primary_llm
|
||||
# # Grader
|
||||
# try:
|
||||
# llm_response = run_with_timeout(
|
||||
# 30,
|
||||
# primary_llm.invoke_langchain,
|
||||
# prompt=msg,
|
||||
# timeout_override=30,
|
||||
# max_tokens=300,
|
||||
# )
|
||||
|
||||
cleaned_response = str(llm_response.content).replace("```json\n", "")
|
||||
cleaned_response = cleaned_response.split("RESEARCH RESULTS:")[1]
|
||||
object_research_results = {
|
||||
"object": object,
|
||||
"source": document_source.value,
|
||||
"research_result": cleaned_response,
|
||||
}
|
||||
# cleaned_response = str(llm_response.content).replace("```json\n", "")
|
||||
# cleaned_response = cleaned_response.split("RESEARCH RESULTS:")[1]
|
||||
# object_research_results = {
|
||||
# "object": object,
|
||||
# "source": document_source.value,
|
||||
# "research_result": cleaned_response,
|
||||
# }
|
||||
|
||||
except Exception as e:
|
||||
raise ValueError(f"Error in research_object_source: {e}")
|
||||
# except Exception as e:
|
||||
# raise ValueError(f"Error in research_object_source: {e}")
|
||||
|
||||
logger.debug("DivCon Step A2 - Object Source Research - completed for an object")
|
||||
# logger.debug("DivCon Step A2 - Object Source Research - completed for an object")
|
||||
|
||||
return ObjectSourceResearchUpdate(
|
||||
object_source_research_results=[object_research_results],
|
||||
log_messages=["Agent Step 2 done for one object"],
|
||||
)
|
||||
# return ObjectSourceResearchUpdate(
|
||||
# object_source_research_results=[object_research_results],
|
||||
# log_messages=["Agent Step 2 done for one object"],
|
||||
# )
|
||||
|
||||
@@ -1,48 +1,48 @@
|
||||
from collections import defaultdict
|
||||
from typing import Dict
|
||||
from typing import List
|
||||
# from collections import defaultdict
|
||||
# from typing import Dict
|
||||
# from typing import List
|
||||
|
||||
from langchain_core.runnables import RunnableConfig
|
||||
from langgraph.types import StreamWriter
|
||||
# from langchain_core.runnables import RunnableConfig
|
||||
# from langgraph.types import StreamWriter
|
||||
|
||||
from onyx.agents.agent_search.dc_search_analysis.states import MainState
|
||||
from onyx.agents.agent_search.dc_search_analysis.states import (
|
||||
ObjectResearchInformationUpdate,
|
||||
)
|
||||
from onyx.utils.logger import setup_logger
|
||||
# from onyx.agents.agent_search.dc_search_analysis.states import MainState
|
||||
# from onyx.agents.agent_search.dc_search_analysis.states import (
|
||||
# ObjectResearchInformationUpdate,
|
||||
# )
|
||||
# from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
# logger = setup_logger()
|
||||
|
||||
|
||||
def structure_research_by_object(
|
||||
state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
|
||||
) -> ObjectResearchInformationUpdate:
|
||||
"""
|
||||
LangGraph node to start the agentic search process.
|
||||
"""
|
||||
# def structure_research_by_object(
|
||||
# state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
|
||||
# ) -> ObjectResearchInformationUpdate:
|
||||
# """
|
||||
# LangGraph node to start the agentic search process.
|
||||
# """
|
||||
|
||||
object_source_research_results = state.object_source_research_results
|
||||
# object_source_research_results = state.object_source_research_results
|
||||
|
||||
object_research_information_results: List[Dict[str, str]] = []
|
||||
object_research_information_results_list: Dict[str, List[str]] = defaultdict(list)
|
||||
# object_research_information_results: List[Dict[str, str]] = []
|
||||
# object_research_information_results_list: Dict[str, List[str]] = defaultdict(list)
|
||||
|
||||
for object_source_research in object_source_research_results:
|
||||
object = object_source_research["object"]
|
||||
source = object_source_research["source"]
|
||||
research_result = object_source_research["research_result"]
|
||||
# for object_source_research in object_source_research_results:
|
||||
# object = object_source_research["object"]
|
||||
# source = object_source_research["source"]
|
||||
# research_result = object_source_research["research_result"]
|
||||
|
||||
object_research_information_results_list[object].append(
|
||||
f"Source: {source}\n{research_result}"
|
||||
)
|
||||
# object_research_information_results_list[object].append(
|
||||
# f"Source: {source}\n{research_result}"
|
||||
# )
|
||||
|
||||
for object, information in object_research_information_results_list.items():
|
||||
object_research_information_results.append(
|
||||
{"object": object, "information": "\n".join(information)}
|
||||
)
|
||||
# for object, information in object_research_information_results_list.items():
|
||||
# object_research_information_results.append(
|
||||
# {"object": object, "information": "\n".join(information)}
|
||||
# )
|
||||
|
||||
logger.debug("DivCon Step A3 - Object Research Information Structuring - completed")
|
||||
# logger.debug("DivCon Step A3 - Object Research Information Structuring - completed")
|
||||
|
||||
return ObjectResearchInformationUpdate(
|
||||
object_research_information_results=object_research_information_results,
|
||||
log_messages=["A3 - Object Research Information structured"],
|
||||
)
|
||||
# return ObjectResearchInformationUpdate(
|
||||
# object_research_information_results=object_research_information_results,
|
||||
# log_messages=["A3 - Object Research Information structured"],
|
||||
# )
|
||||
|
||||
@@ -1,103 +1,103 @@
|
||||
from typing import cast
|
||||
# from typing import cast
|
||||
|
||||
from langchain_core.messages import HumanMessage
|
||||
from langchain_core.runnables import RunnableConfig
|
||||
from langgraph.types import StreamWriter
|
||||
# from langchain_core.messages import HumanMessage
|
||||
# from langchain_core.runnables import RunnableConfig
|
||||
# from langgraph.types import StreamWriter
|
||||
|
||||
from onyx.agents.agent_search.dc_search_analysis.ops import extract_section
|
||||
from onyx.agents.agent_search.dc_search_analysis.states import ObjectInformationInput
|
||||
from onyx.agents.agent_search.dc_search_analysis.states import ObjectResearchUpdate
|
||||
from onyx.agents.agent_search.models import GraphConfig
|
||||
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
|
||||
trim_prompt_piece,
|
||||
)
|
||||
from onyx.prompts.agents.dc_prompts import DC_OBJECT_CONSOLIDATION_PROMPT
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.threadpool_concurrency import run_with_timeout
|
||||
# from onyx.agents.agent_search.dc_search_analysis.ops import extract_section
|
||||
# from onyx.agents.agent_search.dc_search_analysis.states import ObjectInformationInput
|
||||
# from onyx.agents.agent_search.dc_search_analysis.states import ObjectResearchUpdate
|
||||
# from onyx.agents.agent_search.models import GraphConfig
|
||||
# from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
|
||||
# trim_prompt_piece,
|
||||
# )
|
||||
# from onyx.prompts.agents.dc_prompts import DC_OBJECT_CONSOLIDATION_PROMPT
|
||||
# from onyx.utils.logger import setup_logger
|
||||
# from onyx.utils.threadpool_concurrency import run_with_timeout
|
||||
|
||||
logger = setup_logger()
|
||||
# logger = setup_logger()
|
||||
|
||||
|
||||
def consolidate_object_research(
|
||||
state: ObjectInformationInput,
|
||||
config: RunnableConfig,
|
||||
writer: StreamWriter = lambda _: None,
|
||||
) -> ObjectResearchUpdate:
|
||||
"""
|
||||
LangGraph node to start the agentic search process.
|
||||
"""
|
||||
graph_config = cast(GraphConfig, config["metadata"]["config"])
|
||||
search_tool = graph_config.tooling.search_tool
|
||||
question = graph_config.inputs.prompt_builder.raw_user_query
|
||||
# def consolidate_object_research(
|
||||
# state: ObjectInformationInput,
|
||||
# config: RunnableConfig,
|
||||
# writer: StreamWriter = lambda _: None,
|
||||
# ) -> ObjectResearchUpdate:
|
||||
# """
|
||||
# LangGraph node to start the agentic search process.
|
||||
# """
|
||||
# graph_config = cast(GraphConfig, config["metadata"]["config"])
|
||||
# search_tool = graph_config.tooling.search_tool
|
||||
# question = graph_config.inputs.prompt_builder.raw_user_query
|
||||
|
||||
if search_tool is None or graph_config.inputs.persona is None:
|
||||
raise ValueError("Search tool and persona must be provided for DivCon search")
|
||||
# if search_tool is None or graph_config.inputs.persona is None:
|
||||
# raise ValueError("Search tool and persona must be provided for DivCon search")
|
||||
|
||||
instructions = graph_config.inputs.persona.system_prompt or ""
|
||||
# instructions = graph_config.inputs.persona.system_prompt or ""
|
||||
|
||||
agent_4_instructions = extract_section(
|
||||
instructions, "Agent Step 4:", "Agent Step 5:"
|
||||
)
|
||||
if agent_4_instructions is None:
|
||||
raise ValueError("Agent 4 instructions not found")
|
||||
agent_4_output_objective = extract_section(
|
||||
agent_4_instructions, "Output Objective:"
|
||||
)
|
||||
if agent_4_output_objective is None:
|
||||
raise ValueError("Agent 4 output objective not found")
|
||||
# agent_4_instructions = extract_section(
|
||||
# instructions, "Agent Step 4:", "Agent Step 5:"
|
||||
# )
|
||||
# if agent_4_instructions is None:
|
||||
# raise ValueError("Agent 4 instructions not found")
|
||||
# agent_4_output_objective = extract_section(
|
||||
# agent_4_instructions, "Output Objective:"
|
||||
# )
|
||||
# if agent_4_output_objective is None:
|
||||
# raise ValueError("Agent 4 output objective not found")
|
||||
|
||||
object_information = state.object_information
|
||||
# object_information = state.object_information
|
||||
|
||||
object = object_information["object"]
|
||||
information = object_information["information"]
|
||||
# object = object_information["object"]
|
||||
# information = object_information["information"]
|
||||
|
||||
# Create a prompt for the object consolidation
|
||||
# # Create a prompt for the object consolidation
|
||||
|
||||
dc_object_consolidation_prompt = DC_OBJECT_CONSOLIDATION_PROMPT.format(
|
||||
question=question,
|
||||
object=object,
|
||||
information=information,
|
||||
format=agent_4_output_objective,
|
||||
)
|
||||
# dc_object_consolidation_prompt = DC_OBJECT_CONSOLIDATION_PROMPT.format(
|
||||
# question=question,
|
||||
# object=object,
|
||||
# information=information,
|
||||
# format=agent_4_output_objective,
|
||||
# )
|
||||
|
||||
# Run LLM
|
||||
# # Run LLM
|
||||
|
||||
msg = [
|
||||
HumanMessage(
|
||||
content=trim_prompt_piece(
|
||||
config=graph_config.tooling.primary_llm.config,
|
||||
prompt_piece=dc_object_consolidation_prompt,
|
||||
reserved_str="",
|
||||
),
|
||||
)
|
||||
]
|
||||
primary_llm = graph_config.tooling.primary_llm
|
||||
# Grader
|
||||
try:
|
||||
llm_response = run_with_timeout(
|
||||
30,
|
||||
primary_llm.invoke,
|
||||
prompt=msg,
|
||||
timeout_override=30,
|
||||
max_tokens=300,
|
||||
)
|
||||
# msg = [
|
||||
# HumanMessage(
|
||||
# content=trim_prompt_piece(
|
||||
# config=graph_config.tooling.primary_llm.config,
|
||||
# prompt_piece=dc_object_consolidation_prompt,
|
||||
# reserved_str="",
|
||||
# ),
|
||||
# )
|
||||
# ]
|
||||
# primary_llm = graph_config.tooling.primary_llm
|
||||
# # Grader
|
||||
# try:
|
||||
# llm_response = run_with_timeout(
|
||||
# 30,
|
||||
# primary_llm.invoke_langchain,
|
||||
# prompt=msg,
|
||||
# timeout_override=30,
|
||||
# max_tokens=300,
|
||||
# )
|
||||
|
||||
cleaned_response = str(llm_response.content).replace("```json\n", "")
|
||||
consolidated_information = cleaned_response.split("INFORMATION:")[1]
|
||||
# cleaned_response = str(llm_response.content).replace("```json\n", "")
|
||||
# consolidated_information = cleaned_response.split("INFORMATION:")[1]
|
||||
|
||||
except Exception as e:
|
||||
raise ValueError(f"Error in consolidate_object_research: {e}")
|
||||
# except Exception as e:
|
||||
# raise ValueError(f"Error in consolidate_object_research: {e}")
|
||||
|
||||
object_research_results = {
|
||||
"object": object,
|
||||
"research_result": consolidated_information,
|
||||
}
|
||||
# object_research_results = {
|
||||
# "object": object,
|
||||
# "research_result": consolidated_information,
|
||||
# }
|
||||
|
||||
logger.debug(
|
||||
"DivCon Step A4 - Object Research Consolidation - completed for an object"
|
||||
)
|
||||
# logger.debug(
|
||||
# "DivCon Step A4 - Object Research Consolidation - completed for an object"
|
||||
# )
|
||||
|
||||
return ObjectResearchUpdate(
|
||||
object_research_results=[object_research_results],
|
||||
log_messages=["Agent Source Consilidation done"],
|
||||
)
|
||||
# return ObjectResearchUpdate(
|
||||
# object_research_results=[object_research_results],
|
||||
# log_messages=["Agent Source Consilidation done"],
|
||||
# )
|
||||
|
||||
@@ -1,127 +1,127 @@
|
||||
from typing import cast
|
||||
# from typing import cast
|
||||
|
||||
from langchain_core.messages import HumanMessage
|
||||
from langchain_core.runnables import RunnableConfig
|
||||
from langgraph.types import StreamWriter
|
||||
# from langchain_core.messages import HumanMessage
|
||||
# from langchain_core.runnables import RunnableConfig
|
||||
# from langgraph.types import StreamWriter
|
||||
|
||||
from onyx.agents.agent_search.dc_search_analysis.ops import extract_section
|
||||
from onyx.agents.agent_search.dc_search_analysis.states import MainState
|
||||
from onyx.agents.agent_search.dc_search_analysis.states import ResearchUpdate
|
||||
from onyx.agents.agent_search.models import GraphConfig
|
||||
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
|
||||
trim_prompt_piece,
|
||||
)
|
||||
from onyx.agents.agent_search.shared_graph_utils.llm import stream_llm_answer
|
||||
from onyx.prompts.agents.dc_prompts import DC_FORMATTING_NO_BASE_DATA_PROMPT
|
||||
from onyx.prompts.agents.dc_prompts import DC_FORMATTING_WITH_BASE_DATA_PROMPT
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.threadpool_concurrency import run_with_timeout
|
||||
# from onyx.agents.agent_search.dc_search_analysis.ops import extract_section
|
||||
# from onyx.agents.agent_search.dc_search_analysis.states import MainState
|
||||
# from onyx.agents.agent_search.dc_search_analysis.states import ResearchUpdate
|
||||
# from onyx.agents.agent_search.models import GraphConfig
|
||||
# from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
|
||||
# trim_prompt_piece,
|
||||
# )
|
||||
# from onyx.agents.agent_search.shared_graph_utils.llm import stream_llm_answer
|
||||
# from onyx.prompts.agents.dc_prompts import DC_FORMATTING_NO_BASE_DATA_PROMPT
|
||||
# from onyx.prompts.agents.dc_prompts import DC_FORMATTING_WITH_BASE_DATA_PROMPT
|
||||
# from onyx.utils.logger import setup_logger
|
||||
# from onyx.utils.threadpool_concurrency import run_with_timeout
|
||||
|
||||
logger = setup_logger()
|
||||
# logger = setup_logger()
|
||||
|
||||
|
||||
def consolidate_research(
|
||||
state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
|
||||
) -> ResearchUpdate:
|
||||
"""
|
||||
LangGraph node to start the agentic search process.
|
||||
"""
|
||||
# def consolidate_research(
|
||||
# state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
|
||||
# ) -> ResearchUpdate:
|
||||
# """
|
||||
# LangGraph node to start the agentic search process.
|
||||
# """
|
||||
|
||||
graph_config = cast(GraphConfig, config["metadata"]["config"])
|
||||
# graph_config = cast(GraphConfig, config["metadata"]["config"])
|
||||
|
||||
search_tool = graph_config.tooling.search_tool
|
||||
# search_tool = graph_config.tooling.search_tool
|
||||
|
||||
if search_tool is None or graph_config.inputs.persona is None:
|
||||
raise ValueError("Search tool and persona must be provided for DivCon search")
|
||||
# if search_tool is None or graph_config.inputs.persona is None:
|
||||
# raise ValueError("Search tool and persona must be provided for DivCon search")
|
||||
|
||||
# Populate prompt
|
||||
instructions = graph_config.inputs.persona.system_prompt or ""
|
||||
# # Populate prompt
|
||||
# instructions = graph_config.inputs.persona.system_prompt or ""
|
||||
|
||||
try:
|
||||
agent_5_instructions = extract_section(
|
||||
instructions, "Agent Step 5:", "Agent End"
|
||||
)
|
||||
if agent_5_instructions is None:
|
||||
raise ValueError("Agent 5 instructions not found")
|
||||
agent_5_base_data = extract_section(instructions, "|Start Data|", "|End Data|")
|
||||
agent_5_task = extract_section(
|
||||
agent_5_instructions, "Task:", "Independent Research Sources:"
|
||||
)
|
||||
if agent_5_task is None:
|
||||
raise ValueError("Agent 5 task not found")
|
||||
agent_5_output_objective = extract_section(
|
||||
agent_5_instructions, "Output Objective:"
|
||||
)
|
||||
if agent_5_output_objective is None:
|
||||
raise ValueError("Agent 5 output objective not found")
|
||||
except ValueError as e:
|
||||
raise ValueError(
|
||||
f"Instructions for Agent Step 5 were not properly formatted: {e}"
|
||||
)
|
||||
# try:
|
||||
# agent_5_instructions = extract_section(
|
||||
# instructions, "Agent Step 5:", "Agent End"
|
||||
# )
|
||||
# if agent_5_instructions is None:
|
||||
# raise ValueError("Agent 5 instructions not found")
|
||||
# agent_5_base_data = extract_section(instructions, "|Start Data|", "|End Data|")
|
||||
# agent_5_task = extract_section(
|
||||
# agent_5_instructions, "Task:", "Independent Research Sources:"
|
||||
# )
|
||||
# if agent_5_task is None:
|
||||
# raise ValueError("Agent 5 task not found")
|
||||
# agent_5_output_objective = extract_section(
|
||||
# agent_5_instructions, "Output Objective:"
|
||||
# )
|
||||
# if agent_5_output_objective is None:
|
||||
# raise ValueError("Agent 5 output objective not found")
|
||||
# except ValueError as e:
|
||||
# raise ValueError(
|
||||
# f"Instructions for Agent Step 5 were not properly formatted: {e}"
|
||||
# )
|
||||
|
||||
research_result_list = []
|
||||
# research_result_list = []
|
||||
|
||||
if agent_5_task.strip() == "*concatenate*":
|
||||
object_research_results = state.object_research_results
|
||||
# if agent_5_task.strip() == "*concatenate*":
|
||||
# object_research_results = state.object_research_results
|
||||
|
||||
for object_research_result in object_research_results:
|
||||
object = object_research_result["object"]
|
||||
research_result = object_research_result["research_result"]
|
||||
research_result_list.append(f"Object: {object}\n\n{research_result}")
|
||||
# for object_research_result in object_research_results:
|
||||
# object = object_research_result["object"]
|
||||
# research_result = object_research_result["research_result"]
|
||||
# research_result_list.append(f"Object: {object}\n\n{research_result}")
|
||||
|
||||
research_results = "\n\n".join(research_result_list)
|
||||
# research_results = "\n\n".join(research_result_list)
|
||||
|
||||
else:
|
||||
raise NotImplementedError("Only '*concatenate*' is currently supported")
|
||||
# else:
|
||||
# raise NotImplementedError("Only '*concatenate*' is currently supported")
|
||||
|
||||
# Create a prompt for the object consolidation
|
||||
# # Create a prompt for the object consolidation
|
||||
|
||||
if agent_5_base_data is None:
|
||||
dc_formatting_prompt = DC_FORMATTING_NO_BASE_DATA_PROMPT.format(
|
||||
text=research_results,
|
||||
format=agent_5_output_objective,
|
||||
)
|
||||
else:
|
||||
dc_formatting_prompt = DC_FORMATTING_WITH_BASE_DATA_PROMPT.format(
|
||||
base_data=agent_5_base_data,
|
||||
text=research_results,
|
||||
format=agent_5_output_objective,
|
||||
)
|
||||
# if agent_5_base_data is None:
|
||||
# dc_formatting_prompt = DC_FORMATTING_NO_BASE_DATA_PROMPT.format(
|
||||
# text=research_results,
|
||||
# format=agent_5_output_objective,
|
||||
# )
|
||||
# else:
|
||||
# dc_formatting_prompt = DC_FORMATTING_WITH_BASE_DATA_PROMPT.format(
|
||||
# base_data=agent_5_base_data,
|
||||
# text=research_results,
|
||||
# format=agent_5_output_objective,
|
||||
# )
|
||||
|
||||
# Run LLM
|
||||
# # Run LLM
|
||||
|
||||
msg = [
|
||||
HumanMessage(
|
||||
content=trim_prompt_piece(
|
||||
config=graph_config.tooling.primary_llm.config,
|
||||
prompt_piece=dc_formatting_prompt,
|
||||
reserved_str="",
|
||||
),
|
||||
)
|
||||
]
|
||||
# msg = [
|
||||
# HumanMessage(
|
||||
# content=trim_prompt_piece(
|
||||
# config=graph_config.tooling.primary_llm.config,
|
||||
# prompt_piece=dc_formatting_prompt,
|
||||
# reserved_str="",
|
||||
# ),
|
||||
# )
|
||||
# ]
|
||||
|
||||
try:
|
||||
_ = run_with_timeout(
|
||||
60,
|
||||
lambda: stream_llm_answer(
|
||||
llm=graph_config.tooling.primary_llm,
|
||||
prompt=msg,
|
||||
event_name="initial_agent_answer",
|
||||
writer=writer,
|
||||
agent_answer_level=0,
|
||||
agent_answer_question_num=0,
|
||||
agent_answer_type="agent_level_answer",
|
||||
timeout_override=30,
|
||||
max_tokens=None,
|
||||
),
|
||||
)
|
||||
# try:
|
||||
# _ = run_with_timeout(
|
||||
# 60,
|
||||
# lambda: stream_llm_answer(
|
||||
# llm=graph_config.tooling.primary_llm,
|
||||
# prompt=msg,
|
||||
# event_name="initial_agent_answer",
|
||||
# writer=writer,
|
||||
# agent_answer_level=0,
|
||||
# agent_answer_question_num=0,
|
||||
# agent_answer_type="agent_level_answer",
|
||||
# timeout_override=30,
|
||||
# max_tokens=None,
|
||||
# ),
|
||||
# )
|
||||
|
||||
except Exception as e:
|
||||
raise ValueError(f"Error in consolidate_research: {e}")
|
||||
# except Exception as e:
|
||||
# raise ValueError(f"Error in consolidate_research: {e}")
|
||||
|
||||
logger.debug("DivCon Step A5 - Final Generation - completed")
|
||||
# logger.debug("DivCon Step A5 - Final Generation - completed")
|
||||
|
||||
return ResearchUpdate(
|
||||
research_results=research_results,
|
||||
log_messages=["Agent Source Consilidation done"],
|
||||
)
|
||||
# return ResearchUpdate(
|
||||
# research_results=research_results,
|
||||
# log_messages=["Agent Source Consilidation done"],
|
||||
# )
|
||||
|
||||
@@ -1,61 +1,50 @@
|
||||
from datetime import datetime
|
||||
from typing import cast
|
||||
# from datetime import datetime
|
||||
# from typing import cast
|
||||
|
||||
from onyx.chat.models import LlmDoc
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.context.search.models import InferenceSection
|
||||
from onyx.db.engine.sql_engine import get_session_with_current_tenant
|
||||
from onyx.tools.models import SearchToolOverrideKwargs
|
||||
from onyx.tools.tool_implementations.search.search_tool import (
|
||||
FINAL_CONTEXT_DOCUMENTS_ID,
|
||||
)
|
||||
from onyx.tools.tool_implementations.search.search_tool import SearchTool
|
||||
# from onyx.chat.models import LlmDoc
|
||||
# from onyx.configs.constants import DocumentSource
|
||||
# from onyx.tools.models import SearchToolOverrideKwargs
|
||||
# from onyx.tools.tool_implementations.search.search_tool import (
|
||||
# FINAL_CONTEXT_DOCUMENTS_ID,
|
||||
# )
|
||||
# from onyx.tools.tool_implementations.search.search_tool import SearchTool
|
||||
|
||||
|
||||
def research(
|
||||
question: str,
|
||||
search_tool: SearchTool,
|
||||
document_sources: list[DocumentSource] | None = None,
|
||||
time_cutoff: datetime | None = None,
|
||||
) -> list[LlmDoc]:
|
||||
# new db session to avoid concurrency issues
|
||||
# def research(
|
||||
# question: str,
|
||||
# search_tool: SearchTool,
|
||||
# document_sources: list[DocumentSource] | None = None,
|
||||
# time_cutoff: datetime | None = None,
|
||||
# ) -> list[LlmDoc]:
|
||||
# # new db session to avoid concurrency issues
|
||||
|
||||
callback_container: list[list[InferenceSection]] = []
|
||||
retrieved_docs: list[LlmDoc] = []
|
||||
# retrieved_docs: list[LlmDoc] = []
|
||||
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
for tool_response in search_tool.run(
|
||||
query=question,
|
||||
override_kwargs=SearchToolOverrideKwargs(
|
||||
force_no_rerank=False,
|
||||
alternate_db_session=db_session,
|
||||
retrieved_sections_callback=callback_container.append,
|
||||
skip_query_analysis=True,
|
||||
document_sources=document_sources,
|
||||
time_cutoff=time_cutoff,
|
||||
),
|
||||
):
|
||||
# get retrieved docs to send to the rest of the graph
|
||||
if tool_response.id == FINAL_CONTEXT_DOCUMENTS_ID:
|
||||
retrieved_docs = cast(list[LlmDoc], tool_response.response)[:10]
|
||||
break
|
||||
return retrieved_docs
|
||||
# for tool_response in search_tool.run(
|
||||
# query=question,
|
||||
# override_kwargs=SearchToolOverrideKwargs(original_query=question),
|
||||
# ):
|
||||
# # get retrieved docs to send to the rest of the graph
|
||||
# if tool_response.id == FINAL_CONTEXT_DOCUMENTS_ID:
|
||||
# retrieved_docs = cast(list[LlmDoc], tool_response.response)[:10]
|
||||
# break
|
||||
# return retrieved_docs
|
||||
|
||||
|
||||
def extract_section(
|
||||
text: str, start_marker: str, end_marker: str | None = None
|
||||
) -> str | None:
|
||||
"""Extract text between markers, returning None if markers not found"""
|
||||
parts = text.split(start_marker)
|
||||
# def extract_section(
|
||||
# text: str, start_marker: str, end_marker: str | None = None
|
||||
# ) -> str | None:
|
||||
# """Extract text between markers, returning None if markers not found"""
|
||||
# parts = text.split(start_marker)
|
||||
|
||||
if len(parts) == 1:
|
||||
return None
|
||||
# if len(parts) == 1:
|
||||
# return None
|
||||
|
||||
after_start = parts[1].strip()
|
||||
# after_start = parts[1].strip()
|
||||
|
||||
if not end_marker:
|
||||
return after_start
|
||||
# if not end_marker:
|
||||
# return after_start
|
||||
|
||||
extract = after_start.split(end_marker)[0]
|
||||
# extract = after_start.split(end_marker)[0]
|
||||
|
||||
return extract.strip()
|
||||
# return extract.strip()
|
||||
|
||||
@@ -1,72 +1,72 @@
|
||||
from operator import add
|
||||
from typing import Annotated
|
||||
from typing import Dict
|
||||
from typing import TypedDict
|
||||
# from operator import add
|
||||
# from typing import Annotated
|
||||
# from typing import Dict
|
||||
# from typing import TypedDict
|
||||
|
||||
from pydantic import BaseModel
|
||||
# from pydantic import BaseModel
|
||||
|
||||
from onyx.agents.agent_search.core_state import CoreState
|
||||
from onyx.agents.agent_search.orchestration.states import ToolCallUpdate
|
||||
from onyx.agents.agent_search.orchestration.states import ToolChoiceInput
|
||||
from onyx.agents.agent_search.orchestration.states import ToolChoiceUpdate
|
||||
from onyx.configs.constants import DocumentSource
|
||||
# from onyx.agents.agent_search.core_state import CoreState
|
||||
# from onyx.agents.agent_search.orchestration.states import ToolCallUpdate
|
||||
# from onyx.agents.agent_search.orchestration.states import ToolChoiceInput
|
||||
# from onyx.agents.agent_search.orchestration.states import ToolChoiceUpdate
|
||||
# from onyx.configs.constants import DocumentSource
|
||||
|
||||
|
||||
### States ###
|
||||
class LoggerUpdate(BaseModel):
|
||||
log_messages: Annotated[list[str], add] = []
|
||||
# ### States ###
|
||||
# class LoggerUpdate(BaseModel):
|
||||
# log_messages: Annotated[list[str], add] = []
|
||||
|
||||
|
||||
class SearchSourcesObjectsUpdate(LoggerUpdate):
|
||||
analysis_objects: list[str] = []
|
||||
analysis_sources: list[DocumentSource] = []
|
||||
# class SearchSourcesObjectsUpdate(LoggerUpdate):
|
||||
# analysis_objects: list[str] = []
|
||||
# analysis_sources: list[DocumentSource] = []
|
||||
|
||||
|
||||
class ObjectSourceInput(LoggerUpdate):
|
||||
object_source_combination: tuple[str, DocumentSource]
|
||||
# class ObjectSourceInput(LoggerUpdate):
|
||||
# object_source_combination: tuple[str, DocumentSource]
|
||||
|
||||
|
||||
class ObjectSourceResearchUpdate(LoggerUpdate):
|
||||
object_source_research_results: Annotated[list[Dict[str, str]], add] = []
|
||||
# class ObjectSourceResearchUpdate(LoggerUpdate):
|
||||
# object_source_research_results: Annotated[list[Dict[str, str]], add] = []
|
||||
|
||||
|
||||
class ObjectInformationInput(LoggerUpdate):
|
||||
object_information: Dict[str, str]
|
||||
# class ObjectInformationInput(LoggerUpdate):
|
||||
# object_information: Dict[str, str]
|
||||
|
||||
|
||||
class ObjectResearchInformationUpdate(LoggerUpdate):
|
||||
object_research_information_results: Annotated[list[Dict[str, str]], add] = []
|
||||
# class ObjectResearchInformationUpdate(LoggerUpdate):
|
||||
# object_research_information_results: Annotated[list[Dict[str, str]], add] = []
|
||||
|
||||
|
||||
class ObjectResearchUpdate(LoggerUpdate):
|
||||
object_research_results: Annotated[list[Dict[str, str]], add] = []
|
||||
# class ObjectResearchUpdate(LoggerUpdate):
|
||||
# object_research_results: Annotated[list[Dict[str, str]], add] = []
|
||||
|
||||
|
||||
class ResearchUpdate(LoggerUpdate):
|
||||
research_results: str | None = None
|
||||
# class ResearchUpdate(LoggerUpdate):
|
||||
# research_results: str | None = None
|
||||
|
||||
|
||||
## Graph Input State
|
||||
class MainInput(CoreState):
|
||||
pass
|
||||
# ## Graph Input State
|
||||
# class MainInput(CoreState):
|
||||
# pass
|
||||
|
||||
|
||||
## Graph State
|
||||
class MainState(
|
||||
# This includes the core state
|
||||
MainInput,
|
||||
ToolChoiceInput,
|
||||
ToolCallUpdate,
|
||||
ToolChoiceUpdate,
|
||||
SearchSourcesObjectsUpdate,
|
||||
ObjectSourceResearchUpdate,
|
||||
ObjectResearchInformationUpdate,
|
||||
ObjectResearchUpdate,
|
||||
ResearchUpdate,
|
||||
):
|
||||
pass
|
||||
# ## Graph State
|
||||
# class MainState(
|
||||
# # This includes the core state
|
||||
# MainInput,
|
||||
# ToolChoiceInput,
|
||||
# ToolCallUpdate,
|
||||
# ToolChoiceUpdate,
|
||||
# SearchSourcesObjectsUpdate,
|
||||
# ObjectSourceResearchUpdate,
|
||||
# ObjectResearchInformationUpdate,
|
||||
# ObjectResearchUpdate,
|
||||
# ResearchUpdate,
|
||||
# ):
|
||||
# pass
|
||||
|
||||
|
||||
## Graph Output State - presently not used
|
||||
class MainOutput(TypedDict):
|
||||
log_messages: list[str]
|
||||
# ## Graph Output State - presently not used
|
||||
# class MainOutput(TypedDict):
|
||||
# log_messages: list[str]
|
||||
|
||||
@@ -1,36 +1,36 @@
|
||||
from pydantic import BaseModel
|
||||
# from pydantic import BaseModel
|
||||
|
||||
|
||||
class RefinementSubQuestion(BaseModel):
|
||||
sub_question: str
|
||||
sub_question_id: str
|
||||
verified: bool
|
||||
answered: bool
|
||||
answer: str
|
||||
# class RefinementSubQuestion(BaseModel):
|
||||
# sub_question: str
|
||||
# sub_question_id: str
|
||||
# verified: bool
|
||||
# answered: bool
|
||||
# answer: str
|
||||
|
||||
|
||||
class AgentTimings(BaseModel):
|
||||
base_duration_s: float | None
|
||||
refined_duration_s: float | None
|
||||
full_duration_s: float | None
|
||||
# class AgentTimings(BaseModel):
|
||||
# base_duration_s: float | None
|
||||
# refined_duration_s: float | None
|
||||
# full_duration_s: float | None
|
||||
|
||||
|
||||
class AgentBaseMetrics(BaseModel):
|
||||
num_verified_documents_total: int | None
|
||||
num_verified_documents_core: int | None
|
||||
verified_avg_score_core: float | None
|
||||
num_verified_documents_base: int | float | None
|
||||
verified_avg_score_base: float | None = None
|
||||
base_doc_boost_factor: float | None = None
|
||||
support_boost_factor: float | None = None
|
||||
duration_s: float | None = None
|
||||
# class AgentBaseMetrics(BaseModel):
|
||||
# num_verified_documents_total: int | None
|
||||
# num_verified_documents_core: int | None
|
||||
# verified_avg_score_core: float | None
|
||||
# num_verified_documents_base: int | float | None
|
||||
# verified_avg_score_base: float | None = None
|
||||
# base_doc_boost_factor: float | None = None
|
||||
# support_boost_factor: float | None = None
|
||||
# duration_s: float | None = None
|
||||
|
||||
|
||||
class AgentRefinedMetrics(BaseModel):
|
||||
refined_doc_boost_factor: float | None = None
|
||||
refined_question_boost_factor: float | None = None
|
||||
duration_s: float | None = None
|
||||
# class AgentRefinedMetrics(BaseModel):
|
||||
# refined_doc_boost_factor: float | None = None
|
||||
# refined_question_boost_factor: float | None = None
|
||||
# duration_s: float | None = None
|
||||
|
||||
|
||||
class AgentAdditionalMetrics(BaseModel):
|
||||
pass
|
||||
# class AgentAdditionalMetrics(BaseModel):
|
||||
# pass
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user