Mirror of https://github.com/onyx-dot-app/onyx.git (synced 2026-02-16 23:35:46 +00:00)

Compare commits: prune-sele ... nightly-la (1007 commits)
2  .github/CODEOWNERS  (vendored)

@@ -1 +1,3 @@
* @onyx-dot-app/onyx-core-team
# Helm charts Owners
/helm/ @justin-tahara
26  .github/actions/custom-build-and-push/action.yml  (vendored)

@@ -25,12 +25,26 @@ inputs:
  tags:
    description: 'Image tags'
    required: true
  no-cache:
    description: 'Read from cache'
    required: false
    default: 'false'
  cache-from:
    description: 'Cache sources'
    required: false
  cache-to:
    description: 'Cache destinations'
    required: false
  outputs:
    description: 'Output destinations'
    required: false
  provenance:
    description: 'Generate provenance attestation'
    required: false
    default: 'false'
  build-args:
    description: 'Build arguments'
    required: false
  retry-wait-time:
    description: 'Time to wait before attempt 2 in seconds'
    required: false
@@ -55,8 +69,12 @@ runs:
        push: ${{ inputs.push }}
        load: ${{ inputs.load }}
        tags: ${{ inputs.tags }}
        no-cache: ${{ inputs.no-cache }}
        cache-from: ${{ inputs.cache-from }}
        cache-to: ${{ inputs.cache-to }}
        outputs: ${{ inputs.outputs }}
        provenance: ${{ inputs.provenance }}
        build-args: ${{ inputs.build-args }}

    - name: Wait before attempt 2
      if: steps.buildx1.outcome != 'success'
@@ -77,8 +95,12 @@ runs:
        push: ${{ inputs.push }}
        load: ${{ inputs.load }}
        tags: ${{ inputs.tags }}
        no-cache: ${{ inputs.no-cache }}
        cache-from: ${{ inputs.cache-from }}
        cache-to: ${{ inputs.cache-to }}
        outputs: ${{ inputs.outputs }}
        provenance: ${{ inputs.provenance }}
        build-args: ${{ inputs.build-args }}

    - name: Wait before attempt 3
      if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success'
@@ -99,8 +121,12 @@ runs:
        push: ${{ inputs.push }}
        load: ${{ inputs.load }}
        tags: ${{ inputs.tags }}
        no-cache: ${{ inputs.no-cache }}
        cache-from: ${{ inputs.cache-from }}
        cache-to: ${{ inputs.cache-to }}
        outputs: ${{ inputs.outputs }}
        provenance: ${{ inputs.provenance }}
        build-args: ${{ inputs.build-args }}

    - name: Report failure
      if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success' && steps.buildx3.outcome != 'success'
50  .github/actions/prepare-build/action.yml  (vendored, new file)

@@ -0,0 +1,50 @@
name: "Prepare Build (OpenAPI generation)"
description: "Sets up Python with uv, installs deps, generates OpenAPI schema and Python client, uploads artifact"
runs:
  using: "composite"
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Setup uv
      uses: astral-sh/setup-uv@v3

    - name: Setup Python
      uses: actions/setup-python@v5
      with:
        python-version: "3.11"

    - name: Install Python dependencies with uv
      shell: bash
      run: |
        uv pip install --system \
          -r backend/requirements/default.txt \
          -r backend/requirements/dev.txt

    - name: Generate OpenAPI schema
      shell: bash
      working-directory: backend
      env:
        PYTHONPATH: "."
      run: |
        python scripts/onyx_openapi_schema.py --filename generated/openapi.json

    - name: Generate OpenAPI Python client
      shell: bash
      run: |
        docker run --rm \
          -v "${{ github.workspace }}/backend/generated:/local" \
          openapitools/openapi-generator-cli generate \
          -i /local/openapi.json \
          -g python \
          -o /local/onyx_openapi_client \
          --package-name onyx_openapi_client \
          --skip-validate-spec \
          --openapi-normalizer "SIMPLIFY_ONEOF_ANYOF=true,SET_OAS3_NULLABLE=true"

    - name: Upload OpenAPI artifacts
      uses: actions/upload-artifact@v4
      with:
        name: openapi-artifacts
        path: backend/generated/
5  .github/pull_request_template.md  (vendored)

@@ -6,9 +6,6 @@

[Describe the tests you ran to verify your changes]

## Backporting (check the box to trigger backport action)
## Additional Options

Note: You have to check that the action passes, otherwise resolve the conflicts manually and tag the patches.

- [ ] This PR should be backported (make sure to check that the backport attempt succeeds)
- [ ] [Optional] Override Linear Check
24  .github/workflows/check-lazy-imports.yml  (vendored, new file)

@@ -0,0 +1,24 @@
name: Check Lazy Imports

on:
  merge_group:
  pull_request:
    branches:
      - main
      - 'release/**'

jobs:
  check-lazy-imports:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Check lazy imports
        run: python3 backend/scripts/check_lazy_imports.py
@@ -7,18 +7,63 @@ on:

env:
  REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
  LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
  DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}

  # tag nightly builds with "edge"
  EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}

jobs:
  build-and-push:
    # TODO: investigate a matrix build like the web container
    # See https://runs-on.com/runners/linux/
    runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}"]

    runs-on:
      - runs-on
      - runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
      - run-id=${{ github.run_id }}
      - tag=platform-${{ matrix.platform }}
    strategy:
      fail-fast: false
      matrix:
        platform:
          - linux/amd64
          - linux/arm64

    steps:
      - name: Prepare
        run: |
          platform=${{ matrix.platform }}
          echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV

      - name: Check if stable release version
        id: check_version
        run: |
          if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
            echo "is_stable=true" >> $GITHUB_OUTPUT
          else
            echo "is_stable=false" >> $GITHUB_OUTPUT
          fi
          if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
            echo "is_beta=true" >> $GITHUB_OUTPUT
          else
            echo "is_beta=false" >> $GITHUB_OUTPUT
          fi

      - name: Checkout code
        uses: actions/checkout@v4

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY_IMAGE }}
          flavor: |
            latest=false
          tags: |
            type=raw,value=${{ github.ref_name }}
            type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
            type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
            type=raw,value=${{ steps.check_version.outputs.is_beta == 'true' && 'beta' || '' }}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
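The check_version step above drives the extra image tags with two bash regexes: a bare semver tag with no "cloud" in the name marks a stable release, and a -beta.N suffix marks a beta. Below is a minimal sketch of the same classification outside of Actions; the sample tag values are illustrative, not taken from this compare.

```bash
#!/usr/bin/env bash
# Sketch of the check_version logic: classify a pushed tag the way the workflow does.
# Only the regexes mirror the workflow; the example tags below are hypothetical.
classify() {
  local ref_name="$1"
  if [[ "$ref_name" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "$ref_name" != *"cloud"* ]]; then
    echo "$ref_name -> is_stable=true (also tagged 'latest')"
  elif [[ "$ref_name" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$ ]] && [[ "$ref_name" != *"cloud"* ]]; then
    echo "$ref_name -> is_beta=true (also tagged 'beta')"
  else
    echo "$ref_name -> no extra tag from check_version"
  fi
}

classify "v1.2.3"            # stable release
classify "v1.2.3-beta.4"     # beta release
classify "v1.2.3-cloud.7"    # cloud builds never get 'latest' or 'beta'
classify "nightly-latest-1"  # nightlies are tagged 'edge' via EDGE_TAG instead
```

The metadata-action tags list then turns those step outputs into the moving tags (latest, beta, edge) alongside the raw ref name.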
@@ -34,30 +79,120 @@ jobs:
          sudo apt-get install -y build-essential

      - name: Backend Image Docker Build and Push
        uses: docker/build-push-action@v5
        id: build
        uses: docker/build-push-action@v6
        with:
          context: ./backend
          file: ./backend/Dockerfile
          platforms: linux/amd64,linux/arm64
          platforms: ${{ matrix.platform }}
          push: true
          tags: |
            ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
            ${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
          build-args: |
            ONYX_VERSION=${{ github.ref_name }}
          labels: ${{ steps.meta.outputs.labels }}
          outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
          cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
          cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max

      - name: Export digest
        run: |
          mkdir -p /tmp/digests
          digest="${{ steps.build.outputs.digest }}"
          touch "/tmp/digests/${digest#sha256:}"

      - name: Upload digest
        uses: actions/upload-artifact@v4
        with:
          name: backend-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
          path: /tmp/digests/*
          if-no-files-found: error
          retention-days: 1

  merge:
    runs-on: ubuntu-latest
    needs:
      - build-and-push
    steps:
      # Needed for trivyignore
      - name: Checkout
        uses: actions/checkout@v4

      - name: Check if stable release version
        id: check_version
        run: |
          if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
            echo "is_stable=true" >> $GITHUB_OUTPUT
          else
            echo "is_stable=false" >> $GITHUB_OUTPUT
          fi
          if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
            echo "is_beta=true" >> $GITHUB_OUTPUT
          else
            echo "is_beta=false" >> $GITHUB_OUTPUT
          fi

      - name: Download digests
        uses: actions/download-artifact@v4
        with:
          path: /tmp/digests
          pattern: backend-digests-*-${{ github.run_id }}
          merge-multiple: true

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY_IMAGE }}
          flavor: |
            latest=false
          tags: |
            type=raw,value=${{ github.ref_name }}
            type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
            type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
            type=raw,value=${{ steps.check_version.outputs.is_beta == 'true' && 'beta' || '' }}

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Create manifest list and push
        working-directory: /tmp/digests
        run: |
          docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
            $(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)

      - name: Inspect image
        run: |
          docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}

      # trivy has their own rate limiting issues causing this action to flake
      # we worked around it by hardcoding to different db repos in env
      # can re-enable when they figure it out
      # https://github.com/aquasecurity/trivy/discussions/7538
      # https://github.com/aquasecurity/trivy-action/issues/389
      # Security: Using pinned digest (0.65.0@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436)
      # Security: No Docker socket mount needed for remote registry scanning
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@master
        env:
          TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
          TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
        uses: nick-fields/retry@v3
        with:
          # To run locally: trivy image --severity HIGH,CRITICAL onyxdotapp/onyx-backend
          image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
          severity: "CRITICAL,HIGH"
          trivyignores: ./backend/.trivyignore
          timeout_minutes: 30
          max_attempts: 3
          retry_wait_seconds: 10
          command: |
            docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
              -v ${{ github.workspace }}/backend/.trivyignore:/tmp/.trivyignore:ro \
              -e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
              -e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
              -e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
              -e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
              aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
              image \
              --skip-version-check \
              --timeout 20m \
              --severity CRITICAL,HIGH \
              --ignorefile /tmp/.trivyignore \
              docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
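Split across the build-and-push and merge jobs above is buildx's push-by-digest pattern: each per-platform build pushes an untagged image and saves only its digest as an artifact, and the merge job stitches those digests into a single multi-arch manifest list. Below is a rough local-shell sketch of the same flow; the image name, tags, and digests are placeholders rather than values from this diff.

```bash
#!/usr/bin/env bash
# Sketch of the digest-merge pattern used by the merge job. All concrete values
# below (IMAGE, tags, digests) are illustrative placeholders.
IMAGE=onyxdotapp/onyx-backend

# Each platform build pushes by digest (no tag) and reports the digest, roughly:
#   docker buildx build --platform linux/amd64 \
#     --output type=image,name=$IMAGE,push-by-digest=true,name-canonical=true,push=true .
AMD64_DIGEST=sha256:1111111111111111111111111111111111111111111111111111111111111111
ARM64_DIGEST=sha256:2222222222222222222222222222222222222222222222222222222222222222

# In the workflow, the "-t" flags come from metadata-action's JSON output, e.g.
#   jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON"
# which expands to something like:
#   -t onyxdotapp/onyx-backend:v1.2.3 -t onyxdotapp/onyx-backend:latest

# One manifest list is then created that points at both platform digests:
docker buildx imagetools create \
  -t "$IMAGE:v1.2.3" \
  -t "$IMAGE:latest" \
  "$IMAGE@$AMD64_DIGEST" \
  "$IMAGE@$ARM64_DIGEST"

docker buildx imagetools inspect "$IMAGE:v1.2.3"   # should list both platforms
```

Because only digests move between jobs, the heavy builds can run on separate amd64 and arm64 runners while the cheap manifest merge runs on ubuntu-latest.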
@@ -4,12 +4,12 @@ name: Build and Push Cloud Web Image on Tag
on:
  push:
    tags:
      - "*"
      - "*cloud*"

env:
  REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
  LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}

  DEPLOYMENT: cloud

jobs:
  build:
    runs-on:
@@ -38,9 +38,10 @@ jobs:
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY_IMAGE }}
          flavor: |
            latest=false
          tags: |
            type=raw,value=${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
            type=raw,value=${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
            type=raw,value=${{ github.ref_name }}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
@@ -53,7 +54,7 @@ jobs:

      - name: Build and push by digest
        id: build
        uses: docker/build-push-action@v5
        uses: docker/build-push-action@v6
        with:
          context: ./web
          file: ./web/Dockerfile
@@ -70,10 +71,12 @@ jobs:
            NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
            NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
            NODE_OPTIONS=--max-old-space-size=8192
          # needed due to weird interactions with the builds for different platforms
          no-cache: true
          labels: ${{ steps.meta.outputs.labels }}
          outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
          cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/cloudweb-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
          cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/cloudweb-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
          # no-cache needed due to weird interactions with the builds for different platforms
          # NOTE(rkuo): this may not be true any more with the proper cache prefixing by architecture - currently testing with it off

      - name: Export digest
        run: |
@@ -84,7 +87,7 @@ jobs:
      - name: Upload digest
        uses: actions/upload-artifact@v4
        with:
          name: digests-${{ env.PLATFORM_PAIR }}
          name: cloudweb-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
          path: /tmp/digests/*
          if-no-files-found: error
          retention-days: 1
@@ -98,7 +101,7 @@ jobs:
        uses: actions/download-artifact@v4
        with:
          path: /tmp/digests
          pattern: digests-*
          pattern: cloudweb-digests-*-${{ github.run_id }}
          merge-multiple: true

      - name: Set up Docker Buildx
@@ -109,6 +112,10 @@ jobs:
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY_IMAGE }}
          flavor: |
            latest=false
          tags: |
            type=raw,value=${{ github.ref_name }}

      - name: Login to Docker Hub
        uses: docker/login-action@v3
@@ -132,10 +139,20 @@ jobs:
      # https://github.com/aquasecurity/trivy/discussions/7538
      # https://github.com/aquasecurity/trivy-action/issues/389
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@master
        env:
          TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
          TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
        uses: nick-fields/retry@v3
        with:
          image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
          severity: "CRITICAL,HIGH"
          timeout_minutes: 30
          max_attempts: 3
          retry_wait_seconds: 10
          command: |
            docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
              -e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
              -e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
              -e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
              -e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
              aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
              image \
              --skip-version-check \
              --timeout 20m \
              --severity CRITICAL,HIGH \
              docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
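Both web workflows, like the backend one above, move toward BuildKit's S3 cache backend, with the cache prefix keyed by deployment and platform so the amd64 and arm64 builds stop overwriting each other's layers. Below is a hedged sketch of the same flags on a local buildx invocation; the bucket, region, prefix, and tag are placeholders rather than the values injected by the runs-on environment (RUNS_ON_S3_BUCKET_CACHE, RUNS_ON_AWS_REGION).

```bash
# Sketch only: bucket/region/prefix/tag below are illustrative placeholders.
docker buildx build ./web \
  --file ./web/Dockerfile \
  --platform linux/arm64 \
  --cache-from type=s3,prefix=cache/my-org/my-repo/cloud/cloudweb-linux-arm64/,region=us-east-1,bucket=my-buildx-cache \
  --cache-to type=s3,prefix=cache/my-org/my-repo/cloud/cloudweb-linux-arm64/,region=us-east-1,bucket=my-buildx-cache,mode=max \
  --push \
  --tag my-registry.example.com/onyx-web-server-cloud:dev
```

mode=max exports intermediate layers as well as final ones, which is what makes the cache useful enough to consider dropping no-cache: true once per-architecture prefixes keep the caches separate; the NOTE(rkuo) comments above flag exactly that experiment.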
@@ -7,10 +7,13 @@ on:

env:
  REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
  LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
  DOCKER_BUILDKIT: 1
  BUILDKIT_PROGRESS: plain
  DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}

  # tag nightly builds with "edge"
  EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}

jobs:

  # Bypassing this for now as the idea of not building is glitching
@@ -51,6 +54,8 @@ jobs:
    if: needs.check_model_server_changes.outputs.changed == 'true'
    runs-on:
      [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-amd64"]
    env:
      PLATFORM_PAIR: linux-amd64
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
@@ -75,7 +80,7 @@ jobs:
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Build and Push AMD64
        uses: docker/build-push-action@v5
        uses: docker/build-push-action@v6
        with:
          context: ./backend
          file: ./backend/Dockerfile.model_server
@@ -83,15 +88,20 @@ jobs:
          push: true
          tags: ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64
          build-args: |
            DANSWER_VERSION=${{ github.ref_name }}
            ONYX_VERSION=${{ github.ref_name }}
          outputs: type=registry
          provenance: false
          cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
          cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
          # no-cache: true

  build-arm64:
    needs: [check_model_server_changes]
    if: needs.check_model_server_changes.outputs.changed == 'true'
    runs-on:
      [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-arm64"]
      [runs-on, runner=8cpu-linux-arm64, "run-id=${{ github.run_id }}-arm64"]
    env:
      PLATFORM_PAIR: linux-arm64
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
@@ -116,7 +126,7 @@ jobs:
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Build and Push ARM64
        uses: docker/build-push-action@v5
        uses: docker/build-push-action@v6
        with:
          context: ./backend
          file: ./backend/Dockerfile.model_server
@@ -124,15 +134,31 @@ jobs:
          push: true
          tags: ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
          build-args: |
            DANSWER_VERSION=${{ github.ref_name }}
            ONYX_VERSION=${{ github.ref_name }}
          outputs: type=registry
          provenance: false
          cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
          cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max

  merge-and-scan:
    needs: [build-amd64, build-arm64, check_model_server_changes]
    if: needs.check_model_server_changes.outputs.changed == 'true'
    runs-on: ubuntu-latest
    steps:
      - name: Check if stable release version
        id: check_version
        run: |
          if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
            echo "is_stable=true" >> $GITHUB_OUTPUT
          else
            echo "is_stable=false" >> $GITHUB_OUTPUT
          fi
          if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
            echo "is_beta=true" >> $GITHUB_OUTPUT
          else
            echo "is_beta=false" >> $GITHUB_OUTPUT
          fi

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
@@ -145,18 +171,37 @@ jobs:
          docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }} \
            ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
            ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
          if [[ "${{ env.LATEST_TAG }}" == "true" ]]; then
          if [[ "${{ steps.check_version.outputs.is_stable }}" == "true" ]]; then
            docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:latest \
              ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
              ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
          fi
          if [[ "${{ env.EDGE_TAG }}" == "true" ]]; then
            docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:edge \
              ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
              ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
          fi
          if [[ "${{ steps.check_version.outputs.is_beta }}" == "true" ]]; then
            docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:beta \
              ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
              ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
          fi

      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@master
        env:
          TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
          TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
        uses: nick-fields/retry@v3
        with:
          image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
          severity: "CRITICAL,HIGH"
          timeout: "10m"
          timeout_minutes: 30
          max_attempts: 3
          retry_wait_seconds: 10
          command: |
            docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
              -e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
              -e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
              -e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
              -e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
              aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
              image \
              --skip-version-check \
              --timeout 20m \
              --severity CRITICAL,HIGH \
              docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
@@ -7,10 +7,29 @@ on:

env:
  REGISTRY_IMAGE: onyxdotapp/onyx-web-server
  LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}

  # tag nightly builds with "edge"
  EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}

  DEPLOYMENT: standalone

jobs:
  precheck:
    runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
    outputs:
      should-run: ${{ steps.set-output.outputs.should-run }}
    steps:
      - name: Check if tag contains "cloud"
        id: set-output
        run: |
          if [[ "${{ github.ref_name }}" == *cloud* ]]; then
            echo "should-run=false" >> "$GITHUB_OUTPUT"
          else
            echo "should-run=true" >> "$GITHUB_OUTPUT"
          fi
  build:
    needs: precheck
    if: needs.precheck.outputs.should-run == 'true'
    runs-on:
      - runs-on
      - runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
@@ -29,6 +48,20 @@ jobs:
          platform=${{ matrix.platform }}
          echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV

      - name: Check if stable release version
        id: check_version
        run: |
          if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
            echo "is_stable=true" >> $GITHUB_OUTPUT
          else
            echo "is_stable=false" >> $GITHUB_OUTPUT
          fi
          if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$ ]]; then
            echo "is_beta=true" >> $GITHUB_OUTPUT
          else
            echo "is_beta=false" >> $GITHUB_OUTPUT
          fi

      - name: Checkout
        uses: actions/checkout@v4

@@ -37,9 +70,13 @@ jobs:
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY_IMAGE }}
          flavor: |
            latest=false
          tags: |
            type=raw,value=${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
            type=raw,value=${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
            type=raw,value=${{ github.ref_name }}
            type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
            type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
            type=raw,value=${{ steps.check_version.outputs.is_beta == 'true' && 'beta' || '' }}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
@@ -52,7 +89,7 @@ jobs:

      - name: Build and push by digest
        id: build
        uses: docker/build-push-action@v5
        uses: docker/build-push-action@v6
        with:
          context: ./web
          file: ./web/Dockerfile
@@ -62,11 +99,13 @@ jobs:
            ONYX_VERSION=${{ github.ref_name }}
            NODE_OPTIONS=--max-old-space-size=8192

          # needed due to weird interactions with the builds for different platforms
          no-cache: true
          labels: ${{ steps.meta.outputs.labels }}
          outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true

          cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/web-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
          cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/web-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
          # no-cache needed due to weird interactions with the builds for different platforms
          # NOTE(rkuo): this may not be true any more with the proper cache prefixing by architecture - currently testing with it off

      - name: Export digest
        run: |
          mkdir -p /tmp/digests
@@ -76,21 +115,36 @@ jobs:
      - name: Upload digest
        uses: actions/upload-artifact@v4
        with:
          name: digests-${{ env.PLATFORM_PAIR }}
          name: web-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
          path: /tmp/digests/*
          if-no-files-found: error
          retention-days: 1

  merge:
    runs-on: ubuntu-latest
    needs:
      - build
    if: needs.precheck.outputs.should-run == 'true'
    runs-on: ubuntu-latest
    steps:
      - name: Check if stable release version
        id: check_version
        run: |
          if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
            echo "is_stable=true" >> $GITHUB_OUTPUT
          else
            echo "is_stable=false" >> $GITHUB_OUTPUT
          fi
          if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$ ]]; then
            echo "is_beta=true" >> $GITHUB_OUTPUT
          else
            echo "is_beta=false" >> $GITHUB_OUTPUT
          fi

      - name: Download digests
        uses: actions/download-artifact@v4
        with:
          path: /tmp/digests
          pattern: digests-*
          pattern: web-digests-*-${{ github.run_id }}
          merge-multiple: true

      - name: Set up Docker Buildx
@@ -101,6 +155,13 @@ jobs:
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY_IMAGE }}
          flavor: |
            latest=false
          tags: |
            type=raw,value=${{ github.ref_name }}
            type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
            type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
            type=raw,value=${{ steps.check_version.outputs.is_beta == 'true' && 'beta' || '' }}

      - name: Login to Docker Hub
        uses: docker/login-action@v3
@@ -124,10 +185,20 @@ jobs:
      # https://github.com/aquasecurity/trivy/discussions/7538
      # https://github.com/aquasecurity/trivy-action/issues/389
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@master
        env:
          TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
          TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
        uses: nick-fields/retry@v3
        with:
          image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
          severity: "CRITICAL,HIGH"
          timeout_minutes: 30
          max_attempts: 3
          retry_wait_seconds: 10
          command: |
            docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
              -e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
              -e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
              -e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
              -e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
              aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
              image \
              --skip-version-check \
              --timeout 20m \
              --severity CRITICAL,HIGH \
              docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
41  .github/workflows/docker-tag-beta.yml  (vendored, new file)

@@ -0,0 +1,41 @@
# This workflow is set up to be manually triggered via the GitHub Action tab.
# Given a version, it will tag those backend and webserver images as "beta".

name: Tag Beta Version

on:
  workflow_dispatch:
    inputs:
      version:
        description: "The version (ie v1.0.0-beta.0) to tag as beta"
        required: true

jobs:
  tag:
    # See https://runs-on.com/runners/linux/
    # use a lower powered instance since this just does i/o to docker hub
    runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
    steps:
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v1

      - name: Login to Docker Hub
        uses: docker/login-action@v1
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Enable Docker CLI experimental features
        run: echo "DOCKER_CLI_EXPERIMENTAL=enabled" >> $GITHUB_ENV

      - name: Pull, Tag and Push Web Server Image
        run: |
          docker buildx imagetools create -t onyxdotapp/onyx-web-server:beta onyxdotapp/onyx-web-server:${{ github.event.inputs.version }}

      - name: Pull, Tag and Push API Server Image
        run: |
          docker buildx imagetools create -t onyxdotapp/onyx-backend:beta onyxdotapp/onyx-backend:${{ github.event.inputs.version }}

      - name: Pull, Tag and Push Model Server Image
        run: |
          docker buildx imagetools create -t onyxdotapp/onyx-model-server:beta onyxdotapp/onyx-model-server:${{ github.event.inputs.version }}
4  .github/workflows/docker-tag-latest.yml  (vendored)

@@ -35,3 +35,7 @@ jobs:
      - name: Pull, Tag and Push API Server Image
        run: |
          docker buildx imagetools create -t onyxdotapp/onyx-backend:latest onyxdotapp/onyx-backend:${{ github.event.inputs.version }}

      - name: Pull, Tag and Push Model Server Image
        run: |
          docker buildx imagetools create -t onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:${{ github.event.inputs.version }}
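Neither promotion workflow rebuilds anything: docker buildx imagetools create copies an already-published multi-arch manifest list to a new tag entirely inside the registry. A minimal sketch follows; the version value is an illustrative example of the workflow_dispatch input, not one taken from this compare.

```bash
# Promote an existing multi-arch image to a moving tag without pulling or rebuilding it.
# VERSION is a hypothetical example of the "version" input.
VERSION=v1.2.3-beta.0
docker buildx imagetools create -t onyxdotapp/onyx-backend:beta "onyxdotapp/onyx-backend:$VERSION"
docker buildx imagetools inspect onyxdotapp/onyx-backend:beta   # confirm both platforms are listed
```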
52  .github/workflows/helm-chart-releases.yml  (vendored, new file)

@@ -0,0 +1,52 @@
name: Release Onyx Helm Charts

on:
  push:
    branches:
      - main

permissions: write-all

jobs:
  release:
    permissions:
      contents: write
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Install Helm CLI
        uses: azure/setup-helm@v4
        with:
          version: v3.12.1

      - name: Add required Helm repositories
        run: |
          helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
          helm repo add onyx-vespa https://onyx-dot-app.github.io/vespa-helm-charts
          helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
          helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
          helm repo add minio https://charts.min.io/
          helm repo update

      - name: Build chart dependencies
        run: |
          set -euo pipefail
          for chart_dir in deployment/helm/charts/*; do
            if [ -f "$chart_dir/Chart.yaml" ]; then
              echo "Building dependencies for $chart_dir"
              helm dependency build "$chart_dir"
            fi
          done

      - name: Publish Helm charts to gh-pages
        uses: stefanprodan/helm-gh-pages@v1.7.0
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          charts_dir: deployment/helm/charts
          branch: gh-pages
          commit_username: ${{ github.actor }}
          commit_email: ${{ github.actor }}@users.noreply.github.com
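The chart-release job above resolves each chart's declared dependencies before stefanprodan/helm-gh-pages packages and publishes them. A rough local equivalent of the dependency step is sketched below; the specific chart directory name ("onyx") is an assumption not confirmed by this diff, and any directory under deployment/helm/charts containing a Chart.yaml would work the same way.

```bash
# Sketch: rebuild one chart's dependencies locally, using the repos the workflow adds.
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo add onyx-vespa https://onyx-dot-app.github.io/vespa-helm-charts
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/
helm repo update

# "deployment/helm/charts/onyx" is a hypothetical chart path for illustration.
helm dependency build deployment/helm/charts/onyx
helm template smoke-test deployment/helm/charts/onyx > /dev/null && echo "chart renders"
```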
171
.github/workflows/hotfix-release-branches.yml
vendored
171
.github/workflows/hotfix-release-branches.yml
vendored
@@ -1,171 +0,0 @@
|
||||
# This workflow is intended to be manually triggered via the GitHub Action tab.
|
||||
# Given a hotfix branch, it will attempt to open a PR to all release branches and
|
||||
# by default auto merge them
|
||||
|
||||
name: Hotfix release branches
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
hotfix_commit:
|
||||
description: "Hotfix commit hash"
|
||||
required: true
|
||||
hotfix_suffix:
|
||||
description: "Hotfix branch suffix (e.g. hotfix/v0.8-{suffix})"
|
||||
required: true
|
||||
release_branch_pattern:
|
||||
description: "Release branch pattern (regex)"
|
||||
required: true
|
||||
default: "release/.*"
|
||||
auto_merge:
|
||||
description: "Automatically merge the hotfix PRs"
|
||||
required: true
|
||||
type: choice
|
||||
default: "true"
|
||||
options:
|
||||
- true
|
||||
- false
|
||||
|
||||
jobs:
|
||||
hotfix_release_branches:
|
||||
permissions: write-all
|
||||
# See https://runs-on.com/runners/linux/
|
||||
# use a lower powered instance since this just does i/o to docker hub
|
||||
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
|
||||
steps:
|
||||
# needs RKUO_DEPLOY_KEY for write access to merge PR's
|
||||
- name: Checkout Repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set up Git user
|
||||
run: |
|
||||
git config user.name "Richard Kuo [bot]"
|
||||
git config user.email "rkuo[bot]@onyx.app"
|
||||
|
||||
- name: Fetch All Branches
|
||||
run: |
|
||||
git fetch --all --prune
|
||||
|
||||
- name: Verify Hotfix Commit Exists
|
||||
run: |
|
||||
git rev-parse --verify "${{ github.event.inputs.hotfix_commit }}" || { echo "Commit not found: ${{ github.event.inputs.hotfix_commit }}"; exit 1; }
|
||||
|
||||
- name: Get Release Branches
|
||||
id: get_release_branches
|
||||
run: |
|
||||
BRANCHES=$(git branch -r | grep -E "${{ github.event.inputs.release_branch_pattern }}" | sed 's|origin/||' | tr -d ' ')
|
||||
if [ -z "$BRANCHES" ]; then
|
||||
echo "No release branches found matching pattern '${{ github.event.inputs.release_branch_pattern }}'."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Found release branches:"
|
||||
echo "$BRANCHES"
|
||||
|
||||
# Join the branches into a single line separated by commas
|
||||
BRANCHES_JOINED=$(echo "$BRANCHES" | tr '\n' ',' | sed 's/,$//')
|
||||
|
||||
# Set the branches as an output
|
||||
echo "branches=$BRANCHES_JOINED" >> $GITHUB_OUTPUT
|
||||
|
||||
# notes on all the vagaries of wiring up automated PR's
|
||||
# https://github.com/peter-evans/create-pull-request/blob/main/docs/concepts-guidelines.md#triggering-further-workflow-runs
|
||||
# we must use a custom token for GH_TOKEN to trigger the subsequent PR checks
|
||||
- name: Create and Merge Pull Requests to Matching Release Branches
|
||||
env:
|
||||
HOTFIX_COMMIT: ${{ github.event.inputs.hotfix_commit }}
|
||||
HOTFIX_SUFFIX: ${{ github.event.inputs.hotfix_suffix }}
|
||||
AUTO_MERGE: ${{ github.event.inputs.auto_merge }}
|
||||
GH_TOKEN: ${{ secrets.RKUO_PERSONAL_ACCESS_TOKEN }}
|
||||
run: |
|
||||
# Get the branches from the previous step
|
||||
BRANCHES="${{ steps.get_release_branches.outputs.branches }}"
|
||||
|
||||
# Convert BRANCHES to an array
|
||||
IFS=$',' read -ra BRANCH_ARRAY <<< "$BRANCHES"
|
||||
|
||||
# Loop through each release branch and create and merge a PR
|
||||
for RELEASE_BRANCH in "${BRANCH_ARRAY[@]}"; do
|
||||
echo "Processing $RELEASE_BRANCH..."
|
||||
|
||||
# Parse out the release version by removing "release/" from the branch name
|
||||
RELEASE_VERSION=${RELEASE_BRANCH#release/}
|
||||
echo "Release version parsed: $RELEASE_VERSION"
|
||||
|
||||
HOTFIX_BRANCH="hotfix/${RELEASE_VERSION}-${HOTFIX_SUFFIX}"
|
||||
echo "Creating PR from $HOTFIX_BRANCH to $RELEASE_BRANCH"
|
||||
|
||||
# Checkout the release branch
|
||||
echo "Checking out $RELEASE_BRANCH"
|
||||
git checkout "$RELEASE_BRANCH"
|
||||
|
||||
# Create the new hotfix branch
|
||||
if git rev-parse --verify "$HOTFIX_BRANCH" >/dev/null 2>&1; then
|
||||
echo "Hotfix branch $HOTFIX_BRANCH already exists. Skipping branch creation."
|
||||
else
|
||||
echo "Branching $RELEASE_BRANCH to $HOTFIX_BRANCH"
|
||||
git checkout -b "$HOTFIX_BRANCH"
|
||||
fi
|
||||
|
||||
          # Check if the hotfix commit is a merge commit
          if git rev-list --merges -n 1 "$HOTFIX_COMMIT" >/dev/null 2>&1; then
            # -m 1 uses the target branch as the base (which is what we want)
            echo "Hotfix commit $HOTFIX_COMMIT is a merge commit, using -m 1 for cherry-pick"
            CHERRY_PICK_CMD="git cherry-pick -m 1 $HOTFIX_COMMIT"
          else
            CHERRY_PICK_CMD="git cherry-pick $HOTFIX_COMMIT"
          fi

          # Perform the cherry-pick
          echo "Executing: $CHERRY_PICK_CMD"
          eval "$CHERRY_PICK_CMD"

          if [ $? -ne 0 ]; then
            echo "Cherry-pick failed for $HOTFIX_COMMIT on $HOTFIX_BRANCH. Aborting..."
            git cherry-pick --abort
            continue
          fi
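As an aside, cherry-picking a merge commit requires picking a parent with -m. A hedged sketch of testing whether a specific commit is itself a merge by counting its parents (COMMIT is a placeholder, not a hash from this repo):

    COMMIT=abc1234                       # placeholder hash
    # `git rev-list --parents -n 1` prints the commit followed by its parent hashes.
    PARENTS=$(git rev-list --parents -n 1 "$COMMIT" | wc -w)
    if [ "$PARENTS" -gt 2 ]; then        # commit plus two or more parents => merge commit
      git cherry-pick -m 1 "$COMMIT"     # keep the first parent (the mainline) as the base
    else
      git cherry-pick "$COMMIT"
    fi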
|
||||
# Push the hotfix branch to the remote
|
||||
echo "Pushing $HOTFIX_BRANCH..."
|
||||
git push origin "$HOTFIX_BRANCH"
|
||||
echo "Hotfix branch $HOTFIX_BRANCH created and pushed."
|
||||
|
||||
# Check if PR already exists
|
||||
EXISTING_PR=$(gh pr list --head "$HOTFIX_BRANCH" --base "$RELEASE_BRANCH" --state open --json number --jq '.[0].number')
|
||||
|
||||
if [ -n "$EXISTING_PR" ]; then
|
||||
echo "An open PR already exists: #$EXISTING_PR. Skipping..."
|
||||
continue
|
||||
fi
|
||||
|
||||
# Create a new PR and capture the output
|
||||
PR_OUTPUT=$(gh pr create --title "Merge $HOTFIX_BRANCH into $RELEASE_BRANCH" \
|
||||
--body "Automated PR to merge \`$HOTFIX_BRANCH\` into \`$RELEASE_BRANCH\`." \
|
||||
--head "$HOTFIX_BRANCH" --base "$RELEASE_BRANCH")
|
||||
|
||||
# Extract the URL from the output
|
||||
PR_URL=$(echo "$PR_OUTPUT" | grep -Eo 'https://github.com/[^ ]+')
|
||||
echo "Pull request created: $PR_URL"
|
||||
|
||||
# Extract PR number from URL
|
||||
PR_NUMBER=$(basename "$PR_URL")
|
||||
echo "Pull request created: $PR_NUMBER"
|
||||
|
||||
if [ "$AUTO_MERGE" == "true" ]; then
|
||||
echo "Attempting to merge pull request #$PR_NUMBER"
|
||||
|
||||
# Attempt to merge the PR
|
||||
gh pr merge "$PR_NUMBER" --merge --auto --delete-branch
|
||||
|
||||
if [ $? -eq 0 ]; then
|
||||
echo "Pull request #$PR_NUMBER merged successfully."
|
||||
else
|
||||
# Optionally, handle the error or continue
|
||||
echo "Failed to merge pull request #$PR_NUMBER."
|
||||
fi
|
||||
fi
|
||||
done
|
||||
124  .github/workflows/pr-backport-autotrigger.yml  vendored
@@ -1,124 +0,0 @@
|
||||
name: Backport on Merge
|
||||
|
||||
# Note this workflow does not trigger the builds, be sure to manually tag the branches to trigger the builds
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [closed] # Later we check for merge so only PRs that go in can get backported
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
actions: write
|
||||
|
||||
jobs:
|
||||
backport:
|
||||
if: github.event.pull_request.merged == true
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.YUHONG_GH_ACTIONS }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set up Git user
|
||||
run: |
|
||||
git config user.name "Richard Kuo [bot]"
|
||||
git config user.email "rkuo[bot]@onyx.app"
|
||||
git fetch --prune
|
||||
|
||||
- name: Check for Backport Checkbox
|
||||
id: checkbox-check
|
||||
run: |
|
||||
PR_BODY="${{ github.event.pull_request.body }}"
|
||||
if [[ "$PR_BODY" == *"[x] This PR should be backported"* ]]; then
|
||||
echo "backport=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "backport=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: List and sort release branches
|
||||
id: list-branches
|
||||
run: |
|
||||
git fetch --all --tags
|
||||
BRANCHES=$(git for-each-ref --format='%(refname:short)' refs/remotes/origin/release/* | sed 's|origin/release/||' | sort -Vr)
|
||||
BETA=$(echo "$BRANCHES" | head -n 1)
|
||||
STABLE=$(echo "$BRANCHES" | head -n 2 | tail -n 1)
|
||||
echo "beta=release/$BETA" >> $GITHUB_OUTPUT
|
||||
echo "stable=release/$STABLE" >> $GITHUB_OUTPUT
|
||||
          # Fetch latest tags for beta and stable
          LATEST_BETA_TAG=$(git tag -l "v[0-9]*.[0-9]*.[0-9]*-beta.[0-9]*" | grep -E "^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$" | grep -v -- "-cloud" | sort -Vr | head -n 1)
          LATEST_STABLE_TAG=$(git tag -l "v[0-9]*.[0-9]*.[0-9]*" | grep -E "^v[0-9]+\.[0-9]+\.[0-9]+$" | sort -Vr | head -n 1)

          # Handle case where no beta tags exist
          if [[ -z "$LATEST_BETA_TAG" ]]; then
            NEW_BETA_TAG="v1.0.0-beta.1"
          else
            NEW_BETA_TAG=$(echo $LATEST_BETA_TAG | awk -F '[.-]' '{print $1 "." $2 "." $3 "-beta." ($NF+1)}')
          fi

          # Increment latest stable tag
          NEW_STABLE_TAG=$(echo $LATEST_STABLE_TAG | awk -F '.' '{print $1 "." $2 "." ($3+1)}')
          echo "latest_beta_tag=$LATEST_BETA_TAG" >> $GITHUB_OUTPUT
          echo "latest_stable_tag=$LATEST_STABLE_TAG" >> $GITHUB_OUTPUT
          echo "new_beta_tag=$NEW_BETA_TAG" >> $GITHUB_OUTPUT
          echo "new_stable_tag=$NEW_STABLE_TAG" >> $GITHUB_OUTPUT
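The two awk expressions above bump the pre-release counter and the patch number respectively; a quick worked example with illustrative tag values:

    echo "v1.2.3-beta.4" | awk -F '[.-]' '{print $1 "." $2 "." $3 "-beta." ($NF+1)}'
    # -> v1.2.3-beta.5   (splits on "." and "-", so $NF is the trailing beta counter)
    echo "v1.2.3" | awk -F '.' '{print $1 "." $2 "." ($3+1)}'
    # -> v1.2.4          (splits on "." and increments the patch component)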
|
||||
- name: Echo branch and tag information
|
||||
run: |
|
||||
echo "Beta branch: ${{ steps.list-branches.outputs.beta }}"
|
||||
echo "Stable branch: ${{ steps.list-branches.outputs.stable }}"
|
||||
echo "Latest beta tag: ${{ steps.list-branches.outputs.latest_beta_tag }}"
|
||||
echo "Latest stable tag: ${{ steps.list-branches.outputs.latest_stable_tag }}"
|
||||
echo "New beta tag: ${{ steps.list-branches.outputs.new_beta_tag }}"
|
||||
echo "New stable tag: ${{ steps.list-branches.outputs.new_stable_tag }}"
|
||||
|
||||
- name: Trigger Backport
|
||||
if: steps.checkbox-check.outputs.backport == 'true'
|
||||
run: |
|
||||
set -e
|
||||
echo "Backporting to beta ${{ steps.list-branches.outputs.beta }} and stable ${{ steps.list-branches.outputs.stable }}"
|
||||
|
||||
# Echo the merge commit SHA
|
||||
echo "Merge commit SHA: ${{ github.event.pull_request.merge_commit_sha }}"
|
||||
|
||||
# Fetch all history for all branches and tags
|
||||
git fetch --prune
|
||||
|
||||
# Reset and prepare the beta branch
|
||||
git checkout ${{ steps.list-branches.outputs.beta }}
|
||||
echo "Last 5 commits on beta branch:"
|
||||
git log -n 5 --pretty=format:"%H"
|
||||
echo "" # Newline for formatting
|
||||
|
||||
# Cherry-pick the merge commit from the merged PR
|
||||
git cherry-pick -m 1 ${{ github.event.pull_request.merge_commit_sha }} || {
|
||||
echo "Cherry-pick to beta failed due to conflicts."
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Create new beta branch/tag
|
||||
git tag ${{ steps.list-branches.outputs.new_beta_tag }}
|
||||
# Push the changes and tag to the beta branch using PAT
|
||||
git push origin ${{ steps.list-branches.outputs.beta }}
|
||||
git push origin ${{ steps.list-branches.outputs.new_beta_tag }}
|
||||
|
||||
# Reset and prepare the stable branch
|
||||
git checkout ${{ steps.list-branches.outputs.stable }}
|
||||
echo "Last 5 commits on stable branch:"
|
||||
git log -n 5 --pretty=format:"%H"
|
||||
echo "" # Newline for formatting
|
||||
|
||||
# Cherry-pick the merge commit from the merged PR
|
||||
git cherry-pick -m 1 ${{ github.event.pull_request.merge_commit_sha }} || {
|
||||
echo "Cherry-pick to stable failed due to conflicts."
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Create new stable branch/tag
|
||||
git tag ${{ steps.list-branches.outputs.new_stable_tag }}
|
||||
# Push the changes and tag to the stable branch using PAT
|
||||
git push origin ${{ steps.list-branches.outputs.stable }}
|
||||
git push origin ${{ steps.list-branches.outputs.new_stable_tag }}
|
||||
110  .github/workflows/pr-external-dependency-unit-tests.yml  vendored  Normal file
@@ -0,0 +1,110 @@
name: External Dependency Unit Tests

on:
  merge_group:
  pull_request:
    branches: [main]

env:
  # AWS
  S3_AWS_ACCESS_KEY_ID: ${{ secrets.S3_AWS_ACCESS_KEY_ID }}
  S3_AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_AWS_SECRET_ACCESS_KEY }}

  # MinIO
  S3_ENDPOINT_URL: "http://localhost:9004"

  # Confluence
  CONFLUENCE_TEST_SPACE_URL: ${{ vars.CONFLUENCE_TEST_SPACE_URL }}
  CONFLUENCE_TEST_SPACE: ${{ vars.CONFLUENCE_TEST_SPACE }}
  CONFLUENCE_TEST_PAGE_ID: ${{ secrets.CONFLUENCE_TEST_PAGE_ID }}
  CONFLUENCE_USER_NAME: ${{ vars.CONFLUENCE_USER_NAME }}
  CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
  CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}

  # LLMs
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}

jobs:
  discover-test-dirs:
    runs-on: ubuntu-latest
    outputs:
      test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Discover test directories
        id: set-matrix
        run: |
          # Find all subdirectories in backend/tests/external_dependency_unit
          dirs=$(find backend/tests/external_dependency_unit -mindepth 1 -maxdepth 1 -type d -exec basename {} \; | sort | jq -R -s -c 'split("\n")[:-1]')
          echo "test-dirs=$dirs" >> $GITHUB_OUTPUT

  external-dependency-unit-tests:
    needs: discover-test-dirs
    # Use larger runner with more resources for Vespa
    runs-on: [runs-on, runner=16cpu-linux-x64, "run-id=${{ github.run_id }}"]

    strategy:
      fail-fast: false
      matrix:
        test-dir: ${{ fromJson(needs.discover-test-dirs.outputs.test-dirs) }}

    env:
      PYTHONPATH: ./backend
      MODEL_SERVER_HOST: "disabled"

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
          cache: "pip"
          cache-dependency-path: |
            backend/requirements/default.txt
            backend/requirements/dev.txt

      - name: Install Dependencies
        run: |
          python -m pip install --upgrade pip
          pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
          playwright install chromium
          playwright install-deps chromium

      - name: Set up Standard Dependencies
        run: |
          cd deployment/docker_compose
          docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d minio relational_db cache index

      - name: Wait for services
        run: |
          echo "Waiting for services to be ready..."
          sleep 30

          # Wait for Vespa specifically
          echo "Waiting for Vespa to be ready..."
          timeout 300 bash -c 'until curl -f -s http://localhost:8081/ApplicationStatus > /dev/null 2>&1; do echo "Vespa not ready, waiting..."; sleep 10; done' || echo "Vespa timeout - continuing anyway"

          echo "Services should be ready now"

      - name: Run migrations
        run: |
          cd backend
          # Run migrations to head
          alembic upgrade head
          alembic heads --verbose

      - name: Run Tests for ${{ matrix.test-dir }}
        shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
        run: |
          py.test \
            --durations=8 \
            -o junit_family=xunit2 \
            -xv \
            --ff \
            backend/tests/external_dependency_unit/${{ matrix.test-dir }}
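The discovery step feeds the job matrix through a find-plus-jq one-liner; what the jq stage does, shown on a tiny inline sample (the directory names are illustrative):

    printf 'confluence\ngoogle_drive\ns3\n' | jq -R -s -c 'split("\n")[:-1]'
    # -> ["confluence","google_drive","s3"]
    # -R reads raw text, -s slurps it into one string, and split("\n")[:-1]
    # drops the empty element left behind by the trailing newline.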
180  .github/workflows/pr-helm-chart-testing.yml  vendored
@@ -19,9 +19,9 @@ jobs:
          fetch-depth: 0

      - name: Set up Helm
        uses: azure/setup-helm@v4.2.0
        uses: azure/setup-helm@v4.3.1
        with:
          version: v3.17.0
          version: v3.19.0

      - name: Set up chart-testing
        uses: helm/chart-testing-action@v2.7.0
@@ -37,6 +37,11 @@ jobs:
            echo "changed=true" >> "$GITHUB_OUTPUT"
          fi

      # uncomment to force run chart-testing
      # - name: Force run chart-testing (list-changed)
      #   id: list-changed
      #   run: echo "changed=true" >> $GITHUB_OUTPUT

      # lint all charts if any changes were detected
      - name: Run chart-testing (lint)
        if: steps.list-changed.outputs.changed == 'true'
@@ -48,9 +53,176 @@ jobs:
        if: steps.list-changed.outputs.changed == 'true'
        uses: helm/kind-action@v1.12.0

      - name: Run chart-testing (install)
      - name: Pre-install cluster status check
        if: steps.list-changed.outputs.changed == 'true'
        run: ct install --all --helm-extra-set-args="--set=nginx.enabled=false" --debug --config ct.yaml
        run: |
          echo "=== Pre-install Cluster Status ==="
          kubectl get nodes -o wide
          kubectl get pods --all-namespaces
          kubectl get storageclass
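For context, the hedged sketch below reproduces this chart-testing flow on a local kind cluster; the ct.yaml config and the nginx toggle are taken from the workflow above, everything else is assumption:

    kind create cluster --name chart-testing            # throwaway local cluster
    helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
    helm repo update
    ct lint --all --config ct.yaml                       # lint every chart in the repo
    ct install --all \
      --helm-extra-set-args="--set=nginx.enabled=false" \
      --debug --config ct.yaml                           # install into kind and smoke-test
    kind delete cluster --name chart-testing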
|
||||
- name: Add Helm repositories and update
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
run: |
|
||||
echo "=== Adding Helm repositories ==="
|
||||
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
|
||||
helm repo add vespa https://onyx-dot-app.github.io/vespa-helm-charts
|
||||
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
|
||||
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
|
||||
helm repo add minio https://charts.min.io/
|
||||
helm repo update
|
||||
|
||||
- name: Install Redis operator
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
shell: bash
|
||||
run: |
|
||||
echo "=== Installing redis-operator CRDs ==="
|
||||
helm upgrade --install redis-operator ot-container-kit/redis-operator \
|
||||
--namespace redis-operator --create-namespace --wait --timeout 300s
|
||||
|
||||
- name: Pre-pull required images
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
run: |
|
||||
echo "=== Pre-pulling required images to avoid timeout ==="
|
||||
KIND_CLUSTER=$(kubectl config current-context | sed 's/kind-//')
|
||||
echo "Kind cluster: $KIND_CLUSTER"
|
||||
|
||||
IMAGES=(
|
||||
"ghcr.io/cloudnative-pg/cloudnative-pg:1.27.0"
|
||||
"quay.io/opstree/redis:v7.0.15"
|
||||
"docker.io/onyxdotapp/onyx-web-server:latest"
|
||||
)
|
||||
|
||||
for image in "${IMAGES[@]}"; do
|
||||
echo "Pre-pulling $image"
|
||||
if docker pull "$image"; then
|
||||
kind load docker-image "$image" --name "$KIND_CLUSTER" || echo "Failed to load $image into kind"
|
||||
else
|
||||
echo "Failed to pull $image"
|
||||
fi
|
||||
done
|
||||
|
||||
echo "=== Images loaded into Kind cluster ==="
|
||||
docker exec "$KIND_CLUSTER"-control-plane crictl images | grep -E "(cloudnative-pg|redis|onyx)" || echo "Some images may still be loading..."
|
||||
|
||||
- name: Validate chart dependencies
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
run: |
|
||||
echo "=== Validating chart dependencies ==="
|
||||
cd deployment/helm/charts/onyx
|
||||
helm dependency update
|
||||
helm lint .
|
||||
|
||||
- name: Run chart-testing (install) with enhanced monitoring
|
||||
timeout-minutes: 25
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
run: |
|
||||
echo "=== Starting chart installation with monitoring ==="
|
||||
|
||||
# Function to monitor cluster state
|
||||
monitor_cluster() {
|
||||
while true; do
|
||||
echo "=== Cluster Status Check at $(date) ==="
|
||||
# Only show non-running pods to reduce noise
|
||||
NON_RUNNING_PODS=$(kubectl get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded --no-headers 2>/dev/null | wc -l)
|
||||
if [ "$NON_RUNNING_PODS" -gt 0 ]; then
|
||||
echo "Non-running pods:"
|
||||
kubectl get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded
|
||||
else
|
||||
echo "All pods running successfully"
|
||||
fi
|
||||
# Only show recent events if there are issues
|
||||
RECENT_EVENTS=$(kubectl get events --sort-by=.lastTimestamp --all-namespaces --field-selector=type!=Normal 2>/dev/null | tail -5)
|
||||
if [ -n "$RECENT_EVENTS" ]; then
|
||||
echo "Recent warnings/errors:"
|
||||
echo "$RECENT_EVENTS"
|
||||
fi
|
||||
sleep 60
|
||||
done
|
||||
}
|
||||
|
||||
# Start monitoring in background
|
||||
monitor_cluster &
|
||||
MONITOR_PID=$!
|
||||
|
||||
# Set up cleanup
|
||||
cleanup() {
|
||||
echo "=== Cleaning up monitoring process ==="
|
||||
kill $MONITOR_PID 2>/dev/null || true
|
||||
echo "=== Final cluster state ==="
|
||||
kubectl get pods --all-namespaces
|
||||
kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -20
|
||||
}
|
||||
|
||||
# Trap cleanup on exit
|
||||
trap cleanup EXIT
|
||||
|
||||
# Run the actual installation with detailed logging
|
||||
echo "=== Starting ct install ==="
|
||||
set +e
|
||||
ct install --all \
|
||||
--helm-extra-set-args="\
|
||||
--set=nginx.enabled=false \
|
||||
--set=minio.enabled=false \
|
||||
--set=vespa.enabled=false \
|
||||
--set=slackbot.enabled=false \
|
||||
--set=postgresql.enabled=true \
|
||||
--set=postgresql.nameOverride=cloudnative-pg \
|
||||
--set=postgresql.cluster.storage.storageClass=standard \
|
||||
--set=redis.enabled=true \
|
||||
--set=redis.storageSpec.volumeClaimTemplate.spec.storageClassName=standard \
|
||||
--set=webserver.replicaCount=1 \
|
||||
--set=api.replicaCount=0 \
|
||||
--set=inferenceCapability.replicaCount=0 \
|
||||
--set=indexCapability.replicaCount=0 \
|
||||
--set=celery_beat.replicaCount=0 \
|
||||
--set=celery_worker_heavy.replicaCount=0 \
|
||||
--set=celery_worker_docfetching.replicaCount=0 \
|
||||
--set=celery_worker_docprocessing.replicaCount=0 \
|
||||
--set=celery_worker_light.replicaCount=0 \
|
||||
--set=celery_worker_monitoring.replicaCount=0 \
|
||||
--set=celery_worker_primary.replicaCount=0 \
|
||||
--set=celery_worker_user_file_processing.replicaCount=0 \
|
||||
--set=celery_worker_user_files_indexing.replicaCount=0" \
|
||||
--helm-extra-args="--timeout 900s --debug" \
|
||||
--debug --config ct.yaml
|
||||
CT_EXIT=$?
|
||||
set -e
|
||||
|
||||
if [[ $CT_EXIT -ne 0 ]]; then
|
||||
echo "ct install failed with exit code $CT_EXIT"
|
||||
exit $CT_EXIT
|
||||
else
|
||||
echo "=== Installation completed successfully ==="
|
||||
fi
|
||||
|
||||
kubectl get pods --all-namespaces
|
||||
|
||||
- name: Post-install verification
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
run: |
|
||||
echo "=== Post-install verification ==="
|
||||
kubectl get pods --all-namespaces
|
||||
kubectl get services --all-namespaces
|
||||
# Only show issues if they exist
|
||||
kubectl describe pods --all-namespaces | grep -A 5 -B 2 "Failed\|Error\|Warning" || echo "No pod issues found"
|
||||
|
||||
- name: Cleanup on failure
|
||||
if: failure() && steps.list-changed.outputs.changed == 'true'
|
||||
run: |
|
||||
echo "=== Cleanup on failure ==="
|
||||
echo "=== Final cluster state ==="
|
||||
kubectl get pods --all-namespaces
|
||||
kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -10
|
||||
|
||||
echo "=== Pod descriptions for debugging ==="
|
||||
kubectl describe pods --all-namespaces | grep -A 10 -B 3 "Failed\|Error\|Warning\|Pending" || echo "No problematic pods found"
|
||||
|
||||
echo "=== Recent logs for debugging ==="
|
||||
kubectl logs --all-namespaces --tail=50 | grep -i "error\|timeout\|failed\|pull" || echo "No error logs found"
|
||||
|
||||
echo "=== Helm releases ==="
|
||||
helm list --all-namespaces
|
||||
# the following would install only changed charts, but we only have one chart so
|
||||
# don't worry about that for now
|
||||
# run: ct install --target-branch ${{ github.event.repository.default_branch }}
|
||||
|
||||
548  .github/workflows/pr-integration-tests.yml  vendored
@@ -11,142 +11,207 @@ on:
      - "release/**"

env:
  # Private Registry Configuration
  PRIVATE_REGISTRY: experimental-registry.blacksmith.sh:5000
  PRIVATE_REGISTRY_USERNAME: ${{ secrets.PRIVATE_REGISTRY_USERNAME }}
  PRIVATE_REGISTRY_PASSWORD: ${{ secrets.PRIVATE_REGISTRY_PASSWORD }}

  # Test Environment Variables
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
  CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
  CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
  CONFLUENCE_TEST_SPACE_URL: ${{ vars.CONFLUENCE_TEST_SPACE_URL }}
  CONFLUENCE_USER_NAME: ${{ vars.CONFLUENCE_USER_NAME }}
  CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
  CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}
  JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
  JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
  JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
  JIRA_API_TOKEN_SCOPED: ${{ secrets.JIRA_API_TOKEN_SCOPED }}
  PERM_SYNC_SHAREPOINT_CLIENT_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_CLIENT_ID }}
  PERM_SYNC_SHAREPOINT_PRIVATE_KEY: ${{ secrets.PERM_SYNC_SHAREPOINT_PRIVATE_KEY }}
  PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD: ${{ secrets.PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD }}
  PERM_SYNC_SHAREPOINT_DIRECTORY_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_DIRECTORY_ID }}
  EXA_API_KEY: ${{ secrets.EXA_API_KEY }}

jobs:
  integration-tests:
    # See https://runs-on.com/runners/linux/
    runs-on: [runs-on, runner=32cpu-linux-x64, "run-id=${{ github.run_id }}"]
  discover-test-dirs:
    runs-on: blacksmith-2vcpu-ubuntu-2404-arm
    outputs:
      test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Discover test directories
        id: set-matrix
        run: |
          # Find all leaf-level directories in both test directories
          tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)
          connector_dirs=$(find backend/tests/integration/connector_job_tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)

          # Create JSON array with directory info
          all_dirs=""
          for dir in $tests_dirs; do
            all_dirs="$all_dirs{\"path\":\"tests/$dir\",\"name\":\"tests-$dir\"},"
          done
          for dir in $connector_dirs; do
            all_dirs="$all_dirs{\"path\":\"connector_job_tests/$dir\",\"name\":\"connector-$dir\"},"
          done

          # Remove trailing comma and wrap in array
          all_dirs="[${all_dirs%,}]"
          echo "test-dirs=$all_dirs" >> $GITHUB_OUTPUT
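An alternative sketch of the same matrix construction using jq instead of hand-escaped string concatenation (shown for one of the two roots; this is not how the workflow above does it):

    find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" \
      -exec basename {} \; | sort |
      jq -R -s -c 'split("\n")[:-1] | map({path: ("tests/" + .), name: ("tests-" + .)})'
    # -> [{"path":"tests/<dir>","name":"tests-<dir>"}, ...]; the connector_job_tests root
    #    would be generated the same way and the two arrays joined with `jq -s -c 'add'`.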
prepare-build:
|
||||
runs-on: blacksmith-2vcpu-ubuntu-2404-arm
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Prepare build
|
||||
uses: ./.github/actions/prepare-build
|
||||
|
||||
build-backend-image:
|
||||
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Login to Private Registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.PRIVATE_REGISTRY }}
|
||||
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
|
||||
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: useblacksmith/setup-docker-builder@v1
|
||||
|
||||
- name: Build and push Backend Docker image
|
||||
uses: useblacksmith/build-push-action@v2
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}
|
||||
push: true
|
||||
outputs: type=registry
|
||||
no-cache: true
|
||||
|
||||
|
||||
build-model-server-image:
|
||||
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Login to Private Registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.PRIVATE_REGISTRY }}
|
||||
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
|
||||
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: useblacksmith/setup-docker-builder@v1
|
||||
|
||||
- name: Build and push Model Server Docker image
|
||||
uses: useblacksmith/build-push-action@v2
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile.model_server
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}
|
||||
push: true
|
||||
outputs: type=registry
|
||||
provenance: false
|
||||
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
|
||||
|
||||
|
||||
build-integration-image:
|
||||
needs: prepare-build
|
||||
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Login to Private Registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.PRIVATE_REGISTRY }}
|
||||
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
|
||||
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
|
||||
|
||||
- name: Download OpenAPI artifacts
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: openapi-artifacts
|
||||
path: backend/generated/
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: useblacksmith/setup-docker-builder@v1
|
||||
|
||||
- name: Build and push integration test image with Docker Bake
|
||||
env:
|
||||
REGISTRY: ${{ env.PRIVATE_REGISTRY }}
|
||||
TAG: test-${{ github.run_id }}
|
||||
run: cd backend && docker buildx bake --no-cache --push integration
|
||||
|
||||
integration-tests:
|
||||
needs:
|
||||
[
|
||||
discover-test-dirs,
|
||||
build-backend-image,
|
||||
build-model-server-image,
|
||||
build-integration-image,
|
||||
]
|
||||
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
test-dir: ${{ fromJson(needs.discover-test-dirs.outputs.test-dirs) }}
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Login to Private Registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.PRIVATE_REGISTRY }}
|
||||
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
|
||||
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
|
||||
|
||||
# needed for pulling Vespa, Redis, Postgres, and Minio images
|
||||
# otherwise, we hit the "Unauthenticated users" limit
|
||||
# https://docs.docker.com/docker-hub/usage/
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
# tag every docker image with "test" so that we can spin up the correct set
|
||||
# of images during testing
|
||||
|
||||
# We don't need to build the Web Docker image since it's not yet used
|
||||
# in the integration tests. We have a separate action to verify that it builds
|
||||
# successfully.
|
||||
- name: Pull Web Docker image
|
||||
- name: Pull Docker images
|
||||
run: |
|
||||
docker pull onyxdotapp/onyx-web-server:latest
|
||||
docker tag onyxdotapp/onyx-web-server:latest onyxdotapp/onyx-web-server:test
|
||||
# Pull all images from registry in parallel
|
||||
echo "Pulling Docker images in parallel..."
|
||||
# Pull images from private registry
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}) &
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}) &
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}) &
|
||||
|
||||
# we use the runs-on cache for docker builds
|
||||
# in conjunction with runs-on runners, it has better speed and unlimited caching
|
||||
# https://runs-on.com/caching/s3-cache-for-github-actions/
|
||||
# https://runs-on.com/caching/docker/
|
||||
# https://github.com/moby/buildkit#s3-cache-experimental
|
||||
# Wait for all background jobs to complete
|
||||
wait
|
||||
echo "All Docker images pulled successfully"
|
||||
|
||||
# images are built and run locally for testing purposes. Not pushed.
|
||||
- name: Build Backend Docker image
|
||||
uses: ./.github/actions/custom-build-and-push
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile
|
||||
platforms: linux/amd64
|
||||
tags: onyxdotapp/onyx-backend:test
|
||||
push: false
|
||||
load: true
|
||||
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
|
||||
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
|
||||
|
||||
- name: Build Model Server Docker image
|
||||
uses: ./.github/actions/custom-build-and-push
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile.model_server
|
||||
platforms: linux/amd64
|
||||
tags: onyxdotapp/onyx-model-server:test
|
||||
push: false
|
||||
load: true
|
||||
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
|
||||
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
|
||||
|
||||
- name: Build integration test Docker image
|
||||
uses: ./.github/actions/custom-build-and-push
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/tests/integration/Dockerfile
|
||||
platforms: linux/amd64
|
||||
tags: onyxdotapp/onyx-integration:test
|
||||
push: false
|
||||
load: true
|
||||
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
|
||||
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
|
||||
|
||||
# Start containers for multi-tenant tests
|
||||
- name: Start Docker containers for multi-tenant tests
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
|
||||
MULTI_TENANT=true \
|
||||
AUTH_TYPE=cloud \
|
||||
REQUIRE_EMAIL_VERIFICATION=false \
|
||||
DISABLE_TELEMETRY=true \
|
||||
IMAGE_TAG=test \
|
||||
DEV_MODE=true \
|
||||
docker compose -f docker-compose.multitenant-dev.yml -p onyx-stack up -d
|
||||
id: start_docker_multi_tenant
|
||||
|
||||
# In practice, `cloud` Auth type would require OAUTH credentials to be set.
|
||||
- name: Run Multi-Tenant Integration Tests
|
||||
run: |
|
||||
echo "Waiting for 3 minutes to ensure API server is ready..."
|
||||
sleep 180
|
||||
echo "Running integration tests..."
|
||||
docker run --rm --network onyx-stack_default \
|
||||
--name test-runner \
|
||||
-e POSTGRES_HOST=relational_db \
|
||||
-e POSTGRES_USER=postgres \
|
||||
-e POSTGRES_PASSWORD=password \
|
||||
-e POSTGRES_DB=postgres \
|
||||
-e POSTGRES_USE_NULL_POOL=true \
|
||||
-e VESPA_HOST=index \
|
||||
-e REDIS_HOST=cache \
|
||||
-e API_SERVER_HOST=api_server \
|
||||
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
|
||||
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
|
||||
-e TEST_WEB_HOSTNAME=test-runner \
|
||||
-e AUTH_TYPE=cloud \
|
||||
-e MULTI_TENANT=true \
|
||||
-e REQUIRE_EMAIL_VERIFICATION=false \
|
||||
-e DISABLE_TELEMETRY=true \
|
||||
-e IMAGE_TAG=test \
|
||||
-e DEV_MODE=true \
|
||||
onyxdotapp/onyx-integration:test \
|
||||
/app/tests/integration/multitenant_tests
|
||||
continue-on-error: true
|
||||
id: run_multitenant_tests
|
||||
|
||||
- name: Check multi-tenant test results
|
||||
run: |
|
||||
if [ ${{ steps.run_multitenant_tests.outcome }} == 'failure' ]; then
|
||||
echo "Multi-tenant integration tests failed. Exiting with error."
|
||||
exit 1
|
||||
else
|
||||
echo "All multi-tenant integration tests passed successfully."
|
||||
fi
|
||||
|
||||
- name: Stop multi-tenant Docker containers
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.multitenant-dev.yml -p onyx-stack down -v
|
||||
# Re-tag to remove registry prefix for docker-compose
|
||||
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }} onyxdotapp/onyx-backend:test
|
||||
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }} onyxdotapp/onyx-model-server:test
|
||||
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }} onyxdotapp/onyx-integration:test
|
||||
|
||||
# NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
|
||||
# NOTE: don't need web server for integration tests
|
||||
- name: Start Docker containers
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
@@ -158,14 +223,24 @@ jobs:
          DISABLE_TELEMETRY=true \
          IMAGE_TAG=test \
          INTEGRATION_TESTS_MODE=true \
          docker compose -f docker-compose.dev.yml -p onyx-stack up -d
          CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001 \
          docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
            relational_db \
            index \
            cache \
            minio \
            api_server \
            inference_model_server \
            indexing_model_server \
            background \
            -d
        id: start_docker
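A hedged local sketch of this selective startup, run from deployment/docker_compose; the environment values shown are only the ones visible in the step above:

    cd deployment/docker_compose
    IMAGE_TAG=test DISABLE_TELEMETRY=true INTEGRATION_TESTS_MODE=true \
      docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d \
      relational_db index cache minio api_server \
      inference_model_server indexing_model_server background
    docker compose -f docker-compose.yml -f docker-compose.dev.yml ps   # verify what came up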
|
||||
      - name: Wait for service to be ready
        run: |
          echo "Starting wait-for-service script..."

          docker logs -f onyx-stack-api_server-1 &
          docker logs -f onyx-api_server-1 &

          start_time=$(date +%s)
          timeout=300 # 5 minutes in seconds
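The rest of this step is elided by the hunk boundary; it polls the API server until it answers. A hedged sketch of such a readiness loop, with the /health endpoint and port borrowed from the multi-tenant variant later in this diff:

    start_time=$(date +%s); timeout=300
    while true; do
      [ $(( $(date +%s) - start_time )) -ge "$timeout" ] && { echo "Timed out"; exit 1; }
      code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
      [ "$code" = "200" ] && { echo "Service is ready!"; break; }
      echo "Not ready yet (got $code); retrying in 5 seconds..."
      sleep 5
    done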
@@ -201,43 +276,46 @@ jobs:
|
||||
docker compose -f docker-compose.mock-it-services.yml \
|
||||
-p mock-it-services-stack up -d
|
||||
|
||||
# NOTE: Use pre-ping/null to reduce flakiness due to dropped connections
|
||||
- name: Run Standard Integration Tests
|
||||
run: |
|
||||
echo "Running integration tests..."
|
||||
docker run --rm --network onyx-stack_default \
|
||||
--name test-runner \
|
||||
-e POSTGRES_HOST=relational_db \
|
||||
-e POSTGRES_USER=postgres \
|
||||
-e POSTGRES_PASSWORD=password \
|
||||
-e POSTGRES_DB=postgres \
|
||||
-e POSTGRES_POOL_PRE_PING=true \
|
||||
-e POSTGRES_USE_NULL_POOL=true \
|
||||
-e VESPA_HOST=index \
|
||||
-e REDIS_HOST=cache \
|
||||
-e API_SERVER_HOST=api_server \
|
||||
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
|
||||
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
|
||||
-e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
|
||||
-e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
|
||||
-e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
|
||||
-e TEST_WEB_HOSTNAME=test-runner \
|
||||
-e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
|
||||
-e MOCK_CONNECTOR_SERVER_PORT=8001 \
|
||||
onyxdotapp/onyx-integration:test \
|
||||
/app/tests/integration/tests \
|
||||
/app/tests/integration/connector_job_tests
|
||||
continue-on-error: true
|
||||
id: run_tests
|
||||
|
||||
- name: Check test results
|
||||
run: |
|
||||
if [ ${{ steps.run_tests.outcome }} == 'failure' ]; then
|
||||
echo "Integration tests failed. Exiting with error."
|
||||
exit 1
|
||||
else
|
||||
echo "All integration tests passed successfully."
|
||||
fi
|
||||
- name: Run Integration Tests for ${{ matrix.test-dir.name }}
|
||||
uses: nick-fields/retry@v3
|
||||
with:
|
||||
timeout_minutes: 20
|
||||
max_attempts: 3
|
||||
retry_wait_seconds: 10
|
||||
command: |
|
||||
echo "Running integration tests for ${{ matrix.test-dir.path }}..."
|
||||
docker run --rm --network onyx_default \
|
||||
--name test-runner \
|
||||
-e POSTGRES_HOST=relational_db \
|
||||
-e POSTGRES_USER=postgres \
|
||||
-e POSTGRES_PASSWORD=password \
|
||||
-e POSTGRES_DB=postgres \
|
||||
-e DB_READONLY_USER=db_readonly_user \
|
||||
-e DB_READONLY_PASSWORD=password \
|
||||
-e POSTGRES_POOL_PRE_PING=true \
|
||||
-e POSTGRES_USE_NULL_POOL=true \
|
||||
-e VESPA_HOST=index \
|
||||
-e REDIS_HOST=cache \
|
||||
-e API_SERVER_HOST=api_server \
|
||||
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
|
||||
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
|
||||
-e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
|
||||
-e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
|
||||
-e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
|
||||
-e CONFLUENCE_ACCESS_TOKEN_SCOPED=${CONFLUENCE_ACCESS_TOKEN_SCOPED} \
|
||||
-e JIRA_BASE_URL=${JIRA_BASE_URL} \
|
||||
-e JIRA_USER_EMAIL=${JIRA_USER_EMAIL} \
|
||||
-e JIRA_API_TOKEN=${JIRA_API_TOKEN} \
|
||||
-e JIRA_API_TOKEN_SCOPED=${JIRA_API_TOKEN_SCOPED} \
|
||||
-e PERM_SYNC_SHAREPOINT_CLIENT_ID=${PERM_SYNC_SHAREPOINT_CLIENT_ID} \
|
||||
-e PERM_SYNC_SHAREPOINT_PRIVATE_KEY="${PERM_SYNC_SHAREPOINT_PRIVATE_KEY}" \
|
||||
-e PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD=${PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD} \
|
||||
-e PERM_SYNC_SHAREPOINT_DIRECTORY_ID=${PERM_SYNC_SHAREPOINT_DIRECTORY_ID} \
|
||||
-e TEST_WEB_HOSTNAME=test-runner \
|
||||
-e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
|
||||
-e MOCK_CONNECTOR_SERVER_PORT=8001 \
|
||||
onyxdotapp/onyx-integration:test \
|
||||
/app/tests/integration/${{ matrix.test-dir.path }}
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Always gather logs BEFORE "down":
|
||||
@@ -245,19 +323,19 @@ jobs:
|
||||
if: always()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.dev.yml -p onyx-stack logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true
|
||||
docker compose logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true
|
||||
|
||||
- name: Dump all-container logs (optional)
|
||||
if: always()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.dev.yml -p onyx-stack logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
|
||||
docker compose logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
|
||||
|
||||
- name: Upload logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: docker-all-logs
|
||||
name: docker-all-logs-${{ matrix.test-dir.name }}
|
||||
path: ${{ github.workspace }}/docker-compose.log
|
||||
# ------------------------------------------------------------
|
||||
|
||||
@@ -265,4 +343,158 @@ jobs:
|
||||
if: always()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.dev.yml -p onyx-stack down -v
|
||||
docker compose down -v
|
||||
|
||||
|
||||
multitenant-tests:
|
||||
needs:
|
||||
[
|
||||
build-backend-image,
|
||||
build-model-server-image,
|
||||
build-integration-image,
|
||||
]
|
||||
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Login to Private Registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.PRIVATE_REGISTRY }}
|
||||
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
|
||||
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Pull Docker images
|
||||
run: |
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}) &
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}) &
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}) &
|
||||
wait
|
||||
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }} onyxdotapp/onyx-backend:test
|
||||
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }} onyxdotapp/onyx-model-server:test
|
||||
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }} onyxdotapp/onyx-integration:test
|
||||
|
||||
- name: Start Docker containers for multi-tenant tests
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
|
||||
MULTI_TENANT=true \
|
||||
AUTH_TYPE=cloud \
|
||||
REQUIRE_EMAIL_VERIFICATION=false \
|
||||
DISABLE_TELEMETRY=true \
|
||||
IMAGE_TAG=test \
|
||||
DEV_MODE=true \
|
||||
docker compose -f docker-compose.multitenant-dev.yml up \
|
||||
relational_db \
|
||||
index \
|
||||
cache \
|
||||
minio \
|
||||
api_server \
|
||||
inference_model_server \
|
||||
indexing_model_server \
|
||||
background \
|
||||
-d
|
||||
id: start_docker_multi_tenant
|
||||
|
||||
- name: Wait for service to be ready (multi-tenant)
|
||||
run: |
|
||||
echo "Starting wait-for-service script for multi-tenant..."
|
||||
docker logs -f onyx-api_server-1 &
|
||||
start_time=$(date +%s)
|
||||
timeout=300
|
||||
while true; do
|
||||
current_time=$(date +%s)
|
||||
elapsed_time=$((current_time - start_time))
|
||||
if [ $elapsed_time -ge $timeout ]; then
|
||||
echo "Timeout reached. Service did not become ready in 5 minutes."
|
||||
exit 1
|
||||
fi
|
||||
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
|
||||
if [ "$response" = "200" ]; then
|
||||
echo "Service is ready!"
|
||||
break
|
||||
elif [ "$response" = "curl_error" ]; then
|
||||
echo "Curl encountered an error; retrying..."
|
||||
else
|
||||
echo "Service not ready yet (HTTP $response). Retrying in 5 seconds..."
|
||||
fi
|
||||
sleep 5
|
||||
done
|
||||
echo "Finished waiting for service."
|
||||
|
||||
- name: Run Multi-Tenant Integration Tests
|
||||
run: |
|
||||
echo "Running multi-tenant integration tests..."
|
||||
docker run --rm --network onyx_default \
|
||||
--name test-runner \
|
||||
-e POSTGRES_HOST=relational_db \
|
||||
-e POSTGRES_USER=postgres \
|
||||
-e POSTGRES_PASSWORD=password \
|
||||
-e DB_READONLY_USER=db_readonly_user \
|
||||
-e DB_READONLY_PASSWORD=password \
|
||||
-e POSTGRES_DB=postgres \
|
||||
-e POSTGRES_USE_NULL_POOL=true \
|
||||
-e VESPA_HOST=index \
|
||||
-e REDIS_HOST=cache \
|
||||
-e API_SERVER_HOST=api_server \
|
||||
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
|
||||
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
|
||||
-e TEST_WEB_HOSTNAME=test-runner \
|
||||
-e AUTH_TYPE=cloud \
|
||||
-e MULTI_TENANT=true \
|
||||
-e SKIP_RESET=true \
|
||||
-e REQUIRE_EMAIL_VERIFICATION=false \
|
||||
-e DISABLE_TELEMETRY=true \
|
||||
-e IMAGE_TAG=test \
|
||||
-e DEV_MODE=true \
|
||||
onyxdotapp/onyx-integration:test \
|
||||
/app/tests/integration/multitenant_tests
|
||||
|
||||
- name: Dump API server logs (multi-tenant)
|
||||
if: always()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.multitenant-dev.yml logs --no-color api_server > $GITHUB_WORKSPACE/api_server_multitenant.log || true
|
||||
|
||||
- name: Dump all-container logs (multi-tenant)
|
||||
if: always()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.multitenant-dev.yml logs --no-color > $GITHUB_WORKSPACE/docker-compose-multitenant.log || true
|
||||
|
||||
- name: Upload logs (multi-tenant)
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: docker-all-logs-multitenant
|
||||
path: ${{ github.workspace }}/docker-compose-multitenant.log
|
||||
|
||||
- name: Stop multi-tenant Docker containers
|
||||
if: always()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.multitenant-dev.yml down -v
|
||||
|
||||
required:
|
||||
runs-on: blacksmith-2vcpu-ubuntu-2404-arm
|
||||
needs: [integration-tests, multitenant-tests]
|
||||
if: ${{ always() }}
|
||||
steps:
|
||||
- uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const needs = ${{ toJSON(needs) }};
|
||||
const failed = Object.values(needs).some(n => n.result !== 'success');
|
||||
if (failed) {
|
||||
core.setFailed('One or more upstream jobs failed or were cancelled.');
|
||||
} else {
|
||||
core.notice('All required jobs succeeded.');
|
||||
}
|
||||
|
||||
35  .github/workflows/pr-jest-tests.yml  vendored  Normal file
@@ -0,0 +1,35 @@
name: Run Jest Tests
concurrency:
  group: Run-Jest-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
  cancel-in-progress: true

on: push

jobs:
  jest-tests:
    name: Jest Tests
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup node
        uses: actions/setup-node@v4
        with:
          node-version: 22

      - name: Install node dependencies
        working-directory: ./web
        run: npm ci

      - name: Run Jest tests
        working-directory: ./web
        run: npm test -- --ci --coverage --maxWorkers=50%

      - name: Upload coverage reports
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: jest-coverage-${{ github.run_id }}
          path: ./web/coverage
          retention-days: 7
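Local equivalent of the test step, assuming web/package.json's "test" script invokes Jest (which the workflow name suggests but this diff does not show):

    cd web
    npm ci                                         # clean install against package-lock.json
    npm test -- --ci --coverage --maxWorkers=50%   # flags after "--" are passed through to Jest
    # Coverage is written to web/coverage, the same path the workflow uploads as an artifact.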
38  .github/workflows/pr-labeler.yml  vendored  Normal file
@@ -0,0 +1,38 @@
name: PR Labeler

on:
  pull_request_target:
    branches:
      - main
    types:
      - opened
      - reopened
      - synchronize
      - edited

permissions:
  contents: read
  pull-requests: write

jobs:
  validate_pr_title:
    runs-on: ubuntu-latest
    steps:
      - name: Check PR title for Conventional Commits
        env:
          PR_TITLE: ${{ github.event.pull_request.title }}
        run: |
          echo "PR Title: $PR_TITLE"
          if [[ ! "$PR_TITLE" =~ ^(feat|fix|docs|test|ci|refactor|perf|chore|revert|build)(\(.+\))?:\ .+ ]]; then
            echo "::error::❌ Your PR title does not follow the Conventional Commits format.
          This check ensures that all pull requests use clear, consistent titles that help automate changelogs and improve project history.

          Please update your PR title to follow the Conventional Commits style.
          Here is a link to a blog explaining the reason why we've included the Conventional Commits style into our PR titles: https://xfuture-blog.com/working-with-conventional-commits

          **Here are some examples of valid PR titles:**
          - feat: add user authentication
          - fix(login): handle null password error
          - docs(readme): update installation instructions"
            exit 1
          fi
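A quick way to exercise the title regex above locally (the sample titles are illustrative):

    check() {
      if [[ "$1" =~ ^(feat|fix|docs|test|ci|refactor|perf|chore|revert|build)(\(.+\))?:\ .+ ]]; then
        echo "OK:   $1"
      else
        echo "FAIL: $1"
      fi
    }
    check "feat: add user authentication"      # passes
    check "fix(login): handle null password"   # passes (optional scope in parentheses)
    check "Update README"                      # fails: no Conventional Commits type prefix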
356  .github/workflows/pr-mit-integration-tests.yml  vendored
@@ -5,90 +5,210 @@ concurrency:
|
||||
|
||||
on:
|
||||
merge_group:
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
- "release/**"
|
||||
types: [checks_requested]
|
||||
|
||||
env:
|
||||
# Private Registry Configuration
|
||||
PRIVATE_REGISTRY: experimental-registry.blacksmith.sh:5000
|
||||
PRIVATE_REGISTRY_USERNAME: ${{ secrets.PRIVATE_REGISTRY_USERNAME }}
|
||||
PRIVATE_REGISTRY_PASSWORD: ${{ secrets.PRIVATE_REGISTRY_PASSWORD }}
|
||||
|
||||
# Test Environment Variables
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
|
||||
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
|
||||
CONFLUENCE_TEST_SPACE_URL: ${{ vars.CONFLUENCE_TEST_SPACE_URL }}
|
||||
CONFLUENCE_USER_NAME: ${{ vars.CONFLUENCE_USER_NAME }}
|
||||
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
|
||||
CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}
|
||||
JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
|
||||
JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
|
||||
JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
|
||||
JIRA_API_TOKEN_SCOPED: ${{ secrets.JIRA_API_TOKEN_SCOPED }}
|
||||
PERM_SYNC_SHAREPOINT_CLIENT_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_CLIENT_ID }}
|
||||
PERM_SYNC_SHAREPOINT_PRIVATE_KEY: ${{ secrets.PERM_SYNC_SHAREPOINT_PRIVATE_KEY }}
|
||||
PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD: ${{ secrets.PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD }}
|
||||
PERM_SYNC_SHAREPOINT_DIRECTORY_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_DIRECTORY_ID }}
|
||||
|
||||
jobs:
|
||||
integration-tests-mit:
|
||||
# See https://runs-on.com/runners/linux/
|
||||
runs-on: [runs-on, runner=32cpu-linux-x64, "run-id=${{ github.run_id }}"]
|
||||
discover-test-dirs:
|
||||
runs-on: blacksmith-2vcpu-ubuntu-2404-arm
|
||||
outputs:
|
||||
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Discover test directories
|
||||
id: set-matrix
|
||||
run: |
|
||||
# Find all leaf-level directories in both test directories
|
||||
tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)
|
||||
connector_dirs=$(find backend/tests/integration/connector_job_tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)
|
||||
|
||||
# Create JSON array with directory info
|
||||
all_dirs=""
|
||||
for dir in $tests_dirs; do
|
||||
all_dirs="$all_dirs{\"path\":\"tests/$dir\",\"name\":\"tests-$dir\"},"
|
||||
done
|
||||
for dir in $connector_dirs; do
|
||||
all_dirs="$all_dirs{\"path\":\"connector_job_tests/$dir\",\"name\":\"connector-$dir\"},"
|
||||
done
|
||||
|
||||
# Remove trailing comma and wrap in array
|
||||
all_dirs="[${all_dirs%,}]"
|
||||
echo "test-dirs=$all_dirs" >> $GITHUB_OUTPUT
|
||||
|
||||
prepare-build:
|
||||
runs-on: blacksmith-2vcpu-ubuntu-2404-arm
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Prepare build
|
||||
uses: ./.github/actions/prepare-build
|
||||
|
||||
build-backend-image:
|
||||
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Login to Private Registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.PRIVATE_REGISTRY }}
|
||||
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
|
||||
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: useblacksmith/setup-docker-builder@v1
|
||||
|
||||
- name: Build and push Backend Docker image
|
||||
uses: useblacksmith/build-push-action@v2
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}
|
||||
push: true
|
||||
outputs: type=registry
|
||||
no-cache: true
|
||||
|
||||
|
||||
build-model-server-image:
|
||||
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Login to Private Registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.PRIVATE_REGISTRY }}
|
||||
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
|
||||
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: useblacksmith/setup-docker-builder@v1
|
||||
|
||||
- name: Build and push Model Server Docker image
|
||||
uses: useblacksmith/build-push-action@v2
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile.model_server
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}
|
||||
push: true
|
||||
outputs: type=registry
|
||||
provenance: false
|
||||
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
|
||||
|
||||
|
||||
build-integration-image:
|
||||
needs: prepare-build
|
||||
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Login to Private Registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.PRIVATE_REGISTRY }}
|
||||
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
|
||||
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
|
||||
|
||||
- name: Download OpenAPI artifacts
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: openapi-artifacts
|
||||
path: backend/generated/
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: useblacksmith/setup-docker-builder@v1
|
||||
|
||||
- name: Build and push integration test image with Docker Bake
|
||||
env:
|
||||
REGISTRY: ${{ env.PRIVATE_REGISTRY }}
|
||||
TAG: test-${{ github.run_id }}
|
||||
run: cd backend && docker buildx bake --no-cache --push integration
|
||||
|
||||
integration-tests-mit:
|
||||
needs:
|
||||
[
|
||||
discover-test-dirs,
|
||||
build-backend-image,
|
||||
build-model-server-image,
|
||||
build-integration-image,
|
||||
]
|
||||
# See https://docs.blacksmith.sh/blacksmith-runners/overview
|
||||
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
test-dir: ${{ fromJson(needs.discover-test-dirs.outputs.test-dirs) }}
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Login to Private Registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.PRIVATE_REGISTRY }}
|
||||
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
|
||||
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
|
||||
|
||||
# needed for pulling Vespa, Redis, Postgres, and Minio images
|
||||
# otherwise, we hit the "Unauthenticated users" limit
|
||||
# https://docs.docker.com/docker-hub/usage/
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
# tag every docker image with "test" so that we can spin up the correct set
|
||||
# of images during testing
|
||||
|
||||
# We don't need to build the Web Docker image since it's not yet used
|
||||
# in the integration tests. We have a separate action to verify that it builds
|
||||
# successfully.
|
||||
- name: Pull Web Docker image
|
||||
- name: Pull Docker images
|
||||
run: |
|
||||
docker pull onyxdotapp/onyx-web-server:latest
|
||||
docker tag onyxdotapp/onyx-web-server:latest onyxdotapp/onyx-web-server:test
|
||||
# Pull all images from registry in parallel
|
||||
echo "Pulling Docker images in parallel..."
|
||||
# Pull images from private registry
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}) &
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}) &
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}) &
|
||||
|
||||
# we use the runs-on cache for docker builds
|
||||
# in conjunction with runs-on runners, it has better speed and unlimited caching
|
||||
# https://runs-on.com/caching/s3-cache-for-github-actions/
|
||||
# https://runs-on.com/caching/docker/
|
||||
# https://github.com/moby/buildkit#s3-cache-experimental
|
||||
# Wait for all background jobs to complete
|
||||
wait
|
||||
echo "All Docker images pulled successfully"
|
||||
|
||||
# images are built and run locally for testing purposes. Not pushed.
|
||||
- name: Build Backend Docker image
|
||||
uses: ./.github/actions/custom-build-and-push
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile
|
||||
platforms: linux/amd64
|
||||
tags: onyxdotapp/onyx-backend:test
|
||||
push: false
|
||||
load: true
|
||||
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
|
||||
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
|
||||
|
||||
- name: Build Model Server Docker image
|
||||
uses: ./.github/actions/custom-build-and-push
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile.model_server
|
||||
platforms: linux/amd64
|
||||
tags: onyxdotapp/onyx-model-server:test
|
||||
push: false
|
||||
load: true
|
||||
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
|
||||
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
|
||||
|
||||
- name: Build integration test Docker image
|
||||
uses: ./.github/actions/custom-build-and-push
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/tests/integration/Dockerfile
|
||||
platforms: linux/amd64
|
||||
tags: onyxdotapp/onyx-integration:test
|
||||
push: false
|
||||
load: true
|
||||
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
|
||||
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
|
||||
# Re-tag to remove registry prefix for docker-compose
|
||||
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }} onyxdotapp/onyx-backend:test
|
||||
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }} onyxdotapp/onyx-model-server:test
|
||||
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }} onyxdotapp/onyx-integration:test
|
||||
|
||||
# NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
|
||||
# NOTE: don't need web server for integration tests
|
||||
- name: Start Docker containers
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
@@ -99,14 +219,23 @@ jobs:
|
||||
DISABLE_TELEMETRY=true \
|
||||
IMAGE_TAG=test \
|
||||
INTEGRATION_TESTS_MODE=true \
|
||||
docker compose -f docker-compose.dev.yml -p onyx-stack up -d
|
||||
docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
|
||||
relational_db \
|
||||
index \
|
||||
cache \
|
||||
minio \
|
||||
api_server \
|
||||
inference_model_server \
|
||||
indexing_model_server \
|
||||
background \
|
||||
-d
|
||||
id: start_docker
|
||||
|
||||
- name: Wait for service to be ready
|
||||
run: |
|
||||
echo "Starting wait-for-service script..."
|
||||
|
||||
docker logs -f onyx-stack-api_server-1 &
|
||||
docker logs -f onyx-api_server-1 &
|
||||
|
||||
start_time=$(date +%s)
|
||||
timeout=300 # 5 minutes in seconds
|
||||
@@ -143,42 +272,46 @@ jobs:
|
||||
-p mock-it-services-stack up -d
|
||||
|
||||
# NOTE: Use pre-ping/null to reduce flakiness due to dropped connections
|
||||
- name: Run Standard Integration Tests
|
||||
run: |
|
||||
echo "Running integration tests..."
|
||||
docker run --rm --network onyx-stack_default \
|
||||
--name test-runner \
|
||||
-e POSTGRES_HOST=relational_db \
|
||||
-e POSTGRES_USER=postgres \
|
||||
-e POSTGRES_PASSWORD=password \
|
||||
-e POSTGRES_DB=postgres \
|
||||
-e POSTGRES_POOL_PRE_PING=true \
|
||||
-e POSTGRES_USE_NULL_POOL=true \
|
||||
-e VESPA_HOST=index \
|
||||
-e REDIS_HOST=cache \
|
||||
-e API_SERVER_HOST=api_server \
|
||||
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
|
||||
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
|
||||
-e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
|
||||
-e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
|
||||
-e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
|
||||
-e TEST_WEB_HOSTNAME=test-runner \
|
||||
-e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
|
||||
-e MOCK_CONNECTOR_SERVER_PORT=8001 \
|
||||
onyxdotapp/onyx-integration:test \
|
||||
/app/tests/integration/tests \
|
||||
/app/tests/integration/connector_job_tests
|
||||
continue-on-error: true
|
||||
id: run_tests
|
||||
|
||||
- name: Check test results
|
||||
run: |
|
||||
if [ ${{ steps.run_tests.outcome }} == 'failure' ]; then
|
||||
echo "Integration tests failed. Exiting with error."
|
||||
exit 1
|
||||
else
|
||||
echo "All integration tests passed successfully."
|
||||
fi
|
||||
- name: Run Integration Tests for ${{ matrix.test-dir.name }}
|
||||
uses: nick-fields/retry@v3
|
||||
with:
|
||||
timeout_minutes: 20
|
||||
max_attempts: 3
|
||||
retry_wait_seconds: 10
|
||||
command: |
|
||||
echo "Running integration tests for ${{ matrix.test-dir.path }}..."
|
||||
docker run --rm --network onyx_default \
|
||||
--name test-runner \
|
||||
-e POSTGRES_HOST=relational_db \
|
||||
-e POSTGRES_USER=postgres \
|
||||
-e POSTGRES_PASSWORD=password \
|
||||
-e POSTGRES_DB=postgres \
|
||||
-e DB_READONLY_USER=db_readonly_user \
|
||||
-e DB_READONLY_PASSWORD=password \
|
||||
-e POSTGRES_POOL_PRE_PING=true \
|
||||
-e POSTGRES_USE_NULL_POOL=true \
|
||||
-e VESPA_HOST=index \
|
||||
-e REDIS_HOST=cache \
|
||||
-e API_SERVER_HOST=api_server \
|
||||
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
|
||||
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
|
||||
-e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
|
||||
-e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
|
||||
-e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
|
||||
-e CONFLUENCE_ACCESS_TOKEN_SCOPED=${CONFLUENCE_ACCESS_TOKEN_SCOPED} \
|
||||
-e JIRA_BASE_URL=${JIRA_BASE_URL} \
|
||||
-e JIRA_USER_EMAIL=${JIRA_USER_EMAIL} \
|
||||
-e JIRA_API_TOKEN=${JIRA_API_TOKEN} \
|
||||
-e JIRA_API_TOKEN_SCOPED=${JIRA_API_TOKEN_SCOPED} \
|
||||
-e PERM_SYNC_SHAREPOINT_CLIENT_ID=${PERM_SYNC_SHAREPOINT_CLIENT_ID} \
|
||||
-e PERM_SYNC_SHAREPOINT_PRIVATE_KEY="${PERM_SYNC_SHAREPOINT_PRIVATE_KEY}" \
|
||||
-e PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD=${PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD} \
|
||||
-e PERM_SYNC_SHAREPOINT_DIRECTORY_ID=${PERM_SYNC_SHAREPOINT_DIRECTORY_ID} \
|
||||
-e TEST_WEB_HOSTNAME=test-runner \
|
||||
-e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
|
||||
-e MOCK_CONNECTOR_SERVER_PORT=8001 \
|
||||
onyxdotapp/onyx-integration:test \
|
||||
/app/tests/integration/${{ matrix.test-dir.path }}
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Always gather logs BEFORE "down":
|
||||
@@ -186,19 +319,19 @@ jobs:
|
||||
if: always()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.dev.yml -p onyx-stack logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true
|
||||
docker compose logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true
|
||||
|
||||
- name: Dump all-container logs (optional)
|
||||
if: always()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.dev.yml -p onyx-stack logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
|
||||
docker compose logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
|
||||
|
||||
- name: Upload logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: docker-all-logs
|
||||
name: docker-all-logs-${{ matrix.test-dir.name }}
|
||||
path: ${{ github.workspace }}/docker-compose.log
|
||||
# ------------------------------------------------------------
|
||||
|
||||
@@ -206,4 +339,21 @@ jobs:
|
||||
if: always()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.dev.yml -p onyx-stack down -v
|
||||
docker compose down -v
|
||||
|
||||
|
||||
required:
|
||||
runs-on: blacksmith-2vcpu-ubuntu-2404-arm
|
||||
needs: [integration-tests-mit]
|
||||
if: ${{ always() }}
|
||||
steps:
|
||||
- uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const needs = ${{ toJSON(needs) }};
|
||||
const failed = Object.values(needs).some(n => n.result !== 'success');
|
||||
if (failed) {
|
||||
core.setFailed('One or more upstream jobs failed or were cancelled.');
|
||||
} else {
|
||||
core.notice('All required jobs succeeded.');
|
||||
}
|
||||
|
||||
280  .github/workflows/pr-playwright-tests.yml  (vendored)
@@ -6,43 +6,171 @@ concurrency:
|
||||
on: push
|
||||
|
||||
env:
|
||||
# AWS ECR Configuration
|
||||
AWS_REGION: ${{ secrets.AWS_REGION || 'us-west-2' }}
|
||||
ECR_REGISTRY: ${{ secrets.ECR_REGISTRY }}
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_ECR }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_ECR }}
|
||||
BUILDX_NO_DEFAULT_ATTESTATIONS: 1
|
||||
|
||||
# Test Environment Variables
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
GEN_AI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
|
||||
|
||||
# for federated slack tests
|
||||
SLACK_CLIENT_ID: ${{ secrets.SLACK_CLIENT_ID }}
|
||||
SLACK_CLIENT_SECRET: ${{ secrets.SLACK_CLIENT_SECRET }}
|
||||
|
||||
MOCK_LLM_RESPONSE: true
|
||||
|
||||
jobs:
|
||||
playwright-tests:
|
||||
name: Playwright Tests
|
||||
build-web-image:
|
||||
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
# See https://runs-on.com/runners/linux/
|
||||
runs-on:
|
||||
[
|
||||
runs-on,
|
||||
runner=32cpu-linux-x64,
|
||||
disk=large,
|
||||
"run-id=${{ github.run_id }}",
|
||||
]
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
|
||||
aws-region: ${{ env.AWS_REGION }}
|
||||
|
||||
- name: Login to Amazon ECR
|
||||
id: login-ecr
|
||||
uses: aws-actions/amazon-ecr-login@v2
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: useblacksmith/setup-docker-builder@v1
|
||||
|
||||
- name: Build and push Web Docker image
|
||||
uses: useblacksmith/build-push-action@v2
|
||||
with:
|
||||
context: ./web
|
||||
file: ./web/Dockerfile
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.ECR_REGISTRY }}/integration-test-onyx-web-server:playwright-test-${{ github.run_id }}
|
||||
provenance: false
|
||||
sbom: false
|
||||
push: true
|
||||
outputs: type=registry
|
||||
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
|
||||
|
||||
build-backend-image:
|
||||
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
|
||||
aws-region: ${{ env.AWS_REGION }}
|
||||
|
||||
- name: Login to Amazon ECR
|
||||
id: login-ecr
|
||||
uses: aws-actions/amazon-ecr-login@v2
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: useblacksmith/setup-docker-builder@v1
|
||||
|
||||
- name: Build and push Backend Docker image
|
||||
uses: useblacksmith/build-push-action@v2
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.ECR_REGISTRY }}/integration-test-onyx-backend:playwright-test-${{ github.run_id }}
|
||||
provenance: false
|
||||
sbom: false
|
||||
push: true
|
||||
outputs: type=registry
|
||||
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
|
||||
|
||||
build-model-server-image:
|
||||
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
|
||||
aws-region: ${{ env.AWS_REGION }}
|
||||
|
||||
- name: Login to Amazon ECR
|
||||
id: login-ecr
|
||||
uses: aws-actions/amazon-ecr-login@v2
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: useblacksmith/setup-docker-builder@v1
|
||||
|
||||
- name: Build and push Model Server Docker image
|
||||
uses: useblacksmith/build-push-action@v2
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile.model_server
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.ECR_REGISTRY }}/integration-test-onyx-model-server:playwright-test-${{ github.run_id }}
|
||||
provenance: false
|
||||
sbom: false
|
||||
push: true
|
||||
outputs: type=registry
|
||||
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
|
||||
|
||||
playwright-tests:
|
||||
needs: [build-web-image, build-backend-image, build-model-server-image]
|
||||
name: Playwright Tests
|
||||
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
python-version: "3.11"
|
||||
cache: "pip"
|
||||
cache-dependency-path: |
|
||||
backend/requirements/default.txt
|
||||
backend/requirements/dev.txt
|
||||
backend/requirements/model_server.txt
|
||||
- run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
|
||||
aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
|
||||
aws-region: ${{ env.AWS_REGION }}
|
||||
|
||||
- name: Login to Amazon ECR
|
||||
id: login-ecr
|
||||
uses: aws-actions/amazon-ecr-login@v2
|
||||
|
||||
# needed for pulling Vespa, Redis, Postgres, and Minio images
|
||||
# otherwise, we hit the "Unauthenticated users" limit
|
||||
# https://docs.docker.com/docker-hub/usage/
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Pull Docker images
|
||||
run: |
|
||||
# Pull all images from ECR in parallel
|
||||
echo "Pulling Docker images in parallel..."
|
||||
(docker pull ${{ env.ECR_REGISTRY }}/integration-test-onyx-web-server:playwright-test-${{ github.run_id }}) &
|
||||
(docker pull ${{ env.ECR_REGISTRY }}/integration-test-onyx-backend:playwright-test-${{ github.run_id }}) &
|
||||
(docker pull ${{ env.ECR_REGISTRY }}/integration-test-onyx-model-server:playwright-test-${{ github.run_id }}) &
|
||||
|
||||
# Wait for all background jobs to complete
|
||||
wait
|
||||
echo "All Docker images pulled successfully"
|
||||
|
||||
# Re-tag with expected names for docker-compose
|
||||
docker tag ${{ env.ECR_REGISTRY }}/integration-test-onyx-web-server:playwright-test-${{ github.run_id }} onyxdotapp/onyx-web-server:test
|
||||
docker tag ${{ env.ECR_REGISTRY }}/integration-test-onyx-backend:playwright-test-${{ github.run_id }} onyxdotapp/onyx-backend:test
|
||||
docker tag ${{ env.ECR_REGISTRY }}/integration-test-onyx-model-server:playwright-test-${{ github.run_id }} onyxdotapp/onyx-model-server:test
|
||||
|
||||
- name: Setup node
|
||||
uses: actions/setup-node@v4
|
||||
@@ -57,79 +185,29 @@ jobs:
|
||||
working-directory: ./web
|
||||
run: npx playwright install --with-deps
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
# tag every docker image with "test" so that we can spin up the correct set
|
||||
# of images during testing
|
||||
|
||||
# we use the runs-on cache for docker builds
|
||||
# in conjunction with runs-on runners, it has better speed and unlimited caching
|
||||
# https://runs-on.com/caching/s3-cache-for-github-actions/
|
||||
# https://runs-on.com/caching/docker/
|
||||
# https://github.com/moby/buildkit#s3-cache-experimental
|
||||
|
||||
# images are built and run locally for testing purposes. Not pushed.
|
||||
|
||||
- name: Build Web Docker image
|
||||
uses: ./.github/actions/custom-build-and-push
|
||||
with:
|
||||
context: ./web
|
||||
file: ./web/Dockerfile
|
||||
platforms: linux/amd64
|
||||
tags: onyxdotapp/onyx-web-server:test
|
||||
push: false
|
||||
load: true
|
||||
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/web-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
|
||||
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/web-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
|
||||
|
||||
- name: Build Backend Docker image
|
||||
uses: ./.github/actions/custom-build-and-push
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile
|
||||
platforms: linux/amd64
|
||||
tags: onyxdotapp/onyx-backend:test
|
||||
push: false
|
||||
load: true
|
||||
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
|
||||
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
|
||||
|
||||
- name: Build Model Server Docker image
|
||||
uses: ./.github/actions/custom-build-and-push
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile.model_server
|
||||
platforms: linux/amd64
|
||||
tags: onyxdotapp/onyx-model-server:test
|
||||
push: false
|
||||
load: true
|
||||
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
|
||||
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
|
||||
- name: Create .env file for Docker Compose
|
||||
run: |
|
||||
cat <<EOF > deployment/docker_compose/.env
|
||||
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
|
||||
AUTH_TYPE=basic
|
||||
GEN_AI_API_KEY=${{ env.OPENAI_API_KEY }}
|
||||
EXA_API_KEY=${{ env.EXA_API_KEY }}
|
||||
REQUIRE_EMAIL_VERIFICATION=false
|
||||
DISABLE_TELEMETRY=true
|
||||
IMAGE_TAG=test
|
||||
EOF
|
||||
|
||||
- name: Start Docker containers
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
|
||||
AUTH_TYPE=basic \
|
||||
GEN_AI_API_KEY=${{ secrets.OPENAI_API_KEY }} \
|
||||
REQUIRE_EMAIL_VERIFICATION=false \
|
||||
DISABLE_TELEMETRY=true \
|
||||
IMAGE_TAG=test \
|
||||
docker compose -f docker-compose.dev.yml -p danswer-stack up -d
|
||||
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d
|
||||
id: start_docker
|
||||
|
||||
- name: Wait for service to be ready
|
||||
run: |
|
||||
echo "Starting wait-for-service script..."
|
||||
|
||||
docker logs -f danswer-stack-api_server-1 &
|
||||
docker logs -f onyx-api_server-1 &
|
||||
|
||||
start_time=$(date +%s)
|
||||
timeout=300 # 5 minutes in seconds
|
||||
@@ -137,15 +215,15 @@ jobs:
|
||||
while true; do
|
||||
current_time=$(date +%s)
|
||||
elapsed_time=$((current_time - start_time))
|
||||
|
||||
|
||||
if [ $elapsed_time -ge $timeout ]; then
|
||||
echo "Timeout reached. Service did not become ready in 5 minutes."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
# Use curl with error handling to ignore specific exit code 56
|
||||
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
|
||||
|
||||
|
||||
if [ "$response" = "200" ]; then
|
||||
echo "Service is ready!"
|
||||
break
|
||||
@@ -154,28 +232,24 @@ jobs:
|
||||
else
|
||||
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
|
||||
fi
|
||||
|
||||
|
||||
sleep 5
|
||||
done
|
||||
echo "Finished waiting for service."
|
||||
|
||||
- name: Run pytest playwright test init
|
||||
working-directory: ./backend
|
||||
env:
|
||||
PYTEST_IGNORE_SKIP: true
|
||||
run: pytest -s tests/integration/tests/playwright/test_playwright.py
|
||||
|
||||
- name: Run Playwright tests
|
||||
working-directory: ./web
|
||||
run: npx playwright test
|
||||
run: |
|
||||
# Create test-results directory to ensure it exists for artifact upload
|
||||
mkdir -p test-results
|
||||
npx playwright test
|
||||
|
||||
- uses: actions/upload-artifact@v4
|
||||
if: always()
|
||||
with:
|
||||
# Chromatic automatically defaults to the test-results directory.
|
||||
# Replace with the path to your custom directory and adjust the CHROMATIC_ARCHIVE_LOCATION environment variable accordingly.
|
||||
name: test-results
|
||||
path: ./web/test-results
|
||||
# Includes test results and trace.zip files
|
||||
name: playwright-test-results-${{ github.run_id }}
|
||||
path: ./web/test-results/
|
||||
retention-days: 30
|
||||
|
||||
# save before stopping the containers so the logs can be captured
|
||||
@@ -183,7 +257,7 @@ jobs:
|
||||
if: success() || failure()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.dev.yml -p danswer-stack logs > docker-compose.log
|
||||
docker compose logs > docker-compose.log
|
||||
mv docker-compose.log ${{ github.workspace }}/docker-compose.log
|
||||
|
||||
- name: Upload logs
|
||||
@@ -196,7 +270,7 @@ jobs:
|
||||
- name: Stop Docker containers
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.dev.yml -p danswer-stack down -v
|
||||
docker compose down -v
|
||||
|
||||
# NOTE: Chromatic UI diff testing is currently disabled.
|
||||
# We are using Playwright for local and CI testing without visual regression checks.
|
||||
|
||||
27  .github/workflows/pr-python-checks.yml  (vendored)
@@ -31,20 +31,35 @@ jobs:
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
|
||||
|
||||
- name: Generate OpenAPI schema
|
||||
working-directory: ./backend
|
||||
env:
|
||||
PYTHONPATH: "."
|
||||
run: |
|
||||
python scripts/onyx_openapi_schema.py --filename generated/openapi.json
|
||||
|
||||
- name: Generate OpenAPI Python client
|
||||
working-directory: ./backend
|
||||
run: |
|
||||
docker run --rm \
|
||||
-v "${{ github.workspace }}/backend/generated:/local" \
|
||||
openapitools/openapi-generator-cli generate \
|
||||
-i /local/openapi.json \
|
||||
-g python \
|
||||
-o /local/onyx_openapi_client \
|
||||
--package-name onyx_openapi_client \
|
||||
--skip-validate-spec \
|
||||
--openapi-normalizer "SIMPLIFY_ONEOF_ANYOF=true,SET_OAS3_NULLABLE=true"
|
||||
|
||||
- name: Run MyPy
|
||||
run: |
|
||||
cd backend
|
||||
mypy .
|
||||
|
||||
- name: Run ruff
|
||||
run: |
|
||||
cd backend
|
||||
ruff .
|
||||
|
||||
- name: Check import order with reorder-python-imports
|
||||
run: |
|
||||
cd backend
|
||||
find ./danswer -name "*.py" | xargs reorder-python-imports --py311-plus
|
||||
find ./onyx -name "*.py" | xargs reorder-python-imports --py311-plus
|
||||
|
||||
- name: Check code formatting with Black
|
||||
run: |
|
||||
|
||||
149  .github/workflows/pr-python-connector-tests.yml  (vendored)
@@ -12,54 +12,110 @@ env:
|
||||
# AWS
|
||||
AWS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS: ${{ secrets.AWS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS }}
|
||||
AWS_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS: ${{ secrets.AWS_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS }}
|
||||
|
||||
|
||||
# Cloudflare R2
|
||||
R2_ACCOUNT_ID_DAILY_CONNECTOR_TESTS: ${{ vars.R2_ACCOUNT_ID_DAILY_CONNECTOR_TESTS }}
|
||||
R2_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS: ${{ secrets.R2_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS }}
|
||||
R2_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS: ${{ secrets.R2_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS }}
|
||||
|
||||
# Google Cloud Storage
|
||||
GCS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS: ${{ secrets.GCS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS }}
|
||||
GCS_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS: ${{ secrets.GCS_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS }}
|
||||
|
||||
# Confluence
|
||||
CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
|
||||
CONFLUENCE_TEST_SPACE: ${{ secrets.CONFLUENCE_TEST_SPACE }}
|
||||
CONFLUENCE_IS_CLOUD: ${{ secrets.CONFLUENCE_IS_CLOUD }}
|
||||
CONFLUENCE_TEST_SPACE_URL: ${{ vars.CONFLUENCE_TEST_SPACE_URL }}
|
||||
CONFLUENCE_TEST_SPACE: ${{ vars.CONFLUENCE_TEST_SPACE }}
|
||||
CONFLUENCE_TEST_PAGE_ID: ${{ secrets.CONFLUENCE_TEST_PAGE_ID }}
|
||||
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
|
||||
CONFLUENCE_USER_NAME: ${{ vars.CONFLUENCE_USER_NAME }}
|
||||
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
|
||||
CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}
|
||||
|
||||
# Jira
|
||||
JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
|
||||
JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
|
||||
JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
|
||||
JIRA_API_TOKEN_SCOPED: ${{ secrets.JIRA_API_TOKEN_SCOPED }}
|
||||
|
||||
# Gong
|
||||
GONG_ACCESS_KEY: ${{ secrets.GONG_ACCESS_KEY }}
|
||||
GONG_ACCESS_KEY_SECRET: ${{ secrets.GONG_ACCESS_KEY_SECRET }}
|
||||
|
||||
# Google
|
||||
GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR: ${{ secrets.GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR }}
|
||||
GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1: ${{ secrets.GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1 }}
|
||||
GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR }}
|
||||
GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR: ${{ secrets.GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR }}
|
||||
GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR }}
|
||||
|
||||
# Slab
|
||||
SLAB_BOT_TOKEN: ${{ secrets.SLAB_BOT_TOKEN }}
|
||||
|
||||
# Zendesk
|
||||
ZENDESK_SUBDOMAIN: ${{ secrets.ZENDESK_SUBDOMAIN }}
|
||||
ZENDESK_EMAIL: ${{ secrets.ZENDESK_EMAIL }}
|
||||
ZENDESK_TOKEN: ${{ secrets.ZENDESK_TOKEN }}
|
||||
|
||||
# Salesforce
|
||||
SF_USERNAME: ${{ secrets.SF_USERNAME }}
|
||||
SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
|
||||
SF_SECURITY_TOKEN: ${{ secrets.SF_SECURITY_TOKEN }}
|
||||
|
||||
# Hubspot
|
||||
HUBSPOT_ACCESS_TOKEN: ${{ secrets.HUBSPOT_ACCESS_TOKEN }}
|
||||
|
||||
# IMAP
|
||||
IMAP_HOST: ${{ vars.IMAP_HOST }}
|
||||
IMAP_USERNAME: ${{ vars.IMAP_USERNAME }}
|
||||
IMAP_PASSWORD: ${{ secrets.IMAP_PASSWORD }}
|
||||
IMAP_MAILBOXES: ${{ vars.IMAP_MAILBOXES }}
|
||||
|
||||
# Airtable
|
||||
AIRTABLE_TEST_BASE_ID: ${{ secrets.AIRTABLE_TEST_BASE_ID }}
|
||||
AIRTABLE_TEST_TABLE_ID: ${{ secrets.AIRTABLE_TEST_TABLE_ID }}
|
||||
AIRTABLE_TEST_TABLE_NAME: ${{ secrets.AIRTABLE_TEST_TABLE_NAME }}
|
||||
AIRTABLE_TEST_BASE_ID: ${{ vars.AIRTABLE_TEST_BASE_ID }}
|
||||
AIRTABLE_TEST_TABLE_ID: ${{ vars.AIRTABLE_TEST_TABLE_ID }}
|
||||
AIRTABLE_TEST_TABLE_NAME: ${{ vars.AIRTABLE_TEST_TABLE_NAME }}
|
||||
AIRTABLE_ACCESS_TOKEN: ${{ secrets.AIRTABLE_ACCESS_TOKEN }}
|
||||
|
||||
# Sharepoint
|
||||
SHAREPOINT_CLIENT_ID: ${{ secrets.SHAREPOINT_CLIENT_ID }}
|
||||
SHAREPOINT_CLIENT_ID: ${{ vars.SHAREPOINT_CLIENT_ID }}
|
||||
SHAREPOINT_CLIENT_SECRET: ${{ secrets.SHAREPOINT_CLIENT_SECRET }}
|
||||
SHAREPOINT_CLIENT_DIRECTORY_ID: ${{ secrets.SHAREPOINT_CLIENT_DIRECTORY_ID }}
|
||||
SHAREPOINT_SITE: ${{ secrets.SHAREPOINT_SITE }}
|
||||
SHAREPOINT_CLIENT_DIRECTORY_ID: ${{ vars.SHAREPOINT_CLIENT_DIRECTORY_ID }}
|
||||
SHAREPOINT_SITE: ${{ vars.SHAREPOINT_SITE }}
|
||||
|
||||
# Github
|
||||
ACCESS_TOKEN_GITHUB: ${{ secrets.ACCESS_TOKEN_GITHUB }}
|
||||
|
||||
# Gitlab
|
||||
GITLAB_ACCESS_TOKEN: ${{ secrets.GITLAB_ACCESS_TOKEN }}
|
||||
|
||||
# Gitbook
|
||||
GITBOOK_SPACE_ID: ${{ secrets.GITBOOK_SPACE_ID }}
|
||||
GITBOOK_API_KEY: ${{ secrets.GITBOOK_API_KEY }}
|
||||
|
||||
# Notion
|
||||
NOTION_INTEGRATION_TOKEN: ${{ secrets.NOTION_INTEGRATION_TOKEN }}
|
||||
|
||||
# Highspot
|
||||
HIGHSPOT_KEY: ${{ secrets.HIGHSPOT_KEY }}
|
||||
HIGHSPOT_SECRET: ${{ secrets.HIGHSPOT_SECRET }}
|
||||
|
||||
# Slack
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
|
||||
# Teams
|
||||
TEAMS_APPLICATION_ID: ${{ secrets.TEAMS_APPLICATION_ID }}
|
||||
TEAMS_DIRECTORY_ID: ${{ secrets.TEAMS_DIRECTORY_ID }}
|
||||
TEAMS_SECRET: ${{ secrets.TEAMS_SECRET }}
|
||||
|
||||
# Bitbucket
|
||||
BITBUCKET_WORKSPACE: ${{ secrets.BITBUCKET_WORKSPACE }}
|
||||
BITBUCKET_REPOSITORIES: ${{ secrets.BITBUCKET_REPOSITORIES }}
|
||||
BITBUCKET_PROJECTS: ${{ secrets.BITBUCKET_PROJECTS }}
|
||||
BITBUCKET_EMAIL: ${{ vars.BITBUCKET_EMAIL }}
|
||||
BITBUCKET_API_TOKEN: ${{ secrets.BITBUCKET_API_TOKEN }}
|
||||
|
||||
# Fireflies
|
||||
FIREFLIES_API_KEY: ${{ secrets.FIREFLIES_API_KEY }}
|
||||
|
||||
jobs:
|
||||
connectors-check:
|
||||
# See https://runs-on.com/runners/linux/
|
||||
@@ -89,9 +145,76 @@ jobs:
|
||||
playwright install chromium
|
||||
playwright install-deps chromium
|
||||
|
||||
- name: Run Tests
|
||||
- name: Detect Connector changes
|
||||
id: changes
|
||||
uses: dorny/paths-filter@v3
|
||||
with:
|
||||
filters: |
|
||||
hubspot:
|
||||
- 'backend/onyx/connectors/hubspot/**'
|
||||
- 'backend/tests/daily/connectors/hubspot/**'
|
||||
salesforce:
|
||||
- 'backend/onyx/connectors/salesforce/**'
|
||||
- 'backend/tests/daily/connectors/salesforce/**'
|
||||
github:
|
||||
- 'backend/onyx/connectors/github/**'
|
||||
- 'backend/tests/daily/connectors/github/**'
|
||||
file_processing:
|
||||
- 'backend/onyx/file_processing/**'
|
||||
|
||||
- name: Run Tests (excluding HubSpot, Salesforce, and GitHub)
|
||||
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
|
||||
run: py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/connectors
|
||||
run: |
|
||||
py.test \
|
||||
-n 8 \
|
||||
--dist loadfile \
|
||||
--durations=8 \
|
||||
-o junit_family=xunit2 \
|
||||
-xv \
|
||||
--ff \
|
||||
backend/tests/daily/connectors \
|
||||
--ignore backend/tests/daily/connectors/hubspot \
|
||||
--ignore backend/tests/daily/connectors/salesforce \
|
||||
--ignore backend/tests/daily/connectors/github
|
||||
|
||||
- name: Run HubSpot Connector Tests
|
||||
if: ${{ github.event_name == 'schedule' || steps.changes.outputs.hubspot == 'true' || steps.changes.outputs.file_processing == 'true' }}
|
||||
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
|
||||
run: |
|
||||
py.test \
|
||||
-n 8 \
|
||||
--dist loadfile \
|
||||
--durations=8 \
|
||||
-o junit_family=xunit2 \
|
||||
-xv \
|
||||
--ff \
|
||||
backend/tests/daily/connectors/hubspot
|
||||
|
||||
- name: Run Salesforce Connector Tests
|
||||
if: ${{ github.event_name == 'schedule' || steps.changes.outputs.salesforce == 'true' || steps.changes.outputs.file_processing == 'true' }}
|
||||
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
|
||||
run: |
|
||||
py.test \
|
||||
-n 8 \
|
||||
--dist loadfile \
|
||||
--durations=8 \
|
||||
-o junit_family=xunit2 \
|
||||
-xv \
|
||||
--ff \
|
||||
backend/tests/daily/connectors/salesforce
|
||||
|
||||
- name: Run GitHub Connector Tests
|
||||
if: ${{ github.event_name == 'schedule' || steps.changes.outputs.github == 'true' || steps.changes.outputs.file_processing == 'true' }}
|
||||
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
|
||||
run: |
|
||||
py.test \
|
||||
-n 8 \
|
||||
--dist loadfile \
|
||||
--durations=8 \
|
||||
-o junit_family=xunit2 \
|
||||
-xv \
|
||||
--ff \
|
||||
backend/tests/daily/connectors/github
|
||||
|
||||
- name: Alert on Failure
|
||||
if: failure() && github.event_name == 'schedule'
|
||||
|
||||
10  .github/workflows/pr-python-model-tests.yml  (vendored)
@@ -15,7 +15,7 @@ env:
|
||||
# Bedrock
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION_NAME: ${{ secrets.AWS_REGION_NAME }}
|
||||
AWS_REGION_NAME: ${{ vars.AWS_REGION_NAME }}
|
||||
|
||||
# API keys for testing
|
||||
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
|
||||
@@ -23,7 +23,7 @@ env:
|
||||
LITELLM_API_URL: ${{ secrets.LITELLM_API_URL }}
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
|
||||
AZURE_API_URL: ${{ secrets.AZURE_API_URL }}
|
||||
AZURE_API_URL: ${{ vars.AZURE_API_URL }}
|
||||
|
||||
jobs:
|
||||
model-check:
|
||||
@@ -77,7 +77,7 @@ jobs:
|
||||
REQUIRE_EMAIL_VERIFICATION=false \
|
||||
DISABLE_TELEMETRY=true \
|
||||
IMAGE_TAG=test \
|
||||
docker compose -f docker-compose.model-server-test.yml -p onyx-stack up -d indexing_model_server
|
||||
docker compose -f docker-compose.model-server-test.yml up -d indexing_model_server
|
||||
id: start_docker
|
||||
|
||||
- name: Wait for service to be ready
|
||||
@@ -132,7 +132,7 @@ jobs:
|
||||
if: always()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.model-server-test.yml -p onyx-stack logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
|
||||
docker compose -f docker-compose.model-server-test.yml logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
|
||||
|
||||
- name: Upload logs
|
||||
if: always()
|
||||
@@ -145,5 +145,5 @@ jobs:
|
||||
if: always()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.model-server-test.yml -p onyx-stack down -v
|
||||
docker compose -f docker-compose.model-server-test.yml down -v
|
||||
|
||||
|
||||
5  .github/workflows/pr-python-tests.yml  (vendored)
@@ -15,6 +15,9 @@ jobs:
|
||||
env:
|
||||
PYTHONPATH: ./backend
|
||||
REDIS_CLOUD_PYTEST_PASSWORD: ${{ secrets.REDIS_CLOUD_PYTEST_PASSWORD }}
|
||||
SF_USERNAME: ${{ secrets.SF_USERNAME }}
|
||||
SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
|
||||
SF_SECURITY_TOKEN: ${{ secrets.SF_SECURITY_TOKEN }}
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
@@ -28,12 +31,14 @@ jobs:
|
||||
cache-dependency-path: |
|
||||
backend/requirements/default.txt
|
||||
backend/requirements/dev.txt
|
||||
backend/requirements/model_server.txt
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
|
||||
|
||||
- name: Run Tests
|
||||
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
|
||||
|
||||
47  .github/workflows/sync_foss.yml  (vendored, new file)
@@ -0,0 +1,47 @@
|
||||
name: Sync FOSS Repo
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Run daily at 3am PT (11am UTC during PST)
|
||||
- cron: '0 11 * * *'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
sync-foss:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout main Onyx repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Install git-filter-repo
|
||||
run: |
|
||||
sudo apt-get update && sudo apt-get install -y git-filter-repo
|
||||
|
||||
- name: Configure SSH for deploy key
|
||||
env:
|
||||
FOSS_REPO_DEPLOY_KEY: ${{ secrets.FOSS_REPO_DEPLOY_KEY }}
|
||||
run: |
|
||||
mkdir -p ~/.ssh
|
||||
echo "$FOSS_REPO_DEPLOY_KEY" > ~/.ssh/id_ed25519
|
||||
chmod 600 ~/.ssh/id_ed25519
|
||||
ssh-keyscan github.com >> ~/.ssh/known_hosts
|
||||
|
||||
- name: Set Git config
|
||||
run: |
|
||||
git config --global user.name "onyx-bot"
|
||||
git config --global user.email "bot@onyx.app"
|
||||
|
||||
- name: Build FOSS version
|
||||
run: bash backend/scripts/make_foss_repo.sh
|
||||
|
||||
- name: Push to FOSS repo
|
||||
env:
|
||||
FOSS_REPO_URL: git@github.com:onyx-dot-app/onyx-foss.git
|
||||
run: |
|
||||
cd /tmp/foss_repo
|
||||
git remote add public "$FOSS_REPO_URL"
|
||||
git push --force public main
|
||||
3  .github/workflows/tag-nightly.yml  (vendored)
@@ -15,6 +15,9 @@ jobs:
|
||||
# actions using GITHUB_TOKEN cannot trigger another workflow, but we do want this to trigger docker pushes
|
||||
# see https://github.com/orgs/community/discussions/27028#discussioncomment-3254367 for the workaround we
|
||||
# implement here which needs an actual user's deploy key
|
||||
|
||||
# Additional NOTE: even though this is named "rkuo", the actual key is tied to the onyx repo
|
||||
# and not rkuo's personal account. It is fine to leave this key as is!
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
|
||||
45  .gitignore  (vendored)
@@ -1,12 +1,47 @@
|
||||
.env
|
||||
# editors
|
||||
.vscode
|
||||
.zed
|
||||
|
||||
# macos
|
||||
.DS_store
|
||||
|
||||
# python
|
||||
.venv
|
||||
.mypy_cache
|
||||
.idea
|
||||
/deployment/data/nginx/app.conf
|
||||
.vscode/
|
||||
*.sw?
|
||||
/backend/tests/regression/answer_quality/search_test_config.yaml
|
||||
|
||||
# testing
|
||||
/web/test-results/
|
||||
backend/onyx/agent_search/main/test_data.json
|
||||
backend/tests/regression/answer_quality/test_data.json
|
||||
backend/tests/regression/search_quality/eval-*
|
||||
backend/tests/regression/search_quality/search_eval_config.yaml
|
||||
backend/tests/regression/search_quality/*.json
|
||||
backend/onyx/evals/data/
|
||||
*.log
|
||||
|
||||
# secret files
|
||||
.env
|
||||
jira_test_env
|
||||
settings.json
|
||||
|
||||
# others
|
||||
/deployment/data/nginx/app.conf
|
||||
*.sw?
|
||||
/backend/tests/regression/answer_quality/search_test_config.yaml
|
||||
*.egg-info
|
||||
|
||||
# Claude
|
||||
AGENTS.md
|
||||
CLAUDE.md
|
||||
|
||||
|
||||
# Local .terraform directories
|
||||
**/.terraform/*
|
||||
|
||||
# Local .tfstate files
|
||||
*.tfstate
|
||||
*.tfstate.*
|
||||
|
||||
# Local .terraform.lock.hcl file
|
||||
.terraform.lock.hcl
|
||||
|
||||
8  .mcp.json.template  (new file)
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"mcpServers": {
|
||||
"onyx-mcp": {
|
||||
"type": "http",
|
||||
"url": "http://localhost:8000/mcp"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,12 +1,13 @@
|
||||
repos:
|
||||
- repo: https://github.com/psf/black
|
||||
rev: 23.3.0
|
||||
rev: 25.1.0
|
||||
hooks:
|
||||
- id: black
|
||||
language_version: python3.11
|
||||
|
||||
- repo: https://github.com/asottile/reorder_python_imports
|
||||
rev: v3.9.0
|
||||
# this is a fork which keeps compatibility with black
|
||||
- repo: https://github.com/wimglenn/reorder-python-imports-black
|
||||
rev: v3.14.0
|
||||
hooks:
|
||||
- id: reorder-python-imports
|
||||
args: ['--py311-plus', '--application-directories=backend/']
|
||||
@@ -18,23 +19,50 @@ repos:
|
||||
# These settings will remove unused imports with side effects
|
||||
# Note: The repo currently does not and should not have imports with side effects
|
||||
- repo: https://github.com/PyCQA/autoflake
|
||||
rev: v2.2.0
|
||||
rev: v2.3.1
|
||||
hooks:
|
||||
- id: autoflake
|
||||
args: [ '--remove-all-unused-imports', '--remove-unused-variables', '--in-place' , '--recursive']
|
||||
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
# Ruff version.
|
||||
rev: v0.0.286
|
||||
rev: v0.11.4
|
||||
hooks:
|
||||
- id: ruff
|
||||
|
||||
- repo: https://github.com/pre-commit/mirrors-prettier
|
||||
rev: v3.1.0
|
||||
hooks:
|
||||
- id: prettier
|
||||
types_or: [html, css, javascript, ts, tsx]
|
||||
additional_dependencies:
|
||||
- prettier
|
||||
language_version: system
|
||||
|
||||
- repo: https://github.com/sirwart/ripsecrets
|
||||
rev: v0.1.11
|
||||
hooks:
|
||||
- id: ripsecrets
|
||||
args:
|
||||
- --additional-pattern
|
||||
- ^sk-[A-Za-z0-9_\-]{20,}$
|
||||
|
||||
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: terraform-fmt
|
||||
name: terraform fmt
|
||||
entry: terraform fmt -recursive
|
||||
language: system
|
||||
pass_filenames: false
|
||||
files: \.tf$
|
||||
- id: check-lazy-imports
|
||||
name: Check lazy imports are not directly imported
|
||||
entry: python3 backend/scripts/check_lazy_imports.py
|
||||
language: system
|
||||
files: ^backend/(?!\.venv/).*\.py$
|
||||
pass_filenames: false
|
||||
# Note: pass_filenames is false because tsc must check the entire
|
||||
# project, but the files filter ensures this only runs when relevant
|
||||
# files change. Using --incremental for faster subsequent checks.
|
||||
|
||||
# We would like to have a mypy pre-commit hook, but due to the fact that
|
||||
# pre-commit runs in its own isolated environment, we would need to install
|
||||
|
||||
39  .vscode/env_template.txt  (vendored)
@@ -1,6 +1,6 @@
|
||||
# Copy this file to .env in the .vscode folder
|
||||
# Fill in the <REPLACE THIS> values as needed, it is recommended to set the GEN_AI_API_KEY value to avoid having to set up an LLM in the UI
|
||||
# Also check out danswer/backend/scripts/restart_containers.sh for a script to restart the containers which Danswer relies on outside of VSCode/Cursor processes
|
||||
# Also check out onyx/backend/scripts/restart_containers.sh for a script to restart the containers which Onyx relies on outside of VSCode/Cursor processes
|
||||
|
||||
# For local dev, often user Authentication is not needed
|
||||
AUTH_TYPE=disabled
|
||||
@@ -10,7 +10,7 @@ SKIP_WARM_UP=True
|
||||
|
||||
# Always keep these on for Dev
|
||||
# Logs all model prompts to stdout
|
||||
LOG_DANSWER_MODEL_INTERACTIONS=True
|
||||
LOG_ONYX_MODEL_INTERACTIONS=True
|
||||
# More verbose logging
|
||||
LOG_LEVEL=debug
|
||||
|
||||
@@ -23,6 +23,9 @@ DISABLE_LLM_DOC_RELEVANCE=False
|
||||
# Useful if you want to toggle auth on/off (google_oauth/OIDC specifically)
|
||||
OAUTH_CLIENT_ID=<REPLACE THIS>
|
||||
OAUTH_CLIENT_SECRET=<REPLACE THIS>
|
||||
OPENID_CONFIG_URL=<REPLACE THIS>
|
||||
SAML_CONF_DIR=/<ABSOLUTE PATH TO ONYX>/onyx/backend/ee/onyx/configs/saml_config
|
||||
|
||||
# Generally not useful for dev, we don't generally want to set up an SMTP server for dev
|
||||
REQUIRE_EMAIL_VERIFICATION=False
|
||||
|
||||
@@ -34,10 +37,10 @@ OPENAI_API_KEY=<REPLACE THIS>
|
||||
GEN_AI_MODEL_VERSION=gpt-4o
|
||||
FAST_GEN_AI_MODEL_VERSION=gpt-4o
|
||||
|
||||
# For Danswer Slack Bot, overrides the UI values so no need to set this up via UI every time
|
||||
# Only needed if using DanswerBot
|
||||
#DANSWER_BOT_SLACK_APP_TOKEN=<REPLACE THIS>
|
||||
#DANSWER_BOT_SLACK_BOT_TOKEN=<REPLACE THIS>
|
||||
# For Onyx Slack Bot, overrides the UI values so no need to set this up via UI every time
|
||||
# Only needed if using OnyxBot
|
||||
#ONYX_BOT_SLACK_APP_TOKEN=<REPLACE THIS>
|
||||
#ONYX_BOT_SLACK_BOT_TOKEN=<REPLACE THIS>
|
||||
|
||||
|
||||
# Python stuff
|
||||
@@ -45,8 +48,8 @@ PYTHONPATH=../backend
|
||||
PYTHONUNBUFFERED=1
|
||||
|
||||
|
||||
# Internet Search
|
||||
BING_API_KEY=<REPLACE THIS>
|
||||
# Internet Search
|
||||
EXA_API_KEY=<REPLACE THIS>
|
||||
|
||||
|
||||
# Enable the full set of Danswer Enterprise Edition features
|
||||
@@ -58,3 +61,23 @@ AGENT_RETRIEVAL_STATS=False # Note: This setting will incur substantial re-ran
|
||||
AGENT_RERANKING_STATS=True
|
||||
AGENT_MAX_QUERY_RETRIEVAL_RESULTS=20
|
||||
AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS=20
|
||||
|
||||
# S3 File Store Configuration (MinIO for local development)
|
||||
S3_ENDPOINT_URL=http://localhost:9004
|
||||
S3_FILE_STORE_BUCKET_NAME=onyx-file-store-bucket
|
||||
S3_AWS_ACCESS_KEY_ID=minioadmin
|
||||
S3_AWS_SECRET_ACCESS_KEY=minioadmin
|
||||
|
||||
# Show extra/uncommon connectors
|
||||
SHOW_EXTRA_CONNECTORS=True
|
||||
|
||||
# Local langsmith tracing
|
||||
LANGSMITH_TRACING="true"
|
||||
LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
|
||||
LANGSMITH_API_KEY=<REPLACE_THIS>
|
||||
LANGSMITH_PROJECT=<REPLACE_THIS>
|
||||
|
||||
# Local Confluence OAuth testing
|
||||
# OAUTH_CONFLUENCE_CLOUD_CLIENT_ID=<REPLACE_THIS>
|
||||
# OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET=<REPLACE_THIS>
|
||||
# NEXT_PUBLIC_TEST_ENV=True
|
||||
1000  .vscode/launch.template.jsonc  (vendored; diff suppressed because it is too large)
101  .vscode/tasks.template.jsonc  (vendored, new file)
@@ -0,0 +1,101 @@
|
||||
{
|
||||
"version": "2.0.0",
|
||||
"tasks": [
|
||||
{
|
||||
"type": "austin",
|
||||
"label": "Profile celery beat",
|
||||
"envFile": "${workspaceFolder}/.env",
|
||||
"options": {
|
||||
"cwd": "${workspaceFolder}/backend"
|
||||
},
|
||||
"command": [
|
||||
"sudo",
|
||||
"-E"
|
||||
],
|
||||
"args": [
|
||||
"celery",
|
||||
"-A",
|
||||
"onyx.background.celery.versioned_apps.beat",
|
||||
"beat",
|
||||
"--loglevel=INFO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "shell",
|
||||
"label": "Generate Onyx OpenAPI Python client",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.env",
|
||||
"options": {
|
||||
"cwd": "${workspaceFolder}/backend"
|
||||
},
|
||||
"command": [
|
||||
"openapi-generator"
|
||||
],
|
||||
"args": [
|
||||
"generate",
|
||||
"-i",
|
||||
"generated/openapi.json",
|
||||
"-g",
|
||||
"python",
|
||||
"-o",
|
||||
"generated/onyx_openapi_client",
|
||||
"--package-name",
|
||||
"onyx_openapi_client",
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "shell",
|
||||
"label": "Generate Typescript Fetch client (openapi-generator)",
|
||||
"envFile": "${workspaceFolder}/.env",
|
||||
"options": {
|
||||
"cwd": "${workspaceFolder}"
|
||||
},
|
||||
"command": [
|
||||
"openapi-generator"
|
||||
],
|
||||
"args": [
|
||||
"generate",
|
||||
"-i",
|
||||
"backend/generated/openapi.json",
|
||||
"-g",
|
||||
"typescript-fetch",
|
||||
"-o",
|
||||
"${workspaceFolder}/web/src/lib/generated/onyx_api",
|
||||
"--additional-properties=disallowAdditionalPropertiesIfNotPresent=false,legacyDiscriminatorBehavior=false,supportsES6=true",
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "shell",
|
||||
"label": "Generate TypeScript Client (openapi-ts)",
|
||||
"envFile": "${workspaceFolder}/.env",
|
||||
"options": {
|
||||
"cwd": "${workspaceFolder}/web"
|
||||
},
|
||||
"command": [
|
||||
"npx"
|
||||
],
|
||||
"args": [
|
||||
"openapi-typescript",
|
||||
"../backend/generated/openapi.json",
|
||||
"--output",
|
||||
"./src/lib/generated/onyx-schema.ts",
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "shell",
|
||||
"label": "Generate TypeScript Client (orval)",
|
||||
"envFile": "${workspaceFolder}/.env",
|
||||
"options": {
|
||||
"cwd": "${workspaceFolder}/web"
|
||||
},
|
||||
"command": [
|
||||
"npx"
|
||||
],
|
||||
"args": [
|
||||
"orval",
|
||||
"--config",
|
||||
"orval.config.js",
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
327  AGENTS.md.template  (new file)
@@ -0,0 +1,327 @@
|
||||
# AGENTS.md
|
||||
|
||||
This file provides guidance to Codex when working with code in this repository.
|
||||
|
||||
## KEY NOTES
|
||||
|
||||
- If you run into any missing python dependency errors, try running your command with `source backend/.venv/bin/activate` \
|
||||
to activate the Python venv.
|
||||
- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
|
||||
- If using `playwright` to explore the frontend, you can usually log in with username `a@test.com` and password
|
||||
`a`. The app can be accessed at `http://localhost:3000`.
|
||||
- You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
|
||||
make sure we see logs coming out from the relevant service.
|
||||
- To connect to the Postgres database, use: `docker exec -it onyx-relational_db-1 psql -U postgres -c "<SQL>"`
|
||||
- When making calls to the backend, always go through the frontend. E.g. make a call to `http://localhost:3000/api/persona` not `http://localhost:8080/api/persona`
|
||||
- Put ALL db operations under the `backend/onyx/db` / `backend/ee/onyx/db` directories. Don't run queries
|
||||
outside of those directories.
|
||||
|
||||
## Project Overview
|
||||
|
||||
**Onyx** (formerly Danswer) is an open-source Gen-AI and Enterprise Search platform that connects to company documents, apps, and people. It features a modular architecture with both Community Edition (MIT licensed) and Enterprise Edition offerings.
|
||||
|
||||
|
||||
### Background Workers (Celery)
|
||||
|
||||
Onyx uses Celery for asynchronous task processing with multiple specialized workers:
|
||||
|
||||
#### Worker Types
|
||||
|
||||
1. **Primary Worker** (`celery_app.py`)
|
||||
- Coordinates core background tasks and system-wide operations
|
||||
- Handles connector management, document sync, pruning, and periodic checks
|
||||
- Runs with 4 threads concurrency
|
||||
- Tasks: connector deletion, vespa sync, pruning, LLM model updates, user file sync
|
||||
|
||||
2. **Docfetching Worker** (`docfetching`)
|
||||
- Fetches documents from external data sources (connectors)
|
||||
- Spawns docprocessing tasks for each document batch
|
||||
- Implements watchdog monitoring for stuck connectors
|
||||
- Configurable concurrency (default from env)
|
||||
|
||||
3. **Docprocessing Worker** (`docprocessing`)
|
||||
- Processes fetched documents through the indexing pipeline:
|
||||
- Upserts documents to PostgreSQL
|
||||
- Chunks documents and adds contextual information
|
||||
- Embeds chunks via model server
|
||||
- Writes chunks to Vespa vector database
|
||||
- Updates document metadata
|
||||
- Configurable concurrency (default from env)
|
||||
|
||||
4. **Light Worker** (`light`)
|
||||
- Handles lightweight, fast operations
|
||||
- Tasks: vespa operations, document permissions sync, external group sync
|
||||
- Higher concurrency for quick tasks
|
||||
|
||||
5. **Heavy Worker** (`heavy`)
|
||||
- Handles resource-intensive operations
|
||||
- Primary task: document pruning operations
|
||||
- Runs with 4 threads concurrency
|
||||
|
||||
6. **KG Processing Worker** (`kg_processing`)
|
||||
- Handles Knowledge Graph processing and clustering
|
||||
- Builds relationships between documents
|
||||
- Runs clustering algorithms
|
||||
- Configurable concurrency
|
||||
|
||||
7. **Monitoring Worker** (`monitoring`)
|
||||
- System health monitoring and metrics collection
|
||||
- Monitors Celery queues, process memory, and system status
|
||||
- Single thread (monitoring doesn't need parallelism)
|
||||
- Cloud-specific monitoring tasks
|
||||
|
||||
8. **User File Processing Worker** (`user_file_processing`)
|
||||
- Processes user-uploaded files
|
||||
- Handles user file indexing and project synchronization
|
||||
- Configurable concurrency
|
||||
|
||||
9. **Beat Worker** (`beat`)
|
||||
- Celery's scheduler for periodic tasks
|
||||
- Uses DynamicTenantScheduler for multi-tenant support
|
||||
- Schedules tasks like (see the schedule sketch after this list):
|
||||
- Indexing checks (every 15 seconds)
|
||||
- Connector deletion checks (every 20 seconds)
|
||||
- Vespa sync checks (every 20 seconds)
|
||||
- Pruning checks (every 20 seconds)
|
||||
- KG processing (every 60 seconds)
|
||||
- Monitoring tasks (every 5 minutes)
|
||||
- Cleanup tasks (hourly)
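
For orientation, a Celery beat schedule with these cadences might look roughly like the following; the entry keys and task names are placeholders, not the actual Onyx schedule:

```python
from datetime import timedelta

# Hypothetical beat_schedule mirroring the cadences listed above.
beat_schedule = {
    "check-for-indexing": {
        "task": "check_for_indexing",  # placeholder task name
        "schedule": timedelta(seconds=15),
    },
    "check-for-connector-deletion": {
        "task": "check_for_connector_deletion",  # placeholder task name
        "schedule": timedelta(seconds=20),
    },
    "monitor-background-processes": {
        "task": "monitor_background_processes",  # placeholder task name
        "schedule": timedelta(minutes=5),
    },
}
```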
|
||||
|
||||
#### Worker Deployment Modes
|
||||
|
||||
Onyx supports two deployment modes for background workers, controlled by the `USE_LIGHTWEIGHT_BACKGROUND_WORKER` environment variable:
|
||||
|
||||
**Lightweight Mode** (default, `USE_LIGHTWEIGHT_BACKGROUND_WORKER=true`):
|
||||
- Runs a single consolidated `background` worker that handles all background tasks:
|
||||
- Pruning operations (from `heavy` worker)
|
||||
- Knowledge graph processing (from `kg_processing` worker)
|
||||
- Monitoring tasks (from `monitoring` worker)
|
||||
- User file processing (from `user_file_processing` worker)
|
||||
- Lower resource footprint (single worker process)
|
||||
- Suitable for smaller deployments or development environments
|
||||
- Default concurrency: 6 threads
|
||||
|
||||
**Standard Mode** (`USE_LIGHTWEIGHT_BACKGROUND_WORKER=false`):
|
||||
- Runs separate specialized workers as documented above (heavy, kg_processing, monitoring, user_file_processing)
|
||||
- Better isolation and scalability
|
||||
- Can scale individual workers independently based on workload
|
||||
- Suitable for production deployments with higher load
|
||||
|
||||
The deployment mode affects:
|
||||
- **Backend**: Worker processes spawned by supervisord or dev scripts
|
||||
- **Helm**: Which Kubernetes deployments are created
|
||||
- **Dev Environment**: Which workers `dev_run_background_jobs.py` spawns
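
As a rough sketch (not the actual launcher code), a dev script could branch on this flag as follows; the worker groupings are taken from the mode descriptions above:

```python
import os

# Lightweight mode is the documented default.
use_lightweight = (
    os.environ.get("USE_LIGHTWEIGHT_BACKGROUND_WORKER", "true").lower() == "true"
)

if use_lightweight:
    # One consolidated "background" worker absorbs the heavy, kg_processing,
    # monitoring, and user_file_processing duties.
    workers = ["primary", "docfetching", "docprocessing", "light", "background", "beat"]
else:
    # Standard mode: specialized workers that can be scaled independently.
    workers = [
        "primary", "docfetching", "docprocessing", "light",
        "heavy", "kg_processing", "monitoring", "user_file_processing", "beat",
    ]

print(f"Starting workers: {', '.join(workers)}")
```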
|
||||
|
||||
#### Key Features

- **Thread-based Workers**: All workers use thread pools (not processes) for stability
- **Tenant Awareness**: Multi-tenant support with per-tenant task isolation. There is a
  middleware layer that automatically finds the appropriate tenant ID when sending tasks
  via Celery Beat.
- **Task Prioritization**: High, Medium, Low priority queues
- **Monitoring**: Built-in heartbeat and liveness checking
- **Failure Handling**: Automatic retry and failure recovery mechanisms
- **Redis Coordination**: Inter-process communication via Redis
- **PostgreSQL State**: Task state and metadata stored in PostgreSQL
#### Important Notes

**Defining Tasks**:
- Always use `@shared_task` rather than `@celery_app.task` (see the sketch below)
- Put tasks under `background/celery/tasks/` or `ee/background/celery/tasks/`
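
A minimal sketch of a task that follows these conventions; the module path, task name, and body are illustrative assumptions:

```python
# backend/onyx/background/celery/tasks/example/tasks.py  (hypothetical path)
from celery import shared_task


@shared_task(
    name="example_cleanup_task",  # assumed name -- real tasks define their own
    bind=True,
    ignore_result=True,
)
def example_cleanup_task(self, tenant_id: str | None = None) -> None:
    """Illustrative body only; real tasks implement their actual work here."""
    # e.g. clean up stale rows, kick off follow-up tasks, etc.
    return None
```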
**Defining APIs**:
When creating new FastAPI endpoints, do NOT use the `response_model` argument. Instead, just annotate
the function's return type (see the sketch below).
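
For example (the router path and Pydantic model here are made up for illustration):

```python
# Hypothetical endpoint showing the convention: rely on the return annotation
# instead of passing response_model=... to the route decorator.
from fastapi import APIRouter
from pydantic import BaseModel

router = APIRouter()


class PersonaSnapshot(BaseModel):  # illustrative model
    id: int
    name: str


@router.get("/persona/{persona_id}")
def get_persona(persona_id: int) -> PersonaSnapshot:
    # FastAPI derives the response schema from the return type annotation.
    return PersonaSnapshot(id=persona_id, name="example")
```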
**Testing Updates**:
If you make any updates to a Celery worker and want to test those changes, you will need
to ask me to restart the Celery worker. There is no auto-restart-on-code-change mechanism.
### Code Quality
```bash
# Install and run pre-commit hooks
pre-commit install
pre-commit run --all-files
```

NOTE: Always make sure everything is strictly typed (both in Python and TypeScript).
## Architecture Overview

### Technology Stack
- **Backend**: Python 3.11, FastAPI, SQLAlchemy, Alembic, Celery
- **Frontend**: Next.js 15+, React 18, TypeScript, Tailwind CSS
- **Database**: PostgreSQL with Redis caching
- **Search**: Vespa vector database
- **Auth**: OAuth2, SAML, multi-provider support
- **AI/ML**: LangChain, LiteLLM, multiple embedding models
### Directory Structure

```
backend/
├── onyx/
│   ├── auth/                  # Authentication & authorization
│   ├── chat/                  # Chat functionality & LLM interactions
│   ├── connectors/            # Data source connectors
│   ├── db/                    # Database models & operations
│   ├── document_index/        # Vespa integration
│   ├── federated_connectors/  # External search connectors
│   ├── llm/                   # LLM provider integrations
│   └── server/                # API endpoints & routers
├── ee/                        # Enterprise Edition features
├── alembic/                   # Database migrations
└── tests/                     # Test suites

web/
├── src/app/         # Next.js app router pages
├── src/components/  # Reusable React components
└── src/lib/         # Utilities & business logic
```
## Database & Migrations

### Running Migrations
```bash
# Standard migrations
alembic upgrade head

# Multi-tenant (Enterprise)
alembic -n schema_private upgrade head
```

### Creating Migrations
```bash
# Create migration
alembic revision -m "description"

# Multi-tenant migration
alembic -n schema_private revision -m "description"
```

Write the migration manually and place it in the file that Alembic creates when running the above command.
A minimal sketch of what such a hand-written migration looks like is shown below.
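
This is a hedged, illustrative migration body; the revision IDs, table, and column are placeholders, and the real revision identifiers come from the file Alembic generates:

```python
"""add example flag column

Revision ID: abc123def456   (placeholder -- Alembic fills in the real ID)
Revises: 000000000000       (placeholder parent revision)
"""
import sqlalchemy as sa
from alembic import op

revision = "abc123def456"
down_revision = "000000000000"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Purely illustrative: add a nullable column to a hypothetical table.
    op.add_column("example_table", sa.Column("example_flag", sa.Boolean(), nullable=True))


def downgrade() -> None:
    op.drop_column("example_table", "example_flag")
```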
## Testing Strategy

There are 4 main types of tests within Onyx:

### Unit Tests
These should not assume any Onyx/external services are available to be called.
Interactions with the outside world should be mocked using `unittest.mock`. Generally, only
write these for complex, isolated modules, e.g. `citation_processing.py`.

To run them:

```bash
python -m dotenv -f .vscode/.env run -- pytest -xv backend/tests/unit
```
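
A self-contained sketch of this style of test; the function under test is defined inline as a toy stand-in so that the mocking pattern is clear:

```python
# Toy unit test: nothing outside the process is touched; the outbound HTTP
# call is mocked with unittest.mock.
from unittest.mock import MagicMock, patch

import requests


def fetch_title(url: str) -> str:
    """Stand-in for a small, isolated module that talks to the outside world."""
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    return response.json()["title"]


@patch("requests.get")
def test_fetch_title_parses_json(mock_get: MagicMock) -> None:
    mock_get.return_value.json.return_value = {"title": "hello"}

    assert fetch_title("https://example.com/doc") == "hello"
    mock_get.assert_called_once()
```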
### External Dependency Unit Tests
These tests assume that all external dependencies of Onyx are available and callable (e.g. Postgres, Redis,
MinIO/S3, and Vespa are running; OpenAI can be called; any request to the internet is fine; etc.).

However, the actual Onyx containers are not running, and in these tests we call the function under test directly.
We can also mock components/calls at will.

The goal with these tests is to minimize mocking while giving some flexibility to mock things that are flaky,
need strictly controlled behavior, or need to have their internal behavior validated (e.g. verifying that a function
is called with certain args, something that would be impossible with proper integration tests).

A great example of this type of test is `backend/tests/external_dependency_unit/connectors/confluence/test_confluence_group_sync.py`.

To run them:

```bash
python -m dotenv -f .vscode/.env run -- pytest backend/tests/external_dependency_unit
```
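
A minimal sketch of this style; the `db_session` fixture name is an assumption based on the example test referenced above:

```python
# Hedged sketch: the real Postgres instance is used directly, nothing is mocked.
from sqlalchemy import text
from sqlalchemy.orm import Session


def test_database_is_reachable(db_session: Session) -> None:
    # Talks to the actual database via the (assumed) session fixture.
    assert db_session.execute(text("SELECT 1")).scalar() == 1
```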
### Integration Tests
Standard integration tests. Every test in `backend/tests/integration` runs against a real Onyx deployment. We cannot
mock anything in these tests. Prefer writing integration tests (or External Dependency Unit Tests if mocking/internal
verification is necessary) over any other type of test.

Tests are parallelized at the directory level.

When writing integration tests, make sure to check the root `conftest.py` for useful fixtures and the
`backend/tests/integration/common_utils` directory for utilities. Prefer calling the appropriate Manager
class in the utils (if one exists) over directly calling the APIs with a library like `requests`. Prefer using
fixtures rather than calling the utilities directly (e.g. do NOT create admin users with
`admin_user = UserManager.create(name="admin_user")`; instead use the `admin_user` fixture), as in the sketch
after the run command below.

A great example of this type of test is `backend/tests/integration/dev_apis/test_simple_chat_api.py`.

To run them:

```bash
python -m dotenv -f .vscode/.env run -- pytest backend/tests/integration
```
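
A hedged sketch of the preferred fixture-first shape; the Manager class, its import path, and its method are placeholders to show the pattern, not verified Onyx APIs:

```python
# Illustrative only: demonstrates fixture + Manager usage, not real signatures.


def test_admin_can_create_chat_session(admin_user) -> None:
    # `admin_user` is the shared fixture from the root conftest.py -- do NOT
    # recreate it with UserManager.create(name="admin_user").
    from tests.integration.common_utils.managers.chat import (  # hypothetical import path
        ChatSessionManager,
    )

    session = ChatSessionManager.create(user_performing_action=admin_user)  # hypothetical call
    assert session is not None
```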
### Playwright (E2E) Tests
These tests are an even more complete version of the Integration Tests mentioned above: they run all services of
Onyx, *including* the Web Server.

Use these tests for anything that requires significant frontend <-> backend coordination.

Tests are located at `web/tests/e2e` and are written in TypeScript.

To run them:

```bash
npx playwright test <TEST_NAME>
```
## Logs

When (1) writing integration tests or (2) doing live tests (e.g. curl / Playwright), you can get access
to logs via the `backend/log/<service_name>_debug.log` files. All Onyx services (api_server, web_server, celery_X)
tail their logs to these files.
## Security Considerations

- Never commit API keys or secrets to the repository
- Use encrypted credential storage for connector credentials
- Follow RBAC patterns for new features
- Implement proper input validation with Pydantic models
- Use parameterized queries to prevent SQL injection (see the sketch below)
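
For the last point, a minimal sketch using SQLAlchemy-style bound parameters; the table and column names are illustrative, and per the conventions above the real version would live under `backend/onyx/db/`:

```python
# Bound parameters keep user-supplied values out of the SQL string itself.
from sqlalchemy import text
from sqlalchemy.orm import Session


def fetch_document_ids_by_source(db_session: Session, source: str) -> list[str]:
    # GOOD: the value is passed as a bound parameter, never string-interpolated.
    stmt = text("SELECT id FROM document WHERE source = :source")  # illustrative table/column
    return [row.id for row in db_session.execute(stmt, {"source": source})]
    # BAD (never do this): f"SELECT id FROM document WHERE source = '{source}'"
```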
## AI/LLM Integration

- Multiple LLM providers supported via LiteLLM
- Configurable models per feature (chat, search, embeddings)
- Streaming support for real-time responses
- Token management and rate limiting
- Custom prompts and agent actions
## UI/UX Patterns

- Tailwind CSS with design system in `web/src/components/ui/`
- Radix UI and Headless UI for accessible components
- SWR for data fetching and caching
- Form validation with react-hook-form
- Error handling with popup notifications
## Creating a Plan

When creating a plan in the `plans` directory, make sure to include at least these elements:

**Issues to Address**
What the change is meant to do.

**Important Notes**
Things you come across in your research that are important to the implementation.

**Implementation Strategy**
How you are going to make the changes happen. High-level approach.

**Tests**
What unit (use rarely), external dependency unit, integration, and Playwright tests you plan to write to
verify the correct behavior. Don't over-test; usually, a given change only needs one type of test.

Do NOT include these: *Timeline*, *Rollback plan*

This is a minimal list - feel free to include more. Do NOT write code as part of your plan.
Keep it high level. You can reference specific files or functions, though.

Before writing your plan, make sure to do research. Explore the relevant sections of the codebase.
332  CLAUDE.md.template  (new file)
@@ -0,0 +1,332 @@
|
||||
# CLAUDE.md
|
||||
|
||||
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
||||
|
||||
## KEY NOTES
|
||||
|
||||
- If you run into any missing python dependency errors, try running your command with `source backend/.venv/bin/activate` \
|
||||
to assume the python venv.
|
||||
- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
|
||||
- If using `playwright` to explore the frontend, you can usually log in with username `a@test.com` and password
|
||||
`a`. The app can be accessed at `http://localhost:3000`.
|
||||
- You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
|
||||
make sure we see logs coming out from the relevant service.
|
||||
- To connect to the Postgres database, use: `docker exec -it onyx-relational_db-1 psql -U postgres -c "<SQL>"`
|
||||
- When making calls to the backend, always go through the frontend. E.g. make a call to `http://localhost:3000/api/persona` not `http://localhost:8080/api/persona`
|
||||
- Put ALL db operations under the `backend/onyx/db` / `backend/ee/onyx/db` directories. Don't run queries
|
||||
outside of those directories.
|
||||
|
||||
## Project Overview
|
||||
|
||||
**Onyx** (formerly Danswer) is an open-source Gen-AI and Enterprise Search platform that connects to company documents, apps, and people. It features a modular architecture with both Community Edition (MIT licensed) and Enterprise Edition offerings.
|
||||
|
||||
|
||||
### Background Workers (Celery)
|
||||
|
||||
Onyx uses Celery for asynchronous task processing with multiple specialized workers:
|
||||
|
||||
#### Worker Types
|
||||
|
||||
1. **Primary Worker** (`celery_app.py`)
|
||||
- Coordinates core background tasks and system-wide operations
|
||||
- Handles connector management, document sync, pruning, and periodic checks
|
||||
- Runs with 4 threads concurrency
|
||||
- Tasks: connector deletion, vespa sync, pruning, LLM model updates, user file sync
|
||||
|
||||
2. **Docfetching Worker** (`docfetching`)
|
||||
- Fetches documents from external data sources (connectors)
|
||||
- Spawns docprocessing tasks for each document batch
|
||||
- Implements watchdog monitoring for stuck connectors
|
||||
- Configurable concurrency (default from env)
|
||||
|
||||
3. **Docprocessing Worker** (`docprocessing`)
|
||||
- Processes fetched documents through the indexing pipeline:
|
||||
- Upserts documents to PostgreSQL
|
||||
- Chunks documents and adds contextual information
|
||||
- Embeds chunks via model server
|
||||
- Writes chunks to Vespa vector database
|
||||
- Updates document metadata
|
||||
- Configurable concurrency (default from env)
|
||||
|
||||
4. **Light Worker** (`light`)
|
||||
- Handles lightweight, fast operations
|
||||
- Tasks: vespa operations, document permissions sync, external group sync
|
||||
- Higher concurrency for quick tasks
|
||||
|
||||
5. **Heavy Worker** (`heavy`)
|
||||
- Handles resource-intensive operations
|
||||
- Primary task: document pruning operations
|
||||
- Runs with 4 threads concurrency
|
||||
|
||||
6. **KG Processing Worker** (`kg_processing`)
|
||||
- Handles Knowledge Graph processing and clustering
|
||||
- Builds relationships between documents
|
||||
- Runs clustering algorithms
|
||||
- Configurable concurrency
|
||||
|
||||
7. **Monitoring Worker** (`monitoring`)
|
||||
- System health monitoring and metrics collection
|
||||
- Monitors Celery queues, process memory, and system status
|
||||
- Single thread (monitoring doesn't need parallelism)
|
||||
- Cloud-specific monitoring tasks
|
||||
|
||||
8. **User File Processing Worker** (`user_file_processing`)
|
||||
- Processes user-uploaded files
|
||||
- Handles user file indexing and project synchronization
|
||||
- Configurable concurrency
|
||||
|
||||
9. **Beat Worker** (`beat`)
|
||||
- Celery's scheduler for periodic tasks
|
||||
- Uses DynamicTenantScheduler for multi-tenant support
|
||||
- Schedules tasks like:
|
||||
- Indexing checks (every 15 seconds)
|
||||
- Connector deletion checks (every 20 seconds)
|
||||
- Vespa sync checks (every 20 seconds)
|
||||
- Pruning checks (every 20 seconds)
|
||||
- KG processing (every 60 seconds)
|
||||
- Monitoring tasks (every 5 minutes)
|
||||
- Cleanup tasks (hourly)
|
||||
|
||||
#### Worker Deployment Modes
|
||||
|
||||
Onyx supports two deployment modes for background workers, controlled by the `USE_LIGHTWEIGHT_BACKGROUND_WORKER` environment variable:
|
||||
|
||||
**Lightweight Mode** (default, `USE_LIGHTWEIGHT_BACKGROUND_WORKER=true`):
|
||||
- Runs a single consolidated `background` worker that handles all background tasks:
|
||||
- Light worker tasks (Vespa operations, permissions sync, deletion)
|
||||
- Document processing (indexing pipeline)
|
||||
- Document fetching (connector data retrieval)
|
||||
- Pruning operations (from `heavy` worker)
|
||||
- Knowledge graph processing (from `kg_processing` worker)
|
||||
- Monitoring tasks (from `monitoring` worker)
|
||||
- User file processing (from `user_file_processing` worker)
|
||||
- Lower resource footprint (fewer worker processes)
|
||||
- Suitable for smaller deployments or development environments
|
||||
- Default concurrency: 20 threads (increased to handle combined workload)
|
||||
|
||||
**Standard Mode** (`USE_LIGHTWEIGHT_BACKGROUND_WORKER=false`):
|
||||
- Runs separate specialized workers as documented above (light, docprocessing, docfetching, heavy, kg_processing, monitoring, user_file_processing)
|
||||
- Better isolation and scalability
|
||||
- Can scale individual workers independently based on workload
|
||||
- Suitable for production deployments with higher load
|
||||
|
||||
The deployment mode affects:
|
||||
- **Backend**: Worker processes spawned by supervisord or dev scripts
|
||||
- **Helm**: Which Kubernetes deployments are created
|
||||
- **Dev Environment**: Which workers `dev_run_background_jobs.py` spawns
|
||||
|
||||
#### Key Features
|
||||
|
||||
- **Thread-based Workers**: All workers use thread pools (not processes) for stability
|
||||
- **Tenant Awareness**: Multi-tenant support with per-tenant task isolation. There is a
|
||||
middleware layer that automatically finds the appropriate tenant ID when sending tasks
|
||||
via Celery Beat.
|
||||
- **Task Prioritization**: High, Medium, Low priority queues
|
||||
- **Monitoring**: Built-in heartbeat and liveness checking
|
||||
- **Failure Handling**: Automatic retry and failure recovery mechanisms
|
||||
- **Redis Coordination**: Inter-process communication via Redis
|
||||
- **PostgreSQL State**: Task state and metadata stored in PostgreSQL
|
||||
|
||||
|
||||
#### Important Notes
|
||||
|
||||
**Defining Tasks**:
|
||||
- Always use `@shared_task` rather than `@celery_app`
|
||||
- Put tasks under `background/celery/tasks/` or `ee/background/celery/tasks`
|
||||
|
||||
**Defining APIs**:
|
||||
When creating new FastAPI APIs, do NOT use the `response_model` field. Instead, just type the
|
||||
function.
|
||||
|
||||
**Testing Updates**:
|
||||
If you make any updates to a celery worker and you want to test these changes, you will need
|
||||
to ask me to restart the celery worker. There is no auto-restart on code-change mechanism.
|
||||
|
||||
### Code Quality
|
||||
```bash
|
||||
# Install and run pre-commit hooks
|
||||
pre-commit install
|
||||
pre-commit run --all-files
|
||||
```
|
||||
|
||||
NOTE: Always make sure everything is strictly typed (both in Python and Typescript).
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
### Technology Stack
|
||||
- **Backend**: Python 3.11, FastAPI, SQLAlchemy, Alembic, Celery
|
||||
- **Frontend**: Next.js 15+, React 18, TypeScript, Tailwind CSS
|
||||
- **Database**: PostgreSQL with Redis caching
|
||||
- **Search**: Vespa vector database
|
||||
- **Auth**: OAuth2, SAML, multi-provider support
|
||||
- **AI/ML**: LangChain, LiteLLM, multiple embedding models
|
||||
|
||||
### Directory Structure
|
||||
|
||||
```
|
||||
backend/
|
||||
├── onyx/
|
||||
│ ├── auth/ # Authentication & authorization
|
||||
│ ├── chat/ # Chat functionality & LLM interactions
|
||||
│ ├── connectors/ # Data source connectors
|
||||
│ ├── db/ # Database models & operations
|
||||
│ ├── document_index/ # Vespa integration
|
||||
│ ├── federated_connectors/ # External search connectors
|
||||
│ ├── llm/ # LLM provider integrations
|
||||
│ └── server/ # API endpoints & routers
|
||||
├── ee/ # Enterprise Edition features
|
||||
├── alembic/ # Database migrations
|
||||
└── tests/ # Test suites
|
||||
|
||||
web/
|
||||
├── src/app/ # Next.js app router pages
|
||||
├── src/components/ # Reusable React components
|
||||
└── src/lib/ # Utilities & business logic
|
||||
```
|
||||
|
||||
## Database & Migrations
|
||||
|
||||
### Running Migrations
|
||||
```bash
|
||||
# Standard migrations
|
||||
alembic upgrade head
|
||||
|
||||
# Multi-tenant (Enterprise)
|
||||
alembic -n schema_private upgrade head
|
||||
```
|
||||
|
||||
### Creating Migrations
|
||||
```bash
|
||||
# Create migration
|
||||
alembic revision -m "description"
|
||||
|
||||
# Multi-tenant migration
|
||||
alembic -n schema_private revision -m "description"
|
||||
```
|
||||
|
||||
Write the migration manually and place it in the file that alembic creates when running the above command.
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
First, you must activate the virtual environment with `source .venv/bin/activate`.
|
||||
|
||||
There are 4 main types of tests within Onyx:
|
||||
|
||||
### Unit Tests
|
||||
These should not assume any Onyx/external services are available to be called.
|
||||
Interactions with the outside world should be mocked using `unittest.mock`. Generally, only
|
||||
write these for complex, isolated modules e.g. `citation_processing.py`.
|
||||
|
||||
To run them:
|
||||
|
||||
```bash
|
||||
pytest -xv backend/tests/unit
|
||||
```
|
||||
|
||||
### External Dependency Unit Tests
|
||||
These tests assume that all external dependencies of Onyx are available and callable (e.g. Postgres, Redis,
|
||||
MinIO/S3, Vespa are running + OpenAI can be called + any request to the internet is fine + etc.).
|
||||
|
||||
However, the actual Onyx containers are not running and with these tests we call the function to test directly.
|
||||
We can also mock components/calls at will.
|
||||
|
||||
The goal with these tests is to minimize mocking while giving some flexibility to mock things that are flaky,
|
||||
need strictly controlled behavior, or need to have their internal behavior validated (e.g. verify a function is called
|
||||
with certain args, something that would be impossible with proper integration tests).
|
||||
|
||||
A great example of this type of test is `backend/tests/external_dependency_unit/connectors/confluence/test_confluence_group_sync.py`.
|
||||
|
||||
To run them:
|
||||
|
||||
```bash
|
||||
python -m dotenv -f .vscode/.env run -- pytest backend/tests/external_dependency_unit
|
||||
```
|
||||
|
||||
### Integration Tests
|
||||
Standard integration tests. Every test in `backend/tests/integration` runs against a real Onyx deployment. We cannot
|
||||
mock anything in these tests. Prefer writing integration tests (or External Dependency Unit Tests if mocking/internal
|
||||
verification is necessary) over any other type of test.
|
||||
|
||||
Tests are parallelized at a directory level.
|
||||
|
||||
When writing integration tests, make sure to check the root `conftest.py` for useful fixtures and the `backend/tests/integration/common_utils` directory for utilities. Prefer calling the appropriate Manager
class in the utils (if one exists) over directly calling the APIs with a library like `requests`. Prefer using fixtures rather than
|
||||
calling the utilities directly (e.g. do NOT create admin users with
|
||||
`admin_user = UserManager.create(name="admin_user")`, instead use the `admin_user` fixture).
|
||||
|
||||
A great example of this type of test is `backend/tests/integration/dev_apis/test_simple_chat_api.py`.
|
||||
|
||||
To run them:
|
||||
|
||||
```bash
|
||||
python -m dotenv -f .vscode/.env run -- pytest backend/tests/integration
|
||||
```
|
||||
|
||||
### Playwright (E2E) Tests
|
||||
These tests are an even more complete version of the Integration Tests mentioned above: they have all services of Onyx
|
||||
running, *including* the Web Server.
|
||||
|
||||
Use these tests for anything that requires significant frontend <-> backend coordination.
|
||||
|
||||
Tests are located at `web/tests/e2e`. Tests are written in TypeScript.
|
||||
|
||||
To run them:
|
||||
|
||||
```bash
|
||||
npx playwright test <TEST_NAME>
|
||||
```
|
||||
|
||||
|
||||
## Logs
|
||||
|
||||
When (1) writing integration tests or (2) doing live tests (e.g. curl / playwright) you can get access
|
||||
to logs via the `backend/log/<service_name>_debug.log` file. All Onyx services (api_server, web_server, celery_X)
|
||||
will be tailing their logs to this file.
|
||||
|
||||
|
||||
## Security Considerations
|
||||
|
||||
- Never commit API keys or secrets to the repository
|
||||
- Use encrypted credential storage for connector credentials
|
||||
- Follow RBAC patterns for new features
|
||||
- Implement proper input validation with Pydantic models
|
||||
- Use parameterized queries to prevent SQL injection
|
||||
|
||||
## AI/LLM Integration
|
||||
|
||||
- Multiple LLM providers supported via LiteLLM
|
||||
- Configurable models per feature (chat, search, embeddings)
|
||||
- Streaming support for real-time responses
|
||||
- Token management and rate limiting
|
||||
- Custom prompts and agent actions
|
||||
|
||||
## UI/UX Patterns
|
||||
|
||||
- Tailwind CSS with design system in `web/src/components/ui/`
|
||||
- Radix UI and Headless UI for accessible components
|
||||
- SWR for data fetching and caching
|
||||
- Form validation with react-hook-form
|
||||
- Error handling with popup notifications
|
||||
|
||||
## Creating a Plan
|
||||
When creating a plan in the `plans` directory, make sure to include at least these elements:
|
||||
|
||||
**Issues to Address**
|
||||
What the change is meant to do.
|
||||
|
||||
**Important Notes**
|
||||
Things you come across in your research that are important to the implementation.
|
||||
|
||||
**Implementation strategy**
|
||||
How you are going to make the changes happen. High level approach.
|
||||
|
||||
**Tests**
|
||||
What unit (use rarely), external dependency unit, integration, and playwright tests you plan to write to
|
||||
verify the correct behavior. Don't overtest. Usually, a given change only needs one type of test.
|
||||
|
||||
Do NOT include these: *Timeline*, *Rollback plan*
|
||||
|
||||
This is a minimal list - feel free to include more. Do NOT write code as part of your plan.
|
||||
Keep it high level. You can reference certain files or functions though.
|
||||
|
||||
Before writing your plan, make sure to do research. Explore the relevant sections in the codebase.
|
||||
@@ -1,4 +1,4 @@
|
||||
<!-- DANSWER_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/CONTRIBUTING.md"} -->
|
||||
<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/CONTRIBUTING.md"} -->
|
||||
|
||||
# Contributing to Onyx
|
||||
|
||||
@@ -12,9 +12,8 @@ As an open source project in a rapidly changing space, we welcome all contributi
|
||||
|
||||
The [GitHub Issues](https://github.com/onyx-dot-app/onyx/issues) page is a great place to start for contribution ideas.
|
||||
|
||||
To ensure that your contribution is aligned with the project's direction, please reach out to Hagen (or any other maintainer) on the Onyx team
|
||||
via [Slack](https://join.slack.com/t/onyx-dot-app/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA) /
|
||||
[Discord](https://discord.gg/TDJ59cGV2X) or [email](mailto:founders@onyx.app).
|
||||
To ensure that your contribution is aligned with the project's direction, please reach out to any maintainer on the Onyx team
|
||||
via [Discord](https://discord.gg/4NA5SbzrWb) or [email](mailto:hello@onyx.app).
|
||||
|
||||
Issues that have been explicitly approved by the maintainers (aligned with the direction of the project)
|
||||
will be marked with the `approved by maintainers` label.
|
||||
@@ -28,8 +27,7 @@ Your input is vital to making sure that Onyx moves in the right direction.
|
||||
Before starting on implementation, please raise a GitHub issue.
|
||||
|
||||
Also, always feel free to message the founders (Chris Weaver / Yuhong Sun) on
|
||||
[Slack](https://join.slack.com/t/onyx-dot-app/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA) /
|
||||
[Discord](https://discord.gg/TDJ59cGV2X) directly about anything at all.
|
||||
[Discord](https://discord.gg/4NA5SbzrWb) directly about anything at all.
|
||||
|
||||
### Contributing Code
|
||||
|
||||
@@ -46,9 +44,7 @@ Our goal is to make contributing as easy as possible. If you run into any issues
|
||||
That way we can help future contributors and users can avoid the same issue.
|
||||
|
||||
We also have support channels and generally interesting discussions on our
|
||||
[Slack](https://join.slack.com/t/onyx-dot-app/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA)
|
||||
and
|
||||
[Discord](https://discord.gg/TDJ59cGV2X).
|
||||
[Discord](https://discord.gg/4NA5SbzrWb).
|
||||
|
||||
We would love to see you there!
|
||||
|
||||
@@ -59,6 +55,7 @@ Onyx being a fully functional app, relies on some external software, specificall
|
||||
- [Postgres](https://www.postgresql.org/) (Relational DB)
|
||||
- [Vespa](https://vespa.ai/) (Vector DB/Search Engine)
|
||||
- [Redis](https://redis.io/) (Cache)
|
||||
- [MinIO](https://min.io/) (File Store)
|
||||
- [Nginx](https://nginx.org/) (Not needed for development flows generally)
|
||||
|
||||
> **Note:**
|
||||
@@ -83,10 +80,6 @@ python -m venv .venv
|
||||
source .venv/bin/activate
|
||||
```
|
||||
|
||||
> **Note:**
|
||||
> This virtual environment MUST NOT be set up WITHIN the onyx directory if you plan on using mypy within certain IDEs.
|
||||
> For simplicity, we recommend setting up the virtual environment outside of the onyx directory.
|
||||
|
||||
_For Windows, activate the virtual environment using Command Prompt:_
|
||||
|
||||
```bash
|
||||
@@ -102,10 +95,21 @@ If using PowerShell, the command slightly differs:
|
||||
Install the required python dependencies:
|
||||
|
||||
```bash
|
||||
pip install -r onyx/backend/requirements/default.txt
|
||||
pip install -r onyx/backend/requirements/dev.txt
|
||||
pip install -r onyx/backend/requirements/ee.txt
|
||||
pip install -r onyx/backend/requirements/model_server.txt
|
||||
pip install -r backend/requirements/combined.txt
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```bash
|
||||
pip install -r backend/requirements/default.txt
|
||||
pip install -r backend/requirements/dev.txt
|
||||
pip install -r backend/requirements/ee.txt
|
||||
pip install -r backend/requirements/model_server.txt
|
||||
```
|
||||
|
||||
Fix vscode/cursor auto-imports:
|
||||
```bash
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
Install Playwright for Python (headless browser required by the Web Connector)
|
||||
@@ -120,8 +124,15 @@ You may have to deactivate and reactivate your virtualenv for `playwright` to ap
|
||||
|
||||
#### Frontend: Node dependencies
|
||||
|
||||
Install [Node.js and npm](https://docs.npmjs.com/downloading-and-installing-node-js-and-npm) for the frontend.
|
||||
Once the above is done, navigate to `onyx/web` run:
|
||||
Onyx uses Node v22.20.0. We highly recommend you use [Node Version Manager (nvm)](https://github.com/nvm-sh/nvm)
|
||||
to manage your Node installations. Once installed, you can run
|
||||
|
||||
```bash
|
||||
nvm install 22 && nvm use 22
|
||||
node -v # verify your active version
|
||||
```
|
||||
|
||||
Navigate to `onyx/web` and run:
|
||||
|
||||
```bash
|
||||
npm i
|
||||
@@ -132,8 +143,6 @@ npm i
|
||||
### Backend
|
||||
|
||||
For the backend, you'll need to setup pre-commit hooks (black / reorder-python-imports).
|
||||
First, install pre-commit (if you don't have it already) following the instructions
|
||||
[here](https://pre-commit.com/#installation).
|
||||
|
||||
With the virtual environment active, install the pre-commit library with:
|
||||
|
||||
@@ -153,15 +162,17 @@ To run the mypy checks manually, run `python -m mypy .` from the `onyx/backend`
|
||||
|
||||
### Web
|
||||
|
||||
We use `prettier` for formatting. The desired version (2.8.8) will be installed via a `npm i` from the `onyx/web` directory.
|
||||
We use `prettier` for formatting. The desired version will be installed via a `npm i` from the `onyx/web` directory.
|
||||
To run the formatter, use `npx prettier --write .` from the `onyx/web` directory.
|
||||
Please double check that prettier passes before creating a pull request.
|
||||
|
||||
Pre-commit will also run prettier automatically on files you've recently touched. If re-formatted, your commit will fail.
|
||||
Re-stage your changes and commit again.
|
||||
|
||||
# Running the application for development
|
||||
|
||||
## Developing using VSCode Debugger (recommended)
|
||||
|
||||
We highly recommend using VSCode debugger for development.
|
||||
**We highly recommend using VSCode debugger for development.**
|
||||
See [CONTRIBUTING_VSCODE.md](./CONTRIBUTING_VSCODE.md) for more details.
|
||||
|
||||
Otherwise, you can follow the instructions below to run the application for development.
|
||||
@@ -171,10 +182,10 @@ Otherwise, you can follow the instructions below to run the application for deve
|
||||
|
||||
You will need Docker installed to run these containers.
|
||||
|
||||
First navigate to `onyx/deployment/docker_compose`, then start up Postgres/Vespa/Redis with:
|
||||
First navigate to `onyx/deployment/docker_compose`, then start up Postgres/Vespa/Redis/MinIO with:
|
||||
|
||||
```bash
|
||||
docker compose -f docker-compose.dev.yml -p onyx-stack up -d index relational_db cache
|
||||
docker compose up -d index relational_db cache minio
|
||||
```
|
||||
|
||||
(index refers to Vespa, relational_db refers to Postgres, and cache refers to Redis)
|
||||
@@ -256,7 +267,7 @@ You can run the full Onyx application stack from pre-built images including all
|
||||
Navigate to `onyx/deployment/docker_compose` and run:
|
||||
|
||||
```bash
|
||||
docker compose -f docker-compose.dev.yml -p onyx-stack up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
After Docker pulls and starts these containers, navigate to `http://localhost:3000` to use Onyx.
|
||||
@@ -264,7 +275,7 @@ After Docker pulls and starts these containers, navigate to `http://localhost:30
|
||||
If you want to make changes to Onyx and run those changes in Docker, you can also build a local version of the Onyx container images that incorporates your changes like so:
|
||||
|
||||
```bash
|
||||
docker compose -f docker-compose.dev.yml -p onyx-stack up -d --build
|
||||
docker compose up -d --build
|
||||
```
|
||||
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ This guide explains how to set up and use VSCode's debugging capabilities with t
|
||||
## Initial Setup
|
||||
|
||||
1. **Environment Setup**:
|
||||
- Copy `.vscode/.env.template` to `.vscode/.env`
|
||||
- Copy `.vscode/env_template.txt` to `.vscode/.env`
|
||||
- Fill in the necessary environment variables in `.vscode/.env`
|
||||
2. **launch.json**:
|
||||
- Copy `.vscode/launch.template.jsonc` to `.vscode/launch.json`
|
||||
@@ -17,10 +17,12 @@ Before starting, make sure the Docker Daemon is running.
|
||||
1. Open the Debug view in VSCode (Cmd+Shift+D on macOS)
|
||||
2. From the dropdown at the top, select "Clear and Restart External Volumes and Containers" and press the green play button
|
||||
3. From the dropdown at the top, select "Run All Onyx Services" and press the green play button
|
||||
4. CD into web, run "npm i" followed by npm run dev.
|
||||
5. Now, you can navigate to onyx in your browser (default is http://localhost:3000) and start using the app
|
||||
6. You can set breakpoints by clicking to the left of line numbers to help debug while the app is running
|
||||
7. Use the debug toolbar to step through code, inspect variables, etc.
|
||||
4. Now, you can navigate to onyx in your browser (default is http://localhost:3000) and start using the app
|
||||
5. You can set breakpoints by clicking to the left of line numbers to help debug while the app is running
|
||||
6. Use the debug toolbar to step through code, inspect variables, etc.
|
||||
|
||||
Note: Clear and Restart External Volumes and Containers will reset your postgres and Vespa (relational-db and index).
|
||||
Only run this if you are okay with wiping your data.
|
||||
|
||||
## Features
|
||||
|
||||
|
||||
134  README.md
@@ -1,117 +1,103 @@
|
||||
<!-- DANSWER_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/README.md"} -->
|
||||
|
||||
<a name="readme-top"></a>
|
||||
|
||||
<h2 align="center">
|
||||
<a href="https://www.onyx.app/"> <img width="50%" src="https://github.com/onyx-dot-app/onyx/blob/logo/OnyxLogoCropped.jpg?raw=true)" /></a>
|
||||
<a href="https://www.onyx.app/"> <img width="50%" src="https://github.com/onyx-dot-app/onyx/blob/logo/OnyxLogoCropped.jpg?raw=true)" /></a>
|
||||
</h2>
|
||||
|
||||
<p align="center">
|
||||
<p align="center">Open Source Gen-AI + Enterprise Search.</p>
|
||||
<p align="center">Open Source AI Platform</p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://docs.onyx.app/" target="_blank">
|
||||
<img src="https://img.shields.io/badge/docs-view-blue" alt="Documentation">
|
||||
</a>
|
||||
<a href="https://join.slack.com/t/onyx-dot-app/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA" target="_blank">
|
||||
<img src="https://img.shields.io/badge/slack-join-blue.svg?logo=slack" alt="Slack">
|
||||
</a>
|
||||
<a href="https://discord.gg/TDJ59cGV2X" target="_blank">
|
||||
<img src="https://img.shields.io/badge/discord-join-blue.svg?logo=discord&logoColor=white" alt="Discord">
|
||||
</a>
|
||||
<a href="https://github.com/onyx-dot-app/onyx/blob/main/README.md" target="_blank">
|
||||
<img src="https://img.shields.io/static/v1?label=license&message=MIT&color=blue" alt="License">
|
||||
</a>
|
||||
<a href="https://discord.gg/TDJ59cGV2X" target="_blank">
|
||||
<img src="https://img.shields.io/badge/discord-join-blue.svg?logo=discord&logoColor=white" alt="Discord">
|
||||
</a>
|
||||
<a href="https://docs.onyx.app/" target="_blank">
|
||||
<img src="https://img.shields.io/badge/docs-view-blue" alt="Documentation">
|
||||
</a>
|
||||
<a href="https://docs.onyx.app/" target="_blank">
|
||||
<img src="https://img.shields.io/website?url=https://www.onyx.app&up_message=visit&up_color=blue" alt="Documentation">
|
||||
</a>
|
||||
<a href="https://github.com/onyx-dot-app/onyx/blob/main/LICENSE" target="_blank">
|
||||
<img src="https://img.shields.io/static/v1?label=license&message=MIT&color=blue" alt="License">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
<strong>[Onyx](https://www.onyx.app/)</strong> (formerly Danswer) is the AI platform connected to your company's docs, apps, and people.
|
||||
Onyx provides a feature rich Chat interface and plugs into any LLM of your choice.
|
||||
Keep knowledge and access controls sync-ed across over 40 connectors like Google Drive, Slack, Confluence, Salesforce, etc.
|
||||
Create custom AI agents with unique prompts, knowledge, and actions that the agents can take.
|
||||
Onyx can be deployed securely anywhere and for any scale - on a laptop, on-premise, or to cloud.
|
||||
|
||||
|
||||
<h3>Feature Highlights</h3>
|
||||
**[Onyx](https://www.onyx.app/)** is a feature-rich, self-hostable Chat UI that works with any LLM. It is easy to deploy and can run in a completely airgapped environment.
|
||||
|
||||
**Deep research over your team's knowledge:**
|
||||
Onyx comes loaded with advanced features like Agents, Web Search, RAG, MCP, Deep Research, Connectors to 40+ knowledge sources, and more.
|
||||
|
||||
https://private-user-images.githubusercontent.com/32520769/414509312-48392e83-95d0-4fb5-8650-a396e05e0a32.mp4?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3Mzk5Mjg2MzYsIm5iZiI6MTczOTkyODMzNiwicGF0aCI6Ii8zMjUyMDc2OS80MTQ1MDkzMTItNDgzOTJlODMtOTVkMC00ZmI1LTg2NTAtYTM5NmUwNWUwYTMyLm1wND9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTAyMTklMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjUwMjE5VDAxMjUzNlomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPWFhMzk5Njg2Y2Y5YjFmNDNiYTQ2YzM5ZTg5YWJiYTU2NWMyY2YwNmUyODE2NWUxMDRiMWQxZWJmODI4YTA0MTUmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.a9D8A0sgKE9AoaoE-mfFbJ6_OKYeqaf7TZ4Han2JfW8
|
||||
> [!TIP]
|
||||
> Run Onyx with one command (or see deployment section below):
|
||||
> ```
|
||||
> curl -fsSL https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/docker_compose/install.sh > install.sh && chmod +x install.sh && ./install.sh
|
||||
> ```
|
||||
|
||||
|
||||
**Use Onyx as a secure AI Chat with any LLM:**
|
||||
****
|
||||
|
||||

|
||||
|
||||
|
||||
**Easily set up connectors to your apps:**
|
||||
|
||||

|
||||
## ⭐ Features
|
||||
- **🤖 Custom Agents:** Build AI Agents with unique instructions, knowledge and actions.
|
||||
- **🌍 Web Search:** Browse the web with Google PSE, Exa, and Serper as well as an in-house scraper or Firecrawl.
|
||||
- **🔍 RAG:** Best in class hybrid-search + knowledge graph for uploaded files and ingested documents from connectors.
|
||||
- **🔄 Connectors:** Pull knowledge, metadata, and access information from over 40 applications.
|
||||
- **🔬 Deep Research:** Get in depth answers with an agentic multi-step search.
|
||||
- **▶️ Actions & MCP:** Give AI Agents the ability to interact with external systems.
|
||||
- **💻 Code Interpreter:** Execute code to analyze data, render graphs and create files.
|
||||
- **🎨 Image Generation:** Generate images based on user prompts.
|
||||
- **👥 Collaboration:** Chat sharing, feedback gathering, user management, usage analytics, and more.
|
||||
|
||||
Onyx works with all LLMs (like OpenAI, Anthropic, Gemini, etc.) and self-hosted LLMs (like Ollama, vLLM, etc.)
|
||||
|
||||
To learn more about the features, check out our [documentation](https://docs.onyx.app/welcome)!
|
||||
|
||||
|
||||
**Access Onyx where your team already works:**
|
||||
|
||||

|
||||
## 🚀 Deployment
|
||||
Onyx supports deployments in Docker, Kubernetes, Terraform, along with guides for major cloud providers.
|
||||
|
||||
See guides below:
|
||||
- [Docker](https://docs.onyx.app/deployment/local/docker) or [Quickstart](https://docs.onyx.app/deployment/getting_started/quickstart) (best for most users)
|
||||
- [Kubernetes](https://docs.onyx.app/deployment/local/kubernetes) (best for large teams)
|
||||
- [Terraform](https://docs.onyx.app/deployment/local/terraform) (best for teams already using Terraform)
|
||||
- Cloud specific guides (best if specifically using [AWS EKS](https://docs.onyx.app/deployment/cloud/aws/eks), [Azure VMs](https://docs.onyx.app/deployment/cloud/azure), etc.)
|
||||
|
||||
> [!TIP]
|
||||
> **To try Onyx for free without deploying, check out [Onyx Cloud](https://cloud.onyx.app/signup)**.
|
||||
|
||||
|
||||
## Deployment
|
||||
**To try it out for free and get started in seconds, check out [Onyx Cloud](https://cloud.onyx.app/signup)**.
|
||||
|
||||
Onyx can also be run locally (even on a laptop) or deployed on a virtual machine with a single
|
||||
`docker compose` command. Checkout our [docs](https://docs.onyx.app/quickstart) to learn more.
|
||||
## 🔍 Other Notable Benefits
|
||||
Onyx is built for teams of all sizes, from individual users to the largest global enterprises.
|
||||
|
||||
We also have built-in support for high-availability/scalable deployment on Kubernetes.
|
||||
References [here](https://github.com/onyx-dot-app/onyx/tree/main/deployment).
|
||||
- **Enterprise Search**: far more than simple RAG, Onyx has custom indexing and retrieval that remains performant and accurate for scales of up to tens of millions of documents.
|
||||
- **Security**: SSO (OIDC/SAML/OAuth2), RBAC, encryption of credentials, etc.
|
||||
- **Management UI**: different user roles such as basic, curator, and admin.
|
||||
- **Document Permissioning**: mirrors user access from external apps for RAG use cases.
|
||||
|
||||
|
||||
## 🔍 Other Notable Benefits of Onyx
|
||||
- Custom deep learning models for indexing and inference time, only through Onyx + learning from user feedback.
|
||||
- Flexible security features like SSO (OIDC/SAML/OAuth2), RBAC, encryption of credentials, etc.
|
||||
- Knowledge curation features like document-sets, query history, usage analytics, etc.
|
||||
- Scalable deployment options tested up to many tens of thousands users and hundreds of millions of documents.
|
||||
|
||||
|
||||
## 🚧 Roadmap
|
||||
- New methods in information retrieval (StructRAG, LightGraphRAG, etc.)
|
||||
- Personalized Search
|
||||
- Organizational understanding and ability to locate and suggest experts from your team.
|
||||
- Code Search
|
||||
- SQL and Structured Query Language
|
||||
To see ongoing and upcoming projects, check out our [roadmap](https://github.com/orgs/onyx-dot-app/projects/2)!
|
||||
|
||||
|
||||
## 🔌 Connectors
|
||||
Keep knowledge and access in sync across 40+ connectors:
|
||||
|
||||
- Google Drive
|
||||
- Confluence
|
||||
- Slack
|
||||
- Gmail
|
||||
- Salesforce
|
||||
- Microsoft Sharepoint
|
||||
- Github
|
||||
- Jira
|
||||
- Zendesk
|
||||
- Gong
|
||||
- Microsoft Teams
|
||||
- Dropbox
|
||||
- Local Files
|
||||
- Websites
|
||||
- And more ...
|
||||
|
||||
See the full list [here](https://docs.onyx.app/connectors).
|
||||
|
||||
|
||||
## 📚 Licensing
|
||||
There are two editions of Onyx:
|
||||
|
||||
- Onyx Community Edition (CE) is available freely under the MIT Expat license. Simply follow the Deployment guide above.
|
||||
- Onyx Community Edition (CE) is available freely under the MIT license.
|
||||
- Onyx Enterprise Edition (EE) includes extra features that are primarily useful for larger organizations.
|
||||
For feature details, check out [our website](https://www.onyx.app/pricing).
|
||||
|
||||
To try the Onyx Enterprise Edition:
|
||||
1. Check out [Onyx Cloud](https://cloud.onyx.app/signup).
|
||||
2. For self-hosting the Enterprise Edition, contact us at [founders@onyx.app](mailto:founders@onyx.app) or book a call with us on our [Cal](https://cal.com/team/onyx/founders).
|
||||
|
||||
|
||||
## 👪 Community
|
||||
Join our open source community on **[Discord](https://discord.gg/TDJ59cGV2X)**!
|
||||
|
||||
|
||||
|
||||
## 💡 Contributing
|
||||
Looking to contribute? Please check out the [Contribution Guide](CONTRIBUTING.md) for more details.
|
||||
|
||||
|
||||
4  backend/.gitignore  (vendored)
@@ -9,4 +9,6 @@ api_keys.py
|
||||
vespa-app.zip
|
||||
dynamic_config_storage/
|
||||
celerybeat-schedule*
|
||||
onyx/connectors/salesforce/data/
|
||||
onyx/connectors/salesforce/data/
|
||||
.test.env
|
||||
/generated
|
||||
|
||||
@@ -12,10 +12,11 @@ ARG ONYX_VERSION=0.0.0-dev
|
||||
# DO_NOT_TRACK is used to disable telemetry for Unstructured
|
||||
ENV ONYX_VERSION=${ONYX_VERSION} \
|
||||
DANSWER_RUNNING_IN_DOCKER="true" \
|
||||
DO_NOT_TRACK="true"
|
||||
DO_NOT_TRACK="true" \
|
||||
PLAYWRIGHT_BROWSERS_PATH="/app/.cache/ms-playwright"
|
||||
|
||||
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
|
||||
|
||||
RUN echo "ONYX_VERSION: ${ONYX_VERSION}"
|
||||
# Install system dependencies
|
||||
# cmake needed for psycopg (postgres)
|
||||
# libpq-dev needed for psycopg (postgres)
|
||||
@@ -47,22 +48,19 @@ RUN apt-get update && \
|
||||
# Remove py which is pulled in by retry, py is not needed and is a CVE
|
||||
COPY ./requirements/default.txt /tmp/requirements.txt
|
||||
COPY ./requirements/ee.txt /tmp/ee-requirements.txt
|
||||
RUN pip install --no-cache-dir --upgrade \
|
||||
--retries 5 \
|
||||
--timeout 30 \
|
||||
RUN uv pip install --system --no-cache-dir --upgrade \
|
||||
-r /tmp/requirements.txt \
|
||||
-r /tmp/ee-requirements.txt && \
|
||||
pip uninstall -y py && \
|
||||
playwright install chromium && \
|
||||
playwright install-deps chromium && \
|
||||
ln -s /usr/local/bin/supervisord /usr/bin/supervisord
|
||||
|
||||
# Cleanup for CVEs and size reduction
|
||||
# https://github.com/tornadoweb/tornado/issues/3107
|
||||
# xserver-common and xvfb included by playwright installation but not needed after
|
||||
# perl-base is part of the base Python Debian image but not needed for Onyx functionality
|
||||
# perl-base could only be removed with --allow-remove-essential
|
||||
RUN apt-get update && \
|
||||
ln -s /usr/local/bin/supervisord /usr/bin/supervisord && \
|
||||
# Cleanup for CVEs and size reduction
|
||||
# https://github.com/tornadoweb/tornado/issues/3107
|
||||
# xserver-common and xvfb included by playwright installation but not needed after
|
||||
# perl-base is part of the base Python Debian image but not needed for Onyx functionality
|
||||
# perl-base could only be removed with --allow-remove-essential
|
||||
apt-get update && \
|
||||
apt-get remove -y --allow-remove-essential \
|
||||
perl-base \
|
||||
xserver-common \
|
||||
@@ -72,12 +70,16 @@ RUN apt-get update && \
|
||||
libxmlsec1-dev \
|
||||
pkg-config \
|
||||
gcc && \
|
||||
apt-get install -y libxmlsec1-openssl && \
|
||||
# Install here to avoid some packages being cleaned up above
|
||||
apt-get install -y \
|
||||
libxmlsec1-openssl \
|
||||
# Install postgresql-client for easy manual tests
|
||||
postgresql-client && \
|
||||
apt-get autoremove -y && \
|
||||
rm -rf /var/lib/apt/lists/* && \
|
||||
rm -rf ~/.cache/uv /tmp/*.txt && \
|
||||
rm -f /usr/local/lib/python3.11/site-packages/tornado/test/test.key
|
||||
|
||||
|
||||
# Pre-downloading models for setups with limited egress
|
||||
RUN python -c "from tokenizers import Tokenizer; \
|
||||
Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')"
|
||||
@@ -85,31 +87,40 @@ Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')"
|
||||
# Pre-downloading NLTK for setups with limited egress
|
||||
RUN python -c "import nltk; \
|
||||
nltk.download('stopwords', quiet=True); \
|
||||
nltk.download('punkt', quiet=True);"
|
||||
nltk.download('punkt_tab', quiet=True);"
|
||||
# nltk.download('wordnet', quiet=True); introduce this back if lemmatization is needed
|
||||
|
||||
# Set up application files
|
||||
WORKDIR /app
|
||||
|
||||
# Create non-root user for security best practices
|
||||
RUN groupadd -g 1001 onyx && \
|
||||
useradd -u 1001 -g onyx -m -s /bin/bash onyx && \
|
||||
mkdir -p /var/log/onyx && \
|
||||
chmod 755 /var/log/onyx && \
|
||||
chown onyx:onyx /var/log/onyx
|
||||
|
||||
# Enterprise Version Files
|
||||
COPY ./ee /app/ee
|
||||
COPY --chown=onyx:onyx ./ee /app/ee
|
||||
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
|
||||
|
||||
# Set up application files
|
||||
COPY ./onyx /app/onyx
|
||||
COPY ./shared_configs /app/shared_configs
|
||||
COPY ./alembic /app/alembic
|
||||
COPY ./alembic_tenants /app/alembic_tenants
|
||||
COPY ./alembic.ini /app/alembic.ini
|
||||
COPY --chown=onyx:onyx ./onyx /app/onyx
|
||||
COPY --chown=onyx:onyx ./shared_configs /app/shared_configs
|
||||
COPY --chown=onyx:onyx ./alembic /app/alembic
|
||||
COPY --chown=onyx:onyx ./alembic_tenants /app/alembic_tenants
|
||||
COPY --chown=onyx:onyx ./alembic.ini /app/alembic.ini
|
||||
COPY supervisord.conf /usr/etc/supervisord.conf
|
||||
COPY ./static /app/static
|
||||
COPY --chown=onyx:onyx ./static /app/static
|
||||
|
||||
# Escape hatch scripts
|
||||
COPY ./scripts/debugging /app/scripts/debugging
|
||||
COPY ./scripts/force_delete_connector_by_id.py /app/scripts/force_delete_connector_by_id.py
|
||||
COPY --chown=onyx:onyx ./scripts/debugging /app/scripts/debugging
|
||||
COPY --chown=onyx:onyx ./scripts/force_delete_connector_by_id.py /app/scripts/force_delete_connector_by_id.py
|
||||
COPY --chown=onyx:onyx ./scripts/supervisord_entrypoint.sh /app/scripts/supervisord_entrypoint.sh
|
||||
RUN chmod +x /app/scripts/supervisord_entrypoint.sh
|
||||
|
||||
# Put logo in assets
|
||||
COPY ./assets /app/assets
|
||||
COPY --chown=onyx:onyx ./assets /app/assets
|
||||
|
||||
ENV PYTHONPATH=/app
|
||||
|
||||
|
||||
@@ -9,19 +9,42 @@ visit https://github.com/onyx-dot-app/onyx."
|
||||
# Default ONYX_VERSION, typically overriden during builds by GitHub Actions.
|
||||
ARG ONYX_VERSION=0.0.0-dev
|
||||
ENV ONYX_VERSION=${ONYX_VERSION} \
|
||||
DANSWER_RUNNING_IN_DOCKER="true"
|
||||
DANSWER_RUNNING_IN_DOCKER="true" \
|
||||
HF_HOME=/app/.cache/huggingface
|
||||
|
||||
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
|
||||
|
||||
RUN echo "ONYX_VERSION: ${ONYX_VERSION}"
|
||||
# Create non-root user for security best practices
|
||||
RUN mkdir -p /app && \
|
||||
groupadd -g 1001 onyx && \
|
||||
useradd -u 1001 -g onyx -m -s /bin/bash onyx && \
|
||||
chown -R onyx:onyx /app && \
|
||||
mkdir -p /var/log/onyx && \
|
||||
chmod 755 /var/log/onyx && \
|
||||
chown onyx:onyx /var/log/onyx
|
||||
|
||||
# --- add toolchain needed for Rust/Python builds (fastuuid) ---
|
||||
ENV RUSTUP_HOME=/usr/local/rustup \
|
||||
CARGO_HOME=/usr/local/cargo \
|
||||
PATH=/usr/local/cargo/bin:$PATH
|
||||
|
||||
RUN set -eux; \
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
pkg-config \
|
||||
curl \
|
||||
ca-certificates \
|
||||
# Install latest stable Rust (supports Cargo.lock v4)
|
||||
&& curl -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal --default-toolchain stable \
|
||||
&& rustc --version && cargo --version \
|
||||
&& apt-get remove -y --allow-remove-essential perl-base \
|
||||
&& apt-get autoremove -y \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY ./requirements/model_server.txt /tmp/requirements.txt
|
||||
RUN pip install --no-cache-dir --upgrade \
|
||||
--retries 5 \
|
||||
--timeout 30 \
|
||||
-r /tmp/requirements.txt
|
||||
|
||||
RUN apt-get remove -y --allow-remove-essential perl-base && \
|
||||
apt-get autoremove -y
|
||||
RUN uv pip install --system --no-cache-dir --upgrade \
|
||||
-r /tmp/requirements.txt && \
|
||||
rm -rf ~/.cache/uv /tmp/*.txt
|
||||
|
||||
# Pre-downloading models for setups with limited egress
|
||||
# Download tokenizers, distilbert for the Onyx model
|
||||
@@ -36,16 +59,18 @@ snapshot_download(repo_id='onyx-dot-app/information-content-model'); \
|
||||
snapshot_download('nomic-ai/nomic-embed-text-v1'); \
|
||||
snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
|
||||
from sentence_transformers import SentenceTransformer; \
|
||||
SentenceTransformer(model_name_or_path='nomic-ai/nomic-embed-text-v1', trust_remote_code=True);"
|
||||
|
||||
# In case the user has volumes mounted to /root/.cache/huggingface that they've downloaded while
|
||||
# running Onyx, don't overwrite it with the built in cache folder
|
||||
RUN mv /root/.cache/huggingface /root/.cache/temp_huggingface
|
||||
SentenceTransformer(model_name_or_path='nomic-ai/nomic-embed-text-v1', trust_remote_code=True);" && \
|
||||
# In case the user has volumes mounted to /app/.cache/huggingface that they've downloaded while
|
||||
# running Onyx, move the current contents of the cache folder to a temporary location to ensure
|
||||
# it's preserved in order to combine with the user's cache contents
|
||||
mv /app/.cache/huggingface /app/.cache/temp_huggingface && \
|
||||
chown -R onyx:onyx /app
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Utils used by model server
|
||||
COPY ./onyx/utils/logger.py /app/onyx/utils/logger.py
|
||||
COPY ./onyx/utils/middleware.py /app/onyx/utils/middleware.py
|
||||
|
||||
# Place to fetch version information
|
||||
COPY ./onyx/__init__.py /app/onyx/__init__.py
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
<!-- DANSWER_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/backend/alembic/README.md"} -->
|
||||
<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/backend/alembic/README.md"} -->
|
||||
|
||||
# Alembic DB Migrations
|
||||
|
||||
@@ -20,3 +20,44 @@ To run all un-applied migrations:
|
||||
To undo migrations:
|
||||
`alembic downgrade -X`
|
||||
where X is the number of migrations you want to undo from the current state
|
||||
|
||||
### Multi-tenant migrations
|
||||
|
||||
For multi-tenant deployments, you can use additional options:
|
||||
|
||||
**Upgrade all tenants:**
|
||||
```bash
|
||||
alembic -x upgrade_all_tenants=true upgrade head
|
||||
```
|
||||
|
||||
**Upgrade specific schemas:**
|
||||
```bash
|
||||
# Single schema
|
||||
alembic -x schemas=tenant_12345678-1234-1234-1234-123456789012 upgrade head
|
||||
|
||||
# Multiple schemas (comma-separated)
|
||||
alembic -x schemas=tenant_12345678-1234-1234-1234-123456789012,public,another_tenant upgrade head
|
||||
```
|
||||
|
||||
**Upgrade tenants within an alphabetical range:**
|
||||
```bash
|
||||
# Upgrade tenants 100-200 when sorted alphabetically (positions 100 to 200)
|
||||
alembic -x upgrade_all_tenants=true -x tenant_range_start=100 -x tenant_range_end=200 upgrade head
|
||||
|
||||
# Upgrade tenants starting from position 1000 alphabetically
|
||||
alembic -x upgrade_all_tenants=true -x tenant_range_start=1000 upgrade head
|
||||
|
||||
# Upgrade first 500 tenants alphabetically
|
||||
alembic -x upgrade_all_tenants=true -x tenant_range_end=500 upgrade head
|
||||
```
|
||||
|
||||
**Continue on error (for batch operations):**
|
||||
```bash
|
||||
alembic -x upgrade_all_tenants=true -x continue=true upgrade head
|
||||
```
|
||||
|
||||
The tenant range filtering works by:
|
||||
1. Sorting tenant IDs alphabetically
|
||||
2. Using 1-based position numbers (1st, 2nd, 3rd tenant, etc.)
|
||||
3. Filtering to the specified range of positions
|
||||
4. Non-tenant schemas (like 'public') are always included
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
from typing import Any, Literal
|
||||
from onyx.db.engine import get_iam_auth_token
|
||||
from onyx.db.engine.iam_auth import get_iam_auth_token
|
||||
from onyx.configs.app_configs import USE_IAM_AUTH
|
||||
from onyx.configs.app_configs import POSTGRES_HOST
|
||||
from onyx.configs.app_configs import POSTGRES_PORT
|
||||
from onyx.configs.app_configs import POSTGRES_USER
|
||||
from onyx.configs.app_configs import AWS_REGION_NAME
|
||||
from onyx.db.engine import build_connection_string
|
||||
from onyx.db.engine import get_all_tenant_ids
|
||||
from onyx.db.engine.sql_engine import build_connection_string
|
||||
from onyx.db.engine.tenant_utils import get_all_tenant_ids
|
||||
from sqlalchemy import event
|
||||
from sqlalchemy import pool
|
||||
from sqlalchemy import text
|
||||
@@ -21,9 +21,14 @@ from alembic import context
|
||||
from sqlalchemy.ext.asyncio import create_async_engine
|
||||
from sqlalchemy.sql.schema import SchemaItem
|
||||
from onyx.configs.constants import SSL_CERT_FILE
|
||||
from shared_configs.configs import MULTI_TENANT, POSTGRES_DEFAULT_SCHEMA
|
||||
from shared_configs.configs import (
|
||||
MULTI_TENANT,
|
||||
POSTGRES_DEFAULT_SCHEMA,
|
||||
TENANT_ID_PREFIX,
|
||||
)
|
||||
from onyx.db.models import Base
|
||||
from celery.backends.database.session import ResultModelBase # type: ignore
|
||||
from onyx.db.engine.sql_engine import SqlEngine
|
||||
|
||||
# Make sure in alembic.ini [logger_root] level=INFO is set or most logging will be
|
||||
# hidden! (defaults to level=WARN)
|
||||
@@ -68,15 +73,67 @@ def include_object(
|
||||
return True
|
||||
|
||||
|
||||
def get_schema_options() -> tuple[str, bool, bool, bool]:
|
||||
def filter_tenants_by_range(
|
||||
tenant_ids: list[str], start_range: int | None = None, end_range: int | None = None
|
||||
) -> list[str]:
|
||||
"""
|
||||
Filter tenant IDs by alphabetical position range.
|
||||
|
||||
Args:
|
||||
tenant_ids: List of tenant IDs to filter
|
||||
start_range: Starting position in alphabetically sorted list (1-based, inclusive)
|
||||
end_range: Ending position in alphabetically sorted list (1-based, inclusive)
|
||||
|
||||
Returns:
|
||||
Filtered list of tenant IDs in their original order
|
||||
"""
|
||||
if start_range is None and end_range is None:
|
||||
return tenant_ids
|
||||
|
||||
# Separate tenant IDs from non-tenant schemas
|
||||
tenant_schemas = [tid for tid in tenant_ids if tid.startswith(TENANT_ID_PREFIX)]
|
||||
non_tenant_schemas = [
|
||||
tid for tid in tenant_ids if not tid.startswith(TENANT_ID_PREFIX)
|
||||
]
|
||||
|
||||
# Sort tenant schemas alphabetically.
|
||||
# NOTE: can cause missed schemas if a schema is created in between workers
|
||||
# fetching of all tenant IDs. We accept this risk for now. Just re-running
|
||||
# the migration will fix the issue.
|
||||
sorted_tenant_schemas = sorted(tenant_schemas)
|
||||
|
||||
# Apply range filtering (0-based indexing)
|
||||
start_idx = start_range if start_range is not None else 0
|
||||
end_idx = end_range if end_range is not None else len(sorted_tenant_schemas)
|
||||
|
||||
# Ensure indices are within bounds
|
||||
start_idx = max(0, start_idx)
|
||||
end_idx = min(len(sorted_tenant_schemas), end_idx)
|
||||
|
||||
# Get the filtered tenant schemas
|
||||
filtered_tenant_schemas = sorted_tenant_schemas[start_idx:end_idx]
|
||||
|
||||
# Combine with non-tenant schemas and preserve original order
|
||||
filtered_tenants = []
|
||||
for tenant_id in tenant_ids:
|
||||
if tenant_id in filtered_tenant_schemas or tenant_id in non_tenant_schemas:
|
||||
filtered_tenants.append(tenant_id)
|
||||
|
||||
return filtered_tenants
|
||||
|
||||
|
||||
def get_schema_options() -> (
|
||||
tuple[bool, bool, bool, int | None, int | None, list[str] | None]
|
||||
):
|
||||
x_args_raw = context.get_x_argument()
|
||||
x_args = {}
|
||||
for arg in x_args_raw:
|
||||
for pair in arg.split(","):
|
||||
if "=" in pair:
|
||||
key, value = pair.split("=", 1)
|
||||
x_args[key.strip()] = value.strip()
|
||||
schema_name = x_args.get("schema", POSTGRES_DEFAULT_SCHEMA)
|
||||
if "=" in arg:
|
||||
key, value = arg.split("=", 1)
|
||||
x_args[key.strip()] = value.strip()
|
||||
else:
|
||||
raise ValueError(f"Invalid argument: {arg}")
|
||||
|
||||
create_schema = x_args.get("create_schema", "true").lower() == "true"
|
||||
upgrade_all_tenants = x_args.get("upgrade_all_tenants", "false").lower() == "true"
|
||||
|
||||
@@ -84,17 +141,81 @@ def get_schema_options() -> tuple[str, bool, bool, bool]:
|
||||
# only applies to online migrations
|
||||
continue_on_error = x_args.get("continue", "false").lower() == "true"
|
||||
|
||||
if (
|
||||
MULTI_TENANT
|
||||
and schema_name == POSTGRES_DEFAULT_SCHEMA
|
||||
and not upgrade_all_tenants
|
||||
):
|
||||
# Tenant range filtering
|
||||
tenant_range_start = None
|
||||
tenant_range_end = None
|
||||
|
||||
if "tenant_range_start" in x_args:
|
||||
try:
|
||||
tenant_range_start = int(x_args["tenant_range_start"])
|
||||
except ValueError:
|
||||
raise ValueError(
|
||||
f"Invalid tenant_range_start value: {x_args['tenant_range_start']}. Must be an integer."
|
||||
)
|
||||
|
||||
if "tenant_range_end" in x_args:
|
||||
try:
|
||||
tenant_range_end = int(x_args["tenant_range_end"])
|
||||
except ValueError:
|
||||
raise ValueError(
|
||||
f"Invalid tenant_range_end value: {x_args['tenant_range_end']}. Must be an integer."
|
||||
)
|
||||
|
||||
# Validate range
|
||||
if tenant_range_start is not None and tenant_range_end is not None:
|
||||
if tenant_range_start > tenant_range_end:
|
||||
raise ValueError(
|
||||
f"tenant_range_start ({tenant_range_start}) cannot be greater than tenant_range_end ({tenant_range_end})"
|
||||
)
|
||||
|
||||
# Specific schema names filtering (replaces both schema_name and the old tenant_ids approach)
|
||||
schemas = None
|
||||
if "schemas" in x_args:
|
||||
schema_names_str = x_args["schemas"].strip()
|
||||
if schema_names_str:
|
||||
# Split by comma and strip whitespace
|
||||
schemas = [
|
||||
name.strip() for name in schema_names_str.split(",") if name.strip()
|
||||
]
|
||||
if schemas:
|
||||
logger.info(f"Specific schema names specified: {schemas}")
|
||||
|
||||
# Validate that only one method is used at a time
|
||||
range_filtering = tenant_range_start is not None or tenant_range_end is not None
|
||||
specific_filtering = schemas is not None and len(schemas) > 0
|
||||
|
||||
if range_filtering and specific_filtering:
|
||||
raise ValueError(
|
||||
"Cannot run default migrations in public schema when multi-tenancy is enabled. "
|
||||
"Please specify a tenant-specific schema."
|
||||
"Cannot use both tenant range filtering (tenant_range_start/tenant_range_end) "
|
||||
"and specific schema filtering (schemas) at the same time. "
|
||||
"Please use only one filtering method."
|
||||
)
|
||||
|
||||
return schema_name, create_schema, upgrade_all_tenants, continue_on_error
|
||||
if upgrade_all_tenants and specific_filtering:
|
||||
raise ValueError(
|
||||
"Cannot use both upgrade_all_tenants=true and schemas at the same time. "
|
||||
"Use either upgrade_all_tenants=true for all tenants, or schemas for specific schemas."
|
||||
)
|
||||
|
||||
# If any filtering parameters are specified, we're not doing the default single schema migration
|
||||
if range_filtering:
|
||||
upgrade_all_tenants = True
|
||||
|
||||
# Validate multi-tenant requirements
|
||||
if MULTI_TENANT and not upgrade_all_tenants and not specific_filtering:
|
||||
raise ValueError(
|
||||
"In multi-tenant mode, you must specify either upgrade_all_tenants=true "
|
||||
"or provide schemas. Cannot run default migration."
|
||||
)
|
||||
|
||||
return (
|
||||
create_schema,
|
||||
upgrade_all_tenants,
|
||||
continue_on_error,
|
||||
tenant_range_start,
|
||||
tenant_range_end,
|
||||
schemas,
|
||||
)
|
||||
|
||||
|
||||
def do_run_migrations(
|
||||
@@ -141,12 +262,20 @@ def provide_iam_token_for_alembic(
|
||||
|
||||
async def run_async_migrations() -> None:
|
||||
(
|
||||
schema_name,
|
||||
create_schema,
|
||||
upgrade_all_tenants,
|
||||
continue_on_error,
|
||||
tenant_range_start,
|
||||
tenant_range_end,
|
||||
schemas,
|
||||
) = get_schema_options()
|
||||
|
||||
if not schemas and not MULTI_TENANT:
|
||||
schemas = [POSTGRES_DEFAULT_SCHEMA]
|
||||
|
||||
# without init_engine, subsequent engine calls fail hard intentionally
|
||||
SqlEngine.init_engine(pool_size=20, max_overflow=5)
|
||||
|
||||
engine = create_async_engine(
|
||||
build_connection_string(),
|
||||
poolclass=pool.NullPool,
|
||||
@@ -160,12 +289,50 @@ async def run_async_migrations() -> None:
|
||||
) -> None:
|
||||
provide_iam_token_for_alembic(dialect, conn_rec, cargs, cparams)
|
||||
|
||||
if upgrade_all_tenants:
|
||||
if schemas:
|
||||
# Use specific schema names directly without fetching all tenants
|
||||
logger.info(f"Migrating specific schema names: {schemas}")
|
||||
|
||||
i_schema = 0
|
||||
num_schemas = len(schemas)
|
||||
for schema in schemas:
|
||||
i_schema += 1
|
||||
logger.info(
|
||||
f"Migrating schema: index={i_schema} num_schemas={num_schemas} schema={schema}"
|
||||
)
|
||||
try:
|
||||
async with engine.connect() as connection:
|
||||
await connection.run_sync(
|
||||
do_run_migrations,
|
||||
schema_name=schema,
|
||||
create_schema=create_schema,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error migrating schema {schema}: {e}")
|
||||
if not continue_on_error:
|
||||
logger.error("--continue=true is not set, raising exception!")
|
||||
raise
|
||||
|
||||
logger.warning("--continue=true is set, continuing to next schema.")
|
||||
|
||||
elif upgrade_all_tenants:
|
||||
tenant_schemas = get_all_tenant_ids()
|
||||
|
||||
filtered_tenant_schemas = filter_tenants_by_range(
|
||||
tenant_schemas, tenant_range_start, tenant_range_end
|
||||
)
|
||||
|
||||
if tenant_range_start is not None or tenant_range_end is not None:
|
||||
logger.info(
|
||||
f"Filtering tenants by range: start={tenant_range_start}, end={tenant_range_end}"
|
||||
)
|
||||
logger.info(
|
||||
f"Total tenants: {len(tenant_schemas)}, Filtered tenants: {len(filtered_tenant_schemas)}"
|
||||
)
|
||||
|
||||
i_tenant = 0
|
||||
num_tenants = len(tenant_schemas)
|
||||
for schema in tenant_schemas:
|
||||
num_tenants = len(filtered_tenant_schemas)
|
||||
for schema in filtered_tenant_schemas:
|
||||
i_tenant += 1
|
||||
logger.info(
|
||||
f"Migrating schema: index={i_tenant} num_tenants={num_tenants} schema={schema}"
|
||||
@@ -180,36 +347,70 @@ async def run_async_migrations() -> None:
|
||||
except Exception as e:
|
||||
logger.error(f"Error migrating schema {schema}: {e}")
|
||||
if not continue_on_error:
|
||||
logger.error("--continue is not set, raising exception!")
|
||||
logger.error("--continue=true is not set, raising exception!")
|
||||
raise
|
||||
|
||||
logger.warning("--continue is set, continuing to next schema.")
|
||||
logger.warning("--continue=true is set, continuing to next schema.")
|
||||
|
||||
else:
|
||||
try:
|
||||
logger.info(f"Migrating schema: {schema_name}")
|
||||
async with engine.connect() as connection:
|
||||
await connection.run_sync(
|
||||
do_run_migrations,
|
||||
schema_name=schema_name,
|
||||
create_schema=create_schema,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error migrating schema {schema_name}: {e}")
|
||||
raise
|
||||
# This should not happen in the new design since we require either
|
||||
# upgrade_all_tenants=true or schemas in multi-tenant mode
|
||||
# and for non-multi-tenant mode, we should use schemas with the default schema
|
||||
raise ValueError(
|
||||
"No migration target specified. Use either upgrade_all_tenants=true for all tenants "
|
||||
"or schemas for specific schemas."
|
||||
)
|
||||
|
||||
await engine.dispose()
|
||||
|
||||
|
||||
def run_migrations_offline() -> None:
|
||||
"""This doesn't really get used when we migrate in the cloud."""
|
||||
"""
|
||||
NOTE(rkuo): This generates a sql script that can be used to migrate the database ...
|
||||
instead of migrating the db live via an open connection
|
||||
|
||||
Not clear on when this would be used by us or if it even works.
|
||||
|
||||
If it is offline, then why are there calls to the db engine?
|
||||
|
||||
This doesn't really get used when we migrate in the cloud."""
|
||||
|
||||
logger.info("run_migrations_offline starting.")
|
||||
|
||||
schema_name, _, upgrade_all_tenants, continue_on_error = get_schema_options()
|
||||
# without init_engine, subsequent engine calls fail hard intentionally
|
||||
SqlEngine.init_engine(pool_size=20, max_overflow=5)
|
||||
|
||||
(
|
||||
create_schema,
|
||||
upgrade_all_tenants,
|
||||
continue_on_error,
|
||||
tenant_range_start,
|
||||
tenant_range_end,
|
||||
schemas,
|
||||
) = get_schema_options()
|
||||
url = build_connection_string()
|
||||
|
||||
if upgrade_all_tenants:
|
||||
if schemas:
|
||||
# Use specific schema names directly without fetching all tenants
|
||||
logger.info(f"Migrating specific schema names: {schemas}")
|
||||
|
||||
for schema in schemas:
|
||||
logger.info(f"Migrating schema: {schema}")
|
||||
context.configure(
|
||||
url=url,
|
||||
target_metadata=target_metadata, # type: ignore
|
||||
literal_binds=True,
|
||||
include_object=include_object,
|
||||
version_table_schema=schema,
|
||||
include_schemas=True,
|
||||
script_location=config.get_main_option("script_location"),
|
||||
dialect_opts={"paramstyle": "named"},
|
||||
)
|
||||
|
||||
with context.begin_transaction():
|
||||
context.run_migrations()
|
||||
|
||||
elif upgrade_all_tenants:
|
||||
engine = create_async_engine(url)
|
||||
|
||||
if USE_IAM_AUTH:
|
||||
@@ -223,7 +424,19 @@ def run_migrations_offline() -> None:
|
||||
tenant_schemas = get_all_tenant_ids()
|
||||
engine.sync_engine.dispose()
|
||||
|
||||
for schema in tenant_schemas:
|
||||
filtered_tenant_schemas = filter_tenants_by_range(
|
||||
tenant_schemas, tenant_range_start, tenant_range_end
|
||||
)
|
||||
|
||||
if tenant_range_start is not None or tenant_range_end is not None:
|
||||
logger.info(
|
||||
f"Filtering tenants by range: start={tenant_range_start}, end={tenant_range_end}"
|
||||
)
|
||||
logger.info(
|
||||
f"Total tenants: {len(tenant_schemas)}, Filtered tenants: {len(filtered_tenant_schemas)}"
|
||||
)
|
||||
|
||||
for schema in filtered_tenant_schemas:
|
||||
logger.info(f"Migrating schema: {schema}")
|
||||
context.configure(
|
||||
url=url,
|
||||
@@ -239,21 +452,12 @@ def run_migrations_offline() -> None:
|
||||
with context.begin_transaction():
|
||||
context.run_migrations()
|
||||
else:
|
||||
logger.info(f"Migrating schema: {schema_name}")
|
||||
context.configure(
|
||||
url=url,
|
||||
target_metadata=target_metadata, # type: ignore
|
||||
literal_binds=True,
|
||||
include_object=include_object,
|
||||
version_table_schema=schema_name,
|
||||
include_schemas=True,
|
||||
script_location=config.get_main_option("script_location"),
|
||||
dialect_opts={"paramstyle": "named"},
|
||||
# This should not happen in the new design
|
||||
raise ValueError(
|
||||
"No migration target specified. Use either upgrade_all_tenants=true for all tenants "
|
||||
"or schemas for specific schemas."
|
||||
)
|
||||
|
||||
with context.begin_transaction():
|
||||
context.run_migrations()
|
||||
|
||||
|
||||
def run_migrations_online() -> None:
|
||||
logger.info("run_migrations_online starting.")
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: 6fc7886d665d
|
||||
Create Date: 2025-01-14 12:14:00.814390
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
backend/alembic/versions/03bf8be6b53a_rework_kg_config.py (new file, 121 lines)
@@ -0,0 +1,121 @@
|
||||
"""rework-kg-config
|
||||
|
||||
Revision ID: 03bf8be6b53a
|
||||
Revises: 65bc6e0f8500
|
||||
Create Date: 2025-06-16 10:52:34.815335
|
||||
|
||||
"""
|
||||
|
||||
import json
|
||||
|
||||
|
||||
from datetime import datetime
|
||||
from datetime import timedelta
|
||||
from sqlalchemy.dialects import postgresql
|
||||
from sqlalchemy import text
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "03bf8be6b53a"
|
||||
down_revision = "65bc6e0f8500"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
# get current config
|
||||
current_configs = (
|
||||
op.get_bind()
|
||||
.execute(text("SELECT kg_variable_name, kg_variable_values FROM kg_config"))
|
||||
.all()
|
||||
)
|
||||
current_config_dict = {
|
||||
config.kg_variable_name: (
|
||||
config.kg_variable_values[0]
|
||||
if config.kg_variable_name
|
||||
not in ("KG_VENDOR_DOMAINS", "KG_IGNORE_EMAIL_DOMAINS")
|
||||
else config.kg_variable_values
|
||||
)
|
||||
for config in current_configs
|
||||
if config.kg_variable_values
|
||||
}
|
||||
|
||||
# not using the KGConfigSettings model here in case it changes in the future
|
||||
kg_config_settings = json.dumps(
|
||||
{
|
||||
"KG_EXPOSED": current_config_dict.get("KG_EXPOSED", False),
|
||||
"KG_ENABLED": current_config_dict.get("KG_ENABLED", False),
|
||||
"KG_VENDOR": current_config_dict.get("KG_VENDOR", None),
|
||||
"KG_VENDOR_DOMAINS": current_config_dict.get("KG_VENDOR_DOMAINS", []),
|
||||
"KG_IGNORE_EMAIL_DOMAINS": current_config_dict.get(
|
||||
"KG_IGNORE_EMAIL_DOMAINS", []
|
||||
),
|
||||
"KG_COVERAGE_START": current_config_dict.get(
|
||||
"KG_COVERAGE_START",
|
||||
(datetime.now() - timedelta(days=90)).strftime("%Y-%m-%d"),
|
||||
),
|
||||
"KG_MAX_COVERAGE_DAYS": current_config_dict.get("KG_MAX_COVERAGE_DAYS", 90),
|
||||
"KG_MAX_PARENT_RECURSION_DEPTH": current_config_dict.get(
|
||||
"KG_MAX_PARENT_RECURSION_DEPTH", 2
|
||||
),
|
||||
"KG_BETA_PERSONA_ID": current_config_dict.get("KG_BETA_PERSONA_ID", None),
|
||||
}
|
||||
)
|
||||
op.execute(
|
||||
f"INSERT INTO key_value_store (key, value) VALUES ('kg_config', '{kg_config_settings}')"
|
||||
)
|
||||
|
||||
# drop kg config table
|
||||
op.drop_table("kg_config")
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
# get current config
|
||||
current_config_dict = {
|
||||
"KG_EXPOSED": False,
|
||||
"KG_ENABLED": False,
|
||||
"KG_VENDOR": [],
|
||||
"KG_VENDOR_DOMAINS": [],
|
||||
"KG_IGNORE_EMAIL_DOMAINS": [],
|
||||
"KG_COVERAGE_START": (datetime.now() - timedelta(days=90)).strftime("%Y-%m-%d"),
|
||||
"KG_MAX_COVERAGE_DAYS": 90,
|
||||
"KG_MAX_PARENT_RECURSION_DEPTH": 2,
|
||||
}
|
||||
current_configs = (
|
||||
op.get_bind()
|
||||
.execute(text("SELECT value FROM key_value_store WHERE key = 'kg_config'"))
|
||||
.one_or_none()
|
||||
)
|
||||
if current_configs is not None:
|
||||
current_config_dict.update(current_configs[0])
|
||||
insert_values = [
|
||||
{
|
||||
"kg_variable_name": name,
|
||||
"kg_variable_values": (
|
||||
[str(val).lower() if isinstance(val, bool) else str(val)]
|
||||
if not isinstance(val, list)
|
||||
else val
|
||||
),
|
||||
}
|
||||
for name, val in current_config_dict.items()
|
||||
]
|
||||
|
||||
op.create_table(
|
||||
"kg_config",
|
||||
sa.Column("id", sa.Integer(), primary_key=True, nullable=False, index=True),
|
||||
sa.Column("kg_variable_name", sa.String(), nullable=False, index=True),
|
||||
sa.Column("kg_variable_values", postgresql.ARRAY(sa.String()), nullable=False),
|
||||
sa.UniqueConstraint("kg_variable_name", name="uq_kg_config_variable_name"),
|
||||
)
|
||||
op.bulk_insert(
|
||||
sa.table(
|
||||
"kg_config",
|
||||
sa.column("kg_variable_name", sa.String),
|
||||
sa.column("kg_variable_values", postgresql.ARRAY(sa.String)),
|
||||
),
|
||||
insert_values,
|
||||
)
|
||||
|
||||
op.execute("DELETE FROM key_value_store WHERE key = 'kg_config'")
|
||||
@@ -0,0 +1,153 @@
|
||||
"""add permission sync attempt tables
|
||||
|
||||
Revision ID: 03d710ccf29c
|
||||
Revises: 96a5702df6aa
|
||||
Create Date: 2025-09-11 13:30:00.000000
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "03d710ccf29c" # Generate a new unique ID
|
||||
down_revision = "96a5702df6aa"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
# Create the permission sync status enum
|
||||
permission_sync_status_enum = sa.Enum(
|
||||
"not_started",
|
||||
"in_progress",
|
||||
"success",
|
||||
"canceled",
|
||||
"failed",
|
||||
"completed_with_errors",
|
||||
name="permissionsyncstatus",
|
||||
native_enum=False,
|
||||
)
|
||||
|
||||
# Create doc_permission_sync_attempt table
|
||||
op.create_table(
|
||||
"doc_permission_sync_attempt",
|
||||
sa.Column("id", sa.Integer(), nullable=False),
|
||||
sa.Column("connector_credential_pair_id", sa.Integer(), nullable=False),
|
||||
sa.Column("status", permission_sync_status_enum, nullable=False),
|
||||
sa.Column("total_docs_synced", sa.Integer(), nullable=True),
|
||||
sa.Column("docs_with_permission_errors", sa.Integer(), nullable=True),
|
||||
sa.Column("error_message", sa.Text(), nullable=True),
|
||||
sa.Column(
|
||||
"time_created",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.text("now()"),
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column("time_started", sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Column("time_finished", sa.DateTime(timezone=True), nullable=True),
|
||||
sa.ForeignKeyConstraint(
|
||||
["connector_credential_pair_id"],
|
||||
["connector_credential_pair.id"],
|
||||
),
|
||||
sa.PrimaryKeyConstraint("id"),
|
||||
)
|
||||
|
||||
# Create indexes for doc_permission_sync_attempt
|
||||
op.create_index(
|
||||
"ix_doc_permission_sync_attempt_time_created",
|
||||
"doc_permission_sync_attempt",
|
||||
["time_created"],
|
||||
unique=False,
|
||||
)
|
||||
op.create_index(
|
||||
"ix_permission_sync_attempt_latest_for_cc_pair",
|
||||
"doc_permission_sync_attempt",
|
||||
["connector_credential_pair_id", "time_created"],
|
||||
unique=False,
|
||||
)
|
||||
op.create_index(
|
||||
"ix_permission_sync_attempt_status_time",
|
||||
"doc_permission_sync_attempt",
|
||||
["status", sa.text("time_finished DESC")],
|
||||
unique=False,
|
||||
)
|
||||
|
||||
# Create external_group_permission_sync_attempt table
|
||||
# connector_credential_pair_id is nullable - group syncs can be global (e.g., Confluence)
|
||||
op.create_table(
|
||||
"external_group_permission_sync_attempt",
|
||||
sa.Column("id", sa.Integer(), nullable=False),
|
||||
sa.Column("connector_credential_pair_id", sa.Integer(), nullable=True),
|
||||
sa.Column("status", permission_sync_status_enum, nullable=False),
|
||||
sa.Column("total_users_processed", sa.Integer(), nullable=True),
|
||||
sa.Column("total_groups_processed", sa.Integer(), nullable=True),
|
||||
sa.Column("total_group_memberships_synced", sa.Integer(), nullable=True),
|
||||
sa.Column("error_message", sa.Text(), nullable=True),
|
||||
sa.Column(
|
||||
"time_created",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.text("now()"),
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column("time_started", sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Column("time_finished", sa.DateTime(timezone=True), nullable=True),
|
||||
sa.ForeignKeyConstraint(
|
||||
["connector_credential_pair_id"],
|
||||
["connector_credential_pair.id"],
|
||||
),
|
||||
sa.PrimaryKeyConstraint("id"),
|
||||
)
|
||||
|
||||
# Create indexes for external_group_permission_sync_attempt
|
||||
op.create_index(
|
||||
"ix_external_group_permission_sync_attempt_time_created",
|
||||
"external_group_permission_sync_attempt",
|
||||
["time_created"],
|
||||
unique=False,
|
||||
)
|
||||
op.create_index(
|
||||
"ix_group_sync_attempt_cc_pair_time",
|
||||
"external_group_permission_sync_attempt",
|
||||
["connector_credential_pair_id", "time_created"],
|
||||
unique=False,
|
||||
)
|
||||
op.create_index(
|
||||
"ix_group_sync_attempt_status_time",
|
||||
"external_group_permission_sync_attempt",
|
||||
["status", sa.text("time_finished DESC")],
|
||||
unique=False,
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
# Drop indexes
|
||||
op.drop_index(
|
||||
"ix_group_sync_attempt_status_time",
|
||||
table_name="external_group_permission_sync_attempt",
|
||||
)
|
||||
op.drop_index(
|
||||
"ix_group_sync_attempt_cc_pair_time",
|
||||
table_name="external_group_permission_sync_attempt",
|
||||
)
|
||||
op.drop_index(
|
||||
"ix_external_group_permission_sync_attempt_time_created",
|
||||
table_name="external_group_permission_sync_attempt",
|
||||
)
|
||||
op.drop_index(
|
||||
"ix_permission_sync_attempt_status_time",
|
||||
table_name="doc_permission_sync_attempt",
|
||||
)
|
||||
op.drop_index(
|
||||
"ix_permission_sync_attempt_latest_for_cc_pair",
|
||||
table_name="doc_permission_sync_attempt",
|
||||
)
|
||||
op.drop_index(
|
||||
"ix_doc_permission_sync_attempt_time_created",
|
||||
table_name="doc_permission_sync_attempt",
|
||||
)
|
||||
|
||||
# Drop tables
|
||||
op.drop_table("external_group_permission_sync_attempt")
|
||||
op.drop_table("doc_permission_sync_attempt")
|
||||
@@ -0,0 +1,72 @@
|
||||
"""add federated connector tables
|
||||
|
||||
Revision ID: 0816326d83aa
|
||||
Revises: 12635f6655b7
|
||||
Create Date: 2025-06-29 14:09:45.109518
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "0816326d83aa"
|
||||
down_revision = "12635f6655b7"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
# Create federated_connector table
|
||||
op.create_table(
|
||||
"federated_connector",
|
||||
sa.Column("id", sa.Integer(), nullable=False),
|
||||
sa.Column("source", sa.String(), nullable=False),
|
||||
sa.Column("credentials", sa.LargeBinary(), nullable=False),
|
||||
sa.PrimaryKeyConstraint("id"),
|
||||
)
|
||||
|
||||
# Create federated_connector_oauth_token table
|
||||
op.create_table(
|
||||
"federated_connector_oauth_token",
|
||||
sa.Column("id", sa.Integer(), nullable=False),
|
||||
sa.Column("federated_connector_id", sa.Integer(), nullable=False),
|
||||
sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False),
|
||||
sa.Column("token", sa.LargeBinary(), nullable=False),
|
||||
sa.Column("expires_at", sa.DateTime(), nullable=True),
|
||||
sa.ForeignKeyConstraint(
|
||||
["federated_connector_id"], ["federated_connector.id"], ondelete="CASCADE"
|
||||
),
|
||||
sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
|
||||
sa.PrimaryKeyConstraint("id"),
|
||||
)
|
||||
|
||||
# Create federated_connector__document_set table
|
||||
op.create_table(
|
||||
"federated_connector__document_set",
|
||||
sa.Column("id", sa.Integer(), nullable=False),
|
||||
sa.Column("federated_connector_id", sa.Integer(), nullable=False),
|
||||
sa.Column("document_set_id", sa.Integer(), nullable=False),
|
||||
sa.Column("entities", postgresql.JSONB(), nullable=False),
|
||||
sa.ForeignKeyConstraint(
|
||||
["federated_connector_id"], ["federated_connector.id"], ondelete="CASCADE"
|
||||
),
|
||||
sa.ForeignKeyConstraint(
|
||||
["document_set_id"], ["document_set.id"], ondelete="CASCADE"
|
||||
),
|
||||
sa.PrimaryKeyConstraint("id"),
|
||||
sa.UniqueConstraint(
|
||||
"federated_connector_id",
|
||||
"document_set_id",
|
||||
name="uq_federated_connector_document_set",
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
# Drop tables in reverse order due to foreign key dependencies
|
||||
op.drop_table("federated_connector__document_set")
|
||||
op.drop_table("federated_connector_oauth_token")
|
||||
op.drop_table("federated_connector")
|
||||
@@ -5,6 +5,7 @@ Revises: 8a87bd6ec550
|
||||
Create Date: 2024-07-23 11:12:39.462397
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
@@ -0,0 +1,33 @@
|
||||
"""add theme_preference to user
|
||||
|
||||
Revision ID: 09995b8811eb
|
||||
Revises: 3d1cca026fe8
|
||||
Create Date: 2025-10-24 08:58:50.246949
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from onyx.db.enums import ThemePreference
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "09995b8811eb"
|
||||
down_revision = "3d1cca026fe8"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.add_column(
|
||||
"user",
|
||||
sa.Column(
|
||||
"theme_preference",
|
||||
sa.Enum(ThemePreference, native_enum=False),
|
||||
nullable=True,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.drop_column("user", "theme_preference")
|
||||
@@ -5,6 +5,7 @@ Revises: 5f4b8568a221
|
||||
Create Date: 2024-03-02 23:23:49.960309
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: 570282d33c49
|
||||
Create Date: 2024-05-05 19:30:34.317972
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.sql import table
|
||||
|
||||
@@ -0,0 +1,389 @@
|
||||
"""Migration 2: User file data preparation and backfill
|
||||
|
||||
Revision ID: 0cd424f32b1d
|
||||
Revises: 9b66d3156fc6
|
||||
Create Date: 2025-09-22 09:44:42.727034
|
||||
|
||||
This migration populates the new columns added in migration 1.
|
||||
It prepares data for the UUID transition and relationship migration.
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy import text
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger("alembic.runtime.migration")
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "0cd424f32b1d"
|
||||
down_revision = "9b66d3156fc6"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""Populate new columns with data."""
|
||||
|
||||
bind = op.get_bind()
|
||||
inspector = sa.inspect(bind)
|
||||
|
||||
# === Step 1: Populate user_file.new_id ===
|
||||
user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
|
||||
has_new_id = "new_id" in user_file_columns
|
||||
|
||||
if has_new_id:
|
||||
logger.info("Populating user_file.new_id with UUIDs...")
|
||||
|
||||
# Count rows needing UUIDs
|
||||
null_count = bind.execute(
|
||||
text("SELECT COUNT(*) FROM user_file WHERE new_id IS NULL")
|
||||
).scalar_one()
|
||||
|
||||
if null_count > 0:
|
||||
logger.info(f"Generating UUIDs for {null_count} user_file records...")
|
||||
|
||||
# Populate in batches to avoid long locks
|
||||
batch_size = 10000
|
||||
total_updated = 0
|
||||
|
||||
while True:
|
||||
result = bind.execute(
|
||||
text(
|
||||
"""
|
||||
UPDATE user_file
|
||||
SET new_id = gen_random_uuid()
|
||||
WHERE new_id IS NULL
|
||||
AND id IN (
|
||||
SELECT id FROM user_file
|
||||
WHERE new_id IS NULL
|
||||
LIMIT :batch_size
|
||||
)
|
||||
"""
|
||||
),
|
||||
{"batch_size": batch_size},
|
||||
)
|
||||
|
||||
updated = result.rowcount
|
||||
total_updated += updated
|
||||
|
||||
if updated < batch_size:
|
||||
break
|
||||
|
||||
logger.info(f" Updated {total_updated}/{null_count} records...")
|
||||
|
||||
logger.info(f"Generated UUIDs for {total_updated} user_file records")
|
||||
|
||||
# Verify all records have UUIDs
|
||||
remaining_null = bind.execute(
|
||||
text("SELECT COUNT(*) FROM user_file WHERE new_id IS NULL")
|
||||
).scalar_one()
|
||||
|
||||
if remaining_null > 0:
|
||||
raise Exception(
|
||||
f"Failed to populate all user_file.new_id values ({remaining_null} NULL)"
|
||||
)
|
||||
|
||||
# Lock down the column
|
||||
op.alter_column("user_file", "new_id", nullable=False)
|
||||
op.alter_column("user_file", "new_id", server_default=None)
|
||||
logger.info("Locked down user_file.new_id column")
|
||||
|
||||
# === Step 2: Populate persona__user_file.user_file_id_uuid ===
|
||||
persona_user_file_columns = [
|
||||
col["name"] for col in inspector.get_columns("persona__user_file")
|
||||
]
|
||||
|
||||
if has_new_id and "user_file_id_uuid" in persona_user_file_columns:
|
||||
logger.info("Populating persona__user_file.user_file_id_uuid...")
|
||||
|
||||
# Count rows needing update
|
||||
null_count = bind.execute(
|
||||
text(
|
||||
"""
|
||||
SELECT COUNT(*) FROM persona__user_file
|
||||
WHERE user_file_id IS NOT NULL AND user_file_id_uuid IS NULL
|
||||
"""
|
||||
)
|
||||
).scalar_one()
|
||||
|
||||
if null_count > 0:
|
||||
logger.info(f"Updating {null_count} persona__user_file records...")
|
||||
|
||||
# Update in batches
|
||||
batch_size = 10000
|
||||
total_updated = 0
|
||||
|
||||
while True:
|
||||
result = bind.execute(
|
||||
text(
|
||||
"""
|
||||
UPDATE persona__user_file p
|
||||
SET user_file_id_uuid = uf.new_id
|
||||
FROM user_file uf
|
||||
WHERE p.user_file_id = uf.id
|
||||
AND p.user_file_id_uuid IS NULL
|
||||
AND p.persona_id IN (
|
||||
SELECT persona_id
|
||||
FROM persona__user_file
|
||||
WHERE user_file_id_uuid IS NULL
|
||||
LIMIT :batch_size
|
||||
)
|
||||
"""
|
||||
),
|
||||
{"batch_size": batch_size},
|
||||
)
|
||||
|
||||
updated = result.rowcount
|
||||
total_updated += updated
|
||||
|
||||
if updated < batch_size:
|
||||
break
|
||||
|
||||
logger.info(f" Updated {total_updated}/{null_count} records...")
|
||||
|
||||
logger.info(f"Updated {total_updated} persona__user_file records")
|
||||
|
||||
# Verify all records are populated
|
||||
remaining_null = bind.execute(
|
||||
text(
|
||||
"""
|
||||
SELECT COUNT(*) FROM persona__user_file
|
||||
WHERE user_file_id IS NOT NULL AND user_file_id_uuid IS NULL
|
||||
"""
|
||||
)
|
||||
).scalar_one()
|
||||
|
||||
if remaining_null > 0:
|
||||
raise Exception(
|
||||
f"Failed to populate all persona__user_file.user_file_id_uuid values ({remaining_null} NULL)"
|
||||
)
|
||||
|
||||
op.alter_column("persona__user_file", "user_file_id_uuid", nullable=False)
|
||||
logger.info("Locked down persona__user_file.user_file_id_uuid column")
|
||||
|
||||
# === Step 3: Create user_project records from chat_folder ===
|
||||
if "chat_folder" in inspector.get_table_names():
|
||||
logger.info("Creating user_project records from chat_folder...")
|
||||
|
||||
result = bind.execute(
|
||||
text(
|
||||
"""
|
||||
INSERT INTO user_project (user_id, name)
|
||||
SELECT cf.user_id, cf.name
|
||||
FROM chat_folder cf
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1
|
||||
FROM user_project up
|
||||
WHERE up.user_id = cf.user_id AND up.name = cf.name
|
||||
)
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
logger.info(f"Created {result.rowcount} user_project records from chat_folder")
|
||||
|
||||
# === Step 4: Populate chat_session.project_id ===
|
||||
chat_session_columns = [
|
||||
col["name"] for col in inspector.get_columns("chat_session")
|
||||
]
|
||||
|
||||
if "folder_id" in chat_session_columns and "project_id" in chat_session_columns:
|
||||
logger.info("Populating chat_session.project_id...")
|
||||
|
||||
# Count sessions needing update
|
||||
null_count = bind.execute(
|
||||
text(
|
||||
"""
|
||||
SELECT COUNT(*) FROM chat_session
|
||||
WHERE project_id IS NULL AND folder_id IS NOT NULL
|
||||
"""
|
||||
)
|
||||
).scalar_one()
|
||||
|
||||
if null_count > 0:
|
||||
logger.info(f"Updating {null_count} chat_session records...")
|
||||
|
||||
result = bind.execute(
|
||||
text(
|
||||
"""
|
||||
UPDATE chat_session cs
|
||||
SET project_id = up.id
|
||||
FROM chat_folder cf
|
||||
JOIN user_project up ON up.user_id = cf.user_id AND up.name = cf.name
|
||||
WHERE cs.folder_id = cf.id AND cs.project_id IS NULL
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
logger.info(f"Updated {result.rowcount} chat_session records")
|
||||
|
||||
# Verify all records are populated
|
||||
remaining_null = bind.execute(
|
||||
text(
|
||||
"""
|
||||
SELECT COUNT(*) FROM chat_session
|
||||
WHERE project_id IS NULL AND folder_id IS NOT NULL
|
||||
"""
|
||||
)
|
||||
).scalar_one()
|
||||
|
||||
if remaining_null > 0:
|
||||
logger.warning(
|
||||
f"Warning: {remaining_null} chat_session records could not be mapped to projects"
|
||||
)
|
||||
|
||||
# === Step 5: Update plaintext FileRecord IDs/display names to UUID scheme ===
|
||||
# Prior to UUID migration, plaintext cache files were stored with file_id like 'plain_text_<int_id>'.
|
||||
# After migration, we use 'plaintext_<uuid>' (note the name change to 'plaintext_').
|
||||
# This step remaps existing FileRecord rows to the new naming while preserving object_key/bucket.
|
||||
logger.info("Updating plaintext FileRecord ids and display names to UUID scheme...")
|
||||
|
||||
# Count legacy plaintext records that can be mapped to UUID user_file ids
|
||||
count_query = text(
|
||||
"""
|
||||
SELECT COUNT(*)
|
||||
FROM file_record fr
|
||||
JOIN user_file uf ON fr.file_id = CONCAT('plaintext_', uf.id::text)
|
||||
WHERE LOWER(fr.file_origin::text) = 'plaintext_cache'
|
||||
"""
|
||||
)
|
||||
legacy_count = bind.execute(count_query).scalar_one()
|
||||
|
||||
if legacy_count and legacy_count > 0:
|
||||
logger.info(f"Found {legacy_count} legacy plaintext file records to update")
|
||||
|
||||
# Update display_name first for readability (safe regardless of rename)
|
||||
bind.execute(
|
||||
text(
|
||||
"""
|
||||
UPDATE file_record fr
|
||||
SET display_name = CONCAT('Plaintext for user file ', uf.new_id::text)
|
||||
FROM user_file uf
|
||||
WHERE LOWER(fr.file_origin::text) = 'plaintext_cache'
|
||||
AND fr.file_id = CONCAT('plaintext_', uf.id::text)
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
# Remap file_id from 'plaintext_<int>' -> 'plaintext_<uuid>' using transitional new_id
|
||||
# Use a single UPDATE ... WHERE file_id LIKE 'plain_text_%'
|
||||
# and ensure it aligns to existing user_file ids to avoid renaming unrelated rows
|
||||
result = bind.execute(
|
||||
text(
|
||||
"""
|
||||
UPDATE file_record fr
|
||||
SET file_id = CONCAT('plaintext_', uf.new_id::text)
|
||||
FROM user_file uf
|
||||
WHERE LOWER(fr.file_origin::text) = 'plaintext_cache'
|
||||
AND fr.file_id = CONCAT('plaintext_', uf.id::text)
|
||||
"""
|
||||
)
|
||||
)
|
||||
logger.info(
|
||||
f"Updated {result.rowcount} plaintext file_record ids to UUID scheme"
|
||||
)
|
||||
|
||||
# === Step 6: Ensure document_id_migrated default TRUE and backfill existing FALSE ===
|
||||
# New records should default to migrated=True so the migration task won't run for them.
|
||||
# Existing rows that had a legacy document_id should be marked as not migrated to be processed.
|
||||
|
||||
# Backfill existing records: if document_id is not null, set to FALSE
|
||||
bind.execute(
|
||||
text(
|
||||
"""
|
||||
UPDATE user_file
|
||||
SET document_id_migrated = FALSE
|
||||
WHERE document_id IS NOT NULL
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
# === Step 7: Backfill user_file.status from index_attempt ===
|
||||
logger.info("Backfilling user_file.status from index_attempt...")
|
||||
|
||||
# Update user_file status based on latest index attempt
|
||||
# Using CTEs instead of temp tables for asyncpg compatibility
|
||||
result = bind.execute(
|
||||
text(
|
||||
"""
|
||||
WITH latest_attempt AS (
|
||||
SELECT DISTINCT ON (ia.connector_credential_pair_id)
|
||||
ia.connector_credential_pair_id,
|
||||
ia.status
|
||||
FROM index_attempt ia
|
||||
ORDER BY ia.connector_credential_pair_id, ia.time_updated DESC
|
||||
),
|
||||
uf_to_ccp AS (
|
||||
SELECT DISTINCT uf.id AS uf_id, ccp.id AS cc_pair_id
|
||||
FROM user_file uf
|
||||
JOIN document_by_connector_credential_pair dcc
|
||||
ON dcc.id = REPLACE(uf.document_id, 'USER_FILE_CONNECTOR__', 'FILE_CONNECTOR__')
|
||||
JOIN connector_credential_pair ccp
|
||||
ON ccp.connector_id = dcc.connector_id
|
||||
AND ccp.credential_id = dcc.credential_id
|
||||
)
|
||||
UPDATE user_file uf
|
||||
SET status = CASE
|
||||
WHEN la.status IN ('NOT_STARTED', 'IN_PROGRESS') THEN 'PROCESSING'
|
||||
WHEN la.status = 'SUCCESS' THEN 'COMPLETED'
|
||||
ELSE 'FAILED'
|
||||
END
|
||||
FROM uf_to_ccp ufc
|
||||
LEFT JOIN latest_attempt la
|
||||
ON la.connector_credential_pair_id = ufc.cc_pair_id
|
||||
WHERE uf.id = ufc.uf_id
|
||||
AND uf.status = 'PROCESSING'
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
logger.info(f"Updated status for {result.rowcount} user_file records")
|
||||
|
||||
logger.info("Migration 2 (data preparation) completed successfully")
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Reset populated data to allow clean downgrade of schema."""
|
||||
|
||||
bind = op.get_bind()
|
||||
inspector = sa.inspect(bind)
|
||||
|
||||
logger.info("Starting downgrade of data preparation...")
|
||||
|
||||
# Reset user_file columns to allow nulls before data removal
|
||||
if "user_file" in inspector.get_table_names():
|
||||
columns = [col["name"] for col in inspector.get_columns("user_file")]
|
||||
|
||||
if "new_id" in columns:
|
||||
op.alter_column(
|
||||
"user_file",
|
||||
"new_id",
|
||||
nullable=True,
|
||||
server_default=sa.text("gen_random_uuid()"),
|
||||
)
|
||||
# Optionally clear the data
|
||||
# bind.execute(text("UPDATE user_file SET new_id = NULL"))
|
||||
logger.info("Reset user_file.new_id to nullable")
|
||||
|
||||
# Reset persona__user_file.user_file_id_uuid
|
||||
if "persona__user_file" in inspector.get_table_names():
|
||||
columns = [col["name"] for col in inspector.get_columns("persona__user_file")]
|
||||
|
||||
if "user_file_id_uuid" in columns:
|
||||
op.alter_column("persona__user_file", "user_file_id_uuid", nullable=True)
|
||||
# Optionally clear the data
|
||||
# bind.execute(text("UPDATE persona__user_file SET user_file_id_uuid = NULL"))
|
||||
logger.info("Reset persona__user_file.user_file_id_uuid to nullable")
|
||||
|
||||
# Note: We don't delete user_project records or reset chat_session.project_id
|
||||
# as these might be in use and can be handled by the schema downgrade
|
||||
|
||||
# Reset user_file.status to default
|
||||
if "user_file" in inspector.get_table_names():
|
||||
columns = [col["name"] for col in inspector.get_columns("user_file")]
|
||||
if "status" in columns:
|
||||
bind.execute(text("UPDATE user_file SET status = 'PROCESSING'"))
|
||||
logger.info("Reset user_file.status to default")
|
||||
|
||||
logger.info("Downgrade completed successfully")
|
||||
@@ -5,6 +5,7 @@ Revises: 52a219fb5233
|
||||
Create Date: 2024-09-10 15:03:48.233926
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: 369644546676
|
||||
Create Date: 2025-01-10 14:01:14.067144
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
|
||||
backend/alembic/versions/12635f6655b7_drive_canonical_ids.py (new file, 596 lines)
@@ -0,0 +1,596 @@
|
||||
"""drive-canonical-ids
|
||||
|
||||
Revision ID: 12635f6655b7
|
||||
Revises: 58c50ef19f08
|
||||
Create Date: 2025-06-20 14:44:54.241159
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
from httpx import HTTPStatusError
|
||||
import httpx
|
||||
from onyx.document_index.factory import get_default_document_index
|
||||
from onyx.db.search_settings import SearchSettings
|
||||
from onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client
|
||||
from onyx.document_index.vespa.shared_utils.utils import (
|
||||
replace_invalid_doc_id_characters,
|
||||
)
|
||||
from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
|
||||
from onyx.utils.logger import setup_logger
|
||||
import os
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "12635f6655b7"
|
||||
down_revision = "58c50ef19f08"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
SKIP_CANON_DRIVE_IDS = os.environ.get("SKIP_CANON_DRIVE_IDS", "true").lower() == "true"
|
||||
|
||||
|
||||
def active_search_settings() -> tuple[SearchSettings, SearchSettings | None]:
|
||||
result = op.get_bind().execute(
|
||||
sa.text(
|
||||
"""
|
||||
SELECT * FROM search_settings WHERE status = 'PRESENT' ORDER BY id DESC LIMIT 1
|
||||
"""
|
||||
)
|
||||
)
|
||||
search_settings_fetch = result.fetchall()
|
||||
search_settings = (
|
||||
SearchSettings(**search_settings_fetch[0]._asdict())
|
||||
if search_settings_fetch
|
||||
else None
|
||||
)
|
||||
|
||||
result2 = op.get_bind().execute(
|
||||
sa.text(
|
||||
"""
|
||||
SELECT * FROM search_settings WHERE status = 'FUTURE' ORDER BY id DESC LIMIT 1
|
||||
"""
|
||||
)
|
||||
)
|
||||
search_settings_future_fetch = result2.fetchall()
|
||||
search_settings_future = (
|
||||
SearchSettings(**search_settings_future_fetch[0]._asdict())
|
||||
if search_settings_future_fetch
|
||||
else None
|
||||
)
|
||||
|
||||
if not isinstance(search_settings, SearchSettings):
|
||||
raise RuntimeError(
|
||||
"current search settings is of type " + str(type(search_settings))
|
||||
)
|
||||
if (
|
||||
not isinstance(search_settings_future, SearchSettings)
|
||||
and search_settings_future is not None
|
||||
):
|
||||
raise RuntimeError(
|
||||
"future search settings is of type " + str(type(search_settings_future))
|
||||
)
|
||||
|
||||
return search_settings, search_settings_future
|
||||
|
||||
|
||||
def normalize_google_drive_url(url: str) -> str:
|
||||
"""Remove query parameters from Google Drive URLs to create canonical document IDs.
|
||||
NOTE: copied from drive doc_conversion.py
|
||||
"""
|
||||
parsed_url = urlparse(url)
|
||||
parsed_url = parsed_url._replace(query="")
|
||||
spl_path = parsed_url.path.split("/")
|
||||
if spl_path and (spl_path[-1] in ["edit", "view", "preview"]):
|
||||
spl_path.pop()
|
||||
parsed_url = parsed_url._replace(path="/".join(spl_path))
|
||||
# Remove query parameters and reconstruct URL
|
||||
return urlunparse(parsed_url)
|
||||
|
||||
|
||||
def get_google_drive_documents_from_database() -> list[dict]:
|
||||
"""Get all Google Drive documents from the database."""
|
||||
bind = op.get_bind()
|
||||
result = bind.execute(
|
||||
sa.text(
|
||||
"""
|
||||
SELECT d.id
|
||||
FROM document d
|
||||
JOIN document_by_connector_credential_pair dcc ON d.id = dcc.id
|
||||
JOIN connector_credential_pair cc ON dcc.connector_id = cc.connector_id
|
||||
AND dcc.credential_id = cc.credential_id
|
||||
JOIN connector c ON cc.connector_id = c.id
|
||||
WHERE c.source = 'GOOGLE_DRIVE'
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
documents = []
|
||||
for row in result:
|
||||
documents.append({"document_id": row.id})
|
||||
|
||||
return documents
|
||||
|
||||
|
||||
def update_document_id_in_database(
|
||||
old_doc_id: str, new_doc_id: str, index_name: str
|
||||
) -> None:
|
||||
"""Update document IDs in all relevant database tables using copy-and-swap approach."""
|
||||
bind = op.get_bind()
|
||||
|
||||
# print(f"Updating database tables for document {old_doc_id} -> {new_doc_id}")
|
||||
|
||||
# Check if new document ID already exists
|
||||
result = bind.execute(
|
||||
sa.text("SELECT COUNT(*) FROM document WHERE id = :new_id"),
|
||||
{"new_id": new_doc_id},
|
||||
)
|
||||
row = result.fetchone()
|
||||
if row and row[0] > 0:
|
||||
# print(f"Document with ID {new_doc_id} already exists, deleting old one")
|
||||
delete_document_from_db(old_doc_id, index_name)
|
||||
return
|
||||
|
||||
# Step 1: Create a new document row with the new ID (copy all fields from old row)
|
||||
# Use a conservative approach to handle columns that might not exist in all installations
|
||||
try:
|
||||
bind.execute(
|
||||
sa.text(
|
||||
"""
|
||||
INSERT INTO document (id, from_ingestion_api, boost, hidden, semantic_id,
|
||||
link, doc_updated_at, primary_owners, secondary_owners,
|
||||
external_user_emails, external_user_group_ids, is_public,
|
||||
chunk_count, last_modified, last_synced, kg_stage, kg_processing_time)
|
||||
SELECT :new_id, from_ingestion_api, boost, hidden, semantic_id,
|
||||
link, doc_updated_at, primary_owners, secondary_owners,
|
||||
external_user_emails, external_user_group_ids, is_public,
|
||||
chunk_count, last_modified, last_synced, kg_stage, kg_processing_time
|
||||
FROM document
|
||||
WHERE id = :old_id
|
||||
"""
|
||||
),
|
||||
{"new_id": new_doc_id, "old_id": old_doc_id},
|
||||
)
|
||||
# print(f"Successfully updated database tables for document {old_doc_id} -> {new_doc_id}")
|
||||
except Exception as e:
|
||||
# If the full INSERT fails, try a more basic version with only core columns
|
||||
logger.warning(f"Full INSERT failed, trying basic version: {e}")
|
||||
bind.execute(
|
||||
sa.text(
|
||||
"""
|
||||
INSERT INTO document (id, from_ingestion_api, boost, hidden, semantic_id,
|
||||
link, doc_updated_at, primary_owners, secondary_owners)
|
||||
SELECT :new_id, from_ingestion_api, boost, hidden, semantic_id,
|
||||
link, doc_updated_at, primary_owners, secondary_owners
|
||||
FROM document
|
||||
WHERE id = :old_id
|
||||
"""
|
||||
),
|
||||
{"new_id": new_doc_id, "old_id": old_doc_id},
|
||||
)
|
||||
|
||||
# Step 2: Update all foreign key references to point to the new ID
|
||||
|
||||
# Update document_by_connector_credential_pair table
|
||||
bind.execute(
|
||||
sa.text(
|
||||
"UPDATE document_by_connector_credential_pair SET id = :new_id WHERE id = :old_id"
|
||||
),
|
||||
{"new_id": new_doc_id, "old_id": old_doc_id},
|
||||
)
|
||||
# print(f"Successfully updated document_by_connector_credential_pair table for document {old_doc_id} -> {new_doc_id}")
|
||||
|
||||
# Update search_doc table (stores search results for chat replay)
|
||||
# This is critical for agent functionality
|
||||
bind.execute(
|
||||
sa.text(
|
||||
"UPDATE search_doc SET document_id = :new_id WHERE document_id = :old_id"
|
||||
),
|
||||
{"new_id": new_doc_id, "old_id": old_doc_id},
|
||||
)
|
||||
# print(f"Successfully updated search_doc table for document {old_doc_id} -> {new_doc_id}")
|
||||
# Update document_retrieval_feedback table (user feedback on documents)
|
||||
bind.execute(
|
||||
sa.text(
|
||||
"UPDATE document_retrieval_feedback SET document_id = :new_id WHERE document_id = :old_id"
|
||||
),
|
||||
{"new_id": new_doc_id, "old_id": old_doc_id},
|
||||
)
|
||||
# print(f"Successfully updated document_retrieval_feedback table for document {old_doc_id} -> {new_doc_id}")
|
||||
# Update document__tag table (document-tag relationships)
|
||||
bind.execute(
|
||||
sa.text(
|
||||
"UPDATE document__tag SET document_id = :new_id WHERE document_id = :old_id"
|
||||
),
|
||||
{"new_id": new_doc_id, "old_id": old_doc_id},
|
||||
)
|
||||
# print(f"Successfully updated document__tag table for document {old_doc_id} -> {new_doc_id}")
|
||||
# Update user_file table (user uploaded files linked to documents)
|
||||
bind.execute(
|
||||
sa.text(
|
||||
"UPDATE user_file SET document_id = :new_id WHERE document_id = :old_id"
|
||||
),
|
||||
{"new_id": new_doc_id, "old_id": old_doc_id},
|
||||
)
|
||||
# print(f"Successfully updated user_file table for document {old_doc_id} -> {new_doc_id}")
|
||||
# Update KG and chunk_stats tables (these may not exist in all installations)
|
||||
try:
|
||||
# Update kg_entity table
|
||||
bind.execute(
|
||||
sa.text(
|
||||
"UPDATE kg_entity SET document_id = :new_id WHERE document_id = :old_id"
|
||||
),
|
||||
{"new_id": new_doc_id, "old_id": old_doc_id},
|
||||
)
|
||||
# print(f"Successfully updated kg_entity table for document {old_doc_id} -> {new_doc_id}")
|
||||
# Update kg_entity_extraction_staging table
|
||||
bind.execute(
|
||||
sa.text(
|
||||
"UPDATE kg_entity_extraction_staging SET document_id = :new_id WHERE document_id = :old_id"
|
||||
),
|
||||
{"new_id": new_doc_id, "old_id": old_doc_id},
|
||||
)
|
||||
# print(f"Successfully updated kg_entity_extraction_staging table for document {old_doc_id} -> {new_doc_id}")
|
||||
# Update kg_relationship table
|
||||
bind.execute(
|
||||
sa.text(
|
||||
"UPDATE kg_relationship SET source_document = :new_id WHERE source_document = :old_id"
|
||||
),
|
||||
{"new_id": new_doc_id, "old_id": old_doc_id},
|
||||
)
|
||||
# print(f"Successfully updated kg_relationship table for document {old_doc_id} -> {new_doc_id}")
|
||||
# Update kg_relationship_extraction_staging table
|
||||
bind.execute(
|
||||
sa.text(
|
||||
"UPDATE kg_relationship_extraction_staging SET source_document = :new_id WHERE source_document = :old_id"
|
||||
),
|
||||
{"new_id": new_doc_id, "old_id": old_doc_id},
|
||||
)
|
||||
# print(f"Successfully updated kg_relationship_extraction_staging table for document {old_doc_id} -> {new_doc_id}")
|
||||
# Update chunk_stats table
|
||||
bind.execute(
|
||||
sa.text(
|
||||
"UPDATE chunk_stats SET document_id = :new_id WHERE document_id = :old_id"
|
||||
),
|
||||
{"new_id": new_doc_id, "old_id": old_doc_id},
|
||||
)
|
||||
# print(f"Successfully updated chunk_stats table for document {old_doc_id} -> {new_doc_id}")
|
||||
# Update chunk_stats ID field which includes document_id
|
||||
bind.execute(
|
||||
sa.text(
|
||||
"""
|
||||
UPDATE chunk_stats
|
||||
SET id = REPLACE(id, :old_id, :new_id)
|
||||
WHERE id LIKE :old_id_pattern
|
||||
"""
|
||||
),
|
||||
{
|
||||
"new_id": new_doc_id,
|
||||
"old_id": old_doc_id,
|
||||
"old_id_pattern": f"{old_doc_id}__%",
|
||||
},
|
||||
)
|
||||
# print(f"Successfully updated chunk_stats ID field for document {old_doc_id} -> {new_doc_id}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Some KG/chunk tables may not exist or failed to update: {e}")
|
||||
|
||||
# Step 3: Delete the old document row (this should now be safe since all FKs point to new row)
|
||||
bind.execute(
|
||||
sa.text("DELETE FROM document WHERE id = :old_id"), {"old_id": old_doc_id}
|
||||
)
|
||||
# print(f"Successfully deleted document {old_doc_id} from database")
|
||||
|
||||
|
||||
def _visit_chunks(
|
||||
*,
|
||||
http_client: httpx.Client,
|
||||
index_name: str,
|
||||
selection: str,
|
||||
continuation: str | None = None,
|
||||
) -> tuple[list[dict], str | None]:
|
||||
"""Helper that calls the /document/v1 visit API once and returns (docs, next_token)."""
|
||||
|
||||
# Use the same URL as the document API, but with visit-specific params
|
||||
base_url = DOCUMENT_ID_ENDPOINT.format(index_name=index_name)
|
||||
|
||||
params: dict[str, str] = {
|
||||
"selection": selection,
|
||||
"wantedDocumentCount": "1000",
|
||||
}
|
||||
if continuation:
|
||||
params["continuation"] = continuation
|
||||
|
||||
# print(f"Visiting chunks for selection '{selection}' with params {params}")
|
||||
resp = http_client.get(base_url, params=params, timeout=None)
|
||||
# print(f"Visited chunks for document {selection}")
|
||||
resp.raise_for_status()
|
||||
|
||||
payload = resp.json()
|
||||
return payload.get("documents", []), payload.get("continuation")
|
||||
|
||||
|
||||
def delete_document_chunks_from_vespa(index_name: str, doc_id: str) -> None:
|
||||
"""Delete all chunks for *doc_id* from Vespa using continuation-token paging (no offset)."""
|
||||
|
||||
total_deleted = 0
|
||||
# Use exact match instead of contains - Document Selector Language doesn't support contains
|
||||
selection = f'{index_name}.document_id=="{doc_id}"'
|
||||
|
||||
with get_vespa_http_client() as http_client:
|
||||
continuation: str | None = None
|
||||
while True:
|
||||
docs, continuation = _visit_chunks(
|
||||
http_client=http_client,
|
||||
index_name=index_name,
|
||||
selection=selection,
|
||||
continuation=continuation,
|
||||
)
|
||||
|
||||
if not docs:
|
||||
break
|
||||
|
||||
for doc in docs:
|
||||
vespa_full_id = doc.get("id")
|
||||
if not vespa_full_id:
|
||||
continue
|
||||
|
||||
vespa_doc_uuid = vespa_full_id.split("::")[-1]
|
||||
delete_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{vespa_doc_uuid}"
|
||||
|
||||
try:
|
||||
resp = http_client.delete(delete_url)
|
||||
resp.raise_for_status()
|
||||
total_deleted += 1
|
||||
except Exception as e:
|
||||
print(f"Failed to delete chunk {vespa_doc_uuid}: {e}")
|
||||
|
||||
if not continuation:
|
||||
break
|
||||
|
||||
|
||||
def update_document_id_in_vespa(
|
||||
index_name: str, old_doc_id: str, new_doc_id: str
|
||||
) -> None:
|
||||
"""Update all chunks' document_id field from *old_doc_id* to *new_doc_id* using continuation paging."""
|
||||
|
||||
clean_new_doc_id = replace_invalid_doc_id_characters(new_doc_id)
|
||||
|
||||
# Use exact match instead of contains - Document Selector Language doesn't support contains
|
||||
selection = f'{index_name}.document_id=="{old_doc_id}"'
|
||||
|
||||
with get_vespa_http_client() as http_client:
|
||||
continuation: str | None = None
|
||||
while True:
|
||||
# print(f"Visiting chunks for document {old_doc_id} -> {new_doc_id}")
|
||||
docs, continuation = _visit_chunks(
|
||||
http_client=http_client,
|
||||
index_name=index_name,
|
||||
selection=selection,
|
||||
continuation=continuation,
|
||||
)
|
||||
|
||||
if not docs:
|
||||
break
|
||||
|
||||
for doc in docs:
|
||||
vespa_full_id = doc.get("id")
|
||||
if not vespa_full_id:
|
||||
continue
|
||||
|
||||
vespa_doc_uuid = vespa_full_id.split("::")[-1]
|
||||
vespa_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{vespa_doc_uuid}"
|
||||
|
||||
update_request = {
|
||||
"fields": {"document_id": {"assign": clean_new_doc_id}}
|
||||
}
|
||||
|
||||
try:
|
||||
resp = http_client.put(vespa_url, json=update_request)
|
||||
resp.raise_for_status()
|
||||
except Exception as e:
|
||||
print(f"Failed to update chunk {vespa_doc_uuid}: {e}")
|
||||
raise
|
||||
|
||||
if not continuation:
|
||||
break
|
||||
|
||||
|
||||
def delete_document_from_db(current_doc_id: str, index_name: str) -> None:
|
||||
# Delete all foreign key references first, then delete the document
|
||||
try:
|
||||
bind = op.get_bind()
|
||||
|
||||
# Delete from agent-related tables first (order matters due to foreign keys)
|
||||
# Delete from agent__sub_query__search_doc first since it references search_doc
|
||||
bind.execute(
|
||||
sa.text(
|
||||
"""
|
||||
DELETE FROM agent__sub_query__search_doc
|
||||
WHERE search_doc_id IN (
|
||||
SELECT id FROM search_doc WHERE document_id = :doc_id
|
||||
)
|
||||
"""
|
||||
),
|
||||
{"doc_id": current_doc_id},
|
||||
)
|
||||
|
||||
# Delete from chat_message__search_doc
|
||||
bind.execute(
|
||||
sa.text(
|
||||
"""
|
||||
DELETE FROM chat_message__search_doc
|
||||
WHERE search_doc_id IN (
|
||||
SELECT id FROM search_doc WHERE document_id = :doc_id
|
||||
)
|
||||
"""
|
||||
),
|
||||
{"doc_id": current_doc_id},
|
||||
)
|
||||
|
||||
# Now we can safely delete from search_doc
|
||||
bind.execute(
|
||||
sa.text("DELETE FROM search_doc WHERE document_id = :doc_id"),
|
||||
{"doc_id": current_doc_id},
|
||||
)
|
||||
|
||||
# Delete from document_by_connector_credential_pair
|
||||
bind.execute(
|
||||
sa.text(
|
||||
"DELETE FROM document_by_connector_credential_pair WHERE id = :doc_id"
|
||||
),
|
||||
{"doc_id": current_doc_id},
|
||||
)
|
||||
|
||||
# Delete from other tables that reference this document
|
||||
bind.execute(
|
||||
sa.text(
|
||||
"DELETE FROM document_retrieval_feedback WHERE document_id = :doc_id"
|
||||
),
|
||||
{"doc_id": current_doc_id},
|
||||
)
|
||||
|
||||
bind.execute(
|
||||
sa.text("DELETE FROM document__tag WHERE document_id = :doc_id"),
|
||||
{"doc_id": current_doc_id},
|
||||
)
|
||||
|
||||
bind.execute(
|
||||
sa.text("DELETE FROM user_file WHERE document_id = :doc_id"),
|
||||
{"doc_id": current_doc_id},
|
||||
)
|
||||
|
||||
# Delete from KG and chunk-stats tables if they exist
|
||||
try:
|
||||
bind.execute(
|
||||
sa.text("DELETE FROM kg_entity WHERE document_id = :doc_id"),
|
||||
{"doc_id": current_doc_id},
|
||||
)
|
||||
|
||||
bind.execute(
|
||||
sa.text(
|
||||
"DELETE FROM kg_entity_extraction_staging WHERE document_id = :doc_id"
|
||||
),
|
||||
{"doc_id": current_doc_id},
|
||||
)
|
||||
|
||||
bind.execute(
|
||||
sa.text("DELETE FROM kg_relationship WHERE source_document = :doc_id"),
|
||||
{"doc_id": current_doc_id},
|
||||
)
|
||||
|
||||
bind.execute(
|
||||
sa.text(
|
||||
"DELETE FROM kg_relationship_extraction_staging WHERE source_document = :doc_id"
|
||||
),
|
||||
{"doc_id": current_doc_id},
|
||||
)
|
||||
|
||||
bind.execute(
|
||||
sa.text("DELETE FROM chunk_stats WHERE document_id = :doc_id"),
|
||||
{"doc_id": current_doc_id},
|
||||
)
|
||||
|
||||
bind.execute(
|
||||
sa.text("DELETE FROM chunk_stats WHERE id LIKE :doc_id_pattern"),
|
||||
{"doc_id_pattern": f"{current_doc_id}__%"},
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Some KG/chunk tables may not exist or failed to delete from: {e}"
|
||||
)
|
||||
|
||||
# Finally delete the document itself
|
||||
bind.execute(
|
||||
sa.text("DELETE FROM document WHERE id = :doc_id"),
|
||||
{"doc_id": current_doc_id},
|
||||
)
|
||||
|
||||
# Delete chunks from vespa
|
||||
delete_document_chunks_from_vespa(index_name, current_doc_id)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Failed to delete duplicate document {current_doc_id}: {e}")
|
||||
# Continue with other documents instead of failing the entire migration
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
if SKIP_CANON_DRIVE_IDS:
|
||||
return
|
||||
current_search_settings, future_search_settings = active_search_settings()
|
||||
document_index = get_default_document_index(
|
||||
current_search_settings,
|
||||
future_search_settings,
|
||||
)
|
||||
|
||||
# Get the index name
|
||||
if hasattr(document_index, "index_name"):
|
||||
index_name = document_index.index_name
|
||||
else:
|
||||
# Default index name if we can't get it from the document_index
|
||||
index_name = "danswer_index"
|
||||
|
||||
# Get all Google Drive documents from the database (this is faster and more reliable)
|
||||
gdrive_documents = get_google_drive_documents_from_database()
|
||||
|
||||
if not gdrive_documents:
|
||||
return
|
||||
|
||||
# Track normalized document IDs to detect duplicates
|
||||
all_normalized_doc_ids = set()
|
||||
updated_count = 0
|
||||
|
||||
for doc_info in gdrive_documents:
|
||||
current_doc_id = doc_info["document_id"]
|
||||
normalized_doc_id = normalize_google_drive_url(current_doc_id)
|
||||
|
||||
print(f"Processing document {current_doc_id} -> {normalized_doc_id}")
|
||||
# Check for duplicates
|
||||
if normalized_doc_id in all_normalized_doc_ids:
|
||||
# print(f"Deleting duplicate document {current_doc_id}")
|
||||
delete_document_from_db(current_doc_id, index_name)
|
||||
continue
|
||||
|
||||
all_normalized_doc_ids.add(normalized_doc_id)
|
||||
|
||||
# If normalization leaves the document ID unchanged (no query parameters to strip), skip it
|
||||
if current_doc_id == normalized_doc_id:
|
||||
# print(f"Skipping document {current_doc_id} -> {normalized_doc_id} because it already has no query parameters")
|
||||
continue
|
||||
|
||||
try:
|
||||
# Update both database and Vespa in order
|
||||
# Database first to ensure consistency
|
||||
update_document_id_in_database(
|
||||
current_doc_id, normalized_doc_id, index_name
|
||||
)
|
||||
|
||||
# For Vespa, select chunks by exact match on the original document ID and assign the normalized ID
|
||||
update_document_id_in_vespa(index_name, current_doc_id, normalized_doc_id)
|
||||
updated_count += 1
|
||||
# print(f"Finished updating document {current_doc_id} -> {normalized_doc_id}")
|
||||
except Exception as e:
|
||||
print(f"Failed to update document {current_doc_id}: {e}")
|
||||
|
||||
if isinstance(e, HTTPStatusError):
|
||||
print(f"HTTPStatusError: {e}")
|
||||
print(f"Response: {e.response.text}")
|
||||
print(f"Status: {e.response.status_code}")
|
||||
print(f"Headers: {e.response.headers}")
|
||||
print(f"Request: {e.request.url}")
|
||||
print(f"Request headers: {e.request.headers}")
|
||||
# Note: Rollback is complex with the copy-and-swap approach since the old document is already deleted
|
||||
# In case of failure, manual intervention may be required
|
||||
# Continue with other documents instead of failing the entire migration
|
||||
continue
|
||||
|
||||
logger.info(f"Migration complete. Updated {updated_count} Google Drive documents")
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
# This is a one-way migration, so there is no downgrade.
|
||||
# It wouldn't make sense to store the extra query parameters
|
||||
# and duplicate documents to allow a reversal.
|
||||
pass
|
||||
@@ -5,6 +5,7 @@ Revises: 77d07dffae64
|
||||
Create Date: 2023-11-11 20:51:24.228999
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
@@ -0,0 +1,261 @@
|
||||
"""Migration 3: User file relationship migration
|
||||
|
||||
Revision ID: 16c37a30adf2
|
||||
Revises: 0cd424f32b1d
|
||||
Create Date: 2025-09-22 09:47:34.175596
|
||||
|
||||
This migration converts folder-based relationships to project-based relationships.
|
||||
It migrates persona__user_folder to persona__user_file and populates project__user_file.
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy import text
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger("alembic.runtime.migration")
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "16c37a30adf2"
|
||||
down_revision = "0cd424f32b1d"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""Migrate folder-based relationships to project-based relationships."""
|
||||
|
||||
bind = op.get_bind()
|
||||
inspector = sa.inspect(bind)
|
||||
|
||||
# === Step 1: Migrate persona__user_folder to persona__user_file ===
|
||||
table_names = inspector.get_table_names()
|
||||
|
||||
if "persona__user_folder" in table_names and "user_file" in table_names:
|
||||
user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
|
||||
has_new_id = "new_id" in user_file_columns
|
||||
|
||||
if has_new_id and "folder_id" in user_file_columns:
|
||||
logger.info(
|
||||
"Migrating persona__user_folder relationships to persona__user_file..."
|
||||
)
|
||||
|
||||
# Count relationships to migrate (asyncpg-compatible)
|
||||
count_query = text(
|
||||
"""
|
||||
SELECT COUNT(*)
|
||||
FROM (
|
||||
SELECT DISTINCT puf.persona_id, uf.id
|
||||
FROM persona__user_folder puf
|
||||
JOIN user_file uf ON uf.folder_id = puf.user_folder_id
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1
|
||||
FROM persona__user_file p2
|
||||
WHERE p2.persona_id = puf.persona_id
|
||||
AND p2.user_file_id = uf.id
|
||||
)
|
||||
) AS distinct_pairs
|
||||
"""
|
||||
)
|
||||
to_migrate = bind.execute(count_query).scalar_one()
|
||||
|
||||
if to_migrate > 0:
|
||||
logger.info(f"Creating {to_migrate} persona-file relationships...")
|
||||
|
||||
# Migrate in batches to avoid memory issues
|
||||
batch_size = 10000
|
||||
total_inserted = 0
|
||||
|
||||
while True:
|
||||
# Insert batch directly using subquery (asyncpg compatible)
|
||||
result = bind.execute(
|
||||
text(
|
||||
"""
|
||||
INSERT INTO persona__user_file (persona_id, user_file_id, user_file_id_uuid)
|
||||
SELECT DISTINCT puf.persona_id, uf.id as file_id, uf.new_id
|
||||
FROM persona__user_folder puf
|
||||
JOIN user_file uf ON uf.folder_id = puf.user_folder_id
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1
|
||||
FROM persona__user_file p2
|
||||
WHERE p2.persona_id = puf.persona_id
|
||||
AND p2.user_file_id = uf.id
|
||||
)
|
||||
LIMIT :batch_size
|
||||
"""
|
||||
),
|
||||
{"batch_size": batch_size},
|
||||
)
|
||||
|
||||
inserted = result.rowcount
|
||||
total_inserted += inserted
|
||||
|
||||
if inserted < batch_size:
|
||||
break
|
||||
|
||||
logger.info(
|
||||
f" Migrated {total_inserted}/{to_migrate} relationships..."
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"Created {total_inserted} persona__user_file relationships"
|
||||
)
|
||||
|
||||
# === Step 2: Add foreign key for chat_session.project_id ===
|
||||
chat_session_fks = inspector.get_foreign_keys("chat_session")
|
||||
fk_exists = any(
|
||||
fk["name"] == "fk_chat_session_project_id" for fk in chat_session_fks
|
||||
)
|
||||
|
||||
if not fk_exists:
|
||||
logger.info("Adding foreign key constraint for chat_session.project_id...")
|
||||
op.create_foreign_key(
|
||||
"fk_chat_session_project_id",
|
||||
"chat_session",
|
||||
"user_project",
|
||||
["project_id"],
|
||||
["id"],
|
||||
)
|
||||
logger.info("Added foreign key constraint")
|
||||
|
||||
# === Step 3: Populate project__user_file from user_file.folder_id ===
|
||||
user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
|
||||
has_new_id = "new_id" in user_file_columns
|
||||
|
||||
if has_new_id and "folder_id" in user_file_columns:
|
||||
logger.info("Populating project__user_file from folder relationships...")
|
||||
|
||||
# Count relationships to create
|
||||
count_query = text(
|
||||
"""
|
||||
SELECT COUNT(*)
|
||||
FROM user_file uf
|
||||
WHERE uf.folder_id IS NOT NULL
|
||||
AND NOT EXISTS (
|
||||
SELECT 1
|
||||
FROM project__user_file puf
|
||||
WHERE puf.project_id = uf.folder_id
|
||||
AND puf.user_file_id = uf.new_id
|
||||
)
|
||||
"""
|
||||
)
|
||||
to_create = bind.execute(count_query).scalar_one()
|
||||
|
||||
if to_create > 0:
|
||||
logger.info(f"Creating {to_create} project-file relationships...")
|
||||
|
||||
# Insert in batches
|
||||
batch_size = 10000
|
||||
total_inserted = 0
|
||||
|
||||
while True:
|
||||
result = bind.execute(
|
||||
text(
|
||||
"""
|
||||
INSERT INTO project__user_file (project_id, user_file_id)
|
||||
SELECT uf.folder_id, uf.new_id
|
||||
FROM user_file uf
|
||||
WHERE uf.folder_id IS NOT NULL
|
||||
AND NOT EXISTS (
|
||||
SELECT 1
|
||||
FROM project__user_file puf
|
||||
WHERE puf.project_id = uf.folder_id
|
||||
AND puf.user_file_id = uf.new_id
|
||||
)
|
||||
LIMIT :batch_size
|
||||
ON CONFLICT (project_id, user_file_id) DO NOTHING
|
||||
"""
|
||||
),
|
||||
{"batch_size": batch_size},
|
||||
)
|
||||
|
||||
inserted = result.rowcount
|
||||
total_inserted += inserted
|
||||
|
||||
if inserted < batch_size:
|
||||
break
|
||||
|
||||
logger.info(f" Created {total_inserted}/{to_create} relationships...")
|
||||
|
||||
logger.info(f"Created {total_inserted} project__user_file relationships")
|
||||
|
||||
# === Step 4: Create index on chat_session.project_id ===
|
||||
try:
|
||||
indexes = [ix.get("name") for ix in inspector.get_indexes("chat_session")]
|
||||
except Exception:
|
||||
indexes = []
|
||||
|
||||
if "ix_chat_session_project_id" not in indexes:
|
||||
logger.info("Creating index on chat_session.project_id...")
|
||||
op.create_index(
|
||||
"ix_chat_session_project_id", "chat_session", ["project_id"], unique=False
|
||||
)
|
||||
logger.info("Created index")
|
||||
|
||||
logger.info("Migration 3 (relationship migration) completed successfully")
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Remove migrated relationships and constraints."""
|
||||
|
||||
bind = op.get_bind()
|
||||
inspector = sa.inspect(bind)
|
||||
|
||||
logger.info("Starting downgrade of relationship migration...")
|
||||
|
||||
# Drop index on chat_session.project_id
|
||||
try:
|
||||
indexes = [ix.get("name") for ix in inspector.get_indexes("chat_session")]
|
||||
if "ix_chat_session_project_id" in indexes:
|
||||
op.drop_index("ix_chat_session_project_id", "chat_session")
|
||||
logger.info("Dropped index on chat_session.project_id")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Drop foreign key constraint
|
||||
try:
|
||||
chat_session_fks = inspector.get_foreign_keys("chat_session")
|
||||
fk_exists = any(
|
||||
fk["name"] == "fk_chat_session_project_id" for fk in chat_session_fks
|
||||
)
|
||||
if fk_exists:
|
||||
op.drop_constraint(
|
||||
"fk_chat_session_project_id", "chat_session", type_="foreignkey"
|
||||
)
|
||||
logger.info("Dropped foreign key constraint on chat_session.project_id")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Clear project__user_file relationships (but keep the table for migration 1 to handle)
|
||||
if "project__user_file" in inspector.get_table_names():
|
||||
result = bind.execute(text("DELETE FROM project__user_file"))
|
||||
logger.info(f"Cleared {result.rowcount} records from project__user_file")
|
||||
|
||||
# Remove migrated persona__user_file relationships
|
||||
# Only remove those that came from folder relationships
|
||||
if all(
|
||||
table in inspector.get_table_names()
|
||||
for table in ["persona__user_file", "persona__user_folder", "user_file"]
|
||||
):
|
||||
user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
|
||||
if "folder_id" in user_file_columns:
|
||||
result = bind.execute(
|
||||
text(
|
||||
"""
|
||||
DELETE FROM persona__user_file puf
|
||||
WHERE EXISTS (
|
||||
SELECT 1
|
||||
FROM user_file uf
|
||||
JOIN persona__user_folder puf2
|
||||
ON puf2.user_folder_id = uf.folder_id
|
||||
WHERE puf.persona_id = puf2.persona_id
|
||||
AND puf.user_file_id = uf.id
|
||||
)
|
||||
"""
|
||||
)
|
||||
)
|
||||
logger.info(
|
||||
f"Removed {result.rowcount} migrated persona__user_file relationships"
|
||||
)
|
||||
|
||||
logger.info("Downgrade completed successfully")
|
||||
@@ -5,6 +5,7 @@ Revises: e50154680a5c
|
||||
Create Date: 2024-03-19 15:30:44.425436
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: 4ee1287bd26a
|
||||
Create Date: 2024-11-21 11:49:04.488677
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: 9c00a2bccb83
|
||||
Create Date: 2025-02-18 10:45:13.957807
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: 6756efa39ada
|
||||
Create Date: 2024-10-15 19:26:44.071259
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: 35e6853a51d5
|
||||
Create Date: 2024-09-18 11:48:59.418726
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: 5fc1f54cc252
|
||||
Create Date: 2024-08-10 11:13:36.070790
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
@@ -0,0 +1,45 @@
|
||||
"""Add foreign key to user__external_user_group_id
|
||||
|
||||
Revision ID: 238b84885828
|
||||
Revises: a7688ab35c45
|
||||
Create Date: 2025-05-19 17:15:33.424584
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "238b84885828"
|
||||
down_revision = "a7688ab35c45"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
# First, clean up any entries that don't have a valid cc_pair_id
|
||||
op.execute(
|
||||
"""
|
||||
DELETE FROM user__external_user_group_id
|
||||
WHERE cc_pair_id NOT IN (SELECT id FROM connector_credential_pair)
|
||||
"""
|
||||
)
|
||||
|
||||
# Add foreign key constraint with cascade delete
|
||||
op.create_foreign_key(
|
||||
"fk_user__external_user_group_id_cc_pair_id",
|
||||
"user__external_user_group_id",
|
||||
"connector_credential_pair",
|
||||
["cc_pair_id"],
|
||||
["id"],
|
||||
ondelete="CASCADE",
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
# Drop the foreign key constraint
|
||||
op.drop_constraint(
|
||||
"fk_user__external_user_group_id_cc_pair_id",
|
||||
"user__external_user_group_id",
|
||||
type_="foreignkey",
|
||||
)
|
||||
@@ -5,6 +5,7 @@ Revises: bc9771dccadf
|
||||
Create Date: 2024-06-27 16:04:51.480437
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: 6d387b3196c2
|
||||
Create Date: 2023-05-05 15:49:35.716016
|
||||
|
||||
"""
|
||||
|
||||
import fastapi_users_db_sqlalchemy
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: 2daa494a0851
|
||||
Create Date: 2024-11-12 13:23:29.858995
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: 2666d766cb9b
|
||||
Create Date: 2023-05-24 18:45:17.244495
|
||||
|
||||
"""
|
||||
|
||||
import fastapi_users_db_sqlalchemy
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
@@ -143,27 +144,34 @@ def upgrade() -> None:
|
||||
|
||||
def downgrade() -> None:
|
||||
op.execute("TRUNCATE TABLE index_attempt")
|
||||
op.add_column(
|
||||
"index_attempt",
|
||||
sa.Column("input_type", sa.VARCHAR(), autoincrement=False, nullable=False),
|
||||
)
|
||||
op.add_column(
|
||||
"index_attempt",
|
||||
sa.Column("source", sa.VARCHAR(), autoincrement=False, nullable=False),
|
||||
)
|
||||
op.add_column(
|
||||
"index_attempt",
|
||||
sa.Column(
|
||||
"connector_specific_config",
|
||||
postgresql.JSONB(astext_type=sa.Text()),
|
||||
autoincrement=False,
|
||||
nullable=False,
|
||||
),
|
||||
)
|
||||
|
||||
# Check if the constraint exists before dropping
|
||||
conn = op.get_bind()
|
||||
inspector = sa.inspect(conn)
|
||||
existing_columns = {col["name"] for col in inspector.get_columns("index_attempt")}
|
||||
|
||||
if "input_type" not in existing_columns:
|
||||
op.add_column(
|
||||
"index_attempt",
|
||||
sa.Column("input_type", sa.VARCHAR(), autoincrement=False, nullable=False),
|
||||
)
|
||||
|
||||
if "source" not in existing_columns:
|
||||
op.add_column(
|
||||
"index_attempt",
|
||||
sa.Column("source", sa.VARCHAR(), autoincrement=False, nullable=False),
|
||||
)
|
||||
|
||||
if "connector_specific_config" not in existing_columns:
|
||||
op.add_column(
|
||||
"index_attempt",
|
||||
sa.Column(
|
||||
"connector_specific_config",
|
||||
postgresql.JSONB(astext_type=sa.Text()),
|
||||
autoincrement=False,
|
||||
nullable=False,
|
||||
),
|
||||
)
|
||||
|
||||
# Check if the constraint exists before dropping
|
||||
constraints = inspector.get_foreign_keys("index_attempt")
|
||||
|
||||
if any(
|
||||
@@ -182,8 +190,12 @@ def downgrade() -> None:
|
||||
"fk_index_attempt_connector_id", "index_attempt", type_="foreignkey"
|
||||
)
|
||||
|
||||
op.drop_column("index_attempt", "credential_id")
|
||||
op.drop_column("index_attempt", "connector_id")
|
||||
op.drop_table("connector_credential_pair")
|
||||
op.drop_table("credential")
|
||||
op.drop_table("connector")
|
||||
if "credential_id" in existing_columns:
|
||||
op.drop_column("index_attempt", "credential_id")
|
||||
|
||||
if "connector_id" in existing_columns:
|
||||
op.drop_column("index_attempt", "connector_id")
|
||||
|
||||
op.execute("DROP TABLE IF EXISTS connector_credential_pair CASCADE")
|
||||
op.execute("DROP TABLE IF EXISTS credential CASCADE")
|
||||
op.execute("DROP TABLE IF EXISTS connector CASCADE")
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: c0aab6edb6dd
|
||||
Create Date: 2025-01-04 11:39:43.268612
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
@@ -0,0 +1,228 @@
|
||||
"""Migration 6: User file schema cleanup
|
||||
|
||||
Revision ID: 2b75d0a8ffcb
|
||||
Revises: 3a78dba1080a
|
||||
Create Date: 2025-09-22 10:09:26.375377
|
||||
|
||||
This migration removes legacy columns and tables after data migration is complete.
|
||||
It should only be run after verifying all data has been successfully migrated.
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy import text
|
||||
import logging
|
||||
import fastapi_users_db_sqlalchemy
|
||||
|
||||
logger = logging.getLogger("alembic.runtime.migration")
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "2b75d0a8ffcb"
|
||||
down_revision = "3a78dba1080a"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""Remove legacy columns and tables."""
|
||||
|
||||
bind = op.get_bind()
|
||||
inspector = sa.inspect(bind)
|
||||
|
||||
logger.info("Starting schema cleanup...")
|
||||
|
||||
# === Step 1: Verify data migration is complete ===
|
||||
logger.info("Verifying data migration completion...")
|
||||
|
||||
# Check if any chat sessions still have folder_id references
|
||||
chat_session_columns = [
|
||||
col["name"] for col in inspector.get_columns("chat_session")
|
||||
]
|
||||
if "folder_id" in chat_session_columns:
|
||||
orphaned_count = bind.execute(
|
||||
text(
|
||||
"""
|
||||
SELECT COUNT(*) FROM chat_session
|
||||
WHERE folder_id IS NOT NULL AND project_id IS NULL
|
||||
"""
|
||||
)
|
||||
).scalar_one()
|
||||
|
||||
if orphaned_count > 0:
|
||||
logger.warning(
|
||||
f"WARNING: {orphaned_count} chat_session records still have "
|
||||
f"folder_id without project_id. Proceeding anyway."
|
||||
)
|
||||
|
||||
# === Step 2: Drop chat_session.folder_id ===
|
||||
if "folder_id" in chat_session_columns:
|
||||
logger.info("Dropping chat_session.folder_id...")
|
||||
|
||||
# Drop foreign key constraint first
|
||||
op.execute(
|
||||
"ALTER TABLE chat_session DROP CONSTRAINT IF EXISTS chat_session_chat_folder_fk"
|
||||
)
|
||||
op.execute(
|
||||
"ALTER TABLE chat_session DROP CONSTRAINT IF EXISTS chat_session_folder_fk"
|
||||
)
|
||||
|
||||
# Drop the column
|
||||
op.drop_column("chat_session", "folder_id")
|
||||
logger.info("Dropped chat_session.folder_id")
|
||||
|
||||
# === Step 3: Drop persona__user_folder table ===
|
||||
if "persona__user_folder" in inspector.get_table_names():
|
||||
logger.info("Dropping persona__user_folder table...")
|
||||
|
||||
# Check for any remaining data
|
||||
remaining = bind.execute(
|
||||
text("SELECT COUNT(*) FROM persona__user_folder")
|
||||
).scalar_one()
|
||||
|
||||
if remaining > 0:
|
||||
logger.warning(
|
||||
f"WARNING: Dropping persona__user_folder with {remaining} records"
|
||||
)
|
||||
|
||||
op.drop_table("persona__user_folder")
|
||||
logger.info("Dropped persona__user_folder table")
|
||||
|
||||
# === Step 4: Drop chat_folder table ===
|
||||
if "chat_folder" in inspector.get_table_names():
|
||||
logger.info("Dropping chat_folder table...")
|
||||
|
||||
# Check for any remaining data
|
||||
remaining = bind.execute(text("SELECT COUNT(*) FROM chat_folder")).scalar_one()
|
||||
|
||||
if remaining > 0:
|
||||
logger.warning(f"WARNING: Dropping chat_folder with {remaining} records")
|
||||
|
||||
op.drop_table("chat_folder")
|
||||
logger.info("Dropped chat_folder table")
|
||||
|
||||
# === Step 5: Drop user_file legacy columns ===
|
||||
user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
|
||||
|
||||
# Drop folder_id
|
||||
if "folder_id" in user_file_columns:
|
||||
logger.info("Dropping user_file.folder_id...")
|
||||
op.drop_column("user_file", "folder_id")
|
||||
logger.info("Dropped user_file.folder_id")
|
||||
|
||||
# Drop cc_pair_id (already handled in migration 5, but guard here in case it remains)
|
||||
if "cc_pair_id" in user_file_columns:
|
||||
logger.info("Dropping user_file.cc_pair_id...")
|
||||
|
||||
# Drop any remaining foreign key constraints
|
||||
bind.execute(
|
||||
text(
|
||||
"""
|
||||
DO $$
|
||||
DECLARE r RECORD;
|
||||
BEGIN
|
||||
FOR r IN (
|
||||
SELECT conname
|
||||
FROM pg_constraint c
|
||||
JOIN pg_class t ON c.conrelid = t.oid
|
||||
WHERE c.contype = 'f'
|
||||
AND t.relname = 'user_file'
|
||||
AND EXISTS (
|
||||
SELECT 1 FROM pg_attribute a
|
||||
WHERE a.attrelid = t.oid
|
||||
AND a.attname = 'cc_pair_id'
|
||||
)
|
||||
) LOOP
|
||||
EXECUTE format('ALTER TABLE user_file DROP CONSTRAINT IF EXISTS %I', r.conname);
|
||||
END LOOP;
|
||||
END$$;
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
op.drop_column("user_file", "cc_pair_id")
|
||||
logger.info("Dropped user_file.cc_pair_id")
|
||||
|
||||
# === Step 6: Clean up any remaining constraints ===
|
||||
logger.info("Cleaning up remaining constraints...")
|
||||
|
||||
# Drop any unique constraints on removed columns
|
||||
op.execute(
|
||||
"ALTER TABLE user_file DROP CONSTRAINT IF EXISTS user_file_cc_pair_id_key"
|
||||
)
|
||||
|
||||
logger.info("Migration 6 (schema cleanup) completed successfully")
|
||||
logger.info("Legacy schema has been fully removed")
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Recreate dropped columns and tables (structure only, no data)."""
|
||||
|
||||
bind = op.get_bind()
|
||||
inspector = sa.inspect(bind)
|
||||
|
||||
logger.warning("Downgrading schema cleanup - recreating structure only, no data!")
|
||||
|
||||
# Recreate user_file columns
|
||||
if "user_file" in inspector.get_table_names():
|
||||
columns = [col["name"] for col in inspector.get_columns("user_file")]
|
||||
|
||||
if "cc_pair_id" not in columns:
|
||||
op.add_column(
|
||||
"user_file", sa.Column("cc_pair_id", sa.Integer(), nullable=True)
|
||||
)
|
||||
|
||||
if "folder_id" not in columns:
|
||||
op.add_column(
|
||||
"user_file", sa.Column("folder_id", sa.Integer(), nullable=True)
|
||||
)
|
||||
|
||||
# Recreate persona__user_folder table
|
||||
if "persona__user_folder" not in inspector.get_table_names():
|
||||
op.create_table(
|
||||
"persona__user_folder",
|
||||
sa.Column("persona_id", sa.Integer(), nullable=False),
|
||||
sa.Column("user_folder_id", sa.Integer(), nullable=False),
|
||||
sa.PrimaryKeyConstraint("persona_id", "user_folder_id"),
|
||||
sa.ForeignKeyConstraint(["persona_id"], ["persona.id"]),
|
||||
sa.ForeignKeyConstraint(["user_folder_id"], ["user_project.id"]),
|
||||
)
|
||||
|
||||
# Recreate chat_folder table and related structures
|
||||
if "chat_folder" not in inspector.get_table_names():
|
||||
op.create_table(
|
||||
"chat_folder",
|
||||
sa.Column("id", sa.Integer(), nullable=False),
|
||||
sa.Column(
|
||||
"user_id",
|
||||
fastapi_users_db_sqlalchemy.generics.GUID(),
|
||||
nullable=True,
|
||||
),
|
||||
sa.Column("name", sa.String(), nullable=True),
|
||||
sa.Column("display_priority", sa.Integer(), nullable=False),
|
||||
sa.ForeignKeyConstraint(
|
||||
["user_id"],
|
||||
["user.id"],
|
||||
name="chat_folder_user_id_fkey",
|
||||
),
|
||||
sa.PrimaryKeyConstraint("id"),
|
||||
)
|
||||
|
||||
# Add folder_id back to chat_session
|
||||
if "chat_session" in inspector.get_table_names():
|
||||
columns = [col["name"] for col in inspector.get_columns("chat_session")]
|
||||
if "folder_id" not in columns:
|
||||
op.add_column(
|
||||
"chat_session", sa.Column("folder_id", sa.Integer(), nullable=True)
|
||||
)
|
||||
|
||||
# Add foreign key if chat_folder exists
|
||||
if "chat_folder" in inspector.get_table_names():
|
||||
op.create_foreign_key(
|
||||
"chat_session_chat_folder_fk",
|
||||
"chat_session",
|
||||
"chat_folder",
|
||||
["folder_id"],
|
||||
["id"],
|
||||
)
|
||||
|
||||
logger.info("Downgrade completed - structure recreated but data is lost")
|
||||
@@ -5,6 +5,7 @@ Revises: f5437cc136c5
|
||||
Create Date: 2025-02-11 14:57:51.308775
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: 4b08d97e175a
|
||||
Create Date: 2024-08-21 19:15:15.762948
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: c0fd6e4da83a
|
||||
Create Date: 2024-11-11 10:57:22.991157
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: 33ea50e88f24
|
||||
Create Date: 2025-01-31 10:30:27.289646
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
@@ -0,0 +1,115 @@
|
||||
"""add_indexing_coordination
|
||||
|
||||
Revision ID: 2f95e36923e6
|
||||
Revises: 0816326d83aa
|
||||
Create Date: 2025-07-10 16:17:57.762182
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "2f95e36923e6"
|
||||
down_revision = "0816326d83aa"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
# Add database-based coordination fields (replacing Redis fencing)
|
||||
op.add_column(
|
||||
"index_attempt", sa.Column("celery_task_id", sa.String(), nullable=True)
|
||||
)
|
||||
op.add_column(
|
||||
"index_attempt",
|
||||
sa.Column(
|
||||
"cancellation_requested",
|
||||
sa.Boolean(),
|
||||
nullable=False,
|
||||
server_default="false",
|
||||
),
|
||||
)
|
||||
|
||||
# Add batch coordination fields (replacing FileStore state)
|
||||
op.add_column(
|
||||
"index_attempt", sa.Column("total_batches", sa.Integer(), nullable=True)
|
||||
)
|
||||
op.add_column(
|
||||
"index_attempt",
|
||||
sa.Column(
|
||||
"completed_batches", sa.Integer(), nullable=False, server_default="0"
|
||||
),
|
||||
)
|
||||
op.add_column(
|
||||
"index_attempt",
|
||||
sa.Column(
|
||||
"total_failures_batch_level",
|
||||
sa.Integer(),
|
||||
nullable=False,
|
||||
server_default="0",
|
||||
),
|
||||
)
|
||||
op.add_column(
|
||||
"index_attempt",
|
||||
sa.Column("total_chunks", sa.Integer(), nullable=False, server_default="0"),
|
||||
)
|
||||
|
||||
# Progress tracking for stall detection
|
||||
op.add_column(
|
||||
"index_attempt",
|
||||
sa.Column("last_progress_time", sa.DateTime(timezone=True), nullable=True),
|
||||
)
|
||||
op.add_column(
|
||||
"index_attempt",
|
||||
sa.Column(
|
||||
"last_batches_completed_count",
|
||||
sa.Integer(),
|
||||
nullable=False,
|
||||
server_default="0",
|
||||
),
|
||||
)
|
||||
|
||||
# Heartbeat tracking for worker liveness detection
|
||||
op.add_column(
|
||||
"index_attempt",
|
||||
sa.Column(
|
||||
"heartbeat_counter", sa.Integer(), nullable=False, server_default="0"
|
||||
),
|
||||
)
|
||||
op.add_column(
|
||||
"index_attempt",
|
||||
sa.Column(
|
||||
"last_heartbeat_value", sa.Integer(), nullable=False, server_default="0"
|
||||
),
|
||||
)
|
||||
op.add_column(
|
||||
"index_attempt",
|
||||
sa.Column("last_heartbeat_time", sa.DateTime(timezone=True), nullable=True),
|
||||
)
|
||||
|
||||
# Add index for coordination queries
|
||||
op.create_index(
|
||||
"ix_index_attempt_active_coordination",
|
||||
"index_attempt",
|
||||
["connector_credential_pair_id", "search_settings_id", "status"],
|
||||
)
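
The heartbeat and progress columns above are only half of the picture: indexing workers increment `heartbeat_counter`, and a monitoring task compares it against the value it recorded on its previous pass. That consumer lives elsewhere in the codebase, so the following is only a sketch of the intended liveness check, with the timeout and the bookkeeping of `last_heartbeat_value` / `last_heartbeat_time` treated as assumptions.

from datetime import datetime, timedelta, timezone

STALL_TIMEOUT = timedelta(minutes=10)  # assumed threshold


def attempt_looks_alive(
    heartbeat_counter: int,
    last_heartbeat_value: int,
    last_heartbeat_time: datetime | None,
) -> bool:
    # Counter moved since the last check: the worker is alive.
    if heartbeat_counter > last_heartbeat_value:
        return True
    # No baseline recorded yet: do not flag the attempt as stalled.
    if last_heartbeat_time is None:
        return True
    # Otherwise, treat the attempt as alive only while the counter has
    # been flat for less than the timeout.
    return datetime.now(timezone.utc) - last_heartbeat_time < STALL_TIMEOUT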
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
# Remove the new index
|
||||
op.drop_index("ix_index_attempt_active_coordination", table_name="index_attempt")
|
||||
|
||||
# Remove the new columns
|
||||
op.drop_column("index_attempt", "last_batches_completed_count")
|
||||
op.drop_column("index_attempt", "last_progress_time")
|
||||
op.drop_column("index_attempt", "last_heartbeat_time")
|
||||
op.drop_column("index_attempt", "last_heartbeat_value")
|
||||
op.drop_column("index_attempt", "heartbeat_counter")
|
||||
op.drop_column("index_attempt", "total_chunks")
|
||||
op.drop_column("index_attempt", "total_failures_batch_level")
|
||||
op.drop_column("index_attempt", "completed_batches")
|
||||
op.drop_column("index_attempt", "total_batches")
|
||||
op.drop_column("index_attempt", "cancellation_requested")
|
||||
op.drop_column("index_attempt", "celery_task_id")
|
||||
@@ -5,6 +5,7 @@ Revises: 7f99be1cb9f5
|
||||
Create Date: 2023-10-16 23:21:01.283424
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: 91ffac7e65b3
|
||||
Create Date: 2024-07-24 21:29:31.784562
|
||||
|
||||
"""
|
||||
|
||||
import random
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: 5b29123cd710
|
||||
Create Date: 2024-11-01 12:51:01.535003
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: a6df6b88ef81
|
||||
Create Date: 2025-01-29 10:54:22.141765
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: ee3f4b47fad5
|
||||
Create Date: 2024-08-15 22:37:08.397052
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: 91a0a4d62b14
|
||||
Create Date: 2024-09-20 21:24:04.891018
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: c99d76fcd298
|
||||
Create Date: 2024-09-13 13:20:32.885317
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: 2955778aa44c
|
||||
Create Date: 2025-01-08 15:38:17.224380
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
from sqlalchemy import text
|
||||
|
||||
|
||||
@@ -0,0 +1,136 @@
|
||||
"""update_kg_trigger_functions
|
||||
|
||||
Revision ID: 36e9220ab794
|
||||
Revises: c9e2cd766c29
|
||||
Create Date: 2025-06-22 17:33:25.833733
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import text
|
||||
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "36e9220ab794"
|
||||
down_revision = "c9e2cd766c29"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def _get_tenant_contextvar(session: Session) -> str:
|
||||
"""Get the current schema for the migration"""
|
||||
current_tenant = session.execute(text("SELECT current_schema()")).scalar()
|
||||
if isinstance(current_tenant, str):
|
||||
return current_tenant
|
||||
else:
|
||||
raise ValueError("Current tenant is not a string")
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
|
||||
bind = op.get_bind()
|
||||
session = Session(bind=bind)
|
||||
|
||||
# Create kg_entity trigger to update kg_entity.name and its trigrams
|
||||
tenant_id = _get_tenant_contextvar(session)
|
||||
alphanum_pattern = r"[^a-z0-9]+"
|
||||
truncate_length = 1000
|
||||
function = "update_kg_entity_name"
|
||||
op.execute(
|
||||
text(
|
||||
f"""
|
||||
CREATE OR REPLACE FUNCTION "{tenant_id}".{function}()
|
||||
RETURNS TRIGGER AS $$
|
||||
DECLARE
|
||||
name text;
|
||||
cleaned_name text;
|
||||
BEGIN
|
||||
-- Set name to semantic_id if document_id is not NULL
|
||||
IF NEW.document_id IS NOT NULL THEN
|
||||
SELECT lower(semantic_id) INTO name
|
||||
FROM "{tenant_id}".document
|
||||
WHERE id = NEW.document_id;
|
||||
ELSE
|
||||
name = lower(NEW.name);
|
||||
END IF;
|
||||
|
||||
-- Clean name and truncate if too long
|
||||
cleaned_name = regexp_replace(
|
||||
name,
|
||||
'{alphanum_pattern}', '', 'g'
|
||||
);
|
||||
IF length(cleaned_name) > {truncate_length} THEN
|
||||
cleaned_name = left(cleaned_name, {truncate_length});
|
||||
END IF;
|
||||
|
||||
-- Set name and name trigrams
|
||||
NEW.name = name;
|
||||
NEW.name_trigrams = {POSTGRES_DEFAULT_SCHEMA}.show_trgm(cleaned_name);
|
||||
RETURN NEW;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
"""
|
||||
)
|
||||
)
|
||||
trigger = f"{function}_trigger"
|
||||
op.execute(f'DROP TRIGGER IF EXISTS {trigger} ON "{tenant_id}".kg_entity')
|
||||
op.execute(
|
||||
f"""
|
||||
CREATE TRIGGER {trigger}
|
||||
BEFORE INSERT OR UPDATE OF name
|
||||
ON "{tenant_id}".kg_entity
|
||||
FOR EACH ROW
|
||||
EXECUTE FUNCTION "{tenant_id}".{function}();
|
||||
"""
|
||||
)
|
||||
|
||||
# Create kg_entity trigger to update kg_entity.name and its trigrams
|
||||
function = "update_kg_entity_name_from_doc"
|
||||
op.execute(
|
||||
text(
|
||||
f"""
|
||||
CREATE OR REPLACE FUNCTION "{tenant_id}".{function}()
|
||||
RETURNS TRIGGER AS $$
|
||||
DECLARE
|
||||
doc_name text;
|
||||
cleaned_name text;
|
||||
BEGIN
|
||||
doc_name = lower(NEW.semantic_id);
|
||||
|
||||
-- Clean name and truncate if too long
|
||||
cleaned_name = regexp_replace(
|
||||
doc_name,
|
||||
'{alphanum_pattern}', '', 'g'
|
||||
);
|
||||
IF length(cleaned_name) > {truncate_length} THEN
|
||||
cleaned_name = left(cleaned_name, {truncate_length});
|
||||
END IF;
|
||||
|
||||
-- Set name and name trigrams for all entities referencing this document
|
||||
UPDATE "{tenant_id}".kg_entity
|
||||
SET
|
||||
name = doc_name,
|
||||
name_trigrams = {POSTGRES_DEFAULT_SCHEMA}.show_trgm(cleaned_name)
|
||||
WHERE document_id = NEW.id;
|
||||
RETURN NEW;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
"""
|
||||
)
|
||||
)
|
||||
trigger = f"{function}_trigger"
|
||||
op.execute(f'DROP TRIGGER IF EXISTS {trigger} ON "{tenant_id}".document')
|
||||
op.execute(
|
||||
f"""
|
||||
CREATE TRIGGER {trigger}
|
||||
AFTER UPDATE OF semantic_id
|
||||
ON "{tenant_id}".document
|
||||
FOR EACH ROW
|
||||
EXECUTE FUNCTION "{tenant_id}".{function}();
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
pass
|
||||
@@ -5,6 +5,7 @@ Revises: df46c75b714e
|
||||
Create Date: 2025-03-10 10:02:30.586666
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: f1c6478c3fd8
|
||||
Create Date: 2024-05-11 16:11:23.718084
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: 776b3bbe9092
|
||||
Create Date: 2024-03-27 19:41:29.073594
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: b7c2b63c4a03
|
||||
Create Date: 2025-03-05 10:50:30.516962
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
import json
|
||||
|
||||
@@ -0,0 +1,298 @@
|
||||
"""Migration 5: User file legacy data cleanup
|
||||
|
||||
Revision ID: 3a78dba1080a
|
||||
Revises: 7cc3fcc116c1
|
||||
Create Date: 2025-09-22 10:04:27.986294
|
||||
|
||||
This migration removes legacy user-file documents and connector_credential_pairs.
|
||||
It performs bulk deletions of obsolete data after the UUID migration.
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql as psql
|
||||
from sqlalchemy import text
|
||||
import logging
|
||||
from typing import List
|
||||
import uuid
|
||||
|
||||
logger = logging.getLogger("alembic.runtime.migration")
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "3a78dba1080a"
|
||||
down_revision = "7cc3fcc116c1"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def batch_delete(
|
||||
bind: sa.engine.Connection,
|
||||
table_name: str,
|
||||
id_column: str,
|
||||
ids: List[str | int | uuid.UUID],
|
||||
batch_size: int = 1000,
|
||||
id_type: str = "int",
|
||||
) -> int:
|
||||
"""Delete records in batches to avoid memory issues and timeouts."""
|
||||
total_count = len(ids)
|
||||
if total_count == 0:
|
||||
return 0
|
||||
|
||||
logger.info(
|
||||
f"Starting batch deletion of {total_count} records from {table_name}..."
|
||||
)
|
||||
|
||||
# Determine appropriate ARRAY type
|
||||
if id_type == "uuid":
|
||||
array_type = psql.ARRAY(psql.UUID(as_uuid=True))
|
||||
elif id_type == "int":
|
||||
array_type = psql.ARRAY(sa.Integer())
|
||||
else:
|
||||
array_type = psql.ARRAY(sa.String())
|
||||
|
||||
total_deleted = 0
|
||||
failed_batches = []
|
||||
|
||||
for i in range(0, total_count, batch_size):
|
||||
batch_ids = ids[i : i + batch_size]
|
||||
try:
|
||||
stmt = text(
|
||||
f"DELETE FROM {table_name} WHERE {id_column} = ANY(:ids)"
|
||||
).bindparams(sa.bindparam("ids", value=batch_ids, type_=array_type))
|
||||
result = bind.execute(stmt)
|
||||
total_deleted += result.rowcount
|
||||
|
||||
# Log progress every 10 batches or at completion
|
||||
batch_num = (i // batch_size) + 1
|
||||
if batch_num % 10 == 0 or i + batch_size >= total_count:
|
||||
logger.info(
|
||||
f" Deleted {min(i + batch_size, total_count)}/{total_count} records "
|
||||
f"({total_deleted} actual) from {table_name}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to delete batch {(i // batch_size) + 1}: {e}")
|
||||
failed_batches.append((i, min(i + batch_size, total_count)))
|
||||
|
||||
if failed_batches:
|
||||
logger.warning(
|
||||
f"Failed to delete {len(failed_batches)} batches from {table_name}. "
|
||||
f"Total deleted: {total_deleted}/{total_count}"
|
||||
)
|
||||
# Fail the migration to avoid silently succeeding on partial cleanup
|
||||
raise RuntimeError(
|
||||
f"Batch deletion failed for {table_name}: "
|
||||
f"{len(failed_batches)} failed batches out of "
|
||||
f"{(total_count + batch_size - 1) // batch_size}."
|
||||
)
|
||||
|
||||
return total_deleted
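
As a usage note, the calls later in this migration all follow the same pattern; for example, deleting a handful of document rows by their string primary keys would look like this (the ids shown are placeholders).

bind = op.get_bind()
deleted = batch_delete(
    bind,
    table_name="document",
    id_column="id",
    ids=["doc-1", "doc-2", "doc-3"],  # placeholder ids
    id_type="str",
)
logger.info(f"Deleted {deleted} document rows")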
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""Remove legacy user-file documents and connector_credential_pairs."""
|
||||
|
||||
bind = op.get_bind()
|
||||
inspector = sa.inspect(bind)
|
||||
|
||||
logger.info("Starting legacy data cleanup...")
|
||||
|
||||
# === Step 1: Identify and delete user-file documents ===
|
||||
logger.info("Identifying user-file documents to delete...")
|
||||
|
||||
# Get document IDs to delete
|
||||
doc_rows = bind.execute(
|
||||
text(
|
||||
"""
|
||||
SELECT DISTINCT dcc.id AS document_id
|
||||
FROM document_by_connector_credential_pair dcc
|
||||
JOIN connector_credential_pair u
|
||||
ON u.connector_id = dcc.connector_id
|
||||
AND u.credential_id = dcc.credential_id
|
||||
WHERE u.is_user_file IS TRUE
|
||||
"""
|
||||
)
|
||||
).fetchall()
|
||||
|
||||
doc_ids = [r[0] for r in doc_rows]
|
||||
|
||||
if doc_ids:
|
||||
logger.info(f"Found {len(doc_ids)} user-file documents to delete")
|
||||
|
||||
# Delete dependent rows first
|
||||
tables_to_clean = [
|
||||
("document_retrieval_feedback", "document_id"),
|
||||
("document__tag", "document_id"),
|
||||
("chunk_stats", "document_id"),
|
||||
]
|
||||
|
||||
for table_name, column_name in tables_to_clean:
|
||||
if table_name in inspector.get_table_names():
|
||||
# document_id is a string in these tables
|
||||
deleted = batch_delete(
|
||||
bind, table_name, column_name, doc_ids, id_type="str"
|
||||
)
|
||||
logger.info(f"Deleted {deleted} records from {table_name}")
|
||||
|
||||
# Delete document_by_connector_credential_pair entries
|
||||
deleted = batch_delete(
|
||||
bind, "document_by_connector_credential_pair", "id", doc_ids, id_type="str"
|
||||
)
|
||||
logger.info(f"Deleted {deleted} document_by_connector_credential_pair records")
|
||||
|
||||
# Delete documents themselves
|
||||
deleted = batch_delete(bind, "document", "id", doc_ids, id_type="str")
|
||||
logger.info(f"Deleted {deleted} document records")
|
||||
else:
|
||||
logger.info("No user-file documents found to delete")
|
||||
|
||||
# === Step 2: Clean up user-file connector_credential_pairs ===
|
||||
logger.info("Cleaning up user-file connector_credential_pairs...")
|
||||
|
||||
# Get cc_pair IDs
|
||||
cc_pair_rows = bind.execute(
|
||||
text(
|
||||
"""
|
||||
SELECT id AS cc_pair_id
|
||||
FROM connector_credential_pair
|
||||
WHERE is_user_file IS TRUE
|
||||
"""
|
||||
)
|
||||
).fetchall()
|
||||
|
||||
cc_pair_ids = [r[0] for r in cc_pair_rows]
|
||||
|
||||
if cc_pair_ids:
|
||||
logger.info(
|
||||
f"Found {len(cc_pair_ids)} user-file connector_credential_pairs to clean up"
|
||||
)
|
||||
|
||||
# Delete related records
|
||||
# Clean child tables first to satisfy foreign key constraints,
|
||||
# then the parent tables
|
||||
tables_to_clean = [
|
||||
("index_attempt_errors", "connector_credential_pair_id"),
|
||||
("index_attempt", "connector_credential_pair_id"),
|
||||
("background_error", "cc_pair_id"),
|
||||
("document_set__connector_credential_pair", "connector_credential_pair_id"),
|
||||
("user_group__connector_credential_pair", "cc_pair_id"),
|
||||
]
|
||||
|
||||
for table_name, column_name in tables_to_clean:
|
||||
if table_name in inspector.get_table_names():
|
||||
deleted = batch_delete(
|
||||
bind, table_name, column_name, cc_pair_ids, id_type="int"
|
||||
)
|
||||
logger.info(f"Deleted {deleted} records from {table_name}")
|
||||
|
||||
# === Step 3: Identify connectors and credentials to delete ===
|
||||
logger.info("Identifying orphaned connectors and credentials...")
|
||||
|
||||
# Get connectors used only by user-file cc_pairs
|
||||
connector_rows = bind.execute(
|
||||
text(
|
||||
"""
|
||||
SELECT DISTINCT ccp.connector_id
|
||||
FROM connector_credential_pair ccp
|
||||
WHERE ccp.is_user_file IS TRUE
|
||||
AND ccp.connector_id != 0 -- Exclude system default
|
||||
AND NOT EXISTS (
|
||||
SELECT 1
|
||||
FROM connector_credential_pair c2
|
||||
WHERE c2.connector_id = ccp.connector_id
|
||||
AND c2.is_user_file IS NOT TRUE
|
||||
)
|
||||
"""
|
||||
)
|
||||
).fetchall()
|
||||
|
||||
userfile_only_connector_ids = [r[0] for r in connector_rows]
|
||||
|
||||
# Get credentials used only by user-file cc_pairs
|
||||
credential_rows = bind.execute(
|
||||
text(
|
||||
"""
|
||||
SELECT DISTINCT ccp.credential_id
|
||||
FROM connector_credential_pair ccp
|
||||
WHERE ccp.is_user_file IS TRUE
|
||||
AND ccp.credential_id != 0 -- Exclude public/default
|
||||
AND NOT EXISTS (
|
||||
SELECT 1
|
||||
FROM connector_credential_pair c2
|
||||
WHERE c2.credential_id = ccp.credential_id
|
||||
AND c2.is_user_file IS NOT TRUE
|
||||
)
|
||||
"""
|
||||
)
|
||||
).fetchall()
|
||||
|
||||
userfile_only_credential_ids = [r[0] for r in credential_rows]
|
||||
|
||||
# === Step 4: Delete the cc_pairs themselves ===
|
||||
if cc_pair_ids:
|
||||
# Remove FK dependency from user_file first
|
||||
bind.execute(
|
||||
text(
|
||||
"""
|
||||
DO $$
|
||||
DECLARE r RECORD;
|
||||
BEGIN
|
||||
FOR r IN (
|
||||
SELECT conname
|
||||
FROM pg_constraint c
|
||||
JOIN pg_class t ON c.conrelid = t.oid
|
||||
JOIN pg_class ft ON c.confrelid = ft.oid
|
||||
WHERE c.contype = 'f'
|
||||
AND t.relname = 'user_file'
|
||||
AND ft.relname = 'connector_credential_pair'
|
||||
) LOOP
|
||||
EXECUTE format('ALTER TABLE user_file DROP CONSTRAINT IF EXISTS %I', r.conname);
|
||||
END LOOP;
|
||||
END$$;
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
# Delete cc_pairs
|
||||
deleted = batch_delete(
|
||||
bind, "connector_credential_pair", "id", cc_pair_ids, id_type="int"
|
||||
)
|
||||
logger.info(f"Deleted {deleted} connector_credential_pair records")
|
||||
|
||||
# === Step 5: Delete orphaned connectors ===
|
||||
if userfile_only_connector_ids:
|
||||
deleted = batch_delete(
|
||||
bind, "connector", "id", userfile_only_connector_ids, id_type="int"
|
||||
)
|
||||
logger.info(f"Deleted {deleted} orphaned connector records")
|
||||
|
||||
# === Step 6: Delete orphaned credentials ===
|
||||
if userfile_only_credential_ids:
|
||||
# Clean up credential__user_group mappings first
|
||||
deleted = batch_delete(
|
||||
bind,
|
||||
"credential__user_group",
|
||||
"credential_id",
|
||||
userfile_only_credential_ids,
|
||||
id_type="int",
|
||||
)
|
||||
logger.info(f"Deleted {deleted} credential__user_group records")
|
||||
|
||||
# Delete credentials
|
||||
deleted = batch_delete(
|
||||
bind, "credential", "id", userfile_only_credential_ids, id_type="int"
|
||||
)
|
||||
logger.info(f"Deleted {deleted} orphaned credential records")
|
||||
|
||||
logger.info("Migration 5 (legacy data cleanup) completed successfully")
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Cannot restore deleted data - requires backup restoration."""
|
||||
|
||||
logger.error("CRITICAL: Downgrading data cleanup cannot restore deleted data!")
|
||||
logger.error("Data restoration requires backup files or database backup.")
|
||||
|
||||
# raise NotImplementedError(
|
||||
# "Downgrade of legacy data cleanup is not supported. "
|
||||
# "Deleted data must be restored from backups."
|
||||
# )
|
||||
@@ -5,6 +5,7 @@ Revises: e0a68a81d434
|
||||
Create Date: 2023-10-05 18:47:09.582849
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: 8f43500ee275
|
||||
Create Date: 2025-02-26 13:07:56.217791
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
|
||||
|
||||
@@ -20,22 +21,14 @@ depends_on = None
|
||||
# an outage by creating an index without using CONCURRENTLY. This migration:
|
||||
#
|
||||
# 1. Creates more efficient full-text search capabilities using tsvector columns and GIN indexes
|
||||
# 2. Uses CONCURRENTLY for all index creation to prevent table locking
|
||||
# 3. Explicitly manages transactions with COMMIT statements to allow CONCURRENTLY to work
|
||||
# (see: https://www.postgresql.org/docs/9.4/sql-createindex.html#SQL-CREATEINDEX-CONCURRENTLY)
|
||||
# (see: https://github.com/sqlalchemy/alembic/issues/277)
|
||||
# 4. Adds indexes to both chat_message and chat_session tables for comprehensive search
|
||||
# 2. Adds indexes to both chat_message and chat_session tables for comprehensive search
|
||||
# 3. Note: CONCURRENTLY was removed due to operational issues
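
For contrast with the COMMIT-based approach described above (and not as a change to this migration, which dropped CONCURRENTLY entirely), the usual way to run CONCURRENTLY index creation from Alembic is to step outside the migration transaction with an autocommit block:

# Sketch only: standard Alembic pattern for CONCURRENTLY index creation.
with op.get_context().autocommit_block():
    op.execute(
        """
        CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_message_tsv
        ON chat_message
        USING GIN (message_tsv)
        """
    )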
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
# First, drop any existing indexes to avoid conflicts
|
||||
op.execute("COMMIT")
|
||||
op.execute("DROP INDEX CONCURRENTLY IF EXISTS idx_chat_message_tsv;")
|
||||
|
||||
op.execute("COMMIT")
|
||||
op.execute("DROP INDEX CONCURRENTLY IF EXISTS idx_chat_session_desc_tsv;")
|
||||
|
||||
op.execute("COMMIT")
|
||||
op.execute("DROP INDEX IF EXISTS idx_chat_message_tsv;")
|
||||
op.execute("DROP INDEX IF EXISTS idx_chat_session_desc_tsv;")
|
||||
op.execute("DROP INDEX IF EXISTS idx_chat_message_message_lower;")
|
||||
|
||||
# Drop existing columns if they exist
|
||||
@@ -51,12 +44,9 @@ def upgrade() -> None:
|
||||
"""
|
||||
)
|
||||
|
||||
# Commit the current transaction before creating concurrent indexes
|
||||
op.execute("COMMIT")
|
||||
|
||||
op.execute(
|
||||
"""
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_message_tsv
|
||||
CREATE INDEX IF NOT EXISTS idx_chat_message_tsv
|
||||
ON chat_message
|
||||
USING GIN (message_tsv)
|
||||
"""
|
||||
@@ -71,12 +61,9 @@ def upgrade() -> None:
|
||||
"""
|
||||
)
|
||||
|
||||
# Commit again before creating the second concurrent index
|
||||
op.execute("COMMIT")
|
||||
|
||||
op.execute(
|
||||
"""
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_session_desc_tsv
|
||||
CREATE INDEX IF NOT EXISTS idx_chat_session_desc_tsv
|
||||
ON chat_session
|
||||
USING GIN (description_tsv)
|
||||
"""
|
||||
@@ -84,12 +71,9 @@ def upgrade() -> None:
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
# Drop the indexes first (use CONCURRENTLY for dropping too)
|
||||
op.execute("COMMIT")
|
||||
op.execute("DROP INDEX CONCURRENTLY IF EXISTS idx_chat_message_tsv;")
|
||||
|
||||
op.execute("COMMIT")
|
||||
op.execute("DROP INDEX CONCURRENTLY IF EXISTS idx_chat_session_desc_tsv;")
|
||||
# Drop the indexes first
|
||||
op.execute("DROP INDEX IF EXISTS idx_chat_message_tsv;")
|
||||
op.execute("DROP INDEX IF EXISTS idx_chat_session_desc_tsv;")
|
||||
|
||||
# Then drop the columns
|
||||
op.execute("ALTER TABLE chat_message DROP COLUMN IF EXISTS message_tsv;")
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: 27c6ecc08586
|
||||
Create Date: 2023-06-14 23:45:51.760440
|
||||
|
||||
"""
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: aeda5f2df4f6
|
||||
Create Date: 2025-01-13 12:49:51.705235
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
import fastapi_users_db_sqlalchemy
|
||||
|
||||
@@ -0,0 +1,121 @@
|
||||
"""add_oauth_config_and_user_tokens
|
||||
|
||||
Revision ID: 3d1cca026fe8
|
||||
Revises: c8a93a2af083
|
||||
Create Date: 2025-10-21 13:27:34.274721
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import fastapi_users_db_sqlalchemy
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "3d1cca026fe8"
|
||||
down_revision = "c8a93a2af083"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
# Create oauth_config table
|
||||
op.create_table(
|
||||
"oauth_config",
|
||||
sa.Column("id", sa.Integer(), nullable=False),
|
||||
sa.Column("name", sa.String(), nullable=False),
|
||||
sa.Column("authorization_url", sa.Text(), nullable=False),
|
||||
sa.Column("token_url", sa.Text(), nullable=False),
|
||||
sa.Column("client_id", sa.LargeBinary(), nullable=False),
|
||||
sa.Column("client_secret", sa.LargeBinary(), nullable=False),
|
||||
sa.Column("scopes", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
|
||||
sa.Column(
|
||||
"additional_params",
|
||||
postgresql.JSONB(astext_type=sa.Text()),
|
||||
nullable=True,
|
||||
),
|
||||
sa.Column(
|
||||
"created_at",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.text("now()"),
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column(
|
||||
"updated_at",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.text("now()"),
|
||||
nullable=False,
|
||||
),
|
||||
sa.PrimaryKeyConstraint("id"),
|
||||
sa.UniqueConstraint("name"),
|
||||
)
|
||||
|
||||
# Create oauth_user_token table
|
||||
op.create_table(
|
||||
"oauth_user_token",
|
||||
sa.Column("id", sa.Integer(), nullable=False),
|
||||
sa.Column("oauth_config_id", sa.Integer(), nullable=False),
|
||||
sa.Column(
|
||||
"user_id",
|
||||
fastapi_users_db_sqlalchemy.generics.GUID(),
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column("token_data", sa.LargeBinary(), nullable=False),
|
||||
sa.Column(
|
||||
"created_at",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.text("now()"),
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column(
|
||||
"updated_at",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.text("now()"),
|
||||
nullable=False,
|
||||
),
|
||||
sa.ForeignKeyConstraint(
|
||||
["oauth_config_id"], ["oauth_config.id"], ondelete="CASCADE"
|
||||
),
|
||||
sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
|
||||
sa.PrimaryKeyConstraint("id"),
|
||||
sa.UniqueConstraint("oauth_config_id", "user_id", name="uq_oauth_user_token"),
|
||||
)
|
||||
|
||||
# Create index on user_id for efficient user-based token lookups
|
||||
# Note: unique constraint on (oauth_config_id, user_id) already creates
|
||||
# an index for config-based lookups
|
||||
op.create_index(
|
||||
"ix_oauth_user_token_user_id",
|
||||
"oauth_user_token",
|
||||
["user_id"],
|
||||
)
|
||||
|
||||
# Add oauth_config_id column to tool table
|
||||
op.add_column("tool", sa.Column("oauth_config_id", sa.Integer(), nullable=True))
|
||||
|
||||
# Create foreign key from tool to oauth_config
|
||||
op.create_foreign_key(
|
||||
"tool_oauth_config_fk",
|
||||
"tool",
|
||||
"oauth_config",
|
||||
["oauth_config_id"],
|
||||
["id"],
|
||||
ondelete="SET NULL",
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
# Drop foreign key from tool to oauth_config
|
||||
op.drop_constraint("tool_oauth_config_fk", "tool", type_="foreignkey")
|
||||
|
||||
# Drop oauth_config_id column from tool table
|
||||
op.drop_column("tool", "oauth_config_id")
|
||||
|
||||
# Drop index on user_id
|
||||
op.drop_index("ix_oauth_user_token_user_id", table_name="oauth_user_token")
|
||||
|
||||
# Drop oauth_user_token table (will cascade delete tokens)
|
||||
op.drop_table("oauth_user_token")
|
||||
|
||||
# Drop oauth_config table
|
||||
op.drop_table("oauth_config")
|
||||
@@ -0,0 +1,30 @@
"""add_doc_metadata_field_in_document_model

Revision ID: 3fc5d75723b3
Revises: 2f95e36923e6
Create Date: 2025-07-28 18:45:37.985406

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "3fc5d75723b3"
down_revision = "2f95e36923e6"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "document",
        sa.Column(
            "doc_metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=True
        ),
    )


def downgrade() -> None:
    op.drop_column("document", "doc_metadata")
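
Since `doc_metadata` is JSONB, it can be filtered with PostgreSQL's containment operator. This is a general illustration rather than anything this migration executes, and the key/value shown are made up.

from sqlalchemy import text

rows = op.get_bind().execute(
    text("SELECT id FROM document WHERE doc_metadata @> CAST(:frag AS jsonb)"),
    {"frag": '{"source": "web"}'},  # illustrative fragment
).fetchall()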
|
||||
@@ -5,6 +5,7 @@ Revises: 703313b75876
|
||||
Create Date: 2024-04-13 18:07:29.153817
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
@@ -0,0 +1,28 @@
"""reset userfile document_id_migrated field

Revision ID: 40926a4dab77
Revises: 64bd5677aeb6
Create Date: 2025-10-06 16:10:32.898668

"""

from alembic import op

# revision identifiers, used by Alembic.
revision = "40926a4dab77"
down_revision = "64bd5677aeb6"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Set all existing records to not migrated
    op.execute(
        "UPDATE user_file SET document_id_migrated = FALSE "
        "WHERE document_id_migrated IS DISTINCT FROM FALSE;"
    )


def downgrade() -> None:
    # No-op
    pass
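
The UPDATE in `upgrade()` uses IS DISTINCT FROM rather than `!=` so that rows where `document_id_migrated` is NULL are reset as well. A quick, purely illustrative way to see the difference in PostgreSQL:

from sqlalchemy import text

# NULL != FALSE evaluates to NULL (the row is skipped by a plain !=),
# while NULL IS DISTINCT FROM FALSE evaluates to TRUE (the row is included).
null_neq, null_distinct = op.get_bind().execute(
    text("SELECT NULL != FALSE, NULL IS DISTINCT FROM FALSE")
).first()
# null_neq is None, null_distinct is True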
|
||||
@@ -5,6 +5,7 @@ Revises: e1392f05e840
|
||||
Create Date: 2024-08-01 12:38:54.466081
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: d716b0791ddd
|
||||
Create Date: 2024-06-28 20:01:05.927647
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ Revises: c18cdf4b497e
|
||||
Create Date: 2024-06-18 20:46:09.095034
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||