Mirror of https://github.com/onyx-dot-app/onyx.git (synced 2026-02-17 07:45:47 +00:00)

Compare commits: v0.5.26...initial_im (635 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | 0d848fa9dd … cb75449cec |  |
47  .github/actions/custom-build-and-push/action.yml  (vendored)

@@ -32,16 +32,20 @@ inputs:
    description: 'Cache destinations'
    required: false
  retry-wait-time:
    description: 'Time to wait before retry in seconds'
    description: 'Time to wait before attempt 2 in seconds'
    required: false
    default: '5'
    default: '60'
  retry-wait-time-2:
    description: 'Time to wait before attempt 3 in seconds'
    required: false
    default: '120'

runs:
  using: "composite"
  steps:
    - name: Build and push Docker image (First Attempt)
    - name: Build and push Docker image (Attempt 1 of 3)
      id: buildx1
      uses: docker/build-push-action@v5
      uses: docker/build-push-action@v6
      continue-on-error: true
      with:
        context: ${{ inputs.context }}
@@ -54,16 +58,17 @@ runs:
        cache-from: ${{ inputs.cache-from }}
        cache-to: ${{ inputs.cache-to }}

    - name: Wait to retry
    - name: Wait before attempt 2
      if: steps.buildx1.outcome != 'success'
      run: |
        echo "First attempt failed. Waiting ${{ inputs.retry-wait-time }} seconds before retry..."
        sleep ${{ inputs.retry-wait-time }}
      shell: bash

    - name: Build and push Docker image (Retry Attempt)
    - name: Build and push Docker image (Attempt 2 of 3)
      id: buildx2
      if: steps.buildx1.outcome != 'success'
      uses: docker/build-push-action@v5
      uses: docker/build-push-action@v6
      with:
        context: ${{ inputs.context }}
        file: ${{ inputs.file }}
@@ -74,3 +79,31 @@ runs:
        tags: ${{ inputs.tags }}
        cache-from: ${{ inputs.cache-from }}
        cache-to: ${{ inputs.cache-to }}

    - name: Wait before attempt 3
      if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success'
      run: |
        echo "Second attempt failed. Waiting ${{ inputs.retry-wait-time-2 }} seconds before retry..."
        sleep ${{ inputs.retry-wait-time-2 }}
      shell: bash

    - name: Build and push Docker image (Attempt 3 of 3)
      id: buildx3
      if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success'
      uses: docker/build-push-action@v6
      with:
        context: ${{ inputs.context }}
        file: ${{ inputs.file }}
        platforms: ${{ inputs.platforms }}
        pull: ${{ inputs.pull }}
        push: ${{ inputs.push }}
        load: ${{ inputs.load }}
        tags: ${{ inputs.tags }}
        cache-from: ${{ inputs.cache-from }}
        cache-to: ${{ inputs.cache-to }}

    - name: Report failure
      if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success' && steps.buildx3.outcome != 'success'
      run: |
        echo "All attempts failed. Possible transient infrastucture issues? Try again later or inspect logs for details."
      shell: bash
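For orientation, here is a minimal sketch of how a workflow step might invoke this composite action and override the two wait inputs introduced above. The context, Dockerfile path, and image tag are illustrative placeholders rather than part of the diff; real invocations of the action appear later in pr-chromatic-tests.yml.

      - name: Build and push with retries
        uses: ./.github/actions/custom-build-and-push
        with:
          context: ./backend              # placeholder path
          file: ./backend/Dockerfile      # placeholder path
          platforms: linux/amd64
          tags: danswer/danswer-backend:test
          push: false
          load: true
          retry-wait-time: '60'           # seconds to wait before attempt 2
          retry-wait-time-2: '120'        # seconds to wait before attempt 3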
30  .github/pull_request_template.md  (vendored)

@@ -6,20 +6,24 @@
[Describe the tests you ran to verify your changes]


## Accepted Risk
[Any know risks or failure modes to point out to reviewers]
## Accepted Risk (provide if relevant)
N/A


## Related Issue(s)
[If applicable, link to the issue(s) this PR addresses]
## Related Issue(s) (provide if relevant)
N/A


## Checklist:
- [ ] All of the automated tests pass
- [ ] All PR comments are addressed and marked resolved
- [ ] If there are migrations, they have been rebased to latest main
- [ ] If there are new dependencies, they are added to the requirements
- [ ] If there are new environment variables, they are added to all of the deployment methods
- [ ] If there are new APIs that don't require auth, they are added to PUBLIC_ENDPOINT_SPECS
- [ ] Docker images build and basic functionalities work
- [ ] Author has done a final read through of the PR right before merge
## Mental Checklist:
- All of the automated tests pass
- All PR comments are addressed and marked resolved
- If there are migrations, they have been rebased to latest main
- If there are new dependencies, they are added to the requirements
- If there are new environment variables, they are added to all of the deployment methods
- If there are new APIs that don't require auth, they are added to PUBLIC_ENDPOINT_SPECS
- Docker images build and basic functionalities work
- Author has done a final read through of the PR right before merge

## Backporting (check the box to trigger backport action)
Note: You have to check that the action passes, otherwise resolve the conflicts manually and tag the patches.
- [ ] This PR should be backported (make sure to check that the backport attempt succeeds)
@@ -3,52 +3,61 @@ name: Build and Push Backend Image on Tag
on:
  push:
    tags:
      - '*'
      - "*"

env:
  REGISTRY_IMAGE: danswer/danswer-backend
  REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'danswer/danswer-backend-cloud' || 'danswer/danswer-backend' }}
  LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}

jobs:
  build-and-push:
    # TODO: investigate a matrix build like the web container
    # TODO: investigate a matrix build like the web container
    # See https://runs-on.com/runners/linux/
    runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
    runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}"]

    steps:
      - name: Checkout code
        uses: actions/checkout@v2
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}
      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Install build-essential
        run: |
          sudo apt-get update
          sudo apt-get install -y build-essential

      - name: Backend Image Docker Build and Push
        uses: docker/build-push-action@v5
        with:
          context: ./backend
          file: ./backend/Dockerfile
          platforms: linux/amd64,linux/arm64
          push: true
          tags: |
            ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
            ${{ env.REGISTRY_IMAGE }}:latest
          build-args: |
            DANSWER_VERSION=${{ github.ref_name }}
      - name: Install build-essential
        run: |
          sudo apt-get update
          sudo apt-get install -y build-essential

      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@master
        with:
          # To run locally: trivy image --severity HIGH,CRITICAL danswer/danswer-backend
          image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
          severity: 'CRITICAL,HIGH'
          trivyignores: ./backend/.trivyignore
      - name: Backend Image Docker Build and Push
        uses: docker/build-push-action@v5
        with:
          context: ./backend
          file: ./backend/Dockerfile
          platforms: linux/amd64,linux/arm64
          push: true
          tags: |
            ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
            ${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
          build-args: |
            DANSWER_VERSION=${{ github.ref_name }}

      # trivy has their own rate limiting issues causing this action to flake
      # we worked around it by hardcoding to different db repos in env
      # can re-enable when they figure it out
      # https://github.com/aquasecurity/trivy/discussions/7538
      # https://github.com/aquasecurity/trivy-action/issues/389
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@master
        env:
          TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
          TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
        with:
          # To run locally: trivy image --severity HIGH,CRITICAL danswer/danswer-backend
          image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
          severity: "CRITICAL,HIGH"
          trivyignores: ./backend/.trivyignore
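To make the tag-driven switches in this workflow concrete, the comments below walk through how the REGISTRY_IMAGE and LATEST_TAG expressions would resolve for two hypothetical ref names (the tag names are made up for illustration, not taken from the diff):

# ref_name "v0.5.26-cloud-latest": REGISTRY_IMAGE -> danswer/danswer-backend-cloud, LATEST_TAG -> true,
#   so the format() branch adds the ":latest" tag to the push.
# ref_name "v0.5.26": REGISTRY_IMAGE -> danswer/danswer-backend, LATEST_TAG -> false,
#   so the format() branch evaluates to an empty string and no ":latest" tag is pushed.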
137  .github/workflows/docker-build-push-cloud-web-container-on-tag.yml  (vendored, new file)

@@ -0,0 +1,137 @@
name: Build and Push Cloud Web Image on Tag
# Identical to the web container build, but with correct image tag and build args

on:
  push:
    tags:
      - "*"

env:
  REGISTRY_IMAGE: danswer/danswer-web-server-cloud
  LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}

jobs:
  build:
    runs-on:
      - runs-on
      - runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
      - run-id=${{ github.run_id }}
      - tag=platform-${{ matrix.platform }}
    strategy:
      fail-fast: false
      matrix:
        platform:
          - linux/amd64
          - linux/arm64

    steps:
      - name: Prepare
        run: |
          platform=${{ matrix.platform }}
          echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV

      - name: Checkout
        uses: actions/checkout@v4

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY_IMAGE }}
          tags: |
            type=raw,value=${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
            type=raw,value=${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Build and push by digest
        id: build
        uses: docker/build-push-action@v5
        with:
          context: ./web
          file: ./web/Dockerfile
          platforms: ${{ matrix.platform }}
          push: true
          build-args: |
            DANSWER_VERSION=${{ github.ref_name }}
            NEXT_PUBLIC_CLOUD_ENABLED=true
            NEXT_PUBLIC_POSTHOG_KEY=${{ secrets.POSTHOG_KEY }}
            NEXT_PUBLIC_POSTHOG_HOST=${{ secrets.POSTHOG_HOST }}
            NEXT_PUBLIC_SENTRY_DSN=${{ secrets.SENTRY_DSN }}
            NEXT_PUBLIC_GTM_ENABLED=true
          # needed due to weird interactions with the builds for different platforms
          no-cache: true
          labels: ${{ steps.meta.outputs.labels }}
          outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true

      - name: Export digest
        run: |
          mkdir -p /tmp/digests
          digest="${{ steps.build.outputs.digest }}"
          touch "/tmp/digests/${digest#sha256:}"

      - name: Upload digest
        uses: actions/upload-artifact@v4
        with:
          name: digests-${{ env.PLATFORM_PAIR }}
          path: /tmp/digests/*
          if-no-files-found: error
          retention-days: 1

  merge:
    runs-on: ubuntu-latest
    needs:
      - build
    steps:
      - name: Download digests
        uses: actions/download-artifact@v4
        with:
          path: /tmp/digests
          pattern: digests-*
          merge-multiple: true

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY_IMAGE }}

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Create manifest list and push
        working-directory: /tmp/digests
        run: |
          docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
            $(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)

      - name: Inspect image
        run: |
          docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}

      # trivy has their own rate limiting issues causing this action to flake
      # we worked around it by hardcoding to different db repos in env
      # can re-enable when they figure it out
      # https://github.com/aquasecurity/trivy/discussions/7538
      # https://github.com/aquasecurity/trivy-action/issues/389
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@master
        env:
          TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
          TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
        with:
          image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
          severity: "CRITICAL,HIGH"
@@ -3,41 +3,53 @@ name: Build and Push Model Server Image on Tag
on:
  push:
    tags:
      - '*'
      - "*"

env:
  REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'danswer/danswer-model-server-cloud' || 'danswer/danswer-model-server' }}
  LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}

jobs:
  build-and-push:
    # See https://runs-on.com/runners/linux/
    runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
    runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}"]

    steps:
      - name: Checkout code
        uses: actions/checkout@v2
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}
      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Model Server Image Docker Build and Push
        uses: docker/build-push-action@v5
        with:
          context: ./backend
          file: ./backend/Dockerfile.model_server
          platforms: linux/amd64,linux/arm64
          push: true
          tags: |
            danswer/danswer-model-server:${{ github.ref_name }}
            danswer/danswer-model-server:latest
          build-args: |
            DANSWER_VERSION=${{ github.ref_name }}
      - name: Model Server Image Docker Build and Push
        uses: docker/build-push-action@v5
        with:
          context: ./backend
          file: ./backend/Dockerfile.model_server
          platforms: linux/amd64,linux/arm64
          push: true
          tags: |
            ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
            ${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
          build-args: |
            DANSWER_VERSION=${{ github.ref_name }}

      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@master
        with:
          image-ref: docker.io/danswer/danswer-model-server:${{ github.ref_name }}
          severity: 'CRITICAL,HIGH'
      # trivy has their own rate limiting issues causing this action to flake
      # we worked around it by hardcoding to different db repos in env
      # can re-enable when they figure it out
      # https://github.com/aquasecurity/trivy/discussions/7538
      # https://github.com/aquasecurity/trivy-action/issues/389
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@master
        env:
          TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
          TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
        with:
          image-ref: docker.io/danswer/danswer-model-server:${{ github.ref_name }}
          severity: "CRITICAL,HIGH"
@@ -7,11 +7,15 @@ on:

env:
  REGISTRY_IMAGE: danswer/danswer-web-server

  LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}

jobs:
  build:
    runs-on:
      group: ${{ matrix.platform == 'linux/amd64' && 'amd64-image-builders' || 'arm64-image-builders' }}
    runs-on:
      - runs-on
      - runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
      - run-id=${{ github.run_id }}
      - tag=platform-${{ matrix.platform }}
    strategy:
      fail-fast: false
      matrix:
@@ -35,7 +39,7 @@ jobs:
          images: ${{ env.REGISTRY_IMAGE }}
          tags: |
            type=raw,value=${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
            type=raw,value=${{ env.REGISTRY_IMAGE }}:latest
            type=raw,value=${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
@@ -112,8 +116,16 @@ jobs:
        run: |
          docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}

      # trivy has their own rate limiting issues causing this action to flake
      # we worked around it by hardcoding to different db repos in env
      # can re-enable when they figure it out
      # https://github.com/aquasecurity/trivy/discussions/7538
      # https://github.com/aquasecurity/trivy-action/issues/389
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@master
        env:
          TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
          TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
        with:
          image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
          severity: 'CRITICAL,HIGH'
3  .github/workflows/docker-tag-latest.yml  (vendored)

@@ -1,3 +1,6 @@
# This workflow is set up to be manually triggered via the GitHub Action tab.
# Given a version, it will tag those backend and webserver images as "latest".

name: Tag Latest Version

on:
172  .github/workflows/hotfix-release-branches.yml  (vendored, new file)

@@ -0,0 +1,172 @@
# This workflow is intended to be manually triggered via the GitHub Action tab.
# Given a hotfix branch, it will attempt to open a PR to all release branches and
# by default auto merge them

name: Hotfix release branches

on:
  workflow_dispatch:
    inputs:
      hotfix_commit:
        description: 'Hotfix commit hash'
        required: true
      hotfix_suffix:
        description: 'Hotfix branch suffix (e.g. hotfix/v0.8-{suffix})'
        required: true
      release_branch_pattern:
        description: 'Release branch pattern (regex)'
        required: true
        default: 'release/.*'
      auto_merge:
        description: 'Automatically merge the hotfix PRs'
        required: true
        type: choice
        default: 'true'
        options:
          - true
          - false

jobs:
  hotfix_release_branches:
    permissions: write-all
    # See https://runs-on.com/runners/linux/
    # use a lower powered instance since this just does i/o to docker hub
    runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"]
    steps:

      # needs RKUO_DEPLOY_KEY for write access to merge PR's
      - name: Checkout Repository
        uses: actions/checkout@v4
        with:
          ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"
          fetch-depth: 0

      - name: Set up Git user
        run: |
          git config user.name "Richard Kuo [bot]"
          git config user.email "rkuo[bot]@danswer.ai"

      - name: Fetch All Branches
        run: |
          git fetch --all --prune

      - name: Verify Hotfix Commit Exists
        run: |
          git rev-parse --verify "${{ github.event.inputs.hotfix_commit }}" || { echo "Commit not found: ${{ github.event.inputs.hotfix_commit }}"; exit 1; }

      - name: Get Release Branches
        id: get_release_branches
        run: |
          BRANCHES=$(git branch -r | grep -E "${{ github.event.inputs.release_branch_pattern }}" | sed 's|origin/||' | tr -d ' ')
          if [ -z "$BRANCHES" ]; then
            echo "No release branches found matching pattern '${{ github.event.inputs.release_branch_pattern }}'."
            exit 1
          fi

          echo "Found release branches:"
          echo "$BRANCHES"

          # Join the branches into a single line separated by commas
          BRANCHES_JOINED=$(echo "$BRANCHES" | tr '\n' ',' | sed 's/,$//')

          # Set the branches as an output
          echo "branches=$BRANCHES_JOINED" >> $GITHUB_OUTPUT

      # notes on all the vagaries of wiring up automated PR's
      # https://github.com/peter-evans/create-pull-request/blob/main/docs/concepts-guidelines.md#triggering-further-workflow-runs
      # we must use a custom token for GH_TOKEN to trigger the subsequent PR checks
      - name: Create and Merge Pull Requests to Matching Release Branches
        env:
          HOTFIX_COMMIT: ${{ github.event.inputs.hotfix_commit }}
          HOTFIX_SUFFIX: ${{ github.event.inputs.hotfix_suffix }}
          AUTO_MERGE: ${{ github.event.inputs.auto_merge }}
          GH_TOKEN: ${{ secrets.RKUO_PERSONAL_ACCESS_TOKEN }}
        run: |
          # Get the branches from the previous step
          BRANCHES="${{ steps.get_release_branches.outputs.branches }}"

          # Convert BRANCHES to an array
          IFS=$',' read -ra BRANCH_ARRAY <<< "$BRANCHES"

          # Loop through each release branch and create and merge a PR
          for RELEASE_BRANCH in "${BRANCH_ARRAY[@]}"; do
            echo "Processing $RELEASE_BRANCH..."

            # Parse out the release version by removing "release/" from the branch name
            RELEASE_VERSION=${RELEASE_BRANCH#release/}
            echo "Release version parsed: $RELEASE_VERSION"

            HOTFIX_BRANCH="hotfix/${RELEASE_VERSION}-${HOTFIX_SUFFIX}"
            echo "Creating PR from $HOTFIX_BRANCH to $RELEASE_BRANCH"

            # Checkout the release branch
            echo "Checking out $RELEASE_BRANCH"
            git checkout "$RELEASE_BRANCH"

            # Create the new hotfix branch
            if git rev-parse --verify "$HOTFIX_BRANCH" >/dev/null 2>&1; then
              echo "Hotfix branch $HOTFIX_BRANCH already exists. Skipping branch creation."
            else
              echo "Branching $RELEASE_BRANCH to $HOTFIX_BRANCH"
              git checkout -b "$HOTFIX_BRANCH"
            fi

            # Check if the hotfix commit is a merge commit
            if git rev-list --merges -n 1 "$HOTFIX_COMMIT" >/dev/null 2>&1; then
              # -m 1 uses the target branch as the base (which is what we want)
              echo "Hotfix commit $HOTFIX_COMMIT is a merge commit, using -m 1 for cherry-pick"
              CHERRY_PICK_CMD="git cherry-pick -m 1 $HOTFIX_COMMIT"
            else
              CHERRY_PICK_CMD="git cherry-pick $HOTFIX_COMMIT"
            fi

            # Perform the cherry-pick
            echo "Executing: $CHERRY_PICK_CMD"
            eval "$CHERRY_PICK_CMD"

            if [ $? -ne 0 ]; then
              echo "Cherry-pick failed for $HOTFIX_COMMIT on $HOTFIX_BRANCH. Aborting..."
              git cherry-pick --abort
              continue
            fi

            # Push the hotfix branch to the remote
            echo "Pushing $HOTFIX_BRANCH..."
            git push origin "$HOTFIX_BRANCH"
            echo "Hotfix branch $HOTFIX_BRANCH created and pushed."

            # Check if PR already exists
            EXISTING_PR=$(gh pr list --head "$HOTFIX_BRANCH" --base "$RELEASE_BRANCH" --state open --json number --jq '.[0].number')

            if [ -n "$EXISTING_PR" ]; then
              echo "An open PR already exists: #$EXISTING_PR. Skipping..."
              continue
            fi

            # Create a new PR and capture the output
            PR_OUTPUT=$(gh pr create --title "Merge $HOTFIX_BRANCH into $RELEASE_BRANCH" \
              --body "Automated PR to merge \`$HOTFIX_BRANCH\` into \`$RELEASE_BRANCH\`." \
              --head "$HOTFIX_BRANCH" --base "$RELEASE_BRANCH")

            # Extract the URL from the output
            PR_URL=$(echo "$PR_OUTPUT" | grep -Eo 'https://github.com/[^ ]+')
            echo "Pull request created: $PR_URL"

            # Extract PR number from URL
            PR_NUMBER=$(basename "$PR_URL")
            echo "Pull request created: $PR_NUMBER"

            if [ "$AUTO_MERGE" == "true" ]; then
              echo "Attempting to merge pull request #$PR_NUMBER"

              # Attempt to merge the PR
              gh pr merge "$PR_NUMBER" --merge --auto --delete-branch

              if [ $? -eq 0 ]; then
                echo "Pull request #$PR_NUMBER merged successfully."
              else
                # Optionally, handle the error or continue
                echo "Failed to merge pull request #$PR_NUMBER."
              fi
            fi
          done
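As a concrete illustration of the branch naming used in the loop above (the release branch and suffix below are hypothetical, not taken from the diff):

# With RELEASE_BRANCH="release/v0.8" and a hotfix_suffix of "fix-auth":
#   RELEASE_VERSION="v0.8"                 (the "release/" prefix is stripped)
#   HOTFIX_BRANCH="hotfix/v0.8-fix-auth"
# and the workflow opens a PR from hotfix/v0.8-fix-auth into release/v0.8.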
23  .github/workflows/nightly-close-stale-issues.yml  (vendored, new file)

@@ -0,0 +1,23 @@
name: 'Nightly - Close stale issues and PRs'
on:
  schedule:
    - cron: '0 11 * * *' # Runs every day at 3 AM PST / 4 AM PDT / 11 AM UTC

permissions:
  # contents: write # only for delete-branch option
  issues: write
  pull-requests: write

jobs:
  stale:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/stale@v9
        with:
          stale-issue-message: 'This issue is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'
          stale-pr-message: 'This PR is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'
          close-issue-message: 'This issue was closed because it has been stalled for 90 days with no activity.'
          close-pr-message: 'This PR was closed because it has been stalled for 90 days with no activity.'
          days-before-stale: 75
          # days-before-close: 90 # uncomment after we test stale behavior
76  .github/workflows/nightly-scan-licenses.yml  (vendored, new file)

@@ -0,0 +1,76 @@
# Scan for problematic software licenses

# trivy has their own rate limiting issues causing this action to flake
# we worked around it by hardcoding to different db repos in env
# can re-enable when they figure it out
# https://github.com/aquasecurity/trivy/discussions/7538
# https://github.com/aquasecurity/trivy-action/issues/389

name: 'Nightly - Scan licenses'
on:
  # schedule:
  #   - cron: '0 14 * * *' # Runs every day at 6 AM PST / 7 AM PDT / 2 PM UTC
  workflow_dispatch: # Allows manual triggering

permissions:
  actions: read
  contents: read
  security-events: write

jobs:
  scan-licenses:
    # See https://runs-on.com/runners/linux/
    runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"]

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'
          cache-dependency-path: |
            backend/requirements/default.txt
            backend/requirements/dev.txt
            backend/requirements/model_server.txt

      - name: Get explicit and transitive dependencies
        run: |
          python -m pip install --upgrade pip
          pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
          pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
          pip freeze > requirements-all.txt

      - name: Check python
        id: license_check_report
        uses: pilosus/action-pip-license-checker@v2
        with:
          requirements: 'requirements-all.txt'
          fail: 'Copyleft'
          exclude: '(?i)^(pylint|aio[-_]*).*'

      - name: Print report
        if: ${{ always() }}
        run: echo "${{ steps.license_check_report.outputs.report }}"

      - name: Install npm dependencies
        working-directory: ./web
        run: npm ci

      - name: Run Trivy vulnerability scanner in repo mode
        uses: aquasecurity/trivy-action@0.28.0
        with:
          scan-type: fs
          scanners: license
          format: table
          # format: sarif
          # output: trivy-results.sarif
          severity: HIGH,CRITICAL

      # - name: Upload Trivy scan results to GitHub Security tab
      #   uses: github/codeql-action/upload-sarif@v3
      #   with:
      #     sarif_file: trivy-results.sarif
124  .github/workflows/pr-backport-autotrigger.yml  (vendored, new file)

@@ -0,0 +1,124 @@
name: Backport on Merge

# Note this workflow does not trigger the builds, be sure to manually tag the branches to trigger the builds

on:
  pull_request:
    types: [closed] # Later we check for merge so only PRs that go in can get backported

permissions:
  contents: write
  actions: write

jobs:
  backport:
    if: github.event.pull_request.merged == true
    runs-on: ubuntu-latest
    env:
      GITHUB_TOKEN: ${{ secrets.YUHONG_GH_ACTIONS }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"
          fetch-depth: 0

      - name: Set up Git user
        run: |
          git config user.name "Richard Kuo [bot]"
          git config user.email "rkuo[bot]@danswer.ai"
          git fetch --prune

      - name: Check for Backport Checkbox
        id: checkbox-check
        run: |
          PR_BODY="${{ github.event.pull_request.body }}"
          if [[ "$PR_BODY" == *"[x] This PR should be backported"* ]]; then
            echo "backport=true" >> $GITHUB_OUTPUT
          else
            echo "backport=false" >> $GITHUB_OUTPUT
          fi

      - name: List and sort release branches
        id: list-branches
        run: |
          git fetch --all --tags
          BRANCHES=$(git for-each-ref --format='%(refname:short)' refs/remotes/origin/release/* | sed 's|origin/release/||' | sort -Vr)
          BETA=$(echo "$BRANCHES" | head -n 1)
          STABLE=$(echo "$BRANCHES" | head -n 2 | tail -n 1)
          echo "beta=release/$BETA" >> $GITHUB_OUTPUT
          echo "stable=release/$STABLE" >> $GITHUB_OUTPUT
          # Fetch latest tags for beta and stable
          LATEST_BETA_TAG=$(git tag -l "v[0-9]*.[0-9]*.[0-9]*-beta.[0-9]*" | grep -E "^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$" | grep -v -- "-cloud" | sort -Vr | head -n 1)
          LATEST_STABLE_TAG=$(git tag -l "v[0-9]*.[0-9]*.[0-9]*" | grep -E "^v[0-9]+\.[0-9]+\.[0-9]+$" | sort -Vr | head -n 1)

          # Handle case where no beta tags exist
          if [[ -z "$LATEST_BETA_TAG" ]]; then
            NEW_BETA_TAG="v1.0.0-beta.1"
          else
            NEW_BETA_TAG=$(echo $LATEST_BETA_TAG | awk -F '[.-]' '{print $1 "." $2 "." $3 "-beta." ($NF+1)}')
          fi

          # Increment latest stable tag
          NEW_STABLE_TAG=$(echo $LATEST_STABLE_TAG | awk -F '.' '{print $1 "." $2 "." ($3+1)}')
          echo "latest_beta_tag=$LATEST_BETA_TAG" >> $GITHUB_OUTPUT
          echo "latest_stable_tag=$LATEST_STABLE_TAG" >> $GITHUB_OUTPUT
          echo "new_beta_tag=$NEW_BETA_TAG" >> $GITHUB_OUTPUT
          echo "new_stable_tag=$NEW_STABLE_TAG" >> $GITHUB_OUTPUT

      - name: Echo branch and tag information
        run: |
          echo "Beta branch: ${{ steps.list-branches.outputs.beta }}"
          echo "Stable branch: ${{ steps.list-branches.outputs.stable }}"
          echo "Latest beta tag: ${{ steps.list-branches.outputs.latest_beta_tag }}"
          echo "Latest stable tag: ${{ steps.list-branches.outputs.latest_stable_tag }}"
          echo "New beta tag: ${{ steps.list-branches.outputs.new_beta_tag }}"
          echo "New stable tag: ${{ steps.list-branches.outputs.new_stable_tag }}"

      - name: Trigger Backport
        if: steps.checkbox-check.outputs.backport == 'true'
        run: |
          set -e
          echo "Backporting to beta ${{ steps.list-branches.outputs.beta }} and stable ${{ steps.list-branches.outputs.stable }}"

          # Echo the merge commit SHA
          echo "Merge commit SHA: ${{ github.event.pull_request.merge_commit_sha }}"

          # Fetch all history for all branches and tags
          git fetch --prune

          # Reset and prepare the beta branch
          git checkout ${{ steps.list-branches.outputs.beta }}
          echo "Last 5 commits on beta branch:"
          git log -n 5 --pretty=format:"%H"
          echo "" # Newline for formatting

          # Cherry-pick the merge commit from the merged PR
          git cherry-pick -m 1 ${{ github.event.pull_request.merge_commit_sha }} || {
            echo "Cherry-pick to beta failed due to conflicts."
            exit 1
          }

          # Create new beta branch/tag
          git tag ${{ steps.list-branches.outputs.new_beta_tag }}
          # Push the changes and tag to the beta branch using PAT
          git push origin ${{ steps.list-branches.outputs.beta }}
          git push origin ${{ steps.list-branches.outputs.new_beta_tag }}

          # Reset and prepare the stable branch
          git checkout ${{ steps.list-branches.outputs.stable }}
          echo "Last 5 commits on stable branch:"
          git log -n 5 --pretty=format:"%H"
          echo "" # Newline for formatting

          # Cherry-pick the merge commit from the merged PR
          git cherry-pick -m 1 ${{ github.event.pull_request.merge_commit_sha }} || {
            echo "Cherry-pick to stable failed due to conflicts."
            exit 1
          }

          # Create new stable branch/tag
          git tag ${{ steps.list-branches.outputs.new_stable_tag }}
          # Push the changes and tag to the stable branch using PAT
          git push origin ${{ steps.list-branches.outputs.stable }}
          git push origin ${{ steps.list-branches.outputs.new_stable_tag }}
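As a sanity check on the tag-bump arithmetic in the "List and sort release branches" step, here is a small self-contained step one could run in any workflow; the starting tag values are hypothetical and the awk expressions are copied from above.

      - name: Preview next tags (illustration only)
        run: |
          LATEST_BETA_TAG="v0.9.0-beta.3"     # hypothetical current beta tag
          LATEST_STABLE_TAG="v0.8.12"         # hypothetical current stable tag
          # same awk expressions as in the workflow above
          echo "$LATEST_BETA_TAG"   | awk -F '[.-]' '{print $1 "." $2 "." $3 "-beta." ($NF+1)}'  # prints v0.9.0-beta.4
          echo "$LATEST_STABLE_TAG" | awk -F '.' '{print $1 "." $2 "." ($3+1)}'                  # prints v0.8.13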
225  .github/workflows/pr-chromatic-tests.yml  (vendored, new file)

@@ -0,0 +1,225 @@
name: Run Chromatic Tests
concurrency:
  group: Run-Chromatic-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
  cancel-in-progress: true

on: push

env:
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

jobs:
  playwright-tests:
    name: Playwright Tests

    # See https://runs-on.com/runners/linux/
    runs-on: [runs-on,runner=8cpu-linux-x64,ram=16,"run-id=${{ github.run_id }}"]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'
          cache-dependency-path: |
            backend/requirements/default.txt
            backend/requirements/dev.txt
            backend/requirements/model_server.txt
      - run: |
          python -m pip install --upgrade pip
          pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
          pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt

      - name: Setup node
        uses: actions/setup-node@v4
        with:
          node-version: 22

      - name: Install node dependencies
        working-directory: ./web
        run: npm ci

      - name: Install playwright browsers
        working-directory: ./web
        run: npx playwright install --with-deps

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      # tag every docker image with "test" so that we can spin up the correct set
      # of images during testing

      # we use the runs-on cache for docker builds
      # in conjunction with runs-on runners, it has better speed and unlimited caching
      # https://runs-on.com/caching/s3-cache-for-github-actions/
      # https://runs-on.com/caching/docker/
      # https://github.com/moby/buildkit#s3-cache-experimental

      # images are built and run locally for testing purposes. Not pushed.

      - name: Build Web Docker image
        uses: ./.github/actions/custom-build-and-push
        with:
          context: ./web
          file: ./web/Dockerfile
          platforms: linux/amd64
          tags: danswer/danswer-web-server:test
          push: false
          load: true
          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/web-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
          cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/web-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max

      - name: Build Backend Docker image
        uses: ./.github/actions/custom-build-and-push
        with:
          context: ./backend
          file: ./backend/Dockerfile
          platforms: linux/amd64
          tags: danswer/danswer-backend:test
          push: false
          load: true
          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
          cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max

      - name: Build Model Server Docker image
        uses: ./.github/actions/custom-build-and-push
        with:
          context: ./backend
          file: ./backend/Dockerfile.model_server
          platforms: linux/amd64
          tags: danswer/danswer-model-server:test
          push: false
          load: true
          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
          cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max

      - name: Start Docker containers
        run: |
          cd deployment/docker_compose
          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
          AUTH_TYPE=basic \
          REQUIRE_EMAIL_VERIFICATION=false \
          DISABLE_TELEMETRY=true \
          IMAGE_TAG=test \
          docker compose -f docker-compose.dev.yml -p danswer-stack up -d
        id: start_docker

      - name: Wait for service to be ready
        run: |
          echo "Starting wait-for-service script..."

          docker logs -f danswer-stack-api_server-1 &

          start_time=$(date +%s)
          timeout=300 # 5 minutes in seconds

          while true; do
            current_time=$(date +%s)
            elapsed_time=$((current_time - start_time))

            if [ $elapsed_time -ge $timeout ]; then
              echo "Timeout reached. Service did not become ready in 5 minutes."
              exit 1
            fi

            # Use curl with error handling to ignore specific exit code 56
            response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")

            if [ "$response" = "200" ]; then
              echo "Service is ready!"
              break
            elif [ "$response" = "curl_error" ]; then
              echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
            else
              echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
            fi

            sleep 5
          done
          echo "Finished waiting for service."

      - name: Run pytest playwright test init
        working-directory: ./backend
        env:
          PYTEST_IGNORE_SKIP: true
        run: pytest -s tests/integration/tests/playwright/test_playwright.py

      - name: Run Playwright tests
        working-directory: ./web
        run: npx playwright test

      - uses: actions/upload-artifact@v4
        if: always()
        with:
          # Chromatic automatically defaults to the test-results directory.
          # Replace with the path to your custom directory and adjust the CHROMATIC_ARCHIVE_LOCATION environment variable accordingly.
          name: test-results
          path: ./web/test-results
          retention-days: 30

      # save before stopping the containers so the logs can be captured
      - name: Save Docker logs
        if: success() || failure()
        run: |
          cd deployment/docker_compose
          docker compose -f docker-compose.dev.yml -p danswer-stack logs > docker-compose.log
          mv docker-compose.log ${{ github.workspace }}/docker-compose.log

      - name: Upload logs
        if: success() || failure()
        uses: actions/upload-artifact@v4
        with:
          name: docker-logs
          path: ${{ github.workspace }}/docker-compose.log

      - name: Stop Docker containers
        run: |
          cd deployment/docker_compose
          docker compose -f docker-compose.dev.yml -p danswer-stack down -v

  chromatic-tests:
    name: Chromatic Tests

    needs: playwright-tests
    runs-on: [runs-on,runner=8cpu-linux-x64,ram=16,"run-id=${{ github.run_id }}"]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup node
        uses: actions/setup-node@v4
        with:
          node-version: 22

      - name: Install node dependencies
        working-directory: ./web
        run: npm ci

      - name: Download Playwright test results
        uses: actions/download-artifact@v4
        with:
          name: test-results
          path: ./web/test-results

      - name: Run Chromatic
        uses: chromaui/action@latest
        with:
          playwright: true
          projectToken: ${{ secrets.CHROMATIC_PROJECT_TOKEN }}
          workingDir: ./web
        env:
          CHROMATIC_ARCHIVE_LOCATION: ./test-results
72
.github/workflows/pr-helm-chart-testing.yml
vendored
Normal file
72
.github/workflows/pr-helm-chart-testing.yml
vendored
Normal file
@@ -0,0 +1,72 @@
|
||||
name: Helm - Lint and Test Charts
|
||||
|
||||
on:
|
||||
merge_group:
|
||||
pull_request:
|
||||
branches: [ main ]
|
||||
workflow_dispatch: # Allows manual triggering
|
||||
|
||||
jobs:
|
||||
helm-chart-check:
|
||||
# See https://runs-on.com/runners/linux/
|
||||
runs-on: [runs-on,runner=8cpu-linux-x64,hdd=256,"run-id=${{ github.run_id }}"]
|
||||
|
||||
# fetch-depth 0 is required for helm/chart-testing-action
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set up Helm
|
||||
uses: azure/setup-helm@v4.2.0
|
||||
with:
|
||||
version: v3.14.4
|
||||
|
||||
- name: Set up chart-testing
|
||||
uses: helm/chart-testing-action@v2.6.1
|
||||
|
||||
# even though we specify chart-dirs in ct.yaml, it isn't used by ct for the list-changed command...
|
||||
- name: Run chart-testing (list-changed)
|
||||
id: list-changed
|
||||
run: |
|
||||
echo "default_branch: ${{ github.event.repository.default_branch }}"
|
||||
changed=$(ct list-changed --remote origin --target-branch ${{ github.event.repository.default_branch }} --chart-dirs deployment/helm/charts)
|
||||
echo "list-changed output: $changed"
|
||||
if [[ -n "$changed" ]]; then
|
||||
echo "changed=true" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
# rkuo: I don't think we need python?
|
||||
# - name: Set up Python
|
||||
# uses: actions/setup-python@v5
|
||||
# with:
|
||||
# python-version: '3.11'
|
||||
# cache: 'pip'
|
||||
# cache-dependency-path: |
|
||||
# backend/requirements/default.txt
|
||||
# backend/requirements/dev.txt
|
||||
# backend/requirements/model_server.txt
|
||||
# - run: |
|
||||
# python -m pip install --upgrade pip
|
||||
# pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
|
||||
# pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
|
||||
# pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
|
||||
|
||||
# lint all charts if any changes were detected
|
||||
- name: Run chart-testing (lint)
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
run: ct lint --config ct.yaml --all
|
||||
# the following would lint only changed charts, but linting isn't expensive
|
||||
# run: ct lint --config ct.yaml --target-branch ${{ github.event.repository.default_branch }}
|
||||
|
||||
- name: Create kind cluster
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
uses: helm/kind-action@v1.10.0
|
||||
|
||||
- name: Run chart-testing (install)
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
run: ct install --all --helm-extra-set-args="--set=nginx.enabled=false" --debug --config ct.yaml
|
||||
# the following would install only changed charts, but we only have one chart so
|
||||
# don't worry about that for now
|
||||
# run: ct install --target-branch ${{ github.event.repository.default_branch }}
|
||||
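For local debugging, the same chart-testing checks can be run outside CI. A minimal sketch, assuming the ct CLI, Helm, and a kind cluster are available locally; the flags simply mirror the workflow steps above:

ct list-changed --remote origin --target-branch main --chart-dirs deployment/helm/charts
ct lint --config ct.yaml --all
ct install --all --helm-extra-set-args="--set=nginx.enabled=false" --debug --config ct.yaml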
@@ -1,68 +0,0 @@
|
||||
# This workflow is intentionally disabled while we're still working on it
|
||||
# It's close to ready, but a race condition needs to be fixed with
|
||||
# API server and Vespa startup, and it needs to have a way to build/test against
|
||||
# local containers
|
||||
|
||||
name: Helm - Lint and Test Charts
|
||||
|
||||
on:
|
||||
merge_group:
|
||||
pull_request:
|
||||
branches: [ main ]
|
||||
|
||||
jobs:
|
||||
lint-test:
|
||||
# See https://runs-on.com/runners/linux/
|
||||
runs-on: [runs-on,runner=8cpu-linux-x64,hdd=256,"run-id=${{ github.run_id }}"]
|
||||
|
||||
# fetch-depth 0 is required for helm/chart-testing-action
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set up Helm
|
||||
uses: azure/setup-helm@v4.2.0
|
||||
with:
|
||||
version: v3.14.4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: 'pip'
|
||||
cache-dependency-path: |
|
||||
backend/requirements/default.txt
|
||||
backend/requirements/dev.txt
|
||||
backend/requirements/model_server.txt
|
||||
- run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
|
||||
|
||||
- name: Set up chart-testing
|
||||
uses: helm/chart-testing-action@v2.6.1
|
||||
|
||||
- name: Run chart-testing (list-changed)
|
||||
id: list-changed
|
||||
run: |
|
||||
changed=$(ct list-changed --target-branch ${{ github.event.repository.default_branch }})
|
||||
if [[ -n "$changed" ]]; then
|
||||
echo "changed=true" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
- name: Run chart-testing (lint)
|
||||
# if: steps.list-changed.outputs.changed == 'true'
|
||||
run: ct lint --all --config ct.yaml --target-branch ${{ github.event.repository.default_branch }}
|
||||
|
||||
- name: Create kind cluster
|
||||
# if: steps.list-changed.outputs.changed == 'true'
|
||||
uses: helm/kind-action@v1.10.0
|
||||
|
||||
- name: Run chart-testing (install)
|
||||
# if: steps.list-changed.outputs.changed == 'true'
|
||||
run: ct install --all --config ct.yaml
|
||||
# run: ct install --target-branch ${{ github.event.repository.default_branch }}
|
||||
|
||||
243
.github/workflows/pr-integration-tests.yml
vendored
Normal file
@@ -0,0 +1,243 @@
|
||||
name: Run Integration Tests v2
|
||||
concurrency:
|
||||
group: Run-Integration-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
on:
|
||||
merge_group:
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
- 'release/**'
|
||||
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
|
||||
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
|
||||
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
|
||||
|
||||
jobs:
|
||||
integration-tests:
|
||||
# See https://runs-on.com/runners/linux/
|
||||
runs-on: [runs-on,runner=8cpu-linux-x64,ram=16,"run-id=${{ github.run_id }}"]
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
# tag every docker image with "test" so that we can spin up the correct set
|
||||
# of images during testing
|
||||
|
||||
# We don't need to build the Web Docker image since it's not yet used
|
||||
# in the integration tests. We have a separate action to verify that it builds
|
||||
# successfully.
|
||||
- name: Pull Web Docker image
|
||||
run: |
|
||||
docker pull danswer/danswer-web-server:latest
|
||||
docker tag danswer/danswer-web-server:latest danswer/danswer-web-server:test
|
||||
|
||||
# we use the runs-on cache for docker builds
|
||||
# in conjunction with runs-on runners, it has better speed and unlimited caching
|
||||
# https://runs-on.com/caching/s3-cache-for-github-actions/
|
||||
# https://runs-on.com/caching/docker/
|
||||
# https://github.com/moby/buildkit#s3-cache-experimental
|
||||
|
||||
# images are built and run locally for testing purposes. Not pushed.
|
||||
- name: Build Backend Docker image
|
||||
uses: ./.github/actions/custom-build-and-push
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile
|
||||
platforms: linux/amd64
|
||||
tags: danswer/danswer-backend:test
|
||||
push: false
|
||||
load: true
|
||||
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
|
||||
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
|
||||
|
||||
- name: Build Model Server Docker image
|
||||
uses: ./.github/actions/custom-build-and-push
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile.model_server
|
||||
platforms: linux/amd64
|
||||
tags: danswer/danswer-model-server:test
|
||||
push: false
|
||||
load: true
|
||||
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
|
||||
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
|
||||
|
||||
- name: Build integration test Docker image
|
||||
uses: ./.github/actions/custom-build-and-push
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/tests/integration/Dockerfile
|
||||
platforms: linux/amd64
|
||||
tags: danswer/danswer-integration:test
|
||||
push: false
|
||||
load: true
|
||||
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
|
||||
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
|
||||
|
||||
# Start containers for multi-tenant tests
|
||||
- name: Start Docker containers for multi-tenant tests
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
|
||||
MULTI_TENANT=true \
|
||||
AUTH_TYPE=basic \
|
||||
REQUIRE_EMAIL_VERIFICATION=false \
|
||||
DISABLE_TELEMETRY=true \
|
||||
IMAGE_TAG=test \
|
||||
docker compose -f docker-compose.dev.yml -p danswer-stack up -d
|
||||
id: start_docker_multi_tenant
|
||||
|
||||
# In practice, `cloud` Auth type would require OAUTH credentials to be set.
|
||||
- name: Run Multi-Tenant Integration Tests
|
||||
run: |
|
||||
echo "Running integration tests..."
|
||||
docker run --rm --network danswer-stack_default \
|
||||
--name test-runner \
|
||||
-e POSTGRES_HOST=relational_db \
|
||||
-e POSTGRES_USER=postgres \
|
||||
-e POSTGRES_PASSWORD=password \
|
||||
-e POSTGRES_DB=postgres \
|
||||
-e VESPA_HOST=index \
|
||||
-e REDIS_HOST=cache \
|
||||
-e API_SERVER_HOST=api_server \
|
||||
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
|
||||
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
|
||||
-e TEST_WEB_HOSTNAME=test-runner \
|
||||
-e AUTH_TYPE=cloud \
|
||||
-e MULTI_TENANT=true \
|
||||
danswer/danswer-integration:test \
|
||||
/app/tests/integration/multitenant_tests
|
||||
continue-on-error: true
|
||||
id: run_multitenant_tests
|
||||
|
||||
- name: Check multi-tenant test results
|
||||
run: |
|
||||
if [ ${{ steps.run_multitenant_tests.outcome }} == 'failure' ]; then
|
||||
echo "Integration tests failed. Exiting with error."
|
||||
exit 1
|
||||
else
|
||||
echo "All integration tests passed successfully."
|
||||
fi
|
||||
|
||||
- name: Stop multi-tenant Docker containers
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.dev.yml -p danswer-stack down -v
|
||||
|
||||
|
||||
- name: Start Docker containers
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
|
||||
AUTH_TYPE=basic \
|
||||
REQUIRE_EMAIL_VERIFICATION=false \
|
||||
DISABLE_TELEMETRY=true \
|
||||
IMAGE_TAG=test \
|
||||
docker compose -f docker-compose.dev.yml -p danswer-stack up -d
|
||||
id: start_docker
|
||||
|
||||
- name: Wait for service to be ready
|
||||
run: |
|
||||
echo "Starting wait-for-service script..."
|
||||
|
||||
docker logs -f danswer-stack-api_server-1 &
|
||||
|
||||
start_time=$(date +%s)
|
||||
timeout=300 # 5 minutes in seconds
|
||||
|
||||
while true; do
|
||||
current_time=$(date +%s)
|
||||
elapsed_time=$((current_time - start_time))
|
||||
|
||||
if [ $elapsed_time -ge $timeout ]; then
|
||||
echo "Timeout reached. Service did not become ready in 5 minutes."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Use curl with error handling to ignore specific exit code 56
|
||||
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
|
||||
|
||||
if [ "$response" = "200" ]; then
|
||||
echo "Service is ready!"
|
||||
break
|
||||
elif [ "$response" = "curl_error" ]; then
|
||||
echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
|
||||
else
|
||||
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
|
||||
fi
|
||||
|
||||
sleep 5
|
||||
done
|
||||
echo "Finished waiting for service."
|
||||
|
||||
- name: Run Standard Integration Tests
|
||||
run: |
|
||||
echo "Running integration tests..."
|
||||
docker run --rm --network danswer-stack_default \
|
||||
--name test-runner \
|
||||
-e POSTGRES_HOST=relational_db \
|
||||
-e POSTGRES_USER=postgres \
|
||||
-e POSTGRES_PASSWORD=password \
|
||||
-e POSTGRES_DB=postgres \
|
||||
-e VESPA_HOST=index \
|
||||
-e REDIS_HOST=cache \
|
||||
-e API_SERVER_HOST=api_server \
|
||||
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
|
||||
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
|
||||
-e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
|
||||
-e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
|
||||
-e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
|
||||
-e TEST_WEB_HOSTNAME=test-runner \
|
||||
danswer/danswer-integration:test \
|
||||
/app/tests/integration/tests \
|
||||
/app/tests/integration/connector_job_tests
|
||||
continue-on-error: true
|
||||
id: run_tests
|
||||
|
||||
- name: Check test results
|
||||
run: |
|
||||
if [ ${{ steps.run_tests.outcome }} == 'failure' ]; then
|
||||
echo "Integration tests failed. Exiting with error."
|
||||
exit 1
|
||||
else
|
||||
echo "All integration tests passed successfully."
|
||||
fi
|
||||
|
||||
# save before stopping the containers so the logs can be captured
|
||||
- name: Save Docker logs
|
||||
if: success() || failure()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.dev.yml -p danswer-stack logs > docker-compose.log
|
||||
mv docker-compose.log ${{ github.workspace }}/docker-compose.log
|
||||
|
||||
- name: Stop Docker containers
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.dev.yml -p danswer-stack down -v
|
||||
|
||||
- name: Upload logs
|
||||
if: success() || failure()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: docker-logs
|
||||
path: ${{ github.workspace }}/docker-compose.log
|
||||
|
||||
- name: Stop Docker containers
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.dev.yml -p danswer-stack down -v
|
||||
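A minimal sketch of reproducing the standard integration-test run locally, assuming the images were built with IMAGE_TAG=test as in the steps above; hostnames and credentials are the compose defaults shown in the workflow:

cd deployment/docker_compose
AUTH_TYPE=basic REQUIRE_EMAIL_VERIFICATION=false IMAGE_TAG=test \
  docker compose -f docker-compose.dev.yml -p danswer-stack up -d
docker run --rm --network danswer-stack_default \
  -e POSTGRES_HOST=relational_db -e POSTGRES_USER=postgres \
  -e POSTGRES_PASSWORD=password -e POSTGRES_DB=postgres \
  -e VESPA_HOST=index -e REDIS_HOST=cache -e API_SERVER_HOST=api_server \
  -e OPENAI_API_KEY=$OPENAI_API_KEY \
  danswer/danswer-integration:test /app/tests/integration/tests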
8
.github/workflows/pr-python-checks.yml
vendored
@@ -3,7 +3,9 @@ name: Python Checks
|
||||
on:
|
||||
merge_group:
|
||||
pull_request:
|
||||
branches: [ main ]
|
||||
branches:
|
||||
- main
|
||||
- 'release/**'
|
||||
|
||||
jobs:
|
||||
mypy-check:
|
||||
@@ -12,10 +14,10 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: 'pip'
|
||||
|
||||
13
.github/workflows/pr-python-connector-tests.yml
vendored
@@ -15,6 +15,17 @@ env:
|
||||
CONFLUENCE_TEST_PAGE_ID: ${{ secrets.CONFLUENCE_TEST_PAGE_ID }}
|
||||
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
|
||||
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
|
||||
# Jira
|
||||
JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
|
||||
JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
|
||||
# Google
|
||||
GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR: ${{ secrets.GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR }}
|
||||
GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1: ${{ secrets.GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1 }}
|
||||
GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR }}
|
||||
GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR: ${{ secrets.GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR }}
|
||||
GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR }}
|
||||
# Slab
|
||||
SLAB_BOT_TOKEN: ${{ secrets.SLAB_BOT_TOKEN }}
|
||||
|
||||
jobs:
|
||||
connectors-check:
|
||||
@@ -29,7 +40,7 @@ jobs:
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
cache: "pip"
|
||||
|
||||
58
.github/workflows/pr-python-model-tests.yml
vendored
Normal file
@@ -0,0 +1,58 @@
|
||||
name: Connector Tests
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# This cron expression runs the job daily at 16:00 UTC (9am PT)
|
||||
- cron: "0 16 * * *"
|
||||
|
||||
env:
|
||||
# Bedrock
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION_NAME: ${{ secrets.AWS_REGION_NAME }}
|
||||
|
||||
# OpenAI
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
|
||||
jobs:
|
||||
model-check:
|
||||
# See https://runs-on.com/runners/linux/
|
||||
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
|
||||
|
||||
env:
|
||||
PYTHONPATH: ./backend
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
cache: "pip"
|
||||
cache-dependency-path: |
|
||||
backend/requirements/default.txt
|
||||
backend/requirements/dev.txt
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
|
||||
|
||||
- name: Run Tests
|
||||
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
|
||||
run: |
|
||||
py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/llm
|
||||
py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/embedding
|
||||
|
||||
- name: Alert on Failure
|
||||
if: failure() && github.event_name == 'schedule'
|
||||
env:
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
run: |
|
||||
curl -X POST \
|
||||
-H 'Content-type: application/json' \
|
||||
--data '{"text":"Scheduled Model Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
|
||||
$SLACK_WEBHOOK
|
||||
6
.github/workflows/pr-python-tests.yml
vendored
@@ -3,7 +3,9 @@ name: Python Unit Tests
|
||||
on:
|
||||
merge_group:
|
||||
pull_request:
|
||||
branches: [ main ]
|
||||
branches:
|
||||
- main
|
||||
- 'release/**'
|
||||
|
||||
jobs:
|
||||
backend-check:
|
||||
@@ -19,7 +21,7 @@ jobs:
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: 'pip'
|
||||
|
||||
4
.github/workflows/pr-quality-checks.yml
vendored
@@ -1,6 +1,6 @@
|
||||
name: Quality Checks PR
|
||||
concurrency:
|
||||
group: Quality-Checks-PR-${{ github.head_ref }}
|
||||
group: Quality-Checks-PR-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
on:
|
||||
@@ -18,6 +18,6 @@ jobs:
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
- uses: pre-commit/action@v3.0.0
|
||||
- uses: pre-commit/action@v3.0.1
|
||||
with:
|
||||
extra_args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || '' }}
|
||||
|
||||
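The extra_args above limit pre-commit to the files touched in the PR; a rough local equivalent, assuming origin/main is the base branch:

pip install pre-commit
pre-commit run --from-ref origin/main --to-ref HEAD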
162
.github/workflows/run-it.yml
vendored
@@ -1,162 +0,0 @@
|
||||
name: Run Integration Tests
|
||||
concurrency:
|
||||
group: Run-Integration-Tests-${{ github.head_ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
on:
|
||||
merge_group:
|
||||
pull_request:
|
||||
branches: [ main ]
|
||||
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
|
||||
jobs:
|
||||
integration-tests:
|
||||
# See https://runs-on.com/runners/linux/
|
||||
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
# NOTE: we don't need to build the Web Docker image since it's not used
|
||||
# during the IT for now. We have a separate action to verify it builds
|
||||
# succesfully
|
||||
- name: Pull Web Docker image
|
||||
run: |
|
||||
docker pull danswer/danswer-web-server:latest
|
||||
docker tag danswer/danswer-web-server:latest danswer/danswer-web-server:it
|
||||
|
||||
- name: Build Backend Docker image
|
||||
uses: ./.github/actions/custom-build-and-push
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile
|
||||
platforms: linux/amd64
|
||||
tags: danswer/danswer-backend:it
|
||||
cache-from: type=registry,ref=danswer/danswer-backend:it
|
||||
cache-to: |
|
||||
type=registry,ref=danswer/danswer-backend:it,mode=max
|
||||
type=inline
|
||||
|
||||
- name: Build Model Server Docker image
|
||||
uses: ./.github/actions/custom-build-and-push
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/Dockerfile.model_server
|
||||
platforms: linux/amd64
|
||||
tags: danswer/danswer-model-server:it
|
||||
cache-from: type=registry,ref=danswer/danswer-model-server:it
|
||||
cache-to: |
|
||||
type=registry,ref=danswer/danswer-model-server:it,mode=max
|
||||
type=inline
|
||||
|
||||
- name: Build integration test Docker image
|
||||
uses: ./.github/actions/custom-build-and-push
|
||||
with:
|
||||
context: ./backend
|
||||
file: ./backend/tests/integration/Dockerfile
|
||||
platforms: linux/amd64
|
||||
tags: danswer/integration-test-runner:it
|
||||
cache-from: type=registry,ref=danswer/integration-test-runner:it
|
||||
cache-to: |
|
||||
type=registry,ref=danswer/integration-test-runner:it,mode=max
|
||||
type=inline
|
||||
|
||||
- name: Start Docker containers
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
|
||||
AUTH_TYPE=basic \
|
||||
REQUIRE_EMAIL_VERIFICATION=false \
|
||||
DISABLE_TELEMETRY=true \
|
||||
IMAGE_TAG=it \
|
||||
docker compose -f docker-compose.dev.yml -p danswer-stack up -d
|
||||
id: start_docker
|
||||
|
||||
- name: Wait for service to be ready
|
||||
run: |
|
||||
echo "Starting wait-for-service script..."
|
||||
|
||||
docker logs -f danswer-stack-api_server-1 &
|
||||
|
||||
start_time=$(date +%s)
|
||||
timeout=300 # 5 minutes in seconds
|
||||
|
||||
while true; do
|
||||
current_time=$(date +%s)
|
||||
elapsed_time=$((current_time - start_time))
|
||||
|
||||
if [ $elapsed_time -ge $timeout ]; then
|
||||
echo "Timeout reached. Service did not become ready in 5 minutes."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Use curl with error handling to ignore specific exit code 56
|
||||
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
|
||||
|
||||
if [ "$response" = "200" ]; then
|
||||
echo "Service is ready!"
|
||||
break
|
||||
elif [ "$response" = "curl_error" ]; then
|
||||
echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
|
||||
else
|
||||
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
|
||||
fi
|
||||
|
||||
sleep 5
|
||||
done
|
||||
echo "Finished waiting for service."
|
||||
|
||||
- name: Run integration tests
|
||||
run: |
|
||||
echo "Running integration tests..."
|
||||
docker run --rm --network danswer-stack_default \
|
||||
-e POSTGRES_HOST=relational_db \
|
||||
-e POSTGRES_USER=postgres \
|
||||
-e POSTGRES_PASSWORD=password \
|
||||
-e POSTGRES_DB=postgres \
|
||||
-e VESPA_HOST=index \
|
||||
-e REDIS_HOST=cache \
|
||||
-e API_SERVER_HOST=api_server \
|
||||
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
|
||||
danswer/integration-test-runner:it
|
||||
continue-on-error: true
|
||||
id: run_tests
|
||||
|
||||
- name: Check test results
|
||||
run: |
|
||||
if [ ${{ steps.run_tests.outcome }} == 'failure' ]; then
|
||||
echo "Integration tests failed. Exiting with error."
|
||||
exit 1
|
||||
else
|
||||
echo "All integration tests passed successfully."
|
||||
fi
|
||||
|
||||
- name: Save Docker logs
|
||||
if: success() || failure()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.dev.yml -p danswer-stack logs > docker-compose.log
|
||||
mv docker-compose.log ${{ github.workspace }}/docker-compose.log
|
||||
|
||||
- name: Upload logs
|
||||
if: success() || failure()
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: docker-logs
|
||||
path: ${{ github.workspace }}/docker-compose.log
|
||||
|
||||
- name: Stop Docker containers
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.dev.yml -p danswer-stack down -v
|
||||
54
.github/workflows/tag-nightly.yml
vendored
Normal file
@@ -0,0 +1,54 @@
|
||||
name: Nightly Tag Push
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 10 * * *' # Runs every day at 2 AM PST / 3 AM PDT / 10 AM UTC
|
||||
|
||||
permissions:
|
||||
contents: write # Allows pushing tags to the repository
|
||||
|
||||
jobs:
|
||||
create-and-push-tag:
|
||||
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"]
|
||||
|
||||
steps:
|
||||
# actions using GITHUB_TOKEN cannot trigger another workflow, but we do want this to trigger docker pushes
|
||||
# see https://github.com/orgs/community/discussions/27028#discussioncomment-3254367 for the workaround we
|
||||
# implement here which needs an actual user's deploy key
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"
|
||||
|
||||
- name: Set up Git user
|
||||
run: |
|
||||
git config user.name "Richard Kuo [bot]"
|
||||
git config user.email "rkuo[bot]@danswer.ai"
|
||||
|
||||
- name: Check for existing nightly tag
|
||||
id: check_tag
|
||||
run: |
|
||||
if git tag --points-at HEAD --list "nightly-latest*" | grep -q .; then
|
||||
echo "A tag starting with 'nightly-latest' already exists on HEAD."
|
||||
echo "tag_exists=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "No tag starting with 'nightly-latest' exists on HEAD."
|
||||
echo "tag_exists=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
# don't tag again if HEAD already has a nightly-latest tag on it
|
||||
- name: Create Nightly Tag
|
||||
if: steps.check_tag.outputs.tag_exists == 'false'
|
||||
env:
|
||||
DATE: ${{ github.run_id }}
|
||||
run: |
|
||||
TAG_NAME="nightly-latest-$(date +'%Y%m%d')"
|
||||
echo "Creating tag: $TAG_NAME"
|
||||
git tag $TAG_NAME
|
||||
|
||||
- name: Push Tag
|
||||
if: steps.check_tag.outputs.tag_exists == 'false'
|
||||
run: |
|
||||
TAG_NAME="nightly-latest-$(date +'%Y%m%d')"
|
||||
git push origin $TAG_NAME
|
||||
|
||||
1
.gitignore
vendored
@@ -7,3 +7,4 @@
|
||||
.vscode/
|
||||
*.sw?
|
||||
/backend/tests/regression/answer_quality/search_test_config.yaml
|
||||
/web/test-results/
|
||||
1
.prettierignore
Normal file
@@ -0,0 +1 @@
|
||||
backend/tests/integration/tests/pruning/website
|
||||
300
.vscode/launch.template.jsonc
vendored
@@ -6,19 +6,69 @@
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"compounds": [
|
||||
{
|
||||
// Dummy entry used to label the group
|
||||
"name": "--- Compound ---",
|
||||
"configurations": [
|
||||
"--- Individual ---"
|
||||
],
|
||||
"presentation": {
|
||||
"group": "1",
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Run All Danswer Services",
|
||||
"configurations": [
|
||||
"Web Server",
|
||||
"Model Server",
|
||||
"API Server",
|
||||
"Indexing",
|
||||
"Background Jobs",
|
||||
"Slack Bot"
|
||||
]
|
||||
}
|
||||
"Slack Bot",
|
||||
"Celery primary",
|
||||
"Celery light",
|
||||
"Celery heavy",
|
||||
"Celery indexing",
|
||||
"Celery beat",
|
||||
],
|
||||
"presentation": {
|
||||
"group": "1",
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Web / Model / API",
|
||||
"configurations": [
|
||||
"Web Server",
|
||||
"Model Server",
|
||||
"API Server",
|
||||
],
|
||||
"presentation": {
|
||||
"group": "1",
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Celery (all)",
|
||||
"configurations": [
|
||||
"Celery primary",
|
||||
"Celery light",
|
||||
"Celery heavy",
|
||||
"Celery indexing",
|
||||
"Celery beat"
|
||||
],
|
||||
"presentation": {
|
||||
"group": "1",
|
||||
}
|
||||
}
|
||||
],
|
||||
"configurations": [
|
||||
{
|
||||
// Dummy entry used to label the group
|
||||
"name": "--- Individual ---",
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"presentation": {
|
||||
"group": "2",
|
||||
"order": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Web Server",
|
||||
"type": "node",
|
||||
@@ -29,7 +79,11 @@
|
||||
"runtimeArgs": [
|
||||
"run", "dev"
|
||||
],
|
||||
"console": "integratedTerminal"
|
||||
"presentation": {
|
||||
"group": "2",
|
||||
},
|
||||
"console": "integratedTerminal",
|
||||
"consoleTitle": "Web Server Console"
|
||||
},
|
||||
{
|
||||
"name": "Model Server",
|
||||
@@ -48,7 +102,11 @@
|
||||
"--reload",
|
||||
"--port",
|
||||
"9000"
|
||||
]
|
||||
],
|
||||
"presentation": {
|
||||
"group": "2",
|
||||
},
|
||||
"consoleTitle": "Model Server Console"
|
||||
},
|
||||
{
|
||||
"name": "API Server",
|
||||
@@ -68,43 +126,13 @@
|
||||
"--reload",
|
||||
"--port",
|
||||
"8080"
|
||||
]
|
||||
],
|
||||
"presentation": {
|
||||
"group": "2",
|
||||
},
|
||||
"consoleTitle": "API Server Console"
|
||||
},
|
||||
{
|
||||
"name": "Indexing",
|
||||
"consoleName": "Indexing",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "danswer/background/update.py",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"ENABLE_MULTIPASS_INDEXING": "false",
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
}
|
||||
},
|
||||
// Celery and all async jobs, usually would include indexing as well but this is handled separately above for dev
|
||||
{
|
||||
"name": "Background Jobs",
|
||||
"consoleName": "Background Jobs",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "scripts/dev_run_background_jobs.py",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_DANSWER_MODEL_INTERACTIONS": "True",
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
"args": [
|
||||
"--no-indexing"
|
||||
]
|
||||
},
|
||||
// For the listner to access the Slack API,
|
||||
// For the listener to access the Slack API,
|
||||
// DANSWER_BOT_SLACK_APP_TOKEN & DANSWER_BOT_SLACK_BOT_TOKEN need to be set in .env file located in the root of the project
|
||||
{
|
||||
"name": "Slack Bot",
|
||||
@@ -118,7 +146,151 @@
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
}
|
||||
},
|
||||
"presentation": {
|
||||
"group": "2",
|
||||
},
|
||||
"consoleTitle": "Slack Bot Console"
|
||||
},
|
||||
{
|
||||
"name": "Celery primary",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "celery",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_LEVEL": "INFO",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
"args": [
|
||||
"-A",
|
||||
"danswer.background.celery.versioned_apps.primary",
|
||||
"worker",
|
||||
"--pool=threads",
|
||||
"--concurrency=4",
|
||||
"--prefetch-multiplier=1",
|
||||
"--loglevel=INFO",
|
||||
"--hostname=primary@%n",
|
||||
"-Q",
|
||||
"celery",
|
||||
],
|
||||
"presentation": {
|
||||
"group": "2",
|
||||
},
|
||||
"consoleTitle": "Celery primary Console"
|
||||
},
|
||||
{
|
||||
"name": "Celery light",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "celery",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_LEVEL": "INFO",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
"args": [
|
||||
"-A",
|
||||
"danswer.background.celery.versioned_apps.light",
|
||||
"worker",
|
||||
"--pool=threads",
|
||||
"--concurrency=64",
|
||||
"--prefetch-multiplier=8",
|
||||
"--loglevel=INFO",
|
||||
"--hostname=light@%n",
|
||||
"-Q",
|
||||
"vespa_metadata_sync,connector_deletion,doc_permissions_upsert",
|
||||
],
|
||||
"presentation": {
|
||||
"group": "2",
|
||||
},
|
||||
"consoleTitle": "Celery light Console"
|
||||
},
|
||||
{
|
||||
"name": "Celery heavy",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "celery",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_LEVEL": "INFO",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
"args": [
|
||||
"-A",
|
||||
"danswer.background.celery.versioned_apps.heavy",
|
||||
"worker",
|
||||
"--pool=threads",
|
||||
"--concurrency=4",
|
||||
"--prefetch-multiplier=1",
|
||||
"--loglevel=INFO",
|
||||
"--hostname=heavy@%n",
|
||||
"-Q",
|
||||
"connector_pruning,connector_doc_permissions_sync,connector_external_group_sync",
|
||||
],
|
||||
"presentation": {
|
||||
"group": "2",
|
||||
},
|
||||
"consoleTitle": "Celery heavy Console"
|
||||
},
|
||||
{
|
||||
"name": "Celery indexing",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "celery",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"ENABLE_MULTIPASS_INDEXING": "false",
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
"args": [
|
||||
"-A",
|
||||
"danswer.background.celery.versioned_apps.indexing",
|
||||
"worker",
|
||||
"--pool=threads",
|
||||
"--concurrency=1",
|
||||
"--prefetch-multiplier=1",
|
||||
"--loglevel=INFO",
|
||||
"--hostname=indexing@%n",
|
||||
"-Q",
|
||||
"connector_indexing",
|
||||
],
|
||||
"presentation": {
|
||||
"group": "2",
|
||||
},
|
||||
"consoleTitle": "Celery indexing Console"
|
||||
},
|
||||
{
|
||||
"name": "Celery beat",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "celery",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
"args": [
|
||||
"-A",
|
||||
"danswer.background.celery.versioned_apps.beat",
|
||||
"beat",
|
||||
"--loglevel=INFO",
|
||||
],
|
||||
"presentation": {
|
||||
"group": "2",
|
||||
},
|
||||
"consoleTitle": "Celery beat Console"
|
||||
},
|
||||
{
|
||||
"name": "Pytest",
|
||||
@@ -137,8 +309,22 @@
|
||||
"-v"
|
||||
// Specify a specific module/test to run or provide nothing to run all tests
|
||||
//"tests/unit/danswer/llm/answering/test_prune_and_merge.py"
|
||||
]
|
||||
],
|
||||
"presentation": {
|
||||
"group": "2",
|
||||
},
|
||||
"consoleTitle": "Pytest Console"
|
||||
},
|
||||
{
|
||||
// Dummy entry used to label the group
|
||||
"name": "--- Tasks ---",
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"presentation": {
|
||||
"group": "3",
|
||||
"order": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Clear and Restart External Volumes and Containers",
|
||||
"type": "node",
|
||||
@@ -147,7 +333,27 @@
|
||||
"runtimeArgs": ["${workspaceFolder}/backend/scripts/restart_containers.sh"],
|
||||
"cwd": "${workspaceFolder}",
|
||||
"console": "integratedTerminal",
|
||||
"stopOnEntry": true
|
||||
}
|
||||
"stopOnEntry": true,
|
||||
"presentation": {
|
||||
"group": "3",
|
||||
},
|
||||
},
|
||||
{
|
||||
// Celery jobs launched through a single background script (legacy)
|
||||
// Recommend using the "Celery (all)" compound launch instead.
|
||||
"name": "Background Jobs",
|
||||
"consoleName": "Background Jobs",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "scripts/dev_run_background_jobs.py",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_DANSWER_MODEL_INTERACTIONS": "True",
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
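The launch entries above map directly onto Celery CLI invocations; for example, a sketch of the shell equivalent of the "Celery primary" configuration (run from backend/ with the same environment loaded):

celery -A danswer.background.celery.versioned_apps.primary worker \
  --pool=threads --concurrency=4 --prefetch-multiplier=1 \
  --loglevel=INFO --hostname=primary@%n -Q celery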
@@ -22,7 +22,7 @@ Your input is vital to making sure that Danswer moves in the right direction.
|
||||
Before starting on implementation, please raise a GitHub issue.
|
||||
|
||||
And always feel free to message us (Chris Weaver / Yuhong Sun) on
|
||||
[Slack](https://join.slack.com/t/danswer/shared_invite/zt-2afut44lv-Rw3kSWu6_OmdAXRpCv80DQ) /
|
||||
[Slack](https://join.slack.com/t/danswer/shared_invite/zt-2lcmqw703-071hBuZBfNEOGUsLa5PXvQ) /
|
||||
[Discord](https://discord.gg/TDJ59cGV2X) directly about anything at all.
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ To contribute to this project, please follow the
|
||||
When opening a pull request, mention related issues and feel free to tag relevant maintainers.
|
||||
|
||||
Before creating a pull request please make sure that the new changes conform to the formatting and linting requirements.
|
||||
See the [Formatting and Linting](#-formatting-and-linting) section for how to run these checks locally.
|
||||
See the [Formatting and Linting](#formatting-and-linting) section for how to run these checks locally.
|
||||
|
||||
|
||||
### Getting Help 🙋
|
||||
|
||||
23
README.md
@@ -1,4 +1,5 @@
|
||||
<!-- DANSWER_METADATA={"link": "https://github.com/danswer-ai/danswer/blob/main/README.md"} -->
|
||||
<a name="readme-top"></a>
|
||||
|
||||
<h2 align="center">
|
||||
<a href="https://www.danswer.ai/"> <img width="50%" src="https://github.com/danswer-owners/danswer/blob/1fabd9372d66cd54238847197c33f091a724803b/DanswerWithName.png?raw=true)" /></a>
|
||||
@@ -11,7 +12,7 @@
|
||||
<a href="https://docs.danswer.dev/" target="_blank">
|
||||
<img src="https://img.shields.io/badge/docs-view-blue" alt="Documentation">
|
||||
</a>
|
||||
<a href="https://join.slack.com/t/danswer/shared_invite/zt-2lcmqw703-071hBuZBfNEOGUsLa5PXvQ" target="_blank">
|
||||
<a href="https://join.slack.com/t/danswer/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA" target="_blank">
|
||||
<img src="https://img.shields.io/badge/slack-join-blue.svg?logo=slack" alt="Slack">
|
||||
</a>
|
||||
<a href="https://discord.gg/TDJ59cGV2X" target="_blank">
|
||||
@@ -68,13 +69,13 @@ We also have built-in support for deployment on Kubernetes. Files for that can b
|
||||
|
||||
## 🚧 Roadmap
|
||||
* Chat/Prompt sharing with specific teammates and user groups.
|
||||
* Multi-Model model support, chat with images, video etc.
|
||||
* Multimodal model support, chat with images, video etc.
|
||||
* Choosing between LLMs and parameters during chat session.
|
||||
* Tool calling and agent configurations options.
|
||||
* Organizational understanding and ability to locate and suggest experts from your team.
|
||||
|
||||
|
||||
## Other Noteable Benefits of Danswer
|
||||
## Other Notable Benefits of Danswer
|
||||
* User Authentication with document level access management.
|
||||
* Best in class Hybrid Search across all sources (BM-25 + prefix aware embedding models).
|
||||
* Admin Dashboard to configure connectors, document-sets, access, etc.
|
||||
@@ -127,3 +128,19 @@ To try the Danswer Enterprise Edition:
|
||||
|
||||
## 💡 Contributing
|
||||
Looking to contribute? Please check out the [Contribution Guide](CONTRIBUTING.md) for more details.
|
||||
|
||||
## ⭐Star History
|
||||
|
||||
[](https://star-history.com/#danswer-ai/danswer&Date)
|
||||
|
||||
## ✨Contributors
|
||||
|
||||
<a href="https://github.com/danswer-ai/danswer/graphs/contributors">
|
||||
<img alt="contributors" src="https://contrib.rocks/image?repo=danswer-ai/danswer"/>
|
||||
</a>
|
||||
|
||||
<p align="right" style="font-size: 14px; color: #555; margin-top: 20px;">
|
||||
<a href="#readme-top" style="text-decoration: none; color: #007bff; font-weight: bold;">
|
||||
↑ Back to Top ↑
|
||||
</a>
|
||||
</p>
|
||||
|
||||
@@ -8,10 +8,11 @@ Edition features outside of personal development or testing purposes. Please rea
|
||||
founders@danswer.ai for more information. Please visit https://github.com/danswer-ai/danswer"
|
||||
|
||||
# Default DANSWER_VERSION, typically overridden during builds by GitHub Actions.
|
||||
ARG DANSWER_VERSION=0.3-dev
|
||||
ARG DANSWER_VERSION=0.8-dev
|
||||
ENV DANSWER_VERSION=${DANSWER_VERSION} \
|
||||
DANSWER_RUNNING_IN_DOCKER="true"
|
||||
|
||||
|
||||
RUN echo "DANSWER_VERSION: ${DANSWER_VERSION}"
|
||||
# Install system dependencies
|
||||
# cmake needed for psycopg (postgres)
|
||||
@@ -36,6 +37,8 @@ RUN apt-get update && \
|
||||
rm -rf /var/lib/apt/lists/* && \
|
||||
apt-get clean
|
||||
|
||||
|
||||
|
||||
# Install Python dependencies
|
||||
# Remove py which is pulled in by retry, py is not needed and is a CVE
|
||||
COPY ./requirements/default.txt /tmp/requirements.txt
|
||||
@@ -70,11 +73,11 @@ RUN apt-get update && \
|
||||
rm -rf /var/lib/apt/lists/* && \
|
||||
rm -f /usr/local/lib/python3.11/site-packages/tornado/test/test.key
|
||||
|
||||
|
||||
# Pre-downloading models for setups with limited egress
|
||||
RUN python -c "from tokenizers import Tokenizer; \
|
||||
Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')"
|
||||
|
||||
|
||||
# Pre-downloading NLTK for setups with limited egress
|
||||
RUN python -c "import nltk; \
|
||||
nltk.download('stopwords', quiet=True); \
|
||||
@@ -92,6 +95,7 @@ COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
|
||||
COPY ./danswer /app/danswer
|
||||
COPY ./shared_configs /app/shared_configs
|
||||
COPY ./alembic /app/alembic
|
||||
COPY ./alembic_tenants /app/alembic_tenants
|
||||
COPY ./alembic.ini /app/alembic.ini
|
||||
COPY supervisord.conf /usr/etc/supervisord.conf
|
||||
|
||||
@@ -101,7 +105,7 @@ COPY ./scripts/force_delete_connector_by_id.py /app/scripts/force_delete_connect
|
||||
# Put logo in assets
|
||||
COPY ./assets /app/assets
|
||||
|
||||
ENV PYTHONPATH /app
|
||||
ENV PYTHONPATH=/app
|
||||
|
||||
# Default command which does nothing
|
||||
# This container is used by api server and background which specify their own CMD
|
||||
|
||||
@@ -7,7 +7,7 @@ You can find it at https://hub.docker.com/r/danswer/danswer-model-server. For mo
|
||||
visit https://github.com/danswer-ai/danswer."
|
||||
|
||||
# Default DANSWER_VERSION, typically overridden during builds by GitHub Actions.
|
||||
ARG DANSWER_VERSION=0.3-dev
|
||||
ARG DANSWER_VERSION=0.8-dev
|
||||
ENV DANSWER_VERSION=${DANSWER_VERSION} \
|
||||
DANSWER_RUNNING_IN_DOCKER="true"
|
||||
|
||||
@@ -55,6 +55,6 @@ COPY ./shared_configs /app/shared_configs
|
||||
# Model Server main code
|
||||
COPY ./model_server /app/model_server
|
||||
|
||||
ENV PYTHONPATH /app
|
||||
ENV PYTHONPATH=/app
|
||||
|
||||
CMD ["uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000"]
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# A generic, single database configuration.
|
||||
|
||||
[alembic]
|
||||
[DEFAULT]
|
||||
# path to migration scripts
|
||||
script_location = alembic
|
||||
|
||||
@@ -47,7 +47,8 @@ prepend_sys_path = .
|
||||
# version_path_separator = :
|
||||
# version_path_separator = ;
|
||||
# version_path_separator = space
|
||||
version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
|
||||
version_path_separator = os
|
||||
# Use os.pathsep. Default configuration used for new projects.
|
||||
|
||||
# set to 'true' to search source files recursively
|
||||
# in each "version_locations" directory
|
||||
@@ -106,3 +107,12 @@ formatter = generic
|
||||
[formatter_generic]
|
||||
format = %(levelname)-5.5s [%(name)s] %(message)s
|
||||
datefmt = %H:%M:%S
|
||||
|
||||
|
||||
[alembic]
|
||||
script_location = alembic
|
||||
version_locations = %(script_location)s/versions
|
||||
|
||||
[schema_private]
|
||||
script_location = alembic_tenants
|
||||
version_locations = %(script_location)s/versions
|
||||
|
||||
@@ -1,107 +1,215 @@
|
||||
from sqlalchemy.engine.base import Connection
|
||||
from typing import Literal
|
||||
import asyncio
|
||||
from logging.config import fileConfig
|
||||
import logging
|
||||
|
||||
from alembic import context
|
||||
from sqlalchemy import pool
|
||||
from sqlalchemy.ext.asyncio import create_async_engine
|
||||
from sqlalchemy.sql import text
|
||||
from sqlalchemy.sql.schema import SchemaItem
|
||||
|
||||
from shared_configs.configs import MULTI_TENANT
|
||||
from danswer.db.engine import build_connection_string
|
||||
from danswer.db.models import Base
|
||||
from sqlalchemy import pool
|
||||
from sqlalchemy.engine import Connection
|
||||
from sqlalchemy.ext.asyncio import create_async_engine
|
||||
from celery.backends.database.session import ResultModelBase # type: ignore
|
||||
from sqlalchemy.schema import SchemaItem
|
||||
from danswer.db.engine import get_all_tenant_ids
|
||||
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
|
||||
|
||||
# this is the Alembic Config object, which provides
|
||||
# access to the values within the .ini file in use.
|
||||
# Alembic Config object
|
||||
config = context.config
|
||||
|
||||
# Interpret the config file for Python logging.
|
||||
# This line sets up loggers basically.
|
||||
if config.config_file_name is not None and config.attributes.get(
|
||||
"configure_logger", True
|
||||
):
|
||||
fileConfig(config.config_file_name)
|
||||
|
||||
# add your model's MetaData object here
|
||||
# for 'autogenerate' support
|
||||
# from myapp import mymodel
|
||||
# target_metadata = mymodel.Base.metadata
|
||||
# Add your model's MetaData object here for 'autogenerate' support
|
||||
target_metadata = [Base.metadata, ResultModelBase.metadata]
|
||||
|
||||
# other values from the config, defined by the needs of env.py,
|
||||
# can be acquired:
|
||||
# my_important_option = config.get_main_option("my_important_option")
|
||||
# ... etc.
|
||||
|
||||
EXCLUDE_TABLES = {"kombu_queue", "kombu_message"}
|
||||
|
||||
# Set up logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def include_object(
|
||||
object: SchemaItem,
|
||||
name: str,
|
||||
type_: str,
|
||||
name: str | None,
|
||||
type_: Literal[
|
||||
"schema",
|
||||
"table",
|
||||
"column",
|
||||
"index",
|
||||
"unique_constraint",
|
||||
"foreign_key_constraint",
|
||||
],
|
||||
reflected: bool,
|
||||
compare_to: SchemaItem | None,
|
||||
) -> bool:
|
||||
"""
|
||||
Determines whether a database object should be included in migrations.
|
||||
Excludes specified tables from migrations.
|
||||
"""
|
||||
if type_ == "table" and name in EXCLUDE_TABLES:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def run_migrations_offline() -> None:
|
||||
"""Run migrations in 'offline' mode.
|
||||
|
||||
This configures the context with just a URL
|
||||
and not an Engine, though an Engine is acceptable
|
||||
here as well. By skipping the Engine creation
|
||||
we don't even need a DBAPI to be available.
|
||||
|
||||
Calls to context.execute() here emit the given string to the
|
||||
script output.
|
||||
|
||||
def get_schema_options() -> tuple[str, bool, bool]:
|
||||
"""
|
||||
url = build_connection_string()
|
||||
context.configure(
|
||||
url=url,
|
||||
target_metadata=target_metadata, # type: ignore
|
||||
literal_binds=True,
|
||||
dialect_opts={"paramstyle": "named"},
|
||||
)
|
||||
Parses command-line options passed via '-x' in Alembic commands.
|
||||
Recognizes 'schema', 'create_schema', and 'upgrade_all_tenants' options.
|
||||
"""
|
||||
x_args_raw = context.get_x_argument()
|
||||
x_args = {}
|
||||
for arg in x_args_raw:
|
||||
for pair in arg.split(","):
|
||||
if "=" in pair:
|
||||
key, value = pair.split("=", 1)
|
||||
x_args[key.strip()] = value.strip()
|
||||
schema_name = x_args.get("schema", POSTGRES_DEFAULT_SCHEMA)
|
||||
create_schema = x_args.get("create_schema", "true").lower() == "true"
|
||||
upgrade_all_tenants = x_args.get("upgrade_all_tenants", "false").lower() == "true"
|
||||
|
||||
with context.begin_transaction():
|
||||
context.run_migrations()
|
||||
if (
|
||||
MULTI_TENANT
|
||||
and schema_name == POSTGRES_DEFAULT_SCHEMA
|
||||
and not upgrade_all_tenants
|
||||
):
|
||||
raise ValueError(
|
||||
"Cannot run default migrations in public schema when multi-tenancy is enabled. "
|
||||
"Please specify a tenant-specific schema."
|
||||
)
|
||||
|
||||
return schema_name, create_schema, upgrade_all_tenants
|
||||
|
||||
|
||||
def do_run_migrations(connection: Connection) -> None:
|
||||
def do_run_migrations(
|
||||
connection: Connection, schema_name: str, create_schema: bool
|
||||
) -> None:
|
||||
"""
|
||||
Executes migrations in the specified schema.
|
||||
"""
|
||||
logger.info(f"About to migrate schema: {schema_name}")
|
||||
|
||||
if create_schema:
|
||||
connection.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{schema_name}"'))
|
||||
connection.execute(text("COMMIT"))
|
||||
|
||||
# Set search_path to the target schema
|
||||
connection.execute(text(f'SET search_path TO "{schema_name}"'))
|
||||
|
||||
context.configure(
|
||||
connection=connection,
|
||||
target_metadata=target_metadata, # type: ignore
|
||||
include_object=include_object,
|
||||
) # type: ignore
|
||||
version_table_schema=schema_name,
|
||||
include_schemas=True,
|
||||
compare_type=True,
|
||||
compare_server_default=True,
|
||||
script_location=config.get_main_option("script_location"),
|
||||
)
|
||||
|
||||
with context.begin_transaction():
|
||||
context.run_migrations()
|
||||
|
||||
|
||||
async def run_async_migrations() -> None:
|
||||
"""In this scenario we need to create an Engine
|
||||
and associate a connection with the context.
|
||||
|
||||
"""
|
||||
Determines whether to run migrations for a single schema or all schemas,
|
||||
and executes migrations accordingly.
|
||||
"""
|
||||
schema_name, create_schema, upgrade_all_tenants = get_schema_options()
|
||||
|
||||
connectable = create_async_engine(
|
||||
engine = create_async_engine(
|
||||
build_connection_string(),
|
||||
poolclass=pool.NullPool,
|
||||
)
|
||||
|
||||
async with connectable.connect() as connection:
|
||||
await connection.run_sync(do_run_migrations)
|
||||
if upgrade_all_tenants:
|
||||
# Run migrations for all tenant schemas sequentially
|
||||
tenant_schemas = get_all_tenant_ids()
|
||||
|
||||
await connectable.dispose()
|
||||
for schema in tenant_schemas:
|
||||
try:
|
||||
logger.info(f"Migrating schema: {schema}")
|
||||
async with engine.connect() as connection:
|
||||
await connection.run_sync(
|
||||
do_run_migrations,
|
||||
schema_name=schema,
|
||||
create_schema=create_schema,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error migrating schema {schema}: {e}")
|
||||
raise
|
||||
else:
|
||||
try:
|
||||
logger.info(f"Migrating schema: {schema_name}")
|
||||
async with engine.connect() as connection:
|
||||
await connection.run_sync(
|
||||
do_run_migrations,
|
||||
schema_name=schema_name,
|
||||
create_schema=create_schema,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error migrating schema {schema_name}: {e}")
|
||||
raise
|
||||
|
||||
await engine.dispose()
|
||||
|
||||
|
||||
def run_migrations_offline() -> None:
|
||||
"""
|
||||
Run migrations in 'offline' mode.
|
||||
"""
|
||||
schema_name, _, upgrade_all_tenants = get_schema_options()
|
||||
url = build_connection_string()
|
||||
|
||||
if upgrade_all_tenants:
|
||||
# Run offline migrations for all tenant schemas
|
||||
engine = create_async_engine(url)
|
||||
tenant_schemas = get_all_tenant_ids()
|
||||
engine.sync_engine.dispose()
|
||||
|
||||
for schema in tenant_schemas:
|
||||
logger.info(f"Migrating schema: {schema}")
|
||||
context.configure(
|
||||
url=url,
|
||||
target_metadata=target_metadata, # type: ignore
|
||||
literal_binds=True,
|
||||
include_object=include_object,
|
||||
version_table_schema=schema,
|
||||
include_schemas=True,
|
||||
script_location=config.get_main_option("script_location"),
|
||||
dialect_opts={"paramstyle": "named"},
|
||||
)
|
||||
|
||||
with context.begin_transaction():
|
||||
context.run_migrations()
|
||||
else:
|
||||
logger.info(f"Migrating schema: {schema_name}")
|
||||
context.configure(
|
||||
url=url,
|
||||
target_metadata=target_metadata, # type: ignore
|
||||
literal_binds=True,
|
||||
include_object=include_object,
|
||||
version_table_schema=schema_name,
|
||||
include_schemas=True,
|
||||
script_location=config.get_main_option("script_location"),
|
||||
dialect_opts={"paramstyle": "named"},
|
||||
)
|
||||
|
||||
with context.begin_transaction():
|
||||
context.run_migrations()
|
||||
|
||||
|
||||
def run_migrations_online() -> None:
|
||||
"""Run migrations in 'online' mode."""
|
||||
|
||||
"""
|
||||
Runs migrations in 'online' mode using an asynchronous engine.
|
||||
"""
|
||||
asyncio.run(run_async_migrations())
|
||||
|
||||
|
||||
|
||||
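The get_schema_options() helper above reads its settings from Alembic's -x arguments; a usage sketch (the tenant schema name is illustrative, not taken from the diff):

alembic upgrade head                                            # default schema
alembic -x schema=tenant_abc upgrade head                       # one tenant schema, created if missing
alembic -x schema=tenant_abc,create_schema=false upgrade head   # skip CREATE SCHEMA
alembic -x upgrade_all_tenants=true upgrade head                # loop over get_all_tenant_ids()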
@@ -0,0 +1,59 @@
|
||||
"""display custom llm models
|
||||
|
||||
Revision ID: 177de57c21c9
|
||||
Revises: 4ee1287bd26a
|
||||
Create Date: 2024-11-21 11:49:04.488677
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
from sqlalchemy import and_
|
||||
|
||||
revision = "177de57c21c9"
|
||||
down_revision = "4ee1287bd26a"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
conn = op.get_bind()
|
||||
llm_provider = sa.table(
|
||||
"llm_provider",
|
||||
sa.column("id", sa.Integer),
|
||||
sa.column("provider", sa.String),
|
||||
sa.column("model_names", postgresql.ARRAY(sa.String)),
|
||||
sa.column("display_model_names", postgresql.ARRAY(sa.String)),
|
||||
)
|
||||
|
||||
excluded_providers = ["openai", "bedrock", "anthropic", "azure"]
|
||||
|
||||
providers_to_update = sa.select(
|
||||
llm_provider.c.id,
|
||||
llm_provider.c.model_names,
|
||||
llm_provider.c.display_model_names,
|
||||
).where(
|
||||
and_(
|
||||
~llm_provider.c.provider.in_(excluded_providers),
|
||||
llm_provider.c.model_names.isnot(None),
|
||||
)
|
||||
)
|
||||
|
||||
results = conn.execute(providers_to_update).fetchall()
|
||||
|
||||
for provider_id, model_names, display_model_names in results:
|
||||
if display_model_names is None:
|
||||
display_model_names = []
|
||||
|
||||
combined_model_names = list(set(display_model_names + model_names))
|
||||
update_stmt = (
|
||||
llm_provider.update()
|
||||
.where(llm_provider.c.id == provider_id)
|
||||
.values(display_model_names=combined_model_names)
|
||||
)
|
||||
conn.execute(update_stmt)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
pass
|
||||
@@ -0,0 +1,26 @@
|
||||
"""add additional data to notifications
|
||||
|
||||
Revision ID: 1b10e1fda030
|
||||
Revises: 6756efa39ada
|
||||
Create Date: 2024-10-15 19:26:44.071259
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "1b10e1fda030"
|
||||
down_revision = "6756efa39ada"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.add_column(
|
||||
"notification", sa.Column("additional_data", postgresql.JSONB(), nullable=True)
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.drop_column("notification", "additional_data")
|
||||
@@ -0,0 +1,68 @@
|
||||
"""default chosen assistants to none
|
||||
|
||||
Revision ID: 26b931506ecb
|
||||
Revises: 2daa494a0851
|
||||
Create Date: 2024-11-12 13:23:29.858995
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "26b931506ecb"
|
||||
down_revision = "2daa494a0851"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.add_column(
|
||||
"user", sa.Column("chosen_assistants_new", postgresql.JSONB(), nullable=True)
|
||||
)
|
||||
|
||||
op.execute(
|
||||
"""
|
||||
UPDATE "user"
|
||||
SET chosen_assistants_new =
|
||||
CASE
|
||||
WHEN chosen_assistants = '[-2, -1, 0]' THEN NULL
|
||||
ELSE chosen_assistants
|
||||
END
|
||||
"""
|
||||
)
|
||||
|
||||
op.drop_column("user", "chosen_assistants")
|
||||
|
||||
op.alter_column(
|
||||
"user", "chosen_assistants_new", new_column_name="chosen_assistants"
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.add_column(
|
||||
"user",
|
||||
sa.Column(
|
||||
"chosen_assistants_old",
|
||||
postgresql.JSONB(),
|
||||
nullable=False,
|
||||
server_default="[-2, -1, 0]",
|
||||
),
|
||||
)
|
||||
|
||||
op.execute(
|
||||
"""
|
||||
UPDATE "user"
|
||||
SET chosen_assistants_old =
|
||||
CASE
|
||||
WHEN chosen_assistants IS NULL THEN '[-2, -1, 0]'::jsonb
|
||||
ELSE chosen_assistants
|
||||
END
|
||||
"""
|
||||
)
|
||||
|
||||
op.drop_column("user", "chosen_assistants")
|
||||
|
||||
op.alter_column(
|
||||
"user", "chosen_assistants_old", new_column_name="chosen_assistants"
|
||||
)
|
||||
backend/alembic/versions/2daa494a0851_add_group_sync_time.py
@@ -0,0 +1,30 @@
"""add-group-sync-time

Revision ID: 2daa494a0851
Revises: c0fd6e4da83a
Create Date: 2024-11-11 10:57:22.991157

"""
from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "2daa494a0851"
down_revision = "c0fd6e4da83a"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "connector_credential_pair",
        sa.Column(
            "last_time_external_group_sync",
            sa.DateTime(timezone=True),
            nullable=True,
        ),
    )


def downgrade() -> None:
    op.drop_column("connector_credential_pair", "last_time_external_group_sync")

@@ -0,0 +1,50 @@
"""single tool call per message

Revision ID: 33cb72ea4d80
Revises: 5b29123cd710
Create Date: 2024-11-01 12:51:01.535003

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "33cb72ea4d80"
down_revision = "5b29123cd710"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Step 1: Delete extraneous ToolCall entries
    # Keep only the ToolCall with the smallest 'id' for each 'message_id'
    op.execute(
        sa.text(
            """
            DELETE FROM tool_call
            WHERE id NOT IN (
                SELECT MIN(id)
                FROM tool_call
                WHERE message_id IS NOT NULL
                GROUP BY message_id
            );
            """
        )
    )

    # Step 2: Add a unique constraint on message_id
    op.create_unique_constraint(
        constraint_name="uq_tool_call_message_id",
        table_name="tool_call",
        columns=["message_id"],
    )


def downgrade() -> None:
    # Step 1: Drop the unique constraint on message_id
    op.drop_constraint(
        constraint_name="uq_tool_call_message_id",
        table_name="tool_call",
        type_="unique",
    )

@@ -0,0 +1,46 @@
|
||||
"""fix_user__external_user_group_id_fk
|
||||
|
||||
Revision ID: 46b7a812670f
|
||||
Revises: f32615f71aeb
|
||||
Create Date: 2024-09-23 12:58:03.894038
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "46b7a812670f"
|
||||
down_revision = "f32615f71aeb"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
# Drop the existing primary key
|
||||
op.drop_constraint(
|
||||
"user__external_user_group_id_pkey",
|
||||
"user__external_user_group_id",
|
||||
type_="primary",
|
||||
)
|
||||
|
||||
# Add the new composite primary key
|
||||
op.create_primary_key(
|
||||
"user__external_user_group_id_pkey",
|
||||
"user__external_user_group_id",
|
||||
["user_id", "external_user_group_id", "cc_pair_id"],
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
# Drop the composite primary key
|
||||
op.drop_constraint(
|
||||
"user__external_user_group_id_pkey",
|
||||
"user__external_user_group_id",
|
||||
type_="primary",
|
||||
)
|
||||
# Delete all entries from the table
|
||||
op.execute("DELETE FROM user__external_user_group_id")
|
||||
|
||||
# Recreate the original primary key on user_id
|
||||
op.create_primary_key(
|
||||
"user__external_user_group_id_pkey", "user__external_user_group_id", ["user_id"]
|
||||
)
|
||||
@@ -0,0 +1,45 @@
|
||||
"""add persona categories
|
||||
|
||||
Revision ID: 47e5bef3a1d7
|
||||
Revises: dfbe9e93d3c7
|
||||
Create Date: 2024-11-05 18:55:02.221064
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "47e5bef3a1d7"
|
||||
down_revision = "dfbe9e93d3c7"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
# Create the persona_category table
|
||||
op.create_table(
|
||||
"persona_category",
|
||||
sa.Column("id", sa.Integer(), nullable=False),
|
||||
sa.Column("name", sa.String(), nullable=False),
|
||||
sa.Column("description", sa.String(), nullable=True),
|
||||
sa.PrimaryKeyConstraint("id"),
|
||||
sa.UniqueConstraint("name"),
|
||||
)
|
||||
|
||||
# Add category_id to persona table
|
||||
op.add_column("persona", sa.Column("category_id", sa.Integer(), nullable=True))
|
||||
op.create_foreign_key(
|
||||
"fk_persona_category",
|
||||
"persona",
|
||||
"persona_category",
|
||||
["category_id"],
|
||||
["id"],
|
||||
ondelete="SET NULL",
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.drop_constraint("fk_persona_category", "persona", type_="foreignkey")
|
||||
op.drop_column("persona", "category_id")
|
||||
op.drop_table("persona_category")
|
||||
@@ -0,0 +1,280 @@
|
||||
"""add_multiple_slack_bot_support
|
||||
|
||||
Revision ID: 4ee1287bd26a
|
||||
Revises: 47e5bef3a1d7
|
||||
Create Date: 2024-11-06 13:15:53.302644
|
||||
|
||||
"""
|
||||
import logging
|
||||
from typing import cast
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.orm import Session
|
||||
from danswer.key_value_store.factory import get_kv_store
|
||||
from danswer.db.models import SlackBot
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "4ee1287bd26a"
|
||||
down_revision = "47e5bef3a1d7"
|
||||
branch_labels: None = None
|
||||
depends_on: None = None
|
||||
|
||||
# Configure logging
|
||||
logger = logging.getLogger("alembic.runtime.migration")
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
logger.info(f"{revision}: create_table: slack_bot")
|
||||
# Create new slack_bot table
|
||||
op.create_table(
|
||||
"slack_bot",
|
||||
sa.Column("id", sa.Integer(), nullable=False),
|
||||
sa.Column("name", sa.String(), nullable=False),
|
||||
sa.Column("enabled", sa.Boolean(), nullable=False, server_default="true"),
|
||||
sa.Column("bot_token", sa.LargeBinary(), nullable=False),
|
||||
sa.Column("app_token", sa.LargeBinary(), nullable=False),
|
||||
sa.PrimaryKeyConstraint("id"),
|
||||
sa.UniqueConstraint("bot_token"),
|
||||
sa.UniqueConstraint("app_token"),
|
||||
)
|
||||
|
||||
# # Create new slack_channel_config table
|
||||
op.create_table(
|
||||
"slack_channel_config",
|
||||
sa.Column("id", sa.Integer(), nullable=False),
|
||||
sa.Column("slack_bot_id", sa.Integer(), nullable=True),
|
||||
sa.Column("persona_id", sa.Integer(), nullable=True),
|
||||
sa.Column("channel_config", postgresql.JSONB(), nullable=False),
|
||||
sa.Column("response_type", sa.String(), nullable=False),
|
||||
sa.Column(
|
||||
"enable_auto_filters", sa.Boolean(), nullable=False, server_default="false"
|
||||
),
|
||||
sa.ForeignKeyConstraint(
|
||||
["slack_bot_id"],
|
||||
["slack_bot.id"],
|
||||
),
|
||||
sa.ForeignKeyConstraint(
|
||||
["persona_id"],
|
||||
["persona.id"],
|
||||
),
|
||||
sa.PrimaryKeyConstraint("id"),
|
||||
)
|
||||
|
||||
# Handle existing Slack bot tokens first
|
||||
logger.info(f"{revision}: Checking for existing Slack bot.")
|
||||
bot_token = None
|
||||
app_token = None
|
||||
first_row_id = None
|
||||
|
||||
try:
|
||||
tokens = cast(dict, get_kv_store().load("slack_bot_tokens_config_key"))
|
||||
except Exception:
|
||||
logger.warning("No existing Slack bot tokens found.")
|
||||
tokens = {}
|
||||
|
||||
bot_token = tokens.get("bot_token")
|
||||
app_token = tokens.get("app_token")
|
||||
|
||||
if bot_token and app_token:
|
||||
logger.info(f"{revision}: Found bot and app tokens.")
|
||||
|
||||
session = Session(bind=op.get_bind())
|
||||
new_slack_bot = SlackBot(
|
||||
name="Slack Bot (Migrated)",
|
||||
enabled=True,
|
||||
bot_token=bot_token,
|
||||
app_token=app_token,
|
||||
)
|
||||
session.add(new_slack_bot)
|
||||
session.commit()
|
||||
first_row_id = new_slack_bot.id
|
||||
|
||||
# Create a default bot if none exists
|
||||
# This is in case there are no slack tokens but there are channels configured
|
||||
op.execute(
|
||||
sa.text(
|
||||
"""
|
||||
INSERT INTO slack_bot (name, enabled, bot_token, app_token)
|
||||
SELECT 'Default Bot', true, '', ''
|
||||
WHERE NOT EXISTS (SELECT 1 FROM slack_bot)
|
||||
RETURNING id;
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
# Get the bot ID to use (either from existing migration or newly created)
|
||||
bot_id_query = sa.text(
|
||||
"""
|
||||
SELECT COALESCE(
|
||||
:first_row_id,
|
||||
(SELECT id FROM slack_bot ORDER BY id ASC LIMIT 1)
|
||||
) as bot_id;
|
||||
"""
|
||||
)
|
||||
result = op.get_bind().execute(bot_id_query, {"first_row_id": first_row_id})
|
||||
bot_id = result.scalar()
|
||||
|
||||
# CTE (Common Table Expression) that transforms the old slack_bot_config table data
|
||||
# This splits up the channel_names into their own rows
|
||||
channel_names_cte = """
|
||||
WITH channel_names AS (
|
||||
SELECT
|
||||
sbc.id as config_id,
|
||||
sbc.persona_id,
|
||||
sbc.response_type,
|
||||
sbc.enable_auto_filters,
|
||||
jsonb_array_elements_text(sbc.channel_config->'channel_names') as channel_name,
|
||||
sbc.channel_config->>'respond_tag_only' as respond_tag_only,
|
||||
sbc.channel_config->>'respond_to_bots' as respond_to_bots,
|
||||
sbc.channel_config->'respond_member_group_list' as respond_member_group_list,
|
||||
sbc.channel_config->'answer_filters' as answer_filters,
|
||||
sbc.channel_config->'follow_up_tags' as follow_up_tags
|
||||
FROM slack_bot_config sbc
|
||||
)
|
||||
"""
|
||||
|
||||
# Insert the channel names into the new slack_channel_config table
|
||||
insert_statement = """
|
||||
INSERT INTO slack_channel_config (
|
||||
slack_bot_id,
|
||||
persona_id,
|
||||
channel_config,
|
||||
response_type,
|
||||
enable_auto_filters
|
||||
)
|
||||
SELECT
|
||||
:bot_id,
|
||||
channel_name.persona_id,
|
||||
jsonb_build_object(
|
||||
'channel_name', channel_name.channel_name,
|
||||
'respond_tag_only',
|
||||
COALESCE((channel_name.respond_tag_only)::boolean, false),
|
||||
'respond_to_bots',
|
||||
COALESCE((channel_name.respond_to_bots)::boolean, false),
|
||||
'respond_member_group_list',
|
||||
COALESCE(channel_name.respond_member_group_list, '[]'::jsonb),
|
||||
'answer_filters',
|
||||
COALESCE(channel_name.answer_filters, '[]'::jsonb),
|
||||
'follow_up_tags',
|
||||
COALESCE(channel_name.follow_up_tags, '[]'::jsonb)
|
||||
),
|
||||
channel_name.response_type,
|
||||
channel_name.enable_auto_filters
|
||||
FROM channel_names channel_name;
|
||||
"""
|
||||
|
||||
op.execute(sa.text(channel_names_cte + insert_statement).bindparams(bot_id=bot_id))
|
||||
|
||||
# Clean up old tokens if they existed
|
||||
try:
|
||||
if bot_token and app_token:
|
||||
logger.info(f"{revision}: Removing old bot and app tokens.")
|
||||
get_kv_store().delete("slack_bot_tokens_config_key")
|
||||
except Exception:
|
||||
logger.warning("tried to delete tokens in dynamic config but failed")
|
||||
# Rename the table
|
||||
op.rename_table(
|
||||
"slack_bot_config__standard_answer_category",
|
||||
"slack_channel_config__standard_answer_category",
|
||||
)
|
||||
|
||||
# Rename the column
|
||||
op.alter_column(
|
||||
"slack_channel_config__standard_answer_category",
|
||||
"slack_bot_config_id",
|
||||
new_column_name="slack_channel_config_id",
|
||||
)
|
||||
|
||||
# Drop the table with CASCADE to handle dependent objects
|
||||
op.execute("DROP TABLE slack_bot_config CASCADE")
|
||||
|
||||
logger.info(f"{revision}: Migration complete.")
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
# Recreate the old slack_bot_config table
|
||||
op.create_table(
|
||||
"slack_bot_config",
|
||||
sa.Column("id", sa.Integer(), nullable=False),
|
||||
sa.Column("persona_id", sa.Integer(), nullable=True),
|
||||
sa.Column("channel_config", postgresql.JSONB(), nullable=False),
|
||||
sa.Column("response_type", sa.String(), nullable=False),
|
||||
sa.Column("enable_auto_filters", sa.Boolean(), nullable=False),
|
||||
sa.ForeignKeyConstraint(
|
||||
["persona_id"],
|
||||
["persona.id"],
|
||||
),
|
||||
sa.PrimaryKeyConstraint("id"),
|
||||
)
|
||||
|
||||
# Migrate data back to the old format
|
||||
# Group by persona_id to combine channel names back into arrays
|
||||
op.execute(
|
||||
sa.text(
|
||||
"""
|
||||
INSERT INTO slack_bot_config (
|
||||
persona_id,
|
||||
channel_config,
|
||||
response_type,
|
||||
enable_auto_filters
|
||||
)
|
||||
SELECT DISTINCT ON (persona_id)
|
||||
persona_id,
|
||||
jsonb_build_object(
|
||||
'channel_names', (
|
||||
SELECT jsonb_agg(c.channel_config->>'channel_name')
|
||||
FROM slack_channel_config c
|
||||
WHERE c.persona_id = scc.persona_id
|
||||
),
|
||||
'respond_tag_only', (channel_config->>'respond_tag_only')::boolean,
|
||||
'respond_to_bots', (channel_config->>'respond_to_bots')::boolean,
|
||||
'respond_member_group_list', channel_config->'respond_member_group_list',
|
||||
'answer_filters', channel_config->'answer_filters',
|
||||
'follow_up_tags', channel_config->'follow_up_tags'
|
||||
),
|
||||
response_type,
|
||||
enable_auto_filters
|
||||
FROM slack_channel_config scc
|
||||
WHERE persona_id IS NOT NULL;
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
# Rename the table back
|
||||
op.rename_table(
|
||||
"slack_channel_config__standard_answer_category",
|
||||
"slack_bot_config__standard_answer_category",
|
||||
)
|
||||
|
||||
# Rename the column back
|
||||
op.alter_column(
|
||||
"slack_bot_config__standard_answer_category",
|
||||
"slack_channel_config_id",
|
||||
new_column_name="slack_bot_config_id",
|
||||
)
|
||||
|
||||
# Try to save the first bot's tokens back to KV store
|
||||
try:
|
||||
first_bot = (
|
||||
op.get_bind()
|
||||
.execute(
|
||||
sa.text(
|
||||
"SELECT bot_token, app_token FROM slack_bot ORDER BY id LIMIT 1"
|
||||
)
|
||||
)
|
||||
.first()
|
||||
)
|
||||
if first_bot and first_bot.bot_token and first_bot.app_token:
|
||||
tokens = {
|
||||
"bot_token": first_bot.bot_token,
|
||||
"app_token": first_bot.app_token,
|
||||
}
|
||||
get_kv_store().store("slack_bot_tokens_config_key", tokens)
|
||||
except Exception:
|
||||
logger.warning("Failed to save tokens back to KV store")
|
||||
|
||||
# Drop the new tables in reverse order
|
||||
op.drop_table("slack_channel_config")
|
||||
op.drop_table("slack_bot")
|
||||
@@ -0,0 +1,70 @@
|
||||
"""nullable search settings for historic index attempts
|
||||
|
||||
Revision ID: 5b29123cd710
|
||||
Revises: 949b4a92a401
|
||||
Create Date: 2024-10-30 19:37:59.630704
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "5b29123cd710"
|
||||
down_revision = "949b4a92a401"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
# Drop the existing foreign key constraint
|
||||
op.drop_constraint(
|
||||
"fk_index_attempt_search_settings", "index_attempt", type_="foreignkey"
|
||||
)
|
||||
|
||||
# Modify the column to be nullable
|
||||
op.alter_column(
|
||||
"index_attempt", "search_settings_id", existing_type=sa.INTEGER(), nullable=True
|
||||
)
|
||||
|
||||
# Add back the foreign key with ON DELETE SET NULL
|
||||
op.create_foreign_key(
|
||||
"fk_index_attempt_search_settings",
|
||||
"index_attempt",
|
||||
"search_settings",
|
||||
["search_settings_id"],
|
||||
["id"],
|
||||
ondelete="SET NULL",
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
# Warning: This will delete all index attempts that don't have search settings
|
||||
op.execute(
|
||||
"""
|
||||
DELETE FROM index_attempt
|
||||
WHERE search_settings_id IS NULL
|
||||
"""
|
||||
)
|
||||
|
||||
# Drop foreign key constraint
|
||||
op.drop_constraint(
|
||||
"fk_index_attempt_search_settings", "index_attempt", type_="foreignkey"
|
||||
)
|
||||
|
||||
# Modify the column to be not nullable
|
||||
op.alter_column(
|
||||
"index_attempt",
|
||||
"search_settings_id",
|
||||
existing_type=sa.INTEGER(),
|
||||
nullable=False,
|
||||
)
|
||||
|
||||
# Add back the foreign key without ON DELETE SET NULL
|
||||
op.create_foreign_key(
|
||||
"fk_index_attempt_search_settings",
|
||||
"index_attempt",
|
||||
"search_settings",
|
||||
["search_settings_id"],
|
||||
["id"],
|
||||
)
|
||||
@@ -0,0 +1,30 @@
"""add api_version and deployment_name to search settings

Revision ID: 5d12a446f5c0
Revises: e4334d5b33ba
Create Date: 2024-10-08 15:56:07.975636

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "5d12a446f5c0"
down_revision = "e4334d5b33ba"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "embedding_provider", sa.Column("api_version", sa.String(), nullable=True)
    )
    op.add_column(
        "embedding_provider", sa.Column("deployment_name", sa.String(), nullable=True)
    )


def downgrade() -> None:
    op.drop_column("embedding_provider", "deployment_name")
    op.drop_column("embedding_provider", "api_version")

@@ -0,0 +1,153 @@
|
||||
"""Migrate chat_session and chat_message tables to use UUID primary keys
|
||||
|
||||
Revision ID: 6756efa39ada
|
||||
Revises: 5d12a446f5c0
|
||||
Create Date: 2024-10-15 17:47:44.108537
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
revision = "6756efa39ada"
|
||||
down_revision = "5d12a446f5c0"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
"""
|
||||
This script:
|
||||
1. Adds UUID columns to chat_session and chat_message
|
||||
2. Populates new columns with UUIDs
|
||||
3. Updates foreign key relationships
|
||||
4. Removes old integer ID columns
|
||||
|
||||
Note: Downgrade will assign new integer IDs, not restore original ones.
|
||||
"""
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.execute("CREATE EXTENSION IF NOT EXISTS pgcrypto;")
|
||||
|
||||
op.add_column(
|
||||
"chat_session",
|
||||
sa.Column(
|
||||
"new_id",
|
||||
sa.UUID(as_uuid=True),
|
||||
server_default=sa.text("gen_random_uuid()"),
|
||||
nullable=False,
|
||||
),
|
||||
)
|
||||
|
||||
op.execute("UPDATE chat_session SET new_id = gen_random_uuid();")
|
||||
|
||||
op.add_column(
|
||||
"chat_message",
|
||||
sa.Column("new_chat_session_id", sa.UUID(as_uuid=True), nullable=True),
|
||||
)
|
||||
|
||||
op.execute(
|
||||
"""
|
||||
UPDATE chat_message
|
||||
SET new_chat_session_id = cs.new_id
|
||||
FROM chat_session cs
|
||||
WHERE chat_message.chat_session_id = cs.id;
|
||||
"""
|
||||
)
|
||||
|
||||
op.drop_constraint(
|
||||
"chat_message_chat_session_id_fkey", "chat_message", type_="foreignkey"
|
||||
)
|
||||
|
||||
op.drop_column("chat_message", "chat_session_id")
|
||||
op.alter_column(
|
||||
"chat_message", "new_chat_session_id", new_column_name="chat_session_id"
|
||||
)
|
||||
|
||||
op.drop_constraint("chat_session_pkey", "chat_session", type_="primary")
|
||||
op.drop_column("chat_session", "id")
|
||||
op.alter_column("chat_session", "new_id", new_column_name="id")
|
||||
|
||||
op.create_primary_key("chat_session_pkey", "chat_session", ["id"])
|
||||
|
||||
op.create_foreign_key(
|
||||
"chat_message_chat_session_id_fkey",
|
||||
"chat_message",
|
||||
"chat_session",
|
||||
["chat_session_id"],
|
||||
["id"],
|
||||
ondelete="CASCADE",
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.drop_constraint(
|
||||
"chat_message_chat_session_id_fkey", "chat_message", type_="foreignkey"
|
||||
)
|
||||
|
||||
op.add_column(
|
||||
"chat_session",
|
||||
sa.Column("old_id", sa.Integer, autoincrement=True, nullable=True),
|
||||
)
|
||||
|
||||
op.execute("CREATE SEQUENCE chat_session_old_id_seq OWNED BY chat_session.old_id;")
|
||||
op.execute(
|
||||
"ALTER TABLE chat_session ALTER COLUMN old_id SET DEFAULT nextval('chat_session_old_id_seq');"
|
||||
)
|
||||
|
||||
op.execute(
|
||||
"UPDATE chat_session SET old_id = nextval('chat_session_old_id_seq') WHERE old_id IS NULL;"
|
||||
)
|
||||
|
||||
op.alter_column("chat_session", "old_id", nullable=False)
|
||||
|
||||
op.drop_constraint("chat_session_pkey", "chat_session", type_="primary")
|
||||
op.create_primary_key("chat_session_pkey", "chat_session", ["old_id"])
|
||||
|
||||
op.add_column(
|
||||
"chat_message",
|
||||
sa.Column("old_chat_session_id", sa.Integer, nullable=True),
|
||||
)
|
||||
|
||||
op.execute(
|
||||
"""
|
||||
UPDATE chat_message
|
||||
SET old_chat_session_id = cs.old_id
|
||||
FROM chat_session cs
|
||||
WHERE chat_message.chat_session_id = cs.id;
|
||||
"""
|
||||
)
|
||||
|
||||
op.drop_column("chat_message", "chat_session_id")
|
||||
op.alter_column(
|
||||
"chat_message", "old_chat_session_id", new_column_name="chat_session_id"
|
||||
)
|
||||
|
||||
op.create_foreign_key(
|
||||
"chat_message_chat_session_id_fkey",
|
||||
"chat_message",
|
||||
"chat_session",
|
||||
["chat_session_id"],
|
||||
["old_id"],
|
||||
ondelete="CASCADE",
|
||||
)
|
||||
|
||||
op.drop_column("chat_session", "id")
|
||||
op.alter_column("chat_session", "old_id", new_column_name="id")
|
||||
|
||||
op.alter_column(
|
||||
"chat_session",
|
||||
"id",
|
||||
type_=sa.Integer(),
|
||||
existing_type=sa.Integer(),
|
||||
existing_nullable=False,
|
||||
existing_server_default=False,
|
||||
)
|
||||
|
||||
# Rename the sequence
|
||||
op.execute("ALTER SEQUENCE chat_session_old_id_seq RENAME TO chat_session_id_seq;")
|
||||
|
||||
# Update the default value to use the renamed sequence
|
||||
op.alter_column(
|
||||
"chat_session",
|
||||
"id",
|
||||
server_default=sa.text("nextval('chat_session_id_seq'::regclass)"),
|
||||
)
|
||||
backend/alembic/versions/6d562f86c78b_remove_default_bot.py
@@ -0,0 +1,45 @@
|
||||
"""remove default bot
|
||||
|
||||
Revision ID: 6d562f86c78b
|
||||
Revises: 177de57c21c9
|
||||
Create Date: 2024-11-22 11:51:29.331336
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "6d562f86c78b"
|
||||
down_revision = "177de57c21c9"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.execute(
|
||||
sa.text(
|
||||
"""
|
||||
DELETE FROM slack_bot
|
||||
WHERE name = 'Default Bot'
|
||||
AND bot_token = ''
|
||||
AND app_token = ''
|
||||
AND NOT EXISTS (
|
||||
SELECT 1 FROM slack_channel_config
|
||||
WHERE slack_channel_config.slack_bot_id = slack_bot.id
|
||||
)
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.execute(
|
||||
sa.text(
|
||||
"""
|
||||
INSERT INTO slack_bot (name, enabled, bot_token, app_token)
|
||||
SELECT 'Default Bot', true, '', ''
|
||||
WHERE NOT EXISTS (SELECT 1 FROM slack_bot)
|
||||
RETURNING id;
|
||||
"""
|
||||
)
|
||||
)
|
||||
@@ -9,7 +9,7 @@ import json
from typing import cast
from alembic import op
import sqlalchemy as sa
from danswer.dynamic_configs.factory import get_dynamic_config_store
from danswer.key_value_store.factory import get_kv_store

# revision identifiers, used by Alembic.
revision = "703313b75876"
@@ -54,9 +54,7 @@ def upgrade() -> None:
    )

    try:
        settings_json = cast(
            str, get_dynamic_config_store().load("token_budget_settings")
        )
        settings_json = cast(str, get_kv_store().load("token_budget_settings"))
        settings = json.loads(settings_json)

        is_enabled = settings.get("enable_token_budget", False)
@@ -71,7 +69,7 @@ def upgrade() -> None:
        )

        # Delete the dynamic config
        get_dynamic_config_store().delete("token_budget_settings")
        get_kv_store().delete("token_budget_settings")

    except Exception:
        # Ignore if the dynamic config is not found

@@ -9,8 +9,8 @@ from alembic import op
import sqlalchemy as sa

from danswer.db.models import IndexModelStatus
from danswer.search.enums import RecencyBiasSetting
from danswer.search.enums import SearchType
from danswer.context.search.enums import RecencyBiasSetting
from danswer.context.search.enums import SearchType

# revision identifiers, used by Alembic.
revision = "776b3bbe9092"

@@ -0,0 +1,35 @@
"""add web ui option to slack config

Revision ID: 93560ba1b118
Revises: 6d562f86c78b
Create Date: 2024-11-24 06:36:17.490612

"""
from alembic import op

# revision identifiers, used by Alembic.
revision = "93560ba1b118"
down_revision = "6d562f86c78b"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Add show_continue_in_web_ui with default False to all existing channel_configs
    op.execute(
        """
        UPDATE slack_channel_config
        SET channel_config = channel_config || '{"show_continue_in_web_ui": false}'::jsonb
        WHERE NOT channel_config ? 'show_continue_in_web_ui'
        """
    )


def downgrade() -> None:
    # Remove show_continue_in_web_ui from all channel_configs
    op.execute(
        """
        UPDATE slack_channel_config
        SET channel_config = channel_config - 'show_continue_in_web_ui'
        """
    )

backend/alembic/versions/949b4a92a401_remove_rt.py
@@ -0,0 +1,72 @@
|
||||
"""remove rt
|
||||
|
||||
Revision ID: 949b4a92a401
|
||||
Revises: 1b10e1fda030
|
||||
Create Date: 2024-10-26 13:06:06.937969
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import text
|
||||
|
||||
# Import your models and constants
|
||||
from danswer.db.models import (
|
||||
Connector,
|
||||
ConnectorCredentialPair,
|
||||
Credential,
|
||||
IndexAttempt,
|
||||
)
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "949b4a92a401"
|
||||
down_revision = "1b10e1fda030"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
# Deletes all RequestTracker connectors and associated data
|
||||
bind = op.get_bind()
|
||||
session = Session(bind=bind)
|
||||
|
||||
# Get connectors using raw SQL
|
||||
result = bind.execute(
|
||||
text("SELECT id FROM connector WHERE source = 'requesttracker'")
|
||||
)
|
||||
connector_ids = [row[0] for row in result]
|
||||
|
||||
if connector_ids:
|
||||
cc_pairs_to_delete = (
|
||||
session.query(ConnectorCredentialPair)
|
||||
.filter(ConnectorCredentialPair.connector_id.in_(connector_ids))
|
||||
.all()
|
||||
)
|
||||
|
||||
cc_pair_ids = [cc_pair.id for cc_pair in cc_pairs_to_delete]
|
||||
|
||||
if cc_pair_ids:
|
||||
session.query(IndexAttempt).filter(
|
||||
IndexAttempt.connector_credential_pair_id.in_(cc_pair_ids)
|
||||
).delete(synchronize_session=False)
|
||||
|
||||
session.query(ConnectorCredentialPair).filter(
|
||||
ConnectorCredentialPair.id.in_(cc_pair_ids)
|
||||
).delete(synchronize_session=False)
|
||||
|
||||
credential_ids = [cc_pair.credential_id for cc_pair in cc_pairs_to_delete]
|
||||
if credential_ids:
|
||||
session.query(Credential).filter(Credential.id.in_(credential_ids)).delete(
|
||||
synchronize_session=False
|
||||
)
|
||||
|
||||
session.query(Connector).filter(Connector.id.in_(connector_ids)).delete(
|
||||
synchronize_session=False
|
||||
)
|
||||
|
||||
session.commit()
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
# No-op downgrade as we cannot restore deleted data
|
||||
pass
|
||||
@@ -0,0 +1,30 @@
"""add creator to cc pair

Revision ID: 9cf5c00f72fe
Revises: 26b931506ecb
Create Date: 2024-11-12 15:16:42.682902

"""
from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "9cf5c00f72fe"
down_revision = "26b931506ecb"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "connector_credential_pair",
        sa.Column(
            "creator_id",
            sa.UUID(as_uuid=True),
            nullable=True,
        ),
    )


def downgrade() -> None:
    op.drop_column("connector_credential_pair", "creator_id")

@@ -0,0 +1,36 @@
|
||||
"""Combine Search and Chat
|
||||
|
||||
Revision ID: 9f696734098f
|
||||
Revises: a8c2065484e6
|
||||
Create Date: 2024-11-27 15:32:19.694972
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "9f696734098f"
|
||||
down_revision = "a8c2065484e6"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.alter_column("chat_session", "description", nullable=True)
|
||||
op.drop_column("chat_session", "one_shot")
|
||||
op.drop_column("slack_channel_config", "response_type")
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.execute("UPDATE chat_session SET description = '' WHERE description IS NULL")
|
||||
op.alter_column("chat_session", "description", nullable=False)
|
||||
op.add_column(
|
||||
"chat_session",
|
||||
sa.Column("one_shot", sa.Boolean(), nullable=False, server_default=sa.false()),
|
||||
)
|
||||
op.add_column(
|
||||
"slack_channel_config",
|
||||
sa.Column(
|
||||
"response_type", sa.String(), nullable=False, server_default="citations"
|
||||
),
|
||||
)
|
||||
@@ -0,0 +1,27 @@
"""add auto scroll to user model

Revision ID: a8c2065484e6
Revises: abe7378b8217
Create Date: 2024-11-22 17:34:09.690295

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "a8c2065484e6"
down_revision = "abe7378b8217"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "user",
        sa.Column("auto_scroll", sa.Boolean(), nullable=True, server_default=None),
    )


def downgrade() -> None:
    op.drop_column("user", "auto_scroll")

@@ -0,0 +1,30 @@
"""add indexing trigger to cc_pair

Revision ID: abe7378b8217
Revises: 6d562f86c78b
Create Date: 2024-11-26 19:09:53.481171

"""
from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "abe7378b8217"
down_revision = "93560ba1b118"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "connector_credential_pair",
        sa.Column(
            "indexing_trigger",
            sa.Enum("UPDATE", "REINDEX", name="indexingmode", native_enum=False),
            nullable=True,
        ),
    )


def downgrade() -> None:
    op.drop_column("connector_credential_pair", "indexing_trigger")

@@ -0,0 +1,27 @@
"""add last_pruned to the connector_credential_pair table

Revision ID: ac5eaac849f9
Revises: 52a219fb5233
Create Date: 2024-09-10 15:04:26.437118

"""
from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "ac5eaac849f9"
down_revision = "46b7a812670f"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # last pruned represents the last time the connector was pruned
    op.add_column(
        "connector_credential_pair",
        sa.Column("last_pruned", sa.DateTime(timezone=True), nullable=True),
    )


def downgrade() -> None:
    op.drop_column("connector_credential_pair", "last_pruned")

@@ -31,6 +31,12 @@ def upgrade() -> None:
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
# First, update any null values to a default value
|
||||
op.execute(
|
||||
"UPDATE connector_credential_pair SET last_attempt_status = 'NOT_STARTED' WHERE last_attempt_status IS NULL"
|
||||
)
|
||||
|
||||
# Then, make the column non-nullable
|
||||
op.alter_column(
|
||||
"connector_credential_pair",
|
||||
"last_attempt_status",
|
||||
|
||||
@@ -288,6 +288,15 @@ def upgrade() -> None:
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
# NOTE: you will lose all chat history. This is to satisfy the non-nullable constraints
|
||||
# below
|
||||
op.execute("DELETE FROM chat_feedback")
|
||||
op.execute("DELETE FROM chat_message__search_doc")
|
||||
op.execute("DELETE FROM document_retrieval_feedback")
|
||||
op.execute("DELETE FROM document_retrieval_feedback")
|
||||
op.execute("DELETE FROM chat_message")
|
||||
op.execute("DELETE FROM chat_session")
|
||||
|
||||
op.drop_constraint(
|
||||
"chat_feedback__chat_message_fk", "chat_feedback", type_="foreignkey"
|
||||
)
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
"""remove description from starter messages
|
||||
|
||||
Revision ID: b72ed7a5db0e
|
||||
Revises: 33cb72ea4d80
|
||||
Create Date: 2024-11-03 15:55:28.944408
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "b72ed7a5db0e"
|
||||
down_revision = "33cb72ea4d80"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.execute(
|
||||
sa.text(
|
||||
"""
|
||||
UPDATE persona
|
||||
SET starter_messages = (
|
||||
SELECT jsonb_agg(elem - 'description')
|
||||
FROM jsonb_array_elements(starter_messages) elem
|
||||
)
|
||||
WHERE starter_messages IS NOT NULL
|
||||
AND jsonb_typeof(starter_messages) = 'array'
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.execute(
|
||||
sa.text(
|
||||
"""
|
||||
UPDATE persona
|
||||
SET starter_messages = (
|
||||
SELECT jsonb_agg(elem || '{"description": ""}')
|
||||
FROM jsonb_array_elements(starter_messages) elem
|
||||
)
|
||||
WHERE starter_messages IS NOT NULL
|
||||
AND jsonb_typeof(starter_messages) = 'array'
|
||||
"""
|
||||
)
|
||||
)
|
||||
@@ -0,0 +1,29 @@
"""add recent assistants

Revision ID: c0fd6e4da83a
Revises: b72ed7a5db0e
Create Date: 2024-11-03 17:28:54.916618

"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "c0fd6e4da83a"
down_revision = "b72ed7a5db0e"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "user",
        sa.Column(
            "recent_assistants", postgresql.JSONB(), server_default="[]", nullable=False
        ),
    )


def downgrade() -> None:
    op.drop_column("user", "recent_assistants")

@@ -23,6 +23,56 @@ def upgrade() -> None:
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
# Delete chat messages and feedback first since they reference chat sessions
|
||||
# Get chat messages from sessions with null persona_id
|
||||
chat_messages_query = """
|
||||
SELECT id
|
||||
FROM chat_message
|
||||
WHERE chat_session_id IN (
|
||||
SELECT id
|
||||
FROM chat_session
|
||||
WHERE persona_id IS NULL
|
||||
)
|
||||
"""
|
||||
|
||||
# Delete dependent records first
|
||||
op.execute(
|
||||
f"""
|
||||
DELETE FROM document_retrieval_feedback
|
||||
WHERE chat_message_id IN (
|
||||
{chat_messages_query}
|
||||
)
|
||||
"""
|
||||
)
|
||||
op.execute(
|
||||
f"""
|
||||
DELETE FROM chat_message__search_doc
|
||||
WHERE chat_message_id IN (
|
||||
{chat_messages_query}
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
# Delete chat messages
|
||||
op.execute(
|
||||
"""
|
||||
DELETE FROM chat_message
|
||||
WHERE chat_session_id IN (
|
||||
SELECT id
|
||||
FROM chat_session
|
||||
WHERE persona_id IS NULL
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
# Now we can safely delete the chat sessions
|
||||
op.execute(
|
||||
"""
|
||||
DELETE FROM chat_session
|
||||
WHERE persona_id IS NULL
|
||||
"""
|
||||
)
|
||||
|
||||
op.alter_column(
|
||||
"chat_session",
|
||||
"persona_id",
|
||||
|
||||
@@ -20,7 +20,7 @@ depends_on: None = None
def upgrade() -> None:
    conn = op.get_bind()
    existing_ids_and_chosen_assistants = conn.execute(
        sa.text("select id, chosen_assistants from public.user")
        sa.text('select id, chosen_assistants from "user"')
    )
    op.drop_column(
        "user",
@@ -37,7 +37,7 @@ def upgrade() -> None:
    for id, chosen_assistants in existing_ids_and_chosen_assistants:
        conn.execute(
            sa.text(
                "update public.user set chosen_assistants = :chosen_assistants where id = :id"
                'update "user" set chosen_assistants = :chosen_assistants where id = :id'
            ),
            {"chosen_assistants": json.dumps(chosen_assistants), "id": id},
        )
@@ -46,7 +46,7 @@ def upgrade() -> None:
def downgrade() -> None:
    conn = op.get_bind()
    existing_ids_and_chosen_assistants = conn.execute(
        sa.text("select id, chosen_assistants from public.user")
        sa.text('select id, chosen_assistants from "user"')
    )
    op.drop_column(
        "user",
@@ -59,7 +59,7 @@ def downgrade() -> None:
    for id, chosen_assistants in existing_ids_and_chosen_assistants:
        conn.execute(
            sa.text(
                "update public.user set chosen_assistants = :chosen_assistants where id = :id"
                'update "user" set chosen_assistants = :chosen_assistants where id = :id'
            ),
            {"chosen_assistants": chosen_assistants, "id": id},
        )

@@ -0,0 +1,42 @@
|
||||
"""extended_role_for_non_web
|
||||
|
||||
Revision ID: dfbe9e93d3c7
|
||||
Revises: 9cf5c00f72fe
|
||||
Create Date: 2024-11-16 07:54:18.727906
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "dfbe9e93d3c7"
|
||||
down_revision = "9cf5c00f72fe"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.execute(
|
||||
"""
|
||||
UPDATE "user"
|
||||
SET role = 'EXT_PERM_USER'
|
||||
WHERE has_web_login = false
|
||||
"""
|
||||
)
|
||||
op.drop_column("user", "has_web_login")
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.add_column(
|
||||
"user",
|
||||
sa.Column("has_web_login", sa.Boolean(), nullable=False, server_default="true"),
|
||||
)
|
||||
|
||||
op.execute(
|
||||
"""
|
||||
UPDATE "user"
|
||||
SET has_web_login = false,
|
||||
role = 'BASIC'
|
||||
WHERE role IN ('SLACK_USER', 'EXT_PERM_USER')
|
||||
"""
|
||||
)
|
||||
@@ -0,0 +1,26 @@
"""add_deployment_name_to_llmprovider

Revision ID: e4334d5b33ba
Revises: ac5eaac849f9
Create Date: 2024-10-04 09:52:34.896867

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "e4334d5b33ba"
down_revision = "ac5eaac849f9"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "llm_provider", sa.Column("deployment_name", sa.String(), nullable=True)
    )


def downgrade() -> None:
    op.drop_column("llm_provider", "deployment_name")

backend/alembic_tenants/README.md
@@ -0,0 +1,3 @@
These files are for public table migrations when operating with multi tenancy.

If you are not a Danswer developer, you can ignore this directory entirely.

backend/alembic_tenants/env.py
@@ -0,0 +1,119 @@
|
||||
import asyncio
|
||||
from logging.config import fileConfig
|
||||
from typing import Literal
|
||||
|
||||
from sqlalchemy import pool
|
||||
from sqlalchemy.engine import Connection
|
||||
from sqlalchemy.ext.asyncio import create_async_engine
|
||||
from sqlalchemy.schema import SchemaItem
|
||||
|
||||
from alembic import context
|
||||
from danswer.db.engine import build_connection_string
|
||||
from danswer.db.models import PublicBase
|
||||
|
||||
# this is the Alembic Config object, which provides
|
||||
# access to the values within the .ini file in use.
|
||||
config = context.config
|
||||
|
||||
# Interpret the config file for Python logging.
|
||||
# This line sets up loggers basically.
|
||||
if config.config_file_name is not None and config.attributes.get(
|
||||
"configure_logger", True
|
||||
):
|
||||
fileConfig(config.config_file_name)
|
||||
|
||||
# add your model's MetaData object here
|
||||
# for 'autogenerate' support
|
||||
# from myapp import mymodel
|
||||
# target_metadata = mymodel.Base.metadata
|
||||
target_metadata = [PublicBase.metadata]
|
||||
|
||||
# other values from the config, defined by the needs of env.py,
|
||||
# can be acquired:
|
||||
# my_important_option = config.get_main_option("my_important_option")
|
||||
# ... etc.
|
||||
|
||||
EXCLUDE_TABLES = {"kombu_queue", "kombu_message"}
|
||||
|
||||
|
||||
def include_object(
|
||||
object: SchemaItem,
|
||||
name: str | None,
|
||||
type_: Literal[
|
||||
"schema",
|
||||
"table",
|
||||
"column",
|
||||
"index",
|
||||
"unique_constraint",
|
||||
"foreign_key_constraint",
|
||||
],
|
||||
reflected: bool,
|
||||
compare_to: SchemaItem | None,
|
||||
) -> bool:
|
||||
if type_ == "table" and name in EXCLUDE_TABLES:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def run_migrations_offline() -> None:
|
||||
"""Run migrations in 'offline' mode.
|
||||
|
||||
This configures the context with just a URL
|
||||
and not an Engine, though an Engine is acceptable
|
||||
here as well. By skipping the Engine creation
|
||||
we don't even need a DBAPI to be available.
|
||||
|
||||
Calls to context.execute() here emit the given string to the
|
||||
script output.
|
||||
|
||||
"""
|
||||
url = build_connection_string()
|
||||
context.configure(
|
||||
url=url,
|
||||
target_metadata=target_metadata, # type: ignore
|
||||
literal_binds=True,
|
||||
dialect_opts={"paramstyle": "named"},
|
||||
)
|
||||
|
||||
with context.begin_transaction():
|
||||
context.run_migrations()
|
||||
|
||||
|
||||
def do_run_migrations(connection: Connection) -> None:
|
||||
context.configure(
|
||||
connection=connection,
|
||||
target_metadata=target_metadata, # type: ignore
|
||||
include_object=include_object,
|
||||
) # type: ignore
|
||||
|
||||
with context.begin_transaction():
|
||||
context.run_migrations()
|
||||
|
||||
|
||||
async def run_async_migrations() -> None:
|
||||
"""In this scenario we need to create an Engine
|
||||
and associate a connection with the context.
|
||||
|
||||
"""
|
||||
|
||||
connectable = create_async_engine(
|
||||
build_connection_string(),
|
||||
poolclass=pool.NullPool,
|
||||
)
|
||||
|
||||
async with connectable.connect() as connection:
|
||||
await connection.run_sync(do_run_migrations)
|
||||
|
||||
await connectable.dispose()
|
||||
|
||||
|
||||
def run_migrations_online() -> None:
|
||||
"""Run migrations in 'online' mode."""
|
||||
|
||||
asyncio.run(run_async_migrations())
|
||||
|
||||
|
||||
if context.is_offline_mode():
|
||||
run_migrations_offline()
|
||||
else:
|
||||
run_migrations_online()
|
||||
backend/alembic_tenants/script.py.mako
@@ -0,0 +1,24 @@
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}

"""
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}

# revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}
branch_labels = ${repr(branch_labels)}
depends_on = ${repr(depends_on)}


def upgrade() -> None:
    ${upgrades if upgrades else "pass"}


def downgrade() -> None:
    ${downgrades if downgrades else "pass"}

@@ -0,0 +1,24 @@
import sqlalchemy as sa

from alembic import op

# revision identifiers, used by Alembic.
revision = "14a83a331951"
down_revision = None
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        "user_tenant_mapping",
        sa.Column("email", sa.String(), nullable=False),
        sa.Column("tenant_id", sa.String(), nullable=False),
        sa.UniqueConstraint("email", "tenant_id", name="uq_user_tenant"),
        sa.UniqueConstraint("email", name="uq_email"),
        schema="public",
    )


def downgrade() -> None:
    op.drop_table("user_tenant_mapping", schema="public")

@@ -1,3 +1,3 @@
import os

__version__ = os.environ.get("DANSWER_VERSION", "") or "0.3-dev"
__version__ = os.environ.get("DANSWER_VERSION", "") or "Development"

@@ -16,6 +16,46 @@ class ExternalAccess:
|
||||
is_public: bool
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class DocExternalAccess:
|
||||
"""
|
||||
This is just a class to wrap the external access and the document ID
|
||||
together. It's used for syncing document permissions to Redis.
|
||||
"""
|
||||
|
||||
external_access: ExternalAccess
|
||||
# The document ID
|
||||
doc_id: str
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"external_access": {
|
||||
"external_user_emails": list(self.external_access.external_user_emails),
|
||||
"external_user_group_ids": list(
|
||||
self.external_access.external_user_group_ids
|
||||
),
|
||||
"is_public": self.external_access.is_public,
|
||||
},
|
||||
"doc_id": self.doc_id,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> "DocExternalAccess":
|
||||
external_access = ExternalAccess(
|
||||
external_user_emails=set(
|
||||
data["external_access"].get("external_user_emails", [])
|
||||
),
|
||||
external_user_group_ids=set(
|
||||
data["external_access"].get("external_user_group_ids", [])
|
||||
),
|
||||
is_public=data["external_access"]["is_public"],
|
||||
)
|
||||
return cls(
|
||||
external_access=external_access,
|
||||
doc_id=data["doc_id"],
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class DocumentAccess(ExternalAccess):
|
||||
# User emails for Danswer users, None indicates admin
|
||||
@@ -70,3 +110,12 @@ class DocumentAccess(ExternalAccess):
|
||||
user_groups=set(user_groups),
|
||||
is_public=is_public,
|
||||
)
|
||||
|
||||
|
||||
default_public_access = DocumentAccess(
|
||||
external_user_emails=set(),
|
||||
external_user_group_ids=set(),
|
||||
user_emails=set(),
|
||||
user_groups=set(),
|
||||
is_public=True,
|
||||
)
|
||||
|
||||
backend/danswer/agent_search/core_qa_graph/edges.py
@@ -0,0 +1,42 @@
|
||||
from collections.abc import Hashable
|
||||
from typing import Union
|
||||
|
||||
from langgraph.types import Send
|
||||
|
||||
from danswer.agent_search.core_qa_graph.states import BaseQAState
|
||||
from danswer.agent_search.primary_graph.states import RetrieverState
|
||||
from danswer.agent_search.primary_graph.states import VerifierState
|
||||
|
||||
|
||||
def sub_continue_to_verifier(state: BaseQAState) -> Union[Hashable, list[Hashable]]:
|
||||
# Routes each de-douped retrieved doc to the verifier step - in parallel
|
||||
# Notice the 'Send()' API that takes care of the parallelization
|
||||
|
||||
return [
|
||||
Send(
|
||||
"sub_verifier",
|
||||
VerifierState(
|
||||
document=doc,
|
||||
#question=state["original_question"],
|
||||
question=state["sub_question_str"],
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
)
|
||||
for doc in state["sub_question_deduped_retrieval_docs"]
|
||||
]
|
||||
|
||||
|
||||
def sub_continue_to_retrieval(state: BaseQAState) -> Union[Hashable, list[Hashable]]:
|
||||
# Routes re-written queries to the (parallel) retrieval steps
|
||||
# Notice the 'Send()' API that takes care of the parallelization
|
||||
rewritten_queries = state["sub_question_search_queries"].rewritten_queries + [state["sub_question_str"]]
|
||||
return [
|
||||
Send(
|
||||
"sub_custom_retrieve",
|
||||
RetrieverState(
|
||||
rewritten_query=query,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
)
|
||||
for query in rewritten_queries
|
||||
]
|
||||
backend/danswer/agent_search/core_qa_graph/graph_builder.py
@@ -0,0 +1,132 @@
|
||||
from langgraph.graph import END
|
||||
from langgraph.graph import START
|
||||
from langgraph.graph import StateGraph
|
||||
|
||||
from danswer.agent_search.core_qa_graph.edges import sub_continue_to_retrieval
|
||||
from danswer.agent_search.core_qa_graph.edges import sub_continue_to_verifier
|
||||
from danswer.agent_search.core_qa_graph.nodes.combine_retrieved_docs import (
|
||||
sub_combine_retrieved_docs,
|
||||
)
|
||||
from danswer.agent_search.core_qa_graph.nodes.custom_retrieve import (
|
||||
sub_custom_retrieve,
|
||||
)
|
||||
from danswer.agent_search.core_qa_graph.nodes.dummy import sub_dummy
|
||||
from danswer.agent_search.core_qa_graph.nodes.final_format import (
|
||||
sub_final_format,
|
||||
)
|
||||
from danswer.agent_search.core_qa_graph.nodes.generate import sub_generate
|
||||
from danswer.agent_search.core_qa_graph.nodes.qa_check import sub_qa_check
|
||||
from danswer.agent_search.core_qa_graph.nodes.rewrite import sub_rewrite
|
||||
from danswer.agent_search.core_qa_graph.nodes.verifier import sub_verifier
|
||||
from danswer.agent_search.core_qa_graph.states import BaseQAOutputState
|
||||
from danswer.agent_search.core_qa_graph.states import BaseQAState
|
||||
from danswer.agent_search.core_qa_graph.states import CoreQAInputState
|
||||
|
||||
|
||||
def build_core_qa_graph() -> StateGraph:
|
||||
sub_answers_initial = StateGraph(
|
||||
state_schema=BaseQAState,
|
||||
output=BaseQAOutputState,
|
||||
)
|
||||
|
||||
### Add nodes ###
|
||||
sub_answers_initial.add_node(node="sub_dummy", action=sub_dummy)
|
||||
sub_answers_initial.add_node(node="sub_rewrite", action=sub_rewrite)
|
||||
sub_answers_initial.add_node(
|
||||
node="sub_custom_retrieve",
|
||||
action=sub_custom_retrieve,
|
||||
)
|
||||
sub_answers_initial.add_node(
|
||||
node="sub_combine_retrieved_docs",
|
||||
action=sub_combine_retrieved_docs,
|
||||
)
|
||||
sub_answers_initial.add_node(
|
||||
node="sub_verifier",
|
||||
action=sub_verifier,
|
||||
)
|
||||
sub_answers_initial.add_node(
|
||||
node="sub_generate",
|
||||
action=sub_generate,
|
||||
)
|
||||
sub_answers_initial.add_node(
|
||||
node="sub_qa_check",
|
||||
action=sub_qa_check,
|
||||
)
|
||||
sub_answers_initial.add_node(
|
||||
node="sub_final_format",
|
||||
action=sub_final_format,
|
||||
)
|
||||
|
||||
### Add edges ###
|
||||
sub_answers_initial.add_edge(START, "sub_dummy")
|
||||
sub_answers_initial.add_edge("sub_dummy", "sub_rewrite")
|
||||
|
||||
sub_answers_initial.add_conditional_edges(
|
||||
source="sub_rewrite",
|
||||
path=sub_continue_to_retrieval,
|
||||
)
|
||||
|
||||
sub_answers_initial.add_edge(
|
||||
start_key="sub_custom_retrieve",
|
||||
end_key="sub_combine_retrieved_docs",
|
||||
)
|
||||
|
||||
sub_answers_initial.add_conditional_edges(
|
||||
source="sub_combine_retrieved_docs",
|
||||
path=sub_continue_to_verifier,
|
||||
path_map=["sub_verifier"],
|
||||
)
|
||||
|
||||
sub_answers_initial.add_edge(
|
||||
start_key="sub_verifier",
|
||||
end_key="sub_generate",
|
||||
)
|
||||
|
||||
sub_answers_initial.add_edge(
|
||||
start_key="sub_generate",
|
||||
end_key="sub_qa_check",
|
||||
)
|
||||
|
||||
sub_answers_initial.add_edge(
|
||||
start_key="sub_qa_check",
|
||||
end_key="sub_final_format",
|
||||
)
|
||||
|
||||
sub_answers_initial.add_edge(
|
||||
start_key="sub_final_format",
|
||||
end_key=END,
|
||||
)
|
||||
# sub_answers_graph = sub_answers_initial.compile()
|
||||
return sub_answers_initial
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# q = "Whose music is kind of hard to easily enjoy?"
|
||||
# q = "What is voice leading?"
|
||||
# q = "What are the types of motions in music?"
|
||||
# q = "What are key elements of music theory?"
|
||||
# q = "How can I best understand music theory using voice leading?"
|
||||
q = "What makes good music?"
|
||||
# q = "types of motions in music"
|
||||
# q = "What is the relationship between music and physics?"
|
||||
# q = "Can you compare various grunge styles?"
|
||||
# q = "Why is quantum gravity so hard?"
|
||||
|
||||
inputs = CoreQAInputState(
|
||||
original_question=q,
|
||||
sub_question_str=q,
|
||||
)
|
||||
sub_answers_graph = build_core_qa_graph()
|
||||
compiled_sub_answers = sub_answers_graph.compile()
|
||||
output = compiled_sub_answers.invoke(inputs)
|
||||
print("\nOUTPUT:")
|
||||
print(output.keys())
|
||||
for key, value in output.items():
|
||||
if key in [
|
||||
"sub_question_answer",
|
||||
"sub_question_str",
|
||||
"sub_qas",
|
||||
"initial_sub_qas",
|
||||
"sub_question_answer",
|
||||
]:
|
||||
print(f"{key}: {value}")
|
||||
@@ -0,0 +1,36 @@
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from danswer.agent_search.core_qa_graph.states import BaseQAState
|
||||
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
|
||||
from danswer.context.search.models import InferenceSection
|
||||
|
||||
|
||||
def sub_combine_retrieved_docs(state: BaseQAState) -> dict[str, Any]:
|
||||
"""
|
||||
Dedupe the retrieved docs.
|
||||
"""
|
||||
node_start_time = datetime.now()
|
||||
|
||||
sub_question_base_retrieval_docs = state["sub_question_base_retrieval_docs"]
|
||||
|
||||
print(f"Number of docs from steps: {len(sub_question_base_retrieval_docs)}")
|
||||
dedupe_docs: list[InferenceSection] = []
|
||||
for base_retrieval_doc in sub_question_base_retrieval_docs:
|
||||
if not any(
|
||||
base_retrieval_doc.center_chunk.chunk_id == doc.center_chunk.chunk_id
|
||||
for doc in dedupe_docs
|
||||
):
|
||||
dedupe_docs.append(base_retrieval_doc)
|
||||
|
||||
print(f"Number of deduped docs: {len(dedupe_docs)}")
|
||||
|
||||
|
||||
return {
|
||||
"sub_question_deduped_retrieval_docs": dedupe_docs,
|
||||
"log_messages": generate_log_message(
|
||||
message="sub - combine_retrieved_docs (dedupe)",
|
||||
node_start_time=node_start_time,
|
||||
graph_start_time=state["graph_start_time"],
|
||||
),
|
||||
}
|
||||
@@ -0,0 +1,66 @@
import datetime
from typing import Any

from danswer.agent_search.primary_graph.states import RetrieverState
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
from danswer.context.search.models import InferenceSection
from danswer.context.search.models import SearchRequest
from danswer.context.search.pipeline import SearchPipeline
from danswer.db.engine import get_session_context_manager
from danswer.llm.factory import get_default_llms


def sub_custom_retrieve(state: RetrieverState) -> dict[str, Any]:
    """
    Retrieve documents

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, documents, that contains retrieved documents
    """
    print("---RETRIEVE SUB---")

    node_start_time = datetime.datetime.now()

    rewritten_query = state["rewritten_query"]

    # Retrieval
    # TODO: add the actual retrieval, probably from search_tool.run()
    llm, fast_llm = get_default_llms()
    with get_session_context_manager() as db_session:
        search_pipeline = SearchPipeline(
            search_request=SearchRequest(
                query=rewritten_query,
            ),
            user=None,
            llm=llm,
            fast_llm=fast_llm,
            db_session=db_session,
        )
        reranked_docs: list[InferenceSection] = search_pipeline.reranked_sections

    # initial metric to measure fit TODO: implement metric properly
    top_1_score = reranked_docs[0].center_chunk.score
    top_5_score = sum([doc.center_chunk.score for doc in reranked_docs[:5]]) / 5
    top_10_score = sum([doc.center_chunk.score for doc in reranked_docs[:10]]) / 10

    fit_score = 1 / 3 * (top_1_score + top_5_score + top_10_score)

    chunk_ids = {
        "query": rewritten_query,
        "chunk_ids": [doc.center_chunk.chunk_id for doc in reranked_docs],
    }

    return {
        "sub_question_base_retrieval_docs": reranked_docs,
        "sub_chunk_ids": [chunk_ids],
        "log_messages": generate_log_message(
            message=f"sub - custom_retrieve, fit_score: {fit_score}",
            node_start_time=node_start_time,
            graph_start_time=state["graph_start_time"],
        ),
    }
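As a quick sanity check on the fit metric above: it is the average of the top-1 score, the mean of the top-5 scores, and the mean of the top-10 scores. A self-contained sketch with made-up reranker scores (illustrative only, not values from the pipeline):

# Hypothetical reranker scores, highest first -- illustrative only.
scores = [0.92, 0.88, 0.80, 0.75, 0.70, 0.66, 0.60, 0.55, 0.50, 0.45]

top_1 = scores[0]                      # 0.92
top_5 = sum(scores[:5]) / 5            # 0.81
top_10 = sum(scores[:10]) / 10         # 0.681

fit_score = (top_1 + top_5 + top_10) / 3
print(round(fit_score, 3))             # 0.804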
24  backend/danswer/agent_search/core_qa_graph/nodes/dummy.py  Normal file
@@ -0,0 +1,24 @@
import datetime
from typing import Any

from danswer.agent_search.core_qa_graph.states import BaseQAState
from danswer.agent_search.shared_graph_utils.utils import generate_log_message


def sub_dummy(state: BaseQAState) -> dict[str, Any]:
    """
    Dummy step
    """

    print("---Sub Dummy---")

    node_start_time = datetime.datetime.now()

    return {
        "graph_start_time": node_start_time,
        "log_messages": generate_log_message(
            message="sub - dummy",
            node_start_time=node_start_time,
            graph_start_time=node_start_time,
        ),
    }
@@ -0,0 +1,22 @@
from typing import Any

from danswer.agent_search.core_qa_graph.states import BaseQAState


def sub_final_format(state: BaseQAState) -> dict[str, Any]:
    """
    Create the final output for the QA subgraph
    """

    print("---BASE FINAL FORMAT---")

    return {
        "sub_qas": [
            {
                "sub_question": state["sub_question_str"],
                "sub_answer": state["sub_question_answer"],
                "sub_answer_check": state["sub_question_answer_check"],
            }
        ],
        "log_messages": state["log_messages"],
    }
91  backend/danswer/agent_search/core_qa_graph/nodes/generate.py  Normal file
@@ -0,0 +1,91 @@
from datetime import datetime
from typing import Any

from langchain_core.messages import HumanMessage
from langchain_core.messages import merge_message_runs

from danswer.agent_search.core_qa_graph.states import BaseQAState
from danswer.agent_search.shared_graph_utils.prompts import BASE_RAG_PROMPT
from danswer.agent_search.shared_graph_utils.utils import format_docs
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
from danswer.llm.factory import get_default_llms


def sub_generate(state: BaseQAState) -> dict[str, Any]:
    """
    Generate answer

    Args:
        state (messages): The current state

    Returns:
        dict: The updated state with the generated sub-answer
    """
    print("---GENERATE---")

    # Create sub-query results
    verified_chunks = [
        chunk.center_chunk.chunk_id
        for chunk in state["sub_question_verified_retrieval_docs"]
    ]
    result_dict = {}

    chunk_id_dicts = state["sub_chunk_ids"]
    expanded_chunks = []
    original_chunks = []

    for chunk_id_dict in chunk_id_dicts:
        sub_question = chunk_id_dict["query"]
        verified_sq_chunks = [
            chunk_id
            for chunk_id in chunk_id_dict["chunk_ids"]
            if chunk_id in verified_chunks
        ]

        if sub_question != state["original_question"]:
            expanded_chunks += verified_sq_chunks
        else:
            result_dict["ORIGINAL"] = len(verified_sq_chunks)
            original_chunks += verified_sq_chunks
        result_dict[sub_question[:30]] = len(verified_sq_chunks)

    expansion_chunks = set(expanded_chunks)
    num_expansion_chunks = sum(
        [1 for chunk_id in expansion_chunks if chunk_id in verified_chunks]
    )
    num_original_relevant_chunks = len(original_chunks)
    num_missed_relevant_chunks = sum(
        [1 for chunk_id in original_chunks if chunk_id not in expansion_chunks]
    )
    num_gained_relevant_chunks = sum(
        [1 for chunk_id in expansion_chunks if chunk_id not in original_chunks]
    )
    result_dict["expansion_chunks"] = num_expansion_chunks

    print(result_dict)

    node_start_time = datetime.now()

    question = state["sub_question_str"]
    docs = state["sub_question_verified_retrieval_docs"]

    print(f"Number of verified retrieval docs: {len(docs)}")

    # Take at most 10 docs for context.
    # NOTE: docs[-10:] takes the *last* 10 verified docs; slice from the front
    # if the list is ordered best-first.
    # TODO: Make this dynamic or use config param?
    top_10_docs = docs[-10:]

    msg = [
        HumanMessage(
            content=BASE_RAG_PROMPT.format(
                question=question, context=format_docs(top_10_docs)
            )
        )
    ]

    # Grader
    _, fast_llm = get_default_llms()
    response = list(
        fast_llm.stream(
            prompt=msg,
            # structured_response_format=None,
        )
    )

    answer_str = merge_message_runs(response, chunk_separator="")[0].content
    return {
        "sub_question_answer": answer_str,
        "log_messages": generate_log_message(
            message="base - generate",
            node_start_time=node_start_time,
            graph_start_time=state["graph_start_time"],
        ),
    }
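To make the chunk bookkeeping above concrete, here is a small, self-contained sketch of the same overlap counts using plain sets and made-up chunk ids (illustrative only; the real code compares ids coming from the retrieval step):

# Chunk ids retrieved for the original query vs. the expanded (rewritten) queries.
original_chunks = {1, 2, 3, 4}
expansion_chunks = {3, 4, 5, 6, 7}

num_missed_relevant_chunks = len(original_chunks - expansion_chunks)  # 2 -> ids 1, 2
num_gained_relevant_chunks = len(expansion_chunks - original_chunks)  # 3 -> ids 5, 6, 7
num_shared_chunks = len(original_chunks & expansion_chunks)           # 2 -> ids 3, 4

print(num_missed_relevant_chunks, num_gained_relevant_chunks, num_shared_chunks)  # 2 3 2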
51  backend/danswer/agent_search/core_qa_graph/nodes/qa_check.py  Normal file
@@ -0,0 +1,51 @@
import datetime
from typing import Any

from langchain_core.messages import HumanMessage
from langchain_core.messages import merge_message_runs

from danswer.agent_search.core_qa_graph.states import BaseQAState
from danswer.agent_search.shared_graph_utils.prompts import BASE_CHECK_PROMPT
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
from danswer.llm.factory import get_default_llms


def sub_qa_check(state: BaseQAState) -> dict[str, Any]:
    """
    Check if the sub-question answer is satisfactory.

    Args:
        state: The current BaseQAState containing the sub-question and its answer

    Returns:
        dict containing the check result and log message
    """
    node_start_time = datetime.datetime.now()

    msg = [
        HumanMessage(
            content=BASE_CHECK_PROMPT.format(
                question=state["sub_question_str"],
                base_answer=state["sub_question_answer"],
            )
        )
    ]

    _, fast_llm = get_default_llms()
    response = list(
        fast_llm.stream(
            prompt=msg,
            # structured_response_format=None,
        )
    )

    response_str = merge_message_runs(response, chunk_separator="")[0].content

    return {
        "sub_question_answer_check": response_str,
        "log_messages": generate_log_message(
            message="sub - qa_check",
            node_start_time=node_start_time,
            graph_start_time=state["graph_start_time"],
        ),
    }
74  backend/danswer/agent_search/core_qa_graph/nodes/rewrite.py  Normal file
@@ -0,0 +1,74 @@
import datetime
from typing import Any

from langchain_core.messages import HumanMessage
from langchain_core.messages import merge_message_runs

from danswer.agent_search.core_qa_graph.states import BaseQAState
from danswer.agent_search.shared_graph_utils.models import RewrittenQueries
from danswer.agent_search.shared_graph_utils.prompts import (
    REWRITE_PROMPT_MULTI_ORIGINAL,
)
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
from danswer.llm.factory import get_default_llms


def sub_rewrite(state: BaseQAState) -> dict[str, Any]:
    """
    Transform the initial question into more suitable search queries.

    Args:
        state (messages): The current state

    Returns:
        dict: The updated state with re-phrased question
    """

    print("---SUB TRANSFORM QUERY---")

    node_start_time = datetime.datetime.now()

    # messages = state["base_answer_messages"]
    question = state["sub_question_str"]

    msg = [
        HumanMessage(
            content=REWRITE_PROMPT_MULTI_ORIGINAL.format(question=question),
        )
    ]

    """
    msg = [
        HumanMessage(
            content=REWRITE_PROMPT_MULTI.format(question=question),
        )
    ]
    """

    _, fast_llm = get_default_llms()
    llm_response_list = list(
        fast_llm.stream(
            prompt=msg,
            # structured_response_format={"type": "json_object", "schema": RewrittenQueries.model_json_schema()},
            # structured_response_format=RewrittenQueries.model_json_schema(),
        )
    )
    llm_response = merge_message_runs(llm_response_list, chunk_separator="")[0].content

    print(f"llm_response: {llm_response}")

    rewritten_queries = llm_response.split("--")
    # rewritten_queries = [llm_response.split("\n")[0]]

    print(f"rewritten_queries: {rewritten_queries}")

    rewritten_queries = RewrittenQueries(rewritten_queries=rewritten_queries)

    return {
        "sub_question_search_queries": rewritten_queries,
        "log_messages": generate_log_message(
            message="sub - rewrite",
            node_start_time=node_start_time,
            graph_start_time=state["graph_start_time"],
        ),
    }
64  backend/danswer/agent_search/core_qa_graph/nodes/verifier.py  Normal file
@@ -0,0 +1,64 @@
import datetime
from typing import Any

from langchain_core.messages import HumanMessage
from langchain_core.messages import merge_message_runs

from danswer.agent_search.primary_graph.states import VerifierState
from danswer.agent_search.shared_graph_utils.models import BinaryDecision
from danswer.agent_search.shared_graph_utils.prompts import VERIFIER_PROMPT
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
from danswer.llm.factory import get_default_llms


def sub_verifier(state: VerifierState) -> dict[str, Any]:
    """
    Check whether the document is relevant for the original user question

    Args:
        state (VerifierState): The current state

    Returns:
        dict: The updated state with the final decision
    """

    # print("---VERIFY OUTPUT---")
    node_start_time = datetime.datetime.now()

    question = state["question"]
    document_content = state["document"].combined_content

    msg = [
        HumanMessage(
            content=VERIFIER_PROMPT.format(
                question=question, document_content=document_content
            )
        )
    ]

    # Grader
    llm, fast_llm = get_default_llms()
    response = list(
        llm.stream(
            prompt=msg,
            # structured_response_format=BinaryDecision.model_json_schema(),
        )
    )

    response_string = merge_message_runs(response, chunk_separator="")[0].content
    # Convert string response to proper dictionary format
    decision_dict = {"decision": response_string.lower()}
    formatted_response = BinaryDecision.model_validate(decision_dict)

    print(f"Verification end time: {datetime.datetime.now()}")

    return {
        "sub_question_verified_retrieval_docs": [state["document"]]
        if formatted_response.decision == "yes"
        else [],
        "log_messages": generate_log_message(
            message=f"sub - verifier: {formatted_response.decision}",
            node_start_time=node_start_time,
            graph_start_time=state["graph_start_time"],
        ),
    }
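The verifier above assumes the model answers with a bare "yes" or "no"; anything chattier would fail BinaryDecision validation. A hedged sketch of a more defensive normalization step (the BinaryDecision model here is a hypothetical stand-in, assumed from its usage above to be a Pydantic model with a single "decision" field):

from typing import Literal

from pydantic import BaseModel


class BinaryDecisionSketch(BaseModel):
    # Hypothetical stand-in for shared_graph_utils.models.BinaryDecision.
    decision: Literal["yes", "no"]


def normalize_decision(raw: str) -> BinaryDecisionSketch:
    # Strip whitespace/punctuation and default to "no" for anything unexpected,
    # so a verbose LLM reply cannot crash the verifier node.
    cleaned = raw.strip().lower().rstrip(".!")
    decision = "yes" if cleaned.startswith("yes") else "no"
    return BinaryDecisionSketch(decision=decision)


print(normalize_decision("Yes, this document is relevant.").decision)  # yes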
90  backend/danswer/agent_search/core_qa_graph/states.py  Normal file
@@ -0,0 +1,90 @@
import operator
from collections.abc import Sequence
from datetime import datetime
from typing import Annotated
from typing import TypedDict

from langchain_core.messages import BaseMessage
from langgraph.graph.message import add_messages

from danswer.agent_search.shared_graph_utils.models import RewrittenQueries
from danswer.context.search.models import InferenceSection
from danswer.llm.interfaces import LLM


class SubQuestionRetrieverState(TypedDict):
    # The state for the parallel Retrievers. They each need to see only one query
    sub_question_rewritten_query: str


class SubQuestionVerifierState(TypedDict):
    # The state for the parallel verification step. Each node execution needs to see only one question/doc pair
    sub_question_document: InferenceSection
    sub_question: str


class CoreQAInputState(TypedDict):
    sub_question_str: str
    original_question: str


class BaseQAState(TypedDict):
    # The 'core SubQuestion' state.
    original_question: str
    graph_start_time: datetime
    # start time for parallel initial sub-question thread
    sub_query_start_time: datetime
    sub_question_rewritten_queries: list[str]
    sub_question_str: str
    sub_question_search_queries: RewrittenQueries
    sub_question_nr: int
    sub_chunk_ids: Annotated[Sequence[dict], operator.add]
    sub_question_base_retrieval_docs: Annotated[
        Sequence[InferenceSection], operator.add
    ]
    sub_question_deduped_retrieval_docs: Annotated[
        Sequence[InferenceSection], operator.add
    ]
    sub_question_verified_retrieval_docs: Annotated[
        Sequence[InferenceSection], operator.add
    ]
    sub_question_reranked_retrieval_docs: Annotated[
        Sequence[InferenceSection], operator.add
    ]
    sub_question_top_chunks: Annotated[Sequence[dict], operator.add]
    sub_question_answer: str
    sub_question_answer_check: str
    log_messages: Annotated[Sequence[BaseMessage], add_messages]
    sub_qas: Annotated[Sequence[dict], operator.add]
    # Answers sent back to core
    initial_sub_qas: Annotated[Sequence[dict], operator.add]
    primary_llm: LLM
    fast_llm: LLM


class BaseQAOutputState(TypedDict):
    # The 'SubQuestion' output state. Removes all the intermediate states
    sub_question_rewritten_queries: list[str]
    sub_question_str: str
    sub_question_search_queries: list[str]
    sub_question_nr: int
    # Answers sent back to core
    sub_qas: Annotated[Sequence[dict], operator.add]
    # Answers sent back to core
    initial_sub_qas: Annotated[Sequence[dict], operator.add]
    sub_question_base_retrieval_docs: Annotated[
        Sequence[InferenceSection], operator.add
    ]
    sub_question_deduped_retrieval_docs: Annotated[
        Sequence[InferenceSection], operator.add
    ]
    sub_question_verified_retrieval_docs: Annotated[
        Sequence[InferenceSection], operator.add
    ]
    sub_question_reranked_retrieval_docs: Annotated[
        Sequence[InferenceSection], operator.add
    ]
    sub_question_top_chunks: Annotated[Sequence[dict], operator.add]
    sub_question_answer: str
    sub_question_answer_check: str
    log_messages: Annotated[Sequence[BaseMessage], add_messages]
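A brief note on why several fields above are wrapped in Annotated[..., operator.add]: in LangGraph the second element of the annotation acts as a reducer, so updates coming back from parallel branches are concatenated instead of overwriting each other. A minimal standalone sketch (node and field names here are invented purely for illustration):

import operator
from typing import Annotated, TypedDict

from langgraph.graph import END, START, StateGraph


class DemoState(TypedDict):
    # Parallel writes to this key are concatenated rather than overwritten.
    collected: Annotated[list[str], operator.add]


def branch_a(state: DemoState) -> dict:
    return {"collected": ["from branch_a"]}


def branch_b(state: DemoState) -> dict:
    return {"collected": ["from branch_b"]}


graph = StateGraph(DemoState)
graph.add_node("branch_a", branch_a)
graph.add_node("branch_b", branch_b)
graph.add_edge(START, "branch_a")
graph.add_edge(START, "branch_b")
graph.add_edge("branch_a", END)
graph.add_edge("branch_b", END)

print(graph.compile().invoke({"collected": []})["collected"])
# Both branch outputs end up in the list (order may vary).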
46  backend/danswer/agent_search/deep_qa_graph/edges.py  Normal file
@@ -0,0 +1,46 @@
from collections.abc import Hashable
from typing import Union

from langgraph.types import Send

from danswer.agent_search.deep_qa_graph.states import ResearchQAState
from danswer.agent_search.primary_graph.states import RetrieverState
from danswer.agent_search.primary_graph.states import VerifierState


def sub_continue_to_verifier(state: ResearchQAState) -> Union[Hashable, list[Hashable]]:
    # Routes each de-duped retrieved doc to the verifier step - in parallel
    # Notice the 'Send()' API that takes care of the parallelization

    return [
        Send(
            "sub_verifier",
            VerifierState(
                document=doc,
                question=state["sub_question"],
                primary_llm=state["primary_llm"],
                fast_llm=state["fast_llm"],
                graph_start_time=state["graph_start_time"],
            ),
        )
        for doc in state["sub_question_base_retrieval_docs"]
    ]


def sub_continue_to_retrieval(
    state: ResearchQAState,
) -> Union[Hashable, list[Hashable]]:
    # Routes re-written queries to the (parallel) retrieval steps
    # Notice the 'Send()' API that takes care of the parallelization
    return [
        Send(
            "sub_custom_retrieve",
            RetrieverState(
                rewritten_query=query,
                primary_llm=state["primary_llm"],
                fast_llm=state["fast_llm"],
                graph_start_time=state["graph_start_time"],
            ),
        )
        for query in state["sub_question_rewritten_queries"]
    ]
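For readers unfamiliar with the Send API used above: returning a list of Send objects from a conditional edge schedules one invocation of the target node per payload, and LangGraph runs those invocations in the same superstep. A minimal, self-contained sketch of the pattern (node and field names invented for illustration):

import operator
from typing import Annotated, TypedDict

from langgraph.graph import END, START, StateGraph
from langgraph.types import Send


class FanOutState(TypedDict):
    queries: list[str]
    results: Annotated[list[str], operator.add]


def fan_out(state: FanOutState) -> list[Send]:
    # One Send per query -> one parallel "retrieve" invocation per query.
    return [Send("retrieve", {"query": q}) for q in state["queries"]]


def retrieve(payload: dict) -> dict:
    return {"results": [f"docs for: {payload['query']}"]}


graph = StateGraph(FanOutState)
graph.add_node("retrieve", retrieve)
graph.add_conditional_edges(START, fan_out, ["retrieve"])
graph.add_edge("retrieve", END)

out = graph.compile().invoke({"queries": ["a", "b"], "results": []})
print(out["results"])  # one entry per query, order may vary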
93  backend/danswer/agent_search/deep_qa_graph/graph_builder.py  Normal file
@@ -0,0 +1,93 @@
from langgraph.graph import END
from langgraph.graph import START
from langgraph.graph import StateGraph

from danswer.agent_search.deep_qa_graph.edges import sub_continue_to_retrieval
from danswer.agent_search.deep_qa_graph.edges import sub_continue_to_verifier
from danswer.agent_search.deep_qa_graph.nodes.combine_retrieved_docs import (
    sub_combine_retrieved_docs,
)
from danswer.agent_search.deep_qa_graph.nodes.custom_retrieve import sub_custom_retrieve
from danswer.agent_search.deep_qa_graph.nodes.dummy import sub_dummy
from danswer.agent_search.deep_qa_graph.nodes.final_format import sub_final_format
from danswer.agent_search.deep_qa_graph.nodes.generate import sub_generate
from danswer.agent_search.deep_qa_graph.nodes.qa_check import sub_qa_check
from danswer.agent_search.deep_qa_graph.nodes.rewrite import sub_rewrite
from danswer.agent_search.deep_qa_graph.nodes.verifier import sub_verifier
from danswer.agent_search.deep_qa_graph.states import ResearchQAOutputState
from danswer.agent_search.deep_qa_graph.states import ResearchQAState


def build_deep_qa_graph() -> StateGraph:
    # Define the nodes we will cycle between
    sub_answers = StateGraph(state_schema=ResearchQAState, output=ResearchQAOutputState)

    ### Add Nodes ###

    # Dummy node for initial processing (not currently wired into the edges below)
    sub_answers.add_node(node="sub_dummy", action=sub_dummy)

    # Re-writing the sub-question into search queries (referenced by the START edge below)
    sub_answers.add_node(node="sub_rewrite", action=sub_rewrite)

    # The retrieval step
    sub_answers.add_node(node="sub_custom_retrieve", action=sub_custom_retrieve)

    # The dedupe step
    sub_answers.add_node(
        node="sub_combine_retrieved_docs", action=sub_combine_retrieved_docs
    )

    # Verifying retrieved information
    sub_answers.add_node(node="sub_verifier", action=sub_verifier)

    # Generating the response
    sub_answers.add_node(node="sub_generate", action=sub_generate)

    # Checking the quality of the answer
    sub_answers.add_node(node="sub_qa_check", action=sub_qa_check)

    # Final formatting of the response
    sub_answers.add_node(node="sub_final_format", action=sub_final_format)

    ### Add Edges ###

    # Generate multiple sub-questions
    sub_answers.add_edge(start_key=START, end_key="sub_rewrite")

    # For each sub-question, perform a retrieval in parallel
    sub_answers.add_conditional_edges(
        source="sub_rewrite",
        path=sub_continue_to_retrieval,
        path_map=["sub_custom_retrieve"],
    )

    # Combine the retrieved docs for each sub-question from the parallel retrievals
    sub_answers.add_edge(
        start_key="sub_custom_retrieve", end_key="sub_combine_retrieved_docs"
    )

    # Go over all of the combined retrieved docs and verify them against the original question
    sub_answers.add_conditional_edges(
        source="sub_combine_retrieved_docs",
        path=sub_continue_to_verifier,
        path_map=["sub_verifier"],
    )

    # Generate an answer for each verified retrieved doc
    sub_answers.add_edge(start_key="sub_verifier", end_key="sub_generate")

    # Check the quality of the answer
    sub_answers.add_edge(start_key="sub_generate", end_key="sub_qa_check")

    sub_answers.add_edge(start_key="sub_qa_check", end_key="sub_final_format")

    sub_answers.add_edge(start_key="sub_final_format", end_key=END)

    return sub_answers


if __name__ == "__main__":
    # TODO: add the actual question
    inputs = {"sub_question": "Whose music is kind of hard to easily enjoy?"}
    sub_answers_graph = build_deep_qa_graph()
    compiled_sub_answers = sub_answers_graph.compile()
    output = compiled_sub_answers.invoke(inputs)
    print("\nOUTPUT:")
    print(output)
@@ -0,0 +1,31 @@
from datetime import datetime
from typing import Any

from danswer.agent_search.deep_qa_graph.states import ResearchQAState
from danswer.agent_search.shared_graph_utils.utils import generate_log_message


def sub_combine_retrieved_docs(state: ResearchQAState) -> dict[str, Any]:
    """
    Dedupe the retrieved docs.
    """
    node_start_time = datetime.now()

    sub_question_base_retrieval_docs = state["sub_question_base_retrieval_docs"]

    print(f"Number of docs from steps: {len(sub_question_base_retrieval_docs)}")
    dedupe_docs = []
    for base_retrieval_doc in sub_question_base_retrieval_docs:
        if base_retrieval_doc not in dedupe_docs:
            dedupe_docs.append(base_retrieval_doc)

    print(f"Number of deduped docs: {len(dedupe_docs)}")

    return {
        "sub_question_deduped_retrieval_docs": dedupe_docs,
        "log_messages": generate_log_message(
            message="sub - combine_retrieved_docs (dedupe)",
            node_start_time=node_start_time,
            graph_start_time=state["graph_start_time"],
        ),
    }
@@ -0,0 +1,33 @@
from datetime import datetime
from typing import Any

from danswer.agent_search.primary_graph.states import RetrieverState
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
from danswer.context.search.models import InferenceSection


def sub_custom_retrieve(state: RetrieverState) -> dict[str, Any]:
    """
    Retrieve documents

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, documents, that contains retrieved documents
    """
    print("---RETRIEVE SUB---")
    node_start_time = datetime.now()

    # Retrieval
    # TODO: add the actual retrieval, probably from search_tool.run()
    documents: list[InferenceSection] = []

    return {
        "sub_question_base_retrieval_docs": documents,
        "log_messages": generate_log_message(
            message="sub - custom_retrieve",
            node_start_time=node_start_time,
            graph_start_time=state["graph_start_time"],
        ),
    }
21  backend/danswer/agent_search/deep_qa_graph/nodes/dummy.py  Normal file
@@ -0,0 +1,21 @@
from datetime import datetime
from typing import Any

from danswer.agent_search.core_qa_graph.states import BaseQAState
from danswer.agent_search.shared_graph_utils.utils import generate_log_message


def sub_dummy(state: BaseQAState) -> dict[str, Any]:
    """
    Dummy step
    """

    print("---Sub Dummy---")

    return {
        "log_messages": generate_log_message(
            message="sub - dummy",
            node_start_time=datetime.now(),
            graph_start_time=state["graph_start_time"],
        ),
    }
@@ -0,0 +1,31 @@
from datetime import datetime
from typing import Any

from danswer.agent_search.deep_qa_graph.states import ResearchQAState
from danswer.agent_search.shared_graph_utils.utils import generate_log_message


def sub_final_format(state: ResearchQAState) -> dict[str, Any]:
    """
    Create the final output for the QA subgraph
    """

    print("---SUB FINAL FORMAT---")
    node_start_time = datetime.now()

    return {
        # TODO: Type this
        "sub_qas": [
            {
                "sub_question": state["sub_question"],
                "sub_answer": state["sub_question_answer"],
                "sub_question_nr": state["sub_question_nr"],
                "sub_answer_check": state["sub_question_answer_check"],
            }
        ],
        "log_messages": generate_log_message(
            message="sub - final format",
            node_start_time=node_start_time,
            graph_start_time=state["graph_start_time"],
        ),
    }
56  backend/danswer/agent_search/deep_qa_graph/nodes/generate.py  Normal file
@@ -0,0 +1,56 @@
from datetime import datetime
from typing import Any

from langchain_core.messages import HumanMessage
from langchain_core.messages import merge_message_runs

from danswer.agent_search.deep_qa_graph.states import ResearchQAState
from danswer.agent_search.shared_graph_utils.prompts import BASE_RAG_PROMPT
from danswer.agent_search.shared_graph_utils.utils import format_docs
from danswer.agent_search.shared_graph_utils.utils import generate_log_message


def sub_generate(state: ResearchQAState) -> dict[str, Any]:
    """
    Generate answer

    Args:
        state (messages): The current state

    Returns:
        dict: The updated state with the generated sub-answer
    """
    print("---SUB GENERATE---")
    node_start_time = datetime.now()

    question = state["sub_question"]
    docs = state["sub_question_verified_retrieval_docs"]

    print(f"Number of verified retrieval docs for sub-question: {len(docs)}")

    msg = [
        HumanMessage(
            content=BASE_RAG_PROMPT.format(question=question, context=format_docs(docs))
        )
    ]

    # Grader
    if len(docs) > 0:
        model = state["fast_llm"]
        response = list(
            model.stream(
                prompt=msg,
            )
        )
        response_str = merge_message_runs(response, chunk_separator="")[0].content
    else:
        response_str = ""

    return {
        "sub_question_answer": response_str,
        "log_messages": generate_log_message(
            message="sub - generate",
            node_start_time=node_start_time,
            graph_start_time=state["graph_start_time"],
        ),
    }
57  backend/danswer/agent_search/deep_qa_graph/nodes/qa_check.py  Normal file
@@ -0,0 +1,57 @@
import json
from datetime import datetime
from typing import Any

from langchain_core.messages import HumanMessage

from danswer.agent_search.deep_qa_graph.prompts import SUB_CHECK_PROMPT
from danswer.agent_search.deep_qa_graph.states import ResearchQAState
from danswer.agent_search.shared_graph_utils.models import BinaryDecision
from danswer.agent_search.shared_graph_utils.utils import generate_log_message


def sub_qa_check(state: ResearchQAState) -> dict[str, Any]:
    """
    Check whether the final output satisfies the original user question

    Args:
        state (messages): The current state

    Returns:
        dict: The updated state with the final decision
    """

    print("---CHECK SUB OUTPUT---")
    node_start_time = datetime.now()

    sub_answer = state["sub_question_answer"]
    sub_question = state["sub_question"]

    msg = [
        HumanMessage(
            content=SUB_CHECK_PROMPT.format(
                sub_question=sub_question, sub_answer=sub_answer
            )
        )
    ]

    # Grader
    model = state["fast_llm"]
    response = list(
        model.stream(
            prompt=msg,
            structured_response_format=BinaryDecision.model_json_schema(),
        )
    )

    raw_response = json.loads(response[0].pretty_repr())
    formatted_response = BinaryDecision.model_validate(raw_response)

    return {
        "sub_question_answer_check": formatted_response.decision,
        "log_messages": generate_log_message(
            message=f"sub - qa check: {formatted_response.decision}",
            node_start_time=node_start_time,
            graph_start_time=state["graph_start_time"],
        ),
    }
64  backend/danswer/agent_search/deep_qa_graph/nodes/rewrite.py  Normal file
@@ -0,0 +1,64 @@
import json
from datetime import datetime
from typing import Any

from langchain_core.messages import HumanMessage

from danswer.agent_search.deep_qa_graph.states import ResearchQAState
from danswer.agent_search.shared_graph_utils.models import RewrittenQueries
from danswer.agent_search.shared_graph_utils.prompts import REWRITE_PROMPT_MULTI
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
from danswer.llm.interfaces import LLM


def sub_rewrite(state: ResearchQAState) -> dict[str, Any]:
    """
    Transform the initial question into more suitable search queries.

    Args:
        state (messages): The current state

    Returns:
        dict: The updated state with re-phrased question
    """

    print("---SUB TRANSFORM QUERY---")
    node_start_time = datetime.now()

    question = state["sub_question"]

    msg = [
        HumanMessage(
            content=REWRITE_PROMPT_MULTI.format(question=question),
        )
    ]
    fast_llm: LLM = state["fast_llm"]
    llm_response = list(
        fast_llm.stream(
            prompt=msg,
            structured_response_format=RewrittenQueries.model_json_schema(),
        )
    )

    # Get the rewritten queries in a defined format (json.loads returns a plain dict)
    rewritten_queries = json.loads(llm_response[0].pretty_repr())

    print(f"rewritten_queries: {rewritten_queries}")

    rewritten_queries = RewrittenQueries(
        rewritten_queries=[
            "music hard to listen to",
            "Music that is not fun or pleasant",
        ]
    )

    print(f"hardcoded rewritten_queries: {rewritten_queries}")

    return {
        "sub_question_rewritten_queries": rewritten_queries,
        "log_messages": generate_log_message(
            message="sub - rewrite",
            node_start_time=node_start_time,
            graph_start_time=state["graph_start_time"],
        ),
    }
59  backend/danswer/agent_search/deep_qa_graph/nodes/verifier.py  Normal file
@@ -0,0 +1,59 @@
import json
from datetime import datetime
from typing import Any

from langchain_core.messages import HumanMessage

from danswer.agent_search.primary_graph.states import VerifierState
from danswer.agent_search.shared_graph_utils.models import BinaryDecision
from danswer.agent_search.shared_graph_utils.prompts import VERIFIER_PROMPT
from danswer.agent_search.shared_graph_utils.utils import generate_log_message


def sub_verifier(state: VerifierState) -> dict[str, Any]:
    """
    Check whether the document is relevant for the original user question

    Args:
        state (VerifierState): The current state

    Returns:
        dict: The updated state with the final decision
    """

    print("---SUB VERIFY OUTPUT---")
    node_start_time = datetime.now()

    question = state["question"]
    document_content = state["document"].combined_content

    msg = [
        HumanMessage(
            content=VERIFIER_PROMPT.format(
                question=question, document_content=document_content
            )
        )
    ]

    # Grader
    model = state["fast_llm"]
    response = list(
        model.stream(
            prompt=msg,
            structured_response_format=BinaryDecision.model_json_schema(),
        )
    )

    raw_response = json.loads(response[0].pretty_repr())
    formatted_response = BinaryDecision.model_validate(raw_response)

    return {
        "sub_question_verified_retrieval_docs": [state["document"]]
        if formatted_response.decision == "yes"
        else [],
        "log_messages": generate_log_message(
            message=f"sub - verifier: {formatted_response.decision}",
            node_start_time=node_start_time,
            graph_start_time=state["graph_start_time"],
        ),
    }
13  backend/danswer/agent_search/deep_qa_graph/prompts.py  Normal file
@@ -0,0 +1,13 @@
SUB_CHECK_PROMPT = """ \n
    Please check whether the suggested answer seems to address the original question.

    Please only answer with 'yes' or 'no' \n
    Here is the initial question:
    \n ------- \n
    {sub_question}
    \n ------- \n
    Here is the proposed answer:
    \n ------- \n
    {sub_answer}
    \n ------- \n
    Please answer with yes or no:"""
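A quick usage check for the template above, using the placeholder names that sub_qa_check passes in (plain str.format, nothing framework-specific; the example question and answer are made up):

from danswer.agent_search.deep_qa_graph.prompts import SUB_CHECK_PROMPT

filled = SUB_CHECK_PROMPT.format(
    sub_question="What are the types of motion in music?",
    sub_answer="Parallel, similar, oblique, and contrary motion.",
)
print(filled)  # the prompt with both blanks filled in, ready to wrap in a HumanMessage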
64  backend/danswer/agent_search/deep_qa_graph/states.py  Normal file
@@ -0,0 +1,64 @@
import operator
from collections.abc import Sequence
from datetime import datetime
from typing import Annotated
from typing import TypedDict

from langchain_core.messages import BaseMessage
from langgraph.graph.message import add_messages

from danswer.context.search.models import InferenceSection
from danswer.llm.interfaces import LLM


class ResearchQAState(TypedDict):
    # The 'core SubQuestion' state.
    original_question: str
    graph_start_time: datetime
    sub_question_rewritten_queries: list[str]
    sub_question: str
    sub_question_nr: int
    sub_question_base_retrieval_docs: Annotated[
        Sequence[InferenceSection], operator.add
    ]
    sub_question_deduped_retrieval_docs: Annotated[
        Sequence[InferenceSection], operator.add
    ]
    sub_question_verified_retrieval_docs: Annotated[
        Sequence[InferenceSection], operator.add
    ]
    sub_question_reranked_retrieval_docs: Annotated[
        Sequence[InferenceSection], operator.add
    ]
    sub_question_top_chunks: Annotated[Sequence[dict], operator.add]
    sub_question_answer: str
    sub_question_answer_check: str
    log_messages: Annotated[Sequence[BaseMessage], add_messages]
    sub_qas: Annotated[Sequence[dict], operator.add]
    primary_llm: LLM
    fast_llm: LLM


class ResearchQAOutputState(TypedDict):
    # The 'SubQuestion' output state. Removes all the intermediate states
    sub_question_rewritten_queries: list[str]
    sub_question: str
    sub_question_nr: int
    # Answers sent back to core
    sub_qas: Annotated[Sequence[dict], operator.add]
    sub_question_base_retrieval_docs: Annotated[
        Sequence[InferenceSection], operator.add
    ]
    sub_question_deduped_retrieval_docs: Annotated[
        Sequence[InferenceSection], operator.add
    ]
    sub_question_verified_retrieval_docs: Annotated[
        Sequence[InferenceSection], operator.add
    ]
    sub_question_reranked_retrieval_docs: Annotated[
        Sequence[InferenceSection], operator.add
    ]
    sub_question_top_chunks: Annotated[Sequence[dict], operator.add]
    sub_question_answer: str
    sub_question_answer_check: str
    log_messages: Annotated[Sequence[BaseMessage], add_messages]
75  backend/danswer/agent_search/primary_graph/edges.py  Normal file
@@ -0,0 +1,75 @@
from collections.abc import Hashable
from typing import Union

from langchain_core.messages import HumanMessage
from langgraph.types import Send

from danswer.agent_search.core_qa_graph.states import BaseQAState
from danswer.agent_search.deep_qa_graph.states import ResearchQAState
from danswer.agent_search.primary_graph.states import QAState
from danswer.agent_search.shared_graph_utils.prompts import BASE_CHECK_PROMPT


def continue_to_initial_sub_questions(
    state: QAState,
) -> Union[Hashable, list[Hashable]]:
    # Routes each initial sub-question to its own (parallel) base QA sub-graph run
    # Notice the 'Send()' API that takes care of the parallelization
    return [
        Send(
            "sub_answers_graph_initial",
            BaseQAState(
                sub_question_str=initial_sub_question["sub_question_str"],
                sub_question_search_queries=initial_sub_question[
                    "sub_question_search_queries"
                ],
                sub_question_nr=initial_sub_question["sub_question_nr"],
                primary_llm=state["primary_llm"],
                fast_llm=state["fast_llm"],
                graph_start_time=state["graph_start_time"],
            ),
        )
        for initial_sub_question in state["initial_sub_questions"]
    ]


def continue_to_answer_sub_questions(state: QAState) -> Union[Hashable, list[Hashable]]:
    # Routes each decomposed sub-question to its own (parallel) research QA sub-graph run
    # Notice the 'Send()' API that takes care of the parallelization
    return [
        Send(
            "sub_answers_graph",
            ResearchQAState(
                sub_question=sub_question["sub_question_str"],
                sub_question_nr=sub_question["sub_question_nr"],
                graph_start_time=state["graph_start_time"],
                primary_llm=state["primary_llm"],
                fast_llm=state["fast_llm"],
            ),
        )
        for sub_question in state["sub_questions"]
    ]


def continue_to_deep_answer(state: QAState) -> Union[Hashable, list[Hashable]]:
    print("---GO TO DEEP ANSWER OR END---")

    base_answer = state["base_answer"]

    question = state["original_question"]

    BASE_CHECK_MESSAGE = [
        HumanMessage(
            content=BASE_CHECK_PROMPT.format(question=question, base_answer=base_answer)
        )
    ]

    model = state["fast_llm"]
    response = model.invoke(BASE_CHECK_MESSAGE)

    print(f"CAN WE CONTINUE W/O GENERATING A DEEP ANSWER? - {response.pretty_repr()}")

    if response.pretty_repr() == "no":
        return "decompose"
    else:
        return "end"
171  backend/danswer/agent_search/primary_graph/graph_builder.py  Normal file
@@ -0,0 +1,171 @@
from langgraph.graph import END
from langgraph.graph import START
from langgraph.graph import StateGraph

from danswer.agent_search.core_qa_graph.graph_builder import build_core_qa_graph
from danswer.agent_search.deep_qa_graph.graph_builder import build_deep_qa_graph
from danswer.agent_search.primary_graph.edges import continue_to_answer_sub_questions
from danswer.agent_search.primary_graph.edges import continue_to_deep_answer
from danswer.agent_search.primary_graph.edges import continue_to_initial_sub_questions
from danswer.agent_search.primary_graph.nodes.base_wait import base_wait
from danswer.agent_search.primary_graph.nodes.combine_retrieved_docs import (
    combine_retrieved_docs,
)
from danswer.agent_search.primary_graph.nodes.custom_retrieve import custom_retrieve
from danswer.agent_search.primary_graph.nodes.decompose import decompose
from danswer.agent_search.primary_graph.nodes.deep_answer_generation import (
    deep_answer_generation,
)
from danswer.agent_search.primary_graph.nodes.dummy_start import dummy_start
from danswer.agent_search.primary_graph.nodes.entity_term_extraction import (
    entity_term_extraction,
)
from danswer.agent_search.primary_graph.nodes.final_stuff import final_stuff
from danswer.agent_search.primary_graph.nodes.generate_initial import generate_initial
from danswer.agent_search.primary_graph.nodes.main_decomp_base import main_decomp_base
from danswer.agent_search.primary_graph.nodes.rewrite import rewrite
from danswer.agent_search.primary_graph.nodes.sub_qa_level_aggregator import (
    sub_qa_level_aggregator,
)
from danswer.agent_search.primary_graph.nodes.sub_qa_manager import sub_qa_manager
from danswer.agent_search.primary_graph.nodes.verifier import verifier
from danswer.agent_search.primary_graph.states import QAState


def build_core_graph() -> StateGraph:
    # Define the nodes we will cycle between
    core_answer_graph = StateGraph(state_schema=QAState)

    ### Add Nodes ###
    core_answer_graph.add_node(node="dummy_start", action=dummy_start)

    # Re-writing the question
    core_answer_graph.add_node(node="rewrite", action=rewrite)

    # The retrieval step
    core_answer_graph.add_node(node="custom_retrieve", action=custom_retrieve)

    # Combine and dedupe retrieved docs.
    core_answer_graph.add_node(
        node="combine_retrieved_docs", action=combine_retrieved_docs
    )

    # Extract entities, terms and relationships
    core_answer_graph.add_node(
        node="entity_term_extraction", action=entity_term_extraction
    )

    # Verifying that a retrieved doc is relevant
    core_answer_graph.add_node(node="verifier", action=verifier)

    # Initial question decomposition
    core_answer_graph.add_node(node="main_decomp_base", action=main_decomp_base)

    # Build the base QA sub-graph and compile it
    compiled_core_qa_graph = build_core_qa_graph().compile()
    # Add the compiled base QA sub-graph as a node to the core graph
    core_answer_graph.add_node(
        node="sub_answers_graph_initial", action=compiled_core_qa_graph
    )

    # Checking whether the initial answer is in the ballpark
    core_answer_graph.add_node(node="base_wait", action=base_wait)

    # Decompose the question into sub-questions
    core_answer_graph.add_node(node="decompose", action=decompose)

    # Manage the sub-questions
    core_answer_graph.add_node(node="sub_qa_manager", action=sub_qa_manager)

    # Build the research QA sub-graph and compile it
    compiled_deep_qa_graph = build_deep_qa_graph().compile()
    # Add the compiled research QA sub-graph as a node to the core graph
    core_answer_graph.add_node(node="sub_answers_graph", action=compiled_deep_qa_graph)

    # Aggregate the sub-questions
    core_answer_graph.add_node(
        node="sub_qa_level_aggregator", action=sub_qa_level_aggregator
    )

    # aggregate sub questions and answers
    core_answer_graph.add_node(
        node="deep_answer_generation", action=deep_answer_generation
    )

    # A final clean-up step
    core_answer_graph.add_node(node="final_stuff", action=final_stuff)

    # Generating a response after we know the documents are relevant
    core_answer_graph.add_node(node="generate_initial", action=generate_initial)

    ### Add Edges ###

    # start the initial sub-question decomposition
    core_answer_graph.add_edge(start_key=START, end_key="main_decomp_base")

    core_answer_graph.add_conditional_edges(
        source="main_decomp_base",
        path=continue_to_initial_sub_questions,
    )

    # use the retrieved information to generate the answer
    core_answer_graph.add_edge(
        start_key=["verifier", "sub_answers_graph_initial"],
        end_key="generate_initial",
    )
    core_answer_graph.add_edge(start_key="generate_initial", end_key="base_wait")

    core_answer_graph.add_conditional_edges(
        source="base_wait",
        path=continue_to_deep_answer,
        path_map={"decompose": "entity_term_extraction", "end": "final_stuff"},
    )

    core_answer_graph.add_edge(start_key="entity_term_extraction", end_key="decompose")

    core_answer_graph.add_edge(start_key="decompose", end_key="sub_qa_manager")
    core_answer_graph.add_conditional_edges(
        source="sub_qa_manager",
        path=continue_to_answer_sub_questions,
    )

    core_answer_graph.add_edge(
        start_key="sub_answers_graph", end_key="sub_qa_level_aggregator"
    )

    core_answer_graph.add_edge(
        start_key="sub_qa_level_aggregator", end_key="deep_answer_generation"
    )

    core_answer_graph.add_edge(
        start_key="deep_answer_generation", end_key="final_stuff"
    )

    core_answer_graph.add_edge(start_key="final_stuff", end_key=END)

    core_answer_graph.compile()

    return core_answer_graph
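A minimal, hypothetical driver for the builder above. The exact QAState keys required at invocation time depend on parts of the package not shown in this diff; the ones below simply mirror the fields the nodes in this diff read, so treat them as assumptions:

from datetime import datetime

from danswer.agent_search.primary_graph.graph_builder import build_core_graph
from danswer.llm.factory import get_default_llms

llm, fast_llm = get_default_llms()
compiled = build_core_graph().compile()

# Assumed input shape -- adjust to the actual QAState definition.
output = compiled.invoke(
    {
        "original_question": "What makes good music?",
        "primary_llm": llm,
        "fast_llm": fast_llm,
        "graph_start_time": datetime.now(),
    }
)
print(output.keys())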
@@ -0,0 +1,27 @@
from datetime import datetime
from typing import Any

from danswer.agent_search.primary_graph.states import QAState
from danswer.agent_search.shared_graph_utils.utils import generate_log_message


def base_wait(state: QAState) -> dict[str, Any]:
    """
    Ensures that all required steps are completed before proceeding to the next step

    Args:
        state (messages): The current state

    Returns:
        dict: {} (no operation, just logging)
    """

    print("---Base Wait ---")
    node_start_time = datetime.now()
    return {
        "log_messages": generate_log_message(
            message="core - base_wait",
            node_start_time=node_start_time,
            graph_start_time=state["graph_start_time"],
        ),
    }
@@ -0,0 +1,36 @@
from collections.abc import Sequence
from datetime import datetime
from typing import Any

from danswer.agent_search.primary_graph.states import QAState
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
from danswer.context.search.models import InferenceSection


def combine_retrieved_docs(state: QAState) -> dict[str, Any]:
    """
    Dedupe the retrieved docs.
    """
    node_start_time = datetime.now()

    base_retrieval_docs: Sequence[InferenceSection] = state["base_retrieval_docs"]

    print(f"Number of docs from steps: {len(base_retrieval_docs)}")
    dedupe_docs: list[InferenceSection] = []
    for base_retrieval_doc in base_retrieval_docs:
        if not any(
            base_retrieval_doc.center_chunk.document_id == doc.center_chunk.document_id
            for doc in dedupe_docs
        ):
            dedupe_docs.append(base_retrieval_doc)

    print(f"Number of deduped docs: {len(dedupe_docs)}")

    return {
        "deduped_retrieval_docs": dedupe_docs,
        "log_messages": generate_log_message(
            message="core - combine_retrieved_docs (dedupe)",
            node_start_time=node_start_time,
            graph_start_time=state["graph_start_time"],
        ),
    }
@@ -0,0 +1,52 @@
from datetime import datetime
from typing import Any

from danswer.agent_search.primary_graph.states import RetrieverState
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
from danswer.context.search.models import InferenceSection
from danswer.context.search.models import SearchRequest
from danswer.context.search.pipeline import SearchPipeline
from danswer.db.engine import get_session_context_manager
from danswer.llm.factory import get_default_llms


def custom_retrieve(state: RetrieverState) -> dict[str, Any]:
    """
    Retrieve documents

    Args:
        retriever_state (dict): The current graph state

    Returns:
        state (dict): New key added to state, documents, that contains retrieved documents
    """
    print("---RETRIEVE---")

    node_start_time = datetime.now()

    query = state["rewritten_query"]

    # Retrieval
    # TODO: add the actual retrieval, probably from search_tool.run()
    llm, fast_llm = get_default_llms()
    with get_session_context_manager() as db_session:
        top_sections = SearchPipeline(
            search_request=SearchRequest(
                query=query,
            ),
            user=None,
            llm=llm,
            fast_llm=fast_llm,
            db_session=db_session,
        ).reranked_sections
    print(len(top_sections))
    documents: list[InferenceSection] = []

    return {
        "base_retrieval_docs": documents,
        "log_messages": generate_log_message(
            message="core - custom_retrieve",
            node_start_time=node_start_time,
            graph_start_time=state["graph_start_time"],
        ),
    }
@@ -0,0 +1,78 @@
import json
import re
from datetime import datetime
from typing import Any

from langchain_core.messages import HumanMessage

from danswer.agent_search.primary_graph.states import QAState
from danswer.agent_search.shared_graph_utils.prompts import DEEP_DECOMPOSE_PROMPT
from danswer.agent_search.shared_graph_utils.utils import format_entity_term_extraction
from danswer.agent_search.shared_graph_utils.utils import generate_log_message


def decompose(state: QAState) -> dict[str, Any]:
    """
    Decompose the original question into a new set of sub-questions, informed by
    the entity/term extraction and by which initial sub-questions were answered.
    """

    node_start_time = datetime.now()

    question = state["original_question"]
    base_answer = state["base_answer"]

    # get the entity term extraction dict and properly format it
    entity_term_extraction_dict = state["retrieved_entities_relationships"][
        "retrieved_entities_relationships"
    ]

    entity_term_extraction_str = format_entity_term_extraction(
        entity_term_extraction_dict
    )

    initial_question_answers = state["initial_sub_qas"]

    addressed_question_list = [
        x["sub_question"]
        for x in initial_question_answers
        if x["sub_answer_check"] == "yes"
    ]
    failed_question_list = [
        x["sub_question"]
        for x in initial_question_answers
        if x["sub_answer_check"] == "no"
    ]

    msg = [
        HumanMessage(
            content=DEEP_DECOMPOSE_PROMPT.format(
                question=question,
                entity_term_extraction_str=entity_term_extraction_str,
                base_answer=base_answer,
                answered_sub_questions="\n - ".join(addressed_question_list),
                failed_sub_questions="\n - ".join(failed_question_list),
            ),
        )
    ]

    # Grader
    model = state["fast_llm"]
    response = model.invoke(msg)

    cleaned_response = re.sub(r"```json\n|\n```", "", response.pretty_repr())
    parsed_response = json.loads(cleaned_response)

    sub_questions_dict = {}
    for sub_question_nr, sub_question_dict in enumerate(
        parsed_response["sub_questions"]
    ):
        sub_question_dict["answered"] = False
        sub_question_dict["verified"] = False
        sub_questions_dict[sub_question_nr] = sub_question_dict

    return {
        "decomposed_sub_questions_dict": sub_questions_dict,
        "log_messages": generate_log_message(
            message="deep - decompose",
            node_start_time=node_start_time,
            graph_start_time=state["graph_start_time"],
        ),
    }
@@ -0,0 +1,61 @@
from datetime import datetime
from typing import Any

from langchain_core.messages import HumanMessage

from danswer.agent_search.primary_graph.states import QAState
from danswer.agent_search.shared_graph_utils.prompts import COMBINED_CONTEXT
from danswer.agent_search.shared_graph_utils.prompts import MODIFIED_RAG_PROMPT
from danswer.agent_search.shared_graph_utils.utils import format_docs
from danswer.agent_search.shared_graph_utils.utils import generate_log_message
from danswer.agent_search.shared_graph_utils.utils import normalize_whitespace


# aggregate sub questions and answers
def deep_answer_generation(state: QAState) -> dict[str, Any]:
    """
    Generate answer

    Args:
        state (messages): The current state

    Returns:
        dict: The updated state with the deep answer
    """
    print("---DEEP GENERATE---")

    node_start_time = datetime.now()

    question = state["original_question"]
    docs = state["deduped_retrieval_docs"]

    deep_answer_context = state["core_answer_dynamic_context"]

    print(f"Number of verified retrieval docs - deep: {len(docs)}")

    combined_context = normalize_whitespace(
        COMBINED_CONTEXT.format(
            deep_answer_context=deep_answer_context, formated_docs=format_docs(docs)
        )
    )

    msg = [
        HumanMessage(
            content=MODIFIED_RAG_PROMPT.format(
                question=question, combined_context=combined_context
            )
        )
    ]

    # Grader
    model = state["fast_llm"]
    response = model.invoke(msg)

    return {
        "deep_answer": response.content,
        "log_messages": generate_log_message(
            message="deep - deep answer generation",
            node_start_time=node_start_time,
            graph_start_time=state["graph_start_time"],
        ),
    }
@@ -0,0 +1,11 @@
from datetime import datetime
from typing import Any

from danswer.agent_search.primary_graph.states import QAState


def dummy_start(state: QAState) -> dict[str, Any]:
    """
    Dummy node to set the start time
    """
    return {"start_time": datetime.now()}
Some files were not shown because too many files have changed in this diff.