mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-04-15 03:42:52 +00:00
Compare commits
83 Commits
permission
...
edge
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d10c901c43 | ||
|
|
f1ac555c57 | ||
|
|
ed52384c21 | ||
|
|
cb10376a0d | ||
|
|
5a25b70b9c | ||
|
|
8cbc37f281 | ||
|
|
9d78f71f23 | ||
|
|
fbf3179d84 | ||
|
|
779470b553 | ||
|
|
151e189898 | ||
|
|
72e08f81a4 | ||
|
|
65792a8ad8 | ||
|
|
497b700b3d | ||
|
|
c3ed2135f1 | ||
|
|
a969d56818 | ||
|
|
a31d862f48 | ||
|
|
a4e6d4cf43 | ||
|
|
1e6f94e00d | ||
|
|
a769b87a9d | ||
|
|
278fc7e9b1 | ||
|
|
eb34df470f | ||
|
|
9d1785273f | ||
|
|
ef69b17d26 | ||
|
|
787c961802 | ||
|
|
62bc4fa2a3 | ||
|
|
bb1c44daff | ||
|
|
f26ecafb51 | ||
|
|
9fdb425c0d | ||
|
|
47e20e89c5 | ||
|
|
8b28c127f2 | ||
|
|
9a861a71ad | ||
|
|
b4bc12f6dc | ||
|
|
9af9148ca7 | ||
|
|
8a517c4f10 | ||
|
|
6959d851ea | ||
|
|
6a2550fc2d | ||
|
|
b1cc0c2bf9 | ||
|
|
c28b17064b | ||
|
|
4dab92ab52 | ||
|
|
7eb68d61b0 | ||
|
|
8c7810d688 | ||
|
|
712e6fdf5e | ||
|
|
f1a9a3b41e | ||
|
|
c3405fb6bf | ||
|
|
3e962935f4 | ||
|
|
0aa1aa7ea0 | ||
|
|
771d2cf101 | ||
|
|
7ec50280ed | ||
|
|
5b2ba5caeb | ||
|
|
4a96ef13d7 | ||
|
|
822b0c99be | ||
|
|
bcf2851a85 | ||
|
|
a5a59bd8f0 | ||
|
|
32d2e7985a | ||
|
|
c4f8d5370b | ||
|
|
9e434f6a5a | ||
|
|
67dc819319 | ||
|
|
2d12274050 | ||
|
|
c727ba13ee | ||
|
|
6193dd5326 | ||
|
|
387a7d1cea | ||
|
|
869578eeed | ||
|
|
e68648ab74 | ||
|
|
da01002099 | ||
|
|
f5d66f389c | ||
|
|
82d89f78c6 | ||
|
|
6f49c5e32c | ||
|
|
41f2bd2f19 | ||
|
|
bfa2f672f9 | ||
|
|
a823c3ead1 | ||
|
|
bd7d378a9a | ||
|
|
dcec0c8ef3 | ||
|
|
6456b51dcf | ||
|
|
7cfe27e31e | ||
|
|
3c5f77f5a4 | ||
|
|
ab4d1dce01 | ||
|
|
80c928eb58 | ||
|
|
77528876b1 | ||
|
|
3bf53495f3 | ||
|
|
e4cfcda0bf | ||
|
|
475e8f6cdc | ||
|
|
945272c1d2 | ||
|
|
185b057483 |
62
.devcontainer/Dockerfile
Normal file
62
.devcontainer/Dockerfile
Normal file
@@ -0,0 +1,62 @@
|
||||
FROM ubuntu:26.04@sha256:cc925e589b7543b910fea57a240468940003fbfc0515245a495dd0ad8fe7cef1
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
curl \
|
||||
fd-find \
|
||||
fzf \
|
||||
git \
|
||||
jq \
|
||||
less \
|
||||
make \
|
||||
neovim \
|
||||
openssh-client \
|
||||
python3-venv \
|
||||
ripgrep \
|
||||
sudo \
|
||||
ca-certificates \
|
||||
iptables \
|
||||
ipset \
|
||||
iproute2 \
|
||||
dnsutils \
|
||||
unzip \
|
||||
wget \
|
||||
zsh \
|
||||
&& curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
|
||||
&& apt-get install -y nodejs \
|
||||
&& install -m 0755 -d /etc/apt/keyrings \
|
||||
&& curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg -o /etc/apt/keyrings/githubcli-archive-keyring.gpg \
|
||||
&& chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg \
|
||||
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" > /etc/apt/sources.list.d/github-cli.list \
|
||||
&& apt-get update \
|
||||
&& apt-get install -y --no-install-recommends gh \
|
||||
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# fd-find installs as fdfind on Debian/Ubuntu — symlink to fd
|
||||
RUN ln -sf "$(which fdfind)" /usr/local/bin/fd
|
||||
|
||||
# Install uv (Python package manager)
|
||||
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /usr/local/bin/
|
||||
|
||||
# Create non-root dev user with passwordless sudo
|
||||
RUN useradd -m -s /bin/zsh dev && \
|
||||
echo "dev ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/dev && \
|
||||
chmod 0440 /etc/sudoers.d/dev
|
||||
|
||||
ENV DEVCONTAINER=true
|
||||
|
||||
RUN mkdir -p /workspace && \
|
||||
chown -R dev:dev /workspace
|
||||
|
||||
WORKDIR /workspace
|
||||
|
||||
# Install Claude Code
|
||||
ARG CLAUDE_CODE_VERSION=latest
|
||||
RUN npm install -g @anthropic-ai/claude-code@${CLAUDE_CODE_VERSION}
|
||||
|
||||
# Configure zsh — source the repo-local zshrc so shell customization
|
||||
# doesn't require an image rebuild.
|
||||
RUN chsh -s /bin/zsh root && \
|
||||
for rc in /root/.zshrc /home/dev/.zshrc; do \
|
||||
echo '[ -f /workspace/.devcontainer/zshrc ] && . /workspace/.devcontainer/zshrc' >> "$rc"; \
|
||||
done && \
|
||||
chown dev:dev /home/dev/.zshrc
|
||||
86
.devcontainer/README.md
Normal file
86
.devcontainer/README.md
Normal file
@@ -0,0 +1,86 @@
|
||||
# Onyx Dev Container
|
||||
|
||||
A containerized development environment for working on Onyx.
|
||||
|
||||
## What's included
|
||||
|
||||
- Ubuntu 26.04 base image
|
||||
- Node.js 20, uv, Claude Code
|
||||
- GitHub CLI (`gh`)
|
||||
- Neovim, ripgrep, fd, fzf, jq, make, wget, unzip
|
||||
- Zsh as default shell (sources host `~/.zshrc` if available)
|
||||
- Python venv auto-activation
|
||||
- Network firewall (default-deny, whitelists npm, GitHub, Anthropic APIs, Sentry, and VS Code update servers)
|
||||
|
||||
## Usage
|
||||
|
||||
### CLI (`ods dev`)
|
||||
|
||||
The [`ods` devtools CLI](../tools/ods/README.md) provides workspace-aware wrappers
|
||||
for all devcontainer operations (also available as `ods dc`):
|
||||
|
||||
```bash
|
||||
# Start the container
|
||||
ods dev up
|
||||
|
||||
# Open a shell
|
||||
ods dev into
|
||||
|
||||
# Run a command
|
||||
ods dev exec npm test
|
||||
|
||||
# Stop the container
|
||||
ods dev stop
|
||||
```
|
||||
|
||||
## Restarting the container
|
||||
|
||||
```bash
|
||||
# Restart the container
|
||||
ods dev restart
|
||||
|
||||
# Pull the latest published image and recreate
|
||||
ods dev rebuild
|
||||
```
|
||||
|
||||
## Image
|
||||
|
||||
The devcontainer uses a prebuilt image published to `onyxdotapp/onyx-devcontainer`.
|
||||
The tag is pinned in `devcontainer.json` — no local build is required.
|
||||
|
||||
To build the image locally (e.g. while iterating on the Dockerfile):
|
||||
|
||||
```bash
|
||||
docker buildx bake devcontainer
|
||||
```
|
||||
|
||||
The `devcontainer` target is defined in `docker-bake.hcl` at the repo root.
|
||||
|
||||
## User & permissions
|
||||
|
||||
The container runs as the `dev` user by default (`remoteUser` in devcontainer.json).
|
||||
An init script (`init-dev-user.sh`) runs at container start to ensure the active
|
||||
user has read/write access to the bind-mounted workspace:
|
||||
|
||||
- **Standard Docker** — `dev`'s UID/GID is remapped to match the workspace owner,
|
||||
so file permissions work seamlessly.
|
||||
- **Rootless Docker** — The workspace appears as root-owned (UID 0) inside the
|
||||
container due to user-namespace mapping. `ods dev up` auto-detects rootless Docker
|
||||
and sets `DEVCONTAINER_REMOTE_USER=root` so the container runs as root — which
|
||||
maps back to your host user via the user namespace. New files are owned by your
|
||||
host UID and no ACL workarounds are needed.
|
||||
|
||||
To override the auto-detection, set `DEVCONTAINER_REMOTE_USER` before running
|
||||
`ods dev up`.
|
||||
|
||||
## Firewall
|
||||
|
||||
The container starts with a default-deny firewall (`init-firewall.sh`) that only allows outbound traffic to:
|
||||
|
||||
- npm registry
|
||||
- GitHub
|
||||
- Anthropic API
|
||||
- Sentry
|
||||
- VS Code update servers
|
||||
|
||||
This requires the `NET_ADMIN` and `NET_RAW` capabilities, which are added via `runArgs` in `devcontainer.json`.
|
||||
23
.devcontainer/devcontainer.json
Normal file
23
.devcontainer/devcontainer.json
Normal file
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"name": "Onyx Dev Sandbox",
|
||||
"image": "onyxdotapp/onyx-devcontainer@sha256:12184169c5bcc9cca0388286d5ffe504b569bc9c37bfa631b76ee8eee2064055",
|
||||
"runArgs": ["--cap-add=NET_ADMIN", "--cap-add=NET_RAW"],
|
||||
"mounts": [
|
||||
"source=${localEnv:HOME}/.claude,target=/home/dev/.claude,type=bind",
|
||||
"source=${localEnv:HOME}/.claude.json,target=/home/dev/.claude.json,type=bind",
|
||||
"source=${localEnv:HOME}/.zshrc,target=/home/dev/.zshrc.host,type=bind,readonly",
|
||||
"source=${localEnv:HOME}/.gitconfig,target=/home/dev/.gitconfig,type=bind,readonly",
|
||||
"source=${localEnv:HOME}/.config/nvim,target=/home/dev/.config/nvim,type=bind,readonly",
|
||||
"source=onyx-devcontainer-cache,target=/home/dev/.cache,type=volume",
|
||||
"source=onyx-devcontainer-local,target=/home/dev/.local,type=volume"
|
||||
],
|
||||
"containerEnv": {
|
||||
"SSH_AUTH_SOCK": "/tmp/ssh-agent.sock"
|
||||
},
|
||||
"remoteUser": "${localEnv:DEVCONTAINER_REMOTE_USER:dev}",
|
||||
"updateRemoteUserUID": false,
|
||||
"workspaceMount": "source=${localWorkspaceFolder},target=/workspace,type=bind,consistency=delegated",
|
||||
"workspaceFolder": "/workspace",
|
||||
"postStartCommand": "sudo bash /workspace/.devcontainer/init-dev-user.sh && sudo bash /workspace/.devcontainer/init-firewall.sh",
|
||||
"waitFor": "postStartCommand"
|
||||
}
|
||||
107
.devcontainer/init-dev-user.sh
Normal file
107
.devcontainer/init-dev-user.sh
Normal file
@@ -0,0 +1,107 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# Remap the dev user's UID/GID to match the workspace owner so that
|
||||
# bind-mounted files are accessible without running as root.
|
||||
#
|
||||
# Standard Docker: Workspace is owned by the host user's UID (e.g. 1000).
|
||||
# We remap dev to that UID -- fast and seamless.
|
||||
#
|
||||
# Rootless Docker: Workspace appears as root-owned (UID 0) inside the
|
||||
# container due to user-namespace mapping. Requires
|
||||
# DEVCONTAINER_REMOTE_USER=root (set automatically by
|
||||
# ods dev up). Container root IS the host user, so
|
||||
# bind-mounts and named volumes are symlinked into /root.
|
||||
|
||||
WORKSPACE=/workspace
|
||||
TARGET_USER=dev
|
||||
REMOTE_USER="${SUDO_USER:-$TARGET_USER}"
|
||||
|
||||
WS_UID=$(stat -c '%u' "$WORKSPACE")
|
||||
WS_GID=$(stat -c '%g' "$WORKSPACE")
|
||||
DEV_UID=$(id -u "$TARGET_USER")
|
||||
DEV_GID=$(id -g "$TARGET_USER")
|
||||
|
||||
# devcontainer.json bind-mounts and named volumes target /home/dev regardless
|
||||
# of remoteUser. When running as root ($HOME=/root), Phase 1 bridges the gap
|
||||
# with symlinks from ACTIVE_HOME → MOUNT_HOME.
|
||||
MOUNT_HOME=/home/"$TARGET_USER"
|
||||
|
||||
if [ "$REMOTE_USER" = "root" ]; then
|
||||
ACTIVE_HOME="/root"
|
||||
else
|
||||
ACTIVE_HOME="$MOUNT_HOME"
|
||||
fi
|
||||
|
||||
# ── Phase 1: home directory setup ───────────────────────────────────
|
||||
|
||||
# ~/.local and ~/.cache are named Docker volumes mounted under MOUNT_HOME.
|
||||
mkdir -p "$MOUNT_HOME"/.local/state "$MOUNT_HOME"/.local/share
|
||||
|
||||
# When running as root, symlink bind-mounts and named volumes into /root
|
||||
# so that $HOME-relative tools (Claude Code, git, etc.) find them.
|
||||
if [ "$ACTIVE_HOME" != "$MOUNT_HOME" ]; then
|
||||
for item in .claude .cache .local; do
|
||||
[ -d "$MOUNT_HOME/$item" ] || continue
|
||||
if [ -e "$ACTIVE_HOME/$item" ] && [ ! -L "$ACTIVE_HOME/$item" ]; then
|
||||
echo "warning: replacing $ACTIVE_HOME/$item with symlink to $MOUNT_HOME/$item" >&2
|
||||
rm -rf "$ACTIVE_HOME/$item"
|
||||
fi
|
||||
ln -sfn "$MOUNT_HOME/$item" "$ACTIVE_HOME/$item"
|
||||
done
|
||||
# Symlink files (not directories).
|
||||
for file in .claude.json .gitconfig .zshrc.host; do
|
||||
[ -f "$MOUNT_HOME/$file" ] && ln -sf "$MOUNT_HOME/$file" "$ACTIVE_HOME/$file"
|
||||
done
|
||||
|
||||
# Nested mount: .config/nvim
|
||||
if [ -d "$MOUNT_HOME/.config/nvim" ]; then
|
||||
mkdir -p "$ACTIVE_HOME/.config"
|
||||
if [ -e "$ACTIVE_HOME/.config/nvim" ] && [ ! -L "$ACTIVE_HOME/.config/nvim" ]; then
|
||||
echo "warning: replacing $ACTIVE_HOME/.config/nvim with symlink" >&2
|
||||
rm -rf "$ACTIVE_HOME/.config/nvim"
|
||||
fi
|
||||
ln -sfn "$MOUNT_HOME/.config/nvim" "$ACTIVE_HOME/.config/nvim"
|
||||
fi
|
||||
fi
|
||||
|
||||
# ── Phase 2: workspace access ───────────────────────────────────────
|
||||
|
||||
# Root always has workspace access; Phase 1 handled home setup.
|
||||
if [ "$REMOTE_USER" = "root" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Already matching -- nothing to do.
|
||||
if [ "$WS_UID" = "$DEV_UID" ] && [ "$WS_GID" = "$DEV_GID" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [ "$WS_UID" != "0" ]; then
|
||||
# ── Standard Docker ──────────────────────────────────────────────
|
||||
# Workspace is owned by a non-root UID (the host user).
|
||||
# Remap dev's UID/GID to match.
|
||||
if [ "$DEV_GID" != "$WS_GID" ]; then
|
||||
if ! groupmod -g "$WS_GID" "$TARGET_USER" 2>&1; then
|
||||
echo "warning: failed to remap $TARGET_USER GID to $WS_GID" >&2
|
||||
fi
|
||||
fi
|
||||
if [ "$DEV_UID" != "$WS_UID" ]; then
|
||||
if ! usermod -u "$WS_UID" -g "$WS_GID" "$TARGET_USER" 2>&1; then
|
||||
echo "warning: failed to remap $TARGET_USER UID to $WS_UID" >&2
|
||||
fi
|
||||
fi
|
||||
if ! chown -R "$TARGET_USER":"$TARGET_USER" "$MOUNT_HOME" 2>&1; then
|
||||
echo "warning: failed to chown $MOUNT_HOME" >&2
|
||||
fi
|
||||
else
|
||||
# ── Rootless Docker ──────────────────────────────────────────────
|
||||
# Workspace is root-owned (UID 0) due to user-namespace mapping.
|
||||
# The supported path is remoteUser=root (set DEVCONTAINER_REMOTE_USER=root),
|
||||
# which is handled above. If we reach here, the user is running as dev
|
||||
# under rootless Docker without the override.
|
||||
echo "error: rootless Docker detected but remoteUser is not root." >&2
|
||||
echo " Set DEVCONTAINER_REMOTE_USER=root before starting the container," >&2
|
||||
echo " or use 'ods dev up' which sets it automatically." >&2
|
||||
exit 1
|
||||
fi
|
||||
105
.devcontainer/init-firewall.sh
Executable file
105
.devcontainer/init-firewall.sh
Executable file
@@ -0,0 +1,105 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
echo "Setting up firewall..."
|
||||
|
||||
# Preserve docker dns resolution
|
||||
DOCKER_DNS_RULES=$(iptables-save | grep -E "^-A.*-d 127.0.0.11/32" || true)
|
||||
|
||||
# Flush all rules
|
||||
iptables -t nat -F
|
||||
iptables -t nat -X
|
||||
iptables -t mangle -F
|
||||
iptables -t mangle -X
|
||||
iptables -F
|
||||
iptables -X
|
||||
|
||||
# Restore docker dns rules
|
||||
if [ -n "$DOCKER_DNS_RULES" ]; then
|
||||
echo "$DOCKER_DNS_RULES" | iptables-restore -n
|
||||
fi
|
||||
|
||||
# Create ipset for allowed destinations
|
||||
ipset create allowed-domains hash:net || true
|
||||
ipset flush allowed-domains
|
||||
|
||||
# Fetch GitHub IP ranges (IPv4 only -- ipset hash:net and iptables are IPv4)
|
||||
GITHUB_IPS=$(curl -s https://api.github.com/meta | jq -r '.api[]' 2>/dev/null | grep -v ':' || echo "")
|
||||
for ip in $GITHUB_IPS; do
|
||||
if ! ipset add allowed-domains "$ip" -exist 2>&1; then
|
||||
echo "warning: failed to add GitHub IP $ip to allowlist" >&2
|
||||
fi
|
||||
done
|
||||
|
||||
# Resolve allowed domains
|
||||
ALLOWED_DOMAINS=(
|
||||
"registry.npmjs.org"
|
||||
"api.anthropic.com"
|
||||
"api-staging.anthropic.com"
|
||||
"files.anthropic.com"
|
||||
"sentry.io"
|
||||
"update.code.visualstudio.com"
|
||||
"pypi.org"
|
||||
"files.pythonhosted.org"
|
||||
"go.dev"
|
||||
"storage.googleapis.com"
|
||||
"static.rust-lang.org"
|
||||
)
|
||||
|
||||
for domain in "${ALLOWED_DOMAINS[@]}"; do
|
||||
IPS=$(getent ahosts "$domain" 2>/dev/null | awk '{print $1}' | grep -v ':' | sort -u || echo "")
|
||||
for ip in $IPS; do
|
||||
if ! ipset add allowed-domains "$ip/32" -exist 2>&1; then
|
||||
echo "warning: failed to add $domain ($ip) to allowlist" >&2
|
||||
fi
|
||||
done
|
||||
done
|
||||
|
||||
# Allow traffic to the Docker gateway so the container can reach host services
|
||||
# (e.g. the Onyx stack at localhost:3000, localhost:8080, etc.)
|
||||
DOCKER_GATEWAY=$(ip -4 route show default | awk '{print $3}')
|
||||
if [ -n "$DOCKER_GATEWAY" ]; then
|
||||
if ! ipset add allowed-domains "$DOCKER_GATEWAY/32" -exist 2>&1; then
|
||||
echo "warning: failed to add Docker gateway $DOCKER_GATEWAY to allowlist" >&2
|
||||
fi
|
||||
fi
|
||||
|
||||
# Set default policies to DROP
|
||||
iptables -P FORWARD DROP
|
||||
iptables -P INPUT DROP
|
||||
iptables -P OUTPUT DROP
|
||||
|
||||
# Allow established connections
|
||||
iptables -A INPUT -m conntrack --ctstate ESTABLISHED,RELATED -j ACCEPT
|
||||
iptables -A OUTPUT -m conntrack --ctstate ESTABLISHED,RELATED -j ACCEPT
|
||||
|
||||
# Allow loopback
|
||||
iptables -A INPUT -i lo -j ACCEPT
|
||||
iptables -A OUTPUT -o lo -j ACCEPT
|
||||
|
||||
# Allow DNS
|
||||
iptables -A OUTPUT -p udp --dport 53 -j ACCEPT
|
||||
iptables -A OUTPUT -p tcp --dport 53 -j ACCEPT
|
||||
|
||||
# Allow outbound to allowed destinations
|
||||
iptables -A OUTPUT -m set --match-set allowed-domains dst -j ACCEPT
|
||||
|
||||
# Reject unauthorized outbound
|
||||
iptables -A OUTPUT -j REJECT --reject-with icmp-host-unreachable
|
||||
|
||||
# Validate firewall configuration
|
||||
echo "Validating firewall configuration..."
|
||||
|
||||
BLOCKED_SITES=("example.com" "google.com" "facebook.com")
|
||||
for site in "${BLOCKED_SITES[@]}"; do
|
||||
if timeout 2 ping -c 1 "$site" &>/dev/null; then
|
||||
echo "Warning: $site is still reachable"
|
||||
fi
|
||||
done
|
||||
|
||||
if ! timeout 5 curl -s https://api.github.com/meta > /dev/null; then
|
||||
echo "Warning: GitHub API is not accessible"
|
||||
fi
|
||||
|
||||
echo "Firewall setup complete"
|
||||
10
.devcontainer/zshrc
Normal file
10
.devcontainer/zshrc
Normal file
@@ -0,0 +1,10 @@
|
||||
# Devcontainer zshrc — sourced automatically for both root and dev users.
|
||||
# Edit this file to customize the shell without rebuilding the image.
|
||||
|
||||
# Auto-activate Python venv
|
||||
if [ -f /workspace/.venv/bin/activate ]; then
|
||||
. /workspace/.venv/bin/activate
|
||||
fi
|
||||
|
||||
# Source host zshrc if bind-mounted
|
||||
[ -f ~/.zshrc.host ] && . ~/.zshrc.host
|
||||
8
.github/workflows/deployment.yml
vendored
8
.github/workflows/deployment.yml
vendored
@@ -13,7 +13,7 @@ permissions:
|
||||
id-token: write # zizmor: ignore[excessive-permissions]
|
||||
|
||||
env:
|
||||
EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
|
||||
EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') || github.ref_name == 'edge' }}
|
||||
|
||||
jobs:
|
||||
# Determine which components to build based on the tag
|
||||
@@ -44,7 +44,7 @@ jobs:
|
||||
fetch-tags: true
|
||||
|
||||
- name: Setup uv
|
||||
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7
|
||||
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # ratchet:astral-sh/setup-uv@v8.0.0
|
||||
with:
|
||||
version: "0.9.9"
|
||||
enable-cache: false
|
||||
@@ -156,7 +156,7 @@ jobs:
|
||||
check-version-tag:
|
||||
runs-on: ubuntu-slim
|
||||
timeout-minutes: 10
|
||||
if: ${{ !startsWith(github.ref_name, 'nightly-latest') && github.event_name != 'workflow_dispatch' }}
|
||||
if: ${{ !startsWith(github.ref_name, 'nightly-latest') && github.ref_name != 'edge' && github.event_name != 'workflow_dispatch' }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
@@ -165,7 +165,7 @@ jobs:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup uv
|
||||
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7
|
||||
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # ratchet:astral-sh/setup-uv@v8.0.0
|
||||
with:
|
||||
version: "0.9.9"
|
||||
# NOTE: This isn't caching much and zizmor suggests this could be poisoned, so disable.
|
||||
|
||||
@@ -114,7 +114,7 @@ jobs:
|
||||
ref: main
|
||||
|
||||
- name: Install the latest version of uv
|
||||
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7
|
||||
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # ratchet:astral-sh/setup-uv@v8.0.0
|
||||
with:
|
||||
enable-cache: false
|
||||
version: "0.9.9"
|
||||
|
||||
4
.github/workflows/pr-playwright-tests.yml
vendored
4
.github/workflows/pr-playwright-tests.yml
vendored
@@ -471,7 +471,7 @@ jobs:
|
||||
|
||||
- name: Install the latest version of uv
|
||||
if: always()
|
||||
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7
|
||||
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # ratchet:astral-sh/setup-uv@v8.0.0
|
||||
with:
|
||||
enable-cache: false
|
||||
version: "0.9.9"
|
||||
@@ -710,7 +710,7 @@ jobs:
|
||||
pull-requests: write
|
||||
steps:
|
||||
- name: Download visual diff summaries
|
||||
uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3
|
||||
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c
|
||||
with:
|
||||
pattern: screenshot-diff-summary-*
|
||||
path: summaries/
|
||||
|
||||
2
.github/workflows/pr-quality-checks.yml
vendored
2
.github/workflows/pr-quality-checks.yml
vendored
@@ -38,7 +38,7 @@ jobs:
|
||||
- name: Install node dependencies
|
||||
working-directory: ./web
|
||||
run: npm ci
|
||||
- uses: j178/prek-action@0bb87d7f00b0c99306c8bcb8b8beba1eb581c037 # ratchet:j178/prek-action@v1
|
||||
- uses: j178/prek-action@cbc2f23eb5539cf20d82d1aabd0d0ecbcc56f4e3
|
||||
with:
|
||||
prek-version: '0.3.4'
|
||||
extra-args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || github.event_name == 'merge_group' && format('--from-ref {0} --to-ref {1}', github.event.merge_group.base_sha, github.event.merge_group.head_sha) || github.ref_name == 'main' && '--all-files' || '' }}
|
||||
|
||||
2
.github/workflows/release-cli.yml
vendored
2
.github/workflows/release-cli.yml
vendored
@@ -17,7 +17,7 @@ jobs:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
- uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7
|
||||
- uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # ratchet:astral-sh/setup-uv@v8.0.0
|
||||
with:
|
||||
enable-cache: false
|
||||
version: "0.9.9"
|
||||
|
||||
2
.github/workflows/release-devtools.yml
vendored
2
.github/workflows/release-devtools.yml
vendored
@@ -26,7 +26,7 @@ jobs:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
- uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7
|
||||
- uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # ratchet:astral-sh/setup-uv@v8.0.0
|
||||
with:
|
||||
enable-cache: false
|
||||
version: "0.9.9"
|
||||
|
||||
2
.github/workflows/zizmor.yml
vendored
2
.github/workflows/zizmor.yml
vendored
@@ -24,7 +24,7 @@ jobs:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Install the latest version of uv
|
||||
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7
|
||||
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # ratchet:astral-sh/setup-uv@v8.0.0
|
||||
with:
|
||||
enable-cache: false
|
||||
version: "0.9.9"
|
||||
|
||||
@@ -1,64 +1,57 @@
|
||||
{
|
||||
"labels": [],
|
||||
"comment": "",
|
||||
"fixWithAI": true,
|
||||
"hideFooter": false,
|
||||
"strictness": 3,
|
||||
"statusCheck": true,
|
||||
"commentTypes": [
|
||||
"logic",
|
||||
"syntax",
|
||||
"style"
|
||||
],
|
||||
"instructions": "",
|
||||
"disabledLabels": [],
|
||||
"excludeAuthors": [
|
||||
"dependabot[bot]",
|
||||
"renovate[bot]"
|
||||
],
|
||||
"ignoreKeywords": "",
|
||||
"ignorePatterns": "",
|
||||
"includeAuthors": [],
|
||||
"summarySection": {
|
||||
"included": true,
|
||||
"collapsible": false,
|
||||
"defaultOpen": false
|
||||
"labels": [],
|
||||
"comment": "",
|
||||
"fixWithAI": true,
|
||||
"hideFooter": false,
|
||||
"strictness": 3,
|
||||
"statusCheck": true,
|
||||
"commentTypes": ["logic", "syntax", "style"],
|
||||
"instructions": "",
|
||||
"disabledLabels": [],
|
||||
"excludeAuthors": ["dependabot[bot]", "renovate[bot]"],
|
||||
"ignoreKeywords": "",
|
||||
"ignorePatterns": "",
|
||||
"includeAuthors": [],
|
||||
"summarySection": {
|
||||
"included": true,
|
||||
"collapsible": false,
|
||||
"defaultOpen": false
|
||||
},
|
||||
"excludeBranches": [],
|
||||
"fileChangeLimit": 300,
|
||||
"includeBranches": [],
|
||||
"includeKeywords": "",
|
||||
"triggerOnUpdates": false,
|
||||
"updateExistingSummaryComment": true,
|
||||
"updateSummaryOnly": false,
|
||||
"issuesTableSection": {
|
||||
"included": true,
|
||||
"collapsible": false,
|
||||
"defaultOpen": false
|
||||
},
|
||||
"statusCommentsEnabled": true,
|
||||
"confidenceScoreSection": {
|
||||
"included": true,
|
||||
"collapsible": false
|
||||
},
|
||||
"sequenceDiagramSection": {
|
||||
"included": true,
|
||||
"collapsible": false,
|
||||
"defaultOpen": false
|
||||
},
|
||||
"shouldUpdateDescription": false,
|
||||
"rules": [
|
||||
{
|
||||
"scope": ["web/**"],
|
||||
"rule": "In Onyx's Next.js app, the `app/ee/admin/` directory is a filesystem convention for Enterprise Edition route overrides — it does NOT add an `/ee/` prefix to the URL. Both `app/admin/groups/page.tsx` and `app/ee/admin/groups/page.tsx` serve the same URL `/admin/groups`. Hardcoded `/admin/...` paths in router.push() calls are correct and do NOT break EE deployments. Do not flag hardcoded admin paths as bugs."
|
||||
},
|
||||
"excludeBranches": [],
|
||||
"fileChangeLimit": 300,
|
||||
"includeBranches": [],
|
||||
"includeKeywords": "",
|
||||
"triggerOnUpdates": true,
|
||||
"updateExistingSummaryComment": true,
|
||||
"updateSummaryOnly": false,
|
||||
"issuesTableSection": {
|
||||
"included": true,
|
||||
"collapsible": false,
|
||||
"defaultOpen": false
|
||||
{
|
||||
"scope": ["web/**"],
|
||||
"rule": "In Onyx, each API key creates a unique user row in the database with a unique `user_id` (UUID). There is a 1:1 mapping between API keys and their backing user records. Multiple API keys do NOT share the same `user_id`. Do not flag potential duplicate row IDs when using `user_id` from API key descriptors."
|
||||
},
|
||||
"statusCommentsEnabled": true,
|
||||
"confidenceScoreSection": {
|
||||
"included": true,
|
||||
"collapsible": false
|
||||
},
|
||||
"sequenceDiagramSection": {
|
||||
"included": true,
|
||||
"collapsible": false,
|
||||
"defaultOpen": false
|
||||
},
|
||||
"shouldUpdateDescription": false,
|
||||
"rules": [
|
||||
{
|
||||
"scope": ["web/**"],
|
||||
"rule": "In Onyx's Next.js app, the `app/ee/admin/` directory is a filesystem convention for Enterprise Edition route overrides — it does NOT add an `/ee/` prefix to the URL. Both `app/admin/groups/page.tsx` and `app/ee/admin/groups/page.tsx` serve the same URL `/admin/groups`. Hardcoded `/admin/...` paths in router.push() calls are correct and do NOT break EE deployments. Do not flag hardcoded admin paths as bugs."
|
||||
},
|
||||
{
|
||||
"scope": ["web/**"],
|
||||
"rule": "In Onyx, each API key creates a unique user row in the database with a unique `user_id` (UUID). There is a 1:1 mapping between API keys and their backing user records. Multiple API keys do NOT share the same `user_id`. Do not flag potential duplicate row IDs when using `user_id` from API key descriptors."
|
||||
},
|
||||
{
|
||||
"scope": ["backend/**/*.py"],
|
||||
"rule": "Never raise HTTPException directly in business code. Use `raise OnyxError(OnyxErrorCode.XXX, \"message\")` from `onyx.error_handling.exceptions`. A global FastAPI exception handler converts OnyxError into structured JSON responses with {\"error_code\": \"...\", \"detail\": \"...\"}. Error codes are defined in `onyx.error_handling.error_codes.OnyxErrorCode`. For upstream errors with dynamic HTTP status codes, use `status_code_override`: `raise OnyxError(OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=upstream_status)`."
|
||||
}
|
||||
]
|
||||
{
|
||||
"scope": ["backend/**/*.py"],
|
||||
"rule": "Never raise HTTPException directly in business code. Use `raise OnyxError(OnyxErrorCode.XXX, \"message\")` from `onyx.error_handling.exceptions`. A global FastAPI exception handler converts OnyxError into structured JSON responses with {\"error_code\": \"...\", \"detail\": \"...\"}. Error codes are defined in `onyx.error_handling.error_codes.OnyxErrorCode`. For upstream errors with dynamic HTTP status codes, use `status_code_override`: `raise OnyxError(OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=upstream_status)`."
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -9,7 +9,6 @@ repos:
|
||||
rev: d30b4298e4fb63ce8609e29acdbcf4c9018a483c
|
||||
hooks:
|
||||
- id: uv-sync
|
||||
args: ["--locked", "--all-extras"]
|
||||
- id: uv-lock
|
||||
- id: uv-export
|
||||
name: uv-export default.txt
|
||||
@@ -18,7 +17,7 @@ repos:
|
||||
"--no-emit-project",
|
||||
"--no-default-groups",
|
||||
"--no-hashes",
|
||||
"--extra",
|
||||
"--group",
|
||||
"backend",
|
||||
"-o",
|
||||
"backend/requirements/default.txt",
|
||||
@@ -31,7 +30,7 @@ repos:
|
||||
"--no-emit-project",
|
||||
"--no-default-groups",
|
||||
"--no-hashes",
|
||||
"--extra",
|
||||
"--group",
|
||||
"dev",
|
||||
"-o",
|
||||
"backend/requirements/dev.txt",
|
||||
@@ -44,7 +43,7 @@ repos:
|
||||
"--no-emit-project",
|
||||
"--no-default-groups",
|
||||
"--no-hashes",
|
||||
"--extra",
|
||||
"--group",
|
||||
"ee",
|
||||
"-o",
|
||||
"backend/requirements/ee.txt",
|
||||
@@ -57,7 +56,7 @@ repos:
|
||||
"--no-emit-project",
|
||||
"--no-default-groups",
|
||||
"--no-hashes",
|
||||
"--extra",
|
||||
"--group",
|
||||
"model_server",
|
||||
"-o",
|
||||
"backend/requirements/model_server.txt",
|
||||
|
||||
3
.vscode/launch.json
vendored
3
.vscode/launch.json
vendored
@@ -531,8 +531,7 @@
|
||||
"request": "launch",
|
||||
"runtimeExecutable": "uv",
|
||||
"runtimeArgs": [
|
||||
"sync",
|
||||
"--all-extras"
|
||||
"sync"
|
||||
],
|
||||
"cwd": "${workspaceFolder}",
|
||||
"console": "integratedTerminal",
|
||||
|
||||
@@ -49,12 +49,12 @@ Onyx uses Celery for asynchronous task processing with multiple specialized work
|
||||
|
||||
4. **Light Worker** (`light`)
|
||||
- Handles lightweight, fast operations
|
||||
- Tasks: vespa operations, document permissions sync, external group sync
|
||||
- Tasks: vespa metadata sync, connector deletion, doc permissions upsert, checkpoint cleanup, index attempt cleanup
|
||||
- Higher concurrency for quick tasks
|
||||
|
||||
5. **Heavy Worker** (`heavy`)
|
||||
- Handles resource-intensive operations
|
||||
- Primary task: document pruning operations
|
||||
- Tasks: connector pruning, document permissions sync, external group sync, CSV generation
|
||||
- Runs with 4 threads concurrency
|
||||
|
||||
6. **KG Processing Worker** (`kg_processing`)
|
||||
|
||||
@@ -117,7 +117,7 @@ If using PowerShell, the command slightly differs:
|
||||
Install the required Python dependencies:
|
||||
|
||||
```bash
|
||||
uv sync --all-extras
|
||||
uv sync
|
||||
```
|
||||
|
||||
Install Playwright for Python (headless browser required by the Web Connector):
|
||||
|
||||
@@ -13,6 +13,7 @@ from ee.onyx.server.license.models import LicenseSource
|
||||
from onyx.auth.schemas import UserRole
|
||||
from onyx.cache.factory import get_cache_backend
|
||||
from onyx.configs.constants import ANONYMOUS_USER_EMAIL
|
||||
from onyx.db.enums import AccountType
|
||||
from onyx.db.models import License
|
||||
from onyx.db.models import User
|
||||
from onyx.utils.logger import setup_logger
|
||||
@@ -107,12 +108,13 @@ def get_used_seats(tenant_id: str | None = None) -> int:
|
||||
Get current seat usage directly from database.
|
||||
|
||||
For multi-tenant: counts users in UserTenantMapping for this tenant.
|
||||
For self-hosted: counts all active users (excludes EXT_PERM_USER role
|
||||
and the anonymous system user).
|
||||
For self-hosted: counts all active users.
|
||||
|
||||
TODO: Exclude API key dummy users from seat counting. API keys create
|
||||
users with emails like `__DANSWER_API_KEY_*` that should not count toward
|
||||
seat limits. See: https://linear.app/onyx-app/issue/ENG-3518
|
||||
Only human accounts count toward seat limits.
|
||||
SERVICE_ACCOUNT (API key dummy users), EXT_PERM_USER, and the
|
||||
anonymous system user are excluded. BOT (Slack users) ARE counted
|
||||
because they represent real humans and get upgraded to STANDARD
|
||||
when they log in via web.
|
||||
"""
|
||||
if MULTI_TENANT:
|
||||
from ee.onyx.server.tenants.user_mapping import get_tenant_count
|
||||
@@ -129,6 +131,7 @@ def get_used_seats(tenant_id: str | None = None) -> int:
|
||||
User.is_active == True, # type: ignore # noqa: E712
|
||||
User.role != UserRole.EXT_PERM_USER,
|
||||
User.email != ANONYMOUS_USER_EMAIL, # type: ignore
|
||||
User.account_type != AccountType.SERVICE_ACCOUNT,
|
||||
)
|
||||
)
|
||||
return result.scalar() or 0
|
||||
|
||||
@@ -11,6 +11,8 @@ require a valid SCIM bearer token.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import struct
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter
|
||||
@@ -22,6 +24,7 @@ from fastapi import Response
|
||||
from fastapi.responses import JSONResponse
|
||||
from fastapi_users.password import PasswordHelper
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
@@ -65,12 +68,25 @@ from onyx.db.permissions import recompute_user_permissions__no_commit
|
||||
from onyx.db.users import assign_user_to_default_groups__no_commit
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop
|
||||
from shared_configs.contextvars import get_current_tenant_id
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
# Group names reserved for system default groups (seeded by migration).
|
||||
_RESERVED_GROUP_NAMES = frozenset({"Admin", "Basic"})
|
||||
|
||||
# Namespace prefix for the seat-allocation advisory lock. Hashed together
|
||||
# with the tenant ID so the lock is scoped per-tenant (unrelated tenants
|
||||
# never block each other) and cannot collide with unrelated advisory locks.
|
||||
_SEAT_LOCK_NAMESPACE = "onyx_scim_seat_lock"
|
||||
|
||||
|
||||
def _seat_lock_id_for_tenant(tenant_id: str) -> int:
|
||||
"""Derive a stable 64-bit signed int lock id for this tenant's seat lock."""
|
||||
digest = hashlib.sha256(f"{_SEAT_LOCK_NAMESPACE}:{tenant_id}".encode()).digest()
|
||||
# pg_advisory_xact_lock takes a signed 8-byte int; unpack as such.
|
||||
return struct.unpack("q", digest[:8])[0]
|
||||
|
||||
|
||||
class ScimJSONResponse(JSONResponse):
|
||||
"""JSONResponse with Content-Type: application/scim+json (RFC 7644 §3.1)."""
|
||||
@@ -209,12 +225,37 @@ def _apply_exclusions(
|
||||
|
||||
|
||||
def _check_seat_availability(dal: ScimDAL) -> str | None:
|
||||
"""Return an error message if seat limit is reached, else None."""
|
||||
"""Return an error message if seat limit is reached, else None.
|
||||
|
||||
Acquires a transaction-scoped advisory lock so that concurrent
|
||||
SCIM requests are serialized. IdPs like Okta send provisioning
|
||||
requests in parallel batches — without serialization the check is
|
||||
vulnerable to a TOCTOU race where N concurrent requests each see
|
||||
"seats available", all insert, and the tenant ends up over its
|
||||
seat limit.
|
||||
|
||||
The lock is held until the caller's next COMMIT or ROLLBACK, which
|
||||
means the seat count cannot change between the check here and the
|
||||
subsequent INSERT/UPDATE. Each call site in this module follows
|
||||
the pattern: _check_seat_availability → write → dal.commit()
|
||||
(which releases the lock for the next waiting request).
|
||||
"""
|
||||
check_fn = fetch_ee_implementation_or_noop(
|
||||
"onyx.db.license", "check_seat_availability", None
|
||||
)
|
||||
if check_fn is None:
|
||||
return None
|
||||
|
||||
# Transaction-scoped advisory lock — released on dal.commit() / dal.rollback().
|
||||
# The lock id is derived from the tenant so unrelated tenants never block
|
||||
# each other, and from a namespace string so it cannot collide with
|
||||
# unrelated advisory locks elsewhere in the codebase.
|
||||
lock_id = _seat_lock_id_for_tenant(get_current_tenant_id())
|
||||
dal.session.execute(
|
||||
text("SELECT pg_advisory_xact_lock(:lock_id)"),
|
||||
{"lock_id": lock_id},
|
||||
)
|
||||
|
||||
result = check_fn(dal.session, seats_needed=1)
|
||||
if not result.available:
|
||||
return result.error_message or "Seat limit reached"
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import time
|
||||
from collections.abc import Generator
|
||||
from collections.abc import Iterator
|
||||
from collections.abc import Sequence
|
||||
@@ -30,6 +31,8 @@ from onyx.connectors.models import HierarchyNode
|
||||
from onyx.connectors.models import SlimDocument
|
||||
from onyx.httpx.httpx_pool import HttpxPool
|
||||
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
|
||||
from onyx.server.metrics.pruning_metrics import inc_pruning_rate_limit_error
|
||||
from onyx.server.metrics.pruning_metrics import observe_pruning_enumeration_duration
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
|
||||
@@ -130,6 +133,7 @@ def _extract_from_batch(
|
||||
def extract_ids_from_runnable_connector(
|
||||
runnable_connector: BaseConnector,
|
||||
callback: IndexingHeartbeatInterface | None = None,
|
||||
connector_type: str = "unknown",
|
||||
) -> SlimConnectorExtractionResult:
|
||||
"""
|
||||
Extract document IDs and hierarchy nodes from a runnable connector.
|
||||
@@ -179,21 +183,38 @@ def extract_ids_from_runnable_connector(
|
||||
)
|
||||
|
||||
# process raw batches to extract both IDs and hierarchy nodes
|
||||
for doc_list in raw_batch_generator:
|
||||
if callback and callback.should_stop():
|
||||
raise RuntimeError(
|
||||
"extract_ids_from_runnable_connector: Stop signal detected"
|
||||
)
|
||||
enumeration_start = time.monotonic()
|
||||
try:
|
||||
for doc_list in raw_batch_generator:
|
||||
if callback and callback.should_stop():
|
||||
raise RuntimeError(
|
||||
"extract_ids_from_runnable_connector: Stop signal detected"
|
||||
)
|
||||
|
||||
batch_result = _extract_from_batch(doc_list)
|
||||
batch_ids = batch_result.raw_id_to_parent
|
||||
batch_nodes = batch_result.hierarchy_nodes
|
||||
doc_batch_processing_func(batch_ids)
|
||||
all_raw_id_to_parent.update(batch_ids)
|
||||
all_hierarchy_nodes.extend(batch_nodes)
|
||||
batch_result = _extract_from_batch(doc_list)
|
||||
batch_ids = batch_result.raw_id_to_parent
|
||||
batch_nodes = batch_result.hierarchy_nodes
|
||||
doc_batch_processing_func(batch_ids)
|
||||
all_raw_id_to_parent.update(batch_ids)
|
||||
all_hierarchy_nodes.extend(batch_nodes)
|
||||
|
||||
if callback:
|
||||
callback.progress("extract_ids_from_runnable_connector", len(batch_ids))
|
||||
if callback:
|
||||
callback.progress("extract_ids_from_runnable_connector", len(batch_ids))
|
||||
except Exception as e:
|
||||
# Best-effort rate limit detection via string matching.
|
||||
# Connectors surface rate limits inconsistently — some raise HTTP 429,
|
||||
# some use SDK-specific exceptions (e.g. google.api_core.exceptions.ResourceExhausted)
|
||||
# that may or may not include "rate limit" or "429" in the message.
|
||||
# TODO(Bo): replace with a standard ConnectorRateLimitError exception that all
|
||||
# connectors raise when rate limited, making this check precise.
|
||||
error_str = str(e)
|
||||
if "rate limit" in error_str.lower() or "429" in error_str:
|
||||
inc_pruning_rate_limit_error(connector_type)
|
||||
raise
|
||||
finally:
|
||||
observe_pruning_enumeration_duration(
|
||||
time.monotonic() - enumeration_start, connector_type
|
||||
)
|
||||
|
||||
return SlimConnectorExtractionResult(
|
||||
raw_id_to_parent=all_raw_id_to_parent,
|
||||
|
||||
@@ -102,7 +102,7 @@ def revoke_tasks_blocking_deletion(
|
||||
f"Revoked permissions sync task {permissions_sync_payload.celery_task_id}."
|
||||
)
|
||||
except Exception:
|
||||
task_logger.exception("Exception while revoking pruning task")
|
||||
task_logger.exception("Exception while revoking permissions sync task")
|
||||
|
||||
try:
|
||||
prune_payload = redis_connector.prune.payload
|
||||
@@ -110,7 +110,7 @@ def revoke_tasks_blocking_deletion(
|
||||
app.control.revoke(prune_payload.celery_task_id)
|
||||
task_logger.info(f"Revoked pruning task {prune_payload.celery_task_id}.")
|
||||
except Exception:
|
||||
task_logger.exception("Exception while revoking permissions sync task")
|
||||
task_logger.exception("Exception while revoking pruning task")
|
||||
|
||||
try:
|
||||
external_group_sync_payload = redis_connector.external_group_sync.payload
|
||||
@@ -508,7 +508,11 @@ def monitor_connector_deletion_taskset(
|
||||
db_session=db_session,
|
||||
connector_id=connector_id_to_delete,
|
||||
)
|
||||
if not connector or not len(connector.credentials):
|
||||
if not connector:
|
||||
task_logger.info(
|
||||
"Connector deletion - Connector already deleted, skipping connector cleanup"
|
||||
)
|
||||
elif not len(connector.credentials):
|
||||
task_logger.info(
|
||||
"Connector deletion - Found no credentials left for connector, deleting connector"
|
||||
)
|
||||
|
||||
@@ -72,6 +72,7 @@ from onyx.redis.redis_hierarchy import get_source_node_id_from_cache
|
||||
from onyx.redis.redis_hierarchy import HierarchyNodeCacheEntry
|
||||
from onyx.redis.redis_pool import get_redis_client
|
||||
from onyx.redis.redis_pool import get_redis_replica_client
|
||||
from onyx.server.metrics.pruning_metrics import observe_pruning_diff_duration
|
||||
from onyx.server.runtime.onyx_runtime import OnyxRuntime
|
||||
from onyx.server.utils import make_short_id
|
||||
from onyx.utils.logger import format_error_for_logging
|
||||
@@ -570,8 +571,9 @@ def connector_pruning_generator_task(
|
||||
)
|
||||
|
||||
# Extract docs and hierarchy nodes from the source
|
||||
connector_type = cc_pair.connector.source.value
|
||||
extraction_result = extract_ids_from_runnable_connector(
|
||||
runnable_connector, callback
|
||||
runnable_connector, callback, connector_type=connector_type
|
||||
)
|
||||
all_connector_doc_ids = extraction_result.raw_id_to_parent
|
||||
|
||||
@@ -636,40 +638,46 @@ def connector_pruning_generator_task(
|
||||
commit=True,
|
||||
)
|
||||
|
||||
# a list of docs in our local index
|
||||
all_indexed_document_ids = {
|
||||
doc.id
|
||||
for doc in get_documents_for_connector_credential_pair(
|
||||
db_session=db_session,
|
||||
connector_id=connector_id,
|
||||
credential_id=credential_id,
|
||||
diff_start = time.monotonic()
|
||||
try:
|
||||
# a list of docs in our local index
|
||||
all_indexed_document_ids = {
|
||||
doc.id
|
||||
for doc in get_documents_for_connector_credential_pair(
|
||||
db_session=db_session,
|
||||
connector_id=connector_id,
|
||||
credential_id=credential_id,
|
||||
)
|
||||
}
|
||||
|
||||
# generate list of docs to remove (no longer in the source)
|
||||
doc_ids_to_remove = list(
|
||||
all_indexed_document_ids - all_connector_doc_ids.keys()
|
||||
)
|
||||
}
|
||||
|
||||
# generate list of docs to remove (no longer in the source)
|
||||
doc_ids_to_remove = list(
|
||||
all_indexed_document_ids - all_connector_doc_ids.keys()
|
||||
)
|
||||
task_logger.info(
|
||||
"Pruning set collected: "
|
||||
f"cc_pair={cc_pair_id} "
|
||||
f"connector_source={cc_pair.connector.source} "
|
||||
f"docs_to_remove={len(doc_ids_to_remove)}"
|
||||
)
|
||||
|
||||
task_logger.info(
|
||||
"Pruning set collected: "
|
||||
f"cc_pair={cc_pair_id} "
|
||||
f"connector_source={cc_pair.connector.source} "
|
||||
f"docs_to_remove={len(doc_ids_to_remove)}"
|
||||
)
|
||||
task_logger.info(
|
||||
f"RedisConnector.prune.generate_tasks starting. cc_pair={cc_pair_id}"
|
||||
)
|
||||
tasks_generated = redis_connector.prune.generate_tasks(
|
||||
set(doc_ids_to_remove), self.app, db_session, None
|
||||
)
|
||||
if tasks_generated is None:
|
||||
return None
|
||||
|
||||
task_logger.info(
|
||||
f"RedisConnector.prune.generate_tasks starting. cc_pair={cc_pair_id}"
|
||||
)
|
||||
tasks_generated = redis_connector.prune.generate_tasks(
|
||||
set(doc_ids_to_remove), self.app, db_session, None
|
||||
)
|
||||
if tasks_generated is None:
|
||||
return None
|
||||
|
||||
task_logger.info(
|
||||
f"RedisConnector.prune.generate_tasks finished. cc_pair={cc_pair_id} tasks_generated={tasks_generated}"
|
||||
)
|
||||
task_logger.info(
|
||||
f"RedisConnector.prune.generate_tasks finished. cc_pair={cc_pair_id} tasks_generated={tasks_generated}"
|
||||
)
|
||||
finally:
|
||||
observe_pruning_diff_duration(
|
||||
time.monotonic() - diff_start, connector_type
|
||||
)
|
||||
|
||||
redis_connector.prune.generator_complete = tasks_generated
|
||||
|
||||
|
||||
@@ -42,6 +42,9 @@ from onyx.connectors.google_drive.file_retrieval import (
|
||||
get_all_files_in_my_drive_and_shared,
|
||||
)
|
||||
from onyx.connectors.google_drive.file_retrieval import get_external_access_for_folder
|
||||
from onyx.connectors.google_drive.file_retrieval import (
|
||||
get_files_by_web_view_links_batch,
|
||||
)
|
||||
from onyx.connectors.google_drive.file_retrieval import get_files_in_shared_drive
|
||||
from onyx.connectors.google_drive.file_retrieval import get_folder_metadata
|
||||
from onyx.connectors.google_drive.file_retrieval import get_root_folder_id
|
||||
@@ -70,11 +73,13 @@ from onyx.connectors.interfaces import CheckpointedConnectorWithPermSync
|
||||
from onyx.connectors.interfaces import CheckpointOutput
|
||||
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
|
||||
from onyx.connectors.interfaces import NormalizationResult
|
||||
from onyx.connectors.interfaces import Resolver
|
||||
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
from onyx.connectors.interfaces import SlimConnectorWithPermSync
|
||||
from onyx.connectors.models import ConnectorFailure
|
||||
from onyx.connectors.models import ConnectorMissingCredentialError
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.connectors.models import DocumentFailure
|
||||
from onyx.connectors.models import EntityFailure
|
||||
from onyx.connectors.models import HierarchyNode
|
||||
from onyx.connectors.models import SlimDocument
|
||||
@@ -202,7 +207,9 @@ class DriveIdStatus(Enum):
|
||||
|
||||
|
||||
class GoogleDriveConnector(
|
||||
SlimConnectorWithPermSync, CheckpointedConnectorWithPermSync[GoogleDriveCheckpoint]
|
||||
SlimConnectorWithPermSync,
|
||||
CheckpointedConnectorWithPermSync[GoogleDriveCheckpoint],
|
||||
Resolver,
|
||||
):
|
||||
def __init__(
|
||||
self,
|
||||
@@ -1665,6 +1672,82 @@ class GoogleDriveConnector(
|
||||
start, end, checkpoint, include_permissions=True
|
||||
)
|
||||
|
||||
@override
|
||||
def resolve_errors(
|
||||
self,
|
||||
errors: list[ConnectorFailure],
|
||||
include_permissions: bool = False,
|
||||
) -> Generator[Document | ConnectorFailure | HierarchyNode, None, None]:
|
||||
if self._creds is None or self._primary_admin_email is None:
|
||||
raise RuntimeError(
|
||||
"Credentials missing, should not call this method before calling load_credentials"
|
||||
)
|
||||
|
||||
logger.info(f"Resolving {len(errors)} errors")
|
||||
doc_ids = [
|
||||
failure.failed_document.document_id
|
||||
for failure in errors
|
||||
if failure.failed_document
|
||||
]
|
||||
service = get_drive_service(self.creds, self.primary_admin_email)
|
||||
field_type = (
|
||||
DriveFileFieldType.WITH_PERMISSIONS
|
||||
if include_permissions or self.exclude_domain_link_only
|
||||
else DriveFileFieldType.STANDARD
|
||||
)
|
||||
batch_result = get_files_by_web_view_links_batch(service, doc_ids, field_type)
|
||||
|
||||
for doc_id, error in batch_result.errors.items():
|
||||
yield ConnectorFailure(
|
||||
failed_document=DocumentFailure(
|
||||
document_id=doc_id,
|
||||
document_link=doc_id,
|
||||
),
|
||||
failure_message=f"Failed to retrieve file during error resolution: {error}",
|
||||
exception=error,
|
||||
)
|
||||
|
||||
permission_sync_context = (
|
||||
PermissionSyncContext(
|
||||
primary_admin_email=self.primary_admin_email,
|
||||
google_domain=self.google_domain,
|
||||
)
|
||||
if include_permissions
|
||||
else None
|
||||
)
|
||||
|
||||
retrieved_files = [
|
||||
RetrievedDriveFile(
|
||||
drive_file=file,
|
||||
user_email=self.primary_admin_email,
|
||||
completion_stage=DriveRetrievalStage.DONE,
|
||||
)
|
||||
for file in batch_result.files.values()
|
||||
]
|
||||
|
||||
yield from self._get_new_ancestors_for_files(
|
||||
files=retrieved_files,
|
||||
seen_hierarchy_node_raw_ids=ThreadSafeSet(),
|
||||
fully_walked_hierarchy_node_raw_ids=ThreadSafeSet(),
|
||||
permission_sync_context=permission_sync_context,
|
||||
add_prefix=True,
|
||||
)
|
||||
|
||||
func_with_args = [
|
||||
(
|
||||
self._convert_retrieved_file_to_document,
|
||||
(rf, permission_sync_context),
|
||||
)
|
||||
for rf in retrieved_files
|
||||
]
|
||||
results = cast(
|
||||
list[Document | ConnectorFailure | None],
|
||||
run_functions_tuples_in_parallel(func_with_args, max_workers=8),
|
||||
)
|
||||
for result in results:
|
||||
if result is not None:
|
||||
yield result
|
||||
|
||||
def _extract_slim_docs_from_google_drive(
|
||||
self,
|
||||
checkpoint: GoogleDriveCheckpoint,
|
||||
|
||||
@@ -9,6 +9,7 @@ from urllib.parse import urlparse
|
||||
|
||||
from googleapiclient.discovery import Resource # type: ignore
|
||||
from googleapiclient.errors import HttpError # type: ignore
|
||||
from googleapiclient.http import BatchHttpRequest # type: ignore
|
||||
|
||||
from onyx.access.models import ExternalAccess
|
||||
from onyx.connectors.google_drive.constants import DRIVE_FOLDER_TYPE
|
||||
@@ -60,6 +61,8 @@ SLIM_FILE_FIELDS = (
|
||||
)
|
||||
FOLDER_FIELDS = "nextPageToken, files(id, name, permissions, modifiedTime, webViewLink, shortcutDetails)"
|
||||
|
||||
MAX_BATCH_SIZE = 100
|
||||
|
||||
HIERARCHY_FIELDS = "id, name, parents, webViewLink, mimeType, driveId"
|
||||
|
||||
HIERARCHY_FIELDS_WITH_PERMISSIONS = (
|
||||
@@ -216,7 +219,7 @@ def get_external_access_for_folder(
|
||||
|
||||
|
||||
def _get_fields_for_file_type(field_type: DriveFileFieldType) -> str:
|
||||
"""Get the appropriate fields string based on the field type enum"""
|
||||
"""Get the appropriate fields string for files().list() based on the field type enum."""
|
||||
if field_type == DriveFileFieldType.SLIM:
|
||||
return SLIM_FILE_FIELDS
|
||||
elif field_type == DriveFileFieldType.WITH_PERMISSIONS:
|
||||
@@ -225,6 +228,25 @@ def _get_fields_for_file_type(field_type: DriveFileFieldType) -> str:
|
||||
return FILE_FIELDS
|
||||
|
||||
|
||||
def _extract_single_file_fields(list_fields: str) -> str:
|
||||
"""Convert a files().list() fields string to one suitable for files().get().
|
||||
|
||||
List fields look like "nextPageToken, files(field1, field2, ...)"
|
||||
Single-file fields should be just "field1, field2, ..."
|
||||
"""
|
||||
start = list_fields.find("files(")
|
||||
if start == -1:
|
||||
return list_fields
|
||||
inner_start = start + len("files(")
|
||||
inner_end = list_fields.rfind(")")
|
||||
return list_fields[inner_start:inner_end]
|
||||
|
||||
|
||||
def _get_single_file_fields(field_type: DriveFileFieldType) -> str:
|
||||
"""Get the appropriate fields string for files().get() based on the field type enum."""
|
||||
return _extract_single_file_fields(_get_fields_for_file_type(field_type))
|
||||
|
||||
|
||||
def _get_files_in_parent(
|
||||
service: Resource,
|
||||
parent_id: str,
|
||||
@@ -536,3 +558,74 @@ def get_file_by_web_view_link(
|
||||
)
|
||||
.execute()
|
||||
)
|
||||
|
||||
|
||||
class BatchRetrievalResult:
|
||||
"""Result of a batch file retrieval, separating successes from errors."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.files: dict[str, GoogleDriveFileType] = {}
|
||||
self.errors: dict[str, Exception] = {}
|
||||
|
||||
|
||||
def get_files_by_web_view_links_batch(
|
||||
service: GoogleDriveService,
|
||||
web_view_links: list[str],
|
||||
field_type: DriveFileFieldType,
|
||||
) -> BatchRetrievalResult:
|
||||
"""Retrieve multiple Google Drive files by webViewLink using the batch API.
|
||||
|
||||
Returns a BatchRetrievalResult containing successful file retrievals
|
||||
and errors for any files that could not be fetched.
|
||||
Automatically splits into chunks of MAX_BATCH_SIZE.
|
||||
"""
|
||||
fields = _get_single_file_fields(field_type)
|
||||
if len(web_view_links) <= MAX_BATCH_SIZE:
|
||||
return _get_files_by_web_view_links_batch(service, web_view_links, fields)
|
||||
|
||||
combined = BatchRetrievalResult()
|
||||
for i in range(0, len(web_view_links), MAX_BATCH_SIZE):
|
||||
chunk = web_view_links[i : i + MAX_BATCH_SIZE]
|
||||
chunk_result = _get_files_by_web_view_links_batch(service, chunk, fields)
|
||||
combined.files.update(chunk_result.files)
|
||||
combined.errors.update(chunk_result.errors)
|
||||
return combined
|
||||
|
||||
|
||||
def _get_files_by_web_view_links_batch(
|
||||
service: GoogleDriveService,
|
||||
web_view_links: list[str],
|
||||
fields: str,
|
||||
) -> BatchRetrievalResult:
|
||||
"""Single-batch implementation."""
|
||||
|
||||
result = BatchRetrievalResult()
|
||||
|
||||
def callback(
|
||||
request_id: str,
|
||||
response: GoogleDriveFileType,
|
||||
exception: Exception | None,
|
||||
) -> None:
|
||||
if exception:
|
||||
logger.warning(f"Error retrieving file {request_id}: {exception}")
|
||||
result.errors[request_id] = exception
|
||||
else:
|
||||
result.files[request_id] = response
|
||||
|
||||
batch = cast(BatchHttpRequest, service.new_batch_http_request(callback=callback))
|
||||
|
||||
for web_view_link in web_view_links:
|
||||
try:
|
||||
file_id = _extract_file_id_from_web_view_link(web_view_link)
|
||||
request = service.files().get(
|
||||
fileId=file_id,
|
||||
supportsAllDrives=True,
|
||||
fields=fields,
|
||||
)
|
||||
batch.add(request, request_id=web_view_link)
|
||||
except ValueError as e:
|
||||
logger.warning(f"Failed to extract file ID from {web_view_link}: {e}")
|
||||
result.errors[web_view_link] = e
|
||||
|
||||
batch.execute()
|
||||
return result
|
||||
|
||||
@@ -298,6 +298,22 @@ class CheckpointedConnectorWithPermSync(CheckpointedConnector[CT]):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class Resolver(BaseConnector):
|
||||
@abc.abstractmethod
|
||||
def resolve_errors(
|
||||
self,
|
||||
errors: list[ConnectorFailure],
|
||||
include_permissions: bool = False,
|
||||
) -> Generator[Document | ConnectorFailure | HierarchyNode, None, None]:
|
||||
"""Attempts to yield back ALL the documents described by the errors, no checkpointing.
|
||||
|
||||
Caller's responsibility is to delete the old ConnectorFailures and replace with the new ones.
|
||||
If include_permissions is True, the documents will have permissions synced.
|
||||
May also yield HierarchyNode objects for ancestor folders of resolved documents.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class HierarchyConnector(BaseConnector):
|
||||
@abc.abstractmethod
|
||||
def load_hierarchy(
|
||||
|
||||
@@ -60,8 +60,10 @@ logger = setup_logger()
|
||||
|
||||
ONE_HOUR = 3600
|
||||
|
||||
_MAX_RESULTS_FETCH_IDS = 5000 # 5000
|
||||
_MAX_RESULTS_FETCH_IDS = 5000
|
||||
_JIRA_FULL_PAGE_SIZE = 50
|
||||
# https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issues/
|
||||
_JIRA_BULK_FETCH_LIMIT = 100
|
||||
|
||||
# Constants for Jira field names
|
||||
_FIELD_REPORTER = "reporter"
|
||||
@@ -255,15 +257,13 @@ def _bulk_fetch_request(
|
||||
return resp.json()["issues"]
|
||||
|
||||
|
||||
def bulk_fetch_issues(
|
||||
jira_client: JIRA, issue_ids: list[str], fields: str | None = None
|
||||
) -> list[Issue]:
|
||||
# TODO(evan): move away from this jira library if they continue to not support
|
||||
# the endpoints we need. Using private fields is not ideal, but
|
||||
# is likely fine for now since we pin the library version
|
||||
|
||||
def _bulk_fetch_batch(
|
||||
jira_client: JIRA, issue_ids: list[str], fields: str | None
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Fetch a single batch (must be <= _JIRA_BULK_FETCH_LIMIT).
|
||||
On JSONDecodeError, recursively bisects until it succeeds or reaches size 1."""
|
||||
try:
|
||||
raw_issues = _bulk_fetch_request(jira_client, issue_ids, fields)
|
||||
return _bulk_fetch_request(jira_client, issue_ids, fields)
|
||||
except requests.exceptions.JSONDecodeError:
|
||||
if len(issue_ids) <= 1:
|
||||
logger.exception(
|
||||
@@ -277,12 +277,25 @@ def bulk_fetch_issues(
|
||||
f"Jira bulk-fetch JSON decode failed for batch of {len(issue_ids)} issues. "
|
||||
f"Splitting into sub-batches of {mid} and {len(issue_ids) - mid}."
|
||||
)
|
||||
left = bulk_fetch_issues(jira_client, issue_ids[:mid], fields)
|
||||
right = bulk_fetch_issues(jira_client, issue_ids[mid:], fields)
|
||||
left = _bulk_fetch_batch(jira_client, issue_ids[:mid], fields)
|
||||
right = _bulk_fetch_batch(jira_client, issue_ids[mid:], fields)
|
||||
return left + right
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching issues: {e}")
|
||||
raise
|
||||
|
||||
|
||||
def bulk_fetch_issues(
|
||||
jira_client: JIRA, issue_ids: list[str], fields: str | None = None
|
||||
) -> list[Issue]:
|
||||
# TODO(evan): move away from this jira library if they continue to not support
|
||||
# the endpoints we need. Using private fields is not ideal, but
|
||||
# is likely fine for now since we pin the library version
|
||||
|
||||
raw_issues: list[dict[str, Any]] = []
|
||||
for batch in chunked(issue_ids, _JIRA_BULK_FETCH_LIMIT):
|
||||
try:
|
||||
raw_issues.extend(_bulk_fetch_batch(jira_client, list(batch), fields))
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching issues: {e}")
|
||||
raise
|
||||
|
||||
return [
|
||||
Issue(jira_client._options, jira_client._session, raw=issue)
|
||||
|
||||
@@ -3,6 +3,7 @@ from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
from typing import cast
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import BaseModel
|
||||
from pydantic import Field
|
||||
@@ -33,9 +34,17 @@ class ConnectorMissingCredentialError(PermissionError):
|
||||
)
|
||||
|
||||
|
||||
class SectionType(str, Enum):
|
||||
"""Discriminator for Section subclasses."""
|
||||
|
||||
TEXT = "text"
|
||||
IMAGE = "image"
|
||||
|
||||
|
||||
class Section(BaseModel):
|
||||
"""Base section class with common attributes"""
|
||||
|
||||
type: SectionType
|
||||
link: str | None = None
|
||||
text: str | None = None
|
||||
image_file_id: str | None = None
|
||||
@@ -44,6 +53,7 @@ class Section(BaseModel):
|
||||
class TextSection(Section):
|
||||
"""Section containing text content"""
|
||||
|
||||
type: Literal[SectionType.TEXT] = SectionType.TEXT
|
||||
text: str
|
||||
|
||||
def __sizeof__(self) -> int:
|
||||
@@ -53,6 +63,7 @@ class TextSection(Section):
|
||||
class ImageSection(Section):
|
||||
"""Section containing an image reference"""
|
||||
|
||||
type: Literal[SectionType.IMAGE] = SectionType.IMAGE
|
||||
image_file_id: str
|
||||
|
||||
def __sizeof__(self) -> int:
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from typing import TypedDict
|
||||
|
||||
@@ -6,6 +7,14 @@ from pydantic import BaseModel
|
||||
from onyx.onyxbot.slack.models import ChannelType
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class DirectThreadFetch:
|
||||
"""Request to fetch a Slack thread directly by channel and timestamp."""
|
||||
|
||||
channel_id: str
|
||||
thread_ts: str
|
||||
|
||||
|
||||
class ChannelMetadata(TypedDict):
|
||||
"""Type definition for cached channel metadata."""
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ from onyx.configs.chat_configs import DOC_TIME_DECAY
|
||||
from onyx.connectors.models import IndexingDocument
|
||||
from onyx.connectors.models import TextSection
|
||||
from onyx.context.search.federated.models import ChannelMetadata
|
||||
from onyx.context.search.federated.models import DirectThreadFetch
|
||||
from onyx.context.search.federated.models import SlackMessage
|
||||
from onyx.context.search.federated.slack_search_utils import ALL_CHANNEL_TYPES
|
||||
from onyx.context.search.federated.slack_search_utils import build_channel_query_filter
|
||||
@@ -49,7 +50,6 @@ from onyx.server.federated.models import FederatedConnectorDetail
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
|
||||
from onyx.utils.timing import log_function_time
|
||||
from shared_configs.configs import DOC_EMBEDDING_CONTEXT_SIZE
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
@@ -58,7 +58,6 @@ HIGHLIGHT_END_CHAR = "\ue001"
|
||||
|
||||
CHANNEL_METADATA_CACHE_TTL = 60 * 60 * 24 # 24 hours
|
||||
USER_PROFILE_CACHE_TTL = 60 * 60 * 24 # 24 hours
|
||||
SLACK_THREAD_CONTEXT_WINDOW = 3 # Number of messages before matched message to include
|
||||
CHANNEL_METADATA_MAX_RETRIES = 3 # Maximum retry attempts for channel metadata fetching
|
||||
CHANNEL_METADATA_RETRY_DELAY = 1 # Initial retry delay in seconds (exponential backoff)
|
||||
|
||||
@@ -421,6 +420,94 @@ class SlackQueryResult(BaseModel):
|
||||
filtered_channels: list[str] # Channels filtered out during this query
|
||||
|
||||
|
||||
def _fetch_thread_from_url(
|
||||
thread_fetch: DirectThreadFetch,
|
||||
access_token: str,
|
||||
channel_metadata_dict: dict[str, ChannelMetadata] | None = None,
|
||||
) -> SlackQueryResult:
|
||||
"""Fetch a thread directly from a Slack URL via conversations.replies."""
|
||||
channel_id = thread_fetch.channel_id
|
||||
thread_ts = thread_fetch.thread_ts
|
||||
|
||||
slack_client = WebClient(token=access_token)
|
||||
try:
|
||||
response = slack_client.conversations_replies(
|
||||
channel=channel_id,
|
||||
ts=thread_ts,
|
||||
)
|
||||
response.validate()
|
||||
messages: list[dict[str, Any]] = response.get("messages", [])
|
||||
except SlackApiError as e:
|
||||
logger.warning(
|
||||
f"Failed to fetch thread from URL (channel={channel_id}, ts={thread_ts}): {e}"
|
||||
)
|
||||
return SlackQueryResult(messages=[], filtered_channels=[])
|
||||
|
||||
if not messages:
|
||||
logger.warning(
|
||||
f"No messages found for URL override (channel={channel_id}, ts={thread_ts})"
|
||||
)
|
||||
return SlackQueryResult(messages=[], filtered_channels=[])
|
||||
|
||||
# Build thread text from all messages
|
||||
thread_text = _build_thread_text(messages, access_token, None, slack_client)
|
||||
|
||||
# Get channel name from metadata cache or API
|
||||
channel_name = "unknown"
|
||||
if channel_metadata_dict and channel_id in channel_metadata_dict:
|
||||
channel_name = channel_metadata_dict[channel_id].get("name", "unknown")
|
||||
else:
|
||||
try:
|
||||
ch_response = slack_client.conversations_info(channel=channel_id)
|
||||
ch_response.validate()
|
||||
channel_info: dict[str, Any] = ch_response.get("channel", {})
|
||||
channel_name = channel_info.get("name", "unknown")
|
||||
except SlackApiError:
|
||||
pass
|
||||
|
||||
# Build the SlackMessage
|
||||
parent_msg = messages[0]
|
||||
message_ts = parent_msg.get("ts", thread_ts)
|
||||
username = parent_msg.get("user", "unknown_user")
|
||||
parent_text = parent_msg.get("text", "")
|
||||
snippet = (
|
||||
parent_text[:50].rstrip() + "..." if len(parent_text) > 50 else parent_text
|
||||
).replace("\n", " ")
|
||||
|
||||
doc_time = datetime.fromtimestamp(float(message_ts))
|
||||
decay_factor = DOC_TIME_DECAY
|
||||
doc_age_years = (datetime.now() - doc_time).total_seconds() / (365 * 24 * 60 * 60)
|
||||
recency_bias = max(1 / (1 + decay_factor * doc_age_years), 0.75)
|
||||
|
||||
permalink = (
|
||||
f"https://slack.com/archives/{channel_id}/p{message_ts.replace('.', '')}"
|
||||
)
|
||||
|
||||
slack_message = SlackMessage(
|
||||
document_id=f"{channel_id}_{message_ts}",
|
||||
channel_id=channel_id,
|
||||
message_id=message_ts,
|
||||
thread_id=None, # Prevent double-enrichment in thread context fetch
|
||||
link=permalink,
|
||||
metadata={
|
||||
"channel": channel_name,
|
||||
"time": doc_time.isoformat(),
|
||||
},
|
||||
timestamp=doc_time,
|
||||
recency_bias=recency_bias,
|
||||
semantic_identifier=f"{username} in #{channel_name}: {snippet}",
|
||||
text=thread_text,
|
||||
highlighted_texts=set(),
|
||||
slack_score=100000.0, # High priority — user explicitly asked for this thread
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"URL override: fetched thread from channel={channel_id}, ts={thread_ts}, {len(messages)} messages"
|
||||
)
|
||||
|
||||
return SlackQueryResult(messages=[slack_message], filtered_channels=[])
|
||||
|
||||
|
||||
def query_slack(
|
||||
query_string: str,
|
||||
access_token: str,
|
||||
@@ -432,7 +519,6 @@ def query_slack(
|
||||
available_channels: list[str] | None = None,
|
||||
channel_metadata_dict: dict[str, ChannelMetadata] | None = None,
|
||||
) -> SlackQueryResult:
|
||||
|
||||
# Check if query has channel override (user specified channels in query)
|
||||
has_channel_override = query_string.startswith("__CHANNEL_OVERRIDE__")
|
||||
|
||||
@@ -662,7 +748,6 @@ def _fetch_thread_context(
|
||||
"""
|
||||
channel_id = message.channel_id
|
||||
thread_id = message.thread_id
|
||||
message_id = message.message_id
|
||||
|
||||
# If not a thread, return original text as success
|
||||
if thread_id is None:
|
||||
@@ -695,62 +780,37 @@ def _fetch_thread_context(
|
||||
if len(messages) <= 1:
|
||||
return ThreadContextResult.success(message.text)
|
||||
|
||||
# Build thread text from thread starter + context window around matched message
|
||||
thread_text = _build_thread_text(
|
||||
messages, message_id, thread_id, access_token, team_id, slack_client
|
||||
)
|
||||
# Build thread text from thread starter + all replies
|
||||
thread_text = _build_thread_text(messages, access_token, team_id, slack_client)
|
||||
return ThreadContextResult.success(thread_text)
|
||||
|
||||
|
||||
def _build_thread_text(
|
||||
messages: list[dict[str, Any]],
|
||||
message_id: str,
|
||||
thread_id: str,
|
||||
access_token: str,
|
||||
team_id: str | None,
|
||||
slack_client: WebClient,
|
||||
) -> str:
|
||||
"""Build the thread text from messages."""
|
||||
"""Build thread text including all replies.
|
||||
|
||||
Includes the thread parent message followed by all replies in order.
|
||||
"""
|
||||
msg_text = messages[0].get("text", "")
|
||||
msg_sender = messages[0].get("user", "")
|
||||
thread_text = f"<@{msg_sender}>: {msg_text}"
|
||||
|
||||
# All messages after index 0 are replies
|
||||
replies = messages[1:]
|
||||
if not replies:
|
||||
return thread_text
|
||||
|
||||
logger.debug(f"Thread {messages[0].get('ts')}: {len(replies)} replies included")
|
||||
thread_text += "\n\nReplies:"
|
||||
if thread_id == message_id:
|
||||
message_id_idx = 0
|
||||
else:
|
||||
message_id_idx = next(
|
||||
(i for i, msg in enumerate(messages) if msg.get("ts") == message_id), 0
|
||||
)
|
||||
if not message_id_idx:
|
||||
return thread_text
|
||||
|
||||
start_idx = max(1, message_id_idx - SLACK_THREAD_CONTEXT_WINDOW)
|
||||
|
||||
if start_idx > 1:
|
||||
thread_text += "\n..."
|
||||
|
||||
for i in range(start_idx, message_id_idx):
|
||||
msg_text = messages[i].get("text", "")
|
||||
msg_sender = messages[i].get("user", "")
|
||||
thread_text += f"\n\n<@{msg_sender}>: {msg_text}"
|
||||
|
||||
msg_text = messages[message_id_idx].get("text", "")
|
||||
msg_sender = messages[message_id_idx].get("user", "")
|
||||
thread_text += f"\n\n<@{msg_sender}>: {msg_text}"
|
||||
|
||||
# Add following replies
|
||||
len_replies = 0
|
||||
for msg in messages[message_id_idx + 1 :]:
|
||||
for msg in replies:
|
||||
msg_text = msg.get("text", "")
|
||||
msg_sender = msg.get("user", "")
|
||||
reply = f"\n\n<@{msg_sender}>: {msg_text}"
|
||||
thread_text += reply
|
||||
|
||||
len_replies += len(reply)
|
||||
if len_replies >= DOC_EMBEDDING_CONTEXT_SIZE * 4:
|
||||
thread_text += "\n..."
|
||||
break
|
||||
thread_text += f"\n\n<@{msg_sender}>: {msg_text}"
|
||||
|
||||
# Replace user IDs with names using cached lookups
|
||||
userids: set[str] = set(re.findall(r"<@([A-Z0-9]+)>", thread_text))
|
||||
@@ -976,7 +1036,16 @@ def slack_retrieval(
|
||||
|
||||
# Query slack with entity filtering
|
||||
llm = get_default_llm()
|
||||
query_strings = build_slack_queries(query, llm, entities, available_channels)
|
||||
query_items = build_slack_queries(query, llm, entities, available_channels)
|
||||
|
||||
# Partition into direct thread fetches and search query strings
|
||||
direct_fetches: list[DirectThreadFetch] = []
|
||||
query_strings: list[str] = []
|
||||
for item in query_items:
|
||||
if isinstance(item, DirectThreadFetch):
|
||||
direct_fetches.append(item)
|
||||
else:
|
||||
query_strings.append(item)
|
||||
|
||||
# Determine filtering based on entities OR context (bot)
|
||||
include_dm = False
|
||||
@@ -993,8 +1062,16 @@ def slack_retrieval(
|
||||
f"Private channel context: will only allow messages from {allowed_private_channel} + public channels"
|
||||
)
|
||||
|
||||
# Build search tasks
|
||||
search_tasks = [
|
||||
# Build search tasks — direct thread fetches + keyword searches
|
||||
search_tasks: list[tuple] = [
|
||||
(
|
||||
_fetch_thread_from_url,
|
||||
(fetch, access_token, channel_metadata_dict),
|
||||
)
|
||||
for fetch in direct_fetches
|
||||
]
|
||||
|
||||
search_tasks.extend(
|
||||
(
|
||||
query_slack,
|
||||
(
|
||||
@@ -1010,7 +1087,7 @@ def slack_retrieval(
|
||||
),
|
||||
)
|
||||
for query_string in query_strings
|
||||
]
|
||||
)
|
||||
|
||||
# If include_dm is True AND we're not already searching all channels,
|
||||
# add additional searches without channel filters.
|
||||
|
||||
@@ -10,6 +10,7 @@ from pydantic import ValidationError
|
||||
|
||||
from onyx.configs.app_configs import MAX_SLACK_QUERY_EXPANSIONS
|
||||
from onyx.context.search.federated.models import ChannelMetadata
|
||||
from onyx.context.search.federated.models import DirectThreadFetch
|
||||
from onyx.context.search.models import ChunkIndexRequest
|
||||
from onyx.federated_connectors.slack.models import SlackEntities
|
||||
from onyx.llm.interfaces import LLM
|
||||
@@ -638,12 +639,38 @@ def expand_query_with_llm(query_text: str, llm: LLM) -> list[str]:
|
||||
return [query_text]
|
||||
|
||||
|
||||
SLACK_URL_PATTERN = re.compile(
|
||||
r"https?://[a-z0-9-]+\.slack\.com/archives/([A-Z0-9]+)/p(\d{16})"
|
||||
)
|
||||
|
||||
|
||||
def extract_slack_message_urls(
|
||||
query_text: str,
|
||||
) -> list[tuple[str, str]]:
|
||||
"""Extract Slack message URLs from query text.
|
||||
|
||||
Parses URLs like:
|
||||
https://onyx-company.slack.com/archives/C097NBWMY8Y/p1775491616524769
|
||||
|
||||
Returns list of (channel_id, thread_ts) tuples.
|
||||
The 16-digit timestamp is converted to Slack ts format (with dot).
|
||||
"""
|
||||
results = []
|
||||
for match in SLACK_URL_PATTERN.finditer(query_text):
|
||||
channel_id = match.group(1)
|
||||
raw_ts = match.group(2)
|
||||
# Convert p1775491616524769 -> 1775491616.524769
|
||||
thread_ts = f"{raw_ts[:10]}.{raw_ts[10:]}"
|
||||
results.append((channel_id, thread_ts))
|
||||
return results
|
||||
|
||||
|
||||
def build_slack_queries(
|
||||
query: ChunkIndexRequest,
|
||||
llm: LLM,
|
||||
entities: dict[str, Any] | None = None,
|
||||
available_channels: list[str] | None = None,
|
||||
) -> list[str]:
|
||||
) -> list[str | DirectThreadFetch]:
|
||||
"""Build Slack query strings with date filtering and query expansion."""
|
||||
default_search_days = 30
|
||||
if entities:
|
||||
@@ -668,6 +695,15 @@ def build_slack_queries(
|
||||
cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_back)
|
||||
time_filter = f" after:{cutoff_date.strftime('%Y-%m-%d')}"
|
||||
|
||||
# Check for Slack message URLs — if found, add direct fetch requests
|
||||
url_fetches: list[DirectThreadFetch] = []
|
||||
slack_urls = extract_slack_message_urls(query.query)
|
||||
for channel_id, thread_ts in slack_urls:
|
||||
url_fetches.append(
|
||||
DirectThreadFetch(channel_id=channel_id, thread_ts=thread_ts)
|
||||
)
|
||||
logger.info(f"Detected Slack URL: channel={channel_id}, ts={thread_ts}")
|
||||
|
||||
# ALWAYS extract channel references from the query (not just for recency queries)
|
||||
channel_references = extract_channel_references_from_query(query.query)
|
||||
|
||||
@@ -684,7 +720,9 @@ def build_slack_queries(
|
||||
|
||||
# If valid channels detected, use ONLY those channels with NO keywords
|
||||
# Return query with ONLY time filter + channel filter (no keywords)
|
||||
return [build_channel_override_query(channel_references, time_filter)]
|
||||
return url_fetches + [
|
||||
build_channel_override_query(channel_references, time_filter)
|
||||
]
|
||||
except ValueError as e:
|
||||
# If validation fails, log the error and continue with normal flow
|
||||
logger.warning(f"Channel reference validation failed: {e}")
|
||||
@@ -702,7 +740,8 @@ def build_slack_queries(
|
||||
rephrased_queries = expand_query_with_llm(query.query, llm)
|
||||
|
||||
# Build final query strings with time filters
|
||||
return [
|
||||
search_queries = [
|
||||
rephrased_query.strip() + time_filter
|
||||
for rephrased_query in rephrased_queries[:MAX_SLACK_QUERY_EXPANSIONS]
|
||||
]
|
||||
return url_fetches + search_queries
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
from typing import cast
|
||||
|
||||
from chonkie import SentenceChunker
|
||||
|
||||
from onyx.configs.app_configs import AVERAGE_SUMMARY_EMBEDDINGS
|
||||
@@ -16,16 +14,14 @@ from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
|
||||
get_metadata_keys_to_ignore,
|
||||
)
|
||||
from onyx.connectors.models import IndexingDocument
|
||||
from onyx.connectors.models import Section
|
||||
from onyx.indexing.chunking import DocumentChunker
|
||||
from onyx.indexing.chunking import extract_blurb
|
||||
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
|
||||
from onyx.indexing.models import DocAwareChunk
|
||||
from onyx.llm.utils import MAX_CONTEXT_TOKENS
|
||||
from onyx.natural_language_processing.utils import BaseTokenizer
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.text_processing import clean_text
|
||||
from onyx.utils.text_processing import shared_precompare_cleanup
|
||||
from shared_configs.configs import DOC_EMBEDDING_CONTEXT_SIZE
|
||||
from shared_configs.configs import STRICT_CHUNK_TOKEN_LIMIT
|
||||
|
||||
# Not supporting overlaps, we need a clean combination of chunks and it is unclear if overlaps
|
||||
# actually help quality at all
|
||||
@@ -154,9 +150,6 @@ class Chunker:
|
||||
self.tokenizer = tokenizer
|
||||
self.callback = callback
|
||||
|
||||
self.max_context = 0
|
||||
self.prompt_tokens = 0
|
||||
|
||||
# Create a token counter function that returns the count instead of the tokens
|
||||
def token_counter(text: str) -> int:
|
||||
return len(tokenizer.encode(text))
|
||||
@@ -186,234 +179,12 @@ class Chunker:
|
||||
else None
|
||||
)
|
||||
|
||||
def _split_oversized_chunk(self, text: str, content_token_limit: int) -> list[str]:
|
||||
"""
|
||||
Splits the text into smaller chunks based on token count to ensure
|
||||
no chunk exceeds the content_token_limit.
|
||||
"""
|
||||
tokens = self.tokenizer.tokenize(text)
|
||||
chunks = []
|
||||
start = 0
|
||||
total_tokens = len(tokens)
|
||||
while start < total_tokens:
|
||||
end = min(start + content_token_limit, total_tokens)
|
||||
token_chunk = tokens[start:end]
|
||||
chunk_text = " ".join(token_chunk)
|
||||
chunks.append(chunk_text)
|
||||
start = end
|
||||
return chunks
|
||||
|
||||
def _extract_blurb(self, text: str) -> str:
|
||||
"""
|
||||
Extract a short blurb from the text (first chunk of size `blurb_size`).
|
||||
"""
|
||||
# chunker is in `text` mode
|
||||
texts = cast(list[str], self.blurb_splitter.chunk(text))
|
||||
if not texts:
|
||||
return ""
|
||||
return texts[0]
|
||||
|
||||
def _get_mini_chunk_texts(self, chunk_text: str) -> list[str] | None:
|
||||
"""
|
||||
For "multipass" mode: additional sub-chunks (mini-chunks) for use in certain embeddings.
|
||||
"""
|
||||
if self.mini_chunk_splitter and chunk_text.strip():
|
||||
# chunker is in `text` mode
|
||||
return cast(list[str], self.mini_chunk_splitter.chunk(chunk_text))
|
||||
return None
|
||||
|
||||
# ADDED: extra param image_url to store in the chunk
|
||||
def _create_chunk(
|
||||
self,
|
||||
document: IndexingDocument,
|
||||
chunks_list: list[DocAwareChunk],
|
||||
text: str,
|
||||
links: dict[int, str],
|
||||
is_continuation: bool = False,
|
||||
title_prefix: str = "",
|
||||
metadata_suffix_semantic: str = "",
|
||||
metadata_suffix_keyword: str = "",
|
||||
image_file_id: str | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Helper to create a new DocAwareChunk, append it to chunks_list.
|
||||
"""
|
||||
new_chunk = DocAwareChunk(
|
||||
source_document=document,
|
||||
chunk_id=len(chunks_list),
|
||||
blurb=self._extract_blurb(text),
|
||||
content=text,
|
||||
source_links=links or {0: ""},
|
||||
image_file_id=image_file_id,
|
||||
section_continuation=is_continuation,
|
||||
title_prefix=title_prefix,
|
||||
metadata_suffix_semantic=metadata_suffix_semantic,
|
||||
metadata_suffix_keyword=metadata_suffix_keyword,
|
||||
mini_chunk_texts=self._get_mini_chunk_texts(text),
|
||||
large_chunk_id=None,
|
||||
doc_summary="",
|
||||
chunk_context="",
|
||||
contextual_rag_reserved_tokens=0, # set per-document in _handle_single_document
|
||||
self._document_chunker = DocumentChunker(
|
||||
tokenizer=tokenizer,
|
||||
blurb_splitter=self.blurb_splitter,
|
||||
chunk_splitter=self.chunk_splitter,
|
||||
mini_chunk_splitter=self.mini_chunk_splitter,
|
||||
)
|
||||
chunks_list.append(new_chunk)
|
||||
|
||||
def _chunk_document_with_sections(
|
||||
self,
|
||||
document: IndexingDocument,
|
||||
sections: list[Section],
|
||||
title_prefix: str,
|
||||
metadata_suffix_semantic: str,
|
||||
metadata_suffix_keyword: str,
|
||||
content_token_limit: int,
|
||||
) -> list[DocAwareChunk]:
|
||||
"""
|
||||
Loops through sections of the document, converting them into one or more chunks.
|
||||
Works with processed sections that are base Section objects.
|
||||
"""
|
||||
chunks: list[DocAwareChunk] = []
|
||||
link_offsets: dict[int, str] = {}
|
||||
chunk_text = ""
|
||||
|
||||
for section_idx, section in enumerate(sections):
|
||||
# Get section text and other attributes
|
||||
section_text = clean_text(str(section.text or ""))
|
||||
section_link_text = section.link or ""
|
||||
image_url = section.image_file_id
|
||||
|
||||
# If there is no useful content, skip
|
||||
if not section_text and (not document.title or section_idx > 0):
|
||||
logger.warning(
|
||||
f"Skipping empty or irrelevant section in doc {document.semantic_identifier}, link={section_link_text}"
|
||||
)
|
||||
continue
|
||||
|
||||
# CASE 1: If this section has an image, force a separate chunk
|
||||
if image_url:
|
||||
# First, if we have any partially built text chunk, finalize it
|
||||
if chunk_text.strip():
|
||||
self._create_chunk(
|
||||
document,
|
||||
chunks,
|
||||
chunk_text,
|
||||
link_offsets,
|
||||
is_continuation=False,
|
||||
title_prefix=title_prefix,
|
||||
metadata_suffix_semantic=metadata_suffix_semantic,
|
||||
metadata_suffix_keyword=metadata_suffix_keyword,
|
||||
)
|
||||
chunk_text = ""
|
||||
link_offsets = {}
|
||||
|
||||
# Create a chunk specifically for this image section
|
||||
# (Using the text summary that was generated during processing)
|
||||
self._create_chunk(
|
||||
document,
|
||||
chunks,
|
||||
section_text,
|
||||
links={0: section_link_text} if section_link_text else {},
|
||||
image_file_id=image_url,
|
||||
title_prefix=title_prefix,
|
||||
metadata_suffix_semantic=metadata_suffix_semantic,
|
||||
metadata_suffix_keyword=metadata_suffix_keyword,
|
||||
)
|
||||
# Continue to next section
|
||||
continue
|
||||
|
||||
# CASE 2: Normal text section
|
||||
section_token_count = len(self.tokenizer.encode(section_text))
|
||||
|
||||
# If the section is large on its own, split it separately
|
||||
if section_token_count > content_token_limit:
|
||||
if chunk_text.strip():
|
||||
self._create_chunk(
|
||||
document,
|
||||
chunks,
|
||||
chunk_text,
|
||||
link_offsets,
|
||||
False,
|
||||
title_prefix,
|
||||
metadata_suffix_semantic,
|
||||
metadata_suffix_keyword,
|
||||
)
|
||||
chunk_text = ""
|
||||
link_offsets = {}
|
||||
|
||||
# chunker is in `text` mode
|
||||
split_texts = cast(list[str], self.chunk_splitter.chunk(section_text))
|
||||
for i, split_text in enumerate(split_texts):
|
||||
# If even the split_text is bigger than strict limit, further split
|
||||
if (
|
||||
STRICT_CHUNK_TOKEN_LIMIT
|
||||
and len(self.tokenizer.encode(split_text)) > content_token_limit
|
||||
):
|
||||
smaller_chunks = self._split_oversized_chunk(
|
||||
split_text, content_token_limit
|
||||
)
|
||||
for j, small_chunk in enumerate(smaller_chunks):
|
||||
self._create_chunk(
|
||||
document,
|
||||
chunks,
|
||||
small_chunk,
|
||||
{0: section_link_text},
|
||||
is_continuation=(j != 0),
|
||||
title_prefix=title_prefix,
|
||||
metadata_suffix_semantic=metadata_suffix_semantic,
|
||||
metadata_suffix_keyword=metadata_suffix_keyword,
|
||||
)
|
||||
else:
|
||||
self._create_chunk(
|
||||
document,
|
||||
chunks,
|
||||
split_text,
|
||||
{0: section_link_text},
|
||||
is_continuation=(i != 0),
|
||||
title_prefix=title_prefix,
|
||||
metadata_suffix_semantic=metadata_suffix_semantic,
|
||||
metadata_suffix_keyword=metadata_suffix_keyword,
|
||||
)
|
||||
continue
|
||||
|
||||
# If we can still fit this section into the current chunk, do so
|
||||
current_token_count = len(self.tokenizer.encode(chunk_text))
|
||||
current_offset = len(shared_precompare_cleanup(chunk_text))
|
||||
next_section_tokens = (
|
||||
len(self.tokenizer.encode(SECTION_SEPARATOR)) + section_token_count
|
||||
)
|
||||
|
||||
if next_section_tokens + current_token_count <= content_token_limit:
|
||||
if chunk_text:
|
||||
chunk_text += SECTION_SEPARATOR
|
||||
chunk_text += section_text
|
||||
link_offsets[current_offset] = section_link_text
|
||||
else:
|
||||
# finalize the existing chunk
|
||||
self._create_chunk(
|
||||
document,
|
||||
chunks,
|
||||
chunk_text,
|
||||
link_offsets,
|
||||
False,
|
||||
title_prefix,
|
||||
metadata_suffix_semantic,
|
||||
metadata_suffix_keyword,
|
||||
)
|
||||
# start a new chunk
|
||||
link_offsets = {0: section_link_text}
|
||||
chunk_text = section_text
|
||||
|
||||
# finalize any leftover text chunk
|
||||
if chunk_text.strip() or not chunks:
|
||||
self._create_chunk(
|
||||
document,
|
||||
chunks,
|
||||
chunk_text,
|
||||
link_offsets or {0: ""}, # safe default
|
||||
False,
|
||||
title_prefix,
|
||||
metadata_suffix_semantic,
|
||||
metadata_suffix_keyword,
|
||||
)
|
||||
return chunks
|
||||
|
||||
def _handle_single_document(
|
||||
self, document: IndexingDocument
|
||||
@@ -423,7 +194,10 @@ class Chunker:
|
||||
logger.debug(f"Chunking {document.semantic_identifier}")
|
||||
|
||||
# Title prep
|
||||
title = self._extract_blurb(document.get_title_for_document_index() or "")
|
||||
title = extract_blurb(
|
||||
document.get_title_for_document_index() or "",
|
||||
self.blurb_splitter,
|
||||
)
|
||||
title_prefix = title + RETURN_SEPARATOR if title else ""
|
||||
title_tokens = len(self.tokenizer.encode(title_prefix))
|
||||
|
||||
@@ -491,7 +265,7 @@ class Chunker:
|
||||
# Use processed_sections if available (IndexingDocument), otherwise use original sections
|
||||
sections_to_chunk = document.processed_sections
|
||||
|
||||
normal_chunks = self._chunk_document_with_sections(
|
||||
normal_chunks = self._document_chunker.chunk(
|
||||
document,
|
||||
sections_to_chunk,
|
||||
title_prefix,
|
||||
|
||||
7
backend/onyx/indexing/chunking/__init__.py
Normal file
7
backend/onyx/indexing/chunking/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
from onyx.indexing.chunking.document_chunker import DocumentChunker
|
||||
from onyx.indexing.chunking.section_chunker import extract_blurb
|
||||
|
||||
__all__ = [
|
||||
"DocumentChunker",
|
||||
"extract_blurb",
|
||||
]
|
||||
109
backend/onyx/indexing/chunking/document_chunker.py
Normal file
109
backend/onyx/indexing/chunking/document_chunker.py
Normal file
@@ -0,0 +1,109 @@
|
||||
from chonkie import SentenceChunker
|
||||
|
||||
from onyx.connectors.models import IndexingDocument
|
||||
from onyx.connectors.models import Section
|
||||
from onyx.connectors.models import SectionType
|
||||
from onyx.indexing.chunking.image_section_chunker import ImageChunker
|
||||
from onyx.indexing.chunking.section_chunker import AccumulatorState
|
||||
from onyx.indexing.chunking.section_chunker import ChunkPayload
|
||||
from onyx.indexing.chunking.section_chunker import SectionChunker
|
||||
from onyx.indexing.chunking.text_section_chunker import TextChunker
|
||||
from onyx.indexing.models import DocAwareChunk
|
||||
from onyx.natural_language_processing.utils import BaseTokenizer
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.text_processing import clean_text
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class DocumentChunker:
|
||||
"""Converts a document's processed sections into DocAwareChunks.
|
||||
|
||||
Drop-in replacement for `Chunker._chunk_document_with_sections`.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
tokenizer: BaseTokenizer,
|
||||
blurb_splitter: SentenceChunker,
|
||||
chunk_splitter: SentenceChunker,
|
||||
mini_chunk_splitter: SentenceChunker | None = None,
|
||||
) -> None:
|
||||
self.blurb_splitter = blurb_splitter
|
||||
self.mini_chunk_splitter = mini_chunk_splitter
|
||||
|
||||
self._dispatch: dict[SectionType, SectionChunker] = {
|
||||
SectionType.TEXT: TextChunker(
|
||||
tokenizer=tokenizer,
|
||||
chunk_splitter=chunk_splitter,
|
||||
),
|
||||
SectionType.IMAGE: ImageChunker(),
|
||||
}
|
||||
|
||||
def chunk(
|
||||
self,
|
||||
document: IndexingDocument,
|
||||
sections: list[Section],
|
||||
title_prefix: str,
|
||||
metadata_suffix_semantic: str,
|
||||
metadata_suffix_keyword: str,
|
||||
content_token_limit: int,
|
||||
) -> list[DocAwareChunk]:
|
||||
payloads = self._collect_section_payloads(
|
||||
document=document,
|
||||
sections=sections,
|
||||
content_token_limit=content_token_limit,
|
||||
)
|
||||
|
||||
if not payloads:
|
||||
payloads.append(ChunkPayload(text="", links={0: ""}))
|
||||
|
||||
return [
|
||||
payload.to_doc_aware_chunk(
|
||||
document=document,
|
||||
chunk_id=idx,
|
||||
blurb_splitter=self.blurb_splitter,
|
||||
mini_chunk_splitter=self.mini_chunk_splitter,
|
||||
title_prefix=title_prefix,
|
||||
metadata_suffix_semantic=metadata_suffix_semantic,
|
||||
metadata_suffix_keyword=metadata_suffix_keyword,
|
||||
)
|
||||
for idx, payload in enumerate(payloads)
|
||||
]
|
||||
|
||||
def _collect_section_payloads(
|
||||
self,
|
||||
document: IndexingDocument,
|
||||
sections: list[Section],
|
||||
content_token_limit: int,
|
||||
) -> list[ChunkPayload]:
|
||||
accumulator = AccumulatorState()
|
||||
payloads: list[ChunkPayload] = []
|
||||
|
||||
for section_idx, section in enumerate(sections):
|
||||
section_text = clean_text(str(section.text or ""))
|
||||
|
||||
if not section_text and (not document.title or section_idx > 0):
|
||||
logger.warning(
|
||||
f"Skipping empty or irrelevant section in doc "
|
||||
f"{document.semantic_identifier}, link={section.link}"
|
||||
)
|
||||
continue
|
||||
|
||||
chunker = self._select_chunker(section)
|
||||
result = chunker.chunk_section(
|
||||
section=section,
|
||||
accumulator=accumulator,
|
||||
content_token_limit=content_token_limit,
|
||||
)
|
||||
payloads.extend(result.payloads)
|
||||
accumulator = result.accumulator
|
||||
|
||||
payloads.extend(accumulator.flush_to_list())
|
||||
return payloads
|
||||
|
||||
def _select_chunker(self, section: Section) -> SectionChunker:
|
||||
try:
|
||||
return self._dispatch[section.type]
|
||||
except KeyError:
|
||||
raise ValueError(f"No SectionChunker registered for type={section.type}")
|
||||
35
backend/onyx/indexing/chunking/image_section_chunker.py
Normal file
35
backend/onyx/indexing/chunking/image_section_chunker.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from onyx.connectors.models import Section
|
||||
from onyx.indexing.chunking.section_chunker import AccumulatorState
|
||||
from onyx.indexing.chunking.section_chunker import ChunkPayload
|
||||
from onyx.indexing.chunking.section_chunker import SectionChunker
|
||||
from onyx.indexing.chunking.section_chunker import SectionChunkerOutput
|
||||
from onyx.utils.text_processing import clean_text
|
||||
|
||||
|
||||
class ImageChunker(SectionChunker):
|
||||
def chunk_section(
|
||||
self,
|
||||
section: Section,
|
||||
accumulator: AccumulatorState,
|
||||
content_token_limit: int, # noqa: ARG002
|
||||
) -> SectionChunkerOutput:
|
||||
assert section.image_file_id is not None
|
||||
|
||||
section_text = clean_text(str(section.text or ""))
|
||||
section_link = section.link or ""
|
||||
|
||||
# Flush any partially built text chunks
|
||||
payloads = accumulator.flush_to_list()
|
||||
payloads.append(
|
||||
ChunkPayload(
|
||||
text=section_text,
|
||||
links={0: section_link} if section_link else {},
|
||||
image_file_id=section.image_file_id,
|
||||
is_continuation=False,
|
||||
)
|
||||
)
|
||||
|
||||
return SectionChunkerOutput(
|
||||
payloads=payloads,
|
||||
accumulator=AccumulatorState(),
|
||||
)
|
||||
100
backend/onyx/indexing/chunking/section_chunker.py
Normal file
100
backend/onyx/indexing/chunking/section_chunker.py
Normal file
@@ -0,0 +1,100 @@
|
||||
from abc import ABC
|
||||
from abc import abstractmethod
|
||||
from collections.abc import Sequence
|
||||
from typing import cast
|
||||
|
||||
from chonkie import SentenceChunker
|
||||
from pydantic import BaseModel
|
||||
from pydantic import Field
|
||||
|
||||
from onyx.connectors.models import IndexingDocument
|
||||
from onyx.connectors.models import Section
|
||||
from onyx.indexing.models import DocAwareChunk
|
||||
|
||||
|
||||
def extract_blurb(text: str, blurb_splitter: SentenceChunker) -> str:
|
||||
texts = cast(list[str], blurb_splitter.chunk(text))
|
||||
if not texts:
|
||||
return ""
|
||||
return texts[0]
|
||||
|
||||
|
||||
def get_mini_chunk_texts(
|
||||
chunk_text: str,
|
||||
mini_chunk_splitter: SentenceChunker | None,
|
||||
) -> list[str] | None:
|
||||
if mini_chunk_splitter and chunk_text.strip():
|
||||
return list(cast(Sequence[str], mini_chunk_splitter.chunk(chunk_text)))
|
||||
return None
|
||||
|
||||
|
||||
class ChunkPayload(BaseModel):
|
||||
"""Section-local chunk content without document-scoped fields.
|
||||
|
||||
The orchestrator upgrades these to DocAwareChunks via
|
||||
`to_doc_aware_chunk` after assigning chunk_ids and attaching
|
||||
title/metadata.
|
||||
"""
|
||||
|
||||
text: str
|
||||
links: dict[int, str]
|
||||
is_continuation: bool = False
|
||||
image_file_id: str | None = None
|
||||
|
||||
def to_doc_aware_chunk(
|
||||
self,
|
||||
document: IndexingDocument,
|
||||
chunk_id: int,
|
||||
blurb_splitter: SentenceChunker,
|
||||
title_prefix: str = "",
|
||||
metadata_suffix_semantic: str = "",
|
||||
metadata_suffix_keyword: str = "",
|
||||
mini_chunk_splitter: SentenceChunker | None = None,
|
||||
) -> DocAwareChunk:
|
||||
return DocAwareChunk(
|
||||
source_document=document,
|
||||
chunk_id=chunk_id,
|
||||
blurb=extract_blurb(self.text, blurb_splitter),
|
||||
content=self.text,
|
||||
source_links=self.links or {0: ""},
|
||||
image_file_id=self.image_file_id,
|
||||
section_continuation=self.is_continuation,
|
||||
title_prefix=title_prefix,
|
||||
metadata_suffix_semantic=metadata_suffix_semantic,
|
||||
metadata_suffix_keyword=metadata_suffix_keyword,
|
||||
mini_chunk_texts=get_mini_chunk_texts(self.text, mini_chunk_splitter),
|
||||
large_chunk_id=None,
|
||||
doc_summary="",
|
||||
chunk_context="",
|
||||
contextual_rag_reserved_tokens=0,
|
||||
)
|
||||
|
||||
|
||||
class AccumulatorState(BaseModel):
|
||||
"""Cross-section text buffer threaded through SectionChunkers."""
|
||||
|
||||
text: str = ""
|
||||
link_offsets: dict[int, str] = Field(default_factory=dict)
|
||||
|
||||
def is_empty(self) -> bool:
|
||||
return not self.text.strip()
|
||||
|
||||
def flush_to_list(self) -> list[ChunkPayload]:
|
||||
if self.is_empty():
|
||||
return []
|
||||
return [ChunkPayload(text=self.text, links=self.link_offsets)]
|
||||
|
||||
|
||||
class SectionChunkerOutput(BaseModel):
|
||||
payloads: list[ChunkPayload]
|
||||
accumulator: AccumulatorState
|
||||
|
||||
|
||||
class SectionChunker(ABC):
|
||||
@abstractmethod
|
||||
def chunk_section(
|
||||
self,
|
||||
section: Section,
|
||||
accumulator: AccumulatorState,
|
||||
content_token_limit: int,
|
||||
) -> SectionChunkerOutput: ...
|
||||
129
backend/onyx/indexing/chunking/text_section_chunker.py
Normal file
129
backend/onyx/indexing/chunking/text_section_chunker.py
Normal file
@@ -0,0 +1,129 @@
|
||||
from typing import cast
|
||||
|
||||
from chonkie import SentenceChunker
|
||||
|
||||
from onyx.configs.constants import SECTION_SEPARATOR
|
||||
from onyx.connectors.models import Section
|
||||
from onyx.indexing.chunking.section_chunker import AccumulatorState
|
||||
from onyx.indexing.chunking.section_chunker import ChunkPayload
|
||||
from onyx.indexing.chunking.section_chunker import SectionChunker
|
||||
from onyx.indexing.chunking.section_chunker import SectionChunkerOutput
|
||||
from onyx.natural_language_processing.utils import BaseTokenizer
|
||||
from onyx.natural_language_processing.utils import count_tokens
|
||||
from onyx.utils.text_processing import clean_text
|
||||
from onyx.utils.text_processing import shared_precompare_cleanup
|
||||
from shared_configs.configs import STRICT_CHUNK_TOKEN_LIMIT
|
||||
|
||||
|
||||
class TextChunker(SectionChunker):
|
||||
def __init__(
|
||||
self,
|
||||
tokenizer: BaseTokenizer,
|
||||
chunk_splitter: SentenceChunker,
|
||||
) -> None:
|
||||
self.tokenizer = tokenizer
|
||||
self.chunk_splitter = chunk_splitter
|
||||
|
||||
self.section_separator_token_count = count_tokens(
|
||||
SECTION_SEPARATOR,
|
||||
self.tokenizer,
|
||||
)
|
||||
|
||||
def chunk_section(
|
||||
self,
|
||||
section: Section,
|
||||
accumulator: AccumulatorState,
|
||||
content_token_limit: int,
|
||||
) -> SectionChunkerOutput:
|
||||
section_text = clean_text(str(section.text or ""))
|
||||
section_link = section.link or ""
|
||||
section_token_count = len(self.tokenizer.encode(section_text))
|
||||
|
||||
# Oversized — flush buffer and split the section
|
||||
if section_token_count > content_token_limit:
|
||||
return self._handle_oversized_section(
|
||||
section_text=section_text,
|
||||
section_link=section_link,
|
||||
accumulator=accumulator,
|
||||
content_token_limit=content_token_limit,
|
||||
)
|
||||
|
||||
current_token_count = count_tokens(accumulator.text, self.tokenizer)
|
||||
next_section_tokens = self.section_separator_token_count + section_token_count
|
||||
|
||||
# Fits — extend the accumulator
|
||||
if next_section_tokens + current_token_count <= content_token_limit:
|
||||
offset = len(shared_precompare_cleanup(accumulator.text))
|
||||
new_text = accumulator.text
|
||||
if new_text:
|
||||
new_text += SECTION_SEPARATOR
|
||||
new_text += section_text
|
||||
return SectionChunkerOutput(
|
||||
payloads=[],
|
||||
accumulator=AccumulatorState(
|
||||
text=new_text,
|
||||
link_offsets={**accumulator.link_offsets, offset: section_link},
|
||||
),
|
||||
)
|
||||
|
||||
# Doesn't fit — flush buffer and restart with this section
|
||||
return SectionChunkerOutput(
|
||||
payloads=accumulator.flush_to_list(),
|
||||
accumulator=AccumulatorState(
|
||||
text=section_text,
|
||||
link_offsets={0: section_link},
|
||||
),
|
||||
)
|
||||
|
||||
def _handle_oversized_section(
|
||||
self,
|
||||
section_text: str,
|
||||
section_link: str,
|
||||
accumulator: AccumulatorState,
|
||||
content_token_limit: int,
|
||||
) -> SectionChunkerOutput:
|
||||
payloads = accumulator.flush_to_list()
|
||||
|
||||
split_texts = cast(list[str], self.chunk_splitter.chunk(section_text))
|
||||
for i, split_text in enumerate(split_texts):
|
||||
if (
|
||||
STRICT_CHUNK_TOKEN_LIMIT
|
||||
and count_tokens(split_text, self.tokenizer) > content_token_limit
|
||||
):
|
||||
smaller_chunks = self._split_oversized_chunk(
|
||||
split_text, content_token_limit
|
||||
)
|
||||
for j, small_chunk in enumerate(smaller_chunks):
|
||||
payloads.append(
|
||||
ChunkPayload(
|
||||
text=small_chunk,
|
||||
links={0: section_link},
|
||||
is_continuation=(j != 0),
|
||||
)
|
||||
)
|
||||
else:
|
||||
payloads.append(
|
||||
ChunkPayload(
|
||||
text=split_text,
|
||||
links={0: section_link},
|
||||
is_continuation=(i != 0),
|
||||
)
|
||||
)
|
||||
|
||||
return SectionChunkerOutput(
|
||||
payloads=payloads,
|
||||
accumulator=AccumulatorState(),
|
||||
)
|
||||
|
||||
def _split_oversized_chunk(self, text: str, content_token_limit: int) -> list[str]:
|
||||
tokens = self.tokenizer.tokenize(text)
|
||||
chunks: list[str] = []
|
||||
start = 0
|
||||
total_tokens = len(tokens)
|
||||
while start < total_tokens:
|
||||
end = min(start + content_token_limit, total_tokens)
|
||||
token_chunk = tokens[start:end]
|
||||
chunk_text = " ".join(token_chunk)
|
||||
chunks.append(chunk_text)
|
||||
start = end
|
||||
return chunks
|
||||
@@ -542,6 +542,7 @@ def process_image_sections(documents: list[Document]) -> list[IndexingDocument]:
|
||||
**document.model_dump(),
|
||||
processed_sections=[
|
||||
Section(
|
||||
type=section.type,
|
||||
text=section.text if isinstance(section, TextSection) else "",
|
||||
link=section.link,
|
||||
image_file_id=(
|
||||
@@ -566,6 +567,7 @@ def process_image_sections(documents: list[Document]) -> list[IndexingDocument]:
|
||||
if isinstance(section, ImageSection):
|
||||
# Default section with image path preserved - ensure text is always a string
|
||||
processed_section = Section(
|
||||
type=section.type,
|
||||
link=section.link,
|
||||
image_file_id=section.image_file_id,
|
||||
text="", # Initialize with empty string
|
||||
@@ -609,6 +611,7 @@ def process_image_sections(documents: list[Document]) -> list[IndexingDocument]:
|
||||
# For TextSection, create a base Section with text and link
|
||||
elif isinstance(section, TextSection):
|
||||
processed_section = Section(
|
||||
type=section.type,
|
||||
text=section.text or "", # Ensure text is always a string, not None
|
||||
link=section.link,
|
||||
image_file_id=None,
|
||||
|
||||
@@ -66,7 +66,7 @@ PROVIDER_DISPLAY_NAMES: dict[str, str] = {
|
||||
LlmProviderNames.LM_STUDIO: "LM Studio",
|
||||
LlmProviderNames.LITELLM_PROXY: "LiteLLM Proxy",
|
||||
LlmProviderNames.BIFROST: "Bifrost",
|
||||
LlmProviderNames.OPENAI_COMPATIBLE: "OpenAI Compatible",
|
||||
LlmProviderNames.OPENAI_COMPATIBLE: "OpenAI-Compatible",
|
||||
"groq": "Groq",
|
||||
"anyscale": "Anyscale",
|
||||
"deepseek": "DeepSeek",
|
||||
@@ -87,6 +87,44 @@ PROVIDER_DISPLAY_NAMES: dict[str, str] = {
|
||||
"gemini": "Gemini",
|
||||
"stability": "Stability",
|
||||
"writer": "Writer",
|
||||
# Custom provider display names (used in the custom provider picker)
|
||||
"aiml": "AI/ML",
|
||||
"assemblyai": "AssemblyAI",
|
||||
"aws_polly": "AWS Polly",
|
||||
"azure_ai": "Azure AI",
|
||||
"chatgpt": "ChatGPT",
|
||||
"cohere_chat": "Cohere Chat",
|
||||
"datarobot": "DataRobot",
|
||||
"deepgram": "Deepgram",
|
||||
"deepinfra": "DeepInfra",
|
||||
"elevenlabs": "ElevenLabs",
|
||||
"fal_ai": "fal.ai",
|
||||
"featherless_ai": "Featherless AI",
|
||||
"fireworks_ai": "Fireworks AI",
|
||||
"friendliai": "FriendliAI",
|
||||
"gigachat": "GigaChat",
|
||||
"github_copilot": "GitHub Copilot",
|
||||
"gradient_ai": "Gradient AI",
|
||||
"huggingface": "HuggingFace",
|
||||
"jina_ai": "Jina AI",
|
||||
"lambda_ai": "Lambda AI",
|
||||
"llamagate": "LlamaGate",
|
||||
"meta_llama": "Meta Llama",
|
||||
"minimax": "MiniMax",
|
||||
"nlp_cloud": "NLP Cloud",
|
||||
"nvidia_nim": "NVIDIA NIM",
|
||||
"oci": "OCI",
|
||||
"ovhcloud": "OVHcloud",
|
||||
"palm": "PaLM",
|
||||
"publicai": "PublicAI",
|
||||
"runwayml": "RunwayML",
|
||||
"sambanova": "SambaNova",
|
||||
"together_ai": "Together AI",
|
||||
"vercel_ai_gateway": "Vercel AI Gateway",
|
||||
"volcengine": "Volcengine",
|
||||
"wandb": "W&B",
|
||||
"watsonx": "IBM watsonx",
|
||||
"zai": "ZAI",
|
||||
}
|
||||
|
||||
# Map vendors to their brand names (used for provider_display_name generation)
|
||||
|
||||
@@ -338,7 +338,7 @@ def get_provider_display_name(provider_name: str) -> str:
|
||||
VERTEXAI_PROVIDER_NAME: "Google Vertex AI",
|
||||
OPENROUTER_PROVIDER_NAME: "OpenRouter",
|
||||
LITELLM_PROXY_PROVIDER_NAME: "LiteLLM Proxy",
|
||||
OPENAI_COMPATIBLE_PROVIDER_NAME: "OpenAI Compatible",
|
||||
OPENAI_COMPATIBLE_PROVIDER_NAME: "OpenAI-Compatible",
|
||||
}
|
||||
|
||||
if provider_name in _ONYX_PROVIDER_DISPLAY_NAMES:
|
||||
|
||||
@@ -90,6 +90,7 @@ from onyx.onyxbot.slack.utils import respond_in_thread_or_channel
|
||||
from onyx.onyxbot.slack.utils import TenantSocketModeClient
|
||||
from onyx.redis.redis_pool import get_redis_client
|
||||
from onyx.server.manage.models import SlackBotTokens
|
||||
from onyx.tracing.setup import setup_tracing
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop
|
||||
from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable
|
||||
@@ -1206,6 +1207,7 @@ if __name__ == "__main__":
|
||||
tenant_handler = SlackbotHandler()
|
||||
|
||||
set_is_ee_based_on_env_variable()
|
||||
setup_tracing()
|
||||
|
||||
try:
|
||||
# Keep the main thread alive
|
||||
|
||||
@@ -58,7 +58,7 @@ docker buildx build --platform linux/amd64,linux/arm64 \
|
||||
|
||||
1. **Build and push** the new image (see above)
|
||||
|
||||
2. **Update the ConfigMap** in `cloud-deployment-yamls/danswer/configmap/env-configmap.yaml`:
|
||||
2. **Update the ConfigMap** in in the internal repo
|
||||
```yaml
|
||||
SANDBOX_CONTAINER_IMAGE: "onyxdotapp/sandbox:v0.1.x"
|
||||
```
|
||||
|
||||
@@ -618,6 +618,7 @@ done
|
||||
"app.kubernetes.io/managed-by": "onyx",
|
||||
"onyx.app/sandbox-id": sandbox_id,
|
||||
"onyx.app/tenant-id": tenant_id,
|
||||
"admission.datadoghq.com/enabled": "false",
|
||||
},
|
||||
),
|
||||
spec=pod_spec,
|
||||
|
||||
@@ -96,6 +96,32 @@ def _truncate_description(description: str | None, max_length: int = 500) -> str
|
||||
return description[: max_length - 3] + "..."
|
||||
|
||||
|
||||
# TODO: Replace mask-comparison approach with an explicit Unset sentinel from the
|
||||
# frontend indicating whether each credential field was actually modified. The current
|
||||
# approach is brittle (e.g. short credentials produce a fixed-length mask that could
|
||||
# collide) and mutates request values, which is surprising. The frontend should signal
|
||||
# "unchanged" vs "new value" directly rather than relying on masked-string equality.
|
||||
def _restore_masked_oauth_credentials(
|
||||
request_client_id: str | None,
|
||||
request_client_secret: str | None,
|
||||
existing_client: OAuthClientInformationFull,
|
||||
) -> tuple[str | None, str | None]:
|
||||
"""If the frontend sent back masked credentials, restore the real stored values."""
|
||||
if (
|
||||
request_client_id
|
||||
and existing_client.client_id
|
||||
and request_client_id == mask_string(existing_client.client_id)
|
||||
):
|
||||
request_client_id = existing_client.client_id
|
||||
if (
|
||||
request_client_secret
|
||||
and existing_client.client_secret
|
||||
and request_client_secret == mask_string(existing_client.client_secret)
|
||||
):
|
||||
request_client_secret = existing_client.client_secret
|
||||
return request_client_id, request_client_secret
|
||||
|
||||
|
||||
router = APIRouter(prefix="/mcp")
|
||||
admin_router = APIRouter(prefix="/admin/mcp")
|
||||
STATE_TTL_SECONDS = 60 * 5 # 5 minutes
|
||||
@@ -392,6 +418,26 @@ async def _connect_oauth(
|
||||
detail=f"Server was configured with authentication type {auth_type_str}",
|
||||
)
|
||||
|
||||
# If the frontend sent back masked credentials (unchanged by the user),
|
||||
# restore the real stored values so we don't overwrite them with masks.
|
||||
if mcp_server.admin_connection_config:
|
||||
existing_data = extract_connection_data(
|
||||
mcp_server.admin_connection_config, apply_mask=False
|
||||
)
|
||||
existing_client_raw = existing_data.get(MCPOAuthKeys.CLIENT_INFO.value)
|
||||
if existing_client_raw:
|
||||
existing_client = OAuthClientInformationFull.model_validate(
|
||||
existing_client_raw
|
||||
)
|
||||
(
|
||||
request.oauth_client_id,
|
||||
request.oauth_client_secret,
|
||||
) = _restore_masked_oauth_credentials(
|
||||
request.oauth_client_id,
|
||||
request.oauth_client_secret,
|
||||
existing_client,
|
||||
)
|
||||
|
||||
# Create admin config with client info if provided
|
||||
config_data = MCPConnectionData(headers={})
|
||||
if request.oauth_client_id and request.oauth_client_secret:
|
||||
@@ -1356,6 +1402,19 @@ def _upsert_mcp_server(
|
||||
if client_info_raw:
|
||||
client_info = OAuthClientInformationFull.model_validate(client_info_raw)
|
||||
|
||||
# If the frontend sent back masked credentials (unchanged by the user),
|
||||
# restore the real stored values so the comparison below sees no change
|
||||
# and the credentials aren't overwritten with masked strings.
|
||||
if client_info and request.auth_type == MCPAuthenticationType.OAUTH:
|
||||
(
|
||||
request.oauth_client_id,
|
||||
request.oauth_client_secret,
|
||||
) = _restore_masked_oauth_credentials(
|
||||
request.oauth_client_id,
|
||||
request.oauth_client_secret,
|
||||
client_info,
|
||||
)
|
||||
|
||||
changing_connection_config = (
|
||||
not mcp_server.admin_connection_config
|
||||
or (
|
||||
|
||||
@@ -40,6 +40,8 @@ from onyx.db.models import User
|
||||
from onyx.db.persona import user_can_access_persona
|
||||
from onyx.error_handling.error_codes import OnyxErrorCode
|
||||
from onyx.error_handling.exceptions import OnyxError
|
||||
from onyx.llm.constants import PROVIDER_DISPLAY_NAMES
|
||||
from onyx.llm.constants import WELL_KNOWN_PROVIDER_NAMES
|
||||
from onyx.llm.factory import get_default_llm
|
||||
from onyx.llm.factory import get_llm
|
||||
from onyx.llm.factory import get_max_input_tokens_from_llm_provider
|
||||
@@ -60,6 +62,7 @@ from onyx.server.manage.llm.models import BedrockFinalModelResponse
|
||||
from onyx.server.manage.llm.models import BedrockModelsRequest
|
||||
from onyx.server.manage.llm.models import BifrostFinalModelResponse
|
||||
from onyx.server.manage.llm.models import BifrostModelsRequest
|
||||
from onyx.server.manage.llm.models import CustomProviderOption
|
||||
from onyx.server.manage.llm.models import DefaultModel
|
||||
from onyx.server.manage.llm.models import LitellmFinalModelResponse
|
||||
from onyx.server.manage.llm.models import LitellmModelDetails
|
||||
@@ -108,6 +111,43 @@ def _mask_string(value: str) -> str:
|
||||
return value[:4] + "****" + value[-4:]
|
||||
|
||||
|
||||
def _resolve_api_key(
|
||||
api_key: str | None,
|
||||
provider_name: str | None,
|
||||
api_base: str | None,
|
||||
db_session: Session,
|
||||
) -> str | None:
|
||||
"""Return the real API key for model-fetch endpoints.
|
||||
|
||||
When editing an existing provider the form value is masked (e.g.
|
||||
``sk-a****b1c2``). If *provider_name* is supplied we can look up
|
||||
the unmasked key from the database so the external request succeeds.
|
||||
|
||||
The stored key is only returned when the request's *api_base*
|
||||
matches the value stored in the database.
|
||||
"""
|
||||
if not provider_name:
|
||||
return api_key
|
||||
|
||||
existing_provider = fetch_existing_llm_provider(
|
||||
name=provider_name, db_session=db_session
|
||||
)
|
||||
if existing_provider and existing_provider.api_key:
|
||||
# Normalise both URLs before comparing so trailing-slash
|
||||
# differences don't cause a false mismatch.
|
||||
stored_base = (existing_provider.api_base or "").strip().rstrip("/")
|
||||
request_base = (api_base or "").strip().rstrip("/")
|
||||
if stored_base != request_base:
|
||||
return api_key
|
||||
|
||||
stored_key = existing_provider.api_key.get_value(apply_mask=False)
|
||||
# Only resolve when the incoming value is the masked form of the
|
||||
# stored key — i.e. the user hasn't typed a new key.
|
||||
if api_key and api_key == _mask_string(stored_key):
|
||||
return stored_key
|
||||
return api_key
|
||||
|
||||
|
||||
def _sync_fetched_models(
|
||||
db_session: Session,
|
||||
provider_name: str,
|
||||
@@ -250,6 +290,29 @@ def _validate_llm_provider_change(
|
||||
)
|
||||
|
||||
|
||||
@admin_router.get("/custom-provider-names")
|
||||
def fetch_custom_provider_names(
|
||||
_: User = Depends(require_permission(Permission.FULL_ADMIN_PANEL_ACCESS)),
|
||||
) -> list[CustomProviderOption]:
|
||||
"""Returns the sorted list of LiteLLM provider names that can be used
|
||||
with the custom provider modal (i.e. everything that is not already
|
||||
covered by a well-known provider modal)."""
|
||||
import litellm
|
||||
|
||||
well_known = {p.value for p in WELL_KNOWN_PROVIDER_NAMES}
|
||||
return sorted(
|
||||
(
|
||||
CustomProviderOption(
|
||||
value=name,
|
||||
label=PROVIDER_DISPLAY_NAMES.get(name, name.replace("_", " ").title()),
|
||||
)
|
||||
for name in litellm.models_by_provider.keys()
|
||||
if name not in well_known
|
||||
),
|
||||
key=lambda o: o.label.lower(),
|
||||
)
|
||||
|
||||
|
||||
@admin_router.get("/built-in/options")
|
||||
def fetch_llm_options(
|
||||
_: User = Depends(require_permission(Permission.FULL_ADMIN_PANEL_ACCESS)),
|
||||
@@ -1148,16 +1211,17 @@ def get_ollama_available_models(
|
||||
return sorted_results
|
||||
|
||||
|
||||
def _get_openrouter_models_response(api_base: str, api_key: str) -> dict:
|
||||
def _get_openrouter_models_response(api_base: str, api_key: str | None) -> dict:
|
||||
"""Perform GET to OpenRouter /models and return parsed JSON."""
|
||||
cleaned_api_base = api_base.strip().rstrip("/")
|
||||
url = f"{cleaned_api_base}/models"
|
||||
headers = {
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
headers: dict[str, str] = {
|
||||
# Optional headers recommended by OpenRouter for attribution
|
||||
"HTTP-Referer": "https://onyx.app",
|
||||
"X-Title": "Onyx",
|
||||
}
|
||||
if api_key:
|
||||
headers["Authorization"] = f"Bearer {api_key}"
|
||||
try:
|
||||
response = httpx.get(url, headers=headers, timeout=10.0)
|
||||
response.raise_for_status()
|
||||
@@ -1180,8 +1244,12 @@ def get_openrouter_available_models(
|
||||
Parses id, name (display), context_length, and architecture.input_modalities.
|
||||
"""
|
||||
|
||||
api_key = _resolve_api_key(
|
||||
request.api_key, request.provider_name, request.api_base, db_session
|
||||
)
|
||||
|
||||
response_json = _get_openrouter_models_response(
|
||||
api_base=request.api_base, api_key=request.api_key
|
||||
api_base=request.api_base, api_key=api_key
|
||||
)
|
||||
|
||||
data = response_json.get("data", [])
|
||||
@@ -1274,13 +1342,18 @@ def get_lm_studio_available_models(
|
||||
|
||||
# If provider_name is given and the api_key hasn't been changed by the user,
|
||||
# fall back to the stored API key from the database (the form value is masked).
|
||||
# Only do so when the api_base matches what is stored.
|
||||
api_key = request.api_key
|
||||
if request.provider_name and not request.api_key_changed:
|
||||
existing_provider = fetch_existing_llm_provider(
|
||||
name=request.provider_name, db_session=db_session
|
||||
)
|
||||
if existing_provider and existing_provider.custom_config:
|
||||
api_key = existing_provider.custom_config.get(LM_STUDIO_API_KEY_CONFIG_KEY)
|
||||
stored_base = (existing_provider.api_base or "").strip().rstrip("/")
|
||||
if stored_base == cleaned_api_base:
|
||||
api_key = existing_provider.custom_config.get(
|
||||
LM_STUDIO_API_KEY_CONFIG_KEY
|
||||
)
|
||||
|
||||
url = f"{cleaned_api_base}/api/v1/models"
|
||||
headers: dict[str, str] = {}
|
||||
@@ -1364,8 +1437,12 @@ def get_litellm_available_models(
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> list[LitellmFinalModelResponse]:
|
||||
"""Fetch available models from Litellm proxy /v1/models endpoint."""
|
||||
api_key = _resolve_api_key(
|
||||
request.api_key, request.provider_name, request.api_base, db_session
|
||||
)
|
||||
|
||||
response_json = _get_litellm_models_response(
|
||||
api_key=request.api_key, api_base=request.api_base
|
||||
api_key=api_key, api_base=request.api_base
|
||||
)
|
||||
|
||||
models = response_json.get("data", [])
|
||||
@@ -1422,7 +1499,7 @@ def get_litellm_available_models(
|
||||
return sorted_results
|
||||
|
||||
|
||||
def _get_litellm_models_response(api_key: str, api_base: str) -> dict:
|
||||
def _get_litellm_models_response(api_key: str | None, api_base: str) -> dict:
|
||||
"""Perform GET to Litellm proxy /api/v1/models and return parsed JSON."""
|
||||
cleaned_api_base = api_base.strip().rstrip("/")
|
||||
url = f"{cleaned_api_base}/v1/models"
|
||||
@@ -1497,8 +1574,12 @@ def get_bifrost_available_models(
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> list[BifrostFinalModelResponse]:
|
||||
"""Fetch available models from Bifrost gateway /v1/models endpoint."""
|
||||
api_key = _resolve_api_key(
|
||||
request.api_key, request.provider_name, request.api_base, db_session
|
||||
)
|
||||
|
||||
response_json = _get_bifrost_models_response(
|
||||
api_base=request.api_base, api_key=request.api_key
|
||||
api_base=request.api_base, api_key=api_key
|
||||
)
|
||||
|
||||
models = response_json.get("data", [])
|
||||
@@ -1587,8 +1668,12 @@ def get_openai_compatible_server_available_models(
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> list[OpenAICompatibleFinalModelResponse]:
|
||||
"""Fetch available models from a generic OpenAI-compatible /v1/models endpoint."""
|
||||
api_key = _resolve_api_key(
|
||||
request.api_key, request.provider_name, request.api_base, db_session
|
||||
)
|
||||
|
||||
response_json = _get_openai_compatible_server_response(
|
||||
api_base=request.api_base, api_key=request.api_key
|
||||
api_base=request.api_base, api_key=api_key
|
||||
)
|
||||
|
||||
models = response_json.get("data", [])
|
||||
@@ -1648,7 +1733,7 @@ def get_openai_compatible_server_available_models(
|
||||
)
|
||||
for r in sorted_results
|
||||
],
|
||||
source_label="OpenAI Compatible",
|
||||
source_label="OpenAI-Compatible",
|
||||
)
|
||||
|
||||
return sorted_results
|
||||
@@ -1667,6 +1752,6 @@ def _get_openai_compatible_server_response(
|
||||
|
||||
return _get_openai_compatible_models_response(
|
||||
url=url,
|
||||
source_name="OpenAI Compatible",
|
||||
source_name="OpenAI-Compatible",
|
||||
api_key=api_key,
|
||||
)
|
||||
|
||||
@@ -28,6 +28,13 @@ if TYPE_CHECKING:
|
||||
T = TypeVar("T", "LLMProviderDescriptor", "LLMProviderView", "VisionProviderResponse")
|
||||
|
||||
|
||||
class CustomProviderOption(BaseModel):
|
||||
"""A provider slug + human-friendly label for the custom-provider picker."""
|
||||
|
||||
value: str
|
||||
label: str
|
||||
|
||||
|
||||
class TestLLMRequest(BaseModel):
|
||||
# provider level
|
||||
id: int | None = None
|
||||
|
||||
72
backend/onyx/server/metrics/pruning_metrics.py
Normal file
72
backend/onyx/server/metrics/pruning_metrics.py
Normal file
@@ -0,0 +1,72 @@
|
||||
"""Pruning-specific Prometheus metrics.
|
||||
|
||||
Tracks three pruning pipeline phases for connector_pruning_generator_task:
|
||||
1. Document ID enumeration duration (extract_ids_from_runnable_connector)
|
||||
2. Diff + dispatch duration (DB lookup, set diff, generate_tasks)
|
||||
3. Rate limit errors during enumeration
|
||||
|
||||
All metrics are labeled by connector_type to identify which connector sources
|
||||
are the most expensive to prune. cc_pair_id is intentionally excluded to avoid
|
||||
unbounded cardinality.
|
||||
|
||||
Usage:
|
||||
from onyx.server.metrics.pruning_metrics import (
|
||||
observe_pruning_enumeration_duration,
|
||||
observe_pruning_diff_duration,
|
||||
inc_pruning_rate_limit_error,
|
||||
)
|
||||
"""
|
||||
|
||||
from prometheus_client import Counter
|
||||
from prometheus_client import Histogram
|
||||
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
PRUNING_ENUMERATION_DURATION = Histogram(
|
||||
"onyx_pruning_enumeration_duration_seconds",
|
||||
"Duration of document ID enumeration from the source connector during pruning",
|
||||
["connector_type"],
|
||||
buckets=[1, 5, 15, 30, 60, 120, 300, 600, 1800, 3600],
|
||||
)
|
||||
|
||||
PRUNING_DIFF_DURATION = Histogram(
|
||||
"onyx_pruning_diff_duration_seconds",
|
||||
"Duration of diff computation and subtask dispatch during pruning",
|
||||
["connector_type"],
|
||||
buckets=[1, 5, 15, 30, 60, 120, 300, 600, 1800, 3600],
|
||||
)
|
||||
|
||||
PRUNING_RATE_LIMIT_ERRORS = Counter(
|
||||
"onyx_pruning_rate_limit_errors_total",
|
||||
"Total rate limit errors encountered during pruning document ID enumeration",
|
||||
["connector_type"],
|
||||
)
|
||||
|
||||
|
||||
def observe_pruning_enumeration_duration(
|
||||
duration_seconds: float, connector_type: str
|
||||
) -> None:
|
||||
try:
|
||||
PRUNING_ENUMERATION_DURATION.labels(connector_type=connector_type).observe(
|
||||
duration_seconds
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("Failed to record pruning enumeration duration", exc_info=True)
|
||||
|
||||
|
||||
def observe_pruning_diff_duration(duration_seconds: float, connector_type: str) -> None:
|
||||
try:
|
||||
PRUNING_DIFF_DURATION.labels(connector_type=connector_type).observe(
|
||||
duration_seconds
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("Failed to record pruning diff duration", exc_info=True)
|
||||
|
||||
|
||||
def inc_pruning_rate_limit_error(connector_type: str) -> None:
|
||||
try:
|
||||
PRUNING_RATE_LIMIT_ERRORS.labels(connector_type=connector_type).inc()
|
||||
except Exception:
|
||||
logger.debug("Failed to record pruning rate limit error", exc_info=True)
|
||||
@@ -65,7 +65,8 @@ class Settings(BaseModel):
|
||||
anonymous_user_enabled: bool | None = None
|
||||
invite_only_enabled: bool = False
|
||||
deep_research_enabled: bool | None = None
|
||||
search_ui_enabled: bool | None = None
|
||||
multi_model_chat_enabled: bool | None = True
|
||||
search_ui_enabled: bool | None = True
|
||||
|
||||
# Whether EE features are unlocked for use.
|
||||
# Depends on license status: True when the user has a valid license
|
||||
@@ -89,7 +90,8 @@ class Settings(BaseModel):
|
||||
default=DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB, ge=0
|
||||
)
|
||||
file_token_count_threshold_k: int | None = Field(
|
||||
default=None, ge=0 # thousands of tokens; None = context-aware default
|
||||
default=None,
|
||||
ge=0, # thousands of tokens; None = context-aware default
|
||||
)
|
||||
|
||||
# Connector settings
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
|
||||
from collections.abc import Callable
|
||||
|
||||
from fastapi import HTTPException
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.configs.app_configs import ANTHROPIC_DEFAULT_API_KEY
|
||||
@@ -12,6 +11,8 @@ from onyx.configs.app_configs import OPENROUTER_DEFAULT_API_KEY
|
||||
from onyx.db.usage import check_usage_limit
|
||||
from onyx.db.usage import UsageLimitExceededError
|
||||
from onyx.db.usage import UsageType
|
||||
from onyx.error_handling.error_codes import OnyxErrorCode
|
||||
from onyx.error_handling.exceptions import OnyxError
|
||||
from onyx.server.tenant_usage_limits import TenantUsageLimitKeys
|
||||
from onyx.server.tenant_usage_limits import TenantUsageLimitOverrides
|
||||
from onyx.utils.logger import setup_logger
|
||||
@@ -255,11 +256,14 @@ def check_usage_and_raise(
|
||||
"Please upgrade your plan or wait for the next billing period."
|
||||
)
|
||||
elif usage_type == UsageType.API_CALLS:
|
||||
detail = (
|
||||
f"API call limit exceeded for {user_type} account. "
|
||||
f"Calls: {int(e.current)}, Limit: {int(e.limit)} per week. "
|
||||
"Please upgrade your plan or wait for the next billing period."
|
||||
)
|
||||
if is_trial and e.limit == 0:
|
||||
detail = "API access is not available on trial accounts. Please upgrade to a paid plan to use the API and chat widget."
|
||||
else:
|
||||
detail = (
|
||||
f"API call limit exceeded for {user_type} account. "
|
||||
f"Calls: {int(e.current)}, Limit: {int(e.limit)} per week. "
|
||||
"Please upgrade your plan or wait for the next billing period."
|
||||
)
|
||||
else:
|
||||
detail = (
|
||||
f"Non-streaming API call limit exceeded for {user_type} account. "
|
||||
@@ -267,4 +271,4 @@ def check_usage_and_raise(
|
||||
"Please upgrade your plan or wait for the next billing period."
|
||||
)
|
||||
|
||||
raise HTTPException(status_code=429, detail=detail)
|
||||
raise OnyxError(OnyxErrorCode.RATE_LIMITED, detail)
|
||||
|
||||
@@ -17,6 +17,7 @@ def documents_to_indexing_documents(
|
||||
processed_sections = []
|
||||
for section in document.sections:
|
||||
processed_section = Section(
|
||||
type=section.type,
|
||||
text=section.text or "",
|
||||
link=section.link,
|
||||
image_file_id=None,
|
||||
|
||||
@@ -1,10 +0,0 @@
|
||||
[project]
|
||||
name = "onyx-backend"
|
||||
version = "0.0.0"
|
||||
requires-python = ">=3.11"
|
||||
dependencies = [
|
||||
"onyx[backend,dev,ee]",
|
||||
]
|
||||
|
||||
[tool.uv.sources]
|
||||
onyx = { workspace = true }
|
||||
@@ -46,11 +46,11 @@ curl -LsSf https://astral.py/uv/install.sh | sh
|
||||
|
||||
1. Edit `pyproject.toml`
|
||||
2. Add/update/remove dependencies in the appropriate section:
|
||||
- `[dependency-groups]` for dev tools
|
||||
- `[project.dependencies]` for **shared** dependencies (used by both backend and model_server)
|
||||
- `[project.optional-dependencies.backend]` for backend-only dependencies
|
||||
- `[project.optional-dependencies.model_server]` for model_server-only dependencies (ML packages)
|
||||
- `[project.optional-dependencies.ee]` for EE features
|
||||
- `[dependency-groups.backend]` for backend-only dependencies
|
||||
- `[dependency-groups.dev]` for dev tools
|
||||
- `[dependency-groups.ee]` for EE features
|
||||
- `[dependency-groups.model_server]` for model_server-only dependencies (ML packages)
|
||||
3. Commit your changes - pre-commit hooks will automatically regenerate the lock file and requirements
|
||||
|
||||
### 3. Generating Lock File and Requirements
|
||||
@@ -64,10 +64,10 @@ To manually regenerate:
|
||||
|
||||
```bash
|
||||
uv lock
|
||||
uv export --no-emit-project --no-default-groups --no-hashes --extra backend -o backend/requirements/default.txt
|
||||
uv export --no-emit-project --no-default-groups --no-hashes --group backend -o backend/requirements/default.txt
|
||||
uv export --no-emit-project --no-default-groups --no-hashes --group dev -o backend/requirements/dev.txt
|
||||
uv export --no-emit-project --no-default-groups --no-hashes --extra ee -o backend/requirements/ee.txt
|
||||
uv export --no-emit-project --no-default-groups --no-hashes --extra model_server -o backend/requirements/model_server.txt
|
||||
uv export --no-emit-project --no-default-groups --no-hashes --group ee -o backend/requirements/ee.txt
|
||||
uv export --no-emit-project --no-default-groups --no-hashes --group model_server -o backend/requirements/model_server.txt
|
||||
```
|
||||
|
||||
### 4. Installing Dependencies
|
||||
@@ -76,30 +76,14 @@ If enabled, all packages are installed automatically by the `uv-sync` pre-commit
|
||||
branches or pulling new changes.
|
||||
|
||||
```bash
|
||||
# For everything (most common)
|
||||
uv sync --all-extras
|
||||
# For development (most common) — installs shared + backend + dev + ee
|
||||
uv sync
|
||||
|
||||
# For backend production (shared + backend dependencies)
|
||||
uv sync --extra backend
|
||||
|
||||
# For backend development (shared + backend + dev tools)
|
||||
uv sync --extra backend --extra dev
|
||||
|
||||
# For backend with EE (shared + backend + ee)
|
||||
uv sync --extra backend --extra ee
|
||||
# For backend production only (shared + backend dependencies)
|
||||
uv sync --no-default-groups --group backend
|
||||
|
||||
# For model server (shared + model_server, NO backend deps!)
|
||||
uv sync --extra model_server
|
||||
```
|
||||
|
||||
`uv` aggressively [ignores active virtual environments](https://docs.astral.sh/uv/concepts/projects/config/#project-environment-path) and prefers the root virtual environment.
|
||||
When working in workspace packages, be sure to pass `--active` when syncing the virtual environment:
|
||||
|
||||
```bash
|
||||
cd backend/
|
||||
source .venv/bin/activate
|
||||
uv sync --active
|
||||
uv run --active ...
|
||||
uv sync --no-default-groups --group model_server
|
||||
```
|
||||
|
||||
### 5. Upgrading Dependencies
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# This file was autogenerated by uv via the following command:
|
||||
# uv export --no-emit-project --no-default-groups --no-hashes --extra backend -o backend/requirements/default.txt
|
||||
# uv export --no-emit-project --no-default-groups --no-hashes --group backend -o backend/requirements/default.txt
|
||||
agent-client-protocol==0.7.1
|
||||
# via onyx
|
||||
aioboto3==15.1.0
|
||||
@@ -19,7 +19,6 @@ aiohttp==3.13.4
|
||||
# aiobotocore
|
||||
# discord-py
|
||||
# litellm
|
||||
# onyx
|
||||
# voyageai
|
||||
aioitertools==0.13.0
|
||||
# via aiobotocore
|
||||
@@ -28,7 +27,6 @@ aiolimiter==1.2.1
|
||||
aiosignal==1.4.0
|
||||
# via aiohttp
|
||||
alembic==1.10.4
|
||||
# via onyx
|
||||
amqp==5.3.1
|
||||
# via kombu
|
||||
annotated-doc==0.0.4
|
||||
@@ -51,13 +49,10 @@ argon2-cffi==23.1.0
|
||||
argon2-cffi-bindings==25.1.0
|
||||
# via argon2-cffi
|
||||
asana==5.0.8
|
||||
# via onyx
|
||||
async-timeout==5.0.1 ; python_full_version < '3.11.3'
|
||||
# via redis
|
||||
asyncpg==0.30.0
|
||||
# via onyx
|
||||
atlassian-python-api==3.41.16
|
||||
# via onyx
|
||||
attrs==25.4.0
|
||||
# via
|
||||
# aiohttp
|
||||
@@ -68,7 +63,6 @@ attrs==25.4.0
|
||||
authlib==1.6.9
|
||||
# via fastmcp
|
||||
azure-cognitiveservices-speech==1.38.0
|
||||
# via onyx
|
||||
babel==2.17.0
|
||||
# via courlan
|
||||
backoff==2.2.1
|
||||
@@ -86,7 +80,6 @@ beautifulsoup4==4.12.3
|
||||
# atlassian-python-api
|
||||
# markdownify
|
||||
# markitdown
|
||||
# onyx
|
||||
# unstructured
|
||||
billiard==4.2.3
|
||||
# via celery
|
||||
@@ -94,9 +87,7 @@ boto3==1.39.11
|
||||
# via
|
||||
# aiobotocore
|
||||
# cohere
|
||||
# onyx
|
||||
boto3-stubs==1.39.11
|
||||
# via onyx
|
||||
botocore==1.39.11
|
||||
# via
|
||||
# aiobotocore
|
||||
@@ -105,7 +96,6 @@ botocore==1.39.11
|
||||
botocore-stubs==1.40.74
|
||||
# via boto3-stubs
|
||||
braintrust==0.3.9
|
||||
# via onyx
|
||||
brotli==1.2.0
|
||||
# via onyx
|
||||
bytecode==0.17.0
|
||||
@@ -115,7 +105,6 @@ cachetools==6.2.2
|
||||
caio==0.9.25
|
||||
# via aiofile
|
||||
celery==5.5.1
|
||||
# via onyx
|
||||
certifi==2025.11.12
|
||||
# via
|
||||
# asana
|
||||
@@ -134,7 +123,6 @@ cffi==2.0.0
|
||||
# pynacl
|
||||
# zstandard
|
||||
chardet==5.2.0
|
||||
# via onyx
|
||||
charset-normalizer==3.4.4
|
||||
# via
|
||||
# htmldate
|
||||
@@ -146,7 +134,6 @@ charset-normalizer==3.4.4
|
||||
chevron==0.14.0
|
||||
# via braintrust
|
||||
chonkie==1.0.10
|
||||
# via onyx
|
||||
claude-agent-sdk==0.1.19
|
||||
# via onyx
|
||||
click==8.3.1
|
||||
@@ -201,15 +188,12 @@ cryptography==46.0.6
|
||||
cyclopts==4.2.4
|
||||
# via fastmcp
|
||||
dask==2026.1.1
|
||||
# via
|
||||
# distributed
|
||||
# onyx
|
||||
# via distributed
|
||||
dataclasses-json==0.6.7
|
||||
# via unstructured
|
||||
dateparser==1.2.2
|
||||
# via htmldate
|
||||
ddtrace==3.10.0
|
||||
# via onyx
|
||||
decorator==5.2.1
|
||||
# via retry
|
||||
defusedxml==0.7.1
|
||||
@@ -223,7 +207,6 @@ deprecated==1.3.1
|
||||
discord-py==2.4.0
|
||||
# via onyx
|
||||
distributed==2026.1.1
|
||||
# via onyx
|
||||
distro==1.9.0
|
||||
# via
|
||||
# openai
|
||||
@@ -235,7 +218,6 @@ docstring-parser==0.17.0
|
||||
docutils==0.22.3
|
||||
# via rich-rst
|
||||
dropbox==12.0.2
|
||||
# via onyx
|
||||
durationpy==0.10
|
||||
# via kubernetes
|
||||
email-validator==2.2.0
|
||||
@@ -251,7 +233,6 @@ et-xmlfile==2.0.0
|
||||
events==0.5
|
||||
# via opensearch-py
|
||||
exa-py==1.15.4
|
||||
# via onyx
|
||||
exceptiongroup==1.3.0
|
||||
# via
|
||||
# braintrust
|
||||
@@ -262,23 +243,16 @@ fastapi==0.133.1
|
||||
# fastapi-users
|
||||
# onyx
|
||||
fastapi-limiter==0.1.6
|
||||
# via onyx
|
||||
fastapi-users==15.0.4
|
||||
# via
|
||||
# fastapi-users-db-sqlalchemy
|
||||
# onyx
|
||||
# via fastapi-users-db-sqlalchemy
|
||||
fastapi-users-db-sqlalchemy==7.0.0
|
||||
# via onyx
|
||||
fastavro==1.12.1
|
||||
# via cohere
|
||||
fastmcp==3.2.0
|
||||
# via onyx
|
||||
fastuuid==0.14.0
|
||||
# via litellm
|
||||
filelock==3.20.3
|
||||
# via
|
||||
# huggingface-hub
|
||||
# onyx
|
||||
# via huggingface-hub
|
||||
filetype==1.2.0
|
||||
# via unstructured
|
||||
flatbuffers==25.9.23
|
||||
@@ -298,7 +272,6 @@ gitpython==3.1.45
|
||||
google-api-core==2.28.1
|
||||
# via google-api-python-client
|
||||
google-api-python-client==2.86.0
|
||||
# via onyx
|
||||
google-auth==2.48.0
|
||||
# via
|
||||
# google-api-core
|
||||
@@ -308,11 +281,8 @@ google-auth==2.48.0
|
||||
# google-genai
|
||||
# kubernetes
|
||||
google-auth-httplib2==0.1.0
|
||||
# via
|
||||
# google-api-python-client
|
||||
# onyx
|
||||
# via google-api-python-client
|
||||
google-auth-oauthlib==1.0.0
|
||||
# via onyx
|
||||
google-genai==1.52.0
|
||||
# via onyx
|
||||
googleapis-common-protos==1.72.0
|
||||
@@ -340,7 +310,6 @@ htmldate==1.9.1
|
||||
httpcore==1.0.9
|
||||
# via
|
||||
# httpx
|
||||
# onyx
|
||||
# unstructured-client
|
||||
httplib2==0.31.0
|
||||
# via
|
||||
@@ -357,21 +326,16 @@ httpx==0.28.1
|
||||
# langsmith
|
||||
# litellm
|
||||
# mcp
|
||||
# onyx
|
||||
# openai
|
||||
# unstructured-client
|
||||
httpx-oauth==0.15.1
|
||||
# via onyx
|
||||
httpx-sse==0.4.3
|
||||
# via
|
||||
# cohere
|
||||
# mcp
|
||||
hubspot-api-client==11.1.0
|
||||
# via onyx
|
||||
huggingface-hub==0.35.3
|
||||
# via
|
||||
# onyx
|
||||
# tokenizers
|
||||
# via tokenizers
|
||||
humanfriendly==10.0
|
||||
# via coloredlogs
|
||||
hyperframe==6.1.0
|
||||
@@ -390,9 +354,7 @@ importlib-metadata==8.7.0
|
||||
# litellm
|
||||
# opentelemetry-api
|
||||
inflection==0.5.1
|
||||
# via
|
||||
# onyx
|
||||
# pyairtable
|
||||
# via pyairtable
|
||||
iniconfig==2.3.0
|
||||
# via pytest
|
||||
isodate==0.7.2
|
||||
@@ -414,7 +376,6 @@ jinja2==3.1.6
|
||||
# distributed
|
||||
# litellm
|
||||
jira==3.10.5
|
||||
# via onyx
|
||||
jiter==0.12.0
|
||||
# via openai
|
||||
jmespath==1.0.1
|
||||
@@ -430,9 +391,7 @@ jsonpatch==1.33
|
||||
jsonpointer==3.0.0
|
||||
# via jsonpatch
|
||||
jsonref==1.1.0
|
||||
# via
|
||||
# fastmcp
|
||||
# onyx
|
||||
# via fastmcp
|
||||
jsonschema==4.25.1
|
||||
# via
|
||||
# litellm
|
||||
@@ -450,15 +409,12 @@ kombu==5.5.4
|
||||
kubernetes==31.0.0
|
||||
# via onyx
|
||||
langchain-core==1.2.22
|
||||
# via onyx
|
||||
langdetect==1.0.9
|
||||
# via unstructured
|
||||
langfuse==3.10.0
|
||||
# via onyx
|
||||
langsmith==0.3.45
|
||||
# via langchain-core
|
||||
lazy-imports==1.0.1
|
||||
# via onyx
|
||||
legacy-cgi==2.6.4 ; python_full_version >= '3.13'
|
||||
# via ddtrace
|
||||
litellm==1.81.6
|
||||
@@ -473,7 +429,6 @@ lxml==5.3.0
|
||||
# justext
|
||||
# lxml-html-clean
|
||||
# markitdown
|
||||
# onyx
|
||||
# python-docx
|
||||
# python-pptx
|
||||
# python3-saml
|
||||
@@ -488,9 +443,7 @@ magika==0.6.3
|
||||
makefun==1.16.0
|
||||
# via fastapi-users
|
||||
mako==1.2.4
|
||||
# via
|
||||
# alembic
|
||||
# onyx
|
||||
# via alembic
|
||||
mammoth==1.11.0
|
||||
# via markitdown
|
||||
markdown-it-py==4.0.0
|
||||
@@ -498,7 +451,6 @@ markdown-it-py==4.0.0
|
||||
markdownify==1.2.2
|
||||
# via markitdown
|
||||
markitdown==0.1.2
|
||||
# via onyx
|
||||
markupsafe==3.0.3
|
||||
# via
|
||||
# jinja2
|
||||
@@ -512,11 +464,9 @@ mcp==1.26.0
|
||||
# via
|
||||
# claude-agent-sdk
|
||||
# fastmcp
|
||||
# onyx
|
||||
mdurl==0.1.2
|
||||
# via markdown-it-py
|
||||
mistune==3.2.0
|
||||
# via onyx
|
||||
more-itertools==10.8.0
|
||||
# via
|
||||
# jaraco-classes
|
||||
@@ -525,13 +475,10 @@ more-itertools==10.8.0
|
||||
mpmath==1.3.0
|
||||
# via sympy
|
||||
msal==1.34.0
|
||||
# via
|
||||
# office365-rest-python-client
|
||||
# onyx
|
||||
# via office365-rest-python-client
|
||||
msgpack==1.1.2
|
||||
# via distributed
|
||||
msoffcrypto-tool==5.4.2
|
||||
# via onyx
|
||||
multidict==6.7.0
|
||||
# via
|
||||
# aiobotocore
|
||||
@@ -548,7 +495,6 @@ mypy-extensions==1.0.0
|
||||
# mypy
|
||||
# typing-inspect
|
||||
nest-asyncio==1.6.0
|
||||
# via onyx
|
||||
nltk==3.9.4
|
||||
# via unstructured
|
||||
numpy==2.4.1
|
||||
@@ -563,10 +509,8 @@ oauthlib==3.2.2
|
||||
# via
|
||||
# atlassian-python-api
|
||||
# kubernetes
|
||||
# onyx
|
||||
# requests-oauthlib
|
||||
office365-rest-python-client==2.6.2
|
||||
# via onyx
|
||||
olefile==0.47
|
||||
# via
|
||||
# msoffcrypto-tool
|
||||
@@ -582,15 +526,11 @@ openai==2.14.0
|
||||
openapi-pydantic==0.5.1
|
||||
# via fastmcp
|
||||
openinference-instrumentation==0.1.42
|
||||
# via onyx
|
||||
openinference-semantic-conventions==0.1.25
|
||||
# via openinference-instrumentation
|
||||
openpyxl==3.0.10
|
||||
# via
|
||||
# markitdown
|
||||
# onyx
|
||||
# via markitdown
|
||||
opensearch-py==3.0.0
|
||||
# via onyx
|
||||
opentelemetry-api==1.39.1
|
||||
# via
|
||||
# ddtrace
|
||||
@@ -606,7 +546,6 @@ opentelemetry-exporter-otlp-proto-http==1.39.1
|
||||
# via langfuse
|
||||
opentelemetry-proto==1.39.1
|
||||
# via
|
||||
# onyx
|
||||
# opentelemetry-exporter-otlp-proto-common
|
||||
# opentelemetry-exporter-otlp-proto-http
|
||||
opentelemetry-sdk==1.39.1
|
||||
@@ -640,21 +579,18 @@ parameterized==0.9.0
|
||||
partd==1.4.2
|
||||
# via dask
|
||||
passlib==1.7.4
|
||||
# via onyx
|
||||
pathable==0.4.4
|
||||
# via jsonschema-path
|
||||
pdfminer-six==20251107
|
||||
# via markitdown
|
||||
pillow==12.1.1
|
||||
pillow==12.2.0
|
||||
# via python-pptx
|
||||
platformdirs==4.5.0
|
||||
# via
|
||||
# fastmcp
|
||||
# zeep
|
||||
playwright==1.55.0
|
||||
# via
|
||||
# onyx
|
||||
# pytest-playwright
|
||||
# via pytest-playwright
|
||||
pluggy==1.6.0
|
||||
# via pytest
|
||||
ply==3.11
|
||||
@@ -684,12 +620,9 @@ protobuf==6.33.5
|
||||
psutil==7.1.3
|
||||
# via
|
||||
# distributed
|
||||
# onyx
|
||||
# unstructured
|
||||
psycopg2-binary==2.9.9
|
||||
# via onyx
|
||||
puremagic==1.28
|
||||
# via onyx
|
||||
pwdlib==0.3.0
|
||||
# via fastapi-users
|
||||
py==1.11.0
|
||||
@@ -697,7 +630,6 @@ py==1.11.0
|
||||
py-key-value-aio==0.4.4
|
||||
# via fastmcp
|
||||
pyairtable==3.0.1
|
||||
# via onyx
|
||||
pyasn1==0.6.3
|
||||
# via
|
||||
# pyasn1-modules
|
||||
@@ -707,7 +639,6 @@ pyasn1-modules==0.4.2
|
||||
pycparser==2.23 ; implementation_name != 'PyPy'
|
||||
# via cffi
|
||||
pycryptodome==3.19.1
|
||||
# via onyx
|
||||
pydantic==2.11.7
|
||||
# via
|
||||
# agent-client-protocol
|
||||
@@ -734,7 +665,6 @@ pydantic-settings==2.12.0
|
||||
pyee==13.0.0
|
||||
# via playwright
|
||||
pygithub==2.5.0
|
||||
# via onyx
|
||||
pygments==2.20.0
|
||||
# via rich
|
||||
pyjwt==2.12.0
|
||||
@@ -745,17 +675,13 @@ pyjwt==2.12.0
|
||||
# pygithub
|
||||
# simple-salesforce
|
||||
pympler==1.1
|
||||
# via onyx
|
||||
pynacl==1.6.2
|
||||
# via pygithub
|
||||
pypandoc-binary==1.16.2
|
||||
# via onyx
|
||||
pyparsing==3.2.5
|
||||
# via httplib2
|
||||
pypdf==6.9.2
|
||||
# via
|
||||
# onyx
|
||||
# unstructured-client
|
||||
# via unstructured-client
|
||||
pyperclip==1.11.0
|
||||
# via fastmcp
|
||||
pyreadline3==3.5.4 ; sys_platform == 'win32'
|
||||
@@ -768,9 +694,7 @@ pytest==8.3.5
|
||||
pytest-base-url==2.1.0
|
||||
# via pytest-playwright
|
||||
pytest-mock==3.12.0
|
||||
# via onyx
|
||||
pytest-playwright==0.7.0
|
||||
# via onyx
|
||||
python-dateutil==2.8.2
|
||||
# via
|
||||
# aiobotocore
|
||||
@@ -781,11 +705,9 @@ python-dateutil==2.8.2
|
||||
# htmldate
|
||||
# hubspot-api-client
|
||||
# kubernetes
|
||||
# onyx
|
||||
# opensearch-py
|
||||
# pandas
|
||||
python-docx==1.1.2
|
||||
# via onyx
|
||||
python-dotenv==1.1.1
|
||||
# via
|
||||
# braintrust
|
||||
@@ -793,10 +715,8 @@ python-dotenv==1.1.1
|
||||
# litellm
|
||||
# magika
|
||||
# mcp
|
||||
# onyx
|
||||
# pydantic-settings
|
||||
python-gitlab==5.6.0
|
||||
# via onyx
|
||||
python-http-client==3.3.7
|
||||
# via sendgrid
|
||||
python-iso639==2025.11.16
|
||||
@@ -807,19 +727,15 @@ python-multipart==0.0.22
|
||||
# via
|
||||
# fastapi-users
|
||||
# mcp
|
||||
# onyx
|
||||
python-oxmsg==0.0.2
|
||||
# via unstructured
|
||||
python-pptx==0.6.23
|
||||
# via
|
||||
# markitdown
|
||||
# onyx
|
||||
# via markitdown
|
||||
python-slugify==8.0.4
|
||||
# via
|
||||
# braintrust
|
||||
# pytest-playwright
|
||||
python3-saml==1.15.0
|
||||
# via onyx
|
||||
pytz==2025.2
|
||||
# via
|
||||
# dateparser
|
||||
@@ -827,7 +743,6 @@ pytz==2025.2
|
||||
# pandas
|
||||
# zeep
|
||||
pywikibot==9.0.0
|
||||
# via onyx
|
||||
pywin32==311 ; sys_platform == 'win32'
|
||||
# via
|
||||
# mcp
|
||||
@@ -844,13 +759,9 @@ pyyaml==6.0.3
|
||||
# kubernetes
|
||||
# langchain-core
|
||||
rapidfuzz==3.13.0
|
||||
# via
|
||||
# onyx
|
||||
# unstructured
|
||||
# via unstructured
|
||||
redis==5.0.8
|
||||
# via
|
||||
# fastapi-limiter
|
||||
# onyx
|
||||
# via fastapi-limiter
|
||||
referencing==0.36.2
|
||||
# via
|
||||
# jsonschema
|
||||
@@ -881,7 +792,6 @@ requests==2.33.0
|
||||
# matrix-client
|
||||
# msal
|
||||
# office365-rest-python-client
|
||||
# onyx
|
||||
# opensearch-py
|
||||
# opentelemetry-exporter-otlp-proto-http
|
||||
# pyairtable
|
||||
@@ -907,7 +817,6 @@ requests-oauthlib==1.3.1
|
||||
# google-auth-oauthlib
|
||||
# jira
|
||||
# kubernetes
|
||||
# onyx
|
||||
requests-toolbelt==1.0.0
|
||||
# via
|
||||
# jira
|
||||
@@ -918,7 +827,6 @@ requests-toolbelt==1.0.0
|
||||
retry==0.9.2
|
||||
# via onyx
|
||||
rfc3986==1.5.0
|
||||
# via onyx
|
||||
rich==14.2.0
|
||||
# via
|
||||
# cyclopts
|
||||
@@ -938,15 +846,12 @@ s3transfer==0.13.1
|
||||
secretstorage==3.5.0 ; sys_platform == 'linux'
|
||||
# via keyring
|
||||
sendgrid==6.12.5
|
||||
# via onyx
|
||||
sentry-sdk==2.14.0
|
||||
# via onyx
|
||||
shapely==2.0.6
|
||||
# via onyx
|
||||
shellingham==1.5.4
|
||||
# via typer
|
||||
simple-salesforce==1.12.6
|
||||
# via onyx
|
||||
six==1.17.0
|
||||
# via
|
||||
# asana
|
||||
@@ -961,7 +866,6 @@ six==1.17.0
|
||||
# python-dateutil
|
||||
# stone
|
||||
slack-sdk==3.20.2
|
||||
# via onyx
|
||||
smmap==5.0.2
|
||||
# via gitdb
|
||||
sniffio==1.3.1
|
||||
@@ -976,7 +880,6 @@ sqlalchemy==2.0.15
|
||||
# via
|
||||
# alembic
|
||||
# fastapi-users-db-sqlalchemy
|
||||
# onyx
|
||||
sse-starlette==3.0.3
|
||||
# via mcp
|
||||
sseclient-py==1.8.0
|
||||
@@ -985,14 +888,11 @@ starlette==0.49.3
|
||||
# via
|
||||
# fastapi
|
||||
# mcp
|
||||
# onyx
|
||||
# prometheus-fastapi-instrumentator
|
||||
stone==3.3.1
|
||||
# via dropbox
|
||||
stripe==10.12.0
|
||||
# via onyx
|
||||
supervisor==4.3.0
|
||||
# via onyx
|
||||
sympy==1.14.0
|
||||
# via onnxruntime
|
||||
tblib==3.2.2
|
||||
@@ -1005,11 +905,8 @@ tenacity==9.1.2
|
||||
text-unidecode==1.3
|
||||
# via python-slugify
|
||||
tiktoken==0.7.0
|
||||
# via
|
||||
# litellm
|
||||
# onyx
|
||||
# via litellm
|
||||
timeago==1.0.16
|
||||
# via onyx
|
||||
tld==0.13.1
|
||||
# via courlan
|
||||
tokenizers==0.21.4
|
||||
@@ -1033,13 +930,11 @@ tqdm==4.67.1
|
||||
# openai
|
||||
# unstructured
|
||||
trafilatura==1.12.2
|
||||
# via onyx
|
||||
typer==0.20.0
|
||||
# via mcp
|
||||
types-awscrt==0.28.4
|
||||
# via botocore-stubs
|
||||
types-openpyxl==3.0.4.7
|
||||
# via onyx
|
||||
types-requests==2.32.0.20250328
|
||||
# via cohere
|
||||
types-s3transfer==0.14.0
|
||||
@@ -1105,11 +1000,8 @@ tzlocal==5.3.1
|
||||
uncalled-for==0.2.0
|
||||
# via fastmcp
|
||||
unstructured==0.18.27
|
||||
# via onyx
|
||||
unstructured-client==0.42.6
|
||||
# via
|
||||
# onyx
|
||||
# unstructured
|
||||
# via unstructured
|
||||
uritemplate==4.2.0
|
||||
# via google-api-python-client
|
||||
urllib3==2.6.3
|
||||
@@ -1121,7 +1013,6 @@ urllib3==2.6.3
|
||||
# htmldate
|
||||
# hubspot-api-client
|
||||
# kubernetes
|
||||
# onyx
|
||||
# opensearch-py
|
||||
# pyairtable
|
||||
# pygithub
|
||||
@@ -1171,9 +1062,7 @@ xlrd==2.0.2
|
||||
xlsxwriter==3.2.9
|
||||
# via python-pptx
|
||||
xmlsec==1.3.14
|
||||
# via
|
||||
# onyx
|
||||
# python3-saml
|
||||
# via python3-saml
|
||||
xmltodict==1.0.2
|
||||
# via ddtrace
|
||||
yarl==1.22.0
|
||||
@@ -1187,4 +1076,3 @@ zipp==3.23.0
|
||||
zstandard==0.23.0
|
||||
# via langsmith
|
||||
zulip==0.8.2
|
||||
# via onyx
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# This file was autogenerated by uv via the following command:
|
||||
# uv export --no-emit-project --no-default-groups --no-hashes --extra dev -o backend/requirements/dev.txt
|
||||
# uv export --no-emit-project --no-default-groups --no-hashes --group dev -o backend/requirements/dev.txt
|
||||
agent-client-protocol==0.7.1
|
||||
# via onyx
|
||||
aioboto3==15.1.0
|
||||
@@ -47,7 +47,6 @@ attrs==25.4.0
|
||||
# jsonschema
|
||||
# referencing
|
||||
black==25.1.0
|
||||
# via onyx
|
||||
boto3==1.39.11
|
||||
# via
|
||||
# aiobotocore
|
||||
@@ -60,7 +59,6 @@ botocore==1.39.11
|
||||
brotli==1.2.0
|
||||
# via onyx
|
||||
celery-types==0.19.0
|
||||
# via onyx
|
||||
certifi==2025.11.12
|
||||
# via
|
||||
# httpcore
|
||||
@@ -122,7 +120,6 @@ execnet==2.1.2
|
||||
executing==2.2.1
|
||||
# via stack-data
|
||||
faker==40.1.2
|
||||
# via onyx
|
||||
fastapi==0.133.1
|
||||
# via
|
||||
# onyx
|
||||
@@ -156,7 +153,6 @@ h11==0.16.0
|
||||
# httpcore
|
||||
# uvicorn
|
||||
hatchling==1.28.0
|
||||
# via onyx
|
||||
hf-xet==1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
|
||||
# via huggingface-hub
|
||||
httpcore==1.0.9
|
||||
@@ -187,7 +183,6 @@ importlib-metadata==8.7.0
|
||||
iniconfig==2.3.0
|
||||
# via pytest
|
||||
ipykernel==6.29.5
|
||||
# via onyx
|
||||
ipython==9.7.0
|
||||
# via ipykernel
|
||||
ipython-pygments-lexers==1.1.1
|
||||
@@ -224,13 +219,11 @@ litellm==1.81.6
|
||||
mako==1.2.4
|
||||
# via alembic
|
||||
manygo==0.2.0
|
||||
# via onyx
|
||||
markupsafe==3.0.3
|
||||
# via
|
||||
# jinja2
|
||||
# mako
|
||||
matplotlib==3.10.8
|
||||
# via onyx
|
||||
matplotlib-inline==0.2.1
|
||||
# via
|
||||
# ipykernel
|
||||
@@ -243,12 +236,10 @@ multidict==6.7.0
|
||||
# aiohttp
|
||||
# yarl
|
||||
mypy==1.13.0
|
||||
# via onyx
|
||||
mypy-extensions==1.0.0
|
||||
# via
|
||||
# black
|
||||
# mypy
|
||||
# onyx
|
||||
nest-asyncio==1.6.0
|
||||
# via ipykernel
|
||||
nodeenv==1.9.1
|
||||
@@ -263,16 +254,13 @@ oauthlib==3.2.2
|
||||
# via
|
||||
# kubernetes
|
||||
# requests-oauthlib
|
||||
onyx-devtools==0.7.2
|
||||
# via onyx
|
||||
onyx-devtools==0.7.5
|
||||
openai==2.14.0
|
||||
# via
|
||||
# litellm
|
||||
# onyx
|
||||
openapi-generator-cli==7.17.0
|
||||
# via
|
||||
# onyx
|
||||
# onyx-devtools
|
||||
# via onyx-devtools
|
||||
packaging==24.2
|
||||
# via
|
||||
# black
|
||||
@@ -282,7 +270,6 @@ packaging==24.2
|
||||
# matplotlib
|
||||
# pytest
|
||||
pandas-stubs==2.3.3.251201
|
||||
# via onyx
|
||||
parameterized==0.9.0
|
||||
# via cohere
|
||||
parso==0.8.5
|
||||
@@ -293,7 +280,7 @@ pathspec==0.12.1
|
||||
# hatchling
|
||||
pexpect==4.9.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
|
||||
# via ipython
|
||||
pillow==12.1.1
|
||||
pillow==12.2.0
|
||||
# via matplotlib
|
||||
platformdirs==4.5.0
|
||||
# via
|
||||
@@ -305,7 +292,6 @@ pluggy==1.6.0
|
||||
# hatchling
|
||||
# pytest
|
||||
pre-commit==3.2.2
|
||||
# via onyx
|
||||
prometheus-client==0.23.1
|
||||
# via
|
||||
# onyx
|
||||
@@ -359,22 +345,16 @@ pyparsing==3.2.5
|
||||
# via matplotlib
|
||||
pytest==8.3.5
|
||||
# via
|
||||
# onyx
|
||||
# pytest-alembic
|
||||
# pytest-asyncio
|
||||
# pytest-dotenv
|
||||
# pytest-repeat
|
||||
# pytest-xdist
|
||||
pytest-alembic==0.12.1
|
||||
# via onyx
|
||||
pytest-asyncio==1.3.0
|
||||
# via onyx
|
||||
pytest-dotenv==0.5.2
|
||||
# via onyx
|
||||
pytest-repeat==0.9.4
|
||||
# via onyx
|
||||
pytest-xdist==3.8.0
|
||||
# via onyx
|
||||
python-dateutil==2.8.2
|
||||
# via
|
||||
# aiobotocore
|
||||
@@ -407,9 +387,7 @@ referencing==0.36.2
|
||||
regex==2025.11.3
|
||||
# via tiktoken
|
||||
release-tag==0.5.2
|
||||
# via onyx
|
||||
reorder-python-imports-black==3.14.0
|
||||
# via onyx
|
||||
requests==2.33.0
|
||||
# via
|
||||
# cohere
|
||||
@@ -430,7 +408,6 @@ rpds-py==0.29.0
|
||||
rsa==4.9.1
|
||||
# via google-auth
|
||||
ruff==0.12.0
|
||||
# via onyx
|
||||
s3transfer==0.13.1
|
||||
# via boto3
|
||||
sentry-sdk==2.14.0
|
||||
@@ -484,39 +461,22 @@ traitlets==5.14.3
|
||||
trove-classifiers==2025.12.1.14
|
||||
# via hatchling
|
||||
types-beautifulsoup4==4.12.0.3
|
||||
# via onyx
|
||||
types-html5lib==1.1.11.13
|
||||
# via
|
||||
# onyx
|
||||
# types-beautifulsoup4
|
||||
# via types-beautifulsoup4
|
||||
types-oauthlib==3.2.0.9
|
||||
# via onyx
|
||||
types-passlib==1.7.7.20240106
|
||||
# via onyx
|
||||
types-pillow==10.2.0.20240822
|
||||
# via onyx
|
||||
types-psutil==7.1.3.20251125
|
||||
# via onyx
|
||||
types-psycopg2==2.9.21.10
|
||||
# via onyx
|
||||
types-python-dateutil==2.8.19.13
|
||||
# via onyx
|
||||
types-pytz==2023.3.1.1
|
||||
# via
|
||||
# onyx
|
||||
# pandas-stubs
|
||||
# via pandas-stubs
|
||||
types-pyyaml==6.0.12.11
|
||||
# via onyx
|
||||
types-regex==2023.3.23.1
|
||||
# via onyx
|
||||
types-requests==2.32.0.20250328
|
||||
# via
|
||||
# cohere
|
||||
# onyx
|
||||
# via cohere
|
||||
types-retry==0.9.9.3
|
||||
# via onyx
|
||||
types-setuptools==68.0.0.3
|
||||
# via onyx
|
||||
typing-extensions==4.15.0
|
||||
# via
|
||||
# aiosignal
|
||||
@@ -574,4 +534,3 @@ yarl==1.22.0
|
||||
zipp==3.23.0
|
||||
# via importlib-metadata
|
||||
zizmor==1.18.0
|
||||
# via onyx
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# This file was autogenerated by uv via the following command:
|
||||
# uv export --no-emit-project --no-default-groups --no-hashes --extra ee -o backend/requirements/ee.txt
|
||||
# uv export --no-emit-project --no-default-groups --no-hashes --group ee -o backend/requirements/ee.txt
|
||||
agent-client-protocol==0.7.1
|
||||
# via onyx
|
||||
aioboto3==15.1.0
|
||||
@@ -182,7 +182,6 @@ packaging==24.2
|
||||
parameterized==0.9.0
|
||||
# via cohere
|
||||
posthog==3.7.4
|
||||
# via onyx
|
||||
prometheus-client==0.23.1
|
||||
# via
|
||||
# onyx
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
# This file was autogenerated by uv via the following command:
|
||||
# uv export --no-emit-project --no-default-groups --no-hashes --extra model_server -o backend/requirements/model_server.txt
|
||||
# uv export --no-emit-project --no-default-groups --no-hashes --group model_server -o backend/requirements/model_server.txt
|
||||
accelerate==1.6.0
|
||||
# via onyx
|
||||
agent-client-protocol==0.7.1
|
||||
# via onyx
|
||||
aioboto3==15.1.0
|
||||
@@ -105,7 +104,6 @@ distro==1.9.0
|
||||
durationpy==0.10
|
||||
# via kubernetes
|
||||
einops==0.8.1
|
||||
# via onyx
|
||||
fastapi==0.133.1
|
||||
# via
|
||||
# onyx
|
||||
@@ -207,7 +205,6 @@ networkx==3.5
|
||||
numpy==2.4.1
|
||||
# via
|
||||
# accelerate
|
||||
# onyx
|
||||
# scikit-learn
|
||||
# scipy
|
||||
# transformers
|
||||
@@ -267,7 +264,7 @@ packaging==24.2
|
||||
# transformers
|
||||
parameterized==0.9.0
|
||||
# via cohere
|
||||
pillow==12.1.1
|
||||
pillow==12.2.0
|
||||
# via sentence-transformers
|
||||
prometheus-client==0.23.1
|
||||
# via
|
||||
@@ -363,7 +360,6 @@ s3transfer==0.13.1
|
||||
safetensors==0.5.3
|
||||
# via
|
||||
# accelerate
|
||||
# onyx
|
||||
# transformers
|
||||
scikit-learn==1.7.2
|
||||
# via sentence-transformers
|
||||
@@ -372,7 +368,6 @@ scipy==1.16.3
|
||||
# scikit-learn
|
||||
# sentence-transformers
|
||||
sentence-transformers==4.0.2
|
||||
# via onyx
|
||||
sentry-sdk==2.14.0
|
||||
# via onyx
|
||||
setuptools==80.9.0 ; python_full_version >= '3.12'
|
||||
@@ -411,7 +406,6 @@ tokenizers==0.21.4
|
||||
torch==2.9.1
|
||||
# via
|
||||
# accelerate
|
||||
# onyx
|
||||
# sentence-transformers
|
||||
tqdm==4.67.1
|
||||
# via
|
||||
@@ -420,9 +414,7 @@ tqdm==4.67.1
|
||||
# sentence-transformers
|
||||
# transformers
|
||||
transformers==4.53.0
|
||||
# via
|
||||
# onyx
|
||||
# sentence-transformers
|
||||
# via sentence-transformers
|
||||
triton==3.5.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
||||
# via torch
|
||||
types-requests==2.32.0.20250328
|
||||
|
||||
239
backend/tests/daily/connectors/google_drive/test_resolver.py
Normal file
239
backend/tests/daily/connectors/google_drive/test_resolver.py
Normal file
@@ -0,0 +1,239 @@
|
||||
"""Tests for GoogleDriveConnector.resolve_errors against real Google Drive."""
|
||||
|
||||
import json
|
||||
import os
|
||||
from collections.abc import Callable
|
||||
from unittest.mock import patch
|
||||
|
||||
from onyx.connectors.google_drive.connector import GoogleDriveConnector
|
||||
from onyx.connectors.models import ConnectorFailure
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.connectors.models import DocumentFailure
|
||||
from onyx.connectors.models import HierarchyNode
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import (
|
||||
ALL_EXPECTED_HIERARCHY_NODES,
|
||||
)
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_ID
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_ID
|
||||
|
||||
_DRIVE_ID_MAPPING_PATH = os.path.join(
|
||||
os.path.dirname(__file__), "drive_id_mapping.json"
|
||||
)
|
||||
|
||||
|
||||
def _load_web_view_links(file_ids: list[int]) -> list[str]:
|
||||
with open(_DRIVE_ID_MAPPING_PATH) as f:
|
||||
mapping: dict[str, str] = json.load(f)
|
||||
return [mapping[str(fid)] for fid in file_ids]
|
||||
|
||||
|
||||
def _build_failures(web_view_links: list[str]) -> list[ConnectorFailure]:
|
||||
return [
|
||||
ConnectorFailure(
|
||||
failed_document=DocumentFailure(
|
||||
document_id=link,
|
||||
document_link=link,
|
||||
),
|
||||
failure_message=f"Synthetic failure for {link}",
|
||||
)
|
||||
for link in web_view_links
|
||||
]
|
||||
|
||||
|
||||
@patch("onyx.file_processing.extract_file_text.get_unstructured_api_key")
|
||||
def test_resolve_single_file(
|
||||
mock_api_key: None, # noqa: ARG001
|
||||
google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
|
||||
) -> None:
|
||||
"""Resolve a single known file and verify we get back exactly one Document."""
|
||||
connector = google_drive_service_acct_connector_factory(
|
||||
primary_admin_email=ADMIN_EMAIL,
|
||||
include_shared_drives=True,
|
||||
shared_drive_urls=None,
|
||||
include_my_drives=True,
|
||||
my_drive_emails=None,
|
||||
shared_folder_urls=None,
|
||||
include_files_shared_with_me=False,
|
||||
)
|
||||
|
||||
web_view_links = _load_web_view_links([0])
|
||||
failures = _build_failures(web_view_links)
|
||||
|
||||
results = list(connector.resolve_errors(failures))
|
||||
|
||||
docs = [r for r in results if isinstance(r, Document)]
|
||||
new_failures = [r for r in results if isinstance(r, ConnectorFailure)]
|
||||
hierarchy_nodes = [r for r in results if isinstance(r, HierarchyNode)]
|
||||
|
||||
assert len(docs) == 1
|
||||
assert len(new_failures) == 0
|
||||
assert docs[0].semantic_identifier == "file_0.txt"
|
||||
|
||||
# Should yield at least one hierarchy node (the file's parent folder chain)
|
||||
assert len(hierarchy_nodes) > 0
|
||||
|
||||
|
||||
@patch("onyx.file_processing.extract_file_text.get_unstructured_api_key")
|
||||
def test_resolve_multiple_files(
|
||||
mock_api_key: None, # noqa: ARG001
|
||||
google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
|
||||
) -> None:
|
||||
"""Resolve multiple files across different folders via batch API."""
|
||||
connector = google_drive_service_acct_connector_factory(
|
||||
primary_admin_email=ADMIN_EMAIL,
|
||||
include_shared_drives=True,
|
||||
shared_drive_urls=None,
|
||||
include_my_drives=True,
|
||||
my_drive_emails=None,
|
||||
shared_folder_urls=None,
|
||||
include_files_shared_with_me=False,
|
||||
)
|
||||
|
||||
# Pick files from different folders: admin files (0-4), shared drive 1 (20-24), folder_2 (45-49)
|
||||
file_ids = [0, 1, 20, 21, 45]
|
||||
web_view_links = _load_web_view_links(file_ids)
|
||||
failures = _build_failures(web_view_links)
|
||||
|
||||
results = list(connector.resolve_errors(failures))
|
||||
|
||||
docs = [r for r in results if isinstance(r, Document)]
|
||||
new_failures = [r for r in results if isinstance(r, ConnectorFailure)]
|
||||
hierarchy_nodes = [r for r in results if isinstance(r, HierarchyNode)]
|
||||
|
||||
assert len(new_failures) == 0
|
||||
retrieved_names = {doc.semantic_identifier for doc in docs}
|
||||
expected_names = {f"file_{fid}.txt" for fid in file_ids}
|
||||
assert expected_names == retrieved_names
|
||||
|
||||
# Files span multiple folders, so we should get hierarchy nodes
|
||||
assert len(hierarchy_nodes) > 0
|
||||
|
||||
|
||||
@patch("onyx.file_processing.extract_file_text.get_unstructured_api_key")
|
||||
def test_resolve_hierarchy_nodes_are_valid(
|
||||
mock_api_key: None, # noqa: ARG001
|
||||
google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
|
||||
) -> None:
|
||||
"""Verify that hierarchy nodes from resolve_errors match expected structure."""
|
||||
connector = google_drive_service_acct_connector_factory(
|
||||
primary_admin_email=ADMIN_EMAIL,
|
||||
include_shared_drives=True,
|
||||
shared_drive_urls=None,
|
||||
include_my_drives=True,
|
||||
my_drive_emails=None,
|
||||
shared_folder_urls=None,
|
||||
include_files_shared_with_me=False,
|
||||
)
|
||||
|
||||
# File in folder_1 (inside shared_drive_1) — should walk up to shared_drive_1 root
|
||||
web_view_links = _load_web_view_links([25])
|
||||
failures = _build_failures(web_view_links)
|
||||
|
||||
results = list(connector.resolve_errors(failures))
|
||||
|
||||
hierarchy_nodes = [r for r in results if isinstance(r, HierarchyNode)]
|
||||
node_ids = {node.raw_node_id for node in hierarchy_nodes}
|
||||
|
||||
# File 25 is in folder_1 which is inside shared_drive_1.
|
||||
# The parent walk must yield at least these two ancestors.
|
||||
assert (
|
||||
FOLDER_1_ID in node_ids
|
||||
), f"Expected folder_1 ({FOLDER_1_ID}) in hierarchy nodes, got: {node_ids}"
|
||||
assert (
|
||||
SHARED_DRIVE_1_ID in node_ids
|
||||
), f"Expected shared_drive_1 ({SHARED_DRIVE_1_ID}) in hierarchy nodes, got: {node_ids}"
|
||||
|
||||
for node in hierarchy_nodes:
|
||||
if node.raw_node_id not in ALL_EXPECTED_HIERARCHY_NODES:
|
||||
continue
|
||||
expected = ALL_EXPECTED_HIERARCHY_NODES[node.raw_node_id]
|
||||
assert node.display_name == expected.display_name, (
|
||||
f"Display name mismatch for {node.raw_node_id}: "
|
||||
f"expected '{expected.display_name}', got '{node.display_name}'"
|
||||
)
|
||||
assert node.node_type == expected.node_type, (
|
||||
f"Node type mismatch for {node.raw_node_id}: "
|
||||
f"expected '{expected.node_type}', got '{node.node_type}'"
|
||||
)
|
||||
|
||||
|
||||
@patch("onyx.file_processing.extract_file_text.get_unstructured_api_key")
|
||||
def test_resolve_with_invalid_link(
|
||||
mock_api_key: None, # noqa: ARG001
|
||||
google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
|
||||
) -> None:
|
||||
"""Resolve with a mix of valid and invalid links — invalid ones yield ConnectorFailure."""
|
||||
connector = google_drive_service_acct_connector_factory(
|
||||
primary_admin_email=ADMIN_EMAIL,
|
||||
include_shared_drives=True,
|
||||
shared_drive_urls=None,
|
||||
include_my_drives=True,
|
||||
my_drive_emails=None,
|
||||
shared_folder_urls=None,
|
||||
include_files_shared_with_me=False,
|
||||
)
|
||||
|
||||
valid_links = _load_web_view_links([0])
|
||||
invalid_link = "https://drive.google.com/file/d/NONEXISTENT_FILE_ID_12345"
|
||||
failures = _build_failures(valid_links + [invalid_link])
|
||||
|
||||
results = list(connector.resolve_errors(failures))
|
||||
|
||||
docs = [r for r in results if isinstance(r, Document)]
|
||||
new_failures = [r for r in results if isinstance(r, ConnectorFailure)]
|
||||
|
||||
assert len(docs) == 1
|
||||
assert docs[0].semantic_identifier == "file_0.txt"
|
||||
assert len(new_failures) == 1
|
||||
assert new_failures[0].failed_document is not None
|
||||
assert new_failures[0].failed_document.document_id == invalid_link
|
||||
|
||||
|
||||
@patch("onyx.file_processing.extract_file_text.get_unstructured_api_key")
|
||||
def test_resolve_empty_errors(
|
||||
mock_api_key: None, # noqa: ARG001
|
||||
google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
|
||||
) -> None:
|
||||
"""Resolving an empty error list should yield nothing."""
|
||||
connector = google_drive_service_acct_connector_factory(
|
||||
primary_admin_email=ADMIN_EMAIL,
|
||||
include_shared_drives=True,
|
||||
shared_drive_urls=None,
|
||||
include_my_drives=True,
|
||||
my_drive_emails=None,
|
||||
shared_folder_urls=None,
|
||||
include_files_shared_with_me=False,
|
||||
)
|
||||
|
||||
results = list(connector.resolve_errors([]))
|
||||
|
||||
assert len(results) == 0
|
||||
|
||||
|
||||
@patch("onyx.file_processing.extract_file_text.get_unstructured_api_key")
|
||||
def test_resolve_entity_failures_are_skipped(
|
||||
mock_api_key: None, # noqa: ARG001
|
||||
google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
|
||||
) -> None:
|
||||
"""Entity failures (not document failures) should be skipped by resolve_errors."""
|
||||
from onyx.connectors.models import EntityFailure
|
||||
|
||||
connector = google_drive_service_acct_connector_factory(
|
||||
primary_admin_email=ADMIN_EMAIL,
|
||||
include_shared_drives=True,
|
||||
shared_drive_urls=None,
|
||||
include_my_drives=True,
|
||||
my_drive_emails=None,
|
||||
shared_folder_urls=None,
|
||||
include_files_shared_with_me=False,
|
||||
)
|
||||
|
||||
entity_failure = ConnectorFailure(
|
||||
failed_entity=EntityFailure(entity_id="some_stage"),
|
||||
failure_message="retrieval failure",
|
||||
)
|
||||
|
||||
results = list(connector.resolve_errors([entity_failure]))
|
||||
|
||||
assert len(results) == 0
|
||||
0
backend/tests/unit/background/__init__.py
Normal file
0
backend/tests/unit/background/__init__.py
Normal file
0
backend/tests/unit/background/celery/__init__.py
Normal file
0
backend/tests/unit/background/celery/__init__.py
Normal file
149
backend/tests/unit/background/celery/test_celery_utils.py
Normal file
149
backend/tests/unit/background/celery/test_celery_utils.py
Normal file
@@ -0,0 +1,149 @@
|
||||
"""Unit tests for extract_ids_from_runnable_connector metrics instrumentation."""
|
||||
|
||||
from collections.abc import Iterator
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.background.celery.celery_utils import extract_ids_from_runnable_connector
|
||||
from onyx.connectors.interfaces import SlimConnector
|
||||
from onyx.connectors.models import SlimDocument
|
||||
from onyx.server.metrics.pruning_metrics import PRUNING_ENUMERATION_DURATION
|
||||
from onyx.server.metrics.pruning_metrics import PRUNING_RATE_LIMIT_ERRORS
|
||||
|
||||
|
||||
def _make_slim_connector(doc_ids: list[str]) -> SlimConnector:
|
||||
"""Mock SlimConnector that yields the given doc IDs in one batch."""
|
||||
connector = MagicMock(spec=SlimConnector)
|
||||
docs = [
|
||||
MagicMock(spec=SlimDocument, id=doc_id, parent_hierarchy_raw_node_id=None)
|
||||
for doc_id in doc_ids
|
||||
]
|
||||
connector.retrieve_all_slim_docs.return_value = iter([docs])
|
||||
return connector
|
||||
|
||||
|
||||
def _raising_connector(message: str) -> SlimConnector:
|
||||
"""Mock SlimConnector whose generator raises with the given message."""
|
||||
connector = MagicMock(spec=SlimConnector)
|
||||
|
||||
def raising_iter() -> Iterator:
|
||||
raise Exception(message)
|
||||
yield
|
||||
|
||||
connector.retrieve_all_slim_docs.return_value = raising_iter()
|
||||
return connector
|
||||
|
||||
|
||||
class TestEnumerationDuration:
|
||||
def test_recorded_on_success(self) -> None:
|
||||
connector = _make_slim_connector(["doc1"])
|
||||
before = PRUNING_ENUMERATION_DURATION.labels(
|
||||
connector_type="google_drive"
|
||||
)._sum.get()
|
||||
|
||||
extract_ids_from_runnable_connector(connector, connector_type="google_drive")
|
||||
|
||||
after = PRUNING_ENUMERATION_DURATION.labels(
|
||||
connector_type="google_drive"
|
||||
)._sum.get()
|
||||
assert after >= before # duration observed (non-negative)
|
||||
|
||||
def test_recorded_on_exception(self) -> None:
|
||||
connector = _raising_connector("unexpected error")
|
||||
before = PRUNING_ENUMERATION_DURATION.labels(
|
||||
connector_type="confluence"
|
||||
)._sum.get()
|
||||
|
||||
with pytest.raises(Exception):
|
||||
extract_ids_from_runnable_connector(connector, connector_type="confluence")
|
||||
|
||||
after = PRUNING_ENUMERATION_DURATION.labels(
|
||||
connector_type="confluence"
|
||||
)._sum.get()
|
||||
assert after >= before # duration observed even on exception
|
||||
|
||||
|
||||
class TestRateLimitDetection:
|
||||
def test_increments_on_rate_limit_message(self) -> None:
|
||||
connector = _raising_connector("rate limit exceeded")
|
||||
before = PRUNING_RATE_LIMIT_ERRORS.labels(
|
||||
connector_type="google_drive"
|
||||
)._value.get()
|
||||
|
||||
with pytest.raises(Exception, match="rate limit exceeded"):
|
||||
extract_ids_from_runnable_connector(
|
||||
connector, connector_type="google_drive"
|
||||
)
|
||||
|
||||
after = PRUNING_RATE_LIMIT_ERRORS.labels(
|
||||
connector_type="google_drive"
|
||||
)._value.get()
|
||||
assert after == before + 1
|
||||
|
||||
def test_increments_on_429_in_message(self) -> None:
|
||||
connector = _raising_connector("HTTP 429 Too Many Requests")
|
||||
before = PRUNING_RATE_LIMIT_ERRORS.labels(
|
||||
connector_type="confluence"
|
||||
)._value.get()
|
||||
|
||||
with pytest.raises(Exception, match="429"):
|
||||
extract_ids_from_runnable_connector(connector, connector_type="confluence")
|
||||
|
||||
after = PRUNING_RATE_LIMIT_ERRORS.labels(
|
||||
connector_type="confluence"
|
||||
)._value.get()
|
||||
assert after == before + 1
|
||||
|
||||
def test_does_not_increment_on_non_rate_limit_exception(self) -> None:
|
||||
connector = _raising_connector("connection timeout")
|
||||
before = PRUNING_RATE_LIMIT_ERRORS.labels(connector_type="slack")._value.get()
|
||||
|
||||
with pytest.raises(Exception, match="connection timeout"):
|
||||
extract_ids_from_runnable_connector(connector, connector_type="slack")
|
||||
|
||||
after = PRUNING_RATE_LIMIT_ERRORS.labels(connector_type="slack")._value.get()
|
||||
assert after == before
|
||||
|
||||
def test_rate_limit_detection_is_case_insensitive(self) -> None:
|
||||
connector = _raising_connector("RATE LIMIT exceeded")
|
||||
before = PRUNING_RATE_LIMIT_ERRORS.labels(connector_type="jira")._value.get()
|
||||
|
||||
with pytest.raises(Exception):
|
||||
extract_ids_from_runnable_connector(connector, connector_type="jira")
|
||||
|
||||
after = PRUNING_RATE_LIMIT_ERRORS.labels(connector_type="jira")._value.get()
|
||||
assert after == before + 1
|
||||
|
||||
def test_connector_type_label_matches_input(self) -> None:
|
||||
connector = _raising_connector("rate limit exceeded")
|
||||
before_gd = PRUNING_RATE_LIMIT_ERRORS.labels(
|
||||
connector_type="google_drive"
|
||||
)._value.get()
|
||||
before_jira = PRUNING_RATE_LIMIT_ERRORS.labels(
|
||||
connector_type="jira"
|
||||
)._value.get()
|
||||
|
||||
with pytest.raises(Exception):
|
||||
extract_ids_from_runnable_connector(
|
||||
connector, connector_type="google_drive"
|
||||
)
|
||||
|
||||
assert (
|
||||
PRUNING_RATE_LIMIT_ERRORS.labels(connector_type="google_drive")._value.get()
|
||||
== before_gd + 1
|
||||
)
|
||||
assert (
|
||||
PRUNING_RATE_LIMIT_ERRORS.labels(connector_type="jira")._value.get()
|
||||
== before_jira
|
||||
)
|
||||
|
||||
def test_defaults_to_unknown_connector_type(self) -> None:
|
||||
connector = _raising_connector("rate limit exceeded")
|
||||
before = PRUNING_RATE_LIMIT_ERRORS.labels(connector_type="unknown")._value.get()
|
||||
|
||||
with pytest.raises(Exception):
|
||||
extract_ids_from_runnable_connector(connector)
|
||||
|
||||
after = PRUNING_RATE_LIMIT_ERRORS.labels(connector_type="unknown")._value.get()
|
||||
assert after == before + 1
|
||||
@@ -9,6 +9,7 @@ from unittest.mock import patch
|
||||
from ee.onyx.db.license import check_seat_availability
|
||||
from ee.onyx.db.license import delete_license
|
||||
from ee.onyx.db.license import get_license
|
||||
from ee.onyx.db.license import get_used_seats
|
||||
from ee.onyx.db.license import upsert_license
|
||||
from ee.onyx.server.license.models import LicenseMetadata
|
||||
from ee.onyx.server.license.models import LicenseSource
|
||||
@@ -214,3 +215,43 @@ class TestCheckSeatAvailabilityMultiTenant:
|
||||
assert result.available is False
|
||||
assert result.error_message is not None
|
||||
mock_tenant_count.assert_called_once_with("tenant-abc")
|
||||
|
||||
|
||||
class TestGetUsedSeatsAccountTypeFiltering:
|
||||
"""Verify get_used_seats query excludes SERVICE_ACCOUNT but includes BOT."""
|
||||
|
||||
@patch("ee.onyx.db.license.MULTI_TENANT", False)
|
||||
@patch("onyx.db.engine.sql_engine.get_session_with_current_tenant")
|
||||
def test_excludes_service_accounts(self, mock_get_session: MagicMock) -> None:
|
||||
"""SERVICE_ACCOUNT users should not count toward seats."""
|
||||
mock_session = MagicMock()
|
||||
mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
|
||||
mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
|
||||
mock_session.execute.return_value.scalar.return_value = 5
|
||||
|
||||
result = get_used_seats()
|
||||
|
||||
assert result == 5
|
||||
# Inspect the compiled query to verify account_type filter
|
||||
call_args = mock_session.execute.call_args
|
||||
query = call_args[0][0]
|
||||
compiled = str(query.compile(compile_kwargs={"literal_binds": True}))
|
||||
assert "SERVICE_ACCOUNT" in compiled
|
||||
# BOT should NOT be excluded
|
||||
assert "BOT" not in compiled
|
||||
|
||||
@patch("ee.onyx.db.license.MULTI_TENANT", False)
|
||||
@patch("onyx.db.engine.sql_engine.get_session_with_current_tenant")
|
||||
def test_still_excludes_ext_perm_user(self, mock_get_session: MagicMock) -> None:
|
||||
"""EXT_PERM_USER exclusion should still be present."""
|
||||
mock_session = MagicMock()
|
||||
mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
|
||||
mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
|
||||
mock_session.execute.return_value.scalar.return_value = 3
|
||||
|
||||
get_used_seats()
|
||||
|
||||
call_args = mock_session.execute.call_args
|
||||
query = call_args[0][0]
|
||||
compiled = str(query.compile(compile_kwargs={"literal_binds": True}))
|
||||
assert "EXT_PERM_USER" in compiled
|
||||
|
||||
@@ -6,6 +6,7 @@ import requests
|
||||
from jira import JIRA
|
||||
from jira.resources import Issue
|
||||
|
||||
from onyx.connectors.jira.connector import _JIRA_BULK_FETCH_LIMIT
|
||||
from onyx.connectors.jira.connector import bulk_fetch_issues
|
||||
|
||||
|
||||
@@ -145,3 +146,29 @@ def test_bulk_fetch_recursive_splitting_raises_on_bad_issue() -> None:
|
||||
|
||||
with pytest.raises(requests.exceptions.JSONDecodeError):
|
||||
bulk_fetch_issues(client, ["1", "2", bad_id, "3", "4", "5"])
|
||||
|
||||
|
||||
def test_bulk_fetch_respects_api_batch_limit() -> None:
|
||||
"""Requests to the bulkfetch endpoint never exceed _JIRA_BULK_FETCH_LIMIT IDs."""
|
||||
client = _mock_jira_client()
|
||||
total_issues = _JIRA_BULK_FETCH_LIMIT * 3 + 7
|
||||
all_ids = [str(i) for i in range(total_issues)]
|
||||
|
||||
batch_sizes: list[int] = []
|
||||
|
||||
def _post_side_effect(url: str, json: dict[str, Any]) -> MagicMock: # noqa: ARG001
|
||||
ids = json["issueIdsOrKeys"]
|
||||
batch_sizes.append(len(ids))
|
||||
resp = MagicMock()
|
||||
resp.json.return_value = {"issues": [_make_raw_issue(i) for i in ids]}
|
||||
return resp
|
||||
|
||||
client._session.post.side_effect = _post_side_effect
|
||||
|
||||
result = bulk_fetch_issues(client, all_ids)
|
||||
|
||||
assert len(result) == total_issues
|
||||
# keeping this hardcoded because it's the documented limit
|
||||
# https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issues/
|
||||
assert all(size <= 100 for size in batch_sizes)
|
||||
assert len(batch_sizes) == 4
|
||||
|
||||
@@ -0,0 +1,67 @@
|
||||
"""Tests for _build_thread_text function."""
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
from onyx.context.search.federated.slack_search import _build_thread_text
|
||||
|
||||
|
||||
def _make_msg(user: str, text: str, ts: str) -> dict[str, str]:
|
||||
return {"user": user, "text": text, "ts": ts}
|
||||
|
||||
|
||||
class TestBuildThreadText:
|
||||
"""Verify _build_thread_text includes full thread replies up to cap."""
|
||||
|
||||
@patch("onyx.context.search.federated.slack_search.batch_get_user_profiles")
|
||||
def test_includes_all_replies(self, mock_profiles: MagicMock) -> None:
|
||||
"""All replies within cap are included in output."""
|
||||
mock_profiles.return_value = {}
|
||||
messages = [
|
||||
_make_msg("U1", "parent msg", "1000.0"),
|
||||
_make_msg("U2", "reply 1", "1001.0"),
|
||||
_make_msg("U3", "reply 2", "1002.0"),
|
||||
_make_msg("U4", "reply 3", "1003.0"),
|
||||
]
|
||||
result = _build_thread_text(messages, "token", "T123", MagicMock())
|
||||
assert "parent msg" in result
|
||||
assert "reply 1" in result
|
||||
assert "reply 2" in result
|
||||
assert "reply 3" in result
|
||||
assert "..." not in result
|
||||
|
||||
@patch("onyx.context.search.federated.slack_search.batch_get_user_profiles")
|
||||
def test_non_thread_returns_parent_only(self, mock_profiles: MagicMock) -> None:
|
||||
"""Single message (no replies) returns just the parent text."""
|
||||
mock_profiles.return_value = {}
|
||||
messages = [_make_msg("U1", "just a message", "1000.0")]
|
||||
result = _build_thread_text(messages, "token", "T123", MagicMock())
|
||||
assert "just a message" in result
|
||||
assert "Replies:" not in result
|
||||
|
||||
@patch("onyx.context.search.federated.slack_search.batch_get_user_profiles")
|
||||
def test_parent_always_first(self, mock_profiles: MagicMock) -> None:
|
||||
"""Thread parent message is always the first line of output."""
|
||||
mock_profiles.return_value = {}
|
||||
messages = [
|
||||
_make_msg("U1", "I am the parent", "1000.0"),
|
||||
_make_msg("U2", "I am a reply", "1001.0"),
|
||||
]
|
||||
result = _build_thread_text(messages, "token", "T123", MagicMock())
|
||||
parent_pos = result.index("I am the parent")
|
||||
reply_pos = result.index("I am a reply")
|
||||
assert parent_pos < reply_pos
|
||||
|
||||
@patch("onyx.context.search.federated.slack_search.batch_get_user_profiles")
|
||||
def test_user_profiles_resolved(self, mock_profiles: MagicMock) -> None:
|
||||
"""User IDs in thread text are replaced with display names."""
|
||||
mock_profiles.return_value = {"U1": "Alice", "U2": "Bob"}
|
||||
messages = [
|
||||
_make_msg("U1", "hello", "1000.0"),
|
||||
_make_msg("U2", "world", "1001.0"),
|
||||
]
|
||||
result = _build_thread_text(messages, "token", "T123", MagicMock())
|
||||
assert "Alice" in result
|
||||
assert "Bob" in result
|
||||
assert "<@U1>" not in result
|
||||
assert "<@U2>" not in result
|
||||
@@ -0,0 +1,108 @@
|
||||
"""Tests for Slack URL parsing and direct thread fetch via URL override."""
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
from onyx.context.search.federated.models import DirectThreadFetch
|
||||
from onyx.context.search.federated.slack_search import _fetch_thread_from_url
|
||||
from onyx.context.search.federated.slack_search_utils import extract_slack_message_urls
|
||||
|
||||
|
||||
class TestExtractSlackMessageUrls:
|
||||
"""Verify URL parsing extracts channel_id and timestamp correctly."""
|
||||
|
||||
def test_standard_url(self) -> None:
|
||||
query = "summarize https://mycompany.slack.com/archives/C097NBWMY8Y/p1775491616524769"
|
||||
results = extract_slack_message_urls(query)
|
||||
assert len(results) == 1
|
||||
assert results[0] == ("C097NBWMY8Y", "1775491616.524769")
|
||||
|
||||
def test_multiple_urls(self) -> None:
|
||||
query = (
|
||||
"compare https://co.slack.com/archives/C111/p1234567890123456 "
|
||||
"and https://co.slack.com/archives/C222/p9876543210987654"
|
||||
)
|
||||
results = extract_slack_message_urls(query)
|
||||
assert len(results) == 2
|
||||
assert results[0] == ("C111", "1234567890.123456")
|
||||
assert results[1] == ("C222", "9876543210.987654")
|
||||
|
||||
def test_no_urls(self) -> None:
|
||||
query = "what happened in #general last week?"
|
||||
results = extract_slack_message_urls(query)
|
||||
assert len(results) == 0
|
||||
|
||||
def test_non_slack_url_ignored(self) -> None:
|
||||
query = "check https://google.com/archives/C111/p1234567890123456"
|
||||
results = extract_slack_message_urls(query)
|
||||
assert len(results) == 0
|
||||
|
||||
def test_timestamp_conversion(self) -> None:
|
||||
"""p prefix removed, dot inserted after 10th digit."""
|
||||
query = "https://x.slack.com/archives/CABC123/p1775491616524769"
|
||||
results = extract_slack_message_urls(query)
|
||||
channel_id, ts = results[0]
|
||||
assert channel_id == "CABC123"
|
||||
assert ts == "1775491616.524769"
|
||||
assert not ts.startswith("p")
|
||||
assert "." in ts
|
||||
|
||||
|
||||
class TestFetchThreadFromUrl:
|
||||
"""Verify _fetch_thread_from_url calls conversations.replies and returns SlackMessage."""
|
||||
|
||||
@patch("onyx.context.search.federated.slack_search._build_thread_text")
|
||||
@patch("onyx.context.search.federated.slack_search.WebClient")
|
||||
def test_successful_fetch(
|
||||
self, mock_webclient_cls: MagicMock, mock_build_thread: MagicMock
|
||||
) -> None:
|
||||
mock_client = MagicMock()
|
||||
mock_webclient_cls.return_value = mock_client
|
||||
|
||||
# Mock conversations_replies
|
||||
mock_response = MagicMock()
|
||||
mock_response.get.return_value = [
|
||||
{"user": "U1", "text": "parent", "ts": "1775491616.524769"},
|
||||
{"user": "U2", "text": "reply 1", "ts": "1775491617.000000"},
|
||||
{"user": "U3", "text": "reply 2", "ts": "1775491618.000000"},
|
||||
]
|
||||
mock_client.conversations_replies.return_value = mock_response
|
||||
|
||||
# Mock channel info
|
||||
mock_ch_response = MagicMock()
|
||||
mock_ch_response.get.return_value = {"name": "general"}
|
||||
mock_client.conversations_info.return_value = mock_ch_response
|
||||
|
||||
mock_build_thread.return_value = (
|
||||
"U1: parent\n\nReplies:\n\nU2: reply 1\n\nU3: reply 2"
|
||||
)
|
||||
|
||||
fetch = DirectThreadFetch(
|
||||
channel_id="C097NBWMY8Y", thread_ts="1775491616.524769"
|
||||
)
|
||||
result = _fetch_thread_from_url(fetch, "xoxp-token")
|
||||
|
||||
assert len(result.messages) == 1
|
||||
msg = result.messages[0]
|
||||
assert msg.channel_id == "C097NBWMY8Y"
|
||||
assert msg.thread_id is None # Prevents double-enrichment
|
||||
assert msg.slack_score == 100000.0
|
||||
assert "parent" in msg.text
|
||||
mock_client.conversations_replies.assert_called_once_with(
|
||||
channel="C097NBWMY8Y", ts="1775491616.524769"
|
||||
)
|
||||
|
||||
@patch("onyx.context.search.federated.slack_search.WebClient")
|
||||
def test_api_error_returns_empty(self, mock_webclient_cls: MagicMock) -> None:
|
||||
from slack_sdk.errors import SlackApiError
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_webclient_cls.return_value = mock_client
|
||||
mock_client.conversations_replies.side_effect = SlackApiError(
|
||||
message="channel_not_found",
|
||||
response=MagicMock(status_code=404),
|
||||
)
|
||||
|
||||
fetch = DirectThreadFetch(channel_id="CBAD", thread_ts="1234567890.123456")
|
||||
result = _fetch_thread_from_url(fetch, "xoxp-token")
|
||||
assert len(result.messages) == 0
|
||||
787
backend/tests/unit/onyx/indexing/test_document_chunker.py
Normal file
787
backend/tests/unit/onyx/indexing/test_document_chunker.py
Normal file
@@ -0,0 +1,787 @@
|
||||
import pytest
|
||||
from chonkie import SentenceChunker
|
||||
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.configs.constants import SECTION_SEPARATOR
|
||||
from onyx.connectors.models import IndexingDocument
|
||||
from onyx.connectors.models import Section
|
||||
from onyx.connectors.models import SectionType
|
||||
from onyx.indexing.chunking import DocumentChunker
|
||||
from onyx.indexing.chunking import text_section_chunker as text_chunker_module
|
||||
from onyx.natural_language_processing.utils import BaseTokenizer
|
||||
|
||||
|
||||
class CharTokenizer(BaseTokenizer):
|
||||
"""1 character == 1 token. Deterministic & trivial to reason about."""
|
||||
|
||||
def encode(self, string: str) -> list[int]:
|
||||
return [ord(c) for c in string]
|
||||
|
||||
def tokenize(self, string: str) -> list[str]:
|
||||
return list(string)
|
||||
|
||||
def decode(self, tokens: list[int]) -> str:
|
||||
return "".join(chr(t) for t in tokens)
|
||||
|
||||
|
||||
# With a char-level tokenizer, each char is a token. 200 is comfortably
|
||||
# above BLURB_SIZE (128) so the blurb splitter won't get weird on small text.
|
||||
CHUNK_LIMIT = 200
|
||||
|
||||
|
||||
def _make_document_chunker(
|
||||
chunk_token_limit: int = CHUNK_LIMIT,
|
||||
) -> DocumentChunker:
|
||||
def token_counter(text: str) -> int:
|
||||
return len(text)
|
||||
|
||||
return DocumentChunker(
|
||||
tokenizer=CharTokenizer(),
|
||||
blurb_splitter=SentenceChunker(
|
||||
tokenizer_or_token_counter=token_counter,
|
||||
chunk_size=128,
|
||||
chunk_overlap=0,
|
||||
return_type="texts",
|
||||
),
|
||||
chunk_splitter=SentenceChunker(
|
||||
tokenizer_or_token_counter=token_counter,
|
||||
chunk_size=chunk_token_limit,
|
||||
chunk_overlap=0,
|
||||
return_type="texts",
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def _make_doc(
|
||||
sections: list[Section],
|
||||
title: str | None = "Test Doc",
|
||||
doc_id: str = "doc1",
|
||||
) -> IndexingDocument:
|
||||
return IndexingDocument(
|
||||
id=doc_id,
|
||||
source=DocumentSource.WEB,
|
||||
semantic_identifier=doc_id,
|
||||
title=title,
|
||||
metadata={},
|
||||
sections=[], # real sections unused — method reads processed_sections
|
||||
processed_sections=sections,
|
||||
)
|
||||
|
||||
|
||||
# --- Empty / degenerate input -------------------------------------------------
|
||||
|
||||
|
||||
def test_empty_processed_sections_returns_single_empty_safety_chunk() -> None:
|
||||
"""No sections at all should still yield one empty chunk (the
|
||||
`or not chunks` safety branch at the end)."""
|
||||
dc = _make_document_chunker()
|
||||
doc = _make_doc(sections=[])
|
||||
|
||||
chunks = dc.chunk(
|
||||
document=doc,
|
||||
sections=[],
|
||||
title_prefix="TITLE\n",
|
||||
metadata_suffix_semantic="meta_sem",
|
||||
metadata_suffix_keyword="meta_kw",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 1
|
||||
assert chunks[0].content == ""
|
||||
assert chunks[0].chunk_id == 0
|
||||
assert chunks[0].title_prefix == "TITLE\n"
|
||||
assert chunks[0].metadata_suffix_semantic == "meta_sem"
|
||||
assert chunks[0].metadata_suffix_keyword == "meta_kw"
|
||||
# safe default link offsets
|
||||
assert chunks[0].source_links == {0: ""}
|
||||
|
||||
|
||||
def test_empty_section_on_first_position_without_title_is_skipped() -> None:
|
||||
"""Doc has no title, first section has empty text — the guard
|
||||
`(not document.title or section_idx > 0)` means it IS skipped."""
|
||||
dc = _make_document_chunker()
|
||||
doc = _make_doc(
|
||||
sections=[Section(type=SectionType.TEXT, text="", link="l0")],
|
||||
title=None,
|
||||
)
|
||||
|
||||
chunks = dc.chunk(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
# skipped → no real content, but safety branch still yields 1 empty chunk
|
||||
assert len(chunks) == 1
|
||||
assert chunks[0].content == ""
|
||||
|
||||
|
||||
def test_empty_section_on_later_position_is_skipped_even_with_title() -> None:
|
||||
"""Index > 0 empty sections are skipped regardless of title."""
|
||||
dc = _make_document_chunker()
|
||||
doc = _make_doc(
|
||||
sections=[
|
||||
Section(type=SectionType.TEXT, text="Alpha.", link="l0"),
|
||||
Section(type=SectionType.TEXT, text="", link="l1"), # should be skipped
|
||||
Section(type=SectionType.TEXT, text="Beta.", link="l2"),
|
||||
],
|
||||
)
|
||||
|
||||
chunks = dc.chunk(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 1
|
||||
assert "Alpha." in chunks[0].content
|
||||
assert "Beta." in chunks[0].content
|
||||
# link offsets should only contain l0 and l2 (no l1)
|
||||
assert "l1" not in (chunks[0].source_links or {}).values()
|
||||
|
||||
|
||||
# --- Single text section ------------------------------------------------------
|
||||
|
||||
|
||||
def test_single_small_text_section_becomes_one_chunk() -> None:
|
||||
dc = _make_document_chunker()
|
||||
doc = _make_doc(
|
||||
sections=[Section(type=SectionType.TEXT, text="Hello world.", link="https://a")]
|
||||
)
|
||||
|
||||
chunks = dc.chunk(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="TITLE\n",
|
||||
metadata_suffix_semantic="ms",
|
||||
metadata_suffix_keyword="mk",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 1
|
||||
chunk = chunks[0]
|
||||
assert chunk.content == "Hello world."
|
||||
assert chunk.source_links == {0: "https://a"}
|
||||
assert chunk.title_prefix == "TITLE\n"
|
||||
assert chunk.metadata_suffix_semantic == "ms"
|
||||
assert chunk.metadata_suffix_keyword == "mk"
|
||||
assert chunk.section_continuation is False
|
||||
assert chunk.image_file_id is None
|
||||
|
||||
|
||||
# --- Multiple text sections combined -----------------------------------------
|
||||
|
||||
|
||||
def test_multiple_small_sections_combine_into_one_chunk() -> None:
|
||||
dc = _make_document_chunker()
|
||||
sections = [
|
||||
Section(type=SectionType.TEXT, text="Part one.", link="l1"),
|
||||
Section(type=SectionType.TEXT, text="Part two.", link="l2"),
|
||||
Section(type=SectionType.TEXT, text="Part three.", link="l3"),
|
||||
]
|
||||
doc = _make_doc(sections=sections)
|
||||
|
||||
chunks = dc.chunk(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 1
|
||||
expected = SECTION_SEPARATOR.join(["Part one.", "Part two.", "Part three."])
|
||||
assert chunks[0].content == expected
|
||||
|
||||
# link_offsets: indexed by shared_precompare_cleanup length of the
|
||||
# chunk_text *before* each section was appended.
|
||||
# "" -> "", len 0
|
||||
# "Part one." -> "partone", len 7
|
||||
# "Part one.\n\nPart two." -> "partoneparttwo", len 14
|
||||
assert chunks[0].source_links == {0: "l1", 7: "l2", 14: "l3"}
|
||||
|
||||
|
||||
def test_sections_overflow_into_second_chunk() -> None:
|
||||
"""Two sections that together exceed content_token_limit should
|
||||
finalize the first as one chunk and start a new one."""
|
||||
dc = _make_document_chunker()
|
||||
# char-level: 120 char section → 120 tokens. 2 of these plus separator
|
||||
# exceed a 200-token limit, forcing a flush.
|
||||
a = "A" * 120
|
||||
b = "B" * 120
|
||||
doc = _make_doc(
|
||||
sections=[
|
||||
Section(type=SectionType.TEXT, text=a, link="la"),
|
||||
Section(type=SectionType.TEXT, text=b, link="lb"),
|
||||
],
|
||||
)
|
||||
|
||||
chunks = dc.chunk(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 2
|
||||
assert chunks[0].content == a
|
||||
assert chunks[1].content == b
|
||||
# first chunk is not a continuation; second starts a new section → not either
|
||||
assert chunks[0].section_continuation is False
|
||||
assert chunks[1].section_continuation is False
|
||||
# chunk_ids should be sequential starting at 0
|
||||
assert chunks[0].chunk_id == 0
|
||||
assert chunks[1].chunk_id == 1
|
||||
# links routed appropriately
|
||||
assert chunks[0].source_links == {0: "la"}
|
||||
assert chunks[1].source_links == {0: "lb"}
|
||||
|
||||
|
||||
# --- Image section handling --------------------------------------------------
|
||||
|
||||
|
||||
def test_image_only_section_produces_single_chunk_with_image_id() -> None:
|
||||
dc = _make_document_chunker()
|
||||
doc = _make_doc(
|
||||
sections=[
|
||||
Section(
|
||||
type=SectionType.IMAGE,
|
||||
text="summary of image",
|
||||
link="https://img",
|
||||
image_file_id="img-abc",
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
chunks = dc.chunk(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 1
|
||||
assert chunks[0].image_file_id == "img-abc"
|
||||
assert chunks[0].content == "summary of image"
|
||||
assert chunks[0].source_links == {0: "https://img"}
|
||||
|
||||
|
||||
def test_image_section_flushes_pending_text_and_creates_its_own_chunk() -> None:
|
||||
"""A buffered text section followed by an image section:
|
||||
the pending text should be flushed first, then the image chunk."""
|
||||
dc = _make_document_chunker()
|
||||
doc = _make_doc(
|
||||
sections=[
|
||||
Section(type=SectionType.TEXT, text="Pending text.", link="ltext"),
|
||||
Section(
|
||||
type=SectionType.IMAGE,
|
||||
text="image summary",
|
||||
link="limage",
|
||||
image_file_id="img-1",
|
||||
),
|
||||
Section(type=SectionType.TEXT, text="Trailing text.", link="ltail"),
|
||||
],
|
||||
)
|
||||
|
||||
chunks = dc.chunk(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 3
|
||||
|
||||
# 0: flushed pending text
|
||||
assert chunks[0].content == "Pending text."
|
||||
assert chunks[0].image_file_id is None
|
||||
assert chunks[0].source_links == {0: "ltext"}
|
||||
|
||||
# 1: image chunk
|
||||
assert chunks[1].content == "image summary"
|
||||
assert chunks[1].image_file_id == "img-1"
|
||||
assert chunks[1].source_links == {0: "limage"}
|
||||
|
||||
# 2: trailing text, started fresh after image
|
||||
assert chunks[2].content == "Trailing text."
|
||||
assert chunks[2].image_file_id is None
|
||||
assert chunks[2].source_links == {0: "ltail"}
|
||||
|
||||
|
||||
def test_image_section_without_link_gets_empty_links_dict() -> None:
|
||||
"""If an image section has no link, links param is {} (not {0: ""})."""
|
||||
dc = _make_document_chunker()
|
||||
doc = _make_doc(
|
||||
sections=[
|
||||
Section(
|
||||
type=SectionType.IMAGE,
|
||||
text="img",
|
||||
link=None,
|
||||
image_file_id="img-xyz",
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
chunks = dc.chunk(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 1
|
||||
assert chunks[0].image_file_id == "img-xyz"
|
||||
# to_doc_aware_chunk falls back to {0: ""} when given an empty dict
|
||||
assert chunks[0].source_links == {0: ""}
|
||||
|
||||
|
||||
# --- Oversized section splitting ---------------------------------------------
|
||||
|
||||
|
||||
def test_oversized_section_is_split_across_multiple_chunks() -> None:
|
||||
"""A section whose text exceeds content_token_limit should be passed
|
||||
through chunk_splitter and yield >1 chunks; only the first is not a
|
||||
continuation."""
|
||||
dc = _make_document_chunker()
|
||||
# Build a section whose char-count is well over CHUNK_LIMIT (200), made
|
||||
# of many short sentences so chonkie's SentenceChunker can split cleanly.
|
||||
section_text = (
|
||||
"Alpha beta gamma. Delta epsilon zeta. Eta theta iota. "
|
||||
"Kappa lambda mu. Nu xi omicron. Pi rho sigma. Tau upsilon phi. "
|
||||
"Chi psi omega. One two three. Four five six. Seven eight nine. "
|
||||
"Ten eleven twelve. Thirteen fourteen fifteen. "
|
||||
"Sixteen seventeen eighteen. Nineteen twenty."
|
||||
)
|
||||
assert len(section_text) > CHUNK_LIMIT
|
||||
|
||||
doc = _make_doc(
|
||||
sections=[Section(type=SectionType.TEXT, text=section_text, link="big-link")],
|
||||
)
|
||||
|
||||
chunks = dc.chunk(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) >= 2
|
||||
# First chunk is fresh, rest are continuations
|
||||
assert chunks[0].section_continuation is False
|
||||
for c in chunks[1:]:
|
||||
assert c.section_continuation is True
|
||||
# Every produced chunk should carry the section's link
|
||||
for c in chunks:
|
||||
assert c.source_links == {0: "big-link"}
|
||||
# Concatenated content should roughly cover the original (allowing
|
||||
# for chunker boundary whitespace differences).
|
||||
joined = "".join(c.content for c in chunks)
|
||||
for word in ("Alpha", "omega", "twenty"):
|
||||
assert word in joined
|
||||
|
||||
|
||||
def test_oversized_section_flushes_pending_text_first() -> None:
|
||||
"""A buffered text section followed by an oversized section should
|
||||
flush the pending chunk first, then emit the split chunks."""
|
||||
dc = _make_document_chunker()
|
||||
pending = "Pending buffered text."
|
||||
big = (
|
||||
"Alpha beta gamma. Delta epsilon zeta. Eta theta iota. "
|
||||
"Kappa lambda mu. Nu xi omicron. Pi rho sigma. Tau upsilon phi. "
|
||||
"Chi psi omega. One two three. Four five six. Seven eight nine. "
|
||||
"Ten eleven twelve. Thirteen fourteen fifteen. Sixteen seventeen."
|
||||
)
|
||||
assert len(big) > CHUNK_LIMIT
|
||||
|
||||
doc = _make_doc(
|
||||
sections=[
|
||||
Section(type=SectionType.TEXT, text=pending, link="l-pending"),
|
||||
Section(type=SectionType.TEXT, text=big, link="l-big"),
|
||||
],
|
||||
)
|
||||
|
||||
chunks = dc.chunk(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
# First chunk is the flushed pending text
|
||||
assert chunks[0].content == pending
|
||||
assert chunks[0].source_links == {0: "l-pending"}
|
||||
assert chunks[0].section_continuation is False
|
||||
|
||||
# Remaining chunks correspond to the oversized section
|
||||
assert len(chunks) >= 2
|
||||
for c in chunks[1:]:
|
||||
assert c.source_links == {0: "l-big"}
|
||||
# Within the oversized section, the first is fresh and the rest are
|
||||
# continuations.
|
||||
assert chunks[1].section_continuation is False
|
||||
for c in chunks[2:]:
|
||||
assert c.section_continuation is True
|
||||
|
||||
|
||||
# --- Title prefix / metadata propagation -------------------------------------
|
||||
|
||||
|
||||
def test_title_prefix_and_metadata_propagate_to_all_chunks() -> None:
|
||||
dc = _make_document_chunker()
|
||||
doc = _make_doc(
|
||||
sections=[
|
||||
Section(type=SectionType.TEXT, text="A" * 120, link="la"),
|
||||
Section(type=SectionType.TEXT, text="B" * 120, link="lb"),
|
||||
],
|
||||
)
|
||||
|
||||
chunks = dc.chunk(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="MY_TITLE\n",
|
||||
metadata_suffix_semantic="MS",
|
||||
metadata_suffix_keyword="MK",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 2
|
||||
for chunk in chunks:
|
||||
assert chunk.title_prefix == "MY_TITLE\n"
|
||||
assert chunk.metadata_suffix_semantic == "MS"
|
||||
assert chunk.metadata_suffix_keyword == "MK"
|
||||
|
||||
|
||||
# --- chunk_id monotonicity ---------------------------------------------------
|
||||
|
||||
|
||||
def test_chunk_ids_are_sequential_starting_at_zero() -> None:
|
||||
dc = _make_document_chunker()
|
||||
doc = _make_doc(
|
||||
sections=[
|
||||
Section(type=SectionType.TEXT, text="A" * 120, link="la"),
|
||||
Section(type=SectionType.TEXT, text="B" * 120, link="lb"),
|
||||
Section(type=SectionType.TEXT, text="C" * 120, link="lc"),
|
||||
],
|
||||
)
|
||||
|
||||
chunks = dc.chunk(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert [c.chunk_id for c in chunks] == list(range(len(chunks)))
|
||||
|
||||
|
||||
# --- Overflow accumulation behavior ------------------------------------------
|
||||
|
||||
|
||||
def test_overflow_flush_then_subsequent_section_joins_new_chunk() -> None:
|
||||
"""After an overflow flush starts a new chunk, the next fitting section
|
||||
should combine into that same new chunk (not spawn a third)."""
|
||||
dc = _make_document_chunker()
|
||||
# 120 + 120 > 200 → first two sections produce two chunks.
|
||||
# Third section is small (20 chars) → should fit with second.
|
||||
doc = _make_doc(
|
||||
sections=[
|
||||
Section(type=SectionType.TEXT, text="A" * 120, link="la"),
|
||||
Section(type=SectionType.TEXT, text="B" * 120, link="lb"),
|
||||
Section(type=SectionType.TEXT, text="C" * 20, link="lc"),
|
||||
],
|
||||
)
|
||||
|
||||
chunks = dc.chunk(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 2
|
||||
assert chunks[0].content == "A" * 120
|
||||
assert chunks[1].content == ("B" * 120) + SECTION_SEPARATOR + ("C" * 20)
|
||||
# link_offsets on second chunk: lb at 0, lc at precompare-len("BBBB...")=120
|
||||
assert chunks[1].source_links == {0: "lb", 120: "lc"}
|
||||
|
||||
|
||||
def test_small_section_after_oversized_starts_a_fresh_chunk() -> None:
|
||||
"""After an oversized section is emitted as its own chunks, the internal
|
||||
accumulator should be empty so a following small section starts a new
|
||||
chunk instead of being swallowed."""
|
||||
dc = _make_document_chunker()
|
||||
big = (
|
||||
"Alpha beta gamma. Delta epsilon zeta. Eta theta iota. "
|
||||
"Kappa lambda mu. Nu xi omicron. Pi rho sigma. Tau upsilon phi. "
|
||||
"Chi psi omega. One two three. Four five six. Seven eight nine. "
|
||||
"Ten eleven twelve. Thirteen fourteen fifteen. Sixteen seventeen."
|
||||
)
|
||||
assert len(big) > CHUNK_LIMIT
|
||||
doc = _make_doc(
|
||||
sections=[
|
||||
Section(type=SectionType.TEXT, text=big, link="l-big"),
|
||||
Section(type=SectionType.TEXT, text="Tail text.", link="l-tail"),
|
||||
],
|
||||
)
|
||||
|
||||
chunks = dc.chunk(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
# All-but-last chunks belong to the oversized section; the very last is
|
||||
# the tail text starting fresh (not a continuation).
|
||||
assert len(chunks) >= 2
|
||||
assert chunks[-1].content == "Tail text."
|
||||
assert chunks[-1].source_links == {0: "l-tail"}
|
||||
assert chunks[-1].section_continuation is False
|
||||
# And earlier oversized chunks never leaked the tail link
|
||||
for c in chunks[:-1]:
|
||||
assert c.source_links == {0: "l-big"}
|
||||
|
||||
|
||||
# --- STRICT_CHUNK_TOKEN_LIMIT fallback path ----------------------------------
|
||||
|
||||
|
||||
def test_strict_chunk_token_limit_subdivides_oversized_split(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
"""When STRICT_CHUNK_TOKEN_LIMIT is enabled and chonkie's chunk_splitter
|
||||
still produces a piece larger than content_token_limit (e.g. a single
|
||||
no-period run), the code must fall back to _split_oversized_chunk."""
|
||||
monkeypatch.setattr(text_chunker_module, "STRICT_CHUNK_TOKEN_LIMIT", True)
|
||||
dc = _make_document_chunker()
|
||||
# 500 non-whitespace chars with no sentence boundaries — chonkie will
|
||||
# return it as one oversized piece (>200) which triggers the fallback.
|
||||
run = "a" * 500
|
||||
doc = _make_doc(sections=[Section(type=SectionType.TEXT, text=run, link="l-run")])
|
||||
|
||||
chunks = dc.chunk(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
# With CHUNK_LIMIT=200 and a 500-char run we expect ceil(500/200)=3 sub-chunks.
|
||||
assert len(chunks) == 3
|
||||
# First is fresh, rest are continuations (is_continuation=(j != 0))
|
||||
assert chunks[0].section_continuation is False
|
||||
assert chunks[1].section_continuation is True
|
||||
assert chunks[2].section_continuation is True
|
||||
# All carry the section link
|
||||
for c in chunks:
|
||||
assert c.source_links == {0: "l-run"}
|
||||
# NOTE: we do NOT assert the chunks are at or below content_token_limit.
|
||||
# _split_oversized_chunk joins tokens with " ", which means the resulting
|
||||
# chunk contents can exceed the limit when tokens are short. That's a
|
||||
# quirk of the current implementation and this test pins the window
|
||||
# slicing, not the post-join length.
|
||||
|
||||
|
||||
def test_strict_chunk_token_limit_disabled_allows_oversized_split(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
"""Same pathological input, but with STRICT disabled: the oversized
|
||||
split is emitted verbatim as a single chunk (current behavior)."""
|
||||
monkeypatch.setattr(text_chunker_module, "STRICT_CHUNK_TOKEN_LIMIT", False)
|
||||
dc = _make_document_chunker()
|
||||
run = "a" * 500
|
||||
doc = _make_doc(sections=[Section(type=SectionType.TEXT, text=run, link="l-run")])
|
||||
|
||||
chunks = dc.chunk(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 1
|
||||
assert chunks[0].content == run
|
||||
assert chunks[0].section_continuation is False
|
||||
|
||||
|
||||
# --- First-section-with-empty-text-but-document-has-title edge case ----------
|
||||
|
||||
|
||||
def test_first_empty_section_with_title_is_processed_not_skipped() -> None:
|
||||
"""The guard `(not document.title or section_idx > 0)` means: when
|
||||
the doc has a title AND it's the first section, an empty text section
|
||||
is NOT skipped. This pins current behavior so a refactor can't silently
|
||||
change it."""
|
||||
dc = _make_document_chunker()
|
||||
doc = _make_doc(
|
||||
sections=[
|
||||
Section(
|
||||
type=SectionType.TEXT, text="", link="l0"
|
||||
), # empty first section, kept
|
||||
Section(type=SectionType.TEXT, text="Real content.", link="l1"),
|
||||
],
|
||||
title="Has A Title",
|
||||
)
|
||||
|
||||
chunks = dc.chunk(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 1
|
||||
assert chunks[0].content == "Real content."
|
||||
# First (empty) section did register a link_offset at 0 before being
|
||||
# overwritten; that offset is then reused when "Real content." is added,
|
||||
# because shared_precompare_cleanup("") is still "". End state: {0: "l1"}
|
||||
assert chunks[0].source_links == {0: "l1"}
|
||||
|
||||
|
||||
# --- clean_text is applied to section text -----------------------------------
|
||||
|
||||
|
||||
def test_clean_text_strips_control_chars_from_section_content() -> None:
|
||||
"""clean_text() should remove control chars before the text enters the
|
||||
accumulator — verifies the call isn't dropped by a refactor."""
|
||||
dc = _make_document_chunker()
|
||||
# NUL + BEL are control chars below 0x20 and not \n or \t → should be
|
||||
# stripped by clean_text.
|
||||
dirty = "Hello\x00 World\x07!"
|
||||
doc = _make_doc(sections=[Section(type=SectionType.TEXT, text=dirty, link="l1")])
|
||||
|
||||
chunks = dc.chunk(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 1
|
||||
assert chunks[0].content == "Hello World!"
|
||||
|
||||
|
||||
# --- None-valued fields ------------------------------------------------------
|
||||
|
||||
|
||||
def test_section_with_none_text_behaves_like_empty_string() -> None:
|
||||
"""`section.text` may be None — the method coerces via
|
||||
`str(section.text or "")`, so a None-text section behaves identically
|
||||
to an empty one (skipped unless it's the first section of a titled doc)."""
|
||||
dc = _make_document_chunker()
|
||||
doc = _make_doc(
|
||||
sections=[
|
||||
Section(type=SectionType.TEXT, text="Alpha.", link="la"),
|
||||
Section(type=SectionType.TEXT, text=None, link="lnone"), # idx 1 → skipped
|
||||
Section(type=SectionType.TEXT, text="Beta.", link="lb"),
|
||||
],
|
||||
)
|
||||
|
||||
chunks = dc.chunk(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 1
|
||||
assert "Alpha." in chunks[0].content
|
||||
assert "Beta." in chunks[0].content
|
||||
assert "lnone" not in (chunks[0].source_links or {}).values()
|
||||
|
||||
|
||||
# --- Trailing empty chunk suppression ----------------------------------------
|
||||
|
||||
|
||||
def test_no_trailing_empty_chunk_when_last_section_was_image() -> None:
|
||||
"""If the final section was an image (which emits its own chunk and
|
||||
resets chunk_text), the safety `or not chunks` branch should NOT fire
|
||||
because chunks is non-empty. Pin this explicitly."""
|
||||
dc = _make_document_chunker()
|
||||
doc = _make_doc(
|
||||
sections=[
|
||||
Section(type=SectionType.TEXT, text="Leading text.", link="ltext"),
|
||||
Section(
|
||||
type=SectionType.IMAGE,
|
||||
text="img summary",
|
||||
link="limg",
|
||||
image_file_id="img-final",
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
chunks = dc.chunk(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 2
|
||||
assert chunks[0].content == "Leading text."
|
||||
assert chunks[0].image_file_id is None
|
||||
assert chunks[1].content == "img summary"
|
||||
assert chunks[1].image_file_id == "img-final"
|
||||
# Crucially: no third empty chunk got appended at the end.
|
||||
|
||||
|
||||
def test_no_trailing_empty_chunk_when_last_section_was_oversized() -> None:
|
||||
"""Same guarantee for oversized sections: their splits fully clear the
|
||||
accumulator, and the trailing safety branch should be a no-op."""
|
||||
dc = _make_document_chunker()
|
||||
big = (
|
||||
"Alpha beta gamma. Delta epsilon zeta. Eta theta iota. "
|
||||
"Kappa lambda mu. Nu xi omicron. Pi rho sigma. Tau upsilon phi. "
|
||||
"Chi psi omega. One two three. Four five six. Seven eight nine. "
|
||||
"Ten eleven twelve. Thirteen fourteen fifteen. Sixteen seventeen."
|
||||
)
|
||||
assert len(big) > CHUNK_LIMIT
|
||||
doc = _make_doc(sections=[Section(type=SectionType.TEXT, text=big, link="l-big")])
|
||||
|
||||
chunks = dc.chunk(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
# Every chunk should be non-empty — no dangling "" chunk at the tail.
|
||||
assert all(c.content.strip() for c in chunks)
|
||||
@@ -505,6 +505,7 @@ class TestGetLMStudioAvailableModels:
|
||||
|
||||
mock_session = MagicMock()
|
||||
mock_provider = MagicMock()
|
||||
mock_provider.api_base = "http://localhost:1234"
|
||||
mock_provider.custom_config = {"LM_STUDIO_API_KEY": "stored-secret"}
|
||||
|
||||
response = {
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
from uuid import uuid4
|
||||
@@ -9,7 +10,9 @@ from uuid import uuid4
|
||||
from fastapi import Response
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
|
||||
from ee.onyx.server.scim.api import _check_seat_availability
|
||||
from ee.onyx.server.scim.api import _scim_name_to_str
|
||||
from ee.onyx.server.scim.api import _seat_lock_id_for_tenant
|
||||
from ee.onyx.server.scim.api import create_user
|
||||
from ee.onyx.server.scim.api import delete_user
|
||||
from ee.onyx.server.scim.api import get_user
|
||||
@@ -741,3 +744,80 @@ class TestEmailCasePreservation:
|
||||
resource = parse_scim_user(result)
|
||||
assert resource.userName == "Alice@Example.COM"
|
||||
assert resource.emails[0].value == "Alice@Example.COM"
|
||||
|
||||
|
||||
class TestSeatLock:
|
||||
"""Tests for the advisory lock in _check_seat_availability."""
|
||||
|
||||
@patch("ee.onyx.server.scim.api.get_current_tenant_id", return_value="tenant_abc")
|
||||
def test_acquires_advisory_lock_before_checking(
|
||||
self,
|
||||
_mock_tenant: MagicMock,
|
||||
mock_dal: MagicMock,
|
||||
) -> None:
|
||||
"""The advisory lock must be acquired before the seat check runs."""
|
||||
call_order: list[str] = []
|
||||
|
||||
def track_execute(stmt: Any, _params: Any = None) -> None:
|
||||
if "pg_advisory_xact_lock" in str(stmt):
|
||||
call_order.append("lock")
|
||||
|
||||
mock_dal.session.execute.side_effect = track_execute
|
||||
|
||||
with patch(
|
||||
"ee.onyx.server.scim.api.fetch_ee_implementation_or_noop"
|
||||
) as mock_fetch:
|
||||
mock_result = MagicMock()
|
||||
mock_result.available = True
|
||||
mock_fn = MagicMock(return_value=mock_result)
|
||||
mock_fetch.return_value = mock_fn
|
||||
|
||||
def track_check(*_args: Any, **_kwargs: Any) -> Any:
|
||||
call_order.append("check")
|
||||
return mock_result
|
||||
|
||||
mock_fn.side_effect = track_check
|
||||
|
||||
_check_seat_availability(mock_dal)
|
||||
|
||||
assert call_order == ["lock", "check"]
|
||||
|
||||
@patch("ee.onyx.server.scim.api.get_current_tenant_id", return_value="tenant_xyz")
|
||||
def test_lock_uses_tenant_scoped_key(
|
||||
self,
|
||||
_mock_tenant: MagicMock,
|
||||
mock_dal: MagicMock,
|
||||
) -> None:
|
||||
"""The lock id must be derived from the tenant via _seat_lock_id_for_tenant."""
|
||||
mock_result = MagicMock()
|
||||
mock_result.available = True
|
||||
mock_check = MagicMock(return_value=mock_result)
|
||||
|
||||
with patch(
|
||||
"ee.onyx.server.scim.api.fetch_ee_implementation_or_noop",
|
||||
return_value=mock_check,
|
||||
):
|
||||
_check_seat_availability(mock_dal)
|
||||
|
||||
mock_dal.session.execute.assert_called_once()
|
||||
params = mock_dal.session.execute.call_args[0][1]
|
||||
assert params["lock_id"] == _seat_lock_id_for_tenant("tenant_xyz")
|
||||
|
||||
def test_seat_lock_id_is_stable_and_tenant_scoped(self) -> None:
|
||||
"""Lock id must be deterministic and differ across tenants."""
|
||||
assert _seat_lock_id_for_tenant("t1") == _seat_lock_id_for_tenant("t1")
|
||||
assert _seat_lock_id_for_tenant("t1") != _seat_lock_id_for_tenant("t2")
|
||||
|
||||
def test_no_lock_when_ee_absent(
|
||||
self,
|
||||
mock_dal: MagicMock,
|
||||
) -> None:
|
||||
"""No advisory lock should be acquired when the EE check is absent."""
|
||||
with patch(
|
||||
"ee.onyx.server.scim.api.fetch_ee_implementation_or_noop",
|
||||
return_value=None,
|
||||
):
|
||||
result = _check_seat_availability(mock_dal)
|
||||
|
||||
assert result is None
|
||||
mock_dal.session.execute.assert_not_called()
|
||||
|
||||
128
backend/tests/unit/server/metrics/test_pruning_metrics.py
Normal file
128
backend/tests/unit/server/metrics/test_pruning_metrics.py
Normal file
@@ -0,0 +1,128 @@
|
||||
"""Tests for pruning-specific Prometheus metrics."""
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.server.metrics.pruning_metrics import inc_pruning_rate_limit_error
|
||||
from onyx.server.metrics.pruning_metrics import observe_pruning_diff_duration
|
||||
from onyx.server.metrics.pruning_metrics import observe_pruning_enumeration_duration
|
||||
from onyx.server.metrics.pruning_metrics import PRUNING_DIFF_DURATION
|
||||
from onyx.server.metrics.pruning_metrics import PRUNING_ENUMERATION_DURATION
|
||||
from onyx.server.metrics.pruning_metrics import PRUNING_RATE_LIMIT_ERRORS
|
||||
|
||||
|
||||
class TestObservePruningEnumerationDuration:
|
||||
def test_observes_duration(self) -> None:
|
||||
before = PRUNING_ENUMERATION_DURATION.labels(
|
||||
connector_type="google_drive"
|
||||
)._sum.get()
|
||||
|
||||
observe_pruning_enumeration_duration(10.0, "google_drive")
|
||||
|
||||
after = PRUNING_ENUMERATION_DURATION.labels(
|
||||
connector_type="google_drive"
|
||||
)._sum.get()
|
||||
assert after == pytest.approx(before + 10.0)
|
||||
|
||||
def test_labels_by_connector_type(self) -> None:
|
||||
before_gd = PRUNING_ENUMERATION_DURATION.labels(
|
||||
connector_type="google_drive"
|
||||
)._sum.get()
|
||||
before_conf = PRUNING_ENUMERATION_DURATION.labels(
|
||||
connector_type="confluence"
|
||||
)._sum.get()
|
||||
|
||||
observe_pruning_enumeration_duration(5.0, "google_drive")
|
||||
|
||||
after_gd = PRUNING_ENUMERATION_DURATION.labels(
|
||||
connector_type="google_drive"
|
||||
)._sum.get()
|
||||
after_conf = PRUNING_ENUMERATION_DURATION.labels(
|
||||
connector_type="confluence"
|
||||
)._sum.get()
|
||||
|
||||
assert after_gd == pytest.approx(before_gd + 5.0)
|
||||
assert after_conf == pytest.approx(before_conf)
|
||||
|
||||
def test_does_not_raise_on_exception(self, monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
monkeypatch.setattr(
|
||||
PRUNING_ENUMERATION_DURATION,
|
||||
"labels",
|
||||
lambda **_: (_ for _ in ()).throw(RuntimeError("boom")),
|
||||
)
|
||||
observe_pruning_enumeration_duration(1.0, "google_drive")
|
||||
|
||||
|
||||
class TestObservePruningDiffDuration:
|
||||
def test_observes_duration(self) -> None:
|
||||
before = PRUNING_DIFF_DURATION.labels(connector_type="confluence")._sum.get()
|
||||
|
||||
observe_pruning_diff_duration(3.0, "confluence")
|
||||
|
||||
after = PRUNING_DIFF_DURATION.labels(connector_type="confluence")._sum.get()
|
||||
assert after == pytest.approx(before + 3.0)
|
||||
|
||||
def test_labels_by_connector_type(self) -> None:
|
||||
before_conf = PRUNING_DIFF_DURATION.labels(
|
||||
connector_type="confluence"
|
||||
)._sum.get()
|
||||
before_slack = PRUNING_DIFF_DURATION.labels(connector_type="slack")._sum.get()
|
||||
|
||||
observe_pruning_diff_duration(2.0, "confluence")
|
||||
|
||||
after_conf = PRUNING_DIFF_DURATION.labels(
|
||||
connector_type="confluence"
|
||||
)._sum.get()
|
||||
after_slack = PRUNING_DIFF_DURATION.labels(connector_type="slack")._sum.get()
|
||||
|
||||
assert after_conf == pytest.approx(before_conf + 2.0)
|
||||
assert after_slack == pytest.approx(before_slack)
|
||||
|
||||
def test_does_not_raise_on_exception(self, monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
monkeypatch.setattr(
|
||||
PRUNING_DIFF_DURATION,
|
||||
"labels",
|
||||
lambda **_: (_ for _ in ()).throw(RuntimeError("boom")),
|
||||
)
|
||||
observe_pruning_diff_duration(1.0, "confluence")
|
||||
|
||||
|
||||
class TestIncPruningRateLimitError:
|
||||
def test_increments_counter(self) -> None:
|
||||
before = PRUNING_RATE_LIMIT_ERRORS.labels(
|
||||
connector_type="google_drive"
|
||||
)._value.get()
|
||||
|
||||
inc_pruning_rate_limit_error("google_drive")
|
||||
|
||||
after = PRUNING_RATE_LIMIT_ERRORS.labels(
|
||||
connector_type="google_drive"
|
||||
)._value.get()
|
||||
assert after == before + 1
|
||||
|
||||
def test_labels_by_connector_type(self) -> None:
|
||||
before_gd = PRUNING_RATE_LIMIT_ERRORS.labels(
|
||||
connector_type="google_drive"
|
||||
)._value.get()
|
||||
before_jira = PRUNING_RATE_LIMIT_ERRORS.labels(
|
||||
connector_type="jira"
|
||||
)._value.get()
|
||||
|
||||
inc_pruning_rate_limit_error("google_drive")
|
||||
|
||||
after_gd = PRUNING_RATE_LIMIT_ERRORS.labels(
|
||||
connector_type="google_drive"
|
||||
)._value.get()
|
||||
after_jira = PRUNING_RATE_LIMIT_ERRORS.labels(
|
||||
connector_type="jira"
|
||||
)._value.get()
|
||||
|
||||
assert after_gd == before_gd + 1
|
||||
assert after_jira == before_jira
|
||||
|
||||
def test_does_not_raise_on_exception(self, monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
monkeypatch.setattr(
|
||||
PRUNING_RATE_LIMIT_ERRORS,
|
||||
"labels",
|
||||
lambda **_: (_ for _ in ()).throw(RuntimeError("boom")),
|
||||
)
|
||||
inc_pruning_rate_limit_error("google_drive")
|
||||
@@ -1,3 +1,17 @@
|
||||
# OAuth callback page must be served by the web server (Next.js),
|
||||
# not the MCP server. Exact match takes priority over the regex below.
|
||||
location = /mcp/oauth/callback {
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_set_header X-Forwarded-Host $host;
|
||||
proxy_set_header X-Forwarded-Port $server_port;
|
||||
proxy_set_header Host $host;
|
||||
proxy_http_version 1.1;
|
||||
proxy_redirect off;
|
||||
proxy_pass http://web_server;
|
||||
}
|
||||
|
||||
# MCP Server - Model Context Protocol for LLM integrations
|
||||
# Match /mcp, /mcp/, or /mcp/* but NOT /mcpserver, /mcpapi, etc.
|
||||
location ~ ^/mcp(/.*)?$ {
|
||||
|
||||
@@ -19,6 +19,6 @@ dependencies:
|
||||
version: 5.4.0
|
||||
- name: code-interpreter
|
||||
repository: https://onyx-dot-app.github.io/python-sandbox/
|
||||
version: 0.3.2
|
||||
digest: sha256:74908ea45ace2b4be913ff762772e6d87e40bab64e92c6662aa51730eaeb9d87
|
||||
generated: "2026-04-06T15:34:02.597166-07:00"
|
||||
version: 0.3.3
|
||||
digest: sha256:a57f29088b1624a72f6c70e4c3ccc2f2aad675e4624278c4e9be92083d6d5dad
|
||||
generated: "2026-04-08T16:47:29.33368-07:00"
|
||||
|
||||
@@ -5,7 +5,7 @@ home: https://www.onyx.app/
|
||||
sources:
|
||||
- "https://github.com/onyx-dot-app/onyx"
|
||||
type: application
|
||||
version: 0.4.40
|
||||
version: 0.4.44
|
||||
appVersion: latest
|
||||
annotations:
|
||||
category: Productivity
|
||||
@@ -45,6 +45,6 @@ dependencies:
|
||||
repository: https://charts.min.io/
|
||||
condition: minio.enabled
|
||||
- name: code-interpreter
|
||||
version: 0.3.2
|
||||
version: 0.3.3
|
||||
repository: https://onyx-dot-app.github.io/python-sandbox/
|
||||
condition: codeInterpreter.enabled
|
||||
|
||||
@@ -0,0 +1,349 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": { "type": "grafana", "uid": "-- Grafana --" },
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 1,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": true,
|
||||
"panels": [
|
||||
{
|
||||
"title": "Client-Side Search Latency (P50 / P95 / P99)",
|
||||
"description": "End-to-end latency as measured by the Python client, including network round-trip and serialization overhead.",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 0 },
|
||||
"id": 1,
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisLabel": "seconds",
|
||||
"axisPlacement": "auto",
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"lineInterpolation": "smooth",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": { "type": "linear" },
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": { "group": "A", "mode": "none" },
|
||||
"thresholdsStyle": { "mode": "dashed" }
|
||||
},
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 0.5 },
|
||||
{ "color": "red", "value": 2.0 }
|
||||
]
|
||||
},
|
||||
"unit": "s",
|
||||
"min": 0
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
|
||||
"legendFormat": "P50",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "histogram_quantile(0.95, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
|
||||
"legendFormat": "P95",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "histogram_quantile(0.99, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
|
||||
"legendFormat": "P99",
|
||||
"refId": "C"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Server-Side Search Latency (P50 / P95 / P99)",
|
||||
"description": "OpenSearch server-side execution time from the 'took' field in the response. Does not include network or client-side overhead.",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 0 },
|
||||
"id": 2,
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisLabel": "seconds",
|
||||
"axisPlacement": "auto",
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"lineInterpolation": "smooth",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": { "type": "linear" },
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": { "group": "A", "mode": "none" },
|
||||
"thresholdsStyle": { "mode": "dashed" }
|
||||
},
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 0.5 },
|
||||
{ "color": "red", "value": 2.0 }
|
||||
]
|
||||
},
|
||||
"unit": "s",
|
||||
"min": 0
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
|
||||
"legendFormat": "P50",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "histogram_quantile(0.95, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
|
||||
"legendFormat": "P95",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "histogram_quantile(0.99, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
|
||||
"legendFormat": "P99",
|
||||
"refId": "C"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Client-Side Latency by Search Type (P95)",
|
||||
"description": "P95 client-side latency broken down by search type (hybrid, keyword, semantic, random, doc_id_retrieval).",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 10 },
|
||||
"id": 3,
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisLabel": "seconds",
|
||||
"axisPlacement": "auto",
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"lineInterpolation": "smooth",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": { "type": "linear" },
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": { "group": "A", "mode": "none" },
|
||||
"thresholdsStyle": { "mode": "off" }
|
||||
},
|
||||
"unit": "s",
|
||||
"min": 0
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "histogram_quantile(0.95, sum by (search_type, le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
|
||||
"legendFormat": "{{ search_type }}",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Search Throughput by Type",
|
||||
"description": "Searches per second broken down by search type.",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 10 },
|
||||
"id": 4,
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisLabel": "searches/s",
|
||||
"axisPlacement": "auto",
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"lineInterpolation": "smooth",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": { "type": "linear" },
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": { "group": "A", "mode": "normal" },
|
||||
"thresholdsStyle": { "mode": "off" }
|
||||
},
|
||||
"unit": "ops",
|
||||
"min": 0
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "sum by (search_type) (rate(onyx_opensearch_search_total[5m]))",
|
||||
"legendFormat": "{{ search_type }}",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Concurrent Searches In Progress",
|
||||
"description": "Number of OpenSearch searches currently in flight, broken down by search type. Summed across all instances.",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 20 },
|
||||
"id": 5,
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisLabel": "searches",
|
||||
"axisPlacement": "auto",
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"lineInterpolation": "smooth",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": { "type": "linear" },
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": { "group": "A", "mode": "normal" },
|
||||
"thresholdsStyle": { "mode": "off" }
|
||||
},
|
||||
"min": 0
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "sum by (search_type) (onyx_opensearch_searches_in_progress)",
|
||||
"legendFormat": "{{ search_type }}",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Client vs Server Latency Overhead (P50)",
|
||||
"description": "Difference between client-side and server-side P50 latency. Reveals network, serialization, and untracked OpenSearch overhead.",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 20 },
|
||||
"id": 6,
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisLabel": "seconds",
|
||||
"axisPlacement": "auto",
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"lineInterpolation": "smooth",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": { "type": "linear" },
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": { "group": "A", "mode": "none" },
|
||||
"thresholdsStyle": { "mode": "off" }
|
||||
},
|
||||
"unit": "s",
|
||||
"min": 0
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m]))) - histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
|
||||
"legendFormat": "Client - Server overhead (P50)",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
|
||||
"legendFormat": "Client P50",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
|
||||
"legendFormat": "Server P50",
|
||||
"refId": "C"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"refresh": "5s",
|
||||
"schemaVersion": 37,
|
||||
"style": "dark",
|
||||
"tags": ["onyx", "opensearch", "search", "latency"],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "Prometheus",
|
||||
"value": "prometheus"
|
||||
},
|
||||
"includeAll": false,
|
||||
"name": "DS_PROMETHEUS",
|
||||
"options": [],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"type": "datasource"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": { "from": "now-60m", "to": "now" },
|
||||
"timepicker": {
|
||||
"refresh_intervals": ["5s", "10s", "30s", "1m"]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "Onyx OpenSearch Search Latency",
|
||||
"uid": "onyx-opensearch-search-latency",
|
||||
"version": 0,
|
||||
"weekStart": ""
|
||||
}
|
||||
606
deployment/helm/charts/onyx/dashboards/redis-queues.json
Normal file
606
deployment/helm/charts/onyx/dashboards/redis-queues.json
Normal file
@@ -0,0 +1,606 @@
|
||||
{
|
||||
"id": null,
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 18,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 4,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 10
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 50
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["lastNotNull", "max"],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"expr": "onyx_celery_queue_depth{queue=~\"$queue\"}",
|
||||
"legendFormat": "{{queue}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Queue Depth by Queue",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 20
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 100
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 10
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"expr": "sum(onyx_celery_queue_depth)",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Total Queued Tasks",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 20
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 100
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"y": 10
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"expr": "onyx_celery_unacked_tasks",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Unacked Tasks",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 10
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 50
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 10
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "center",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"expr": "onyx_celery_queue_depth{queue=\"docprocessing\"}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Docprocessing Queue",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 10
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 50
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 10
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "center",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"expr": "onyx_celery_queue_depth{queue=\"connector_doc_fetching\"}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Docfetching Queue",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "bars",
|
||||
"fillOpacity": 80,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineWidth": 1,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 10
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 50
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 14
|
||||
},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": false
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"expr": "topk(10, onyx_celery_queue_depth)",
|
||||
"legendFormat": "{{queue}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Top 10 Queue Backlogs",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"align": "auto",
|
||||
"cellOptions": {
|
||||
"type": "auto"
|
||||
},
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 10
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 50
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 14
|
||||
},
|
||||
"id": 7,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"footer": {
|
||||
"countRows": false,
|
||||
"fields": "",
|
||||
"reducer": ["sum"],
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
"displayName": "Value"
|
||||
}
|
||||
]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"expr": "sort_desc(onyx_celery_queue_depth)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Current Queue Depth",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "labelsToFields",
|
||||
"options": {
|
||||
"mode": "columns"
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 39,
|
||||
"style": "dark",
|
||||
"tags": ["onyx", "redis", "celery"],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "Prometheus",
|
||||
"value": "Prometheus"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Datasource",
|
||||
"name": "DS_PROMETHEUS",
|
||||
"options": [],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "All",
|
||||
"value": ".*"
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"definition": "label_values(onyx_celery_queue_depth, queue)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Queue",
|
||||
"multi": true,
|
||||
"name": "queue",
|
||||
"options": [],
|
||||
"query": {
|
||||
"query": "label_values(onyx_celery_queue_depth, queue)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Onyx Redis Queues",
|
||||
"uid": "onyx-redis-queues",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
{{- if .Values.monitoring.serviceMonitors.enabled }}
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: {{ include "onyx.fullname" . }}-api
|
||||
labels:
|
||||
{{- include "onyx.labels" . | nindent 4 }}
|
||||
{{- with .Values.monitoring.serviceMonitors.labels }}
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- {{ .Release.Namespace }}
|
||||
selector:
|
||||
matchLabels:
|
||||
app: {{ .Values.api.deploymentLabels.app }}
|
||||
endpoints:
|
||||
- port: api-server-port
|
||||
path: /metrics
|
||||
interval: 30s
|
||||
scrapeTimeout: 10s
|
||||
{{- end }}
|
||||
@@ -0,0 +1,26 @@
|
||||
{{- /* Metrics port must match the default in metrics_server.py (_DEFAULT_PORTS).
|
||||
Do NOT use PROMETHEUS_METRICS_PORT env var in Helm — each worker needs its own port. */ -}}
|
||||
{{- if gt (int .Values.celery_worker_heavy.replicaCount) 0 }}
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ include "onyx.fullname" . }}-celery-worker-heavy-metrics
|
||||
labels:
|
||||
{{- include "onyx.labels" . | nindent 4 }}
|
||||
{{- if .Values.celery_worker_heavy.deploymentLabels }}
|
||||
{{- toYaml .Values.celery_worker_heavy.deploymentLabels | nindent 4 }}
|
||||
{{- end }}
|
||||
metrics: "true"
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 9094
|
||||
targetPort: metrics
|
||||
protocol: TCP
|
||||
name: metrics
|
||||
selector:
|
||||
{{- include "onyx.selectorLabels" . | nindent 4 }}
|
||||
{{- if .Values.celery_worker_heavy.deploymentLabels }}
|
||||
{{- toYaml .Values.celery_worker_heavy.deploymentLabels | nindent 4 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
@@ -70,6 +70,10 @@ spec:
|
||||
"-Q",
|
||||
"connector_pruning,connector_doc_permissions_sync,connector_external_group_sync,csv_generation,sandbox",
|
||||
]
|
||||
ports:
|
||||
- name: metrics
|
||||
containerPort: 9094
|
||||
protocol: TCP
|
||||
resources:
|
||||
{{- toYaml .Values.celery_worker_heavy.resources | nindent 12 }}
|
||||
envFrom:
|
||||
|
||||
@@ -74,4 +74,29 @@ spec:
|
||||
interval: 30s
|
||||
scrapeTimeout: 10s
|
||||
{{- end }}
|
||||
{{- if gt (int .Values.celery_worker_heavy.replicaCount) 0 }}
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: {{ include "onyx.fullname" . }}-celery-worker-heavy
|
||||
labels:
|
||||
{{- include "onyx.labels" . | nindent 4 }}
|
||||
{{- with .Values.monitoring.serviceMonitors.labels }}
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- {{ .Release.Namespace }}
|
||||
selector:
|
||||
matchLabels:
|
||||
app: {{ .Values.celery_worker_heavy.deploymentLabels.app }}
|
||||
metrics: "true"
|
||||
endpoints:
|
||||
- port: metrics
|
||||
path: /metrics
|
||||
interval: 30s
|
||||
scrapeTimeout: 10s
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
@@ -12,4 +12,30 @@ metadata:
|
||||
data:
|
||||
onyx-indexing-pipeline.json: |
|
||||
{{- .Files.Get "dashboards/indexing-pipeline.json" | nindent 4 }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: {{ include "onyx.fullname" . }}-opensearch-search-latency-dashboard
|
||||
labels:
|
||||
{{- include "onyx.labels" . | nindent 4 }}
|
||||
grafana_dashboard: "1"
|
||||
annotations:
|
||||
grafana_folder: "Onyx"
|
||||
data:
|
||||
onyx-opensearch-search-latency.json: |
|
||||
{{- .Files.Get "dashboards/opensearch-search-latency.json" | nindent 4 }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: {{ include "onyx.fullname" . }}-redis-queues-dashboard
|
||||
labels:
|
||||
{{- include "onyx.labels" . | nindent 4 }}
|
||||
grafana_dashboard: "1"
|
||||
annotations:
|
||||
grafana_folder: "Onyx"
|
||||
data:
|
||||
onyx-redis-queues.json: |
|
||||
{{- .Files.Get "dashboards/redis-queues.json" | nindent 4 }}
|
||||
{{- end }}
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
{{- if and .Values.ingress.enabled .Values.mcpServer.enabled -}}
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: {{ include "onyx.fullname" . }}-ingress-mcp-oauth-callback
|
||||
annotations:
|
||||
{{- if not .Values.ingress.className }}
|
||||
kubernetes.io/ingress.class: nginx
|
||||
{{- end }}
|
||||
cert-manager.io/cluster-issuer: {{ include "onyx.fullname" . }}-letsencrypt
|
||||
spec:
|
||||
{{- if .Values.ingress.className }}
|
||||
ingressClassName: {{ .Values.ingress.className }}
|
||||
{{- end }}
|
||||
rules:
|
||||
- host: {{ .Values.ingress.api.host }}
|
||||
http:
|
||||
paths:
|
||||
- path: /mcp/oauth/callback
|
||||
pathType: Exact
|
||||
backend:
|
||||
service:
|
||||
name: {{ include "onyx.fullname" . }}-webserver
|
||||
port:
|
||||
number: {{ .Values.webserver.service.servicePort }}
|
||||
tls:
|
||||
- hosts:
|
||||
- {{ .Values.ingress.api.host }}
|
||||
secretName: {{ include "onyx.fullname" . }}-ingress-mcp-oauth-callback-tls
|
||||
{{- end }}
|
||||
@@ -42,6 +42,22 @@ data:
|
||||
client_max_body_size 5G;
|
||||
{{- if .Values.mcpServer.enabled }}
|
||||
|
||||
# OAuth callback page must be served by the web server (Next.js),
|
||||
# not the MCP server. Exact match takes priority over the regex below.
|
||||
location = /mcp/oauth/callback {
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_set_header X-Forwarded-Host $host;
|
||||
proxy_set_header Host $host;
|
||||
proxy_http_version 1.1;
|
||||
proxy_redirect off;
|
||||
proxy_connect_timeout {{ .Values.nginx.timeouts.connect }}s;
|
||||
proxy_send_timeout {{ .Values.nginx.timeouts.send }}s;
|
||||
proxy_read_timeout {{ .Values.nginx.timeouts.read }}s;
|
||||
proxy_pass http://web_server;
|
||||
}
|
||||
|
||||
# MCP Server - Model Context Protocol for LLM integrations
|
||||
# Match /mcp, /mcp/, or /mcp/* but NOT /mcpserver, /mcpapi, etc.
|
||||
location ~ ^/mcp(/.*)?$ {
|
||||
|
||||
@@ -264,7 +264,7 @@ monitoring:
|
||||
# The sidecar must be configured with label selector: grafana_dashboard=1
|
||||
enabled: false
|
||||
serviceMonitors:
|
||||
# -- Set to true to deploy ServiceMonitor resources for Celery worker metrics endpoints.
|
||||
# -- Set to true to deploy ServiceMonitor resources for API server and Celery worker metrics endpoints.
|
||||
# Requires the Prometheus Operator CRDs (included in kube-prometheus-stack).
|
||||
# Use `labels` to match your Prometheus CR's serviceMonitorSelector (e.g. release: onyx-monitoring).
|
||||
enabled: false
|
||||
@@ -296,7 +296,7 @@ nginx:
|
||||
# The ingress-nginx subchart doesn't auto-detect our custom ConfigMap changes.
|
||||
# Workaround: Helm upgrade will restart if the following annotation value changes.
|
||||
podAnnotations:
|
||||
onyx.app/nginx-config-version: "3"
|
||||
onyx.app/nginx-config-version: "4"
|
||||
|
||||
# Propagate DOMAIN into nginx so server_name continues to use the same env var
|
||||
extraEnvs:
|
||||
|
||||
@@ -22,6 +22,10 @@ variable "CLI_REPOSITORY" {
|
||||
default = "onyxdotapp/onyx-cli"
|
||||
}
|
||||
|
||||
variable "DEVCONTAINER_REPOSITORY" {
|
||||
default = "onyxdotapp/onyx-devcontainer"
|
||||
}
|
||||
|
||||
variable "TAG" {
|
||||
default = "latest"
|
||||
}
|
||||
@@ -90,3 +94,16 @@ target "cli" {
|
||||
|
||||
tags = ["${CLI_REPOSITORY}:${TAG}"]
|
||||
}
|
||||
|
||||
target "devcontainer" {
|
||||
context = ".devcontainer"
|
||||
dockerfile = "Dockerfile"
|
||||
|
||||
cache-from = [
|
||||
"type=registry,ref=${DEVCONTAINER_REPOSITORY}:latest",
|
||||
"type=registry,ref=${DEVCONTAINER_REPOSITORY}:edge",
|
||||
]
|
||||
cache-to = ["type=inline"]
|
||||
|
||||
tags = ["${DEVCONTAINER_REPOSITORY}:${TAG}"]
|
||||
}
|
||||
|
||||
82
examples/widget/package-lock.json
generated
82
examples/widget/package-lock.json
generated
@@ -8,7 +8,7 @@
|
||||
"name": "widget",
|
||||
"version": "0.1.0",
|
||||
"dependencies": {
|
||||
"next": "^16.1.7",
|
||||
"next": "^16.2.3",
|
||||
"react": "^19",
|
||||
"react-dom": "^19",
|
||||
"react-markdown": "^10.1.0"
|
||||
@@ -1023,9 +1023,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/env": {
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-16.1.7.tgz",
|
||||
"integrity": "sha512-rJJbIdJB/RQr2F1nylZr/PJzamvNNhfr3brdKP6s/GW850jbtR70QlSfFselvIBbcPUOlQwBakexjFzqLzF6pg==",
|
||||
"version": "16.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-16.2.3.tgz",
|
||||
"integrity": "sha512-ZWXyj4uNu4GCWQw9cjRxWlbD+33mcDszIo9iQxFnBX3Wmgq9ulaSJcl6VhuWx5pCWqqD+9W6Wfz7N0lM5lYPMA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@next/eslint-plugin-next": {
|
||||
@@ -1039,9 +1039,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-arm64": {
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.1.7.tgz",
|
||||
"integrity": "sha512-b2wWIE8sABdyafc4IM8r5Y/dS6kD80JRtOGrUiKTsACFQfWWgUQ2NwoUX1yjFMXVsAwcQeNpnucF2ZrujsBBPg==",
|
||||
"version": "16.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.2.3.tgz",
|
||||
"integrity": "sha512-u37KDKTKQ+OQLvY+z7SNXixwo4Q2/IAJFDzU1fYe66IbCE51aDSAzkNDkWmLN0yjTUh4BKBd+hb69jYn6qqqSg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -1055,9 +1055,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-x64": {
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.1.7.tgz",
|
||||
"integrity": "sha512-zcnVaaZulS1WL0Ss38R5Q6D2gz7MtBu8GZLPfK+73D/hp4GFMrC2sudLky1QibfV7h6RJBJs/gOFvYP0X7UVlQ==",
|
||||
"version": "16.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.2.3.tgz",
|
||||
"integrity": "sha512-gHjL/qy6Q6CG3176FWbAKyKh9IfntKZTB3RY/YOJdDFpHGsUDXVH38U4mMNpHVGXmeYW4wj22dMp1lTfmu/bTQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -1071,9 +1071,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-gnu": {
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.1.7.tgz",
|
||||
"integrity": "sha512-2ant89Lux/Q3VyC8vNVg7uBaFVP9SwoK2jJOOR0L8TQnX8CAYnh4uctAScy2Hwj2dgjVHqHLORQZJ2wH6VxhSQ==",
|
||||
"version": "16.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.2.3.tgz",
|
||||
"integrity": "sha512-U6vtblPtU/P14Y/b/n9ZY0GOxbbIhTFuaFR7F4/uMBidCi2nSdaOFhA0Go81L61Zd6527+yvuX44T4ksnf8T+Q==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -1087,9 +1087,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-musl": {
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.1.7.tgz",
|
||||
"integrity": "sha512-uufcze7LYv0FQg9GnNeZ3/whYfo+1Q3HnQpm16o6Uyi0OVzLlk2ZWoY7j07KADZFY8qwDbsmFnMQP3p3+Ftprw==",
|
||||
"version": "16.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.2.3.tgz",
|
||||
"integrity": "sha512-/YV0LgjHUmfhQpn9bVoGc4x4nan64pkhWR5wyEV8yCOfwwrH630KpvRg86olQHTwHIn1z59uh6JwKvHq1h4QEw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -1103,9 +1103,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-gnu": {
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.1.7.tgz",
|
||||
"integrity": "sha512-KWVf2gxYvHtvuT+c4MBOGxuse5TD7DsMFYSxVxRBnOzok/xryNeQSjXgxSv9QpIVlaGzEn/pIuI6Koosx8CGWA==",
|
||||
"version": "16.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.2.3.tgz",
|
||||
"integrity": "sha512-/HiWEcp+WMZ7VajuiMEFGZ6cg0+aYZPqCJD3YJEfpVWQsKYSjXQG06vJP6F1rdA03COD9Fef4aODs3YxKx+RDQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -1119,9 +1119,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-musl": {
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.1.7.tgz",
|
||||
"integrity": "sha512-HguhaGwsGr1YAGs68uRKc4aGWxLET+NevJskOcCAwXbwj0fYX0RgZW2gsOCzr9S11CSQPIkxmoSbuVaBp4Z3dA==",
|
||||
"version": "16.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.2.3.tgz",
|
||||
"integrity": "sha512-Kt44hGJfZSefebhk/7nIdivoDr3Ugp5+oNz9VvF3GUtfxutucUIHfIO0ZYO8QlOPDQloUVQn4NVC/9JvHRk9hw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -1135,9 +1135,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-arm64-msvc": {
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.1.7.tgz",
|
||||
"integrity": "sha512-S0n3KrDJokKTeFyM/vGGGR8+pCmXYrjNTk2ZozOL1C/JFdfUIL9O1ATaJOl5r2POe56iRChbsszrjMAdWSv7kQ==",
|
||||
"version": "16.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.2.3.tgz",
|
||||
"integrity": "sha512-O2NZ9ie3Tq6xj5Z5CSwBT3+aWAMW2PIZ4egUi9MaWLkwaehgtB7YZjPm+UpcNpKOme0IQuqDcor7BsW6QBiQBw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -1151,9 +1151,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-x64-msvc": {
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.1.7.tgz",
|
||||
"integrity": "sha512-mwgtg8CNZGYm06LeEd+bNnOUfwOyNem/rOiP14Lsz+AnUY92Zq/LXwtebtUiaeVkhbroRCQ0c8GlR4UT1U+0yg==",
|
||||
"version": "16.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.2.3.tgz",
|
||||
"integrity": "sha512-Ibm29/GgB/ab5n7XKqlStkm54qqZE8v2FnijUPBgrd67FWrac45o/RsNlaOWjme/B5UqeWt/8KM4aWBwA1D2Kw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -5929,12 +5929,12 @@
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/next": {
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-16.1.7.tgz",
|
||||
"integrity": "sha512-WM0L7WrSvKwoLegLYr6V+mz+RIofqQgVAfHhMp9a88ms0cFX8iX9ew+snpWlSBwpkURJOUdvCEt3uLl3NNzvWg==",
|
||||
"version": "16.2.3",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-16.2.3.tgz",
|
||||
"integrity": "sha512-9V3zV4oZFza3PVev5/poB9g0dEafVcgNyQ8eTRop8GvxZjV2G15FC5ARuG1eFD42QgeYkzJBJzHghNP8Ad9xtA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@next/env": "16.1.7",
|
||||
"@next/env": "16.2.3",
|
||||
"@swc/helpers": "0.5.15",
|
||||
"baseline-browser-mapping": "^2.9.19",
|
||||
"caniuse-lite": "^1.0.30001579",
|
||||
@@ -5948,15 +5948,15 @@
|
||||
"node": ">=20.9.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@next/swc-darwin-arm64": "16.1.7",
|
||||
"@next/swc-darwin-x64": "16.1.7",
|
||||
"@next/swc-linux-arm64-gnu": "16.1.7",
|
||||
"@next/swc-linux-arm64-musl": "16.1.7",
|
||||
"@next/swc-linux-x64-gnu": "16.1.7",
|
||||
"@next/swc-linux-x64-musl": "16.1.7",
|
||||
"@next/swc-win32-arm64-msvc": "16.1.7",
|
||||
"@next/swc-win32-x64-msvc": "16.1.7",
|
||||
"sharp": "^0.34.4"
|
||||
"@next/swc-darwin-arm64": "16.2.3",
|
||||
"@next/swc-darwin-x64": "16.2.3",
|
||||
"@next/swc-linux-arm64-gnu": "16.2.3",
|
||||
"@next/swc-linux-arm64-musl": "16.2.3",
|
||||
"@next/swc-linux-x64-gnu": "16.2.3",
|
||||
"@next/swc-linux-x64-musl": "16.2.3",
|
||||
"@next/swc-win32-arm64-msvc": "16.2.3",
|
||||
"@next/swc-win32-x64-msvc": "16.2.3",
|
||||
"sharp": "^0.34.5"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@opentelemetry/api": "^1.1.0",
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"lint": "next lint"
|
||||
},
|
||||
"dependencies": {
|
||||
"next": "^16.1.7",
|
||||
"next": "^16.2.3",
|
||||
"react": "^19",
|
||||
"react-dom": "^19",
|
||||
"react-markdown": "^10.1.0"
|
||||
|
||||
@@ -28,7 +28,7 @@ dependencies = [
|
||||
"kubernetes>=31.0.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
[dependency-groups]
|
||||
# Main backend application dependencies
|
||||
backend = [
|
||||
"aiohttp==3.13.4",
|
||||
@@ -148,7 +148,7 @@ dev = [
|
||||
"matplotlib==3.10.8",
|
||||
"mypy-extensions==1.0.0",
|
||||
"mypy==1.13.0",
|
||||
"onyx-devtools==0.7.2",
|
||||
"onyx-devtools==0.7.5",
|
||||
"openapi-generator-cli==7.17.0",
|
||||
"pandas-stubs~=2.3.3",
|
||||
"pre-commit==3.2.2",
|
||||
@@ -195,6 +195,9 @@ model_server = [
|
||||
"sentry-sdk[fastapi,celery,starlette]==2.14.0",
|
||||
]
|
||||
|
||||
[tool.uv]
|
||||
default-groups = ["backend", "dev", "ee", "model_server"]
|
||||
|
||||
[tool.mypy]
|
||||
plugins = "sqlalchemy.ext.mypy.plugin"
|
||||
mypy_path = "backend"
|
||||
@@ -230,7 +233,7 @@ follow_imports = "skip"
|
||||
ignore_errors = true
|
||||
|
||||
[tool.uv.workspace]
|
||||
members = ["backend", "tools/ods"]
|
||||
members = ["tools/ods"]
|
||||
|
||||
[tool.basedpyright]
|
||||
include = ["backend"]
|
||||
|
||||
@@ -244,6 +244,54 @@ ods web lint
|
||||
ods web test --watch
|
||||
```
|
||||
|
||||
### `dev` - Devcontainer Management
|
||||
|
||||
Manage the Onyx devcontainer. Also available as `ods dc`.
|
||||
|
||||
Requires the [devcontainer CLI](https://github.com/devcontainers/cli) (`npm install -g @devcontainers/cli`).
|
||||
|
||||
```shell
|
||||
ods dev <subcommand>
|
||||
```
|
||||
|
||||
**Subcommands:**
|
||||
|
||||
- `up` - Start the devcontainer (pulls the image if needed)
|
||||
- `into` - Open a zsh shell inside the running devcontainer
|
||||
- `exec` - Run an arbitrary command inside the devcontainer
|
||||
- `restart` - Remove and recreate the devcontainer
|
||||
- `rebuild` - Pull the latest published image and recreate
|
||||
- `stop` - Stop the running devcontainer
|
||||
|
||||
The devcontainer image is published to `onyxdotapp/onyx-devcontainer` and
|
||||
referenced by tag in `.devcontainer/devcontainer.json` — no local build needed.
|
||||
|
||||
**Examples:**
|
||||
|
||||
```shell
|
||||
# Start the devcontainer
|
||||
ods dev up
|
||||
|
||||
# Open a shell
|
||||
ods dev into
|
||||
|
||||
# Run a command
|
||||
ods dev exec -- npm test
|
||||
|
||||
# Restart the container
|
||||
ods dev restart
|
||||
|
||||
# Pull latest image and recreate
|
||||
ods dev rebuild
|
||||
|
||||
# Stop the container
|
||||
ods dev stop
|
||||
|
||||
# Same commands work with the dc alias
|
||||
ods dc up
|
||||
ods dc into
|
||||
```
|
||||
|
||||
### `db` - Database Administration
|
||||
|
||||
Manage PostgreSQL database dumps, restores, and migrations.
|
||||
|
||||
19
tools/ods/cmd/deploy.go
Normal file
19
tools/ods/cmd/deploy.go
Normal file
@@ -0,0 +1,19 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
// NewDeployCommand creates the parent `ods deploy` command. Subcommands hang
|
||||
// off it (e.g. `ods deploy edge`) and represent ad-hoc deployment workflows.
|
||||
func NewDeployCommand() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "deploy",
|
||||
Short: "Trigger ad-hoc deployments",
|
||||
Long: "Trigger ad-hoc deployments to Onyx-managed environments.",
|
||||
}
|
||||
|
||||
cmd.AddCommand(NewDeployEdgeCommand())
|
||||
|
||||
return cmd
|
||||
}
|
||||
353
tools/ods/cmd/deploy_edge.go
Normal file
353
tools/ods/cmd/deploy_edge.go
Normal file
@@ -0,0 +1,353 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/spf13/cobra"
|
||||
|
||||
"github.com/onyx-dot-app/onyx/tools/ods/internal/config"
|
||||
"github.com/onyx-dot-app/onyx/tools/ods/internal/git"
|
||||
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
|
||||
"github.com/onyx-dot-app/onyx/tools/ods/internal/prompt"
|
||||
)
|
||||
|
||||
const (
|
||||
onyxRepo = "onyx-dot-app/onyx"
|
||||
deploymentWorkflowFile = "deployment.yml"
|
||||
edgeTagName = "edge"
|
||||
|
||||
// Polling configuration. Build runs typically take 20-30 minutes; deploys
|
||||
// are much shorter. The "discover" phase polls fast for a short window
|
||||
// because the run usually appears within seconds of pushing the tag /
|
||||
// dispatching the workflow.
|
||||
runDiscoveryInterval = 5 * time.Second
|
||||
runDiscoveryTimeout = 2 * time.Minute
|
||||
runProgressInterval = 30 * time.Second
|
||||
buildPollTimeout = 60 * time.Minute
|
||||
deployPollTimeout = 30 * time.Minute
|
||||
)
|
||||
|
||||
// DeployEdgeOptions holds options for the deploy edge command.
|
||||
type DeployEdgeOptions struct {
|
||||
TargetRepo string
|
||||
TargetWorkflow string
|
||||
DryRun bool
|
||||
Yes bool
|
||||
NoWaitDeploy bool
|
||||
}
|
||||
|
||||
// NewDeployEdgeCommand creates the `ods deploy edge` command.
|
||||
func NewDeployEdgeCommand() *cobra.Command {
|
||||
opts := &DeployEdgeOptions{}
|
||||
|
||||
cmd := &cobra.Command{
|
||||
Use: "edge",
|
||||
Short: "Build edge images off main and deploy to the configured target",
|
||||
Long: `Build edge images off origin/main and dispatch the configured deploy workflow.
|
||||
|
||||
This command will:
|
||||
1. Force-push the 'edge' tag to origin/main, triggering the build
|
||||
2. Wait for the build workflow to finish
|
||||
3. Dispatch the configured deploy workflow with version_tag=edge
|
||||
4. Wait for the deploy workflow to finish
|
||||
|
||||
All GitHub operations run through the gh CLI, so authorization is enforced
|
||||
by your gh credentials and GitHub's repo/workflow permissions.
|
||||
|
||||
On first run, you'll be prompted for the deploy target repo and workflow
|
||||
filename. These are saved to the ods config file (~/.config/onyx-dev/config.json
|
||||
on Linux/macOS) and reused on subsequent runs. Pass --target-repo or
|
||||
--target-workflow to override the saved values.
|
||||
|
||||
Example usage:
|
||||
|
||||
$ ods deploy edge`,
|
||||
Args: cobra.NoArgs,
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
deployEdge(opts)
|
||||
},
|
||||
}
|
||||
|
||||
cmd.Flags().StringVar(&opts.TargetRepo, "target-repo", "", "GitHub repo (owner/name) hosting the deploy workflow; overrides saved config")
|
||||
cmd.Flags().StringVar(&opts.TargetWorkflow, "target-workflow", "", "Filename of the deploy workflow within the target repo; overrides saved config")
|
||||
cmd.Flags().BoolVar(&opts.DryRun, "dry-run", false, "Perform local operations only; skip pushing the tag and dispatching workflows")
|
||||
cmd.Flags().BoolVar(&opts.Yes, "yes", false, "Skip the confirmation prompt")
|
||||
cmd.Flags().BoolVar(&opts.NoWaitDeploy, "no-wait-deploy", false, "Do not wait for the deploy workflow to finish after dispatching it")
|
||||
|
||||
return cmd
|
||||
}
|
||||
|
||||
func deployEdge(opts *DeployEdgeOptions) {
|
||||
git.CheckGitHubCLI()
|
||||
|
||||
deployRepo, deployWorkflow := resolveDeployTarget(opts)
|
||||
|
||||
if opts.DryRun {
|
||||
log.Warning("=== DRY RUN MODE: tag push and workflow dispatch will be skipped (read-only gh and git fetch still run) ===")
|
||||
}
|
||||
|
||||
if !opts.Yes {
|
||||
msg := "About to force-push tag 'edge' to origin/main and trigger an ad-hoc deploy. Continue? (Y/n): "
|
||||
if !prompt.Confirm(msg) {
|
||||
log.Info("Exiting...")
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Capture the most recent existing edge build run id BEFORE pushing, so we
|
||||
// can reliably identify the new run we trigger and not pick up a stale one.
|
||||
priorBuildRunID, err := latestWorkflowRunID(onyxRepo, deploymentWorkflowFile, "push", edgeTagName)
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to query existing deployment runs: %v", err)
|
||||
}
|
||||
log.Debugf("Most recent prior edge build run id: %d", priorBuildRunID)
|
||||
|
||||
log.Info("Fetching origin/main...")
|
||||
if err := git.RunCommand("fetch", "origin", "main"); err != nil {
|
||||
log.Fatalf("Failed to fetch origin/main: %v", err)
|
||||
}
|
||||
|
||||
if opts.DryRun {
|
||||
log.Warnf("[DRY RUN] Would move local '%s' tag to origin/main", edgeTagName)
|
||||
log.Warnf("[DRY RUN] Would force-push tag '%s' to origin", edgeTagName)
|
||||
log.Warn("[DRY RUN] Would wait for build then dispatch the configured deploy workflow")
|
||||
return
|
||||
}
|
||||
|
||||
log.Infof("Moving local '%s' tag to origin/main...", edgeTagName)
|
||||
if err := git.RunCommand("tag", "-f", edgeTagName, "origin/main"); err != nil {
|
||||
log.Fatalf("Failed to move local tag: %v", err)
|
||||
}
|
||||
|
||||
log.Infof("Force-pushing tag '%s' to origin...", edgeTagName)
|
||||
if err := git.RunCommand("push", "-f", "origin", edgeTagName); err != nil {
|
||||
log.Fatalf("Failed to push edge tag: %v", err)
|
||||
}
|
||||
|
||||
// Find the new build run, then poll it to completion.
|
||||
log.Info("Waiting for build workflow to start...")
|
||||
buildRun, err := waitForNewRun(onyxRepo, deploymentWorkflowFile, "push", edgeTagName, priorBuildRunID)
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to find triggered build run: %v", err)
|
||||
}
|
||||
log.Infof("Build run started: %s", buildRun.URL)
|
||||
|
||||
if err := waitForRunCompletion(onyxRepo, buildRun.DatabaseID, buildPollTimeout, "build"); err != nil {
|
||||
log.Fatalf("Build did not complete successfully: %v", err)
|
||||
}
|
||||
log.Info("Build completed successfully.")
|
||||
|
||||
// Dispatch the deploy workflow.
|
||||
priorDeployRunID, err := latestWorkflowRunID(deployRepo, deployWorkflow, "workflow_dispatch", "")
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to query existing deploy runs: %v", err)
|
||||
}
|
||||
log.Debugf("Most recent prior deploy run id: %d", priorDeployRunID)
|
||||
|
||||
log.Info("Dispatching deploy workflow with version_tag=edge...")
|
||||
if err := dispatchWorkflow(deployRepo, deployWorkflow, map[string]string{"version_tag": edgeTagName}); err != nil {
|
||||
log.Fatalf("Failed to dispatch deploy workflow: %v", err)
|
||||
}
|
||||
|
||||
deployRun, err := waitForNewRun(deployRepo, deployWorkflow, "workflow_dispatch", "", priorDeployRunID)
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to find dispatched deploy run: %v", err)
|
||||
}
|
||||
log.Infof("Deploy run started: %s", deployRun.URL)
|
||||
log.Info("A kickoff Slack message will appear in #monitor-deployments.")
|
||||
|
||||
if opts.NoWaitDeploy {
|
||||
log.Info("--no-wait-deploy set; not waiting for deploy completion.")
|
||||
return
|
||||
}
|
||||
|
||||
if err := waitForRunCompletion(deployRepo, deployRun.DatabaseID, deployPollTimeout, "deploy"); err != nil {
|
||||
log.Fatalf("Deploy did not complete successfully: %v", err)
|
||||
}
|
||||
log.Info("Deploy completed successfully.")
|
||||
}
|
||||
|
||||
// resolveDeployTarget returns the deploy target repo and workflow to use,
|
||||
// preferring explicit flags, then saved config, then prompting the user on
|
||||
// first-time setup. Any newly-prompted values are persisted back to the
|
||||
// config file so subsequent runs are non-interactive.
|
||||
func resolveDeployTarget(opts *DeployEdgeOptions) (string, string) {
|
||||
cfg, err := config.Load()
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to load ods config: %v", err)
|
||||
}
|
||||
|
||||
repo := opts.TargetRepo
|
||||
if repo == "" {
|
||||
repo = cfg.DeployEdge.TargetRepo
|
||||
}
|
||||
workflow := opts.TargetWorkflow
|
||||
if workflow == "" {
|
||||
workflow = cfg.DeployEdge.TargetWorkflow
|
||||
}
|
||||
|
||||
prompted := false
|
||||
if repo == "" {
|
||||
log.Infof("First-time setup: ods will save your deploy target to %s", paths.ConfigFilePath())
|
||||
repo = prompt.String("Deploy target repo (owner/name): ")
|
||||
prompted = true
|
||||
}
|
||||
if workflow == "" {
|
||||
workflow = prompt.String("Deploy workflow filename (e.g. some-workflow.yml): ")
|
||||
prompted = true
|
||||
}
|
||||
|
||||
if prompted {
|
||||
cfg.DeployEdge.TargetRepo = repo
|
||||
cfg.DeployEdge.TargetWorkflow = workflow
|
||||
if err := config.Save(cfg); err != nil {
|
||||
log.Fatalf("Failed to save ods config: %v", err)
|
||||
}
|
||||
log.Infof("Saved deploy target to %s", paths.ConfigFilePath())
|
||||
}
|
||||
|
||||
return repo, workflow
|
||||
}
|
||||
|
||||
// workflowRun is a partial representation of a `gh run list` JSON entry.
|
||||
type workflowRun struct {
|
||||
DatabaseID int64 `json:"databaseId"`
|
||||
Status string `json:"status"`
|
||||
Conclusion string `json:"conclusion"`
|
||||
URL string `json:"url"`
|
||||
Event string `json:"event"`
|
||||
HeadBranch string `json:"headBranch"`
|
||||
}
|
||||
|
||||
// latestWorkflowRunID returns the highest databaseId for runs of the given
|
||||
// workflow filtered by event (and optional branch). Returns 0 if no runs
|
||||
// exist yet, which is a valid state.
|
||||
func latestWorkflowRunID(repo, workflowFile, event, branch string) (int64, error) {
|
||||
runs, err := listWorkflowRuns(repo, workflowFile, event, branch, 10)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
var maxID int64
|
||||
for _, r := range runs {
|
||||
if r.DatabaseID > maxID {
|
||||
maxID = r.DatabaseID
|
||||
}
|
||||
}
|
||||
return maxID, nil
|
||||
}
|
||||
|
||||
func listWorkflowRuns(repo, workflowFile, event, branch string, limit int) ([]workflowRun, error) {
|
||||
args := []string{
|
||||
"run", "list",
|
||||
"-R", repo,
|
||||
"--workflow", workflowFile,
|
||||
"--limit", fmt.Sprintf("%d", limit),
|
||||
"--json", "databaseId,status,conclusion,url,event,headBranch",
|
||||
}
|
||||
if event != "" {
|
||||
args = append(args, "--event", event)
|
||||
}
|
||||
if branch != "" {
|
||||
args = append(args, "--branch", branch)
|
||||
}
|
||||
cmd := exec.Command("gh", args...)
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
if exitErr, ok := err.(*exec.ExitError); ok {
|
||||
return nil, fmt.Errorf("gh run list failed: %w: %s", err, string(exitErr.Stderr))
|
||||
}
|
||||
return nil, fmt.Errorf("gh run list failed: %w", err)
|
||||
}
|
||||
var runs []workflowRun
|
||||
if err := json.Unmarshal(output, &runs); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse gh run list output: %w", err)
|
||||
}
|
||||
// Sort newest-first by databaseId for predictable iteration.
|
||||
sort.Slice(runs, func(i, j int) bool { return runs[i].DatabaseID > runs[j].DatabaseID })
|
||||
return runs, nil
|
||||
}
|
||||
|
||||
// waitForNewRun polls until a workflow run with databaseId > priorRunID
|
||||
// appears, or the discovery timeout fires.
|
||||
func waitForNewRun(repo, workflowFile, event, branch string, priorRunID int64) (*workflowRun, error) {
|
||||
deadline := time.Now().Add(runDiscoveryTimeout)
|
||||
for {
|
||||
runs, err := listWorkflowRuns(repo, workflowFile, event, branch, 5)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, r := range runs {
|
||||
if r.DatabaseID > priorRunID {
|
||||
return &r, nil
|
||||
}
|
||||
}
|
||||
if time.Now().After(deadline) {
|
||||
return nil, fmt.Errorf("no new run appeared within %s", runDiscoveryTimeout)
|
||||
}
|
||||
time.Sleep(runDiscoveryInterval)
|
||||
}
|
||||
}
|
||||
|
||||
// waitForRunCompletion polls a specific run until it reaches a terminal
|
||||
// status. Returns an error if the run does not conclude with success or the
|
||||
// timeout fires.
|
||||
func waitForRunCompletion(repo string, runID int64, timeout time.Duration, label string) error {
|
||||
deadline := time.Now().Add(timeout)
|
||||
for {
|
||||
run, err := getRun(repo, runID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
log.Infof("[%s] run %d status=%s conclusion=%s", label, runID, run.Status, run.Conclusion)
|
||||
if run.Status == "completed" {
|
||||
if run.Conclusion == "success" {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("%s run %d concluded with status %q (see %s)", label, runID, run.Conclusion, run.URL)
|
||||
}
|
||||
if time.Now().After(deadline) {
|
||||
return fmt.Errorf("%s run %d did not complete within %s (see %s)", label, runID, timeout, run.URL)
|
||||
}
|
||||
time.Sleep(runProgressInterval)
|
||||
}
|
||||
}
|
||||
|
||||
func getRun(repo string, runID int64) (*workflowRun, error) {
|
||||
cmd := exec.Command(
|
||||
"gh", "run", "view", fmt.Sprintf("%d", runID),
|
||||
"-R", repo,
|
||||
"--json", "databaseId,status,conclusion,url,event,headBranch",
|
||||
)
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
if exitErr, ok := err.(*exec.ExitError); ok {
|
||||
return nil, fmt.Errorf("gh run view failed: %w: %s", err, string(exitErr.Stderr))
|
||||
}
|
||||
return nil, fmt.Errorf("gh run view failed: %w", err)
|
||||
}
|
||||
var run workflowRun
|
||||
if err := json.Unmarshal(output, &run); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse gh run view output: %w", err)
|
||||
}
|
||||
return &run, nil
|
||||
}
|
||||
|
||||
// dispatchWorkflow fires a workflow_dispatch event for the given workflow with
|
||||
// the supplied string inputs.
|
||||
func dispatchWorkflow(repo, workflowFile string, inputs map[string]string) error {
|
||||
args := []string{"workflow", "run", workflowFile, "-R", repo}
|
||||
for k, v := range inputs {
|
||||
args = append(args, "-f", fmt.Sprintf("%s=%s", k, v))
|
||||
}
|
||||
cmd := exec.Command("gh", args...)
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
return fmt.Errorf("gh workflow run failed: %w: %s", err, string(output))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
34
tools/ods/cmd/dev.go
Normal file
34
tools/ods/cmd/dev.go
Normal file
@@ -0,0 +1,34 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
// NewDevCommand creates the parent dev command for devcontainer operations.
|
||||
func NewDevCommand() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "dev",
|
||||
Aliases: []string{"dc"},
|
||||
Short: "Manage the devcontainer",
|
||||
Long: `Manage the Onyx devcontainer.
|
||||
|
||||
Wraps the devcontainer CLI with workspace-aware defaults.
|
||||
|
||||
Commands:
|
||||
up Start the devcontainer
|
||||
into Open a shell inside the running devcontainer
|
||||
exec Run a command inside the devcontainer
|
||||
restart Remove and recreate the devcontainer
|
||||
rebuild Pull the latest image and recreate
|
||||
stop Stop the running devcontainer`,
|
||||
}
|
||||
|
||||
cmd.AddCommand(newDevUpCommand())
|
||||
cmd.AddCommand(newDevIntoCommand())
|
||||
cmd.AddCommand(newDevExecCommand())
|
||||
cmd.AddCommand(newDevRestartCommand())
|
||||
cmd.AddCommand(newDevRebuildCommand())
|
||||
cmd.AddCommand(newDevStopCommand())
|
||||
|
||||
return cmd
|
||||
}
|
||||
29
tools/ods/cmd/dev_exec.go
Normal file
29
tools/ods/cmd/dev_exec.go
Normal file
@@ -0,0 +1,29 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
func newDevExecCommand() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "exec [--] <command> [args...]",
|
||||
Short: "Run a command inside the devcontainer",
|
||||
Long: `Run an arbitrary command inside the running devcontainer.
|
||||
All arguments are treated as positional (flags like -it are passed through).
|
||||
|
||||
Examples:
|
||||
ods dev exec npm test
|
||||
ods dev exec -- ls -la
|
||||
ods dev exec -it echo hello`,
|
||||
Args: cobra.MinimumNArgs(1),
|
||||
DisableFlagParsing: true,
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
if len(args) > 0 && args[0] == "--" {
|
||||
args = args[1:]
|
||||
}
|
||||
runDevExec(args)
|
||||
},
|
||||
}
|
||||
|
||||
return cmd
|
||||
}
|
||||
53
tools/ods/cmd/dev_into.go
Normal file
53
tools/ods/cmd/dev_into.go
Normal file
@@ -0,0 +1,53 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"os"
|
||||
"os/exec"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/spf13/cobra"
|
||||
|
||||
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
|
||||
)
|
||||
|
||||
func newDevIntoCommand() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "into",
|
||||
Short: "Open a shell inside the running devcontainer",
|
||||
Long: `Open an interactive zsh shell inside the running devcontainer.
|
||||
|
||||
Examples:
|
||||
ods dev into`,
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
runDevExec([]string{"zsh"})
|
||||
},
|
||||
}
|
||||
|
||||
return cmd
|
||||
}
|
||||
|
||||
// runDevExec executes "devcontainer exec --workspace-folder <root> <command...>".
|
||||
func runDevExec(command []string) {
|
||||
checkDevcontainerCLI()
|
||||
ensureDockerSock()
|
||||
ensureRemoteUser()
|
||||
|
||||
root, err := paths.GitRoot()
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to find git root: %v", err)
|
||||
}
|
||||
|
||||
args := []string{"exec", "--workspace-folder", root}
|
||||
args = append(args, command...)
|
||||
|
||||
log.Debugf("Running: devcontainer %v", args)
|
||||
|
||||
c := exec.Command("devcontainer", args...)
|
||||
c.Stdout = os.Stdout
|
||||
c.Stderr = os.Stderr
|
||||
c.Stdin = os.Stdin
|
||||
|
||||
if err := c.Run(); err != nil {
|
||||
log.Fatalf("devcontainer exec failed: %v", err)
|
||||
}
|
||||
}
|
||||
41
tools/ods/cmd/dev_rebuild.go
Normal file
41
tools/ods/cmd/dev_rebuild.go
Normal file
@@ -0,0 +1,41 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"os"
|
||||
"os/exec"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
func newDevRebuildCommand() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "rebuild",
|
||||
Short: "Pull the latest devcontainer image and recreate",
|
||||
Long: `Pull the latest devcontainer image and recreate the container.
|
||||
|
||||
Use after the published image has been updated or after changing devcontainer.json.
|
||||
|
||||
Examples:
|
||||
ods dev rebuild`,
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
runDevRebuild()
|
||||
},
|
||||
}
|
||||
|
||||
return cmd
|
||||
}
|
||||
|
||||
func runDevRebuild() {
|
||||
image := devcontainerImage()
|
||||
|
||||
log.Infof("Pulling %s...", image)
|
||||
pull := exec.Command("docker", "pull", image)
|
||||
pull.Stdout = os.Stdout
|
||||
pull.Stderr = os.Stderr
|
||||
if err := pull.Run(); err != nil {
|
||||
log.Warnf("Failed to pull image (continuing with local copy): %v", err)
|
||||
}
|
||||
|
||||
runDevcontainer("up", []string{"--remove-existing-container"})
|
||||
}
|
||||
23
tools/ods/cmd/dev_restart.go
Normal file
23
tools/ods/cmd/dev_restart.go
Normal file
@@ -0,0 +1,23 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
func newDevRestartCommand() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "restart",
|
||||
Short: "Remove and recreate the devcontainer",
|
||||
Long: `Remove the existing devcontainer and recreate it.
|
||||
|
||||
Uses the cached image — for a full image rebuild, use "ods dev rebuild".
|
||||
|
||||
Examples:
|
||||
ods dev restart`,
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
runDevcontainer("up", []string{"--remove-existing-container"})
|
||||
},
|
||||
}
|
||||
|
||||
return cmd
|
||||
}
|
||||
56
tools/ods/cmd/dev_stop.go
Normal file
56
tools/ods/cmd/dev_stop.go
Normal file
@@ -0,0 +1,56 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"os/exec"
|
||||
"strings"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/spf13/cobra"
|
||||
|
||||
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
|
||||
)
|
||||
|
||||
func newDevStopCommand() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "stop",
|
||||
Short: "Stop the running devcontainer",
|
||||
Long: `Stop the running devcontainer.
|
||||
|
||||
Examples:
|
||||
ods dev stop`,
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
runDevStop()
|
||||
},
|
||||
}
|
||||
|
||||
return cmd
|
||||
}
|
||||
|
||||
func runDevStop() {
|
||||
root, err := paths.GitRoot()
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to find git root: %v", err)
|
||||
}
|
||||
|
||||
// Find the container by the devcontainer label
|
||||
out, err := exec.Command(
|
||||
"docker", "ps", "-q",
|
||||
"--filter", "label=devcontainer.local_folder="+root,
|
||||
).Output()
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to find devcontainer: %v", err)
|
||||
}
|
||||
|
||||
containerID := strings.TrimSpace(string(out))
|
||||
if containerID == "" {
|
||||
log.Info("No running devcontainer found")
|
||||
return
|
||||
}
|
||||
|
||||
log.Infof("Stopping devcontainer %s...", containerID)
|
||||
c := exec.Command("docker", "stop", containerID)
|
||||
if err := c.Run(); err != nil {
|
||||
log.Fatalf("Failed to stop devcontainer: %v", err)
|
||||
}
|
||||
log.Info("Devcontainer stopped")
|
||||
}
|
||||
223
tools/ods/cmd/dev_up.go
Normal file
223
tools/ods/cmd/dev_up.go
Normal file
@@ -0,0 +1,223 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/spf13/cobra"
|
||||
|
||||
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
|
||||
)
|
||||
|
||||
func newDevUpCommand() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "up",
|
||||
Short: "Start the devcontainer",
|
||||
Long: `Start the devcontainer, pulling the image if needed.
|
||||
|
||||
Examples:
|
||||
ods dev up`,
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
runDevcontainer("up", nil)
|
||||
},
|
||||
}
|
||||
|
||||
return cmd
|
||||
}
|
||||
|
||||
// devcontainerImage reads the image field from .devcontainer/devcontainer.json.
|
||||
func devcontainerImage() string {
|
||||
root, err := paths.GitRoot()
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to find git root: %v", err)
|
||||
}
|
||||
|
||||
data, err := os.ReadFile(filepath.Join(root, ".devcontainer", "devcontainer.json"))
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to read devcontainer.json: %v", err)
|
||||
}
|
||||
|
||||
var cfg struct {
|
||||
Image string `json:"image"`
|
||||
}
|
||||
if err := json.Unmarshal(data, &cfg); err != nil {
|
||||
log.Fatalf("Failed to parse devcontainer.json: %v", err)
|
||||
}
|
||||
if cfg.Image == "" {
|
||||
log.Fatal("No image field in devcontainer.json")
|
||||
}
|
||||
return cfg.Image
|
||||
}
|
||||
|
||||
// checkDevcontainerCLI ensures the devcontainer CLI is installed.
|
||||
func checkDevcontainerCLI() {
|
||||
if _, err := exec.LookPath("devcontainer"); err != nil {
|
||||
log.Fatal("devcontainer CLI is not installed. Install it with: npm install -g @devcontainers/cli")
|
||||
}
|
||||
}
|
||||
|
||||
// ensureDockerSock sets the DOCKER_SOCK environment variable if not already set.
|
||||
// Used by ensureRemoteUser to detect rootless Docker.
|
||||
func ensureDockerSock() {
|
||||
if os.Getenv("DOCKER_SOCK") != "" {
|
||||
return
|
||||
}
|
||||
|
||||
sock := detectDockerSock()
|
||||
if err := os.Setenv("DOCKER_SOCK", sock); err != nil {
|
||||
log.Fatalf("Failed to set DOCKER_SOCK: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// detectDockerSock returns the path to the Docker socket on the host.
|
||||
func detectDockerSock() string {
|
||||
// Prefer explicit DOCKER_HOST (strip unix:// prefix if present).
|
||||
if dh := os.Getenv("DOCKER_HOST"); dh != "" {
|
||||
const prefix = "unix://"
|
||||
if len(dh) > len(prefix) && dh[:len(prefix)] == prefix {
|
||||
return dh[len(prefix):]
|
||||
}
|
||||
// Only bare paths (starting with /) are valid socket paths.
|
||||
// Non-unix schemes (e.g. tcp://) can't be bind-mounted.
|
||||
if len(dh) > 0 && dh[0] == '/' {
|
||||
return dh
|
||||
}
|
||||
log.Warnf("DOCKER_HOST=%q is not a unix socket path; falling back to local socket detection", dh)
|
||||
}
|
||||
|
||||
// Linux rootless Docker: $XDG_RUNTIME_DIR/docker.sock
|
||||
if runtime.GOOS == "linux" {
|
||||
if xdg := os.Getenv("XDG_RUNTIME_DIR"); xdg != "" {
|
||||
sock := filepath.Join(xdg, "docker.sock")
|
||||
if _, err := os.Stat(sock); err == nil {
|
||||
return sock
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// macOS Docker Desktop: ~/.docker/run/docker.sock
|
||||
if runtime.GOOS == "darwin" {
|
||||
if home, err := os.UserHomeDir(); err == nil {
|
||||
sock := filepath.Join(home, ".docker", "run", "docker.sock")
|
||||
if _, err := os.Stat(sock); err == nil {
|
||||
return sock
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: standard socket path (Linux with standard Docker, macOS symlink)
|
||||
return "/var/run/docker.sock"
|
||||
}
|
||||
|
||||
// worktreeGitMount returns a --mount flag value that makes a git worktree's
|
||||
// .git reference resolve inside the container. In a worktree, .git is a file
|
||||
// containing "gitdir: /path/to/main/.git/worktrees/<name>", so we need the
|
||||
// main repo's .git directory to exist at the same absolute host path inside
|
||||
// the container.
|
||||
//
|
||||
// Returns ("", false) when the workspace is not a worktree.
|
||||
func worktreeGitMount(root string) (string, bool) {
|
||||
dotgit := filepath.Join(root, ".git")
|
||||
info, err := os.Lstat(dotgit)
|
||||
if err != nil || info.IsDir() {
|
||||
return "", false // regular repo or no .git
|
||||
}
|
||||
|
||||
// .git is a file — parse the gitdir path.
|
||||
out, err := exec.Command("git", "-C", root, "rev-parse", "--git-common-dir").Output()
|
||||
if err != nil {
|
||||
log.Warnf("Failed to detect git common dir: %v", err)
|
||||
return "", false
|
||||
}
|
||||
commonDir := strings.TrimSpace(string(out))
|
||||
|
||||
// Resolve to absolute path.
|
||||
if !filepath.IsAbs(commonDir) {
|
||||
commonDir = filepath.Join(root, commonDir)
|
||||
}
|
||||
commonDir, _ = filepath.EvalSymlinks(commonDir)
|
||||
|
||||
mount := fmt.Sprintf("type=bind,source=%s,target=%s", commonDir, commonDir)
|
||||
log.Debugf("Worktree detected — mounting main .git: %s", commonDir)
|
||||
return mount, true
|
||||
}
|
||||
|
||||
// sshAgentMount returns a --mount flag value that forwards the host's SSH agent
|
||||
// socket into the container. Returns ("", false) when SSH_AUTH_SOCK is unset or
|
||||
// the socket is not accessible.
|
||||
func sshAgentMount() (string, bool) {
|
||||
sock := os.Getenv("SSH_AUTH_SOCK")
|
||||
if sock == "" {
|
||||
log.Debug("SSH_AUTH_SOCK not set — skipping SSH agent forwarding")
|
||||
return "", false
|
||||
}
|
||||
if _, err := os.Stat(sock); err != nil {
|
||||
log.Debugf("SSH_AUTH_SOCK=%s not accessible: %v", sock, err)
|
||||
return "", false
|
||||
}
|
||||
mount := fmt.Sprintf("type=bind,source=%s,target=/tmp/ssh-agent.sock", sock)
|
||||
log.Debugf("Forwarding SSH agent: %s", sock)
|
||||
return mount, true
|
||||
}
|
||||
|
||||
// ensureRemoteUser sets DEVCONTAINER_REMOTE_USER when rootless Docker is
|
||||
// detected. Container root maps to the host user in rootless mode, so running
|
||||
// as root inside the container avoids the UID mismatch on new files.
|
||||
// Must be called after ensureDockerSock.
|
||||
func ensureRemoteUser() {
|
||||
if os.Getenv("DEVCONTAINER_REMOTE_USER") != "" {
|
||||
return
|
||||
}
|
||||
|
||||
if runtime.GOOS == "linux" {
|
||||
sock := os.Getenv("DOCKER_SOCK")
|
||||
xdg := os.Getenv("XDG_RUNTIME_DIR")
|
||||
// Heuristic: rootless Docker on Linux typically places its socket
|
||||
// under $XDG_RUNTIME_DIR. If DOCKER_SOCK was set to a custom path
|
||||
// outside XDG_RUNTIME_DIR, set DEVCONTAINER_REMOTE_USER=root manually.
|
||||
if xdg != "" && strings.HasPrefix(sock, xdg) {
|
||||
log.Debug("Rootless Docker detected — setting DEVCONTAINER_REMOTE_USER=root")
|
||||
if err := os.Setenv("DEVCONTAINER_REMOTE_USER", "root"); err != nil {
|
||||
log.Warnf("Failed to set DEVCONTAINER_REMOTE_USER: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// runDevcontainer executes "devcontainer <action> --workspace-folder <root> [extraArgs...]".
|
||||
func runDevcontainer(action string, extraArgs []string) {
|
||||
checkDevcontainerCLI()
|
||||
ensureDockerSock()
|
||||
ensureRemoteUser()
|
||||
|
||||
root, err := paths.GitRoot()
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to find git root: %v", err)
|
||||
}
|
||||
|
||||
args := []string{action, "--workspace-folder", root}
|
||||
if mount, ok := worktreeGitMount(root); ok {
|
||||
args = append(args, "--mount", mount)
|
||||
}
|
||||
if mount, ok := sshAgentMount(); ok {
|
||||
args = append(args, "--mount", mount)
|
||||
}
|
||||
args = append(args, extraArgs...)
|
||||
|
||||
log.Debugf("Running: devcontainer %v", args)
|
||||
|
||||
c := exec.Command("devcontainer", args...)
|
||||
c.Stdout = os.Stdout
|
||||
c.Stderr = os.Stderr
|
||||
c.Stdin = os.Stdin
|
||||
|
||||
if err := c.Run(); err != nil {
|
||||
log.Fatalf("devcontainer %s failed: %v", action, err)
|
||||
}
|
||||
}
|
||||
@@ -45,6 +45,7 @@ func NewRootCommand() *cobra.Command {
|
||||
cmd.AddCommand(NewCheckLazyImportsCommand())
|
||||
cmd.AddCommand(NewCherryPickCommand())
|
||||
cmd.AddCommand(NewDBCommand())
|
||||
cmd.AddCommand(NewDeployCommand())
|
||||
cmd.AddCommand(NewOpenAPICommand())
|
||||
cmd.AddCommand(NewComposeCommand())
|
||||
cmd.AddCommand(NewLogsCommand())
|
||||
@@ -52,6 +53,7 @@ func NewRootCommand() *cobra.Command {
|
||||
cmd.AddCommand(NewRunCICommand())
|
||||
cmd.AddCommand(NewScreenshotDiffCommand())
|
||||
cmd.AddCommand(NewDesktopCommand())
|
||||
cmd.AddCommand(NewDevCommand())
|
||||
cmd.AddCommand(NewWebCommand())
|
||||
cmd.AddCommand(NewLatestStableTagCommand())
|
||||
cmd.AddCommand(NewWhoisCommand())
|
||||
|
||||
56
tools/ods/internal/config/config.go
Normal file
56
tools/ods/internal/config/config.go
Normal file
@@ -0,0 +1,56 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
|
||||
)
|
||||
|
||||
// DeployEdgeConfig holds the persisted settings for `ods deploy edge`.
|
||||
type DeployEdgeConfig struct {
|
||||
TargetRepo string `json:"target_repo,omitempty"`
|
||||
TargetWorkflow string `json:"target_workflow,omitempty"`
|
||||
}
|
||||
|
||||
// Config is the top-level on-disk schema for ~/.config/onyx-dev/config.json.
|
||||
// New per-command sections should be added as additional fields.
|
||||
type Config struct {
|
||||
DeployEdge DeployEdgeConfig `json:"deploy_edge,omitempty"`
|
||||
}
|
||||
|
||||
// Load reads the config file. Returns a zero-valued Config if the file does
|
||||
// not exist (a fresh first-run state, not an error).
|
||||
func Load() (*Config, error) {
|
||||
path := paths.ConfigFilePath()
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
return &Config{}, nil
|
||||
}
|
||||
return nil, fmt.Errorf("failed to read config file %s: %w", path, err)
|
||||
}
|
||||
var cfg Config
|
||||
if err := json.Unmarshal(data, &cfg); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse config file %s: %w", path, err)
|
||||
}
|
||||
return &cfg, nil
|
||||
}
|
||||
|
||||
// Save persists the config to disk, creating the parent directory if needed.
|
||||
func Save(cfg *Config) error {
|
||||
if err := paths.EnsureConfigDir(); err != nil {
|
||||
return fmt.Errorf("failed to create config directory: %w", err)
|
||||
}
|
||||
data, err := json.MarshalIndent(cfg, "", " ")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal config: %w", err)
|
||||
}
|
||||
path := paths.ConfigFilePath()
|
||||
if err := os.WriteFile(path, data, 0644); err != nil {
|
||||
return fmt.Errorf("failed to write config file %s: %w", path, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -47,6 +47,43 @@ func DataDir() string {
|
||||
return filepath.Join(base, "onyx-dev")
|
||||
}
|
||||
|
||||
// ConfigDir returns the per-user config directory for onyx-dev tools.
|
||||
// On Linux/macOS: ~/.config/onyx-dev/ (respects XDG_CONFIG_HOME)
|
||||
// On Windows: %APPDATA%/onyx-dev/
|
||||
func ConfigDir() string {
|
||||
var base string
|
||||
if runtime.GOOS == "windows" {
|
||||
base = os.Getenv("APPDATA")
|
||||
if base == "" {
|
||||
base = os.Getenv("USERPROFILE")
|
||||
if base == "" {
|
||||
log.Fatalf("Cannot determine config directory: APPDATA and USERPROFILE are not set")
|
||||
}
|
||||
base = filepath.Join(base, "AppData", "Roaming")
|
||||
}
|
||||
} else {
|
||||
base = os.Getenv("XDG_CONFIG_HOME")
|
||||
if base == "" {
|
||||
home, err := os.UserHomeDir()
|
||||
if err != nil || home == "" {
|
||||
log.Fatalf("Cannot determine config directory: XDG_CONFIG_HOME not set and home directory unknown: %v", err)
|
||||
}
|
||||
base = filepath.Join(home, ".config")
|
||||
}
|
||||
}
|
||||
return filepath.Join(base, "onyx-dev")
|
||||
}
|
||||
|
||||
// ConfigFilePath returns the path to the ods config file.
|
||||
func ConfigFilePath() string {
|
||||
return filepath.Join(ConfigDir(), "config.json")
|
||||
}
|
||||
|
||||
// EnsureConfigDir creates the config directory if it doesn't exist.
|
||||
func EnsureConfigDir() error {
|
||||
return os.MkdirAll(ConfigDir(), 0755)
|
||||
}
|
||||
|
||||
// SnapshotsDir returns the directory for database snapshots.
|
||||
func SnapshotsDir() string {
|
||||
return filepath.Join(DataDir(), "snapshots")
|
||||
|
||||
@@ -12,6 +12,23 @@ import (
|
||||
// reader is the input reader, can be replaced for testing
|
||||
var reader = bufio.NewReader(os.Stdin)
|
||||
|
||||
// String prompts the user for a free-form line of input. Re-prompts until a
|
||||
// non-empty value is entered.
|
||||
func String(prompt string) string {
|
||||
for {
|
||||
fmt.Print(prompt)
|
||||
response, err := reader.ReadString('\n')
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to read input: %v", err)
|
||||
}
|
||||
response = strings.TrimSpace(response)
|
||||
if response != "" {
|
||||
return response
|
||||
}
|
||||
fmt.Println("Value cannot be empty.")
|
||||
}
|
||||
}
|
||||
|
||||
// Confirm prompts the user with a yes/no question and returns true for yes, false for no.
|
||||
// It will keep prompting until a valid response is given.
|
||||
// Empty input (just pressing Enter) defaults to yes.
|
||||
|
||||
486
uv.lock
generated
486
uv.lock
generated
@@ -14,12 +14,6 @@ resolution-markers = [
|
||||
"python_full_version < '3.12' and sys_platform != 'win32'",
|
||||
]
|
||||
|
||||
[manifest]
|
||||
members = [
|
||||
"onyx",
|
||||
"onyx-backend",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "accelerate"
|
||||
version = "1.6.0"
|
||||
@@ -4234,7 +4228,7 @@ dependencies = [
|
||||
{ name = "voyageai" },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
[package.dev-dependencies]
|
||||
backend = [
|
||||
{ name = "aiohttp" },
|
||||
{ name = "alembic" },
|
||||
@@ -4388,195 +4382,191 @@ model-server = [
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "accelerate", marker = "extra == 'model-server'", specifier = "==1.6.0" },
|
||||
{ name = "agent-client-protocol", specifier = ">=0.7.1" },
|
||||
{ name = "aioboto3", specifier = "==15.1.0" },
|
||||
{ name = "aiohttp", marker = "extra == 'backend'", specifier = "==3.13.4" },
|
||||
{ name = "alembic", marker = "extra == 'backend'", specifier = "==1.10.4" },
|
||||
{ name = "asana", marker = "extra == 'backend'", specifier = "==5.0.8" },
|
||||
{ name = "asyncpg", marker = "extra == 'backend'", specifier = "==0.30.0" },
|
||||
{ name = "atlassian-python-api", marker = "extra == 'backend'", specifier = "==3.41.16" },
|
||||
{ name = "azure-cognitiveservices-speech", marker = "extra == 'backend'", specifier = "==1.38.0" },
|
||||
{ name = "beautifulsoup4", marker = "extra == 'backend'", specifier = "==4.12.3" },
|
||||
{ name = "black", marker = "extra == 'dev'", specifier = "==25.1.0" },
|
||||
{ name = "boto3", marker = "extra == 'backend'", specifier = "==1.39.11" },
|
||||
{ name = "boto3-stubs", extras = ["s3"], marker = "extra == 'backend'", specifier = "==1.39.11" },
|
||||
{ name = "braintrust", marker = "extra == 'backend'", specifier = "==0.3.9" },
|
||||
{ name = "brotli", specifier = ">=1.2.0" },
|
||||
{ name = "celery", marker = "extra == 'backend'", specifier = "==5.5.1" },
|
||||
{ name = "celery-types", marker = "extra == 'dev'", specifier = "==0.19.0" },
|
||||
{ name = "chardet", marker = "extra == 'backend'", specifier = "==5.2.0" },
|
||||
{ name = "chonkie", marker = "extra == 'backend'", specifier = "==1.0.10" },
|
||||
{ name = "claude-agent-sdk", specifier = ">=0.1.19" },
|
||||
{ name = "cohere", specifier = "==5.6.1" },
|
||||
{ name = "dask", marker = "extra == 'backend'", specifier = "==2026.1.1" },
|
||||
{ name = "ddtrace", marker = "extra == 'backend'", specifier = "==3.10.0" },
|
||||
{ name = "discord-py", specifier = "==2.4.0" },
|
||||
{ name = "discord-py", marker = "extra == 'backend'", specifier = "==2.4.0" },
|
||||
{ name = "distributed", marker = "extra == 'backend'", specifier = "==2026.1.1" },
|
||||
{ name = "dropbox", marker = "extra == 'backend'", specifier = "==12.0.2" },
|
||||
{ name = "einops", marker = "extra == 'model-server'", specifier = "==0.8.1" },
|
||||
{ name = "exa-py", marker = "extra == 'backend'", specifier = "==1.15.4" },
|
||||
{ name = "faker", marker = "extra == 'dev'", specifier = "==40.1.2" },
|
||||
{ name = "fastapi", specifier = "==0.133.1" },
|
||||
{ name = "fastapi-limiter", marker = "extra == 'backend'", specifier = "==0.1.6" },
|
||||
{ name = "fastapi-users", marker = "extra == 'backend'", specifier = "==15.0.4" },
|
||||
{ name = "fastapi-users-db-sqlalchemy", marker = "extra == 'backend'", specifier = "==7.0.0" },
|
||||
{ name = "fastmcp", marker = "extra == 'backend'", specifier = "==3.2.0" },
|
||||
{ name = "filelock", marker = "extra == 'backend'", specifier = "==3.20.3" },
|
||||
{ name = "google-api-python-client", marker = "extra == 'backend'", specifier = "==2.86.0" },
|
||||
{ name = "google-auth-httplib2", marker = "extra == 'backend'", specifier = "==0.1.0" },
|
||||
{ name = "google-auth-oauthlib", marker = "extra == 'backend'", specifier = "==1.0.0" },
|
||||
{ name = "google-genai", specifier = "==1.52.0" },
|
||||
{ name = "hatchling", marker = "extra == 'dev'", specifier = "==1.28.0" },
|
||||
{ name = "httpcore", marker = "extra == 'backend'", specifier = "==1.0.9" },
|
||||
{ name = "httpx", extras = ["http2"], marker = "extra == 'backend'", specifier = "==0.28.1" },
|
||||
{ name = "httpx-oauth", marker = "extra == 'backend'", specifier = "==0.15.1" },
|
||||
{ name = "hubspot-api-client", marker = "extra == 'backend'", specifier = "==11.1.0" },
|
||||
{ name = "huggingface-hub", marker = "extra == 'backend'", specifier = "==0.35.3" },
|
||||
{ name = "inflection", marker = "extra == 'backend'", specifier = "==0.5.1" },
|
||||
{ name = "ipykernel", marker = "extra == 'dev'", specifier = "==6.29.5" },
|
||||
{ name = "jira", marker = "extra == 'backend'", specifier = "==3.10.5" },
|
||||
{ name = "jsonref", marker = "extra == 'backend'", specifier = "==1.1.0" },
|
||||
{ name = "kubernetes", specifier = ">=31.0.0" },
|
||||
{ name = "kubernetes", marker = "extra == 'backend'", specifier = "==31.0.0" },
|
||||
{ name = "langchain-core", marker = "extra == 'backend'", specifier = "==1.2.22" },
|
||||
{ name = "langfuse", marker = "extra == 'backend'", specifier = "==3.10.0" },
|
||||
{ name = "lazy-imports", marker = "extra == 'backend'", specifier = "==1.0.1" },
|
||||
{ name = "litellm", specifier = "==1.81.6" },
|
||||
{ name = "lxml", marker = "extra == 'backend'", specifier = "==5.3.0" },
|
||||
{ name = "mako", marker = "extra == 'backend'", specifier = "==1.2.4" },
|
||||
{ name = "manygo", marker = "extra == 'dev'", specifier = "==0.2.0" },
|
||||
{ name = "markitdown", extras = ["pdf", "docx", "pptx", "xlsx", "xls"], marker = "extra == 'backend'", specifier = "==0.1.2" },
|
||||
{ name = "matplotlib", marker = "extra == 'dev'", specifier = "==3.10.8" },
|
||||
{ name = "mcp", extras = ["cli"], marker = "extra == 'backend'", specifier = "==1.26.0" },
|
||||
{ name = "mistune", marker = "extra == 'backend'", specifier = "==3.2.0" },
|
||||
{ name = "msal", marker = "extra == 'backend'", specifier = "==1.34.0" },
|
||||
{ name = "msoffcrypto-tool", marker = "extra == 'backend'", specifier = "==5.4.2" },
|
||||
{ name = "mypy", marker = "extra == 'dev'", specifier = "==1.13.0" },
|
||||
{ name = "mypy-extensions", marker = "extra == 'dev'", specifier = "==1.0.0" },
|
||||
{ name = "nest-asyncio", marker = "extra == 'backend'", specifier = "==1.6.0" },
|
||||
{ name = "numpy", marker = "extra == 'model-server'", specifier = "==2.4.1" },
|
||||
{ name = "oauthlib", marker = "extra == 'backend'", specifier = "==3.2.2" },
|
||||
{ name = "office365-rest-python-client", marker = "extra == 'backend'", specifier = "==2.6.2" },
|
||||
{ name = "onyx-devtools", marker = "extra == 'dev'", specifier = "==0.7.2" },
|
||||
{ name = "openai", specifier = "==2.14.0" },
|
||||
{ name = "openapi-generator-cli", marker = "extra == 'dev'", specifier = "==7.17.0" },
|
||||
{ name = "openinference-instrumentation", marker = "extra == 'backend'", specifier = "==0.1.42" },
|
||||
{ name = "openpyxl", marker = "extra == 'backend'", specifier = "==3.0.10" },
|
||||
{ name = "opensearch-py", marker = "extra == 'backend'", specifier = "==3.0.0" },
|
||||
{ name = "opentelemetry-proto", marker = "extra == 'backend'", specifier = ">=1.39.0" },
|
||||
{ name = "pandas-stubs", marker = "extra == 'dev'", specifier = "~=2.3.3" },
|
||||
{ name = "passlib", marker = "extra == 'backend'", specifier = "==1.7.4" },
|
||||
{ name = "playwright", marker = "extra == 'backend'", specifier = "==1.55.0" },
|
||||
{ name = "posthog", marker = "extra == 'ee'", specifier = "==3.7.4" },
|
||||
{ name = "pre-commit", marker = "extra == 'dev'", specifier = "==3.2.2" },
|
||||
{ name = "prometheus-client", specifier = ">=0.21.1" },
|
||||
{ name = "prometheus-fastapi-instrumentator", specifier = "==7.1.0" },
|
||||
{ name = "psutil", marker = "extra == 'backend'", specifier = "==7.1.3" },
|
||||
{ name = "psycopg2-binary", marker = "extra == 'backend'", specifier = "==2.9.9" },
|
||||
{ name = "puremagic", marker = "extra == 'backend'", specifier = "==1.28" },
|
||||
{ name = "pyairtable", marker = "extra == 'backend'", specifier = "==3.0.1" },
|
||||
{ name = "pycryptodome", marker = "extra == 'backend'", specifier = "==3.19.1" },
|
||||
{ name = "pydantic", specifier = "==2.11.7" },
|
||||
{ name = "pygithub", marker = "extra == 'backend'", specifier = "==2.5.0" },
|
||||
{ name = "pympler", marker = "extra == 'backend'", specifier = "==1.1" },
|
||||
{ name = "pypandoc-binary", marker = "extra == 'backend'", specifier = "==1.16.2" },
|
||||
{ name = "pypdf", marker = "extra == 'backend'", specifier = "==6.9.2" },
|
||||
{ name = "pytest", marker = "extra == 'dev'", specifier = "==8.3.5" },
|
||||
{ name = "pytest-alembic", marker = "extra == 'dev'", specifier = "==0.12.1" },
|
||||
{ name = "pytest-asyncio", marker = "extra == 'dev'", specifier = "==1.3.0" },
|
||||
{ name = "pytest-dotenv", marker = "extra == 'dev'", specifier = "==0.5.2" },
|
||||
{ name = "pytest-mock", marker = "extra == 'backend'", specifier = "==3.12.0" },
|
||||
{ name = "pytest-playwright", marker = "extra == 'backend'", specifier = "==0.7.0" },
|
||||
{ name = "pytest-repeat", marker = "extra == 'dev'", specifier = "==0.9.4" },
|
||||
{ name = "pytest-xdist", marker = "extra == 'dev'", specifier = "==3.8.0" },
|
||||
{ name = "python-dateutil", marker = "extra == 'backend'", specifier = "==2.8.2" },
|
||||
{ name = "python-docx", marker = "extra == 'backend'", specifier = "==1.1.2" },
|
||||
{ name = "python-dotenv", marker = "extra == 'backend'", specifier = "==1.1.1" },
|
||||
{ name = "python-gitlab", marker = "extra == 'backend'", specifier = "==5.6.0" },
|
||||
{ name = "python-multipart", marker = "extra == 'backend'", specifier = "==0.0.22" },
|
||||
{ name = "python-pptx", marker = "extra == 'backend'", specifier = "==0.6.23" },
|
||||
{ name = "python3-saml", marker = "extra == 'backend'", specifier = "==1.15.0" },
|
||||
{ name = "pywikibot", marker = "extra == 'backend'", specifier = "==9.0.0" },
|
||||
{ name = "rapidfuzz", marker = "extra == 'backend'", specifier = "==3.13.0" },
|
||||
{ name = "redis", marker = "extra == 'backend'", specifier = "==5.0.8" },
|
||||
{ name = "release-tag", marker = "extra == 'dev'", specifier = "==0.5.2" },
|
||||
{ name = "reorder-python-imports-black", marker = "extra == 'dev'", specifier = "==3.14.0" },
|
||||
{ name = "requests", marker = "extra == 'backend'", specifier = "==2.33.0" },
|
||||
{ name = "requests-oauthlib", marker = "extra == 'backend'", specifier = "==1.3.1" },
|
||||
{ name = "retry", specifier = "==0.9.2" },
|
||||
{ name = "rfc3986", marker = "extra == 'backend'", specifier = "==1.5.0" },
|
||||
{ name = "ruff", marker = "extra == 'dev'", specifier = "==0.12.0" },
|
||||
{ name = "safetensors", marker = "extra == 'model-server'", specifier = "==0.5.3" },
|
||||
{ name = "sendgrid", marker = "extra == 'backend'", specifier = "==6.12.5" },
|
||||
{ name = "sentence-transformers", marker = "extra == 'model-server'", specifier = "==4.0.2" },
|
||||
{ name = "sentry-sdk", specifier = "==2.14.0" },
|
||||
{ name = "sentry-sdk", extras = ["fastapi", "celery", "starlette"], marker = "extra == 'model-server'", specifier = "==2.14.0" },
|
||||
{ name = "shapely", marker = "extra == 'backend'", specifier = "==2.0.6" },
|
||||
{ name = "simple-salesforce", marker = "extra == 'backend'", specifier = "==1.12.6" },
|
||||
{ name = "slack-sdk", marker = "extra == 'backend'", specifier = "==3.20.2" },
|
||||
{ name = "sqlalchemy", extras = ["mypy"], marker = "extra == 'backend'", specifier = "==2.0.15" },
|
||||
{ name = "starlette", marker = "extra == 'backend'", specifier = "==0.49.3" },
|
||||
{ name = "stripe", marker = "extra == 'backend'", specifier = "==10.12.0" },
|
||||
{ name = "supervisor", marker = "extra == 'backend'", specifier = "==4.3.0" },
|
||||
{ name = "tiktoken", marker = "extra == 'backend'", specifier = "==0.7.0" },
|
||||
{ name = "timeago", marker = "extra == 'backend'", specifier = "==1.0.16" },
|
||||
{ name = "torch", marker = "extra == 'model-server'", specifier = "==2.9.1" },
|
||||
{ name = "trafilatura", marker = "extra == 'backend'", specifier = "==1.12.2" },
|
||||
{ name = "transformers", marker = "extra == 'model-server'", specifier = "==4.53.0" },
|
||||
{ name = "types-beautifulsoup4", marker = "extra == 'dev'", specifier = "==4.12.0.3" },
|
||||
{ name = "types-html5lib", marker = "extra == 'dev'", specifier = "==1.1.11.13" },
|
||||
{ name = "types-oauthlib", marker = "extra == 'dev'", specifier = "==3.2.0.9" },
|
||||
{ name = "types-openpyxl", marker = "extra == 'backend'", specifier = "==3.0.4.7" },
|
||||
{ name = "types-passlib", marker = "extra == 'dev'", specifier = "==1.7.7.20240106" },
|
||||
{ name = "types-pillow", marker = "extra == 'dev'", specifier = "==10.2.0.20240822" },
|
||||
{ name = "types-psutil", marker = "extra == 'dev'", specifier = "==7.1.3.20251125" },
|
||||
{ name = "types-psycopg2", marker = "extra == 'dev'", specifier = "==2.9.21.10" },
|
||||
{ name = "types-python-dateutil", marker = "extra == 'dev'", specifier = "==2.8.19.13" },
|
||||
{ name = "types-pytz", marker = "extra == 'dev'", specifier = "==2023.3.1.1" },
|
||||
{ name = "types-pyyaml", marker = "extra == 'dev'", specifier = "==6.0.12.11" },
|
||||
{ name = "types-regex", marker = "extra == 'dev'", specifier = "==2023.3.23.1" },
|
||||
{ name = "types-requests", marker = "extra == 'dev'", specifier = "==2.32.0.20250328" },
|
||||
{ name = "types-retry", marker = "extra == 'dev'", specifier = "==0.9.9.3" },
|
||||
{ name = "types-setuptools", marker = "extra == 'dev'", specifier = "==68.0.0.3" },
|
||||
{ name = "unstructured", marker = "extra == 'backend'", specifier = "==0.18.27" },
|
||||
{ name = "unstructured-client", marker = "extra == 'backend'", specifier = "==0.42.6" },
|
||||
{ name = "urllib3", marker = "extra == 'backend'", specifier = "==2.6.3" },
|
||||
{ name = "uvicorn", specifier = "==0.35.0" },
|
||||
{ name = "voyageai", specifier = "==0.2.3" },
|
||||
{ name = "xmlsec", marker = "extra == 'backend'", specifier = "==1.3.14" },
|
||||
{ name = "zizmor", marker = "extra == 'dev'", specifier = "==1.18.0" },
|
||||
{ name = "zulip", marker = "extra == 'backend'", specifier = "==0.8.2" },
|
||||
]
|
||||
provides-extras = ["backend", "dev", "ee", "model-server"]
|
||||
|
||||
[[package]]
|
||||
name = "onyx-backend"
|
||||
version = "0.0.0"
|
||||
source = { virtual = "backend" }
|
||||
dependencies = [
|
||||
{ name = "onyx", extra = ["backend", "dev", "ee"] },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [{ name = "onyx", extras = ["backend", "dev", "ee"], editable = "." }]
|
||||
[package.metadata.requires-dev]
|
||||
backend = [
|
||||
{ name = "aiohttp", specifier = "==3.13.4" },
|
||||
{ name = "alembic", specifier = "==1.10.4" },
|
||||
{ name = "asana", specifier = "==5.0.8" },
|
||||
{ name = "asyncpg", specifier = "==0.30.0" },
|
||||
{ name = "atlassian-python-api", specifier = "==3.41.16" },
|
||||
{ name = "azure-cognitiveservices-speech", specifier = "==1.38.0" },
|
||||
{ name = "beautifulsoup4", specifier = "==4.12.3" },
|
||||
{ name = "boto3", specifier = "==1.39.11" },
|
||||
{ name = "boto3-stubs", extras = ["s3"], specifier = "==1.39.11" },
|
||||
{ name = "braintrust", specifier = "==0.3.9" },
|
||||
{ name = "celery", specifier = "==5.5.1" },
|
||||
{ name = "chardet", specifier = "==5.2.0" },
|
||||
{ name = "chonkie", specifier = "==1.0.10" },
|
||||
{ name = "dask", specifier = "==2026.1.1" },
|
||||
{ name = "ddtrace", specifier = "==3.10.0" },
|
||||
{ name = "discord-py", specifier = "==2.4.0" },
|
||||
{ name = "distributed", specifier = "==2026.1.1" },
|
||||
{ name = "dropbox", specifier = "==12.0.2" },
|
||||
{ name = "exa-py", specifier = "==1.15.4" },
|
||||
{ name = "fastapi-limiter", specifier = "==0.1.6" },
|
||||
{ name = "fastapi-users", specifier = "==15.0.4" },
|
||||
{ name = "fastapi-users-db-sqlalchemy", specifier = "==7.0.0" },
|
||||
{ name = "fastmcp", specifier = "==3.2.0" },
|
||||
{ name = "filelock", specifier = "==3.20.3" },
|
||||
{ name = "google-api-python-client", specifier = "==2.86.0" },
|
||||
{ name = "google-auth-httplib2", specifier = "==0.1.0" },
|
||||
{ name = "google-auth-oauthlib", specifier = "==1.0.0" },
|
||||
{ name = "httpcore", specifier = "==1.0.9" },
|
||||
{ name = "httpx", extras = ["http2"], specifier = "==0.28.1" },
|
||||
{ name = "httpx-oauth", specifier = "==0.15.1" },
|
||||
{ name = "hubspot-api-client", specifier = "==11.1.0" },
|
||||
{ name = "huggingface-hub", specifier = "==0.35.3" },
|
||||
{ name = "inflection", specifier = "==0.5.1" },
|
||||
{ name = "jira", specifier = "==3.10.5" },
|
||||
{ name = "jsonref", specifier = "==1.1.0" },
|
||||
{ name = "kubernetes", specifier = "==31.0.0" },
|
||||
{ name = "langchain-core", specifier = "==1.2.22" },
|
||||
{ name = "langfuse", specifier = "==3.10.0" },
|
||||
{ name = "lazy-imports", specifier = "==1.0.1" },
|
||||
{ name = "lxml", specifier = "==5.3.0" },
|
||||
{ name = "mako", specifier = "==1.2.4" },
|
||||
{ name = "markitdown", extras = ["pdf", "docx", "pptx", "xlsx", "xls"], specifier = "==0.1.2" },
|
||||
{ name = "mcp", extras = ["cli"], specifier = "==1.26.0" },
|
||||
{ name = "mistune", specifier = "==3.2.0" },
|
||||
{ name = "msal", specifier = "==1.34.0" },
|
||||
{ name = "msoffcrypto-tool", specifier = "==5.4.2" },
|
||||
{ name = "nest-asyncio", specifier = "==1.6.0" },
|
||||
{ name = "oauthlib", specifier = "==3.2.2" },
|
||||
{ name = "office365-rest-python-client", specifier = "==2.6.2" },
|
||||
{ name = "openinference-instrumentation", specifier = "==0.1.42" },
|
||||
{ name = "openpyxl", specifier = "==3.0.10" },
|
||||
{ name = "opensearch-py", specifier = "==3.0.0" },
|
||||
{ name = "opentelemetry-proto", specifier = ">=1.39.0" },
|
||||
{ name = "passlib", specifier = "==1.7.4" },
|
||||
{ name = "playwright", specifier = "==1.55.0" },
|
||||
{ name = "psutil", specifier = "==7.1.3" },
|
||||
{ name = "psycopg2-binary", specifier = "==2.9.9" },
|
||||
{ name = "puremagic", specifier = "==1.28" },
|
||||
{ name = "pyairtable", specifier = "==3.0.1" },
|
||||
{ name = "pycryptodome", specifier = "==3.19.1" },
|
||||
{ name = "pygithub", specifier = "==2.5.0" },
|
||||
{ name = "pympler", specifier = "==1.1" },
|
||||
{ name = "pypandoc-binary", specifier = "==1.16.2" },
|
||||
{ name = "pypdf", specifier = "==6.9.2" },
|
||||
{ name = "pytest-mock", specifier = "==3.12.0" },
|
||||
{ name = "pytest-playwright", specifier = "==0.7.0" },
|
||||
{ name = "python-dateutil", specifier = "==2.8.2" },
|
||||
{ name = "python-docx", specifier = "==1.1.2" },
|
||||
{ name = "python-dotenv", specifier = "==1.1.1" },
|
||||
{ name = "python-gitlab", specifier = "==5.6.0" },
|
||||
{ name = "python-multipart", specifier = "==0.0.22" },
|
||||
{ name = "python-pptx", specifier = "==0.6.23" },
|
||||
{ name = "python3-saml", specifier = "==1.15.0" },
|
||||
{ name = "pywikibot", specifier = "==9.0.0" },
|
||||
{ name = "rapidfuzz", specifier = "==3.13.0" },
|
||||
{ name = "redis", specifier = "==5.0.8" },
|
||||
{ name = "requests", specifier = "==2.33.0" },
|
||||
{ name = "requests-oauthlib", specifier = "==1.3.1" },
|
||||
{ name = "rfc3986", specifier = "==1.5.0" },
|
||||
{ name = "sendgrid", specifier = "==6.12.5" },
|
||||
{ name = "shapely", specifier = "==2.0.6" },
|
||||
{ name = "simple-salesforce", specifier = "==1.12.6" },
|
||||
{ name = "slack-sdk", specifier = "==3.20.2" },
|
||||
{ name = "sqlalchemy", extras = ["mypy"], specifier = "==2.0.15" },
|
||||
{ name = "starlette", specifier = "==0.49.3" },
|
||||
{ name = "stripe", specifier = "==10.12.0" },
|
||||
{ name = "supervisor", specifier = "==4.3.0" },
|
||||
{ name = "tiktoken", specifier = "==0.7.0" },
|
||||
{ name = "timeago", specifier = "==1.0.16" },
|
||||
{ name = "trafilatura", specifier = "==1.12.2" },
|
||||
{ name = "types-openpyxl", specifier = "==3.0.4.7" },
|
||||
{ name = "unstructured", specifier = "==0.18.27" },
|
||||
{ name = "unstructured-client", specifier = "==0.42.6" },
|
||||
{ name = "urllib3", specifier = "==2.6.3" },
|
||||
{ name = "xmlsec", specifier = "==1.3.14" },
|
||||
{ name = "zulip", specifier = "==0.8.2" },
|
||||
]
|
||||
dev = [
|
||||
{ name = "black", specifier = "==25.1.0" },
|
||||
{ name = "celery-types", specifier = "==0.19.0" },
|
||||
{ name = "faker", specifier = "==40.1.2" },
|
||||
{ name = "hatchling", specifier = "==1.28.0" },
|
||||
{ name = "ipykernel", specifier = "==6.29.5" },
|
||||
{ name = "manygo", specifier = "==0.2.0" },
|
||||
{ name = "matplotlib", specifier = "==3.10.8" },
|
||||
{ name = "mypy", specifier = "==1.13.0" },
|
||||
{ name = "mypy-extensions", specifier = "==1.0.0" },
|
||||
{ name = "onyx-devtools", specifier = "==0.7.5" },
|
||||
{ name = "openapi-generator-cli", specifier = "==7.17.0" },
|
||||
{ name = "pandas-stubs", specifier = "~=2.3.3" },
|
||||
{ name = "pre-commit", specifier = "==3.2.2" },
|
||||
{ name = "pytest", specifier = "==8.3.5" },
|
||||
{ name = "pytest-alembic", specifier = "==0.12.1" },
|
||||
{ name = "pytest-asyncio", specifier = "==1.3.0" },
|
||||
{ name = "pytest-dotenv", specifier = "==0.5.2" },
|
||||
{ name = "pytest-repeat", specifier = "==0.9.4" },
|
||||
{ name = "pytest-xdist", specifier = "==3.8.0" },
|
||||
{ name = "release-tag", specifier = "==0.5.2" },
|
||||
{ name = "reorder-python-imports-black", specifier = "==3.14.0" },
|
||||
{ name = "ruff", specifier = "==0.12.0" },
|
||||
{ name = "types-beautifulsoup4", specifier = "==4.12.0.3" },
|
||||
{ name = "types-html5lib", specifier = "==1.1.11.13" },
|
||||
{ name = "types-oauthlib", specifier = "==3.2.0.9" },
|
||||
{ name = "types-passlib", specifier = "==1.7.7.20240106" },
|
||||
{ name = "types-pillow", specifier = "==10.2.0.20240822" },
|
||||
{ name = "types-psutil", specifier = "==7.1.3.20251125" },
|
||||
{ name = "types-psycopg2", specifier = "==2.9.21.10" },
|
||||
{ name = "types-python-dateutil", specifier = "==2.8.19.13" },
|
||||
{ name = "types-pytz", specifier = "==2023.3.1.1" },
|
||||
{ name = "types-pyyaml", specifier = "==6.0.12.11" },
|
||||
{ name = "types-regex", specifier = "==2023.3.23.1" },
|
||||
{ name = "types-requests", specifier = "==2.32.0.20250328" },
|
||||
{ name = "types-retry", specifier = "==0.9.9.3" },
|
||||
{ name = "types-setuptools", specifier = "==68.0.0.3" },
|
||||
{ name = "zizmor", specifier = "==1.18.0" },
|
||||
]
|
||||
ee = [{ name = "posthog", specifier = "==3.7.4" }]
|
||||
model-server = [
|
||||
{ name = "accelerate", specifier = "==1.6.0" },
|
||||
{ name = "einops", specifier = "==0.8.1" },
|
||||
{ name = "numpy", specifier = "==2.4.1" },
|
||||
{ name = "safetensors", specifier = "==0.5.3" },
|
||||
{ name = "sentence-transformers", specifier = "==4.0.2" },
|
||||
{ name = "sentry-sdk", extras = ["fastapi", "celery", "starlette"], specifier = "==2.14.0" },
|
||||
{ name = "torch", specifier = "==2.9.1" },
|
||||
{ name = "transformers", specifier = "==4.53.0" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "onyx-devtools"
|
||||
version = "0.7.2"
|
||||
version = "0.7.5"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "fastapi" },
|
||||
{ name = "openapi-generator-cli" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/22/b0/765ed49157470e8ccc8ab89e6a896ade50cde3aa2a494662ad4db92a48c4/onyx_devtools-0.7.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:553a2b5e61b29b7913c991c8d5aed78f930f0f81a0f42229c6a8de2b1e8ff57e", size = 4203859, upload-time = "2026-03-27T15:09:49.63Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f7/9d/bba0a44a16d2fc27e5441aaf10727e10514e7a49bce70eca02bced566eb9/onyx_devtools-0.7.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:5cf0782dca8b3d861de9e18e65e990cfce5161cd559df44d8fabd3fefd54fdcd", size = 3879750, upload-time = "2026-03-27T15:09:42.413Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/4d/d8/c5725e8af14c74fe0aeed29e4746400bb3c0a078fd1240df729dc6432b84/onyx_devtools-0.7.2-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:9a0d67373e16b4fbb38a5290c0d9dfd4cfa837e5da0c165b32841b9d37f7455b", size = 3743529, upload-time = "2026-03-27T15:09:44.546Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1a/82/b7c398a21dbc3e14fd7a29e49caa86b1bc0f8d7c75c051514785441ab779/onyx_devtools-0.7.2-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:794af14b2de575d0ae41b94551399eca8f8ba9b950c5db7acb7612767fd228f9", size = 4166562, upload-time = "2026-03-27T15:09:49.471Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/26/76/be129e2baafc91fe792d919b1f4d73fc943ba9c2b728a60f1fb98e0c115a/onyx_devtools-0.7.2-py3-none-win_amd64.whl", hash = "sha256:83b3eb84df58d865e4f714222a5fab3ea464836e2c8690569454a940bbb651ff", size = 4282270, upload-time = "2026-03-27T15:09:44.676Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3b/72/29b8c8dbcf069c56475f00511f04c4aaa5ba3faba1dfc8276107d4b3ef7f/onyx_devtools-0.7.2-py3-none-win_arm64.whl", hash = "sha256:62f0836624ee6a5b31e64fd93162e7fce142ac8a4f959607e411824bc2b88174", size = 3823053, upload-time = "2026-03-27T15:09:43.546Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/cb/f8/844e34f5126ae40fff0d012bba0b28f031f8871062759bb3789eae4f5e0a/onyx_devtools-0.7.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b3cd434c722ae48a1f651748a9f094711b29d1a9f37fbbadef3144f2cdb0f16d", size = 4238900, upload-time = "2026-04-10T07:02:16.382Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2d/97/d1db725f900b199fa3f7a7a7c9b51ae75d4b18755c924f00f06a7703e552/onyx_devtools-0.7.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:c50e3d76d4f8cc4faa6250e758d42f0249067f0e17bc82b99c6c00dd48114393", size = 3913672, upload-time = "2026-04-10T07:02:17.46Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/31/83/e11bedb0a1321b63c844a418be1990c172ed363c6ee612978c3a38df71f1/onyx_devtools-0.7.5-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:ec01aeaaa14854b0933bb85bbfc51184599d3dbf1c0097ff59c1c72db8222a5a", size = 3779585, upload-time = "2026-04-10T07:02:16.31Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b3/85/128d25cd35c1adc436dcff9ab4f2c20cf29528d09415280c1230ff0ca993/onyx_devtools-0.7.5-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:586d50ecb6dcea95611135e4cd4529ebedd8ab84a41b1adf3be1280a48dc52af", size = 4201962, upload-time = "2026-04-10T07:02:14.466Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/99/5d/83c80f918b399fea998cd41bfe90bda733eda77e133ca4dc1e9ce18a9b4a/onyx_devtools-0.7.5-py3-none-win_amd64.whl", hash = "sha256:c45d80f0093ba738120b77c4c0bde13843e33d786ae8608eb10490f06183d89b", size = 4320088, upload-time = "2026-04-10T07:02:17.09Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/26/bf/b9c85cc61981bd71c0f1cbb50192763b11788a7c8636b1e01f750251c92c/onyx_devtools-0.7.5-py3-none-win_arm64.whl", hash = "sha256:9852a7cc29939371e016b794f2cffdb88680280d857d24c191c5188884416a3d", size = 3858839, upload-time = "2026-04-10T07:02:20.098Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4982,89 +4972,89 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "pillow"
|
||||
version = "12.1.1"
|
||||
version = "12.2.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/1f/42/5c74462b4fd957fcd7b13b04fb3205ff8349236ea74c7c375766d6c82288/pillow-12.1.1.tar.gz", hash = "sha256:9ad8fa5937ab05218e2b6a4cff30295ad35afd2f83ac592e68c0d871bb0fdbc4", size = 46980264, upload-time = "2026-02-11T04:23:07.146Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/8c/21/c2bcdd5906101a30244eaffc1b6e6ce71a31bd0742a01eb89e660ebfac2d/pillow-12.2.0.tar.gz", hash = "sha256:a830b1a40919539d07806aa58e1b114df53ddd43213d9c8b75847eee6c0182b5", size = 46987819, upload-time = "2026-04-01T14:46:17.687Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/2b/46/5da1ec4a5171ee7bf1a0efa064aba70ba3d6e0788ce3f5acd1375d23c8c0/pillow-12.1.1-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:e879bb6cd5c73848ef3b2b48b8af9ff08c5b71ecda8048b7dd22d8a33f60be32", size = 5304084, upload-time = "2026-02-11T04:20:27.501Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/78/93/a29e9bc02d1cf557a834da780ceccd54e02421627200696fcf805ebdc3fb/pillow-12.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:365b10bb9417dd4498c0e3b128018c4a624dc11c7b97d8cc54effe3b096f4c38", size = 4657866, upload-time = "2026-02-11T04:20:29.827Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/13/84/583a4558d492a179d31e4aae32eadce94b9acf49c0337c4ce0b70e0a01f2/pillow-12.1.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d4ce8e329c93845720cd2014659ca67eac35f6433fd3050393d85f3ecef0dad5", size = 6232148, upload-time = "2026-02-11T04:20:31.329Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d5/e2/53c43334bbbb2d3b938978532fbda8e62bb6e0b23a26ce8592f36bcc4987/pillow-12.1.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc354a04072b765eccf2204f588a7a532c9511e8b9c7f900e1b64e3e33487090", size = 8038007, upload-time = "2026-02-11T04:20:34.225Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b8/a6/3d0e79c8a9d58150dd98e199d7c1c56861027f3829a3a60b3c2784190180/pillow-12.1.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7e7976bf1910a8116b523b9f9f58bf410f3e8aa330cd9a2bb2953f9266ab49af", size = 6345418, upload-time = "2026-02-11T04:20:35.858Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a2/c8/46dfeac5825e600579157eea177be43e2f7ff4a99da9d0d0a49533509ac5/pillow-12.1.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:597bd9c8419bc7c6af5604e55847789b69123bbe25d65cc6ad3012b4f3c98d8b", size = 7034590, upload-time = "2026-02-11T04:20:37.91Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/af/bf/e6f65d3db8a8bbfeaf9e13cc0417813f6319863a73de934f14b2229ada18/pillow-12.1.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2c1fc0f2ca5f96a3c8407e41cca26a16e46b21060fe6d5b099d2cb01412222f5", size = 6458655, upload-time = "2026-02-11T04:20:39.496Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f9/c2/66091f3f34a25894ca129362e510b956ef26f8fb67a0e6417bc5744e56f1/pillow-12.1.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:578510d88c6229d735855e1f278aa305270438d36a05031dfaae5067cc8eb04d", size = 7159286, upload-time = "2026-02-11T04:20:41.139Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7b/5a/24bc8eb526a22f957d0cec6243146744966d40857e3d8deb68f7902ca6c1/pillow-12.1.1-cp311-cp311-win32.whl", hash = "sha256:7311c0a0dcadb89b36b7025dfd8326ecfa36964e29913074d47382706e516a7c", size = 6328663, upload-time = "2026-02-11T04:20:43.184Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/31/03/bef822e4f2d8f9d7448c133d0a18185d3cce3e70472774fffefe8b0ed562/pillow-12.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:fbfa2a7c10cc2623f412753cddf391c7f971c52ca40a3f65dc5039b2939e8563", size = 7031448, upload-time = "2026-02-11T04:20:44.696Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/49/70/f76296f53610bd17b2e7d31728b8b7825e3ac3b5b3688b51f52eab7c0818/pillow-12.1.1-cp311-cp311-win_arm64.whl", hash = "sha256:b81b5e3511211631b3f672a595e3221252c90af017e399056d0faabb9538aa80", size = 2453651, upload-time = "2026-02-11T04:20:46.243Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/07/d3/8df65da0d4df36b094351dce696f2989bec731d4f10e743b1c5f4da4d3bf/pillow-12.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ab323b787d6e18b3d91a72fc99b1a2c28651e4358749842b8f8dfacd28ef2052", size = 5262803, upload-time = "2026-02-11T04:20:47.653Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d6/71/5026395b290ff404b836e636f51d7297e6c83beceaa87c592718747e670f/pillow-12.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:adebb5bee0f0af4909c30db0d890c773d1a92ffe83da908e2e9e720f8edf3984", size = 4657601, upload-time = "2026-02-11T04:20:49.328Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b1/2e/1001613d941c67442f745aff0f7cc66dd8df9a9c084eb497e6a543ee6f7e/pillow-12.1.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bb66b7cc26f50977108790e2456b7921e773f23db5630261102233eb355a3b79", size = 6234995, upload-time = "2026-02-11T04:20:51.032Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/07/26/246ab11455b2549b9233dbd44d358d033a2f780fa9007b61a913c5b2d24e/pillow-12.1.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:aee2810642b2898bb187ced9b349e95d2a7272930796e022efaf12e99dccd293", size = 8045012, upload-time = "2026-02-11T04:20:52.882Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b2/8b/07587069c27be7535ac1fe33874e32de118fbd34e2a73b7f83436a88368c/pillow-12.1.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a0b1cd6232e2b618adcc54d9882e4e662a089d5768cd188f7c245b4c8c44a397", size = 6349638, upload-time = "2026-02-11T04:20:54.444Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ff/79/6df7b2ee763d619cda2fb4fea498e5f79d984dae304d45a8999b80d6cf5c/pillow-12.1.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7aac39bcf8d4770d089588a2e1dd111cbaa42df5a94be3114222057d68336bd0", size = 7041540, upload-time = "2026-02-11T04:20:55.97Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2c/5e/2ba19e7e7236d7529f4d873bdaf317a318896bac289abebd4bb00ef247f0/pillow-12.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ab174cd7d29a62dd139c44bf74b698039328f45cb03b4596c43473a46656b2f3", size = 6462613, upload-time = "2026-02-11T04:20:57.542Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/03/03/31216ec124bb5c3dacd74ce8efff4cc7f52643653bad4825f8f08c697743/pillow-12.1.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:339ffdcb7cbeaa08221cd401d517d4b1fe7a9ed5d400e4a8039719238620ca35", size = 7166745, upload-time = "2026-02-11T04:20:59.196Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1f/e7/7c4552d80052337eb28653b617eafdef39adfb137c49dd7e831b8dc13bc5/pillow-12.1.1-cp312-cp312-win32.whl", hash = "sha256:5d1f9575a12bed9e9eedd9a4972834b08c97a352bd17955ccdebfeca5913fa0a", size = 6328823, upload-time = "2026-02-11T04:21:01.385Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3d/17/688626d192d7261bbbf98846fc98995726bddc2c945344b65bec3a29d731/pillow-12.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:21329ec8c96c6e979cd0dfd29406c40c1d52521a90544463057d2aaa937d66a6", size = 7033367, upload-time = "2026-02-11T04:21:03.536Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ed/fe/a0ef1f73f939b0eca03ee2c108d0043a87468664770612602c63266a43c4/pillow-12.1.1-cp312-cp312-win_arm64.whl", hash = "sha256:af9a332e572978f0218686636610555ae3defd1633597be015ed50289a03c523", size = 2453811, upload-time = "2026-02-11T04:21:05.116Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d5/11/6db24d4bd7685583caeae54b7009584e38da3c3d4488ed4cd25b439de486/pillow-12.1.1-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:d242e8ac078781f1de88bf823d70c1a9b3c7950a44cdf4b7c012e22ccbcd8e4e", size = 4062689, upload-time = "2026-02-11T04:21:06.804Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/33/c0/ce6d3b1fe190f0021203e0d9b5b99e57843e345f15f9ef22fcd43842fd21/pillow-12.1.1-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:02f84dfad02693676692746df05b89cf25597560db2857363a208e393429f5e9", size = 4138535, upload-time = "2026-02-11T04:21:08.452Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a0/c6/d5eb6a4fb32a3f9c21a8c7613ec706534ea1cf9f4b3663e99f0d83f6fca8/pillow-12.1.1-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:e65498daf4b583091ccbb2556c7000abf0f3349fcd57ef7adc9a84a394ed29f6", size = 3601364, upload-time = "2026-02-11T04:21:10.194Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/14/a1/16c4b823838ba4c9c52c0e6bbda903a3fe5a1bdbf1b8eb4fff7156f3e318/pillow-12.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6c6db3b84c87d48d0088943bf33440e0c42370b99b1c2a7989216f7b42eede60", size = 5262561, upload-time = "2026-02-11T04:21:11.742Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/bb/ad/ad9dc98ff24f485008aa5cdedaf1a219876f6f6c42a4626c08bc4e80b120/pillow-12.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8b7e5304e34942bf62e15184219a7b5ad4ff7f3bb5cca4d984f37df1a0e1aee2", size = 4657460, upload-time = "2026-02-11T04:21:13.786Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9e/1b/f1a4ea9a895b5732152789326202a82464d5254759fbacae4deea3069334/pillow-12.1.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:18e5bddd742a44b7e6b1e773ab5db102bd7a94c32555ba656e76d319d19c3850", size = 6232698, upload-time = "2026-02-11T04:21:15.949Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/95/f4/86f51b8745070daf21fd2e5b1fe0eb35d4db9ca26e6d58366562fb56a743/pillow-12.1.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc44ef1f3de4f45b50ccf9136999d71abb99dca7706bc75d222ed350b9fd2289", size = 8041706, upload-time = "2026-02-11T04:21:17.723Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/29/9b/d6ecd956bb1266dd1045e995cce9b8d77759e740953a1c9aad9502a0461e/pillow-12.1.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5a8eb7ed8d4198bccbd07058416eeec51686b498e784eda166395a23eb99138e", size = 6346621, upload-time = "2026-02-11T04:21:19.547Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/71/24/538bff45bde96535d7d998c6fed1a751c75ac7c53c37c90dc2601b243893/pillow-12.1.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:47b94983da0c642de92ced1702c5b6c292a84bd3a8e1d1702ff923f183594717", size = 7038069, upload-time = "2026-02-11T04:21:21.378Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/94/0e/58cb1a6bc48f746bc4cb3adb8cabff73e2742c92b3bf7a220b7cf69b9177/pillow-12.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:518a48c2aab7ce596d3bf79d0e275661b846e86e4d0e7dec34712c30fe07f02a", size = 6460040, upload-time = "2026-02-11T04:21:23.148Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6c/57/9045cb3ff11eeb6c1adce3b2d60d7d299d7b273a2e6c8381a524abfdc474/pillow-12.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a550ae29b95c6dc13cf69e2c9dc5747f814c54eeb2e32d683e5e93af56caa029", size = 7164523, upload-time = "2026-02-11T04:21:25.01Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/73/f2/9be9cb99f2175f0d4dbadd6616ce1bf068ee54a28277ea1bf1fbf729c250/pillow-12.1.1-cp313-cp313-win32.whl", hash = "sha256:a003d7422449f6d1e3a34e3dd4110c22148336918ddbfc6a32581cd54b2e0b2b", size = 6332552, upload-time = "2026-02-11T04:21:27.238Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3f/eb/b0834ad8b583d7d9d42b80becff092082a1c3c156bb582590fcc973f1c7c/pillow-12.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:344cf1e3dab3be4b1fa08e449323d98a2a3f819ad20f4b22e77a0ede31f0faa1", size = 7040108, upload-time = "2026-02-11T04:21:29.462Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d5/7d/fc09634e2aabdd0feabaff4a32f4a7d97789223e7c2042fd805ea4b4d2c2/pillow-12.1.1-cp313-cp313-win_arm64.whl", hash = "sha256:5c0dd1636633e7e6a0afe7bf6a51a14992b7f8e60de5789018ebbdfae55b040a", size = 2453712, upload-time = "2026-02-11T04:21:31.072Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/19/2a/b9d62794fc8a0dd14c1943df68347badbd5511103e0d04c035ffe5cf2255/pillow-12.1.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0330d233c1a0ead844fc097a7d16c0abff4c12e856c0b325f231820fee1f39da", size = 5264880, upload-time = "2026-02-11T04:21:32.865Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/26/9d/e03d857d1347fa5ed9247e123fcd2a97b6220e15e9cb73ca0a8d91702c6e/pillow-12.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5dae5f21afb91322f2ff791895ddd8889e5e947ff59f71b46041c8ce6db790bc", size = 4660616, upload-time = "2026-02-11T04:21:34.97Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f7/ec/8a6d22afd02570d30954e043f09c32772bfe143ba9285e2fdb11284952cd/pillow-12.1.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2e0c664be47252947d870ac0d327fea7e63985a08794758aa8af5b6cb6ec0c9c", size = 6269008, upload-time = "2026-02-11T04:21:36.623Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3d/1d/6d875422c9f28a4a361f495a5f68d9de4a66941dc2c619103ca335fa6446/pillow-12.1.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:691ab2ac363b8217f7d31b3497108fb1f50faab2f75dfb03284ec2f217e87bf8", size = 8073226, upload-time = "2026-02-11T04:21:38.585Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a1/cd/134b0b6ee5eda6dc09e25e24b40fdafe11a520bc725c1d0bbaa5e00bf95b/pillow-12.1.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e9e8064fb1cc019296958595f6db671fba95209e3ceb0c4734c9baf97de04b20", size = 6380136, upload-time = "2026-02-11T04:21:40.562Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7a/a9/7628f013f18f001c1b98d8fffe3452f306a70dc6aba7d931019e0492f45e/pillow-12.1.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:472a8d7ded663e6162dafdf20015c486a7009483ca671cece7a9279b512fcb13", size = 7067129, upload-time = "2026-02-11T04:21:42.521Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1e/f8/66ab30a2193b277785601e82ee2d49f68ea575d9637e5e234faaa98efa4c/pillow-12.1.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:89b54027a766529136a06cfebeecb3a04900397a3590fd252160b888479517bf", size = 6491807, upload-time = "2026-02-11T04:21:44.22Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/da/0b/a877a6627dc8318fdb84e357c5e1a758c0941ab1ddffdafd231983788579/pillow-12.1.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:86172b0831b82ce4f7877f280055892b31179e1576aa00d0df3bb1bbf8c3e524", size = 7190954, upload-time = "2026-02-11T04:21:46.114Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/83/43/6f732ff85743cf746b1361b91665d9f5155e1483817f693f8d57ea93147f/pillow-12.1.1-cp313-cp313t-win32.whl", hash = "sha256:44ce27545b6efcf0fdbdceb31c9a5bdea9333e664cda58a7e674bb74608b3986", size = 6336441, upload-time = "2026-02-11T04:21:48.22Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3b/44/e865ef3986611bb75bfabdf94a590016ea327833f434558801122979cd0e/pillow-12.1.1-cp313-cp313t-win_amd64.whl", hash = "sha256:a285e3eb7a5a45a2ff504e31f4a8d1b12ef62e84e5411c6804a42197c1cf586c", size = 7045383, upload-time = "2026-02-11T04:21:50.015Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a8/c6/f4fb24268d0c6908b9f04143697ea18b0379490cb74ba9e8d41b898bd005/pillow-12.1.1-cp313-cp313t-win_arm64.whl", hash = "sha256:cc7d296b5ea4d29e6570dabeaed58d31c3fea35a633a69679fb03d7664f43fb3", size = 2456104, upload-time = "2026-02-11T04:21:51.633Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/03/d0/bebb3ffbf31c5a8e97241476c4cf8b9828954693ce6744b4a2326af3e16b/pillow-12.1.1-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:417423db963cb4be8bac3fc1204fe61610f6abeed1580a7a2cbb2fbda20f12af", size = 4062652, upload-time = "2026-02-11T04:21:53.19Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2d/c0/0e16fb0addda4851445c28f8350d8c512f09de27bbb0d6d0bbf8b6709605/pillow-12.1.1-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:b957b71c6b2387610f556a7eb0828afbe40b4a98036fc0d2acfa5a44a0c2036f", size = 4138823, upload-time = "2026-02-11T04:22:03.088Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6b/fb/6170ec655d6f6bb6630a013dd7cf7bc218423d7b5fa9071bf63dc32175ae/pillow-12.1.1-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:097690ba1f2efdeb165a20469d59d8bb03c55fb6621eb2041a060ae8ea3e9642", size = 3601143, upload-time = "2026-02-11T04:22:04.909Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/59/04/dc5c3f297510ba9a6837cbb318b87dd2b8f73eb41a43cc63767f65cb599c/pillow-12.1.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:2815a87ab27848db0321fb78c7f0b2c8649dee134b7f2b80c6a45c6831d75ccd", size = 5266254, upload-time = "2026-02-11T04:22:07.656Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/05/30/5db1236b0d6313f03ebf97f5e17cda9ca060f524b2fcc875149a8360b21c/pillow-12.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f7ed2c6543bad5a7d5530eb9e78c53132f93dfa44a28492db88b41cdab885202", size = 4657499, upload-time = "2026-02-11T04:22:09.613Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6f/18/008d2ca0eb612e81968e8be0bbae5051efba24d52debf930126d7eaacbba/pillow-12.1.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:652a2c9ccfb556235b2b501a3a7cf3742148cd22e04b5625c5fe057ea3e3191f", size = 6232137, upload-time = "2026-02-11T04:22:11.434Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/70/f1/f14d5b8eeb4b2cd62b9f9f847eb6605f103df89ef619ac68f92f748614ea/pillow-12.1.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d6e4571eedf43af33d0fc233a382a76e849badbccdf1ac438841308652a08e1f", size = 8042721, upload-time = "2026-02-11T04:22:13.321Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5a/d6/17824509146e4babbdabf04d8171491fa9d776f7061ff6e727522df9bd03/pillow-12.1.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b574c51cf7d5d62e9be37ba446224b59a2da26dc4c1bb2ecbe936a4fb1a7cb7f", size = 6347798, upload-time = "2026-02-11T04:22:15.449Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d1/ee/c85a38a9ab92037a75615aba572c85ea51e605265036e00c5b67dfafbfe2/pillow-12.1.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a37691702ed687799de29a518d63d4682d9016932db66d4e90c345831b02fb4e", size = 7039315, upload-time = "2026-02-11T04:22:17.24Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ec/f3/bc8ccc6e08a148290d7523bde4d9a0d6c981db34631390dc6e6ec34cacf6/pillow-12.1.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f95c00d5d6700b2b890479664a06e754974848afaae5e21beb4d83c106923fd0", size = 6462360, upload-time = "2026-02-11T04:22:19.111Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f6/ab/69a42656adb1d0665ab051eec58a41f169ad295cf81ad45406963105408f/pillow-12.1.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:559b38da23606e68681337ad74622c4dbba02254fc9cb4488a305dd5975c7eeb", size = 7165438, upload-time = "2026-02-11T04:22:21.041Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/02/46/81f7aa8941873f0f01d4b55cc543b0a3d03ec2ee30d617a0448bf6bd6dec/pillow-12.1.1-cp314-cp314-win32.whl", hash = "sha256:03edcc34d688572014ff223c125a3f77fb08091e4607e7745002fc214070b35f", size = 6431503, upload-time = "2026-02-11T04:22:22.833Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/40/72/4c245f7d1044b67affc7f134a09ea619d4895333d35322b775b928180044/pillow-12.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:50480dcd74fa63b8e78235957d302d98d98d82ccbfac4c7e12108ba9ecbdba15", size = 7176748, upload-time = "2026-02-11T04:22:24.64Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e4/ad/8a87bdbe038c5c698736e3348af5c2194ffb872ea52f11894c95f9305435/pillow-12.1.1-cp314-cp314-win_arm64.whl", hash = "sha256:5cb1785d97b0c3d1d1a16bc1d710c4a0049daefc4935f3a8f31f827f4d3d2e7f", size = 2544314, upload-time = "2026-02-11T04:22:26.685Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6c/9d/efd18493f9de13b87ede7c47e69184b9e859e4427225ea962e32e56a49bc/pillow-12.1.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:1f90cff8aa76835cba5769f0b3121a22bd4eb9e6884cfe338216e557a9a548b8", size = 5268612, upload-time = "2026-02-11T04:22:29.884Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f8/f1/4f42eb2b388eb2ffc660dcb7f7b556c1015c53ebd5f7f754965ef997585b/pillow-12.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1f1be78ce9466a7ee64bfda57bdba0f7cc499d9794d518b854816c41bf0aa4e9", size = 4660567, upload-time = "2026-02-11T04:22:31.799Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/01/54/df6ef130fa43e4b82e32624a7b821a2be1c5653a5fdad8469687a7db4e00/pillow-12.1.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:42fc1f4677106188ad9a55562bbade416f8b55456f522430fadab3cef7cd4e60", size = 6269951, upload-time = "2026-02-11T04:22:33.921Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a9/48/618752d06cc44bb4aae8ce0cd4e6426871929ed7b46215638088270d9b34/pillow-12.1.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:98edb152429ab62a1818039744d8fbb3ccab98a7c29fc3d5fcef158f3f1f68b7", size = 8074769, upload-time = "2026-02-11T04:22:35.877Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c3/bd/f1d71eb39a72fa088d938655afba3e00b38018d052752f435838961127d8/pillow-12.1.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d470ab1178551dd17fdba0fef463359c41aaa613cdcd7ff8373f54be629f9f8f", size = 6381358, upload-time = "2026-02-11T04:22:37.698Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/64/ef/c784e20b96674ed36a5af839305f55616f8b4f8aa8eeccf8531a6e312243/pillow-12.1.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6408a7b064595afcab0a49393a413732a35788f2a5092fdc6266952ed67de586", size = 7068558, upload-time = "2026-02-11T04:22:39.597Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/73/cb/8059688b74422ae61278202c4e1ad992e8a2e7375227be0a21c6b87ca8d5/pillow-12.1.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5d8c41325b382c07799a3682c1c258469ea2ff97103c53717b7893862d0c98ce", size = 6493028, upload-time = "2026-02-11T04:22:42.73Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c6/da/e3c008ed7d2dd1f905b15949325934510b9d1931e5df999bb15972756818/pillow-12.1.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c7697918b5be27424e9ce568193efd13d925c4481dd364e43f5dff72d33e10f8", size = 7191940, upload-time = "2026-02-11T04:22:44.543Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/01/4a/9202e8d11714c1fc5951f2e1ef362f2d7fbc595e1f6717971d5dd750e969/pillow-12.1.1-cp314-cp314t-win32.whl", hash = "sha256:d2912fd8114fc5545aa3a4b5576512f64c55a03f3ebcca4c10194d593d43ea36", size = 6438736, upload-time = "2026-02-11T04:22:46.347Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f3/ca/cbce2327eb9885476b3957b2e82eb12c866a8b16ad77392864ad601022ce/pillow-12.1.1-cp314-cp314t-win_amd64.whl", hash = "sha256:4ceb838d4bd9dab43e06c363cab2eebf63846d6a4aeaea283bbdfd8f1a8ed58b", size = 7182894, upload-time = "2026-02-11T04:22:48.114Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ec/d2/de599c95ba0a973b94410477f8bf0b6f0b5e67360eb89bcb1ad365258beb/pillow-12.1.1-cp314-cp314t-win_arm64.whl", hash = "sha256:7b03048319bfc6170e93bd60728a1af51d3dd7704935feb228c4d4faab35d334", size = 2546446, upload-time = "2026-02-11T04:22:50.342Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/56/11/5d43209aa4cb58e0cc80127956ff1796a68b928e6324bbf06ef4db34367b/pillow-12.1.1-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:600fd103672b925fe62ed08e0d874ea34d692474df6f4bf7ebe148b30f89f39f", size = 5228606, upload-time = "2026-02-11T04:22:52.106Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5f/d5/3b005b4e4fda6698b371fa6c21b097d4707585d7db99e98d9b0b87ac612a/pillow-12.1.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:665e1b916b043cef294bc54d47bf02d87e13f769bc4bc5fa225a24b3a6c5aca9", size = 4622321, upload-time = "2026-02-11T04:22:53.827Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/df/36/ed3ea2d594356fd8037e5a01f6156c74bc8d92dbb0fa60746cc96cabb6e8/pillow-12.1.1-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:495c302af3aad1ca67420ddd5c7bd480c8867ad173528767d906428057a11f0e", size = 5247579, upload-time = "2026-02-11T04:22:56.094Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/54/9a/9cc3e029683cf6d20ae5085da0dafc63148e3252c2f13328e553aaa13cfb/pillow-12.1.1-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8fd420ef0c52c88b5a035a0886f367748c72147b2b8f384c9d12656678dfdfa9", size = 6989094, upload-time = "2026-02-11T04:22:58.288Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/00/98/fc53ab36da80b88df0967896b6c4b4cd948a0dc5aa40a754266aa3ae48b3/pillow-12.1.1-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f975aa7ef9684ce7e2c18a3aa8f8e2106ce1e46b94ab713d156b2898811651d3", size = 5313850, upload-time = "2026-02-11T04:23:00.554Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/30/02/00fa585abfd9fe9d73e5f6e554dc36cc2b842898cbfc46d70353dae227f8/pillow-12.1.1-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8089c852a56c2966cf18835db62d9b34fef7ba74c726ad943928d494fa7f4735", size = 5963343, upload-time = "2026-02-11T04:23:02.934Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f2/26/c56ce33ca856e358d27fda9676c055395abddb82c35ac0f593877ed4562e/pillow-12.1.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:cb9bb857b2d057c6dfc72ac5f3b44836924ba15721882ef103cecb40d002d80e", size = 7029880, upload-time = "2026-02-11T04:23:04.783Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/68/e1/748f5663efe6edcfc4e74b2b93edfb9b8b99b67f21a854c3ae416500a2d9/pillow-12.2.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:8be29e59487a79f173507c30ddf57e733a357f67881430449bb32614075a40ab", size = 5354347, upload-time = "2026-04-01T14:42:44.255Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/47/a1/d5ff69e747374c33a3b53b9f98cca7889fce1fd03d79cdc4e1bccc6c5a87/pillow-12.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:71cde9a1e1551df7d34a25462fc60325e8a11a82cc2e2f54578e5e9a1e153d65", size = 4695873, upload-time = "2026-04-01T14:42:46.452Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/df/21/e3fbdf54408a973c7f7f89a23b2cb97a7ef30c61ab4142af31eee6aebc88/pillow-12.2.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f490f9368b6fc026f021db16d7ec2fbf7d89e2edb42e8ec09d2c60505f5729c7", size = 6280168, upload-time = "2026-04-01T14:42:49.228Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d3/f1/00b7278c7dd52b17ad4329153748f87b6756ec195ff786c2bdf12518337d/pillow-12.2.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8bd7903a5f2a4545f6fd5935c90058b89d30045568985a71c79f5fd6edf9b91e", size = 8088188, upload-time = "2026-04-01T14:42:51.735Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ad/cf/220a5994ef1b10e70e85748b75649d77d506499352be135a4989c957b701/pillow-12.2.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3997232e10d2920a68d25191392e3a4487d8183039e1c74c2297f00ed1c50705", size = 6394401, upload-time = "2026-04-01T14:42:54.343Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e9/bd/e51a61b1054f09437acfbc2ff9106c30d1eb76bc1453d428399946781253/pillow-12.2.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e74473c875d78b8e9d5da2a70f7099549f9eb37ded4e2f6a463e60125bccd176", size = 7079655, upload-time = "2026-04-01T14:42:56.954Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6b/3d/45132c57d5fb4b5744567c3817026480ac7fc3ce5d4c47902bc0e7f6f853/pillow-12.2.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:56a3f9c60a13133a98ecff6197af34d7824de9b7b38c3654861a725c970c197b", size = 6503105, upload-time = "2026-04-01T14:42:59.847Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7d/2e/9df2fc1e82097b1df3dce58dc43286aa01068e918c07574711fcc53e6fb4/pillow-12.2.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:90e6f81de50ad6b534cab6e5aef77ff6e37722b2f5d908686f4a5c9eba17a909", size = 7203402, upload-time = "2026-04-01T14:43:02.664Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/bd/2e/2941e42858ebb67e50ae741473de81c2984e6eff7b397017623c676e2e8d/pillow-12.2.0-cp311-cp311-win32.whl", hash = "sha256:8c984051042858021a54926eb597d6ee3012393ce9c181814115df4c60b9a808", size = 6378149, upload-time = "2026-04-01T14:43:05.274Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/69/42/836b6f3cd7f3e5fa10a1f1a5420447c17966044c8fbf589cc0452d5502db/pillow-12.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:6e6b2a0c538fc200b38ff9eb6628228b77908c319a005815f2dde585a0664b60", size = 7082626, upload-time = "2026-04-01T14:43:08.557Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c2/88/549194b5d6f1f494b485e493edc6693c0a16f4ada488e5bd974ed1f42fad/pillow-12.2.0-cp311-cp311-win_arm64.whl", hash = "sha256:9a8a34cc89c67a65ea7437ce257cea81a9dad65b29805f3ecee8c8fe8ff25ffe", size = 2463531, upload-time = "2026-04-01T14:43:10.743Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/58/be/7482c8a5ebebbc6470b3eb791812fff7d5e0216c2be3827b30b8bb6603ed/pillow-12.2.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2d192a155bbcec180f8564f693e6fd9bccff5a7af9b32e2e4bf8c9c69dbad6b5", size = 5308279, upload-time = "2026-04-01T14:43:13.246Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d8/95/0a351b9289c2b5cbde0bacd4a83ebc44023e835490a727b2a3bd60ddc0f4/pillow-12.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f3f40b3c5a968281fd507d519e444c35f0ff171237f4fdde090dd60699458421", size = 4695490, upload-time = "2026-04-01T14:43:15.584Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/de/af/4e8e6869cbed569d43c416fad3dc4ecb944cb5d9492defaed89ddd6fe871/pillow-12.2.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:03e7e372d5240cc23e9f07deca4d775c0817bffc641b01e9c3af208dbd300987", size = 6284462, upload-time = "2026-04-01T14:43:18.268Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e9/9e/c05e19657fd57841e476be1ab46c4d501bffbadbafdc31a6d665f8b737b6/pillow-12.2.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b86024e52a1b269467a802258c25521e6d742349d760728092e1bc2d135b4d76", size = 8094744, upload-time = "2026-04-01T14:43:20.716Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2b/54/1789c455ed10176066b6e7e6da1b01e50e36f94ba584dc68d9eebfe9156d/pillow-12.2.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7371b48c4fa448d20d2714c9a1f775a81155050d383333e0a6c15b1123dda005", size = 6398371, upload-time = "2026-04-01T14:43:23.443Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/43/e3/fdc657359e919462369869f1c9f0e973f353f9a9ee295a39b1fea8ee1a77/pillow-12.2.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:62f5409336adb0663b7caa0da5c7d9e7bdbaae9ce761d34669420c2a801b2780", size = 7087215, upload-time = "2026-04-01T14:43:26.758Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/8b/f8/2f6825e441d5b1959d2ca5adec984210f1ec086435b0ed5f52c19b3b8a6e/pillow-12.2.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:01afa7cf67f74f09523699b4e88c73fb55c13346d212a59a2db1f86b0a63e8c5", size = 6509783, upload-time = "2026-04-01T14:43:29.56Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/67/f9/029a27095ad20f854f9dba026b3ea6428548316e057e6fc3545409e86651/pillow-12.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fc3d34d4a8fbec3e88a79b92e5465e0f9b842b628675850d860b8bd300b159f5", size = 7212112, upload-time = "2026-04-01T14:43:32.091Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/be/42/025cfe05d1be22dbfdb4f264fe9de1ccda83f66e4fc3aac94748e784af04/pillow-12.2.0-cp312-cp312-win32.whl", hash = "sha256:58f62cc0f00fd29e64b29f4fd923ffdb3859c9f9e6105bfc37ba1d08994e8940", size = 6378489, upload-time = "2026-04-01T14:43:34.601Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5d/7b/25a221d2c761c6a8ae21bfa3874988ff2583e19cf8a27bf2fee358df7942/pillow-12.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:7f84204dee22a783350679a0333981df803dac21a0190d706a50475e361c93f5", size = 7084129, upload-time = "2026-04-01T14:43:37.213Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/10/e1/542a474affab20fd4a0f1836cb234e8493519da6b76899e30bcc5d990b8b/pillow-12.2.0-cp312-cp312-win_arm64.whl", hash = "sha256:af73337013e0b3b46f175e79492d96845b16126ddf79c438d7ea7ff27783a414", size = 2463612, upload-time = "2026-04-01T14:43:39.421Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/4a/01/53d10cf0dbad820a8db274d259a37ba50b88b24768ddccec07355382d5ad/pillow-12.2.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:8297651f5b5679c19968abefd6bb84d95fe30ef712eb1b2d9b2d31ca61267f4c", size = 4100837, upload-time = "2026-04-01T14:43:41.506Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/0f/98/f3a6657ecb698c937f6c76ee564882945f29b79bad496abcba0e84659ec5/pillow-12.2.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:50d8520da2a6ce0af445fa6d648c4273c3eeefbc32d7ce049f22e8b5c3daecc2", size = 4176528, upload-time = "2026-04-01T14:43:43.773Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/69/bc/8986948f05e3ea490b8442ea1c1d4d990b24a7e43d8a51b2c7d8b1dced36/pillow-12.2.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:766cef22385fa1091258ad7e6216792b156dc16d8d3fa607e7545b2b72061f1c", size = 3640401, upload-time = "2026-04-01T14:43:45.87Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/34/46/6c717baadcd62bc8ed51d238d521ab651eaa74838291bda1f86fe1f864c9/pillow-12.2.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5d2fd0fa6b5d9d1de415060363433f28da8b1526c1c129020435e186794b3795", size = 5308094, upload-time = "2026-04-01T14:43:48.438Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/71/43/905a14a8b17fdb1ccb58d282454490662d2cb89a6bfec26af6d3520da5ec/pillow-12.2.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:56b25336f502b6ed02e889f4ece894a72612fe885889a6e8c4c80239ff6e5f5f", size = 4695402, upload-time = "2026-04-01T14:43:51.292Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/73/dd/42107efcb777b16fa0393317eac58f5b5cf30e8392e266e76e51cff28c3d/pillow-12.2.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f1c943e96e85df3d3478f7b691f229887e143f81fedab9b20205349ab04d73ed", size = 6280005, upload-time = "2026-04-01T14:43:54.242Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a8/68/b93e09e5e8549019e61acf49f65b1a8530765a7f812c77a7461bca7e4494/pillow-12.2.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:03f6fab9219220f041c74aeaa2939ff0062bd5c364ba9ce037197f4c6d498cd9", size = 8090669, upload-time = "2026-04-01T14:43:57.335Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/4b/6e/3ccb54ce8ec4ddd1accd2d89004308b7b0b21c4ac3d20fa70af4760a4330/pillow-12.2.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5cdfebd752ec52bf5bb4e35d9c64b40826bc5b40a13df7c3cda20a2c03a0f5ed", size = 6395194, upload-time = "2026-04-01T14:43:59.864Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/67/ee/21d4e8536afd1a328f01b359b4d3997b291ffd35a237c877b331c1c3b71c/pillow-12.2.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eedf4b74eda2b5a4b2b2fb4c006d6295df3bf29e459e198c90ea48e130dc75c3", size = 7082423, upload-time = "2026-04-01T14:44:02.74Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/78/5f/e9f86ab0146464e8c133fe85df987ed9e77e08b29d8d35f9f9f4d6f917ba/pillow-12.2.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:00a2865911330191c0b818c59103b58a5e697cae67042366970a6b6f1b20b7f9", size = 6505667, upload-time = "2026-04-01T14:44:05.381Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ed/1e/409007f56a2fdce61584fd3acbc2bbc259857d555196cedcadc68c015c82/pillow-12.2.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1e1757442ed87f4912397c6d35a0db6a7b52592156014706f17658ff58bbf795", size = 7208580, upload-time = "2026-04-01T14:44:08.39Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/23/c4/7349421080b12fb35414607b8871e9534546c128a11965fd4a7002ccfbee/pillow-12.2.0-cp313-cp313-win32.whl", hash = "sha256:144748b3af2d1b358d41286056d0003f47cb339b8c43a9ea42f5fea4d8c66b6e", size = 6375896, upload-time = "2026-04-01T14:44:11.197Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3f/82/8a3739a5e470b3c6cbb1d21d315800d8e16bff503d1f16b03a4ec3212786/pillow-12.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:390ede346628ccc626e5730107cde16c42d3836b89662a115a921f28440e6a3b", size = 7081266, upload-time = "2026-04-01T14:44:13.947Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c3/25/f968f618a062574294592f668218f8af564830ccebdd1fa6200f598e65c5/pillow-12.2.0-cp313-cp313-win_arm64.whl", hash = "sha256:8023abc91fba39036dbce14a7d6535632f99c0b857807cbbbf21ecc9f4717f06", size = 2463508, upload-time = "2026-04-01T14:44:16.312Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/4d/a4/b342930964e3cb4dce5038ae34b0eab4653334995336cd486c5a8c25a00c/pillow-12.2.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:042db20a421b9bafecc4b84a8b6e444686bd9d836c7fd24542db3e7df7baad9b", size = 5309927, upload-time = "2026-04-01T14:44:18.89Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9f/de/23198e0a65a9cf06123f5435a5d95cea62a635697f8f03d134d3f3a96151/pillow-12.2.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:dd025009355c926a84a612fecf58bb315a3f6814b17ead51a8e48d3823d9087f", size = 4698624, upload-time = "2026-04-01T14:44:21.115Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/01/a6/1265e977f17d93ea37aa28aa81bad4fa597933879fac2520d24e021c8da3/pillow-12.2.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:88ddbc66737e277852913bd1e07c150cc7bb124539f94c4e2df5344494e0a612", size = 6321252, upload-time = "2026-04-01T14:44:23.663Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3c/83/5982eb4a285967baa70340320be9f88e57665a387e3a53a7f0db8231a0cd/pillow-12.2.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d362d1878f00c142b7e1a16e6e5e780f02be8195123f164edf7eddd911eefe7c", size = 8126550, upload-time = "2026-04-01T14:44:26.772Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/4e/48/6ffc514adce69f6050d0753b1a18fd920fce8cac87620d5a31231b04bfc5/pillow-12.2.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2c727a6d53cb0018aadd8018c2b938376af27914a68a492f59dfcaca650d5eea", size = 6433114, upload-time = "2026-04-01T14:44:29.615Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/36/a3/f9a77144231fb8d40ee27107b4463e205fa4677e2ca2548e14da5cf18dce/pillow-12.2.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:efd8c21c98c5cc60653bcb311bef2ce0401642b7ce9d09e03a7da87c878289d4", size = 7115667, upload-time = "2026-04-01T14:44:32.773Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c1/fc/ac4ee3041e7d5a565e1c4fd72a113f03b6394cc72ab7089d27608f8aaccb/pillow-12.2.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9f08483a632889536b8139663db60f6724bfcb443c96f1b18855860d7d5c0fd4", size = 6538966, upload-time = "2026-04-01T14:44:35.252Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c0/a8/27fb307055087f3668f6d0a8ccb636e7431d56ed0750e07a60547b1e083e/pillow-12.2.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dac8d77255a37e81a2efcbd1fc05f1c15ee82200e6c240d7e127e25e365c39ea", size = 7238241, upload-time = "2026-04-01T14:44:37.875Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ad/4b/926ab182c07fccae9fcb120043464e1ff1564775ec8864f21a0ebce6ac25/pillow-12.2.0-cp313-cp313t-win32.whl", hash = "sha256:ee3120ae9dff32f121610bb08e4313be87e03efeadfc6c0d18f89127e24d0c24", size = 6379592, upload-time = "2026-04-01T14:44:40.336Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c2/c4/f9e476451a098181b30050cc4c9a3556b64c02cf6497ea421ac047e89e4b/pillow-12.2.0-cp313-cp313t-win_amd64.whl", hash = "sha256:325ca0528c6788d2a6c3d40e3568639398137346c3d6e66bb61db96b96511c98", size = 7085542, upload-time = "2026-04-01T14:44:43.251Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/00/a4/285f12aeacbe2d6dc36c407dfbbe9e96d4a80b0fb710a337f6d2ad978c75/pillow-12.2.0-cp313-cp313t-win_arm64.whl", hash = "sha256:2e5a76d03a6c6dcef67edabda7a52494afa4035021a79c8558e14af25313d453", size = 2465765, upload-time = "2026-04-01T14:44:45.996Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/bf/98/4595daa2365416a86cb0d495248a393dfc84e96d62ad080c8546256cb9c0/pillow-12.2.0-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:3adc9215e8be0448ed6e814966ecf3d9952f0ea40eb14e89a102b87f450660d8", size = 4100848, upload-time = "2026-04-01T14:44:48.48Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/0b/79/40184d464cf89f6663e18dfcf7ca21aae2491fff1a16127681bf1fa9b8cf/pillow-12.2.0-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:6a9adfc6d24b10f89588096364cc726174118c62130c817c2837c60cf08a392b", size = 4176515, upload-time = "2026-04-01T14:44:51.353Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b0/63/703f86fd4c422a9cf722833670f4f71418fb116b2853ff7da722ea43f184/pillow-12.2.0-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:6a6e67ea2e6feda684ed370f9a1c52e7a243631c025ba42149a2cc5934dec295", size = 3640159, upload-time = "2026-04-01T14:44:53.588Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/71/e0/fb22f797187d0be2270f83500aab851536101b254bfa1eae10795709d283/pillow-12.2.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:2bb4a8d594eacdfc59d9e5ad972aa8afdd48d584ffd5f13a937a664c3e7db0ed", size = 5312185, upload-time = "2026-04-01T14:44:56.039Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ba/8c/1a9e46228571de18f8e28f16fabdfc20212a5d019f3e3303452b3f0a580d/pillow-12.2.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:80b2da48193b2f33ed0c32c38140f9d3186583ce7d516526d462645fd98660ae", size = 4695386, upload-time = "2026-04-01T14:44:58.663Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/70/62/98f6b7f0c88b9addd0e87c217ded307b36be024d4ff8869a812b241d1345/pillow-12.2.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22db17c68434de69d8ecfc2fe821569195c0c373b25cccb9cbdacf2c6e53c601", size = 6280384, upload-time = "2026-04-01T14:45:01.5Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5e/03/688747d2e91cfbe0e64f316cd2e8005698f76ada3130d0194664174fa5de/pillow-12.2.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7b14cc0106cd9aecda615dd6903840a058b4700fcb817687d0ee4fc8b6e389be", size = 8091599, upload-time = "2026-04-01T14:45:04.5Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f6/35/577e22b936fcdd66537329b33af0b4ccfefaeabd8aec04b266528cddb33c/pillow-12.2.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8cbeb542b2ebc6fcdacabf8aca8c1a97c9b3ad3927d46b8723f9d4f033288a0f", size = 6396021, upload-time = "2026-04-01T14:45:07.117Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/11/8d/d2532ad2a603ca2b93ad9f5135732124e57811d0168155852f37fbce2458/pillow-12.2.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4bfd07bc812fbd20395212969e41931001fd59eb55a60658b0e5710872e95286", size = 7083360, upload-time = "2026-04-01T14:45:09.763Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5e/26/d325f9f56c7e039034897e7380e9cc202b1e368bfd04d4cbe6a441f02885/pillow-12.2.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9aba9a17b623ef750a4d11b742cbafffeb48a869821252b30ee21b5e91392c50", size = 6507628, upload-time = "2026-04-01T14:45:12.378Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5f/f7/769d5632ffb0988f1c5e7660b3e731e30f7f8ec4318e94d0a5d674eb65a4/pillow-12.2.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:deede7c263feb25dba4e82ea23058a235dcc2fe1f6021025dc71f2b618e26104", size = 7209321, upload-time = "2026-04-01T14:45:15.122Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6a/7a/c253e3c645cd47f1aceea6a8bacdba9991bf45bb7dfe927f7c893e89c93c/pillow-12.2.0-cp314-cp314-win32.whl", hash = "sha256:632ff19b2778e43162304d50da0181ce24ac5bb8180122cbe1bf4673428328c7", size = 6479723, upload-time = "2026-04-01T14:45:17.797Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/cd/8b/601e6566b957ca50e28725cb6c355c59c2c8609751efbecd980db44e0349/pillow-12.2.0-cp314-cp314-win_amd64.whl", hash = "sha256:4e6c62e9d237e9b65fac06857d511e90d8461a32adcc1b9065ea0c0fa3a28150", size = 7217400, upload-time = "2026-04-01T14:45:20.529Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d6/94/220e46c73065c3e2951bb91c11a1fb636c8c9ad427ac3ce7d7f3359b9b2f/pillow-12.2.0-cp314-cp314-win_arm64.whl", hash = "sha256:b1c1fbd8a5a1af3412a0810d060a78b5136ec0836c8a4ef9aa11807f2a22f4e1", size = 2554835, upload-time = "2026-04-01T14:45:23.162Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b6/ab/1b426a3974cb0e7da5c29ccff4807871d48110933a57207b5a676cccc155/pillow-12.2.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:57850958fe9c751670e49b2cecf6294acc99e562531f4bd317fa5ddee2068463", size = 5314225, upload-time = "2026-04-01T14:45:25.637Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/19/1e/dce46f371be2438eecfee2a1960ee2a243bbe5e961890146d2dee1ff0f12/pillow-12.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:d5d38f1411c0ed9f97bcb49b7bd59b6b7c314e0e27420e34d99d844b9ce3b6f3", size = 4698541, upload-time = "2026-04-01T14:45:28.355Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/55/c3/7fbecf70adb3a0c33b77a300dc52e424dc22ad8cdc06557a2e49523b703d/pillow-12.2.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5c0a9f29ca8e79f09de89293f82fc9b0270bb4af1d58bc98f540cc4aedf03166", size = 6322251, upload-time = "2026-04-01T14:45:30.924Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1c/3c/7fbc17cfb7e4fe0ef1642e0abc17fc6c94c9f7a16be41498e12e2ba60408/pillow-12.2.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1610dd6c61621ae1cf811bef44d77e149ce3f7b95afe66a4512f8c59f25d9ebe", size = 8127807, upload-time = "2026-04-01T14:45:33.908Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ff/c3/a8ae14d6defd2e448493ff512fae903b1e9bd40b72efb6ec55ce0048c8ce/pillow-12.2.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a34329707af4f73cf1782a36cd2289c0368880654a2c11f027bcee9052d35dd", size = 6433935, upload-time = "2026-04-01T14:45:36.623Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6e/32/2880fb3a074847ac159d8f902cb43278a61e85f681661e7419e6596803ed/pillow-12.2.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8e9c4f5b3c546fa3458a29ab22646c1c6c787ea8f5ef51300e5a60300736905e", size = 7116720, upload-time = "2026-04-01T14:45:39.258Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/46/87/495cc9c30e0129501643f24d320076f4cc54f718341df18cc70ec94c44e1/pillow-12.2.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:fb043ee2f06b41473269765c2feae53fc2e2fbf96e5e22ca94fb5ad677856f06", size = 6540498, upload-time = "2026-04-01T14:45:41.879Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/18/53/773f5edca692009d883a72211b60fdaf8871cbef075eaa9d577f0a2f989e/pillow-12.2.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f278f034eb75b4e8a13a54a876cc4a5ab39173d2cdd93a638e1b467fc545ac43", size = 7239413, upload-time = "2026-04-01T14:45:44.705Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c9/e4/4b64a97d71b2a83158134abbb2f5bd3f8a2ea691361282f010998f339ec7/pillow-12.2.0-cp314-cp314t-win32.whl", hash = "sha256:6bb77b2dcb06b20f9f4b4a8454caa581cd4dd0643a08bacf821216a16d9c8354", size = 6482084, upload-time = "2026-04-01T14:45:47.568Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ba/13/306d275efd3a3453f72114b7431c877d10b1154014c1ebbedd067770d629/pillow-12.2.0-cp314-cp314t-win_amd64.whl", hash = "sha256:6562ace0d3fb5f20ed7290f1f929cae41b25ae29528f2af1722966a0a02e2aa1", size = 7225152, upload-time = "2026-04-01T14:45:50.032Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ff/6e/cf826fae916b8658848d7b9f38d88da6396895c676e8086fc0988073aaf8/pillow-12.2.0-cp314-cp314t-win_arm64.whl", hash = "sha256:aa88ccfe4e32d362816319ed727a004423aab09c5cea43c01a4b435643fa34eb", size = 2556579, upload-time = "2026-04-01T14:45:52.529Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/4e/b7/2437044fb910f499610356d1352e3423753c98e34f915252aafecc64889f/pillow-12.2.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0538bd5e05efec03ae613fd89c4ce0368ecd2ba239cc25b9f9be7ed426b0af1f", size = 5273969, upload-time = "2026-04-01T14:45:55.538Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f6/f4/8316e31de11b780f4ac08ef3654a75555e624a98db1056ecb2122d008d5a/pillow-12.2.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:394167b21da716608eac917c60aa9b969421b5dcbbe02ae7f013e7b85811c69d", size = 4659674, upload-time = "2026-04-01T14:45:58.093Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d4/37/664fca7201f8bb2aa1d20e2c3d5564a62e6ae5111741966c8319ca802361/pillow-12.2.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5d04bfa02cc2d23b497d1e90a0f927070043f6cbf303e738300532379a4b4e0f", size = 5288479, upload-time = "2026-04-01T14:46:01.141Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/49/62/5b0ed78fce87346be7a5cfcfaaad91f6a1f98c26f86bdbafa2066c647ef6/pillow-12.2.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0c838a5125cee37e68edec915651521191cef1e6aa336b855f495766e77a366e", size = 7032230, upload-time = "2026-04-01T14:46:03.874Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c3/28/ec0fc38107fc32536908034e990c47914c57cd7c5a3ece4d8d8f7ffd7e27/pillow-12.2.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a6c9fa44005fa37a91ebfc95d081e8079757d2e904b27103f4f5fa6f0bf78c0", size = 5355404, upload-time = "2026-04-01T14:46:06.33Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5e/8b/51b0eddcfa2180d60e41f06bd6d0a62202b20b59c68f5a132e615b75aecf/pillow-12.2.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:25373b66e0dd5905ed63fa3cae13c82fbddf3079f2c8bf15c6fb6a35586324c1", size = 6002215, upload-time = "2026-04-01T14:46:08.83Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/bc/60/5382c03e1970de634027cee8e1b7d39776b778b81812aaf45b694dfe9e28/pillow-12.2.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:bfa9c230d2fe991bed5318a5f119bd6780cda2915cca595393649fc118ab895e", size = 7080946, upload-time = "2026-04-01T14:46:11.734Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user