Compare commits

..

4 Commits

Author SHA1 Message Date
Jamison Lahman
822b0c99be chore(devtools): upgrade ods: 0.7.3->0.7.4 (#10039) 2026-04-09 14:44:56 -07:00
Jamison Lahman
bcf2851a85 chore(devtools): introduce a .devcontainer (#10035)
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-09 14:27:30 -07:00
Nikolas Garza
a5a59bd8f0 feat(helm): add API and heavy worker ServiceMonitors (#10025) 2026-04-09 21:03:27 +00:00
Nikolas Garza
32d2e7985a feat(slack-bot): make agent selector searchable (#10036) 2026-04-09 20:53:47 +00:00
36 changed files with 1075 additions and 500 deletions

65
.devcontainer/Dockerfile Normal file
View File

@@ -0,0 +1,65 @@
# Base image for the Onyx devcontainer. Pinned by digest so rebuilds are
# reproducible even if the 26.04 tag is re-pushed.
FROM ubuntu:26.04@sha256:cc925e589b7543b910fea57a240468940003fbfc0515245a495dd0ad8fe7cef1
# Single layer that:
#   1. installs the base CLI tooling (editor, search tools, git, zsh, ...) plus
#      the packages the init scripts rely on (acl for setfacl; iptables, ipset,
#      iproute2 for the firewall),
#   2. installs Node.js 20 via the NodeSource setup script,
#   3. registers the Docker and GitHub CLI apt repositories with their keys,
#   4. installs the Docker CLI, the compose plugin and gh,
#   5. removes the apt caches to keep the layer small.
RUN apt-get update && apt-get install -y --no-install-recommends \
acl \
curl \
fd-find \
fzf \
git \
jq \
less \
make \
neovim \
openssh-client \
python3-venv \
ripgrep \
sudo \
ca-certificates \
iptables \
ipset \
iproute2 \
dnsutils \
unzip \
wget \
zsh \
&& curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
&& apt-get install -y nodejs \
&& install -m 0755 -d /etc/apt/keyrings \
&& curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc \
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu $(. /etc/os-release && echo "$VERSION_CODENAME") stable" > /etc/apt/sources.list.d/docker.list \
&& curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg -o /etc/apt/keyrings/githubcli-archive-keyring.gpg \
&& chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg \
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" > /etc/apt/sources.list.d/github-cli.list \
&& apt-get update \
&& apt-get install -y --no-install-recommends docker-ce-cli docker-compose-plugin gh \
&& apt-get clean && rm -rf /var/lib/apt/lists/*
# fd-find installs as fdfind on Debian/Ubuntu — symlink to fd
RUN ln -sf "$(which fdfind)" /usr/local/bin/fd
# Install uv (Python package manager)
# NOTE(review): unlike the base image, this source image uses the floating
# :latest tag, so the uv version can change between builds.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /usr/local/bin/
# Create non-root dev user with passwordless sudo
# (zsh login shell; sudoers drop-in is made read-only with mode 0440).
RUN useradd -m -s /bin/zsh dev && \
echo "dev ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/dev && \
chmod 0440 /etc/sudoers.d/dev
# Marker variable; presumably lets tooling detect that it runs inside the
# devcontainer — confirm against consumers.
ENV DEVCONTAINER=true
# Create /workspace owned by dev and make it the default working directory.
RUN mkdir -p /workspace && \
chown -R dev:dev /workspace
WORKDIR /workspace
# Install Claude Code
# The version defaults to "latest"; pass --build-arg CLAUDE_CODE_VERSION=<x>
# to pin it.
ARG CLAUDE_CODE_VERSION=latest
RUN npm install -g @anthropic-ai/claude-code@${CLAUDE_CODE_VERSION}
# Configure zsh — source the repo-local zshrc so shell customization
# doesn't require an image rebuild.
# The same guarded source line is appended for both root and dev; the guard
# keeps shells working when /workspace/.devcontainer/zshrc is absent.
RUN chsh -s /bin/zsh root && \
for rc in /root/.zshrc /home/dev/.zshrc; do \
echo '[ -f /workspace/.devcontainer/zshrc ] && . /workspace/.devcontainer/zshrc' >> "$rc"; \
done && \
chown dev:dev /home/dev/.zshrc

126
.devcontainer/README.md Normal file
View File

@@ -0,0 +1,126 @@
# Onyx Dev Container
A containerized development environment for working on Onyx.
## What's included
- Ubuntu 26.04 base image
- Node.js 20, uv, Claude Code
- Docker CLI, GitHub CLI (`gh`)
- Neovim, ripgrep, fd, fzf, jq, make, wget, unzip
- Zsh as default shell (sources host `~/.zshrc` if available)
- Python venv auto-activation
- Network firewall (default-deny; allowlists npm, PyPI, GitHub, Anthropic APIs, Sentry, and VS Code update servers)
## Usage
### VS Code
1. Install the [Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers)
2. Open this repo in VS Code
3. "Reopen in Container" when prompted
### CLI (`ods dev`)
The [`ods` devtools CLI](../tools/ods/README.md) provides workspace-aware wrappers
for all devcontainer operations (also available as `ods dc`):
```bash
# Start the container
ods dev up
# Open a shell
ods dev into
# Run a command
ods dev exec npm test
# Stop the container
ods dev stop
```
If you don't have `ods` installed, use the `devcontainer` CLI directly:
```bash
npm install -g @devcontainers/cli
devcontainer up --workspace-folder .
devcontainer exec --workspace-folder . zsh
```
## Restarting the container
### VS Code
Open the Command Palette (`Ctrl+Shift+P` / `Cmd+Shift+P`) and run:
- **Dev Containers: Reopen in Container** — restarts the container without rebuilding
### CLI
```bash
# Restart the container
ods dev restart
# Pull the latest published image and recreate
ods dev rebuild
```
Or without `ods`:
```bash
devcontainer up --workspace-folder . --remove-existing-container
```
## Image
The devcontainer uses a prebuilt image published to `onyxdotapp/onyx-devcontainer`.
The image is pinned by digest (`@sha256:…`) in `devcontainer.json` — no local build is required.
To build the image locally (e.g. while iterating on the Dockerfile):
```bash
docker buildx bake devcontainer
```
The `devcontainer` target is defined in `docker-bake.hcl` at the repo root.
## User & permissions
The container runs as the `dev` user by default (`remoteUser` in devcontainer.json).
An init script (`init-dev-user.sh`) runs at container start to ensure `dev` has
read/write access to the bind-mounted workspace:
- **Standard Docker** — `dev`'s UID/GID is remapped to match the workspace owner,
so file permissions work seamlessly.
- **Rootless Docker** — The workspace appears as root-owned (UID 0) inside the
container due to user-namespace mapping. The init script grants `dev` access via
POSIX ACLs (`setfacl`), which adds a few seconds to the first container start on
large repos.
## Docker socket
The container mounts the host's Docker socket so you can run `docker` commands
from inside. `ods dev` auto-detects the socket path and sets `DOCKER_SOCK`:
| Environment | Socket path |
| ----------------------- | ------------------------------ |
| Linux (rootless Docker) | `$XDG_RUNTIME_DIR/docker.sock` |
| macOS (Docker Desktop) | `~/.docker/run/docker.sock` |
| Linux (standard Docker) | `/var/run/docker.sock` |
To override, set `DOCKER_SOCK` before running `ods dev up`. When using the
VS Code extension or `devcontainer` CLI directly (without `ods`), you must set
`DOCKER_SOCK` yourself.
## Firewall
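The container starts with a default-deny firewall (`init-firewall.sh`) that only allows outbound traffic to:
- npm registry
- PyPI (`pypi.org`, `files.pythonhosted.org`)
- GitHub
- Anthropic API
- Sentry
- VS Code update servers

DNS lookups (port 53) and loopback traffic are always allowed.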
This requires the `NET_ADMIN` and `NET_RAW` capabilities, which are added via `runArgs` in `devcontainer.json`.

View File

@@ -0,0 +1,21 @@
{
"name": "Onyx Dev Sandbox",
"image": "onyxdotapp/onyx-devcontainer@sha256:12184169c5bcc9cca0388286d5ffe504b569bc9c37bfa631b76ee8eee2064055",
"runArgs": ["--cap-add=NET_ADMIN", "--cap-add=NET_RAW"],
"mounts": [
"source=${localEnv:DOCKER_SOCK},target=/var/run/docker.sock,type=bind",
"source=${localEnv:HOME}/.claude,target=/home/dev/.claude,type=bind",
"source=${localEnv:HOME}/.claude.json,target=/home/dev/.claude.json,type=bind",
"source=${localEnv:HOME}/.zshrc,target=/home/dev/.zshrc.host,type=bind,readonly",
"source=${localEnv:HOME}/.gitconfig,target=/home/dev/.gitconfig.host,type=bind,readonly",
"source=${localEnv:HOME}/.ssh,target=/home/dev/.ssh.host,type=bind,readonly",
"source=${localEnv:HOME}/.config/nvim,target=/home/dev/.config/nvim.host,type=bind,readonly",
"source=onyx-devcontainer-local,target=/home/dev/.local,type=volume"
],
"remoteUser": "dev",
"updateRemoteUserUID": false,
"workspaceMount": "source=${localWorkspaceFolder},target=/workspace,type=bind,consistency=delegated",
"workspaceFolder": "/workspace",
"postStartCommand": "sudo bash /workspace/.devcontainer/init-dev-user.sh && sudo bash /workspace/.devcontainer/init-firewall.sh",
"waitFor": "postStartCommand"
}

View File

@@ -0,0 +1,104 @@
#!/usr/bin/env bash
set -euo pipefail

# Prepare the dev user so the bind-mounted workspace is usable without
# running as root. Runs as root on every container start, so every step
# must be safe to repeat.
#
# Standard Docker: Workspace is owned by the host user's UID (e.g. 1000).
#                  We remap dev to that UID — fast and seamless.
#
# Rootless Docker: Workspace appears as root-owned (UID 0) inside the
#                  container due to user-namespace mapping. We can't remap
#                  dev to UID 0 (that's root), so we grant access with
#                  POSIX ACLs instead.

WORKSPACE=/workspace
TARGET_USER=dev

WS_UID=$(stat -c '%u' "$WORKSPACE")
WS_GID=$(stat -c '%g' "$WORKSPACE")
DEV_UID=$(id -u "$TARGET_USER")
DEV_GID=$(id -g "$TARGET_USER")
DEV_HOME=/home/"$TARGET_USER"

# Ensure directories that tools expect exist under ~dev.
# ~/.local is a named Docker volume — ensure subdirs exist and are owned by dev.
mkdir -p "$DEV_HOME"/.local/state "$DEV_HOME"/.local/share
chown -R "$TARGET_USER":"$TARGET_USER" "$DEV_HOME"/.local

# Copy host configs mounted as *.host into their real locations.
# This gives the dev user owned copies without touching host originals.
#
# The "src/." form copies the directory *contents* into the destination.
# A plain `cp -a src dst` would instead nest the copy as dst/src.host once
# dst already exists, i.e. on every container restart after the first.
if [ -d "$DEV_HOME/.ssh.host" ]; then
    mkdir -p "$DEV_HOME/.ssh"
    cp -a "$DEV_HOME/.ssh.host/." "$DEV_HOME/.ssh/"
    chmod 700 "$DEV_HOME/.ssh"
    chmod 600 "$DEV_HOME"/.ssh/id_* 2>/dev/null || true
    chown -R "$TARGET_USER":"$TARGET_USER" "$DEV_HOME/.ssh"
fi

if [ -d "$DEV_HOME/.config/nvim.host" ]; then
    mkdir -p "$DEV_HOME/.config/nvim"
    cp -a "$DEV_HOME/.config/nvim.host/." "$DEV_HOME/.config/nvim/"
    chown -R "$TARGET_USER":"$TARGET_USER" "$DEV_HOME/.config/nvim"
fi

# Already matching — nothing to do.
if [ "$WS_UID" = "$DEV_UID" ] && [ "$WS_GID" = "$DEV_GID" ]; then
    exit 0
fi

if [ "$WS_UID" != "0" ]; then
    # ── Standard Docker ──────────────────────────────────────────────
    # Workspace is owned by a non-root UID (the host user).
    # Remap dev's UID/GID to match. Failures only warn so that the
    # container still starts.
    # NOTE(review): ~/.gitconfig (which includes the host gitconfig) is only
    # written in the rootless branch below — confirm that is intended.
    if [ "$DEV_GID" != "$WS_GID" ]; then
        if ! groupmod -g "$WS_GID" "$TARGET_USER" 2>&1; then
            echo "warning: failed to remap $TARGET_USER GID to $WS_GID" >&2
        fi
    fi
    if [ "$DEV_UID" != "$WS_UID" ]; then
        if ! usermod -u "$WS_UID" -g "$WS_GID" "$TARGET_USER" 2>&1; then
            echo "warning: failed to remap $TARGET_USER UID to $WS_UID" >&2
        fi
    fi
    if ! chown -R "$TARGET_USER":"$TARGET_USER" /home/"$TARGET_USER" 2>&1; then
        echo "warning: failed to chown /home/$TARGET_USER" >&2
    fi
else
    # ── Rootless Docker ──────────────────────────────────────────────
    # Workspace is root-owned inside the container. Grant dev access
    # via POSIX ACLs (preserves ownership, works across the namespace
    # boundary).
    if command -v setfacl &>/dev/null; then
        setfacl -Rm "u:${TARGET_USER}:rwX" "$WORKSPACE"
        setfacl -Rdm "u:${TARGET_USER}:rwX" "$WORKSPACE" # default ACL for new files

        # Git refuses to operate in repos owned by a different UID.
        # Host gitconfig is mounted readonly as ~/.gitconfig.host.
        # Create a real ~/.gitconfig that includes it plus container overrides.
        printf '[include]\n\tpath = %s/.gitconfig.host\n[safe]\n\tdirectory = %s\n' \
            "$DEV_HOME" "$WORKSPACE" > "$DEV_HOME/.gitconfig"
        chown "$TARGET_USER":"$TARGET_USER" "$DEV_HOME/.gitconfig"

        # If this is a worktree, the main .git dir is bind-mounted at its
        # host absolute path. Grant dev access so git operations work.
        GIT_COMMON_DIR=$(git -C "$WORKSPACE" rev-parse --git-common-dir 2>/dev/null || true)
        # git prints the path relative to the directory given with -C (a plain
        # checkout yields ".git"). Make it absolute *before* comparing, so
        # an ordinary repo is not mistaken for a worktree (which would
        # re-apply ACLs to .git and add a bogus safe.directory entry).
        case "$GIT_COMMON_DIR" in
            "" | /*) ;;
            *) GIT_COMMON_DIR="$WORKSPACE/$GIT_COMMON_DIR" ;;
        esac
        if [ -n "$GIT_COMMON_DIR" ] \
            && [ "$GIT_COMMON_DIR" != "$WORKSPACE/.git" ] \
            && [ -d "$GIT_COMMON_DIR" ]; then
            setfacl -Rm "u:${TARGET_USER}:rwX" "$GIT_COMMON_DIR"
            setfacl -Rdm "u:${TARGET_USER}:rwX" "$GIT_COMMON_DIR"
            git config -f "$DEV_HOME/.gitconfig" --add safe.directory "$(dirname "$GIT_COMMON_DIR")"
        fi

        # Also fix bind-mounted dirs under ~dev that appear root-owned.
        # Best effort: an ACL failure here warns instead of aborting startup.
        for dir in "$DEV_HOME"/.claude; do
            if [ -d "$dir" ]; then
                if ! { setfacl -Rm "u:${TARGET_USER}:rwX" "$dir" \
                    && setfacl -Rdm "u:${TARGET_USER}:rwX" "$dir"; }; then
                    echo "warning: failed to set ACLs on $dir" >&2
                fi
            fi
        done
        # Written as an `if` (not `[ -f ] && ...`) so that a missing file does
        # not become the script's exit status and block commands chained
        # after this script with `&&`.
        if [ -f "$DEV_HOME/.claude.json" ]; then
            setfacl -m "u:${TARGET_USER}:rw" "$DEV_HOME/.claude.json"
        fi
    else
        echo "warning: setfacl not found; dev user may not have write access to workspace" >&2
        echo "  install the 'acl' package or set remoteUser to root" >&2
    fi
fi

101
.devcontainer/init-firewall.sh Executable file
View File

@@ -0,0 +1,101 @@
#!/usr/bin/env bash
set -euo pipefail

# Configure a default-deny outbound firewall for the devcontainer.
# Must run as root with NET_ADMIN/NET_RAW. Only DNS, loopback, established
# connections and the destinations collected in the `allowed-domains` ipset
# are permitted; everything else is rejected.

echo "Setting up firewall..."

# Preserve docker dns resolution.
# Docker's embedded DNS (127.0.0.11) is wired up through nat-table rules;
# match on the address alone so both the `-d` DNAT and `-s` SNAT rules are kept.
DOCKER_DNS_RULES=$(iptables-save -t nat | grep -E '^-A .*127\.0\.0\.11' || true)

# Flush all rules
iptables -t nat -F
iptables -t nat -X
iptables -t mangle -F
iptables -t mangle -X
iptables -F
iptables -X

# Restore docker dns rules.
# The saved lines are bare "-A ..." rules. iptables-restore needs a "*nat"
# table header and a COMMIT line, so replay each rule with `iptables -t nat`
# instead, after recreating the user chains that `-X` deleted above.
if [ -n "$DOCKER_DNS_RULES" ]; then
    iptables -t nat -N DOCKER_OUTPUT 2>/dev/null || true
    iptables -t nat -N DOCKER_POSTROUTING 2>/dev/null || true
    echo "$DOCKER_DNS_RULES" | xargs -L 1 iptables -t nat
fi

# Create ipset for allowed destinations (tolerate it existing from a prior run)
ipset create allowed-domains hash:net || true
ipset flush allowed-domains

# Fetch GitHub IP ranges (IPv4 only — ipset hash:net and iptables are IPv4).
# Timeouts keep a stalled request from blocking container start indefinitely.
GITHUB_IPS=$(curl -s --connect-timeout 5 --max-time 15 https://api.github.com/meta | jq -r '.api[]' 2>/dev/null | grep -v ':' || echo "")
if [ -z "$GITHUB_IPS" ]; then
    echo "warning: could not fetch GitHub IP ranges; GitHub will not be allowlisted" >&2
fi
for ip in $GITHUB_IPS; do
    if ! ipset add allowed-domains "$ip" -exist 2>&1; then
        echo "warning: failed to add GitHub IP $ip to allowlist" >&2
    fi
done

# Resolve allowed domains
ALLOWED_DOMAINS=(
    "registry.npmjs.org"
    "api.anthropic.com"
    "api-staging.anthropic.com"
    "files.anthropic.com"
    "sentry.io"
    "update.code.visualstudio.com"
    "pypi.org"
    "files.pythonhosted.org"
)
for domain in "${ALLOWED_DOMAINS[@]}"; do
    # Addresses are resolved once, now; later DNS changes are not picked up.
    IPS=$(getent ahosts "$domain" 2>/dev/null | awk '{print $1}' | grep -v ':' | sort -u || echo "")
    for ip in $IPS; do
        if ! ipset add allowed-domains "$ip/32" -exist 2>&1; then
            echo "warning: failed to add $domain ($ip) to allowlist" >&2
        fi
    done
done

# Detect host network
if [[ "${DOCKER_HOST:-}" == "unix://"* ]]; then
    # `|| true`: a missing default route must not abort the script via pipefail.
    DOCKER_GATEWAY=$(ip -4 route show | grep "^default" | awk '{print $3}' || true)
    if [ -z "$DOCKER_GATEWAY" ]; then
        echo "warning: no default route found; Docker gateway not allowlisted" >&2
    elif ! ipset add allowed-domains "$DOCKER_GATEWAY/32" -exist 2>&1; then
        echo "warning: failed to add Docker gateway $DOCKER_GATEWAY to allowlist" >&2
    fi
fi

# Set default policies to DROP
iptables -P FORWARD DROP
iptables -P INPUT DROP
iptables -P OUTPUT DROP

# Allow established connections
iptables -A INPUT -m conntrack --ctstate ESTABLISHED,RELATED -j ACCEPT
iptables -A OUTPUT -m conntrack --ctstate ESTABLISHED,RELATED -j ACCEPT

# Allow loopback
iptables -A INPUT -i lo -j ACCEPT
iptables -A OUTPUT -o lo -j ACCEPT

# Allow DNS
iptables -A OUTPUT -p udp --dport 53 -j ACCEPT
iptables -A OUTPUT -p tcp --dport 53 -j ACCEPT

# Allow outbound to allowed destinations
iptables -A OUTPUT -m set --match-set allowed-domains dst -j ACCEPT

# Reject unauthorized outbound
iptables -A OUTPUT -j REJECT --reject-with icmp-host-unreachable

# Validate firewall configuration.
# Probe with curl rather than ping: the image's Dockerfile does not install
# ping, and a missing binary would make every "blocked" check pass vacuously.
echo "Validating firewall configuration..."
BLOCKED_SITES=("example.com" "google.com" "facebook.com")
for site in "${BLOCKED_SITES[@]}"; do
    if curl -s -o /dev/null --connect-timeout 3 --max-time 5 "https://$site"; then
        echo "Warning: $site is still reachable"
    fi
done
if ! curl -s -o /dev/null --connect-timeout 5 --max-time 10 https://api.github.com/meta; then
    echo "Warning: GitHub API is not accessible"
fi

echo "Firewall setup complete"

10
.devcontainer/zshrc Normal file
View File

@@ -0,0 +1,10 @@
# Shared shell setup for the devcontainer; both root's and dev's ~/.zshrc
# source this file, so changes here take effect without an image rebuild.

# Enter the workspace virtualenv when one has been created.
[ ! -f /workspace/.venv/bin/activate ] || source /workspace/.venv/bin/activate

# Layer the host's zshrc on top when it has been bind-mounted.
test -f ~/.zshrc.host && source ~/.zshrc.host

View File

@@ -60,10 +60,8 @@ logger = setup_logger()
ONE_HOUR = 3600
_MAX_RESULTS_FETCH_IDS = 5000
_MAX_RESULTS_FETCH_IDS = 5000 # 5000
_JIRA_FULL_PAGE_SIZE = 50
# https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issues/
_JIRA_BULK_FETCH_LIMIT = 100
# Constants for Jira field names
_FIELD_REPORTER = "reporter"
@@ -257,13 +255,15 @@ def _bulk_fetch_request(
return resp.json()["issues"]
def _bulk_fetch_batch(
jira_client: JIRA, issue_ids: list[str], fields: str | None
) -> list[dict[str, Any]]:
"""Fetch a single batch (must be <= _JIRA_BULK_FETCH_LIMIT).
On JSONDecodeError, recursively bisects until it succeeds or reaches size 1."""
def bulk_fetch_issues(
jira_client: JIRA, issue_ids: list[str], fields: str | None = None
) -> list[Issue]:
# TODO(evan): move away from this jira library if they continue to not support
# the endpoints we need. Using private fields is not ideal, but
# is likely fine for now since we pin the library version
try:
return _bulk_fetch_request(jira_client, issue_ids, fields)
raw_issues = _bulk_fetch_request(jira_client, issue_ids, fields)
except requests.exceptions.JSONDecodeError:
if len(issue_ids) <= 1:
logger.exception(
@@ -277,25 +277,12 @@ def _bulk_fetch_batch(
f"Jira bulk-fetch JSON decode failed for batch of {len(issue_ids)} issues. "
f"Splitting into sub-batches of {mid} and {len(issue_ids) - mid}."
)
left = _bulk_fetch_batch(jira_client, issue_ids[:mid], fields)
right = _bulk_fetch_batch(jira_client, issue_ids[mid:], fields)
left = bulk_fetch_issues(jira_client, issue_ids[:mid], fields)
right = bulk_fetch_issues(jira_client, issue_ids[mid:], fields)
return left + right
def bulk_fetch_issues(
jira_client: JIRA, issue_ids: list[str], fields: str | None = None
) -> list[Issue]:
# TODO(evan): move away from this jira library if they continue to not support
# the endpoints we need. Using private fields is not ideal, but
# is likely fine for now since we pin the library version
raw_issues: list[dict[str, Any]] = []
for batch in chunked(issue_ids, _JIRA_BULK_FETCH_LIMIT):
try:
raw_issues.extend(_bulk_fetch_batch(jira_client, list(batch), fields))
except Exception as e:
logger.error(f"Error fetching issues: {e}")
raise
except Exception as e:
logger.error(f"Error fetching issues: {e}")
raise
return [
Issue(jira_client._options, jira_client._session, raw=issue)

View File

@@ -1,4 +1,3 @@
from dataclasses import dataclass
from datetime import datetime
from typing import TypedDict
@@ -7,14 +6,6 @@ from pydantic import BaseModel
from onyx.onyxbot.slack.models import ChannelType
@dataclass(frozen=True)
class DirectThreadFetch:
"""Request to fetch a Slack thread directly by channel and timestamp."""
channel_id: str
thread_ts: str
class ChannelMetadata(TypedDict):
"""Type definition for cached channel metadata."""

View File

@@ -19,7 +19,6 @@ from onyx.configs.chat_configs import DOC_TIME_DECAY
from onyx.connectors.models import IndexingDocument
from onyx.connectors.models import TextSection
from onyx.context.search.federated.models import ChannelMetadata
from onyx.context.search.federated.models import DirectThreadFetch
from onyx.context.search.federated.models import SlackMessage
from onyx.context.search.federated.slack_search_utils import ALL_CHANNEL_TYPES
from onyx.context.search.federated.slack_search_utils import build_channel_query_filter
@@ -50,6 +49,7 @@ from onyx.server.federated.models import FederatedConnectorDetail
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
from onyx.utils.timing import log_function_time
from shared_configs.configs import DOC_EMBEDDING_CONTEXT_SIZE
logger = setup_logger()
@@ -58,6 +58,7 @@ HIGHLIGHT_END_CHAR = "\ue001"
CHANNEL_METADATA_CACHE_TTL = 60 * 60 * 24 # 24 hours
USER_PROFILE_CACHE_TTL = 60 * 60 * 24 # 24 hours
SLACK_THREAD_CONTEXT_WINDOW = 3 # Number of messages before matched message to include
CHANNEL_METADATA_MAX_RETRIES = 3 # Maximum retry attempts for channel metadata fetching
CHANNEL_METADATA_RETRY_DELAY = 1 # Initial retry delay in seconds (exponential backoff)
@@ -420,94 +421,6 @@ class SlackQueryResult(BaseModel):
filtered_channels: list[str] # Channels filtered out during this query
def _fetch_thread_from_url(
thread_fetch: DirectThreadFetch,
access_token: str,
channel_metadata_dict: dict[str, ChannelMetadata] | None = None,
) -> SlackQueryResult:
"""Fetch a thread directly from a Slack URL via conversations.replies."""
channel_id = thread_fetch.channel_id
thread_ts = thread_fetch.thread_ts
slack_client = WebClient(token=access_token)
try:
response = slack_client.conversations_replies(
channel=channel_id,
ts=thread_ts,
)
response.validate()
messages: list[dict[str, Any]] = response.get("messages", [])
except SlackApiError as e:
logger.warning(
f"Failed to fetch thread from URL (channel={channel_id}, ts={thread_ts}): {e}"
)
return SlackQueryResult(messages=[], filtered_channels=[])
if not messages:
logger.warning(
f"No messages found for URL override (channel={channel_id}, ts={thread_ts})"
)
return SlackQueryResult(messages=[], filtered_channels=[])
# Build thread text from all messages
thread_text = _build_thread_text(messages, access_token, None, slack_client)
# Get channel name from metadata cache or API
channel_name = "unknown"
if channel_metadata_dict and channel_id in channel_metadata_dict:
channel_name = channel_metadata_dict[channel_id].get("name", "unknown")
else:
try:
ch_response = slack_client.conversations_info(channel=channel_id)
ch_response.validate()
channel_info: dict[str, Any] = ch_response.get("channel", {})
channel_name = channel_info.get("name", "unknown")
except SlackApiError:
pass
# Build the SlackMessage
parent_msg = messages[0]
message_ts = parent_msg.get("ts", thread_ts)
username = parent_msg.get("user", "unknown_user")
parent_text = parent_msg.get("text", "")
snippet = (
parent_text[:50].rstrip() + "..." if len(parent_text) > 50 else parent_text
).replace("\n", " ")
doc_time = datetime.fromtimestamp(float(message_ts))
decay_factor = DOC_TIME_DECAY
doc_age_years = (datetime.now() - doc_time).total_seconds() / (365 * 24 * 60 * 60)
recency_bias = max(1 / (1 + decay_factor * doc_age_years), 0.75)
permalink = (
f"https://slack.com/archives/{channel_id}/p{message_ts.replace('.', '')}"
)
slack_message = SlackMessage(
document_id=f"{channel_id}_{message_ts}",
channel_id=channel_id,
message_id=message_ts,
thread_id=None, # Prevent double-enrichment in thread context fetch
link=permalink,
metadata={
"channel": channel_name,
"time": doc_time.isoformat(),
},
timestamp=doc_time,
recency_bias=recency_bias,
semantic_identifier=f"{username} in #{channel_name}: {snippet}",
text=thread_text,
highlighted_texts=set(),
slack_score=100000.0, # High priority — user explicitly asked for this thread
)
logger.info(
f"URL override: fetched thread from channel={channel_id}, ts={thread_ts}, {len(messages)} messages"
)
return SlackQueryResult(messages=[slack_message], filtered_channels=[])
def query_slack(
query_string: str,
access_token: str,
@@ -519,6 +432,7 @@ def query_slack(
available_channels: list[str] | None = None,
channel_metadata_dict: dict[str, ChannelMetadata] | None = None,
) -> SlackQueryResult:
# Check if query has channel override (user specified channels in query)
has_channel_override = query_string.startswith("__CHANNEL_OVERRIDE__")
@@ -748,6 +662,7 @@ def _fetch_thread_context(
"""
channel_id = message.channel_id
thread_id = message.thread_id
message_id = message.message_id
# If not a thread, return original text as success
if thread_id is None:
@@ -780,37 +695,62 @@ def _fetch_thread_context(
if len(messages) <= 1:
return ThreadContextResult.success(message.text)
# Build thread text from thread starter + all replies
thread_text = _build_thread_text(messages, access_token, team_id, slack_client)
# Build thread text from thread starter + context window around matched message
thread_text = _build_thread_text(
messages, message_id, thread_id, access_token, team_id, slack_client
)
return ThreadContextResult.success(thread_text)
def _build_thread_text(
messages: list[dict[str, Any]],
message_id: str,
thread_id: str,
access_token: str,
team_id: str | None,
slack_client: WebClient,
) -> str:
"""Build thread text including all replies.
Includes the thread parent message followed by all replies in order.
"""
"""Build the thread text from messages."""
msg_text = messages[0].get("text", "")
msg_sender = messages[0].get("user", "")
thread_text = f"<@{msg_sender}>: {msg_text}"
# All messages after index 0 are replies
replies = messages[1:]
if not replies:
return thread_text
logger.debug(f"Thread {messages[0].get('ts')}: {len(replies)} replies included")
thread_text += "\n\nReplies:"
if thread_id == message_id:
message_id_idx = 0
else:
message_id_idx = next(
(i for i, msg in enumerate(messages) if msg.get("ts") == message_id), 0
)
if not message_id_idx:
return thread_text
for msg in replies:
start_idx = max(1, message_id_idx - SLACK_THREAD_CONTEXT_WINDOW)
if start_idx > 1:
thread_text += "\n..."
for i in range(start_idx, message_id_idx):
msg_text = messages[i].get("text", "")
msg_sender = messages[i].get("user", "")
thread_text += f"\n\n<@{msg_sender}>: {msg_text}"
msg_text = messages[message_id_idx].get("text", "")
msg_sender = messages[message_id_idx].get("user", "")
thread_text += f"\n\n<@{msg_sender}>: {msg_text}"
# Add following replies
len_replies = 0
for msg in messages[message_id_idx + 1 :]:
msg_text = msg.get("text", "")
msg_sender = msg.get("user", "")
thread_text += f"\n\n<@{msg_sender}>: {msg_text}"
reply = f"\n\n<@{msg_sender}>: {msg_text}"
thread_text += reply
len_replies += len(reply)
if len_replies >= DOC_EMBEDDING_CONTEXT_SIZE * 4:
thread_text += "\n..."
break
# Replace user IDs with names using cached lookups
userids: set[str] = set(re.findall(r"<@([A-Z0-9]+)>", thread_text))
@@ -1036,16 +976,7 @@ def slack_retrieval(
# Query slack with entity filtering
llm = get_default_llm()
query_items = build_slack_queries(query, llm, entities, available_channels)
# Partition into direct thread fetches and search query strings
direct_fetches: list[DirectThreadFetch] = []
query_strings: list[str] = []
for item in query_items:
if isinstance(item, DirectThreadFetch):
direct_fetches.append(item)
else:
query_strings.append(item)
query_strings = build_slack_queries(query, llm, entities, available_channels)
# Determine filtering based on entities OR context (bot)
include_dm = False
@@ -1062,16 +993,8 @@ def slack_retrieval(
f"Private channel context: will only allow messages from {allowed_private_channel} + public channels"
)
# Build search tasks — direct thread fetches + keyword searches
search_tasks: list[tuple] = [
(
_fetch_thread_from_url,
(fetch, access_token, channel_metadata_dict),
)
for fetch in direct_fetches
]
search_tasks.extend(
# Build search tasks
search_tasks = [
(
query_slack,
(
@@ -1087,7 +1010,7 @@ def slack_retrieval(
),
)
for query_string in query_strings
)
]
# If include_dm is True AND we're not already searching all channels,
# add additional searches without channel filters.

View File

@@ -10,7 +10,6 @@ from pydantic import ValidationError
from onyx.configs.app_configs import MAX_SLACK_QUERY_EXPANSIONS
from onyx.context.search.federated.models import ChannelMetadata
from onyx.context.search.federated.models import DirectThreadFetch
from onyx.context.search.models import ChunkIndexRequest
from onyx.federated_connectors.slack.models import SlackEntities
from onyx.llm.interfaces import LLM
@@ -639,38 +638,12 @@ def expand_query_with_llm(query_text: str, llm: LLM) -> list[str]:
return [query_text]
SLACK_URL_PATTERN = re.compile(
r"https?://[a-z0-9-]+\.slack\.com/archives/([A-Z0-9]+)/p(\d{16})"
)
def extract_slack_message_urls(
query_text: str,
) -> list[tuple[str, str]]:
"""Extract Slack message URLs from query text.
Parses URLs like:
https://onyx-company.slack.com/archives/C097NBWMY8Y/p1775491616524769
Returns list of (channel_id, thread_ts) tuples.
The 16-digit timestamp is converted to Slack ts format (with dot).
"""
results = []
for match in SLACK_URL_PATTERN.finditer(query_text):
channel_id = match.group(1)
raw_ts = match.group(2)
# Convert p1775491616524769 -> 1775491616.524769
thread_ts = f"{raw_ts[:10]}.{raw_ts[10:]}"
results.append((channel_id, thread_ts))
return results
def build_slack_queries(
query: ChunkIndexRequest,
llm: LLM,
entities: dict[str, Any] | None = None,
available_channels: list[str] | None = None,
) -> list[str | DirectThreadFetch]:
) -> list[str]:
"""Build Slack query strings with date filtering and query expansion."""
default_search_days = 30
if entities:
@@ -695,15 +668,6 @@ def build_slack_queries(
cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_back)
time_filter = f" after:{cutoff_date.strftime('%Y-%m-%d')}"
# Check for Slack message URLs — if found, add direct fetch requests
url_fetches: list[DirectThreadFetch] = []
slack_urls = extract_slack_message_urls(query.query)
for channel_id, thread_ts in slack_urls:
url_fetches.append(
DirectThreadFetch(channel_id=channel_id, thread_ts=thread_ts)
)
logger.info(f"Detected Slack URL: channel={channel_id}, ts={thread_ts}")
# ALWAYS extract channel references from the query (not just for recency queries)
channel_references = extract_channel_references_from_query(query.query)
@@ -720,9 +684,7 @@ def build_slack_queries(
# If valid channels detected, use ONLY those channels with NO keywords
# Return query with ONLY time filter + channel filter (no keywords)
return url_fetches + [
build_channel_override_query(channel_references, time_filter)
]
return [build_channel_override_query(channel_references, time_filter)]
except ValueError as e:
# If validation fails, log the error and continue with normal flow
logger.warning(f"Channel reference validation failed: {e}")
@@ -740,8 +702,7 @@ def build_slack_queries(
rephrased_queries = expand_query_with_llm(query.query, llm)
# Build final query strings with time filters
search_queries = [
return [
rephrased_query.strip() + time_filter
for rephrased_query in rephrased_queries[:MAX_SLACK_QUERY_EXPANSIONS]
]
return url_fetches + search_queries

View File

@@ -111,43 +111,6 @@ def _mask_string(value: str) -> str:
return value[:4] + "****" + value[-4:]
def _resolve_api_key(
api_key: str | None,
provider_name: str | None,
api_base: str | None,
db_session: Session,
) -> str | None:
"""Return the real API key for model-fetch endpoints.
When editing an existing provider the form value is masked (e.g.
``sk-a****b1c2``). If *provider_name* is supplied we can look up
the unmasked key from the database so the external request succeeds.
The stored key is only returned when the request's *api_base*
matches the value stored in the database.
"""
if not provider_name:
return api_key
existing_provider = fetch_existing_llm_provider(
name=provider_name, db_session=db_session
)
if existing_provider and existing_provider.api_key:
# Normalise both URLs before comparing so trailing-slash
# differences don't cause a false mismatch.
stored_base = (existing_provider.api_base or "").strip().rstrip("/")
request_base = (api_base or "").strip().rstrip("/")
if stored_base != request_base:
return api_key
stored_key = existing_provider.api_key.get_value(apply_mask=False)
# Only resolve when the incoming value is the masked form of the
# stored key — i.e. the user hasn't typed a new key.
if api_key and api_key == _mask_string(stored_key):
return stored_key
return api_key
def _sync_fetched_models(
db_session: Session,
provider_name: str,
@@ -1211,17 +1174,16 @@ def get_ollama_available_models(
return sorted_results
def _get_openrouter_models_response(api_base: str, api_key: str | None) -> dict:
def _get_openrouter_models_response(api_base: str, api_key: str) -> dict:
"""Perform GET to OpenRouter /models and return parsed JSON."""
cleaned_api_base = api_base.strip().rstrip("/")
url = f"{cleaned_api_base}/models"
headers: dict[str, str] = {
headers = {
"Authorization": f"Bearer {api_key}",
# Optional headers recommended by OpenRouter for attribution
"HTTP-Referer": "https://onyx.app",
"X-Title": "Onyx",
}
if api_key:
headers["Authorization"] = f"Bearer {api_key}"
try:
response = httpx.get(url, headers=headers, timeout=10.0)
response.raise_for_status()
@@ -1244,12 +1206,8 @@ def get_openrouter_available_models(
Parses id, name (display), context_length, and architecture.input_modalities.
"""
api_key = _resolve_api_key(
request.api_key, request.provider_name, request.api_base, db_session
)
response_json = _get_openrouter_models_response(
api_base=request.api_base, api_key=api_key
api_base=request.api_base, api_key=request.api_key
)
data = response_json.get("data", [])
@@ -1342,18 +1300,13 @@ def get_lm_studio_available_models(
# If provider_name is given and the api_key hasn't been changed by the user,
# fall back to the stored API key from the database (the form value is masked).
# Only do so when the api_base matches what is stored.
api_key = request.api_key
if request.provider_name and not request.api_key_changed:
existing_provider = fetch_existing_llm_provider(
name=request.provider_name, db_session=db_session
)
if existing_provider and existing_provider.custom_config:
stored_base = (existing_provider.api_base or "").strip().rstrip("/")
if stored_base == cleaned_api_base:
api_key = existing_provider.custom_config.get(
LM_STUDIO_API_KEY_CONFIG_KEY
)
api_key = existing_provider.custom_config.get(LM_STUDIO_API_KEY_CONFIG_KEY)
url = f"{cleaned_api_base}/api/v1/models"
headers: dict[str, str] = {}
@@ -1437,12 +1390,8 @@ def get_litellm_available_models(
db_session: Session = Depends(get_session),
) -> list[LitellmFinalModelResponse]:
"""Fetch available models from Litellm proxy /v1/models endpoint."""
api_key = _resolve_api_key(
request.api_key, request.provider_name, request.api_base, db_session
)
response_json = _get_litellm_models_response(
api_key=api_key, api_base=request.api_base
api_key=request.api_key, api_base=request.api_base
)
models = response_json.get("data", [])
@@ -1499,7 +1448,7 @@ def get_litellm_available_models(
return sorted_results
def _get_litellm_models_response(api_key: str | None, api_base: str) -> dict:
def _get_litellm_models_response(api_key: str, api_base: str) -> dict:
"""Perform GET to Litellm proxy /api/v1/models and return parsed JSON."""
cleaned_api_base = api_base.strip().rstrip("/")
url = f"{cleaned_api_base}/v1/models"
@@ -1574,12 +1523,8 @@ def get_bifrost_available_models(
db_session: Session = Depends(get_session),
) -> list[BifrostFinalModelResponse]:
"""Fetch available models from Bifrost gateway /v1/models endpoint."""
api_key = _resolve_api_key(
request.api_key, request.provider_name, request.api_base, db_session
)
response_json = _get_bifrost_models_response(
api_base=request.api_base, api_key=api_key
api_base=request.api_base, api_key=request.api_key
)
models = response_json.get("data", [])
@@ -1668,12 +1613,8 @@ def get_openai_compatible_server_available_models(
db_session: Session = Depends(get_session),
) -> list[OpenAICompatibleFinalModelResponse]:
"""Fetch available models from a generic OpenAI-compatible /v1/models endpoint."""
api_key = _resolve_api_key(
request.api_key, request.provider_name, request.api_base, db_session
)
response_json = _get_openai_compatible_server_response(
api_base=request.api_base, api_key=api_key
api_base=request.api_base, api_key=request.api_key
)
models = response_json.get("data", [])

View File

@@ -254,7 +254,7 @@ oauthlib==3.2.2
# via
# kubernetes
# requests-oauthlib
onyx-devtools==0.7.3
onyx-devtools==0.7.4
openai==2.14.0
# via
# litellm

View File

@@ -6,7 +6,6 @@ import requests
from jira import JIRA
from jira.resources import Issue
from onyx.connectors.jira.connector import _JIRA_BULK_FETCH_LIMIT
from onyx.connectors.jira.connector import bulk_fetch_issues
@@ -146,29 +145,3 @@ def test_bulk_fetch_recursive_splitting_raises_on_bad_issue() -> None:
with pytest.raises(requests.exceptions.JSONDecodeError):
bulk_fetch_issues(client, ["1", "2", bad_id, "3", "4", "5"])
def test_bulk_fetch_respects_api_batch_limit() -> None:
"""Requests to the bulkfetch endpoint never exceed _JIRA_BULK_FETCH_LIMIT IDs."""
client = _mock_jira_client()
# Three full batches plus a remainder of 7 IDs; the final assertion expects
# this to be served by exactly four POST requests.
total_issues = _JIRA_BULK_FETCH_LIMIT * 3 + 7
all_ids = [str(i) for i in range(total_issues)]
# Number of IDs carried by each POST, in call order.
batch_sizes: list[int] = []
# Stand-in for the session's POST: records the batch size and answers with
# one raw issue per requested ID.
def _post_side_effect(url: str, json: dict[str, Any]) -> MagicMock: # noqa: ARG001
ids = json["issueIdsOrKeys"]
batch_sizes.append(len(ids))
resp = MagicMock()
resp.json.return_value = {"issues": [_make_raw_issue(i) for i in ids]}
return resp
client._session.post.side_effect = _post_side_effect
result = bulk_fetch_issues(client, all_ids)
# Every requested issue must come back, regardless of how it was batched.
assert len(result) == total_issues
# keeping this hardcoded because it's the documented limit
# https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issues/
assert all(size <= 100 for size in batch_sizes)
assert len(batch_sizes) == 4

View File

@@ -1,67 +0,0 @@
"""Tests for _build_thread_text function."""
from unittest.mock import MagicMock
from unittest.mock import patch
from onyx.context.search.federated.slack_search import _build_thread_text
# Build a minimal message dict with only the "user", "text" and "ts" keys
# that the tests in this module pass to _build_thread_text.
def _make_msg(user: str, text: str, ts: str) -> dict[str, str]:
return {"user": user, "text": text, "ts": ts}
class TestBuildThreadText:
"""Verify _build_thread_text includes full thread replies up to cap."""
# Each test patches batch_get_user_profiles so the profile lookup is
# replaced by a mock with a fixed return value.
@patch("onyx.context.search.federated.slack_search.batch_get_user_profiles")
def test_includes_all_replies(self, mock_profiles: MagicMock) -> None:
"""All replies within cap are included in output."""
mock_profiles.return_value = {}
messages = [
_make_msg("U1", "parent msg", "1000.0"),
_make_msg("U2", "reply 1", "1001.0"),
_make_msg("U3", "reply 2", "1002.0"),
_make_msg("U4", "reply 3", "1003.0"),
]
result = _build_thread_text(messages, "token", "T123", MagicMock())
assert "parent msg" in result
assert "reply 1" in result
assert "reply 2" in result
assert "reply 3" in result
# NOTE(review): "..." presumably marks truncated output — confirm against
# _build_thread_text.
assert "..." not in result
@patch("onyx.context.search.federated.slack_search.batch_get_user_profiles")
def test_non_thread_returns_parent_only(self, mock_profiles: MagicMock) -> None:
"""Single message (no replies) returns just the parent text."""
mock_profiles.return_value = {}
messages = [_make_msg("U1", "just a message", "1000.0")]
result = _build_thread_text(messages, "token", "T123", MagicMock())
assert "just a message" in result
# A lone message must not produce a "Replies:" section.
assert "Replies:" not in result
@patch("onyx.context.search.federated.slack_search.batch_get_user_profiles")
def test_parent_always_first(self, mock_profiles: MagicMock) -> None:
"""Thread parent message is always the first line of output."""
mock_profiles.return_value = {}
messages = [
_make_msg("U1", "I am the parent", "1000.0"),
_make_msg("U2", "I am a reply", "1001.0"),
]
result = _build_thread_text(messages, "token", "T123", MagicMock())
# Ordering is checked by comparing the positions of the two texts.
parent_pos = result.index("I am the parent")
reply_pos = result.index("I am a reply")
assert parent_pos < reply_pos
@patch("onyx.context.search.federated.slack_search.batch_get_user_profiles")
def test_user_profiles_resolved(self, mock_profiles: MagicMock) -> None:
"""User IDs in thread text are replaced with display names."""
# The mocked lookup maps both user IDs to display names.
mock_profiles.return_value = {"U1": "Alice", "U2": "Bob"}
messages = [
_make_msg("U1", "hello", "1000.0"),
_make_msg("U2", "world", "1001.0"),
]
result = _build_thread_text(messages, "token", "T123", MagicMock())
assert "Alice" in result
assert "Bob" in result
# Raw Slack mention markup must not remain in the output.
assert "<@U1>" not in result
assert "<@U2>" not in result

View File

@@ -1,108 +0,0 @@
"""Tests for Slack URL parsing and direct thread fetch via URL override."""
from unittest.mock import MagicMock
from unittest.mock import patch
from onyx.context.search.federated.models import DirectThreadFetch
from onyx.context.search.federated.slack_search import _fetch_thread_from_url
from onyx.context.search.federated.slack_search_utils import extract_slack_message_urls
class TestExtractSlackMessageUrls:
"""Verify URL parsing extracts channel_id and timestamp correctly."""
# Each result is compared against a (channel_id, timestamp) tuple.
def test_standard_url(self) -> None:
query = "summarize https://mycompany.slack.com/archives/C097NBWMY8Y/p1775491616524769"
results = extract_slack_message_urls(query)
assert len(results) == 1
assert results[0] == ("C097NBWMY8Y", "1775491616.524769")
def test_multiple_urls(self) -> None:
query = (
"compare https://co.slack.com/archives/C111/p1234567890123456 "
"and https://co.slack.com/archives/C222/p9876543210987654"
)
results = extract_slack_message_urls(query)
# Results are expected in the order the URLs appear in the query.
assert len(results) == 2
assert results[0] == ("C111", "1234567890.123456")
assert results[1] == ("C222", "9876543210.987654")
def test_no_urls(self) -> None:
query = "what happened in #general last week?"
results = extract_slack_message_urls(query)
assert len(results) == 0
def test_non_slack_url_ignored(self) -> None:
# Same /archives/<channel>/p<digits> path shape, but on a non-Slack host.
query = "check https://google.com/archives/C111/p1234567890123456"
results = extract_slack_message_urls(query)
assert len(results) == 0
def test_timestamp_conversion(self) -> None:
"""p prefix removed, dot inserted after 10th digit."""
query = "https://x.slack.com/archives/CABC123/p1775491616524769"
results = extract_slack_message_urls(query)
channel_id, ts = results[0]
assert channel_id == "CABC123"
assert ts == "1775491616.524769"
assert not ts.startswith("p")
assert "." in ts
class TestFetchThreadFromUrl:
"""Verify _fetch_thread_from_url calls conversations.replies and returns SlackMessage."""
# WebClient is patched in both tests, so no real Slack API call is made.
@patch("onyx.context.search.federated.slack_search._build_thread_text")
@patch("onyx.context.search.federated.slack_search.WebClient")
def test_successful_fetch(
self, mock_webclient_cls: MagicMock, mock_build_thread: MagicMock
) -> None:
mock_client = MagicMock()
mock_webclient_cls.return_value = mock_client
# Mock conversations_replies
mock_response = MagicMock()
mock_response.get.return_value = [
{"user": "U1", "text": "parent", "ts": "1775491616.524769"},
{"user": "U2", "text": "reply 1", "ts": "1775491617.000000"},
{"user": "U3", "text": "reply 2", "ts": "1775491618.000000"},
]
mock_client.conversations_replies.return_value = mock_response
# Mock channel info
mock_ch_response = MagicMock()
mock_ch_response.get.return_value = {"name": "general"}
mock_client.conversations_info.return_value = mock_ch_response
# Thread text assembly is mocked out; only its return value is used here.
mock_build_thread.return_value = (
"U1: parent\n\nReplies:\n\nU2: reply 1\n\nU3: reply 2"
)
fetch = DirectThreadFetch(
channel_id="C097NBWMY8Y", thread_ts="1775491616.524769"
)
result = _fetch_thread_from_url(fetch, "xoxp-token")
# The whole thread is expected to come back as a single message.
assert len(result.messages) == 1
msg = result.messages[0]
assert msg.channel_id == "C097NBWMY8Y"
assert msg.thread_id is None # Prevents double-enrichment
assert msg.slack_score == 100000.0
assert "parent" in msg.text
mock_client.conversations_replies.assert_called_once_with(
channel="C097NBWMY8Y", ts="1775491616.524769"
)
@patch("onyx.context.search.federated.slack_search.WebClient")
def test_api_error_returns_empty(self, mock_webclient_cls: MagicMock) -> None:
from slack_sdk.errors import SlackApiError
mock_client = MagicMock()
mock_webclient_cls.return_value = mock_client
# conversations_replies raises; the result is expected to be empty rather
# than the exception propagating to the caller.
mock_client.conversations_replies.side_effect = SlackApiError(
message="channel_not_found",
response=MagicMock(status_code=404),
)
fetch = DirectThreadFetch(channel_id="CBAD", thread_ts="1234567890.123456")
result = _fetch_thread_from_url(fetch, "xoxp-token")
assert len(result.messages) == 0

View File

@@ -505,7 +505,6 @@ class TestGetLMStudioAvailableModels:
mock_session = MagicMock()
mock_provider = MagicMock()
mock_provider.api_base = "http://localhost:1234"
mock_provider.custom_config = {"LM_STUDIO_API_KEY": "stored-secret"}
response = {

View File

@@ -5,7 +5,7 @@ home: https://www.onyx.app/
sources:
- "https://github.com/onyx-dot-app/onyx"
type: application
version: 0.4.40
version: 0.4.41
appVersion: latest
annotations:
category: Productivity

View File

@@ -0,0 +1,23 @@
{{- if .Values.monitoring.serviceMonitors.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: {{ include "onyx.fullname" . }}-api
labels:
{{- include "onyx.labels" . | nindent 4 }}
{{- with .Values.monitoring.serviceMonitors.labels }}
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
namespaceSelector:
matchNames:
- {{ .Release.Namespace }}
selector:
matchLabels:
app: {{ .Values.api.deploymentLabels.app }}
endpoints:
- port: api-server-port
path: /metrics
interval: 30s
scrapeTimeout: 10s
{{- end }}

View File

@@ -74,4 +74,29 @@ spec:
interval: 30s
scrapeTimeout: 10s
{{- end }}
{{- if gt (int .Values.celery_worker_heavy.replicaCount) 0 }}
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: {{ include "onyx.fullname" . }}-celery-worker-heavy
labels:
{{- include "onyx.labels" . | nindent 4 }}
{{- with .Values.monitoring.serviceMonitors.labels }}
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
namespaceSelector:
matchNames:
- {{ .Release.Namespace }}
selector:
matchLabels:
app: {{ .Values.celery_worker_heavy.deploymentLabels.app }}
metrics: "true"
endpoints:
- port: metrics
path: /metrics
interval: 30s
scrapeTimeout: 10s
{{- end }}
{{- end }}

View File

@@ -264,7 +264,7 @@ monitoring:
# The sidecar must be configured with label selector: grafana_dashboard=1
enabled: false
serviceMonitors:
# -- Set to true to deploy ServiceMonitor resources for Celery worker metrics endpoints.
# -- Set to true to deploy ServiceMonitor resources for API server and Celery worker metrics endpoints.
# Requires the Prometheus Operator CRDs (included in kube-prometheus-stack).
# Use `labels` to match your Prometheus CR's serviceMonitorSelector (e.g. release: onyx-monitoring).
enabled: false

View File

@@ -22,6 +22,10 @@ variable "CLI_REPOSITORY" {
default = "onyxdotapp/onyx-cli"
}
variable "DEVCONTAINER_REPOSITORY" {
default = "onyxdotapp/onyx-devcontainer"
}
variable "TAG" {
default = "latest"
}
@@ -90,3 +94,16 @@ target "cli" {
tags = ["${CLI_REPOSITORY}:${TAG}"]
}
target "devcontainer" {
context = ".devcontainer"
dockerfile = "Dockerfile"
cache-from = [
"type=registry,ref=${DEVCONTAINER_REPOSITORY}:latest",
"type=registry,ref=${DEVCONTAINER_REPOSITORY}:edge",
]
cache-to = ["type=inline"]
tags = ["${DEVCONTAINER_REPOSITORY}:${TAG}"]
}

View File

@@ -148,7 +148,7 @@ dev = [
"matplotlib==3.10.8",
"mypy-extensions==1.0.0",
"mypy==1.13.0",
"onyx-devtools==0.7.3",
"onyx-devtools==0.7.4",
"openapi-generator-cli==7.17.0",
"pandas-stubs~=2.3.3",
"pre-commit==3.2.2",

View File

@@ -244,6 +244,54 @@ ods web lint
ods web test --watch
```
### `dev` - Devcontainer Management
Manage the Onyx devcontainer. Also available as `ods dc`.
Requires the [devcontainer CLI](https://github.com/devcontainers/cli) (`npm install -g @devcontainers/cli`).
```shell
ods dev <subcommand>
```
**Subcommands:**
- `up` - Start the devcontainer (pulls the image if needed)
- `into` - Open a zsh shell inside the running devcontainer
- `exec` - Run an arbitrary command inside the devcontainer
- `restart` - Remove and recreate the devcontainer
- `rebuild` - Pull the latest published image and recreate
- `stop` - Stop the running devcontainer
The devcontainer image is published to `onyxdotapp/onyx-devcontainer` and
referenced by tag in `.devcontainer/devcontainer.json` — no local build needed.
**Examples:**
```shell
# Start the devcontainer
ods dev up
# Open a shell
ods dev into
# Run a command
ods dev exec -- npm test
# Restart the container
ods dev restart
# Pull latest image and recreate
ods dev rebuild
# Stop the container
ods dev stop
# Same commands work with the dc alias
ods dc up
ods dc into
```
### `db` - Database Administration
Manage PostgreSQL database dumps, restores, and migrations.

34
tools/ods/cmd/dev.go Normal file
View File

@@ -0,0 +1,34 @@
package cmd
import (
"github.com/spf13/cobra"
)
// NewDevCommand builds the "dev" parent command (alias "dc") and attaches
// every devcontainer subcommand to it. The parent defines no action itself.
func NewDevCommand() *cobra.Command {
	const longHelp = `Manage the Onyx devcontainer.
Wraps the devcontainer CLI with workspace-aware defaults.
Commands:
up Start the devcontainer
into Open a shell inside the running devcontainer
exec Run a command inside the devcontainer
restart Remove and recreate the devcontainer
rebuild Pull the latest image and recreate
stop Stop the running devcontainer`

	devCmd := &cobra.Command{
		Use:     "dev",
		Aliases: []string{"dc"},
		Short:   "Manage the devcontainer",
		Long:    longHelp,
	}

	// Subcommands are registered in the same order the help text lists them.
	devCmd.AddCommand(
		newDevUpCommand(),
		newDevIntoCommand(),
		newDevExecCommand(),
		newDevRestartCommand(),
		newDevRebuildCommand(),
		newDevStopCommand(),
	)

	return devCmd
}

29
tools/ods/cmd/dev_exec.go Normal file
View File

@@ -0,0 +1,29 @@
package cmd
import (
"github.com/spf13/cobra"
)
// newDevExecCommand builds "ods dev exec", which forwards its arguments
// verbatim to runDevExec.
//
// Flag parsing is disabled so that flags meant for the forwarded command are
// not interpreted by cobra. As a consequence cobra also hands a leading "--"
// through as an ordinary argument, so it is stripped here before use.
func newDevExecCommand() *cobra.Command {
	// stripSeparator drops a single leading "--" if present.
	stripSeparator := func(args []string) []string {
		if len(args) > 0 && args[0] == "--" {
			return args[1:]
		}
		return args
	}

	return &cobra.Command{
		Use:   "exec [--] <command> [args...]",
		Short: "Run a command inside the devcontainer",
		Long: `Run an arbitrary command inside the running devcontainer.
All arguments are treated as positional (flags like -it are passed through).
Examples:
ods dev exec npm test
ods dev exec -- ls -la
ods dev exec -it echo hello`,
		// Validate the argument count AFTER removing the separator; otherwise
		// a bare "ods dev exec --" would pass the check and then run with an
		// empty command.
		Args: func(cmd *cobra.Command, args []string) error {
			return cobra.MinimumNArgs(1)(cmd, stripSeparator(args))
		},
		DisableFlagParsing: true,
		Run: func(cmd *cobra.Command, args []string) {
			runDevExec(stripSeparator(args))
		},
	}
}

51
tools/ods/cmd/dev_into.go Normal file
View File

@@ -0,0 +1,51 @@
package cmd
import (
"os"
"os/exec"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
)
// newDevIntoCommand builds "ods dev into", which runs zsh inside the
// devcontainer through runDevExec.
func newDevIntoCommand() *cobra.Command {
	const longHelp = `Open an interactive zsh shell inside the running devcontainer.
Examples:
ods dev into`

	openShell := func(_ *cobra.Command, _ []string) {
		runDevExec([]string{"zsh"})
	}

	return &cobra.Command{
		Use:   "into",
		Short: "Open a shell inside the running devcontainer",
		Long:  longHelp,
		Run:   openShell,
	}
}
// runDevExec executes "devcontainer exec --workspace-folder <root> <command...>"
// with the current process's stdin, stdout and stderr attached.
//
// The workspace folder is the git root reported by paths.GitRoot. If the
// child process exits with a non-zero status, this process exits with the
// same status so callers and scripts see the real result of the command
// (for example a failing test run, or a shell whose last command failed).
// Failures to start or wait for the child are logged as fatal.
func runDevExec(command []string) {
	checkDevcontainerCLI()

	root, err := paths.GitRoot()
	if err != nil {
		log.Fatalf("Failed to find git root: %v", err)
	}

	args := []string{"exec", "--workspace-folder", root}
	args = append(args, command...)
	log.Debugf("Running: devcontainer %v", args)

	c := exec.Command("devcontainer", args...)
	c.Stdout = os.Stdout
	c.Stderr = os.Stderr
	c.Stdin = os.Stdin

	if err := c.Run(); err != nil {
		// ExitCode is -1 when the process was terminated by a signal; treat
		// that as an ordinary failure below instead of passing -1 to os.Exit.
		if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() > 0 {
			log.Debugf("devcontainer exec exited with status %d", exitErr.ExitCode())
			os.Exit(exitErr.ExitCode())
		}
		log.Fatalf("devcontainer exec failed: %v", err)
	}
}

View File

@@ -0,0 +1,41 @@
package cmd
import (
"os"
"os/exec"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
)
// newDevRebuildCommand builds "ods dev rebuild", which delegates to
// runDevRebuild.
func newDevRebuildCommand() *cobra.Command {
	const longHelp = `Pull the latest devcontainer image and recreate the container.
Use after the published image has been updated or after changing devcontainer.json.
Examples:
ods dev rebuild`

	return &cobra.Command{
		Use:   "rebuild",
		Short: "Pull the latest devcontainer image and recreate",
		Long:  longHelp,
		Run: func(_ *cobra.Command, _ []string) {
			runDevRebuild()
		},
	}
}
// runDevRebuild pulls the image named in devcontainer.json with "docker pull"
// and then recreates the container via "devcontainer up
// --remove-existing-container". A failed pull is only a warning: the
// container is still recreated.
func runDevRebuild() {
	ref := devcontainerImage()
	log.Infof("Pulling %s...", ref)

	pullCmd := exec.Command("docker", "pull", ref)
	pullCmd.Stdout, pullCmd.Stderr = os.Stdout, os.Stderr

	pullErr := pullCmd.Run()
	if pullErr != nil {
		log.Warnf("Failed to pull image (continuing with local copy): %v", pullErr)
	}

	recreate := []string{"--remove-existing-container"}
	runDevcontainer("up", recreate)
}

View File

@@ -0,0 +1,23 @@
package cmd
import (
"github.com/spf13/cobra"
)
// newDevRestartCommand builds "ods dev restart", which runs
// "devcontainer up" with --remove-existing-container.
func newDevRestartCommand() *cobra.Command {
	const longHelp = `Remove the existing devcontainer and recreate it.
Uses the cached image — for a full image rebuild, use "ods dev rebuild".
Examples:
ods dev restart`

	recreate := func(_ *cobra.Command, _ []string) {
		runDevcontainer("up", []string{"--remove-existing-container"})
	}

	return &cobra.Command{
		Use:   "restart",
		Short: "Remove and recreate the devcontainer",
		Long:  longHelp,
		Run:   recreate,
	}
}

56
tools/ods/cmd/dev_stop.go Normal file
View File

@@ -0,0 +1,56 @@
package cmd
import (
"os/exec"
"strings"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
)
// newDevStopCommand builds "ods dev stop", which delegates to runDevStop.
func newDevStopCommand() *cobra.Command {
	const longHelp = `Stop the running devcontainer.
Examples:
ods dev stop`

	return &cobra.Command{
		Use:   "stop",
		Short: "Stop the running devcontainer",
		Long:  longHelp,
		Run: func(_ *cobra.Command, _ []string) {
			runDevStop()
		},
	}
}
// runDevStop stops every running container whose devcontainer.local_folder
// label equals the git root of the current workspace.
//
// Containers are located with "docker ps -q --filter label=...". That command
// prints one ID per line and may print several, so the output is split into
// individual IDs and all of them are passed to a single "docker stop".
// Finding no container is not an error.
func runDevStop() {
	root, err := paths.GitRoot()
	if err != nil {
		log.Fatalf("Failed to find git root: %v", err)
	}

	// Find the container(s) by the devcontainer label.
	out, err := exec.Command(
		"docker", "ps", "-q",
		"--filter", "label=devcontainer.local_folder="+root,
	).Output()
	if err != nil {
		log.Fatalf("Failed to find devcontainer: %v", err)
	}

	// Fields splits on any whitespace, including the newlines between IDs,
	// and yields an empty slice for blank output.
	containerIDs := strings.Fields(string(out))
	if len(containerIDs) == 0 {
		log.Info("No running devcontainer found")
		return
	}

	log.Infof("Stopping devcontainer %s...", strings.Join(containerIDs, ", "))
	stopArgs := append([]string{"stop"}, containerIDs...)
	if err := exec.Command("docker", stopArgs...).Run(); err != nil {
		log.Fatalf("Failed to stop devcontainer: %v", err)
	}
	log.Info("Devcontainer stopped")
}

177
tools/ods/cmd/dev_up.go Normal file
View File

@@ -0,0 +1,177 @@
package cmd
import (
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
)
// newDevUpCommand builds "ods dev up", which runs "devcontainer up" with no
// extra arguments.
func newDevUpCommand() *cobra.Command {
	const longHelp = `Start the devcontainer, pulling the image if needed.
Examples:
ods dev up`

	return &cobra.Command{
		Use:   "up",
		Short: "Start the devcontainer",
		Long:  longHelp,
		Run: func(_ *cobra.Command, _ []string) {
			runDevcontainer("up", nil)
		},
	}
}
// devcontainerImage returns the "image" value from
// <git root>/.devcontainer/devcontainer.json.
//
// Any failure (no git root, unreadable file, invalid JSON, or a missing or
// empty image field) terminates the process through a fatal log call.
func devcontainerImage() string {
	// Only the image field is of interest; other keys are ignored on decode.
	type devcontainerConfig struct {
		Image string `json:"image"`
	}

	repoRoot, rootErr := paths.GitRoot()
	if rootErr != nil {
		log.Fatalf("Failed to find git root: %v", rootErr)
	}

	configPath := filepath.Join(repoRoot, ".devcontainer", "devcontainer.json")
	raw, readErr := os.ReadFile(configPath)
	if readErr != nil {
		log.Fatalf("Failed to read devcontainer.json: %v", readErr)
	}

	var parsed devcontainerConfig
	if parseErr := json.Unmarshal(raw, &parsed); parseErr != nil {
		log.Fatalf("Failed to parse devcontainer.json: %v", parseErr)
	}

	if len(parsed.Image) == 0 {
		log.Fatal("No image field in devcontainer.json")
	}
	return parsed.Image
}
// checkDevcontainerCLI terminates the process with an install hint when no
// "devcontainer" executable can be found on PATH.
func checkDevcontainerCLI() {
	_, lookErr := exec.LookPath("devcontainer")
	if lookErr == nil {
		return
	}
	log.Fatal("devcontainer CLI is not installed. Install it with: npm install -g @devcontainers/cli")
}
// ensureDockerSock exports DOCKER_SOCK for this process (and therefore for
// child processes) when it is unset or empty, using the path chosen by
// detectDockerSock. A non-empty existing value is left untouched.
// devcontainer.json references ${localEnv:DOCKER_SOCK} for the socket mount.
func ensureDockerSock() {
	if existing := os.Getenv("DOCKER_SOCK"); existing == "" {
		detected := detectDockerSock()
		setErr := os.Setenv("DOCKER_SOCK", detected)
		if setErr != nil {
			log.Fatalf("Failed to set DOCKER_SOCK: %v", setErr)
		}
	}
}
// detectDockerSock picks the host path of the Docker socket.
//
// Order of preference:
//  1. DOCKER_HOST, when it is "unix://<path>" with a non-empty path or a bare
//     absolute path. Any other non-empty value is reported with a warning and
//     ignored.
//  2. A platform-specific location, used only if it exists on disk:
//     $XDG_RUNTIME_DIR/docker.sock on Linux (rootless Docker) or
//     ~/.docker/run/docker.sock on macOS (Docker Desktop).
//  3. /var/run/docker.sock.
func detectDockerSock() string {
	const unixScheme = "unix://"

	if host := os.Getenv("DOCKER_HOST"); host != "" {
		stripped := strings.TrimPrefix(host, unixScheme)
		switch {
		case stripped != host && stripped != "":
			// Had the unix:// scheme and something follows it.
			return stripped
		case strings.HasPrefix(host, "/"):
			// Only bare paths (starting with /) are valid socket paths.
			// Non-unix schemes (e.g. tcp://) can't be bind-mounted.
			return host
		}
		log.Warnf("DOCKER_HOST=%q is not a unix socket path; falling back to local socket detection", host)
	}

	// Work out the single platform-specific candidate, if any.
	var candidate string
	switch runtime.GOOS {
	case "linux":
		if xdg := os.Getenv("XDG_RUNTIME_DIR"); xdg != "" {
			candidate = filepath.Join(xdg, "docker.sock")
		}
	case "darwin":
		if home, err := os.UserHomeDir(); err == nil {
			candidate = filepath.Join(home, ".docker", "run", "docker.sock")
		}
	}

	if candidate != "" {
		if _, statErr := os.Stat(candidate); statErr == nil {
			return candidate
		}
	}

	// Fallback: standard socket path (Linux with standard Docker, macOS symlink)
	return "/var/run/docker.sock"
}
// worktreeGitMount returns a --mount flag value that makes a git worktree's
// .git reference resolve inside the container. In a worktree, .git is a file
// containing "gitdir: /path/to/main/.git/worktrees/<name>", so we need the
// main repo's .git directory to exist at the same absolute host path inside
// the container.
//
// The main repository's .git directory is obtained from
// "git rev-parse --git-common-dir", made absolute relative to root, and then
// symlink-resolved. If symlink resolution fails the unresolved absolute path
// is used instead of discarding the mount.
//
// Returns ("", false) when the workspace is not a worktree, or when git
// cannot report a common directory.
func worktreeGitMount(root string) (string, bool) {
	dotgit := filepath.Join(root, ".git")
	info, err := os.Lstat(dotgit)
	if err != nil || info.IsDir() {
		return "", false // regular repo or no .git
	}

	// .git is not a directory — ask git where the shared .git directory is.
	out, err := exec.Command("git", "-C", root, "rev-parse", "--git-common-dir").Output()
	if err != nil {
		log.Warnf("Failed to detect git common dir: %v", err)
		return "", false
	}
	commonDir := strings.TrimSpace(string(out))
	if commonDir == "" {
		// Joining an empty path onto root would mount the workspace itself.
		log.Warn("Failed to detect git common dir: git returned an empty path")
		return "", false
	}

	// Resolve to absolute path.
	if !filepath.IsAbs(commonDir) {
		commonDir = filepath.Join(root, commonDir)
	}

	// EvalSymlinks returns "" alongside an error, so only adopt its result on
	// success; otherwise the mount spec would have empty source and target.
	if resolved, err := filepath.EvalSymlinks(commonDir); err != nil {
		log.Warnf("Failed to resolve symlinks in %s (using it as-is): %v", commonDir, err)
	} else {
		commonDir = resolved
	}

	mount := fmt.Sprintf("type=bind,source=%s,target=%s", commonDir, commonDir)
	log.Debugf("Worktree detected — mounting main .git: %s", commonDir)
	return mount, true
}
// runDevcontainer runs "devcontainer <action> --workspace-folder <root>",
// followed by an optional worktree --mount and then extraArgs, with this
// process's standard streams attached.
//
// Before launching it verifies the devcontainer CLI is installed and makes
// sure DOCKER_SOCK is set. Any failure ends the process via a fatal log call.
func runDevcontainer(action string, extraArgs []string) {
	checkDevcontainerCLI()
	ensureDockerSock()

	repoRoot, rootErr := paths.GitRoot()
	if rootErr != nil {
		log.Fatalf("Failed to find git root: %v", rootErr)
	}

	cliArgs := []string{action, "--workspace-folder", repoRoot}
	// In a git worktree, also bind-mount the main repository's .git directory.
	gitMount, isWorktree := worktreeGitMount(repoRoot)
	if isWorktree {
		cliArgs = append(cliArgs, "--mount", gitMount)
	}
	cliArgs = append(cliArgs, extraArgs...)

	log.Debugf("Running: devcontainer %v", cliArgs)

	proc := exec.Command("devcontainer", cliArgs...)
	proc.Stdin, proc.Stdout, proc.Stderr = os.Stdin, os.Stdout, os.Stderr

	runErr := proc.Run()
	if runErr != nil {
		log.Fatalf("devcontainer %s failed: %v", action, runErr)
	}
}

View File

@@ -53,6 +53,7 @@ func NewRootCommand() *cobra.Command {
cmd.AddCommand(NewRunCICommand())
cmd.AddCommand(NewScreenshotDiffCommand())
cmd.AddCommand(NewDesktopCommand())
cmd.AddCommand(NewDevCommand())
cmd.AddCommand(NewWebCommand())
cmd.AddCommand(NewLatestStableTagCommand())
cmd.AddCommand(NewWhoisCommand())

16
uv.lock generated
View File

@@ -4511,7 +4511,7 @@ dev = [
{ name = "matplotlib", specifier = "==3.10.8" },
{ name = "mypy", specifier = "==1.13.0" },
{ name = "mypy-extensions", specifier = "==1.0.0" },
{ name = "onyx-devtools", specifier = "==0.7.3" },
{ name = "onyx-devtools", specifier = "==0.7.4" },
{ name = "openapi-generator-cli", specifier = "==7.17.0" },
{ name = "pandas-stubs", specifier = "~=2.3.3" },
{ name = "pre-commit", specifier = "==3.2.2" },
@@ -4554,19 +4554,19 @@ model-server = [
[[package]]
name = "onyx-devtools"
version = "0.7.3"
version = "0.7.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "fastapi" },
{ name = "openapi-generator-cli" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/72/64/c75be8ab325896cc64bccd0e1e139a03ce305bf05598967922d380fc4694/onyx_devtools-0.7.3-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:675e2fdbd8d291fba4b8a6dfcf2bc94c56d22d11f395a9f0d0c3c0e5b39d7f9b", size = 4220613, upload-time = "2026-04-09T00:04:36.624Z" },
{ url = "https://files.pythonhosted.org/packages/ae/1f/589ff6bd446c4498f5bcdfd2a315709e91fc15edf5440c91ff64cbf0800f/onyx_devtools-0.7.3-py3-none-macosx_11_0_arm64.whl", hash = "sha256:bf3993de8ba02d6c2f1ab12b5b9b965e005040b37502f97db8a7d88d9b0cde4b", size = 3897867, upload-time = "2026-04-09T00:04:40.781Z" },
{ url = "https://files.pythonhosted.org/packages/10/c0/53c9173eefc13218707282c5b99753960d039684994c3b3caf90ce286094/onyx_devtools-0.7.3-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:6138a94084bed05c674ad210a0bc4006c43bc4384e8eb54d469233de85c72bd7", size = 3762408, upload-time = "2026-04-09T00:04:41.592Z" },
{ url = "https://files.pythonhosted.org/packages/d2/37/69fadb65112854a596d200f704da94b837817d4dd0f46cb4482dc0309c94/onyx_devtools-0.7.3-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:90dac91b0cdc32eb8861f6e83545009a34c439fd3c41fc7dd499acd0105b660e", size = 4184427, upload-time = "2026-04-09T00:04:41.525Z" },
{ url = "https://files.pythonhosted.org/packages/bd/45/91c829ccb45f1a15e7c9641eccc6dd154adb540e03c7dee2a8f28cea24d0/onyx_devtools-0.7.3-py3-none-win_amd64.whl", hash = "sha256:abc68d70bec06e349481beec4b212de28a1a8b7ed6ef3b41daf7093ee10b44f3", size = 4299935, upload-time = "2026-04-09T00:04:40.262Z" },
{ url = "https://files.pythonhosted.org/packages/cc/30/c5adcb8e3b46b71d8d92c3f9ee0c1d0bc5e2adc9f46e93931f21b36a3ee4/onyx_devtools-0.7.3-py3-none-win_arm64.whl", hash = "sha256:9e4411cadc5e81fabc9ed991402e3b4b40f02800681299c277b2142e5af0dcee", size = 3840228, upload-time = "2026-04-09T00:04:39.708Z" },
{ url = "https://files.pythonhosted.org/packages/cc/3f/584bb003333b6e6d632b06bbf99d410c7a71adde1711076fd44fe88d966d/onyx_devtools-0.7.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6c51d9199ff8ff8fe64a3cfcf77f8170508722b33a1de54c5474be0447b7afa8", size = 4237700, upload-time = "2026-04-09T21:28:20.694Z" },
{ url = "https://files.pythonhosted.org/packages/0a/04/8c28522d51a66b1bdc997a1c72821122eab23f048459646c6ee62a39f6eb/onyx_devtools-0.7.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:f64a4cec6d3616b9ca7354e326994882c9ff2cb3f9fc9a44e55f0eb6a6ff1c1c", size = 3912751, upload-time = "2026-04-09T21:28:23.079Z" },
{ url = "https://files.pythonhosted.org/packages/8c/e6/ae60307cc50064dacb58e003c9a367d5c85118fd89a597abf3de5fd66f0a/onyx_devtools-0.7.4-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:31c7cecaaa329e3f6d53864290bc53fd0b823453c6cfdb8be7931a8925f5c075", size = 3778188, upload-time = "2026-04-09T21:28:23.14Z" },
{ url = "https://files.pythonhosted.org/packages/f1/d1/5a2789efac7d8f19d30d4d8da1862dd10a16b65d8c9b200542a959094a17/onyx_devtools-0.7.4-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:4c44e3c21253ea92127af483155190c14426c729d93e244aedc33875f74d3514", size = 4200526, upload-time = "2026-04-09T21:28:23.711Z" },
{ url = "https://files.pythonhosted.org/packages/0a/40/56a467eaa7b78411971898191cf0dc3ee49b7f448d1cfe76cd432f6458d3/onyx_devtools-0.7.4-py3-none-win_amd64.whl", hash = "sha256:6fa2b63b702bc5ecbeed5f9eadec57d61ac5c4a646cf5fbd66ee340f53b7d81c", size = 4319090, upload-time = "2026-04-09T21:28:23.26Z" },
{ url = "https://files.pythonhosted.org/packages/fa/ef/c866fa8ce1f75e1ac67bc239e767b8944cb1a12a44950986ce57e06db17f/onyx_devtools-0.7.4-py3-none-win_arm64.whl", hash = "sha256:c84cbe6a85474dc9f005f079796cf031e80c4249897432ad9f370cd27f72970a", size = 3857229, upload-time = "2026-04-09T21:28:23.484Z" },
]
[[package]]

View File

@@ -17,6 +17,7 @@ import DocumentSetCard from "@/sections/cards/DocumentSetCard";
import CollapsibleSection from "@/app/admin/agents/CollapsibleSection";
import { StandardAnswerCategoryResponse } from "@/components/standardAnswers/getStandardAnswerCategoriesIfEE";
import { StandardAnswerCategoryDropdownField } from "@/components/standardAnswers/StandardAnswerCategoryDropdown";
import InputComboBox from "@/refresh-components/inputs/InputComboBox";
import { RadioGroup } from "@/components/ui/radio-group";
import { RadioGroupItemField } from "@/components/ui/RadioGroupItemField";
import { AlertCircle } from "lucide-react";
@@ -126,6 +127,24 @@ export function SlackChannelConfigFormFields({
return documentSets.filter((ds) => !documentSetContainsSync(ds));
}, [documentSets]);
const searchAgentOptions = useMemo(
() =>
availableAgents.map((persona) => ({
label: persona.name,
value: String(persona.id),
})),
[availableAgents]
);
const nonSearchAgentOptions = useMemo(
() =>
nonSearchAgents.map((persona) => ({
label: persona.name,
value: String(persona.id),
})),
[nonSearchAgents]
);
useEffect(() => {
const invalidSelected = values.document_sets.filter((dsId: number) =>
unselectableSets.some((us) => us.id === dsId)
@@ -355,12 +374,14 @@ export function SlackChannelConfigFormFields({
</>
</SubLabel>
<SelectorFormField
name="persona_id"
options={availableAgents.map((persona) => ({
name: persona.name,
value: persona.id,
}))}
<InputComboBox
placeholder="Search for an agent..."
value={String(values.persona_id ?? "")}
onValueChange={(val) =>
setFieldValue("persona_id", val ? Number(val) : null)
}
options={searchAgentOptions}
strict
/>
{viewSyncEnabledAgents && syncEnabledAgents.length > 0 && (
<div className="mt-4">
@@ -419,12 +440,14 @@ export function SlackChannelConfigFormFields({
</>
</SubLabel>
<SelectorFormField
name="persona_id"
options={nonSearchAgents.map((persona) => ({
name: persona.name,
value: persona.id,
}))}
<InputComboBox
placeholder="Search for an agent..."
value={String(values.persona_id ?? "")}
onValueChange={(val) =>
setFieldValue("persona_id", val ? Number(val) : null)
}
options={nonSearchAgentOptions}
strict
/>
</div>
)}

View File

@@ -50,7 +50,7 @@ function BifrostModalInternals({
const { models, error } = await fetchBifrostModels({
api_base: formikProps.values.api_base,
api_key: formikProps.values.api_key || undefined,
provider_name: existingLlmProvider?.name,
provider_name: LLMProviderName.BIFROST,
});
if (error) {
throw new Error(error);

View File

@@ -52,7 +52,7 @@ function LiteLLMProxyModalInternals({
const { models, error } = await fetchLiteLLMProxyModels({
api_base: formikProps.values.api_base,
api_key: formikProps.values.api_key,
provider_name: existingLlmProvider?.name,
provider_name: LLMProviderName.LITELLM_PROXY,
});
if (error) {
throw new Error(error);

View File

@@ -52,7 +52,7 @@ function OpenRouterModalInternals({
const { models, error } = await fetchOpenRouterModels({
api_base: formikProps.values.api_base,
api_key: formikProps.values.api_key,
provider_name: existingLlmProvider?.name,
provider_name: LLMProviderName.OPENROUTER,
});
if (error) {
throw new Error(error);