Compare commits

..

3 Commits

Author SHA1 Message Date
Yuhong Sun
34356a5853 Fix sidebar 2026-03-13 13:56:47 -07:00
Yuhong Sun
82fb535015 Done 2026-03-13 13:55:47 -07:00
Yuhong Sun
6bb9a4970b Small touchups in UI 2026-03-13 13:55:47 -07:00
17 changed files with 313 additions and 861 deletions

View File

@@ -1046,46 +1046,10 @@ def llm_loop_completion_handle(
)
_CITATION_LINK_START_PATTERN = re.compile(r"\s*\[\[\d+\]\]\(")
def _find_markdown_link_end(text: str, destination_start: int) -> int | None:
depth = 0
i = destination_start
while i < len(text):
curr = text[i]
if curr == "\\":
i += 2
continue
if curr == "(":
depth += 1
elif curr == ")":
if depth == 0:
return i
depth -= 1
i += 1
return None
def remove_answer_citations(answer: str) -> str:
    """Strip inline citation links like ``[[1]](<url>)`` from *answer*.

    Uses ``_find_markdown_link_end`` so destinations containing balanced
    parentheses (e.g. wikipedia URLs) are removed in full, while ordinary
    markdown links without the ``[[n]]`` marker are preserved.  If a
    citation's destination is never closed, the remainder of the answer is
    kept verbatim.

    Fix: the previous body mixed two implementations — a dead ``pattern``
    local and an unreachable trailing ``re.sub`` call left over from the
    old regex-only approach have been removed.
    """
    stripped_parts: list[str] = []
    cursor = 0
    while match := _CITATION_LINK_START_PATTERN.search(answer, cursor):
        # Keep the text preceding the citation marker.
        stripped_parts.append(answer[cursor : match.start()])
        link_end = _find_markdown_link_end(answer, match.end())
        if link_end is None:
            # Unterminated destination: keep the rest of the answer as-is.
            stripped_parts.append(answer[match.start() :])
            return "".join(stripped_parts)
        cursor = link_end + 1
    stripped_parts.append(answer[cursor:])
    return "".join(stripped_parts)
@log_function_time()

View File

@@ -957,7 +957,7 @@ ENTERPRISE_EDITION_ENABLED = (
#####
# Image Generation Configuration (DEPRECATED)
# These environment variables will be deprecated soon.
# To configure image generation, please visit the Image Generation page in the Admin Panel.
# To configure image generation, please visit the Image Generation page in the Admin Settings.
#####
# Azure Image Configurations
AZURE_IMAGE_API_VERSION = os.environ.get("AZURE_IMAGE_API_VERSION") or os.environ.get(

View File

@@ -0,0 +1,239 @@
from __future__ import annotations
import re
from onyx.context.search.models import SavedSearchDoc
from onyx.context.search.models import SearchDoc
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import AgentResponseDelta
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
from onyx.server.query_and_chat.streaming_models import CitationInfo
from onyx.server.query_and_chat.streaming_models import GeneratedImage
from onyx.server.query_and_chat.streaming_models import ImageGenerationFinal
from onyx.server.query_and_chat.streaming_models import ImageGenerationToolStart
from onyx.server.query_and_chat.streaming_models import OpenUrlDocuments
from onyx.server.query_and_chat.streaming_models import OpenUrlStart
from onyx.server.query_and_chat.streaming_models import OpenUrlUrls
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import ReasoningDelta
from onyx.server.query_and_chat.streaming_models import ReasoningStart
from onyx.server.query_and_chat.streaming_models import SearchToolDocumentsDelta
from onyx.server.query_and_chat.streaming_models import SearchToolQueriesDelta
from onyx.server.query_and_chat.streaming_models import SearchToolStart
from onyx.server.query_and_chat.streaming_models import SectionEnd
# Placeholder text for step results that cannot be displayed.
_CANNOT_SHOW_STEP_RESULTS_STR = "[Cannot display step results]"
def _adjust_message_text_for_agent_search_results(
    adjusted_message_text: str,
    final_documents: list[SavedSearchDoc],  # noqa: ARG001
) -> str:
    """Drop legacy sub-question citation markers such as ``[Q3]``."""
    cleaned_text = re.sub(r"\[Q\d+\]", "", adjusted_message_text)
    return cleaned_text
def _replace_d_citations_with_links(
    message_text: str, final_documents: list[SavedSearchDoc]
) -> str:
    """Rewrite legacy ``[D<n>]`` citations as ``[[n]](<doc link>)`` links.

    Citations that do not map to an entry in ``final_documents`` are left
    untouched; documents without a link produce an empty destination.
    """

    def _to_link(match: re.Match[str]) -> str:
        citation_num = match.group(1)
        try:
            position = int(citation_num) - 1
        except ValueError:
            return match.group(0)
        if not (0 <= position < len(final_documents)):
            return match.group(0)
        doc = final_documents[position]
        return f"[[{citation_num}]]({doc.link or ''})"

    return re.sub(r"\[D(\d+)\]", _to_link, message_text)
def create_message_packets(
    message_text: str,
    final_documents: list[SavedSearchDoc] | None,
    turn_index: int,
    is_legacy_agentic: bool = False,
) -> list[Packet]:
    """Build the packet sequence for a complete agent answer message.

    Emits an ``AgentResponseStart`` (carrying the final documents), one
    ``AgentResponseDelta`` with the full message text, and a ``SectionEnd``,
    all placed at ``turn_index``.  For legacy agentic messages with
    documents, ``[Q<n>]`` markers are stripped and ``[D<n>]`` citations are
    rewritten as links; non-legacy messages only have ``[Q<n>]`` removed.
    """
    if is_legacy_agentic:
        if final_documents is None:
            content = message_text
        else:
            content = _adjust_message_text_for_agent_search_results(
                message_text, final_documents
            )
            content = _replace_d_citations_with_links(content, final_documents)
    else:
        content = re.sub(r"\[Q\d+\]", "", message_text)

    return [
        Packet(
            placement=Placement(turn_index=turn_index),
            obj=AgentResponseStart(
                final_documents=SearchDoc.from_saved_search_docs(
                    final_documents or []
                ),
            ),
        ),
        Packet(
            placement=Placement(turn_index=turn_index),
            obj=AgentResponseDelta(content=content),
        ),
        Packet(placement=Placement(turn_index=turn_index), obj=SectionEnd()),
    ]
def create_citation_packets(
    citation_info_list: list[CitationInfo], turn_index: int
) -> list[Packet]:
    """Wrap each citation in its own packet, terminated by a SectionEnd."""
    citation_packets: list[Packet] = [
        Packet(placement=Placement(turn_index=turn_index), obj=info)
        for info in citation_info_list
    ]
    citation_packets.append(
        Packet(placement=Placement(turn_index=turn_index), obj=SectionEnd())
    )
    return citation_packets
def create_reasoning_packets(reasoning_text: str, turn_index: int) -> list[Packet]:
    """Build start / delta / end packets for a reasoning section."""
    return [
        Packet(placement=Placement(turn_index=turn_index), obj=ReasoningStart()),
        Packet(
            placement=Placement(turn_index=turn_index),
            obj=ReasoningDelta(reasoning=reasoning_text),
        ),
        Packet(placement=Placement(turn_index=turn_index), obj=SectionEnd()),
    ]
def create_image_generation_packets(
    images: list[GeneratedImage], turn_index: int
) -> list[Packet]:
    """Build start / final / end packets for an image generation tool run."""
    return [
        Packet(
            placement=Placement(turn_index=turn_index),
            obj=ImageGenerationToolStart(),
        ),
        Packet(
            placement=Placement(turn_index=turn_index),
            obj=ImageGenerationFinal(images=images),
        ),
        Packet(placement=Placement(turn_index=turn_index), obj=SectionEnd()),
    ]
def create_fetch_packets(
    fetch_docs: list[SavedSearchDoc],
    urls: list[str],
    turn_index: int,
) -> list[Packet]:
    """Build packets describing an open-URL (fetch) tool invocation.

    Order: OpenUrlStart, the fetched URLs, the resulting documents, then a
    SectionEnd — all placed at ``turn_index``.
    """
    return [
        Packet(placement=Placement(turn_index=turn_index), obj=OpenUrlStart()),
        Packet(
            placement=Placement(turn_index=turn_index),
            obj=OpenUrlUrls(urls=urls),
        ),
        Packet(
            placement=Placement(turn_index=turn_index),
            obj=OpenUrlDocuments(
                documents=SearchDoc.from_saved_search_docs(fetch_docs)
            ),
        ),
        Packet(placement=Placement(turn_index=turn_index), obj=SectionEnd()),
    ]
def create_search_packets(
    search_queries: list[str],
    saved_search_docs: list[SavedSearchDoc],
    is_internet_search: bool,
    turn_index: int,
) -> list[Packet]:
    """Build packets describing one search tool invocation.

    Always emits a SearchToolStart and a closing SectionEnd; the queries
    and documents packets are emitted only when non-empty.
    """
    placement = lambda: Placement(turn_index=turn_index)  # noqa: E731

    search_packets: list[Packet] = [
        Packet(
            placement=placement(),
            obj=SearchToolStart(is_internet_search=is_internet_search),
        )
    ]
    if search_queries:
        search_packets.append(
            Packet(
                placement=placement(),
                obj=SearchToolQueriesDelta(queries=search_queries),
            )
        )
    if saved_search_docs:
        search_packets.append(
            Packet(
                placement=placement(),
                obj=SearchToolDocumentsDelta(
                    documents=SearchDoc.from_saved_search_docs(saved_search_docs)
                ),
            )
        )
    search_packets.append(Packet(placement=placement(), obj=SectionEnd()))
    return search_packets

View File

@@ -1,471 +0,0 @@
from __future__ import annotations
import argparse
import asyncio
import json
import logging
import sys
from dataclasses import asdict
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from typing import TypedDict
from typing import TypeGuard
import aiohttp
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(levelname)s %(message)s",
)
logger = logging.getLogger(__name__)
# Frontend base URL used when --api-base is not supplied.
DEFAULT_API_BASE = "http://localhost:3000"
# Tool name / in-code tool id used to locate the internal search tool.
INTERNAL_SEARCH_TOOL_NAME = "internal_search"
INTERNAL_SEARCH_IN_CODE_TOOL_ID = "SearchTool"
# Retry policy for transient HTTP failures.
MAX_REQUEST_ATTEMPTS = 5
RETRIABLE_STATUS_CODES = {429, 500, 502, 503, 504}
@dataclass(frozen=True)
class QuestionRecord:
    """One question read from the input JSONL file."""

    # Unique identifier, carried through unchanged to the output file.
    question_id: str
    # Question text sent as the chat message.
    question: str
@dataclass(frozen=True)
class AnswerRecord:
    """One answered question, serialized as a line of the output JSONL."""

    question_id: str
    answer: str
    # Unique cited document ids, ordered by citation number.
    document_ids: list[str]
@dataclass(frozen=True)
class FailedQuestionRecord:
    """A question whose request ultimately failed, with the error text."""

    question_id: str
    error: str
class Citation(TypedDict, total=False):
    """Shape of one entry in the response's `citation_info` list."""

    citation_number: int
    document_id: str
def parse_args() -> argparse.Namespace:
    """Define and parse the command-line interface for this script."""
    parser = argparse.ArgumentParser(
        description=(
            "Submit questions to Onyx chat with internal search forced and write "
            "answers to a JSONL file."
        )
    )
    parser.add_argument(
        "--questions-file",
        type=Path,
        required=True,
        help="Path to the input questions JSONL file.",
    )
    parser.add_argument(
        "--output-file",
        type=Path,
        required=True,
        help="Path to the output answers JSONL file.",
    )
    parser.add_argument(
        "--api-key",
        type=str,
        required=True,
        help="API key used to authenticate against Onyx.",
    )
    parser.add_argument(
        "--api-base",
        type=str,
        default=DEFAULT_API_BASE,
        help=(
            "Frontend base URL for Onyx. If `/api` is omitted, it will be added "
            f"automatically. Default: {DEFAULT_API_BASE}"
        ),
    )
    parser.add_argument(
        "--parallelism",
        type=int,
        default=1,
        help="Number of questions to process in parallel. Default: 1.",
    )
    parser.add_argument(
        "--max-questions",
        type=int,
        default=None,
        help="Optional cap on how many questions to process. Defaults to all.",
    )
    return parser.parse_args()
def normalize_api_base(api_base: str) -> str:
    """Return *api_base* without trailing slashes and ending in ``/api``."""
    base = api_base.rstrip("/")
    return base if base.endswith("/api") else f"{base}/api"
def load_questions(questions_file: Path) -> list[QuestionRecord]:
    """Read and validate question records from a JSONL file.

    Blank lines are skipped.  Raises FileNotFoundError when the file is
    missing, and ValueError for malformed JSON or missing / empty
    `question_id` or `question` fields.
    """
    if not questions_file.exists():
        raise FileNotFoundError(f"Questions file not found: {questions_file}")

    records: list[QuestionRecord] = []
    with questions_file.open("r", encoding="utf-8") as file:
        for line_number, raw_line in enumerate(file, start=1):
            content = raw_line.strip()
            if not content:
                continue
            try:
                payload = json.loads(content)
            except json.JSONDecodeError as exc:
                raise ValueError(
                    f"Invalid JSON on line {line_number} of {questions_file}"
                ) from exc

            question_id = payload.get("question_id")
            question = payload.get("question")
            if not (isinstance(question_id, str) and question_id):
                raise ValueError(
                    f"Line {line_number} is missing a non-empty `question_id`."
                )
            if not (isinstance(question, str) and question):
                raise ValueError(
                    f"Line {line_number} is missing a non-empty `question`."
                )
            records.append(
                QuestionRecord(question_id=question_id, question=question)
            )
    return records
async def read_json_response(
    response: aiohttp.ClientResponse,
) -> dict[str, Any] | list[dict[str, Any]]:
    """Decode *response* as JSON, raising RuntimeError on any failure.

    Raises for HTTP status >= 400, for non-JSON bodies, and for JSON
    payloads that are not an object or an array.
    """
    response_text = await response.text()
    if response.status >= 400:
        raise RuntimeError(
            f"Request to {response.url} failed with {response.status}: {response_text}"
        )
    try:
        payload = json.loads(response_text)
    except json.JSONDecodeError as exc:
        raise RuntimeError(
            f"Request to {response.url} returned non-JSON content: {response_text}"
        ) from exc
    if isinstance(payload, (dict, list)):
        return payload
    raise RuntimeError(
        f"Unexpected response payload type from {response.url}: {type(payload)}"
    )
async def request_json_with_retries(
    session: aiohttp.ClientSession,
    method: str,
    url: str,
    headers: dict[str, str],
    json_payload: dict[str, Any] | None = None,
) -> dict[str, Any] | list[dict[str, Any]]:
    """Issue an HTTP request, retrying transient failures with backoff.

    Retries connection errors / timeouts and retriable status codes
    (RETRIABLE_STATUS_CODES), doubling the delay after each attempt, up to
    MAX_REQUEST_ATTEMPTS.  Raises RuntimeError once attempts are exhausted.
    """
    backoff_seconds = 1.0
    for attempt in range(1, MAX_REQUEST_ATTEMPTS + 1):
        try:
            async with session.request(
                method=method,
                url=url,
                headers=headers,
                json=json_payload,
            ) as response:
                if (
                    response.status in RETRIABLE_STATUS_CODES
                    and attempt < MAX_REQUEST_ATTEMPTS
                ):
                    # Transient server-side failure: log, back off, retry.
                    response_text = await response.text()
                    logger.warning(
                        "Retryable response from %s on attempt %s/%s: %s %s",
                        url,
                        attempt,
                        MAX_REQUEST_ATTEMPTS,
                        response.status,
                        response_text,
                    )
                    await asyncio.sleep(backoff_seconds)
                    backoff_seconds *= 2
                    continue
                return await read_json_response(response)
        except (aiohttp.ClientError, asyncio.TimeoutError) as exc:
            if attempt == MAX_REQUEST_ATTEMPTS:
                raise RuntimeError(
                    f"Request to {url} failed after {MAX_REQUEST_ATTEMPTS} attempts."
                ) from exc
            logger.warning(
                "Request to %s failed on attempt %s/%s: %s",
                url,
                attempt,
                MAX_REQUEST_ATTEMPTS,
                exc,
            )
            await asyncio.sleep(backoff_seconds)
            backoff_seconds *= 2
    # Unreachable in practice; keeps the return type honest for callers.
    raise RuntimeError(f"Request to {url} failed unexpectedly.")
def extract_document_ids(citation_info: object) -> list[str]:
    """Return unique document ids from a `citation_info` payload.

    Invalid entries are dropped; the rest are ordered by `citation_number`
    (entries without one sort last) and de-duplicated while preserving
    that order.  Non-list input yields an empty list.
    """
    if not isinstance(citation_info, list):
        return []
    valid_citations = [c for c in citation_info if _is_valid_citation(c)]
    valid_citations.sort(key=_citation_sort_key)
    # dict preserves insertion order; setdefault keeps the first occurrence.
    ordered_unique: dict[str, None] = {}
    for citation in valid_citations:
        ordered_unique.setdefault(citation["document_id"], None)
    return list(ordered_unique)
def _is_valid_citation(citation: object) -> TypeGuard[Citation]:
return (
isinstance(citation, dict)
and isinstance(citation.get("document_id"), str)
and bool(citation["document_id"])
)
def _citation_sort_key(citation: Citation) -> int:
    """Sort key: the citation number, or sys.maxsize when absent/invalid."""
    number = citation.get("citation_number")
    return number if isinstance(number, int) else sys.maxsize
async def fetch_internal_search_tool_id(
    session: aiohttp.ClientSession,
    api_base: str,
    headers: dict[str, str],
) -> int:
    """Look up the numeric id of the internal search tool via `/tool`.

    Prefers a match on `in_code_tool_id`, falling back to matching the tool
    name.  Raises RuntimeError when no matching tool with an int id exists.
    """
    payload = await request_json_with_retries(
        session=session,
        method="GET",
        url=f"{api_base}/tool",
        headers=headers,
    )
    if not isinstance(payload, list):
        raise RuntimeError("Expected `/tool` to return a list.")

    def _first_id_matching(key: str, expected: str) -> int | None:
        # Skip non-dict entries and entries whose id is not an int.
        for tool in payload:
            if isinstance(tool, dict) and tool.get(key) == expected:
                tool_id = tool.get("id")
                if isinstance(tool_id, int):
                    return tool_id
        return None

    tool_id = _first_id_matching("in_code_tool_id", INTERNAL_SEARCH_IN_CODE_TOOL_ID)
    if tool_id is None:
        tool_id = _first_id_matching("name", INTERNAL_SEARCH_TOOL_NAME)
    if tool_id is not None:
        return tool_id
    raise RuntimeError(
        "Could not find the internal search tool in `/tool`. "
        "Make sure SearchTool is available for this environment."
    )
async def submit_question(
    session: aiohttp.ClientSession,
    api_base: str,
    headers: dict[str, str],
    internal_search_tool_id: int,
    question_record: QuestionRecord,
) -> AnswerRecord:
    """Send one question through non-streaming chat with search forced.

    Returns the answer text (preferring `answer_citationless`) together
    with the cited document ids.  Raises RuntimeError on a malformed
    response.
    """
    request_body = {
        "message": question_record.question,
        "chat_session_info": {"persona_id": 0},
        "parent_message_id": None,
        "file_descriptors": [],
        "allowed_tool_ids": [internal_search_tool_id],
        "forced_tool_id": internal_search_tool_id,
        "stream": False,
    }
    response_payload = await request_json_with_retries(
        session=session,
        method="POST",
        url=f"{api_base}/chat/send-chat-message",
        headers=headers,
        json_payload=request_body,
    )
    if not isinstance(response_payload, dict):
        raise RuntimeError(
            "Expected `/chat/send-chat-message` to return an object when `stream=false`."
        )

    # Prefer the citation-free answer field; fall back to the raw answer.
    answer_value = response_payload.get("answer_citationless")
    if not isinstance(answer_value, str):
        answer_value = response_payload.get("answer")
    if not isinstance(answer_value, str):
        raise RuntimeError(
            f"Response for question {question_record.question_id} is missing `answer`."
        )
    return AnswerRecord(
        question_id=question_record.question_id,
        answer=answer_value,
        document_ids=extract_document_ids(response_payload.get("citation_info")),
    )
async def generate_answers(
    questions: list[QuestionRecord],
    output_file: Path,
    api_base: str,
    api_key: str,
    parallelism: int,
) -> None:
    """Run all questions through the chat API and append answers as JSONL.

    Results are appended to *output_file* as they complete so partial
    progress survives a crash; failures are collected and summarized at the
    end rather than aborting the whole run.  *parallelism* bounds both the
    connection pool and the number of in-flight requests.
    """
    if parallelism < 1:
        raise ValueError("`--parallelism` must be at least 1.")
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    # No total timeout: per-phase limits guard hangs while still allowing
    # long-running generations.
    timeout = aiohttp.ClientTimeout(
        total=None,
        connect=30,
        sock_connect=30,
        sock_read=600,
    )
    connector = aiohttp.TCPConnector(limit=parallelism)
    output_file.parent.mkdir(parents=True, exist_ok=True)
    # Open in append mode so reruns add to existing output instead of
    # clobbering it.
    with output_file.open("a", encoding="utf-8") as file:
        async with aiohttp.ClientSession(
            timeout=timeout, connector=connector
        ) as session:
            internal_search_tool_id = await fetch_internal_search_tool_id(
                session=session,
                api_base=api_base,
                headers=headers,
            )
            logger.info("Using internal search tool id %s", internal_search_tool_id)
            semaphore = asyncio.Semaphore(parallelism)
            progress_lock = asyncio.Lock()
            write_lock = asyncio.Lock()
            completed = 0
            successful = 0
            failed_questions: list[FailedQuestionRecord] = []
            total = len(questions)

            async def process_question(question_record: QuestionRecord) -> None:
                # Process one question; the semaphore caps concurrency.
                nonlocal completed
                nonlocal successful
                try:
                    async with semaphore:
                        result = await submit_question(
                            session=session,
                            api_base=api_base,
                            headers=headers,
                            internal_search_tool_id=internal_search_tool_id,
                            question_record=question_record,
                        )
                except Exception as exc:
                    # Record the failure and keep going with the rest.
                    async with progress_lock:
                        completed += 1
                        failed_questions.append(
                            FailedQuestionRecord(
                                question_id=question_record.question_id,
                                error=str(exc),
                            )
                        )
                        logger.exception(
                            "Failed question %s (%s/%s)",
                            question_record.question_id,
                            completed,
                            total,
                        )
                    return
                # Serialize writes so concurrent tasks don't interleave lines.
                async with write_lock:
                    file.write(json.dumps(asdict(result), ensure_ascii=False))
                    file.write("\n")
                    file.flush()
                async with progress_lock:
                    completed += 1
                    successful += 1
                    logger.info("Processed %s/%s questions", completed, total)

            await asyncio.gather(
                *(process_question(question_record) for question_record in questions)
            )
    if failed_questions:
        logger.warning(
            "Completed with %s failed questions and %s successful questions.",
            len(failed_questions),
            successful,
        )
        for failed_question in failed_questions:
            logger.warning(
                "Failed question %s: %s",
                failed_question.question_id,
                failed_question.error,
            )
def main() -> None:
    """CLI entry point: load questions, then generate and persist answers."""
    args = parse_args()
    questions = load_questions(args.questions_file)
    api_base = normalize_api_base(args.api_base)

    max_questions = args.max_questions
    if max_questions is not None:
        if max_questions < 1:
            raise ValueError("`--max-questions` must be at least 1 when provided.")
        questions = questions[:max_questions]

    logger.info("Loaded %s questions from %s", len(questions), args.questions_file)
    logger.info("Writing answers to %s", args.output_file)
    asyncio.run(
        generate_answers(
            questions=questions,
            output_file=args.output_file,
            api_base=api_base,
            api_key=args.api_key,
            parallelism=args.parallelism,
        )
    )


if __name__ == "__main__":
    main()

View File

@@ -1,291 +0,0 @@
"""
Script to upload files from a directory as individual file connectors in Onyx.
Each file gets its own connector named after the file.
Usage:
python upload_files_as_connectors.py --data-dir /path/to/files --api-key YOUR_KEY
python upload_files_as_connectors.py --data-dir /path/to/files --api-key YOUR_KEY --api-base http://onyxserver:3000
python upload_files_as_connectors.py --data-dir /path/to/files --api-key YOUR_KEY --file-glob '*.zip'
Requires:
pip install requests
"""
import argparse
import fnmatch
import os
import sys
import threading
import time
import requests
REQUEST_TIMEOUT = 900 # 15 minutes
def _elapsed_printer(label: str, stop_event: threading.Event) -> None:
    """Print a live MM:SS elapsed-time counter until *stop_event* is set.

    Updates in place (carriage return) roughly once per second, then prints
    a final line suffixed with "done".
    """
    start = time.monotonic()
    while not stop_event.wait(timeout=1):
        minutes, seconds = divmod(int(time.monotonic() - start), 60)
        print(f"\r {label} ... {minutes:02d}:{seconds:02d}", end="", flush=True)
    minutes, seconds = divmod(int(time.monotonic() - start), 60)
    print(f"\r {label} ... {minutes:02d}:{seconds:02d} done")
def _timed_request(label: str, fn: object) -> requests.Response:
    """Run a request function while displaying a live elapsed timer."""
    stop_event = threading.Event()
    ticker = threading.Thread(
        target=_elapsed_printer, args=(label, stop_event), daemon=True
    )
    ticker.start()
    try:
        return fn()  # type: ignore[operator]
    finally:
        # Always stop the ticker thread, even if the request raises.
        stop_event.set()
        ticker.join()
def upload_file(
    session: requests.Session, base_url: str, file_path: str
) -> dict | None:
    """Upload a single file and return the response with file_paths and file_names."""
    with open(file_path, "rb") as file_handle:
        response = _timed_request(
            "Uploading",
            lambda: session.post(
                f"{base_url}/api/manage/admin/connector/file/upload",
                files={"files": (os.path.basename(file_path), file_handle)},
                timeout=REQUEST_TIMEOUT,
            ),
        )
    if response.ok:
        return response.json()
    print(f" ERROR uploading: {response.text}")
    return None
def create_connector(
    session: requests.Session,
    base_url: str,
    name: str,
    file_paths: list[str],
    file_names: list[str],
    zip_metadata_file_id: str | None,
) -> int | None:
    """Create a file connector and return its ID (None on failure)."""
    connector_payload = {
        "name": name,
        "source": "file",
        "input_type": "load_state",
        "connector_specific_config": {
            "file_locations": file_paths,
            "file_names": file_names,
            "zip_metadata_file_id": zip_metadata_file_id,
        },
        "refresh_freq": None,
        "prune_freq": None,
        "indexing_start": None,
        "access_type": "public",
        "groups": [],
    }
    response = _timed_request(
        "Creating connector",
        lambda: session.post(
            f"{base_url}/api/manage/admin/connector",
            json=connector_payload,
            timeout=REQUEST_TIMEOUT,
        ),
    )
    if response.ok:
        return response.json()["id"]
    print(f" ERROR creating connector: {response.text}")
    return None
def create_credential(
    session: requests.Session, base_url: str, name: str
) -> int | None:
    """Create a dummy credential for the file connector; return its ID."""
    response = session.post(
        f"{base_url}/api/manage/credential",
        json={
            "credential_json": {},
            "admin_public": True,
            "source": "file",
            "curator_public": True,
            "groups": [],
            "name": name,
        },
        timeout=REQUEST_TIMEOUT,
    )
    if response.ok:
        return response.json()["id"]
    print(f" ERROR creating credential: {response.text}")
    return None
def link_credential(
    session: requests.Session,
    base_url: str,
    connector_id: int,
    credential_id: int,
    name: str,
) -> bool:
    """Link the connector to the credential (create CC pair)."""
    response = session.put(
        f"{base_url}/api/manage/connector/{connector_id}/credential/{credential_id}",
        json={
            "name": name,
            "access_type": "public",
            "groups": [],
            "auto_sync_options": None,
            "processing_mode": "REGULAR",
        },
        timeout=REQUEST_TIMEOUT,
    )
    if response.ok:
        return True
    print(f" ERROR linking credential: {response.text}")
    return False
def run_connector(
    session: requests.Session,
    base_url: str,
    connector_id: int,
    credential_id: int,
) -> bool:
    """Trigger the connector to start indexing."""
    # NOTE(review): `credentialIds` is camelCase while sibling keys are
    # snake_case — presumably matching the endpoint schema; confirm with
    # the API before "fixing" it.
    response = session.post(
        f"{base_url}/api/manage/admin/connector/run-once",
        json={
            "connector_id": connector_id,
            "credentialIds": [credential_id],
            "from_beginning": False,
        },
        timeout=REQUEST_TIMEOUT,
    )
    if response.ok:
        return True
    print(f" ERROR running connector: {response.text}")
    return False
def process_file(session: requests.Session, base_url: str, file_path: str) -> bool:
    """Process a single file through the full connector creation flow.

    Upload -> create connector -> create credential -> link -> run.
    Returns False as soon as any step fails (the step prints its error).
    """
    file_name = os.path.basename(file_path)
    connector_name = file_name
    print(f"Processing: {file_name}")

    # Step 1: Upload
    upload_resp = upload_file(session, base_url, file_path)
    if not upload_resp:
        return False

    # Step 2: Create connector
    connector_id = create_connector(
        session,
        base_url,
        name=f"FileConnector-{connector_name}",
        file_paths=upload_resp["file_paths"],
        file_names=upload_resp["file_names"],
        zip_metadata_file_id=upload_resp.get("zip_metadata_file_id"),
    )
    if connector_id is None:
        return False

    # Step 3: Create credential
    credential_id = create_credential(session, base_url, name=connector_name)
    if credential_id is None:
        return False

    # Step 4: Link connector to credential (CC pair)
    linked = link_credential(
        session, base_url, connector_id, credential_id, connector_name
    )
    if not linked:
        return False

    # Step 5: Trigger indexing
    if not run_connector(session, base_url, connector_id, credential_id):
        return False

    print(f" OK (connector_id={connector_id})")
    return True
def get_authenticated_session(api_key: str) -> requests.Session:
    """Create a session authenticated with an API key."""
    session = requests.Session()
    session.headers["Authorization"] = f"Bearer {api_key}"
    return session
def main() -> None:
    """Parse CLI args and upload every matching file as its own connector."""
    parser = argparse.ArgumentParser(
        description="Upload files as individual Onyx file connectors."
    )
    parser.add_argument(
        "--data-dir",
        required=True,
        help="Directory containing files to upload.",
    )
    parser.add_argument(
        "--api-base",
        default="http://localhost:3000",
        help="Base URL for the Onyx API (default: http://localhost:3000).",
    )
    parser.add_argument(
        "--api-key",
        required=True,
        help="API key for authentication.",
    )
    parser.add_argument(
        "--file-glob",
        default=None,
        help="Glob pattern to filter files (e.g. '*.json', '*.zip').",
    )
    args = parser.parse_args()
    data_dir = args.data_dir
    base_url = args.api_base.rstrip("/")
    api_key = args.api_key
    file_glob = args.file_glob
    if not os.path.isdir(data_dir):
        print(f"Error: {data_dir} is not a directory")
        sys.exit(1)
    # Exclude this script itself in case it lives inside the data directory.
    script_path = os.path.realpath(__file__)
    files = sorted(
        os.path.join(data_dir, f)
        for f in os.listdir(data_dir)
        if os.path.isfile(os.path.join(data_dir, f))
        and os.path.realpath(os.path.join(data_dir, f)) != script_path
        and (file_glob is None or fnmatch.fnmatch(f, file_glob))
    )
    if not files:
        print(f"No files found in {data_dir}")
        sys.exit(1)
    print(f"Found {len(files)} file(s) in {data_dir}\n")
    session = get_authenticated_session(api_key)
    success = 0
    failed = 0
    for file_path in files:
        if process_file(session, base_url, file_path):
            success += 1
        else:
            failed += 1
        # Small delay to avoid overwhelming the server
        time.sleep(0.5)
    print(f"\nDone: {success} succeeded, {failed} failed out of {len(files)} files.")


if __name__ == "__main__":
    main()

View File

@@ -1,34 +0,0 @@
from onyx.chat.process_message import remove_answer_citations
def test_remove_answer_citations_strips_http_markdown_citation() -> None:
    """A simple https citation link is removed along with leading whitespace."""
    answer = "The answer is Paris [[1]](https://example.com/doc)."
    assert remove_answer_citations(answer) == "The answer is Paris."
def test_remove_answer_citations_strips_empty_markdown_citation() -> None:
    """A citation whose link destination is empty is still removed."""
    answer = "The answer is Paris [[1]]()."
    assert remove_answer_citations(answer) == "The answer is Paris."
def test_remove_answer_citations_strips_citation_with_parentheses_in_url() -> None:
    """Balanced parentheses inside the URL do not truncate the removal."""
    answer = (
        "The answer is Paris "
        "[[1]](https://en.wikipedia.org/wiki/Function_(mathematics))."
    )
    assert remove_answer_citations(answer) == "The answer is Paris."
def test_remove_answer_citations_preserves_non_citation_markdown_links() -> None:
    """Only ``[[n]](...)`` citation links are stripped; plain links survive."""
    answer = (
        "See [reference](https://example.com/Function_(mathematics)) "
        "for context [[1]](https://en.wikipedia.org/wiki/Function_(mathematics))."
    )
    assert (
        remove_answer_citations(answer)
        == "See [reference](https://example.com/Function_(mathematics)) for context."
    )

View File

@@ -464,7 +464,7 @@ export default function VoiceConfigurationPage() {
return (
<>
<AdminPageTitle
title="Voice"
title="Voice Mode"
icon={SvgMicrophone}
includeDivider={false}
/>
@@ -484,7 +484,7 @@ export default function VoiceConfigurationPage() {
return (
<>
<AdminPageTitle
title="Voice"
title="Voice Mode"
icon={SvgMicrophone}
includeDivider={false}
/>
@@ -497,7 +497,7 @@ export default function VoiceConfigurationPage() {
return (
<>
<AdminPageTitle icon={SvgAudio} title="Voice" />
<AdminPageTitle icon={SvgAudio} title="Voice Mode" />
<div className="pt-4 pb-4">
<Text as="p" secondaryBody text03>
Speech to text (STT) and text to speech (TTS) capabilities.

View File

@@ -79,7 +79,7 @@ interface SelectedConnectorState {
}
/**
* Build Admin Panel - Connector configuration page
* Build Admin Settings - Connector configuration page
*
* Renders in the center panel area (replacing ChatPanel + OutputPanel).
* Uses SettingsLayouts like AgentEditorPage does.

View File

@@ -22,10 +22,10 @@ export default function NoAgentModal() {
<>
<Text as="p">
As an administrator, you can create a new agent by visiting the
admin panel.
admin settings.
</Text>
<Button width="full" href="/admin/agents">
Go to Admin Panel
Go to Admin Settings
</Button>
</>
) : (

View File

@@ -766,6 +766,7 @@ export default function AppPage({ firstMessage }: ChatPageProps) {
{(appFocus.isNewSession() || appFocus.isAgent()) &&
(state.phase === "idle" ||
state.phase === "classifying") &&
!isLoadingOnboarding &&
(showOnboarding || !user?.personalization?.name) &&
!onboardingDismissed && (
<OnboardingFlow

View File

@@ -977,7 +977,7 @@ function ChatPreferencesSettings() {
<Section gap={0.75}>
<Content
title="Voice"
title="Voice Mode"
sizePreset="main-content"
variant="section"
widthVariant="full"

View File

@@ -92,7 +92,7 @@ export default function ChatDocumentDisplay({
) : (
<SourceIcon sourceType={document.source_type} iconSize={18} />
)}
<Truncated className="line-clamp-2" side="left">
<Truncated className="line-clamp-2" side="left" disable>
{title}
</Truncated>
</div>

View File

@@ -3,7 +3,15 @@
import { MinimalOnyxDocument, OnyxDocument } from "@/lib/search/interfaces";
import ChatDocumentDisplay from "@/sections/document-sidebar/ChatDocumentDisplay";
import { removeDuplicateDocs } from "@/lib/documentUtils";
import { Dispatch, SetStateAction, useMemo, memo } from "react";
import {
Dispatch,
SetStateAction,
useMemo,
memo,
useRef,
useState,
useCallback,
} from "react";
import { getCitations } from "@/app/app/services/packetUtils";
import {
useCurrentMessageTree,
@@ -40,25 +48,30 @@ const buildOnyxDocumentFromFile = (
interface HeaderProps {
children: string;
onClose: () => void;
onClose?: () => void;
isTop?: boolean;
}
function Header({ children, onClose }: HeaderProps) {
function Header({ children, onClose, isTop }: HeaderProps) {
return (
<div className="sticky top-0 z-sticky bg-background-tint-01">
<div className="flex flex-row w-full items-center justify-between gap-2 py-3">
<div className="flex items-center gap-2 w-full px-3">
<SvgSearchMenu className="w-[1.3rem] h-[1.3rem] stroke-text-03" />
{isTop && (
<SvgSearchMenu className="w-[1.3rem] h-[1.3rem] stroke-text-03" />
)}
<Text as="p" headingH3 text03>
{children}
</Text>
</div>
<Button
icon={SvgX}
prominence="tertiary"
onClick={onClose}
tooltip="Close Sidebar"
/>
{onClose && (
<Button
icon={SvgX}
prominence="tertiary"
onClick={onClose}
tooltip="Close Sidebar"
/>
)}
</div>
<Separator noPadding />
</div>
@@ -122,6 +135,26 @@ const DocumentsSidebar = memo(
return { citedDocumentIds, citationOrder };
}, [idOfMessageToDisplay, selectedMessage?.packets.length]);
const observerRef = useRef<IntersectionObserver | null>(null);
const [isMoreStuck, setIsMoreStuck] = useState(false);
const moreSentinelRef = useCallback((node: HTMLDivElement | null) => {
observerRef.current?.disconnect();
observerRef.current = null;
if (!node) return;
const root = node.closest("#onyx-chat-sidebar");
if (!root) return;
const observer = new IntersectionObserver(
(entries) => setIsMoreStuck(!entries[0]?.isIntersecting),
{ root, threshold: 0 }
);
observer.observe(node);
observerRef.current = observer;
}, []);
// if these are missing for some reason, then nothing we can do. Just
// don't render.
// TODO: improve this display
@@ -167,7 +200,9 @@ const DocumentsSidebar = memo(
<div className="flex flex-col px-3 gap-6">
{hasCited && (
<div>
<Header onClose={closeSidebar}>Cited Sources</Header>
<Header isTop onClose={closeSidebar}>
Cited Sources
</Header>
<ChatDocumentDisplayWrapper>
{citedDocuments.map((document) => (
<ChatDocumentDisplay
@@ -186,7 +221,11 @@ const DocumentsSidebar = memo(
{hasOther && (
<div>
<Header onClose={closeSidebar}>
<div ref={moreSentinelRef} className="h-px" />
<Header
isTop={!hasCited || isMoreStuck}
onClose={!hasCited || isMoreStuck ? closeSidebar : undefined}
>
{citedDocuments.length > 0 ? "More" : "Found Sources"}
</Header>
<ChatDocumentDisplayWrapper>
@@ -207,7 +246,12 @@ const DocumentsSidebar = memo(
{humanFileDescriptors && humanFileDescriptors.length > 0 && (
<div>
<Header onClose={closeSidebar}>User Files</Header>
<Header
isTop={!hasCited && !hasOther}
onClose={!hasCited && !hasOther ? closeSidebar : undefined}
>
User Files
</Header>
<ChatDocumentDisplayWrapper>
{humanFileDescriptors.map((file) => (
<ChatDocumentDisplay

View File

@@ -649,9 +649,9 @@ const AppInputBar = React.memo(
<Disabled disabled>
<Button
icon={SvgMicrophone}
aria-label="Set up voice"
aria-label="Configure Voice Mode"
prominence="tertiary"
tooltip="Voice not configured. Set up in admin settings."
tooltip="Configure Voice Mode in Admin Settings."
/>
</Disabled>
))}

View File

@@ -143,7 +143,7 @@ const LLMStepInner = ({
rightIcon={SvgExternalLink}
href="/admin/configuration/llm"
>
View in Admin Panel
View in Admin Settings
</Button>
</Disabled>
}

View File

@@ -134,7 +134,7 @@ const collections = (
sidebarItem(ADMIN_PATHS.WEB_SEARCH),
sidebarItem(ADMIN_PATHS.IMAGE_GENERATION),
{
name: "Voice",
name: "Voice Mode",
icon: SvgAudio,
link: "/admin/configuration/voice",
},

View File

@@ -613,7 +613,7 @@ const MemoizedAppSidebarInner = memo(
icon={SvgSettings}
folded={folded}
>
{isAdmin ? "Admin Panel" : "Curator Panel"}
{isAdmin ? "Admin Settings" : "Curator Panel"}
</SidebarTab>
)}
<UserAvatarPopover