Compare commits

...

5 Commits

Author SHA1 Message Date
Chris Weaver
5d7c1f6012 Add option to disable my documents (#5020)
* Add option to disable my documents

* cleanup
2025-07-14 23:17:50 -07:00
joachim-danswer
4fd88b4e06 docker dev and prod template (#4936)
* docker dev and prod template

* more dev files
2025-06-26 22:13:45 -07:00
joachim-danswer
97928e2d6f Forcing vespa language 2025-06-26 22:10:54 -07:00
joachim-danswer
7bce2d287d Dual search pipeline for non-tool-calling LLMs (#4872)
Added dual pipeline also for non-tool-calling LLMs. 
A helper function was created.
2025-06-12 19:51:19 -07:00
Evan Lohn
71712df320 jira daylight savings handling (#4797) 2025-06-03 09:17:11 -07:00
15 changed files with 181 additions and 47 deletions

View File

@@ -83,6 +83,22 @@ def _expand_query(
return rephrased_query
def _expand_query_non_tool_calling_llm(
    expanded_keyword_thread: TimeoutThread[str],
    expanded_semantic_thread: TimeoutThread[str],
) -> QueryExpansions | None:
    """Gather the keyword and semantic query expansions from their threads.

    Both threads are always waited on (keyword first, then semantic) before
    the results are inspected.

    Args:
        expanded_keyword_thread: Thread whose result is used as the keyword
            expansion.
        expanded_semantic_thread: Thread whose result is used as the semantic
            expansion.

    Returns:
        A ``QueryExpansions`` wrapping each expansion in a single-element
        list, or ``None`` if either wait produced ``None``.
    """
    keyword_expansion: str | None = wait_on_background(expanded_keyword_thread)
    semantic_expansion: str | None = wait_on_background(expanded_semantic_thread)

    # All-or-nothing: if either expansion is missing, report no expansions
    # rather than a half-filled QueryExpansions.
    if keyword_expansion is None or semantic_expansion is None:
        return None

    return QueryExpansions(
        keywords_expansions=[keyword_expansion],
        semantic_expansions=[semantic_expansion],
    )
# TODO: break this out into an implementation function
# and a function that handles extracting the necessary fields
# from the state and config
@@ -186,6 +202,28 @@ def choose_tool(
is_keyword, keywords = wait_on_background(keyword_thread)
override_kwargs.precomputed_is_keyword = is_keyword
override_kwargs.precomputed_keywords = keywords
# Dual (keyword + semantic) query expansion for the non-tool-calling LLM case
if (
USE_SEMANTIC_KEYWORD_EXPANSIONS_BASIC_SEARCH
and expanded_keyword_thread
and expanded_semantic_thread
and tool.name == SearchTool._NAME
):
override_kwargs.expanded_queries = _expand_query_non_tool_calling_llm(
expanded_keyword_thread=expanded_keyword_thread,
expanded_semantic_thread=expanded_semantic_thread,
)
if (
USE_SEMANTIC_KEYWORD_EXPANSIONS_BASIC_SEARCH
and tool.name == SearchTool._NAME
and override_kwargs.expanded_queries
):
if (
override_kwargs.expanded_queries.keywords_expansions is None
or override_kwargs.expanded_queries.semantic_expansions is None
):
raise ValueError("No expanded keyword or semantic threads found.")
return ToolChoiceUpdate(
tool_choice=ToolChoice(
tool=tool,
@@ -283,18 +321,23 @@ def choose_tool(
and expanded_keyword_thread
and expanded_semantic_thread
):
keyword_expansion = wait_on_background(expanded_keyword_thread)
semantic_expansion = wait_on_background(expanded_semantic_thread)
override_kwargs.expanded_queries = QueryExpansions(
keywords_expansions=[keyword_expansion],
semantic_expansions=[semantic_expansion],
)
logger.info(
f"Original query: {agent_config.inputs.prompt_builder.raw_user_query}"
override_kwargs.expanded_queries = _expand_query_non_tool_calling_llm(
expanded_keyword_thread=expanded_keyword_thread,
expanded_semantic_thread=expanded_semantic_thread,
)
logger.info(f"Expanded keyword queries: {keyword_expansion}")
logger.info(f"Expanded semantic queries: {semantic_expansion}")
if (
USE_SEMANTIC_KEYWORD_EXPANSIONS_BASIC_SEARCH
and selected_tool.name == SearchTool._NAME
and override_kwargs.expanded_queries
):
# TODO: this is a hack to handle the case where the expanded queries are not found.
# We should refactor this to be more robust.
if (
override_kwargs.expanded_queries.keywords_expansions is None
or override_kwargs.expanded_queries.semantic_expansions is None
):
raise ValueError("No expanded keyword or semantic threads found.")
return ToolChoiceUpdate(
tool_choice=ToolChoice(

View File

@@ -35,6 +35,9 @@ GENERATIVE_MODEL_ACCESS_CHECK_FREQ = int(
) # 1 day
DISABLE_GENERATIVE_AI = os.environ.get("DISABLE_GENERATIVE_AI", "").lower() == "true"
# Controls whether users can use User Knowledge (personal documents) in assistants
DISABLE_USER_KNOWLEDGE = os.environ.get("DISABLE_USER_KNOWLEDGE", "").lower() == "true"
# Controls whether to allow admin query history reports with:
# 1. associated user emails
# 2. anonymized user emails
@@ -746,3 +749,7 @@ IMAGE_ANALYSIS_SYSTEM_PROMPT = os.environ.get(
DISABLE_AUTO_AUTH_REFRESH = (
os.environ.get("DISABLE_AUTO_AUTH_REFRESH", "").lower() == "true"
)
# Forcing Vespa Language
# English: en, German: de, etc. See: https://docs.vespa.ai/en/linguistics.html
VESPA_LANGUAGE_OVERRIDE = os.environ.get("VESPA_LANGUAGE_OVERRIDE")

View File

@@ -21,6 +21,9 @@ from onyx.connectors.confluence.utils import datetime_from_string
from onyx.connectors.confluence.utils import process_attachment
from onyx.connectors.confluence.utils import update_param_in_path
from onyx.connectors.confluence.utils import validate_attachment_filetype
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
is_atlassian_date_error,
)
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
@@ -76,10 +79,6 @@ ONE_DAY = ONE_HOUR * 24
MAX_CACHED_IDS = 100
def _should_propagate_error(e: Exception) -> bool:
return "field 'updated' is invalid" in str(e)
class ConfluenceCheckpoint(ConnectorCheckpoint):
next_page_url: str | None
@@ -367,7 +366,7 @@ class ConfluenceConnector(
)
except Exception as e:
logger.error(f"Error converting page {page.get('id', 'unknown')}: {e}")
if _should_propagate_error(e):
if is_atlassian_date_error(e): # propagate error to be caught and retried
raise
return ConnectorFailure(
failed_document=DocumentFailure(
@@ -446,7 +445,9 @@ class ConfluenceConnector(
f"Failed to extract/summarize attachment {attachment['title']}",
exc_info=e,
)
if _should_propagate_error(e):
if is_atlassian_date_error(
e
): # propagate error to be caught and retried
raise
return ConnectorFailure(
failed_document=DocumentFailure(
@@ -536,7 +537,7 @@ class ConfluenceConnector(
try:
return self._fetch_document_batches(checkpoint, start, end)
except Exception as e:
if _should_propagate_error(e) and start is not None:
if is_atlassian_date_error(e) and start is not None:
logger.warning(
"Confluence says we provided an invalid 'updated' field. This may indicate"
"a real issue, but can also appear during edge cases like daylight"

View File

@@ -86,3 +86,7 @@ def get_oauth_callback_uri(base_domain: str, connector_id: str) -> str:
# Used for development
base_domain = CONNECTOR_LOCALHOST_OVERRIDE
return f"{base_domain.strip('/')}/connector/oauth/callback/{connector_id}"
def is_atlassian_date_error(e: Exception) -> bool:
    """Return True when the error text reports an invalid 'updated' field.

    The check is a plain substring match of ``"field 'updated' is invalid"``
    against ``str(e)``; the exception type is not inspected.

    Args:
        e: The exception to examine.

    Returns:
        ``True`` if the message contains the marker text, otherwise ``False``.
    """
    return "field 'updated' is invalid" in str(e)

View File

@@ -12,6 +12,9 @@ from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.app_configs import JIRA_CONNECTOR_LABELS_TO_SKIP
from onyx.configs.app_configs import JIRA_CONNECTOR_MAX_TICKET_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
is_atlassian_date_error,
)
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
@@ -40,6 +43,8 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
ONE_HOUR = 3600
JIRA_API_VERSION = os.environ.get("JIRA_API_VERSION") or "2"
_JIRA_SLIM_PAGE_SIZE = 500
_JIRA_FULL_PAGE_SIZE = 50
@@ -240,7 +245,17 @@ class JiraConnector(CheckpointedConnector[JiraConnectorCheckpoint], SlimConnecto
checkpoint: JiraConnectorCheckpoint,
) -> CheckpointOutput[JiraConnectorCheckpoint]:
jql = self._get_jql_query(start, end)
try:
return self._load_from_checkpoint(jql, checkpoint)
except Exception as e:
if is_atlassian_date_error(e):
jql = self._get_jql_query(start - ONE_HOUR, end)
return self._load_from_checkpoint(jql, checkpoint)
raise e
def _load_from_checkpoint(
self, jql: str, checkpoint: JiraConnectorCheckpoint
) -> CheckpointOutput[JiraConnectorCheckpoint]:
# Get the current offset from checkpoint or start at 0
starting_offset = checkpoint.offset or 0
current_offset = starting_offset

View File

@@ -11,6 +11,7 @@ import httpx
from retry import retry
from onyx.configs.app_configs import LOG_VESPA_TIMING_INFORMATION
from onyx.configs.app_configs import VESPA_LANGUAGE_OVERRIDE
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import InferenceChunkUncleaned
from onyx.document_index.interfaces import VespaChunkRequest
@@ -308,6 +309,9 @@ def query_vespa(
),
)
if VESPA_LANGUAGE_OVERRIDE:
params["language"] = VESPA_LANGUAGE_OVERRIDE
try:
with get_vespa_http_client() as http_client:
response = http_client.post(SEARCH_ENDPOINT, json=params)

View File

@@ -53,6 +53,7 @@ from onyx.server.features.persona.models import PersonaSnapshot
from onyx.server.features.persona.models import PersonaUpsertRequest
from onyx.server.features.persona.models import PromptSnapshot
from onyx.server.models import DisplayPriorityRequest
from onyx.server.settings.store import load_settings
from onyx.tools.utils import is_image_generation_available
from onyx.utils.logger import setup_logger
from onyx.utils.telemetry import create_milestone_and_report
@@ -60,6 +61,23 @@ from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
def _validate_user_knowledge_enabled(
    persona_upsert_request: PersonaUpsertRequest, action: str
) -> None:
    """Reject requests that attach user files/folders while User Knowledge is off.

    Args:
        persona_upsert_request: Incoming upsert request; its ``user_file_ids``
            and ``user_folder_ids`` are inspected.
        action: Verb interpolated into the error detail (e.g. "create",
            "update").

    Raises:
        HTTPException: With status 400 when User Knowledge is disabled and the
            request carries any user file or folder ids.
    """
    settings = load_settings()

    # Only an explicit False disables the feature. The setting is typed
    # `bool | None` with a default of True, so an unset (None) value is
    # treated as enabled rather than silently rejecting the request.
    if settings.user_knowledge_enabled is not False:
        return

    uses_user_knowledge = bool(
        persona_upsert_request.user_file_ids
        or persona_upsert_request.user_folder_ids
    )
    if not uses_user_knowledge:
        return

    raise HTTPException(
        status_code=400,
        detail=f"User Knowledge is disabled. Cannot {action} assistant with user files or folders.",
    )
admin_router = APIRouter(prefix="/admin/persona")
basic_router = APIRouter(prefix="/persona")
@@ -204,6 +222,8 @@ def create_persona(
) -> PersonaSnapshot:
tenant_id = get_current_tenant_id()
_validate_user_knowledge_enabled(persona_upsert_request, "create")
prompt_id = (
persona_upsert_request.prompt_ids[0]
if persona_upsert_request.prompt_ids
@@ -251,6 +271,7 @@ def update_persona(
user: User | None = Depends(current_user),
db_session: Session = Depends(get_session),
) -> PersonaSnapshot:
_validate_user_knowledge_enabled(persona_upsert_request, "update")
prompt_id = (
persona_upsert_request.prompt_ids[0]
if persona_upsert_request.prompt_ids

View File

@@ -59,6 +59,9 @@ class Settings(BaseModel):
search_time_image_analysis_enabled: bool | None = False
image_analysis_max_size_mb: int | None = 20
# User Knowledge settings
user_knowledge_enabled: bool | None = True
class UserSettings(Settings):
notifications: list[Notification]

View File

@@ -1,3 +1,4 @@
from onyx.configs.app_configs import DISABLE_USER_KNOWLEDGE
from onyx.configs.app_configs import ONYX_QUERY_HISTORY_TYPE
from onyx.configs.constants import KV_SETTINGS_KEY
from onyx.configs.constants import OnyxRedisLocks
@@ -48,6 +49,10 @@ def load_settings() -> Settings:
settings.anonymous_user_enabled = anonymous_user_enabled
settings.query_history_type = ONYX_QUERY_HISTORY_TYPE
# Override user knowledge setting if disabled via environment variable
if DISABLE_USER_KNOWLEDGE:
settings.user_knowledge_enabled = False
return settings

View File

@@ -124,6 +124,11 @@ services:
# Seeding configuration
- USE_IAM_AUTH=${USE_IAM_AUTH:-}
- ONYX_QUERY_HISTORY_TYPE=${ONYX_QUERY_HISTORY_TYPE:-}
# Vespa Language Forcing
# See: https://docs.vespa.ai/en/linguistics.html
- VESPA_LANGUAGE_OVERRIDE=${VESPA_LANGUAGE_OVERRIDE:-}
# Uncomment the line below to use if IAM_AUTH is true and you are using iam auth for postgres
# volumes:
# - ./bundle.pem:/app/bundle.pem:ro

View File

@@ -92,6 +92,10 @@ services:
# Chat Configs
- HARD_DELETE_CHATS=${HARD_DELETE_CHATS:-}
# Vespa Language Forcing
# See: https://docs.vespa.ai/en/linguistics.html
- VESPA_LANGUAGE_OVERRIDE=${VESPA_LANGUAGE_OVERRIDE:-}
# Enterprise Edition only
- API_KEY_HASH_ROUNDS=${API_KEY_HASH_ROUNDS:-}
- ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}

View File

@@ -106,6 +106,10 @@ services:
- API_KEY_HASH_ROUNDS=${API_KEY_HASH_ROUNDS:-}
# Seeding configuration
- USE_IAM_AUTH=${USE_IAM_AUTH:-}
# Vespa Language Forcing
# See: https://docs.vespa.ai/en/linguistics.html
- VESPA_LANGUAGE_OVERRIDE=${VESPA_LANGUAGE_OVERRIDE:-}
extra_hosts:
- "host.docker.internal:host-gateway"
logging:

View File

@@ -55,3 +55,8 @@ SESSION_EXPIRE_TIME_SECONDS=604800
# Default values here are what Postgres uses by default, feel free to change.
POSTGRES_USER=postgres
POSTGRES_PASSWORD=password
# To force a specific Vespa language, set this to a language code ('en', 'de', etc.).
# See: https://docs.vespa.ai/en/linguistics.html
#VESPA_LANGUAGE_OVERRIDE=

View File

@@ -40,8 +40,9 @@ import {
} from "@/components/ui/tooltip";
import Link from "next/link";
import { useRouter, useSearchParams } from "next/navigation";
import { useEffect, useMemo, useState } from "react";
import { useContext, useEffect, useMemo, useState } from "react";
import * as Yup from "yup";
import { SettingsContext } from "@/components/settings/SettingsProvider";
import { FullPersona, PersonaLabel, StarterMessage } from "./interfaces";
import {
PersonaUpsertParameters,
@@ -147,6 +148,7 @@ export function AssistantEditor({
const { popup, setPopup } = usePopup();
const { labels, refreshLabels, createLabel, updateLabel, deleteLabel } =
useLabels();
const settings = useContext(SettingsContext);
const colorOptions = [
"#FF6FBF",
@@ -237,6 +239,14 @@ export function AssistantEditor({
const [showVisibilityWarning, setShowVisibilityWarning] = useState(false);
const canShowKnowledgeSource =
ccPairs.length > 0 &&
searchTool &&
!(user?.role === UserRole.BASIC && documentSets.length === 0);
const userKnowledgeEnabled =
settings?.settings?.user_knowledge_enabled ?? true;
const initialValues = {
name: existingPersona?.name ?? "",
description: existingPersona?.description ?? "",
@@ -275,11 +285,14 @@ export function AssistantEditor({
selectedGroups: existingPersona?.groups ?? [],
user_file_ids: existingPersona?.user_file_ids ?? [],
user_folder_ids: existingPersona?.user_folder_ids ?? [],
knowledge_source:
(existingPersona?.user_file_ids?.length ?? 0) > 0 ||
(existingPersona?.user_folder_ids?.length ?? 0) > 0
? "user_files"
: "team_knowledge",
knowledge_source: !canShowKnowledgeSource
? "user_files"
: !userKnowledgeEnabled
? "team_knowledge"
: (existingPersona?.user_file_ids?.length ?? 0) > 0 ||
(existingPersona?.user_folder_ids?.length ?? 0) > 0
? "user_files"
: "team_knowledge",
is_default_persona: existingPersona?.is_default_persona ?? false,
};
@@ -374,11 +387,6 @@ export function AssistantEditor({
}
};
const canShowKnowledgeSource =
ccPairs.length > 0 &&
searchTool &&
!(user?.role != "admin" && documentSets.length === 0);
return (
<div className="mx-auto max-w-4xl">
<style>
@@ -950,26 +958,28 @@ export function AssistantEditor({
</p>
</div>
<div
className={`w-[150px] h-[110px] rounded-lg border flex flex-col items-center justify-center cursor-pointer transition-all ${
values.knowledge_source === "user_files"
? "border-2 border-blue-500 bg-blue-50 dark:bg-blue-950/20"
: "border-gray-200 hover:border-gray-300 dark:border-gray-700 dark:hover:border-gray-600"
}`}
onClick={() =>
setFieldValue(
"knowledge_source",
"user_files"
)
}
>
<div className="text-blue-500 mb-2">
<FileIcon size={24} />
{userKnowledgeEnabled && (
<div
className={`w-[150px] h-[110px] rounded-lg border flex flex-col items-center justify-center cursor-pointer transition-all ${
values.knowledge_source === "user_files"
? "border-2 border-blue-500 bg-blue-50 dark:bg-blue-950/20"
: "border-gray-200 hover:border-gray-300 dark:border-gray-700 dark:hover:border-gray-600"
}`}
onClick={() =>
setFieldValue(
"knowledge_source",
"user_files"
)
}
>
<div className="text-blue-500 mb-2">
<FileIcon size={24} />
</div>
<p className="font-medium text-xs">
User Knowledge
</p>
</div>
<p className="font-medium text-xs">
User Knowledge
</p>
</div>
)}
</div>
</div>
</>

View File

@@ -27,6 +27,9 @@ export interface Settings {
image_extraction_and_analysis_enabled?: boolean;
search_time_image_analysis_enabled?: boolean;
image_analysis_max_size_mb?: number | null;
// User Knowledge settings
user_knowledge_enabled?: boolean;
}
export enum NotificationType {