mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-04-09 17:02:48 +00:00
Compare commits
9 Commits
edge
...
assistants
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2052f6a2b9 | ||
|
|
70c0a32aea | ||
|
|
b61bd3da2f | ||
|
|
17807d5a56 | ||
|
|
7e7c984d3c | ||
|
|
f5c60690c1 | ||
|
|
8d1185a383 | ||
|
|
8487b082b0 | ||
|
|
c236b140a6 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -3,6 +3,7 @@
|
||||
.venv
|
||||
.mypy_cache
|
||||
.idea
|
||||
/backend/apicache/
|
||||
/deployment/data/nginx/app.conf
|
||||
.vscode/launch.json
|
||||
*.sw?
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
"""add persona filtering columns
|
||||
|
||||
Revision ID: 6dffd0cbb64f
|
||||
Revises: bc9771dccadf
|
||||
Create Date: 2024-06-26 11:26:22.013659
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "6dffd0cbb64f"
|
||||
down_revision = "bc9771dccadf"
|
||||
branch_labels: None = None
|
||||
depends_on: None = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add the nullable float ``num_days`` column to the ``persona`` table."""
    num_days_column = sa.Column("num_days", sa.Float(), nullable=True)
    op.add_column("persona", num_days_column)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Remove the ``num_days`` column from the ``persona`` table."""
    op.drop_column(table_name="persona", column_name="num_days")
|
||||
@@ -12,8 +12,8 @@ import fastapi_users_db_sqlalchemy
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "bc9771dccadf"
|
||||
down_revision = "0568ccf46a6b"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
branch_labels: None = None
|
||||
depends_on: None = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
|
||||
@@ -85,6 +85,7 @@ def load_personas_from_yaml(
|
||||
num_chunks=persona.get("num_chunks")
|
||||
if persona.get("num_chunks") is not None
|
||||
else default_chunks,
|
||||
num_days=persona.get("num_days"),
|
||||
llm_relevance_filter=persona.get("llm_relevance_filter"),
|
||||
starter_messages=persona.get("starter_messages"),
|
||||
llm_filter_extraction=persona.get("llm_filter_extraction"),
|
||||
|
||||
@@ -15,6 +15,10 @@ personas:
|
||||
# Remove the field to set to the system default number of chunks/tokens to pass to Gen AI
|
||||
# Each chunk is 512 tokens long
|
||||
num_chunks: 10
|
||||
# Specifies how young a document should be to be included in search context
|
||||
# If a document's last_updated_time is older than the current time minus num_days, it is excluded
|
||||
# Field unused if set to null
|
||||
num_days: null
|
||||
# Enable/Disable usage of the LLM chunk filter feature whereby each chunk is passed to the LLM to determine
|
||||
# if the chunk is useful or not towards the latest user query
|
||||
# This feature can be overridden for all personas via DISABLE_LLM_CHUNK_FILTER env variable
|
||||
|
||||
@@ -971,6 +971,8 @@ class Persona(Base):
|
||||
)
|
||||
# Number of chunks to pass to the LLM for generation.
|
||||
num_chunks: Mapped[float | None] = mapped_column(Float, nullable=True)
|
||||
# Maximum age in days (by last-updated time) of documents used as search context, no cutoff if null
|
||||
num_days: Mapped[float | None] = mapped_column(Float, nullable=True)
|
||||
# Pass every chunk through LLM for evaluation, fairly expensive
|
||||
# Can be turned off globally by admin, in which case, this setting is ignored
|
||||
llm_relevance_filter: Mapped[bool] = mapped_column(Boolean)
|
||||
|
||||
@@ -82,6 +82,7 @@ def create_update_persona(
|
||||
name=create_persona_request.name,
|
||||
description=create_persona_request.description,
|
||||
num_chunks=create_persona_request.num_chunks,
|
||||
num_days=create_persona_request.num_days,
|
||||
llm_relevance_filter=create_persona_request.llm_relevance_filter,
|
||||
llm_filter_extraction=create_persona_request.llm_filter_extraction,
|
||||
recency_bias=create_persona_request.recency_bias,
|
||||
@@ -327,6 +328,7 @@ def upsert_persona(
|
||||
name: str,
|
||||
description: str,
|
||||
num_chunks: float,
|
||||
num_days: float | None,
|
||||
llm_relevance_filter: bool,
|
||||
llm_filter_extraction: bool,
|
||||
recency_bias: RecencyBiasSetting,
|
||||
@@ -365,6 +367,7 @@ def upsert_persona(
|
||||
persona.name = name
|
||||
persona.description = description
|
||||
persona.num_chunks = num_chunks
|
||||
persona.num_days = num_days
|
||||
persona.llm_relevance_filter = llm_relevance_filter
|
||||
persona.llm_filter_extraction = llm_filter_extraction
|
||||
persona.recency_bias = recency_bias
|
||||
@@ -396,6 +399,7 @@ def upsert_persona(
|
||||
name=name,
|
||||
description=description,
|
||||
num_chunks=num_chunks,
|
||||
num_days=num_days,
|
||||
llm_relevance_filter=llm_relevance_filter,
|
||||
llm_filter_extraction=llm_filter_extraction,
|
||||
recency_bias=recency_bias,
|
||||
|
||||
@@ -58,6 +58,7 @@ def create_slack_bot_persona(
|
||||
name=persona_name,
|
||||
description="",
|
||||
num_chunks=num_chunks,
|
||||
num_days=None,
|
||||
llm_relevance_filter=True,
|
||||
llm_filter_extraction=True,
|
||||
recency_bias=RecencyBiasSetting.AUTO,
|
||||
|
||||
@@ -249,6 +249,7 @@ class Answer:
|
||||
|
||||
if tool.name() == SearchTool.NAME:
|
||||
self._update_prompt_builder_for_search_tool(prompt_builder, [])
|
||||
|
||||
elif tool.name() == ImageGenerationTool.NAME:
|
||||
prompt_builder.update_user_prompt(
|
||||
build_image_generation_user_prompt(
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
from datetime import datetime
|
||||
from datetime import timedelta
|
||||
from datetime import timezone
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel
|
||||
@@ -32,6 +34,49 @@ class BaseFilters(BaseModel):
|
||||
time_cutoff: datetime | None = None
|
||||
tags: list[Tag] | None = None
|
||||
|
||||
@classmethod
def from_instance(cls, base_filters: "BaseFilters | None") -> "BaseFilters | None":
    """Build a new ``cls`` object carrying over the base filter fields.

    Copies ``source_type``, ``document_set``, ``time_cutoff`` and ``tags``
    from ``base_filters``. Returns ``None`` when ``base_filters`` is ``None``.
    """
    if base_filters is None:
        return None

    copied_fields = {
        field_name: getattr(base_filters, field_name)
        for field_name in ("source_type", "document_set", "time_cutoff", "tags")
    }
    return cls(**copied_fields)
|
||||
|
||||
@classmethod
def from_persona(cls, persona: Persona) -> "BaseFilters | None":
    """Derive a recency filter from the persona's ``num_days`` setting.

    Returns ``None`` when ``persona.num_days`` is ``None``. Otherwise returns
    a filter whose ``time_cutoff`` is the current UTC time minus ``num_days``
    days.
    """
    max_age_days = persona.num_days
    if max_age_days is None:
        return None

    cutoff = datetime.now(timezone.utc) - timedelta(days=max_age_days)
    return cls(time_cutoff=cutoff)
|
||||
|
||||
@classmethod
def merge(
    cls,
    retrieval_filter: "BaseFilters | None",
    persona_filter: "BaseFilters | None",
) -> "BaseFilters | None":
    """Merge two filters into one, preferring the human-specified values.

    Args:
        retrieval_filter: Filters chosen by the user for this request.
        persona_filter: Filters implied by the persona configuration.

    Returns:
        ``None`` if both inputs are ``None``; a copy (via ``from_instance``)
        of whichever input is present if only one is; otherwise a new filter
        where each field takes the retrieval filter's value unless that value
        is ``None``, in which case the persona filter's value is used.
    """
    if retrieval_filter is None and persona_filter is None:
        return None
    if retrieval_filter is None:
        return cls.from_instance(persona_filter)
    if persona_filter is None:
        return cls.from_instance(retrieval_filter)

    # Apply the same "human value wins, persona value is the fallback" rule
    # to every field, so persona-level constraints are not silently dropped
    # when the user simply left a field unset.
    merged_fields = {}
    for field_name in ("source_type", "document_set", "time_cutoff", "tags"):
        human_value = getattr(retrieval_filter, field_name)
        merged_fields[field_name] = (
            human_value
            if human_value is not None
            else getattr(persona_filter, field_name)
        )
    return cls(**merged_fields)
|
||||
|
||||
|
||||
class IndexFilters(BaseFilters):
|
||||
access_control_list: list[str] | None
|
||||
|
||||
@@ -19,6 +19,7 @@ class CreatePersonaRequest(BaseModel):
|
||||
name: str
|
||||
description: str
|
||||
num_chunks: float
|
||||
# Maximum document age in days for search context; None means no recency cutoff
num_days: float | None = None
|
||||
llm_relevance_filter: bool
|
||||
is_public: bool
|
||||
llm_filter_extraction: bool
|
||||
@@ -44,6 +45,7 @@ class PersonaSnapshot(BaseModel):
|
||||
display_priority: int | None
|
||||
description: str
|
||||
num_chunks: float | None
|
||||
num_days: float | None
|
||||
llm_relevance_filter: bool
|
||||
llm_filter_extraction: bool
|
||||
llm_model_provider_override: str | None
|
||||
@@ -80,6 +82,7 @@ class PersonaSnapshot(BaseModel):
|
||||
display_priority=persona.display_priority,
|
||||
description=persona.description,
|
||||
num_chunks=persona.num_chunks,
|
||||
num_days=persona.num_days,
|
||||
llm_relevance_filter=persona.llm_relevance_filter,
|
||||
llm_filter_extraction=persona.llm_filter_extraction,
|
||||
llm_model_provider_override=persona.llm_model_provider_override,
|
||||
|
||||
@@ -20,6 +20,7 @@ from danswer.llm.answering.models import PromptConfig
|
||||
from danswer.llm.interfaces import LLM
|
||||
from danswer.search.enums import QueryFlow
|
||||
from danswer.search.enums import SearchType
|
||||
from danswer.search.models import BaseFilters
|
||||
from danswer.search.models import IndexFilters
|
||||
from danswer.search.models import InferenceSection
|
||||
from danswer.search.models import RetrievalDetails
|
||||
@@ -193,11 +194,16 @@ class SearchTool(Tool):
|
||||
yield from self._build_response_for_specified_sections(query)
|
||||
return
|
||||
|
||||
persona_filter = BaseFilters.from_persona(self.persona)
|
||||
retrieval_filter = (
|
||||
self.retrieval_options.filters if self.retrieval_options else None
|
||||
)
|
||||
|
||||
search_pipeline = SearchPipeline(
|
||||
search_request=SearchRequest(
|
||||
query=query,
|
||||
human_selected_filters=(
|
||||
self.retrieval_options.filters if self.retrieval_options else None
|
||||
human_selected_filters=BaseFilters.merge(
|
||||
retrieval_filter=retrieval_filter, persona_filter=persona_filter
|
||||
),
|
||||
persona=self.persona,
|
||||
offset=self.retrieval_options.offset
|
||||
|
||||
@@ -176,6 +176,8 @@ export function AssistantEditor({
|
||||
existingPersona?.document_sets?.map((documentSet) => documentSet.id) ??
|
||||
([] as number[]),
|
||||
num_chunks: existingPersona?.num_chunks ?? null,
|
||||
recent_documents_enabled: existingPersona?.num_days != null,
|
||||
num_days: existingPersona?.num_days ?? null,
|
||||
include_citations: existingPersona?.prompts[0]?.include_citations ?? true,
|
||||
llm_relevance_filter: existingPersona?.llm_relevance_filter ?? false,
|
||||
llm_model_provider_override:
|
||||
@@ -211,6 +213,8 @@ export function AssistantEditor({
|
||||
is_public: Yup.boolean().required(),
|
||||
document_set_ids: Yup.array().of(Yup.number()),
|
||||
num_chunks: Yup.number().nullable(),
|
||||
recent_documents_enabled: Yup.boolean().nullable(),
|
||||
num_days: Yup.number().nullable(),
|
||||
include_citations: Yup.boolean().required(),
|
||||
llm_relevance_filter: Yup.boolean().required(),
|
||||
llm_model_version_override: Yup.string().nullable(),
|
||||
@@ -298,6 +302,10 @@ export function AssistantEditor({
|
||||
// if disable_retrieval is set, set num_chunks to 0
|
||||
// to tell the backend to not fetch any documents
|
||||
const numChunks = searchToolEnabled ? values.num_chunks || 10 : 0;
|
||||
// NOTE(review): when the search tool is enabled this falls back to 30 whenever
// `values.num_days` is empty/falsy, regardless of `recent_documents_enabled`, and
// it is 0 when the search tool is disabled. The backend turns a non-null num_days
// into a time cutoff of "now - num_days", so a persona saved with the toggle off
// presumably still gets a 30-day cutoff. This likely should be null when the
// toggle is off; confirm the API accepts null before changing it.
const numDays = searchToolEnabled ? values.num_days || 30 : 0;
|
||||
const useRecentDocuments = searchToolEnabled
|
||||
? values.recent_documents_enabled
|
||||
: false;
|
||||
|
||||
// don't set groups if marked as public
|
||||
const groups = values.is_public ? [] : values.groups;
|
||||
@@ -310,6 +318,7 @@ export function AssistantEditor({
|
||||
existingPromptId: existingPrompt?.id,
|
||||
...values,
|
||||
num_chunks: numChunks,
|
||||
num_days: numDays,
|
||||
users:
|
||||
user && !checkUserIsNoAuthUser(user.id) ? [user.id] : undefined,
|
||||
groups,
|
||||
@@ -319,6 +328,7 @@ export function AssistantEditor({
|
||||
[promptResponse, personaResponse] = await createPersona({
|
||||
...values,
|
||||
num_chunks: numChunks,
|
||||
num_days: numDays,
|
||||
users:
|
||||
user && !checkUserIsNoAuthUser(user.id) ? [user.id] : undefined,
|
||||
groups,
|
||||
@@ -580,6 +590,47 @@ export function AssistantEditor({
|
||||
}
|
||||
/>
|
||||
|
||||
<BooleanFormField
|
||||
name={`recent_documents_enabled`}
|
||||
label="Only Use Recent Documents"
|
||||
subtext={
|
||||
"If enabled, only recent documents will be used for context."
|
||||
}
|
||||
onChange={() => {
|
||||
setFieldValue("num_days", null);
|
||||
setFieldValue(
|
||||
"recent_documents_enabled",
|
||||
!values.recent_documents_enabled
|
||||
);
|
||||
}}
|
||||
/>
|
||||
|
||||
{values.recent_documents_enabled && (
|
||||
<div className="pl-4 border-l-2 ml-4 border-border">
|
||||
<TextFormField
|
||||
name="num_days"
|
||||
label="Number of Days"
|
||||
placeholder="Defaults to last 30 days."
|
||||
subtext={
|
||||
<div>
|
||||
How recent should the documents be
|
||||
that are used for context?
|
||||
</div>
|
||||
}
|
||||
onChange={(e) => {
|
||||
const value = e.target.value;
|
||||
// Allow only integer values
|
||||
if (
|
||||
value === "" ||
|
||||
/^[0-9]+$/.test(value)
|
||||
) {
|
||||
setFieldValue("num_days", value);
|
||||
}
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<BooleanFormField
|
||||
name="include_citations"
|
||||
label="Include Citations"
|
||||
|
||||
@@ -35,6 +35,8 @@ export interface Persona {
|
||||
llm_model_provider_override?: string;
|
||||
llm_model_version_override?: string;
|
||||
starter_messages: StarterMessage[] | null;
|
||||
use_recent_documents: boolean;
|
||||
num_days: number | null;
|
||||
default_persona: boolean;
|
||||
users: MinimalUserSnapshot[];
|
||||
groups: number[];
|
||||
|
||||
@@ -7,6 +7,7 @@ interface PersonaCreationRequest {
|
||||
task_prompt: string;
|
||||
document_set_ids: number[];
|
||||
num_chunks: number | null;
|
||||
num_days: number | null;
|
||||
include_citations: boolean;
|
||||
is_public: boolean;
|
||||
llm_relevance_filter: boolean | null;
|
||||
@@ -27,6 +28,7 @@ interface PersonaUpdateRequest {
|
||||
task_prompt: string;
|
||||
document_set_ids: number[];
|
||||
num_chunks: number | null;
|
||||
num_days: number | null;
|
||||
include_citations: boolean;
|
||||
is_public: boolean;
|
||||
llm_relevance_filter: boolean | null;
|
||||
@@ -105,6 +107,7 @@ function buildPersonaAPIBody(
|
||||
description,
|
||||
document_set_ids,
|
||||
num_chunks,
|
||||
num_days,
|
||||
llm_relevance_filter,
|
||||
is_public,
|
||||
groups,
|
||||
@@ -116,6 +119,7 @@ function buildPersonaAPIBody(
|
||||
name,
|
||||
description,
|
||||
num_chunks,
|
||||
num_days,
|
||||
llm_relevance_filter,
|
||||
llm_filter_extraction: false,
|
||||
is_public,
|
||||
|
||||
Reference in New Issue
Block a user