Compare commits

...

9 Commits

Author SHA1 Message Date
Luke Wulf
2052f6a2b9 small fix 2024-06-27 18:01:09 -07:00
Luke Wulf
70c0a32aea small change 2024-06-27 17:59:40 -07:00
Luke Wulf
b61bd3da2f merging two columns into one nullable column, num_days 2024-06-27 17:49:08 -07:00
Luke Wulf
17807d5a56 Address comments 2024-06-27 16:32:02 -07:00
Luke Wulf
7e7c984d3c additional file changes 2024-06-27 15:44:03 -07:00
Luke Wulf
f5c60690c1 Address Comments 2024-06-27 15:27:37 -07:00
Luke Wulf
8d1185a383 build fixes 2024-06-26 18:22:09 -07:00
Luke Wulf
8487b082b0 clean up 2024-06-26 17:59:12 -07:00
Luke Wulf
c236b140a6 Configuring Assistants for Most Recent Documents 2024-06-26 17:17:40 -07:00
15 changed files with 153 additions and 4 deletions

1
.gitignore vendored
View File

@@ -3,6 +3,7 @@
.venv
.mypy_cache
.idea
/backend/apicache/
/deployment/data/nginx/app.conf
.vscode/launch.json
*.sw?

View File

@@ -0,0 +1,24 @@
"""add persona filtering columns
Revision ID: 6dffd0cbb64f
Revises: bc9771dccadf
Create Date: 2024-06-26 11:26:22.013659
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "6dffd0cbb64f"
down_revision = "bc9771dccadf"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
    """Apply this revision: add the nullable ``num_days`` column to ``persona``."""
    # Nullable float so existing personas keep working with no recency limit.
    num_days_column = sa.Column("num_days", sa.Float(), nullable=True)
    op.add_column("persona", num_days_column)
def downgrade() -> None:
    """Revert this revision: drop the ``num_days`` column from ``persona``."""
    table_name, column_name = "persona", "num_days"
    op.drop_column(table_name, column_name)

View File

@@ -12,8 +12,8 @@ import fastapi_users_db_sqlalchemy
# revision identifiers, used by Alembic.
revision = "bc9771dccadf"
down_revision = "0568ccf46a6b"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -85,6 +85,7 @@ def load_personas_from_yaml(
num_chunks=persona.get("num_chunks")
if persona.get("num_chunks") is not None
else default_chunks,
num_days=persona.get("num_days"),
llm_relevance_filter=persona.get("llm_relevance_filter"),
starter_messages=persona.get("starter_messages"),
llm_filter_extraction=persona.get("llm_filter_extraction"),

View File

@@ -15,6 +15,10 @@ personas:
# Remove the field to set to the system default number of chunks/tokens to pass to Gen AI
# Each chunk is 512 tokens long
num_chunks: 10
# Maximum age, in days, a document may have and still be included in search context
# A document is excluded if its last updated time is older than the current time minus num_days
# Field unused if set to null
num_days: null
# Enable/Disable usage of the LLM chunk filter feature whereby each chunk is passed to the LLM to determine
# if the chunk is useful or not towards the latest user query
# This feature can be overridden for all personas via DISABLE_LLM_CHUNK_FILTER env variable

View File

@@ -971,6 +971,8 @@ class Persona(Base):
)
# Number of chunks to pass to the LLM for generation.
num_chunks: Mapped[float | None] = mapped_column(Float, nullable=True)
# Maximum age (in days) of a document's last updated time for inclusion; ignored if null
num_days: Mapped[float | None] = mapped_column(Float, nullable=True)
# Pass every chunk through LLM for evaluation, fairly expensive
# Can be turned off globally by admin, in which case, this setting is ignored
llm_relevance_filter: Mapped[bool] = mapped_column(Boolean)

View File

@@ -82,6 +82,7 @@ def create_update_persona(
name=create_persona_request.name,
description=create_persona_request.description,
num_chunks=create_persona_request.num_chunks,
num_days=create_persona_request.num_days,
llm_relevance_filter=create_persona_request.llm_relevance_filter,
llm_filter_extraction=create_persona_request.llm_filter_extraction,
recency_bias=create_persona_request.recency_bias,
@@ -327,6 +328,7 @@ def upsert_persona(
name: str,
description: str,
num_chunks: float,
num_days: float | None,
llm_relevance_filter: bool,
llm_filter_extraction: bool,
recency_bias: RecencyBiasSetting,
@@ -365,6 +367,7 @@ def upsert_persona(
persona.name = name
persona.description = description
persona.num_chunks = num_chunks
persona.num_days = num_days
persona.llm_relevance_filter = llm_relevance_filter
persona.llm_filter_extraction = llm_filter_extraction
persona.recency_bias = recency_bias
@@ -396,6 +399,7 @@ def upsert_persona(
name=name,
description=description,
num_chunks=num_chunks,
num_days=num_days,
llm_relevance_filter=llm_relevance_filter,
llm_filter_extraction=llm_filter_extraction,
recency_bias=recency_bias,

View File

@@ -58,6 +58,7 @@ def create_slack_bot_persona(
name=persona_name,
description="",
num_chunks=num_chunks,
num_days=None,
llm_relevance_filter=True,
llm_filter_extraction=True,
recency_bias=RecencyBiasSetting.AUTO,

View File

@@ -249,6 +249,7 @@ class Answer:
if tool.name() == SearchTool.NAME:
self._update_prompt_builder_for_search_tool(prompt_builder, [])
elif tool.name() == ImageGenerationTool.NAME:
prompt_builder.update_user_prompt(
build_image_generation_user_prompt(

View File

@@ -1,4 +1,6 @@
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import Any
from pydantic import BaseModel
@@ -32,6 +34,49 @@ class BaseFilters(BaseModel):
time_cutoff: datetime | None = None
tags: list[Tag] | None = None
@classmethod
def from_instance(cls, base_filters: "BaseFilters | None") -> "BaseFilters | None":
    """Return a fresh ``BaseFilters`` carrying the same field values, or None.

    A shallow field-by-field copy; passing None simply yields None.
    """
    if base_filters is None:
        return None
    field_values = {
        "source_type": base_filters.source_type,
        "document_set": base_filters.document_set,
        "time_cutoff": base_filters.time_cutoff,
        "tags": base_filters.tags,
    }
    return cls(**field_values)
@classmethod
def from_persona(cls, persona: Persona) -> "BaseFilters | None":
    """Derive a time-cutoff filter from the persona's ``num_days`` setting.

    Returns None when the persona imposes no recency limit
    (``num_days`` is null); otherwise the cutoff is now (UTC)
    minus ``num_days`` days.
    """
    if persona.num_days is None:
        return None
    cutoff = datetime.now(timezone.utc) - timedelta(days=persona.num_days)
    return cls(time_cutoff=cutoff)
@classmethod
def merge(
    cls,
    retrieval_filter: "BaseFilters | None",
    persona_filter: "BaseFilters | None",
) -> "BaseFilters | None":
    """Merge two filters, preferring the human-specified retrieval filter.

    Every field comes from ``retrieval_filter``; the persona filter only
    contributes ``time_cutoff`` when the retrieval filter leaves it unset.
    If both inputs are None the result is None.
    """
    # from_instance(None) is None, so the both-None case falls out naturally.
    if retrieval_filter is None:
        return cls.from_instance(persona_filter)
    if persona_filter is None:
        return cls.from_instance(retrieval_filter)
    cutoff = retrieval_filter.time_cutoff
    if cutoff is None:
        cutoff = persona_filter.time_cutoff
    return cls(
        source_type=retrieval_filter.source_type,
        document_set=retrieval_filter.document_set,
        time_cutoff=cutoff,
        tags=retrieval_filter.tags,
    )
class IndexFilters(BaseFilters):
access_control_list: list[str] | None

View File

@@ -19,6 +19,7 @@ class CreatePersonaRequest(BaseModel):
name: str
description: str
num_chunks: float
num_days: float
llm_relevance_filter: bool
is_public: bool
llm_filter_extraction: bool
@@ -44,6 +45,7 @@ class PersonaSnapshot(BaseModel):
display_priority: int | None
description: str
num_chunks: float | None
num_days: float | None
llm_relevance_filter: bool
llm_filter_extraction: bool
llm_model_provider_override: str | None
@@ -80,6 +82,7 @@ class PersonaSnapshot(BaseModel):
display_priority=persona.display_priority,
description=persona.description,
num_chunks=persona.num_chunks,
num_days=persona.num_days,
llm_relevance_filter=persona.llm_relevance_filter,
llm_filter_extraction=persona.llm_filter_extraction,
llm_model_provider_override=persona.llm_model_provider_override,

View File

@@ -20,6 +20,7 @@ from danswer.llm.answering.models import PromptConfig
from danswer.llm.interfaces import LLM
from danswer.search.enums import QueryFlow
from danswer.search.enums import SearchType
from danswer.search.models import BaseFilters
from danswer.search.models import IndexFilters
from danswer.search.models import InferenceSection
from danswer.search.models import RetrievalDetails
@@ -193,11 +194,16 @@ class SearchTool(Tool):
yield from self._build_response_for_specified_sections(query)
return
persona_filter = BaseFilters.from_persona(self.persona)
retrieval_filter = (
self.retrieval_options.filters if self.retrieval_options else None
)
search_pipeline = SearchPipeline(
search_request=SearchRequest(
query=query,
human_selected_filters=(
self.retrieval_options.filters if self.retrieval_options else None
human_selected_filters=BaseFilters.merge(
retrieval_filter=retrieval_filter, persona_filter=persona_filter
),
persona=self.persona,
offset=self.retrieval_options.offset

View File

@@ -176,6 +176,8 @@ export function AssistantEditor({
existingPersona?.document_sets?.map((documentSet) => documentSet.id) ??
([] as number[]),
num_chunks: existingPersona?.num_chunks ?? null,
recent_documents_enabled: existingPersona?.num_days != null,
num_days: existingPersona?.num_days ?? null,
include_citations: existingPersona?.prompts[0]?.include_citations ?? true,
llm_relevance_filter: existingPersona?.llm_relevance_filter ?? false,
llm_model_provider_override:
@@ -211,6 +213,8 @@ export function AssistantEditor({
is_public: Yup.boolean().required(),
document_set_ids: Yup.array().of(Yup.number()),
num_chunks: Yup.number().nullable(),
recent_documents_enabled: Yup.boolean().nullable(),
num_days: Yup.number().nullable(),
include_citations: Yup.boolean().required(),
llm_relevance_filter: Yup.boolean().required(),
llm_model_version_override: Yup.string().nullable(),
@@ -298,6 +302,10 @@ export function AssistantEditor({
// if disable_retrieval is set, set num_chunks to 0
// to tell the backend to not fetch any documents
const numChunks = searchToolEnabled ? values.num_chunks || 10 : 0;
const numDays = searchToolEnabled ? values.num_days || 30 : 0;
const useRecentDocuments = searchToolEnabled
? values.recent_documents_enabled
: false;
// don't set groups if marked as public
const groups = values.is_public ? [] : values.groups;
@@ -310,6 +318,7 @@ export function AssistantEditor({
existingPromptId: existingPrompt?.id,
...values,
num_chunks: numChunks,
num_days: numDays,
users:
user && !checkUserIsNoAuthUser(user.id) ? [user.id] : undefined,
groups,
@@ -319,6 +328,7 @@ export function AssistantEditor({
[promptResponse, personaResponse] = await createPersona({
...values,
num_chunks: numChunks,
num_days: numDays,
users:
user && !checkUserIsNoAuthUser(user.id) ? [user.id] : undefined,
groups,
@@ -580,6 +590,47 @@ export function AssistantEditor({
}
/>
<BooleanFormField
name={`recent_documents_enabled`}
label="Only Use Recent Documents"
subtext={
"If enabled, only recent documents will be used for context."
}
onChange={() => {
setFieldValue("num_days", null);
setFieldValue(
"recent_documents_enabled",
!values.recent_documents_enabled
);
}}
/>
{values.recent_documents_enabled && (
<div className="pl-4 border-l-2 ml-4 border-border">
<TextFormField
name="num_days"
label="Number of Days"
placeholder="Defaults to last 30 days."
subtext={
<div>
How recent should the documents be
that are used for context?
</div>
}
onChange={(e) => {
const value = e.target.value;
// Allow only integer values
if (
value === "" ||
/^[0-9]+$/.test(value)
) {
setFieldValue("num_days", value);
}
}}
/>
</div>
)}
<BooleanFormField
name="include_citations"
label="Include Citations"

View File

@@ -35,6 +35,8 @@ export interface Persona {
llm_model_provider_override?: string;
llm_model_version_override?: string;
starter_messages: StarterMessage[] | null;
use_recent_documents: boolean;
num_days: number | null;
default_persona: boolean;
users: MinimalUserSnapshot[];
groups: number[];

View File

@@ -7,6 +7,7 @@ interface PersonaCreationRequest {
task_prompt: string;
document_set_ids: number[];
num_chunks: number | null;
num_days: number | null;
include_citations: boolean;
is_public: boolean;
llm_relevance_filter: boolean | null;
@@ -27,6 +28,7 @@ interface PersonaUpdateRequest {
task_prompt: string;
document_set_ids: number[];
num_chunks: number | null;
num_days: number | null;
include_citations: boolean;
is_public: boolean;
llm_relevance_filter: boolean | null;
@@ -105,6 +107,7 @@ function buildPersonaAPIBody(
description,
document_set_ids,
num_chunks,
num_days,
llm_relevance_filter,
is_public,
groups,
@@ -116,6 +119,7 @@ function buildPersonaAPIBody(
name,
description,
num_chunks,
num_days,
llm_relevance_filter,
llm_filter_extraction: false,
is_public,