Compare commits

...

1 Commit

Author SHA1 Message Date
joachim-danswer
b3953b2c2f new profile & double yql 2025-04-11 14:06:03 -07:00
2 changed files with 175 additions and 60 deletions

View File

@@ -243,6 +243,73 @@ schema DANSWER_CHUNK_NAME {
}
}
# Keyword-first hybrid ranking profile. VARIABLE_DIM is a template placeholder,
# presumably substituted with the embedding dimension when the schema is rendered
# (the query layer requests profiles named hybrid_search_kw_first_phase<dim>) — TODO confirm.
rank-profile hybrid_search_kw_first_phaseVARIABLE_DIM inherits default, default_rank {
inputs {
# Query-time embedding; its dimension must match VARIABLE_DIM.
query(query_embedding) tensor<float>(x[VARIABLE_DIM])
}
# Vector similarity for a document: the better of content closeness and title closeness.
function title_vector_score() {
expression {
# If no good matching titles, then it should use the context embeddings rather than having some
# irrelevant title have a vector score of 1. This way at least it will be the doc with the highest
# matching content score getting the full score
max(closeness(field, embeddings), closeness(field, title_embedding))
}
}
# First phase is keyword-only (BM25) scoring, weighted toward content over title.
# NOTE(review): the previous comment here claimed the first phase "must be vector",
# which contradicts the expression below — vector scores only enter in global-phase.
first-phase {
expression: 0.2 * bm25(title) + 0.8 * bm25(content)
}
# Global phase: weighted average between vector similarity and BM-25.
# query(alpha) balances vector vs keyword; query(title_content_ratio) balances title vs content.
global-phase {
expression {
(
# Weighted Vector Similarity Score
(
query(alpha) * (
(query(title_content_ratio) * normalize_linear(title_vector_score))
+
((1 - query(title_content_ratio)) * normalize_linear(closeness(field, embeddings)))
)
)
+
# Weighted Keyword Similarity Score
# Note: for the BM25 Title score, it requires decent stopword removal in the query
# This needs to be the case so there aren't irrelevant titles being normalized to a score of 1
(
(1 - query(alpha)) * (
(query(title_content_ratio) * normalize_linear(bm25(title)))
+
((1 - query(title_content_ratio)) * normalize_linear(bm25(content)))
)
)
)
# Boost based on user feedback
* document_boost
# Decay factor based on time document was last updated
* recency_bias
# Boost based on aggregated boost calculation
* aggregated_chunk_boost
}
# Only the top 1000 first-phase hits are re-ranked by the global-phase expression.
rerank-count: 1000
}
# Expose these per-hit feature values in the search response (for debugging/score inspection).
match-features {
bm25(title)
bm25(content)
closeness(field, title_embedding)
closeness(field, embeddings)
document_boost
recency_bias
aggregated_chunk_boost
closest(embeddings)
}
}
# Used when searching from the admin UI for a specific doc to hide / boost
# Very heavily prioritize title
rank-profile admin_search inherits default, default_rank {

View File

@@ -297,76 +297,124 @@ def query_vespa(
if "query" in query_params and not cast(str, query_params["query"]).strip():
raise ValueError("No/empty query received")
params = dict(
**query_params,
**(
{
"presentation.timing": True,
}
if LOG_VESPA_TIMING_INFORMATION
else {}
),
)
configured_ranking_profile = query_params.get("ranking.profile")
if not configured_ranking_profile:
raise ValueError("No ranking profile configured")
try:
with get_vespa_http_client() as http_client:
response = http_client.post(SEARCH_ENDPOINT, json=params)
response.raise_for_status()
except httpx.HTTPError as e:
error_base = "Failed to query Vespa"
logger.error(
f"{error_base}:\n"
f"Request URL: {e.request.url}\n"
f"Request Headers: {e.request.headers}\n"
f"Request Payload: {params}\n"
f"Exception: {str(e)}"
+ (
f"\nResponse: {e.response.text}"
if isinstance(e, httpx.HTTPStatusError)
else ""
)
query_profiles: list[float | int | str] = []
if (
configured_ranking_profile
and isinstance(configured_ranking_profile, str)
and configured_ranking_profile.startswith("hybrid_search")
):
dimension = configured_ranking_profile.split("hybrid_search")[1]
query_profiles = [
f"hybrid_search_kw_first_phase{dimension}",
f"hybrid_search{dimension}",
]
else:
query_profiles = [configured_ranking_profile]
inference_chunk_sets = []
mutable_params = dict(query_params)
for query_profile in query_profiles:
mutable_params["ranking.profile"] = query_profile
params = dict(
**mutable_params,
**(
{
"presentation.timing": True,
}
if LOG_VESPA_TIMING_INFORMATION
else {}
),
)
raise httpx.HTTPError(error_base) from e
response_json: dict[str, Any] = response.json()
if LOG_VESPA_TIMING_INFORMATION:
logger.debug("Vespa timing info: %s", response_json.get("timing"))
hits = response_json["root"].get("children", [])
if not hits:
logger.warning(
f"No hits found for YQL Query: {query_params.get('yql', 'No YQL Query')}"
)
logger.debug(f"Vespa Response: {response.text}")
for hit in hits:
if hit["fields"].get(CONTENT) is None:
identifier = hit["fields"].get("documentid") or hit["id"]
try:
with get_vespa_http_client() as http_client:
response = http_client.post(SEARCH_ENDPOINT, json=params)
response.raise_for_status()
except httpx.HTTPError as e:
error_base = "Failed to query Vespa"
logger.error(
f"Vespa Index with Vespa ID {identifier} has no contents. "
f"This is invalid because the vector is not meaningful and keywordsearch cannot "
f"fetch this document"
f"{error_base}:\n"
f"Request URL: {e.request.url}\n"
f"Request Headers: {e.request.headers}\n"
f"Request Payload: {params}\n"
f"Exception: {str(e)}"
+ (
f"\nResponse: {e.response.text}"
if isinstance(e, httpx.HTTPStatusError)
else ""
)
)
raise httpx.HTTPError(error_base) from e
filtered_hits = [hit for hit in hits if hit["fields"].get(CONTENT) is not None]
response_json: dict[str, Any] = response.json()
inference_chunks = [_vespa_hit_to_inference_chunk(hit) for hit in filtered_hits]
if LOG_VESPA_TIMING_INFORMATION:
logger.debug("Vespa timing info: %s", response_json.get("timing"))
hits = response_json["root"].get("children", [])
try:
num_retrieved_inference_chunks = len(inference_chunks)
num_retrieved_document_ids = len(
set([chunk.document_id for chunk in inference_chunks])
)
logger.debug(
f"Retrieved {num_retrieved_inference_chunks} inference chunks for {num_retrieved_document_ids} documents"
)
except Exception as e:
# Debug logging only, should not fail the retrieval
logger.error(f"Error logging retrieval statistics: {e}")
if not hits:
logger.warning(
f"No hits found for YQL Query: {query_params.get('yql', 'No YQL Query')}"
)
logger.debug(f"Vespa Response: {response.text}")
for hit in hits:
if hit["fields"].get(CONTENT) is None:
identifier = hit["fields"].get("documentid") or hit["id"]
logger.error(
f"Vespa Index with Vespa ID {identifier} has no contents. "
f"This is invalid because the vector is not meaningful and keywordsearch cannot "
f"fetch this document"
)
filtered_hits = [hit for hit in hits if hit["fields"].get(CONTENT) is not None]
inference_chunks = [_vespa_hit_to_inference_chunk(hit) for hit in filtered_hits]
try:
num_retrieved_inference_chunks = len(inference_chunks)
num_retrieved_document_ids = len(
set([chunk.document_id for chunk in inference_chunks])
)
logger.debug(
f"Retrieved {num_retrieved_inference_chunks} inference chunks for {num_retrieved_document_ids} documents"
)
except Exception as e:
# Debug logging only, should not fail the retrieval
logger.error(f"Error logging retrieval statistics: {e}")
inference_chunk_sets.append(inference_chunks)
flattened_inference_chunks = []
for inference_chunk_set in inference_chunk_sets:
flattened_inference_chunks.extend(inference_chunk_set)
flattened_inference_chunks.sort(key=lambda chunk: chunk.score, reverse=True)
final_chunks = []
used_document_chunk_ids = set()
for chunk in flattened_inference_chunks:
if (
chunk.document_id + "__" + str(chunk.chunk_id)
not in used_document_chunk_ids
):
final_chunks.append(chunk)
used_document_chunk_ids.add(chunk.document_id + "__" + str(chunk.chunk_id))
else:
continue
return final_chunks
# Good Debugging Spot
return inference_chunks
return flattened_inference_chunks
def _get_chunks_via_batch_search(