Compare commits

...

1 Commits

Author SHA1 Message Date
Richard Kuo (Danswer)
c1a4eea2d0 add timings for syncing 2025-01-27 11:54:17 -08:00

View File

@@ -1092,30 +1092,42 @@ def monitor_vespa_sync(self: Task, tenant_id: str | None) -> bool | None:
def vespa_metadata_sync_task(
self: Task, document_id: str, tenant_id: str | None
) -> bool:
timings: dict[str, Any] = {}
start = time.monotonic()
timings["start"] = start
try:
with get_session_with_tenant(tenant_id) as db_session:
phase_start = time.monotonic()
curr_ind_name, sec_ind_name = get_both_index_names(db_session)
doc_index = get_default_document_index(
primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name
)
timings["get_index"] = time.monotonic() - phase_start
phase_start = time.monotonic()
retry_index = RetryDocumentIndex(doc_index)
doc = get_document(document_id, db_session)
if not doc:
return False
timings["get_document"] = time.monotonic() - phase_start
# document set sync
phase_start = time.monotonic()
doc_sets = fetch_document_sets_for_document(document_id, db_session)
update_doc_sets: set[str] = set(doc_sets)
timings["fetch_document_sets_for_document"] = time.monotonic() - phase_start
# User group sync
phase_start = time.monotonic()
doc_access = get_access_for_document(
document_id=document_id, db_session=db_session
)
timings["get_access_for_document"] = time.monotonic() - phase_start
phase_start = time.monotonic()
fields = VespaDocumentFields(
document_sets=update_doc_sets,
access=doc_access,
@@ -1130,10 +1142,13 @@ def vespa_metadata_sync_task(
chunk_count=doc.chunk_count,
fields=fields,
)
timings["index.update_single"] = time.monotonic() - phase_start
# update db last. Worst case = we crash right before this and
# the sync might repeat again later
phase_start = time.monotonic()
mark_document_as_synced(document_id, db_session)
timings["mark_document_as_synced"] = time.monotonic() - phase_start
# this code checks for and removes a per document sync key that is
# used to block out the same doc from continualy resyncing
@@ -1151,6 +1166,8 @@ def vespa_metadata_sync_task(
f"chunks={chunks_affected} "
f"elapsed={elapsed:.2f}"
)
task_logger.debug(f"doc={document_id} timings={timings}")
except SoftTimeLimitExceeded:
task_logger.info(f"SoftTimeLimitExceeded exception. doc={document_id}")
except Exception as ex: