Compare commits

...

2 Commits

Author SHA1 Message Date
Dominic Feliton
eb4ea6eedf precommit 2025-10-30 10:38:51 -07:00
Dominic Feliton
67a64fd0ee fix handling of trailing slashes with semantic identifiers 2025-10-29 23:51:05 -07:00

View File

@@ -535,7 +535,8 @@ class WebConnector(LoadConnector):
id=initial_url,
sections=[TextSection(link=initial_url, text=page_text)],
source=DocumentSource.WEB,
-semantic_identifier=initial_url.split("/")[-1],
+semantic_identifier=initial_url.rstrip("/").split("/")[-1]
+or initial_url,
metadata=metadata,
doc_updated_at=(
_get_datetime_from_last_modified_header(last_modified)