Compare commits

...

1 Commits

Author SHA1 Message Date
Onyx Trialee 2
b99b5ec31b feat: View permissioning information for documents in documents explorer 2025-10-30 19:10:31 -07:00
10 changed files with 88 additions and 41 deletions

View File

@@ -257,6 +257,7 @@ class InferenceChunk(BaseChunk):
large_chunk_reference_ids: list[int] = Field(default_factory=list)
is_federated: bool = False
access_control_list: list[str] | None = None
@property
def unique_id(self) -> str:
@@ -356,6 +357,7 @@ class SearchDoc(BaseModel):
primary_owners: list[str] | None = None
secondary_owners: list[str] | None = None
is_internet: bool = False
access_control_list: list[str] | None = None
@classmethod
def from_chunks_or_sections(
@@ -389,6 +391,7 @@ class SearchDoc(BaseModel):
primary_owners=chunk.primary_owners,
secondary_owners=chunk.secondary_owners,
is_internet=False,
access_control_list=chunk.access_control_list,
)
for item in items
]
@@ -450,6 +453,7 @@ class SavedSearchDoc(SearchDoc):
primary_owners=None,
secondary_owners=None,
is_internet=True,
access_control_list=[],
)
def __lt__(self, other: Any) -> bool:

View File

@@ -12,6 +12,7 @@ from retry import retry
from onyx.configs.app_configs import LOG_VESPA_TIMING_INFORMATION
from onyx.configs.app_configs import VESPA_LANGUAGE_OVERRIDE
from onyx.configs.constants import SOURCE_TYPE
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import InferenceChunkUncleaned
from onyx.document_index.interfaces import VespaChunkRequest
@@ -22,6 +23,7 @@ from onyx.document_index.vespa.shared_utils.vespa_request_builders import (
from onyx.document_index.vespa.shared_utils.vespa_request_builders import (
build_vespa_id_based_retrieval_yql,
)
from onyx.document_index.vespa.shared_utils.vespa_request_builders import build_yql_base
from onyx.document_index.vespa_constants import ACCESS_CONTROL_LIST
from onyx.document_index.vespa_constants import BLURB
from onyx.document_index.vespa_constants import BOOST
@@ -47,10 +49,8 @@ from onyx.document_index.vespa_constants import SECONDARY_OWNERS
from onyx.document_index.vespa_constants import SECTION_CONTINUATION
from onyx.document_index.vespa_constants import SEMANTIC_IDENTIFIER
from onyx.document_index.vespa_constants import SOURCE_LINKS
from onyx.document_index.vespa_constants import SOURCE_TYPE
from onyx.document_index.vespa_constants import TENANT_ID
from onyx.document_index.vespa_constants import TITLE
from onyx.document_index.vespa_constants import YQL_BASE
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
from shared_configs.configs import MULTI_TENANT
@@ -128,6 +128,12 @@ def _vespa_hit_to_inference_chunk(
for k, v in cast(dict[str, str], source_links_dict_unprocessed).items()
}
acl_field = fields.get(ACCESS_CONTROL_LIST)
if isinstance(acl_field, dict):
acl_list = [str(key) for key in acl_field.keys()]
else:
acl_list = []
return InferenceChunkUncleaned(
chunk_id=fields[CHUNK_ID],
blurb=fields.get(BLURB, ""), # Unused
@@ -154,6 +160,7 @@ def _vespa_hit_to_inference_chunk(
chunk_context=fields.get(CHUNK_CONTEXT, ""),
match_highlights=match_highlights,
updated_at=updated_at,
access_control_list=acl_list,
)
@@ -415,7 +422,7 @@ def _get_chunks_via_batch_search(
filters_str = build_vespa_filters(filters=filters, include_hidden=True)
yql = (
YQL_BASE.format(index_name=index_name)
build_yql_base(index_name=index_name, include_acl=False)
+ filters_str
+ build_vespa_id_based_retrieval_yql(chunk_requests[0])
)

View File

@@ -60,6 +60,7 @@ from onyx.document_index.vespa.shared_utils.utils import (
from onyx.document_index.vespa.shared_utils.vespa_request_builders import (
build_vespa_filters,
)
from onyx.document_index.vespa.shared_utils.vespa_request_builders import build_yql_base
from onyx.document_index.vespa_constants import ACCESS_CONTROL_LIST
from onyx.document_index.vespa_constants import BATCH_SIZE
from onyx.document_index.vespa_constants import BOOST
@@ -72,7 +73,6 @@ from onyx.document_index.vespa_constants import NUM_THREADS
from onyx.document_index.vespa_constants import USER_PROJECT
from onyx.document_index.vespa_constants import VESPA_APPLICATION_ENDPOINT
from onyx.document_index.vespa_constants import VESPA_TIMEOUT
from onyx.document_index.vespa_constants import YQL_BASE
from onyx.indexing.models import DocMetadataAwareIndexChunk
from onyx.key_value_store.factory import get_shared_kv_store
from onyx.kg.utils.formatting_utils import split_relationship_id
@@ -951,7 +951,7 @@ class VespaIndex(DocumentIndex):
target_hits = max(10 * num_to_retrieve, 1000)
yql = (
YQL_BASE.format(index_name=self.index_name)
build_yql_base(index_name=self.index_name, include_acl=False)
+ vespa_where_clauses
+ f"(({{targetHits: {target_hits}}}nearestNeighbor(embeddings, query_embedding)) "
+ f"or ({{targetHits: {target_hits}}}nearestNeighbor(title_embedding, query_embedding)) "
@@ -998,7 +998,7 @@ class VespaIndex(DocumentIndex):
) -> list[InferenceChunkUncleaned]:
vespa_where_clauses = build_vespa_filters(filters, include_hidden=True)
yql = (
YQL_BASE.format(index_name=self.index_name)
build_yql_base(index_name=self.index_name, include_acl=True)
+ vespa_where_clauses
+ '({grammar: "weakAnd"}userInput(@query) '
# `({defaultIndex: "content_summary"}userInput(@query))` section is
@@ -1232,7 +1232,10 @@ class VespaIndex(DocumentIndex):
"""
vespa_where_clauses = build_vespa_filters(filters, remove_trailing_and=True)
yql = YQL_BASE.format(index_name=self.index_name) + vespa_where_clauses
yql = (
build_yql_base(index_name=self.index_name, include_acl=True)
+ vespa_where_clauses
)
random_seed = random.randint(0, 1000000)

View File

@@ -11,6 +11,7 @@ from http import HTTPStatus
import httpx
from retry import retry
from onyx.configs.constants import SOURCE_TYPE
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
get_experts_stores_representations,
)
@@ -47,7 +48,6 @@ from onyx.document_index.vespa_constants import SECTION_CONTINUATION
from onyx.document_index.vespa_constants import SEMANTIC_IDENTIFIER
from onyx.document_index.vespa_constants import SKIP_TITLE_EMBEDDING
from onyx.document_index.vespa_constants import SOURCE_LINKS
from onyx.document_index.vespa_constants import SOURCE_TYPE
from onyx.document_index.vespa_constants import TENANT_ID
from onyx.document_index.vespa_constants import TITLE
from onyx.document_index.vespa_constants import TITLE_EMBEDDING

View File

@@ -3,17 +3,34 @@ from datetime import timedelta
from datetime import timezone
from onyx.configs.constants import INDEX_SEPARATOR
from onyx.configs.constants import SOURCE_TYPE
from onyx.context.search.models import IndexFilters
from onyx.document_index.interfaces import VespaChunkRequest
from onyx.document_index.vespa.deletion import CONTENT_SUMMARY
from onyx.document_index.vespa_constants import ACCESS_CONTROL_LIST
from onyx.document_index.vespa_constants import AGGREGATED_CHUNK_BOOST_FACTOR
from onyx.document_index.vespa_constants import BLURB
from onyx.document_index.vespa_constants import BOOST
from onyx.document_index.vespa_constants import CHUNK_CONTEXT
from onyx.document_index.vespa_constants import CHUNK_ID
from onyx.document_index.vespa_constants import CONTENT
from onyx.document_index.vespa_constants import DOC_SUMMARY
from onyx.document_index.vespa_constants import DOC_UPDATED_AT
from onyx.document_index.vespa_constants import DOCUMENT_ID
from onyx.document_index.vespa_constants import DOCUMENT_SETS
from onyx.document_index.vespa_constants import HIDDEN
from onyx.document_index.vespa_constants import IMAGE_FILE_NAME
from onyx.document_index.vespa_constants import LARGE_CHUNK_REFERENCE_IDS
from onyx.document_index.vespa_constants import METADATA
from onyx.document_index.vespa_constants import METADATA_LIST
from onyx.document_index.vespa_constants import SOURCE_TYPE
from onyx.document_index.vespa_constants import METADATA_SUFFIX
from onyx.document_index.vespa_constants import PRIMARY_OWNERS
from onyx.document_index.vespa_constants import SECONDARY_OWNERS
from onyx.document_index.vespa_constants import SECTION_CONTINUATION
from onyx.document_index.vespa_constants import SEMANTIC_IDENTIFIER
from onyx.document_index.vespa_constants import SOURCE_LINKS
from onyx.document_index.vespa_constants import TENANT_ID
from onyx.document_index.vespa_constants import TITLE
from onyx.document_index.vespa_constants import USER_PROJECT
from onyx.kg.utils.formatting_utils import split_relationship_id
from onyx.utils.logger import setup_logger
@@ -232,3 +249,36 @@ def build_vespa_id_based_retrieval_yql(
id_based_retrieval_yql_section += ")"
return id_based_retrieval_yql_section
def build_yql_base(index_name: str, include_acl: bool = False) -> str:
    """Assemble the leading ``select ... from <index> where `` part of a YQL query.

    Args:
        index_name: Name of the index placed in the ``from`` clause.
        include_acl: When true, the access-control-list field is appended to
            the selected fields.

    Returns:
        The query prefix, ending in ``where `` so that filter and retrieval
        clauses can be concatenated directly after it.
    """
    selected_fields = (
        "documentid",
        DOCUMENT_ID,
        CHUNK_ID,
        BLURB,
        CONTENT,
        SOURCE_TYPE,
        SOURCE_LINKS,
        SEMANTIC_IDENTIFIER,
        TITLE,
        SECTION_CONTINUATION,
        IMAGE_FILE_NAME,
        BOOST,
        AGGREGATED_CHUNK_BOOST_FACTOR,
        HIDDEN,
        DOC_UPDATED_AT,
        PRIMARY_OWNERS,
        SECONDARY_OWNERS,
        LARGE_CHUNK_REFERENCE_IDS,
        METADATA,
        METADATA_SUFFIX,
        DOC_SUMMARY,
        CHUNK_CONTEXT,
        CONTENT_SUMMARY,
    )
    # `format(x)` renders each name exactly as an f-string placeholder would.
    # The trailing space after the last field is part of the emitted text.
    select_clause = "select " + ", ".join(map(format, selected_fields)) + " "
    acl_clause = f", {ACCESS_CONTROL_LIST} " if include_acl else ""
    return f"{select_clause}{acl_clause}from {index_name} where "

View File

@@ -3,7 +3,6 @@ from onyx.configs.app_configs import VESPA_CONFIG_SERVER_HOST
from onyx.configs.app_configs import VESPA_HOST
from onyx.configs.app_configs import VESPA_PORT
from onyx.configs.app_configs import VESPA_TENANT_PORT
from onyx.configs.constants import SOURCE_TYPE
# config server
@@ -74,32 +73,3 @@ IMAGE_FILE_NAME = "image_file_name"
# Specific to Vespa, needed for highlighting matching keywords / section
CONTENT_SUMMARY = "content_summary"
# Query prefix selecting the chunk fields. ``{index_name}`` is left as a
# ``str.format`` placeholder to be filled in by the code that uses it.
YQL_BASE = (
    "select "
    + ", ".join(
        # `format(x)` renders each name exactly as an f-string placeholder would.
        map(
            format,
            (
                "documentid",
                DOCUMENT_ID,
                CHUNK_ID,
                BLURB,
                CONTENT,
                SOURCE_TYPE,
                SOURCE_LINKS,
                SEMANTIC_IDENTIFIER,
                TITLE,
                SECTION_CONTINUATION,
                IMAGE_FILE_NAME,
                BOOST,
                AGGREGATED_CHUNK_BOOST_FACTOR,
                HIDDEN,
                DOC_UPDATED_AT,
                PRIMARY_OWNERS,
                SECONDARY_OWNERS,
                LARGE_CHUNK_REFERENCE_IDS,
                METADATA,
                METADATA_SUFFIX,
                DOC_SUMMARY,
                CHUNK_CONTEXT,
                CONTENT_SUMMARY,
            ),
        )
    )
    + " from {index_name} where "
)

View File

@@ -43,6 +43,7 @@ from pydantic import BaseModel
from sqlalchemy import and_
from onyx.configs.constants import INDEX_SEPARATOR
from onyx.configs.constants import SOURCE_TYPE
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import SearchRequest
from onyx.db.engine.sql_engine import get_session_with_current_tenant
@@ -63,7 +64,6 @@ from onyx.document_index.vespa_constants import DOCUMENT_SETS
from onyx.document_index.vespa_constants import HIDDEN
from onyx.document_index.vespa_constants import METADATA_LIST
from onyx.document_index.vespa_constants import SEARCH_ENDPOINT
from onyx.document_index.vespa_constants import SOURCE_TYPE
from onyx.document_index.vespa_constants import VESPA_APP_CONTAINER_URL
from onyx.document_index.vespa_constants import VESPA_APPLICATION_ENDPOINT
from onyx.utils.logger import setup_logger

View File

@@ -5,6 +5,7 @@ from uuid import UUID
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import INDEX_SEPARATOR
from onyx.configs.constants import SOURCE_TYPE
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import Tag
from onyx.document_index.vespa.shared_utils.vespa_request_builders import (
@@ -15,7 +16,6 @@ from onyx.document_index.vespa_constants import DOCUMENT_ID
from onyx.document_index.vespa_constants import DOCUMENT_SETS
from onyx.document_index.vespa_constants import HIDDEN
from onyx.document_index.vespa_constants import METADATA_LIST
from onyx.document_index.vespa_constants import SOURCE_TYPE
from onyx.document_index.vespa_constants import TENANT_ID
from onyx.document_index.vespa_constants import USER_PROJECT
from shared_configs.configs import MULTI_TENANT

View File

@@ -30,6 +30,8 @@ const DocumentDisplay = ({
refresh: () => void;
setPopup: (popupSpec: PopupSpec | null) => void;
}) => {
const acl = document.access_control_list;
const aclStr = acl && acl.length ? acl.join(", ") : "None";
return (
<div
key={document.document_id}
@@ -92,6 +94,16 @@ const DocumentDisplay = ({
<CustomCheckbox checked={!document.hidden} />
</div>
</div>
<div className="px-1 py-0.5 bg-accent-background-hovered rounded flex items-center gap-1">
<span>Permissions:</span>
<span
className="truncate max-w-[220px] whitespace-nowrap overflow-hidden"
title={aclStr}
>
{aclStr}
</span>
</div>
</div>
{document.updated_at && (
<div className="mt-2">

View File

@@ -71,6 +71,7 @@ export interface OnyxDocument extends MinimalOnyxDocument {
db_doc_id?: number;
is_internet: boolean;
validationState?: null | "good" | "bad";
access_control_list: string[] | null;
}
export interface LoadedOnyxDocument extends OnyxDocument {