Compare commits

...

3 Commits

Author SHA1 Message Date
Rei Meguro
74e1ade431 feat: slack parent update in indexing 2025-06-12 15:13:45 -07:00
Rei Meguro
97bc50ed2e feat: simplified migration tables for now 2025-06-12 14:12:28 -07:00
Rei Meguro
eea2bf18d3 feat: migrations 2025-06-12 10:36:20 -07:00
8 changed files with 72 additions and 1 deletions

View File

@@ -0,0 +1,60 @@
"""perm-sync-utils
Revision ID: ae6aaba063eb
Revises: cec7ec36c505
Create Date: 2025-06-11 22:18:31.061655
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
# `revision` is this migration's own ID and `down_revision` is the migration it
# revises; both match the "Revision ID" / "Revises" lines in the module docstring.
revision = "ae6aaba063eb"
down_revision = "cec7ec36c505"
# No branch labels and no dependencies on other migrations are declared.
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add ``last_perm_synced`` and ``parent`` to ``document`` and backfill ``parent``.

    Steps, in order:
      1. Add the nullable, timezone-aware ``last_perm_synced`` column.
      2. Create a non-unique index over ``last_perm_synced``.
      3. Add the nullable string column ``parent``.
      4. Backfill ``parent`` with the ``tag_value`` of the ``'Channel'`` tag for
         every document whose id appears in
         ``document_by_connector_credential_pair`` under a connector whose
         ``source`` is ``'SLACK'``.
    """
    table_name = "document"

    # Column definitions are built up front; nothing touches the database
    # until the ``op.*`` calls below.
    synced_at_column = sa.Column(
        "last_perm_synced", sa.DateTime(timezone=True), nullable=True
    )
    parent_column = sa.Column("parent", sa.String(), nullable=True)

    op.add_column(table_name, synced_at_column)
    index_name = op.f("ix_document_last_perm_synced")
    op.create_index(index_name, table_name, ["last_perm_synced"], unique=False)

    # set parent to channel name for slack docs
    op.add_column(table_name, parent_column)

    # The CTE collects the ids recorded for Slack connectors; the UPDATE then
    # joins those ids through document__tag to tag and copies the 'Channel'
    # tag's value into document.parent for the matching document rows.
    backfill_parent_sql = """
WITH slack_docs AS(
SELECT cc_pair.id
FROM document_by_connector_credential_pair AS cc_pair
JOIN connector ON cc_pair.connector_id = connector.id
WHERE connector.source = 'SLACK'
)
UPDATE document
SET parent = tag.tag_value
FROM slack_docs
JOIN document__tag ON slack_docs.id = document__tag.document_id
JOIN tag ON document__tag.tag_id = tag.id
WHERE
document.id = slack_docs.id AND
tag.tag_key = 'Channel'
"""
    op.execute(backfill_parent_sql)
def downgrade() -> None:
    """Remove the index and the two ``document`` columns this revision manages.

    The ``ix_document_last_perm_synced`` index is dropped first, then the
    ``last_perm_synced`` and ``parent`` columns. Any values stored in those
    columns are discarded with them.
    """
    table_name = "document"

    # Drop the index explicitly before the column it covers.
    op.drop_index(op.f("ix_document_last_perm_synced"), table_name=table_name)

    for column_name in ("last_perm_synced", "parent"):
        op.drop_column(table_name, column_name)

View File

@@ -98,6 +98,7 @@ def upsert_document_external_perms(
document.external_user_group_ids = list(prefixed_external_groups)
document.is_public = external_access.is_public
document.last_modified = datetime.now(timezone.utc)
document.last_perm_synced = datetime.now(timezone.utc)
db_session.commit()
return False

View File

@@ -145,6 +145,7 @@ class DocumentBase(BaseModel):
primary_owners: list[BasicExpertInfo] | None = None
# Assignee, space owner, etc.
secondary_owners: list[BasicExpertInfo] | None = None
parent: str | None = None
# title is used for search whereas semantic_identifier is used for displaying in the UI
# different because Slack message may display as #general but general should not be part
# of the search, at least not in the same way as a document title should be for like Confluence
@@ -204,6 +205,7 @@ class DocumentBase(BaseModel):
else:
size += sys.getsizeof(self.secondary_owners)
size += sys.getsizeof(self.parent)
size += sys.getsizeof(self.title)
size += sys.getsizeof(self.from_ingestion_api)
size += sys.getsizeof(self.additional_info)

View File

@@ -245,6 +245,7 @@ def thread_to_doc(
semantic_identifier=doc_sem_id,
doc_updated_at=get_latest_message_time(thread),
primary_owners=valid_experts,
parent=channel["name"],
metadata={"Channel": channel["name"]},
external_access=channel_access,
)

View File

@@ -406,6 +406,7 @@ def upsert_documents(
last_modified=datetime.now(timezone.utc),
primary_owners=doc.primary_owners,
secondary_owners=doc.secondary_owners,
parent=doc.parent,
kg_stage=KGStage.NOT_STARTED,
**(
{

View File

@@ -577,7 +577,7 @@ class Document(Base):
)
# Permission sync columns
# Email addresses are saved at the document level for externally synced permissions
# This is becuase the normal flow of assigning permissions is through the cc_pair
# This is because the normal flow of assigning permissions is through the cc_pair
# doesn't apply here
external_user_emails: Mapped[list[str] | None] = mapped_column(
postgresql.ARRAY(String), nullable=True
@@ -587,6 +587,10 @@ class Document(Base):
postgresql.ARRAY(String), nullable=True
)
is_public: Mapped[bool] = mapped_column(Boolean, default=False)
last_perm_synced: Mapped[datetime.datetime | None] = mapped_column(
DateTime(timezone=True), nullable=True, index=True
)
parent: Mapped[str | None] = mapped_column(NullFilteredString, nullable=True)
# tables for the knowledge graph data
kg_stage: Mapped[KGStage] = mapped_column(

View File

@@ -88,6 +88,7 @@ class DocumentMetadata:
# Users may not be in Onyx
primary_owners: list[str] | None = None
secondary_owners: list[str] | None = None
parent: str | None = None
from_ingestion_api: bool = False
external_access: ExternalAccess | None = None

View File

@@ -143,6 +143,7 @@ def _upsert_documents_in_db(
first_link=first_link,
primary_owners=get_experts_stores_representations(doc.primary_owners),
secondary_owners=get_experts_stores_representations(doc.secondary_owners),
parent=doc.parent,
from_ingestion_api=doc.from_ingestion_api,
external_access=doc.external_access,
)