nit

Revert "quick fix"
This reverts commit 906b29bd9b.
2026-02-16 23:35:46 +00:00 · 2025-03-29 12:47:06 -07:00 · 2025-03-29 12:46:05 -07:00 · 2025-03-29 12:44:56 -07:00 · 2025-03-29 11:51:53 -07:00
2 changed files with 15 additions and 28 deletions
--- a/backend/onyx/indexing/chunker.py
+++ b/backend/onyx/indexing/chunker.py
@@ -224,27 +224,6 @@ class Chunker:
        )
        chunks_list.append(new_chunk)

-    def _chunk_document(
-        self,
-        document: IndexingDocument,
-        title_prefix: str,
-        metadata_suffix_semantic: str,
-        metadata_suffix_keyword: str,
-        content_token_limit: int,
-    ) -> list[DocAwareChunk]:
-        """
-        Legacy method for backward compatibility.
-        Calls _chunk_document_with_sections with document.sections.
-        """
-        return self._chunk_document_with_sections(
-            document,
-            document.processed_sections,
-            title_prefix,
-            metadata_suffix_semantic,
-            metadata_suffix_keyword,
-            content_token_limit,
-        )
-
    def _chunk_document_with_sections(
        self,
        document: IndexingDocument,
@@ -264,7 +243,7 @@ class Chunker:

        for section_idx, section in enumerate(sections):
            # Get section text and other attributes
-            section_text = clean_text(section.text or "")
+            section_text = clean_text(str(section.text or ""))
            section_link_text = section.link or ""
            image_url = section.image_file_name

--- a/backend/onyx/indexing/indexing_pipeline.py
+++ b/backend/onyx/indexing/indexing_pipeline.py
@@ -439,7 +439,7 @@ def process_image_sections(documents: list[Document]) -> list[IndexingDocument]:
                **document.dict(),
                processed_sections=[
                    Section(
-                        text=section.text if isinstance(section, TextSection) else None,
+                        text=section.text if isinstance(section, TextSection) else "",
                        link=section.link,
                        image_file_name=section.image_file_name
                        if isinstance(section, ImageSection)
@@ -459,11 +459,11 @@ def process_image_sections(documents: list[Document]) -> list[IndexingDocument]:
        for section in document.sections:
            # For ImageSection, process and create base Section with both text and image_file_name
            if isinstance(section, ImageSection):
-                # Default section with image path preserved
+                # Default section with image path preserved - ensure text is always a string
                processed_section = Section(
                    link=section.link,
                    image_file_name=section.image_file_name,
-                    text=None,  # Will be populated if summarization succeeds
+                    text="",  # Initialize with empty string
                )

                # Try to get image summary
@@ -506,13 +506,21 @@ def process_image_sections(documents: list[Document]) -> list[IndexingDocument]:
            # For TextSection, create a base Section with text and link
            elif isinstance(section, TextSection):
                processed_section = Section(
-                    text=section.text, link=section.link, image_file_name=None
+                    text=section.text or "",  # Ensure text is always a string, not None
+                    link=section.link,
+                    image_file_name=None,
                )
                processed_sections.append(processed_section)

-            # If it's already a base Section (unlikely), just append it
+            # If it's already a base Section (unlikely), just append it with text validation
            else:
-                processed_sections.append(section)
+                # Ensure text is always a string
+                processed_section = Section(
+                    text=section.text if section.text is not None else "",
+                    link=section.link,
+                    image_file_name=section.image_file_name,
+                )
+                processed_sections.append(processed_section)

        # Create IndexingDocument with original sections and processed_sections
        indexed_document = IndexingDocument(
Author	SHA1	Message	Date
pablonyx	a475f1b328	nit	2025-03-29 12:47:06 -07:00
pablonyx	e37bb2209e	Revert "quick fix" This reverts commit `906b29bd9b`.	2025-03-29 12:46:05 -07:00
pablonyx	5aba739aee	quick fix	2025-03-29 12:44:56 -07:00
pablonyx	906b29bd9b	quick fix	2025-03-29 11:51:53 -07:00