Compare commits

...

16 Commits

Author SHA1 Message Date
pablodanswer
884f9624a3 update 2024-12-19 14:22:13 -08:00
pablodanswer
e2b0924077 = -> == 2024-12-19 13:43:38 -08:00
pablodanswer
5650e88ecb cosmetic improvements 2024-12-18 10:48:17 -08:00
pablodanswer
1df3fb5f62 quick nit 2024-12-18 10:10:16 -08:00
pablodanswer
5f7340ebcf import update 2024-12-18 10:09:18 -08:00
pablodanswer
99dc19ee88 update values 2024-12-18 10:09:18 -08:00
pablodanswer
b64e96a582 update requirements 2024-12-18 10:09:18 -08:00
pablodanswer
f7ce933759 minor nit 2024-12-18 10:09:18 -08:00
pablodanswer
2b3c409081 quick cleanup 2024-12-18 10:09:18 -08:00
pablodanswer
be1d5426e3 minor clean up 2024-12-18 10:09:18 -08:00
pablodanswer
2c9c03b97b clean up 2024-12-18 10:09:18 -08:00
pablodanswer
1516a43b10 gdrive parsing updates 2024-12-18 10:09:18 -08:00
pablodanswer
5b4f8848d6 quick clean up 2024-12-18 10:09:18 -08:00
pablodanswer
cff48f813a quick cleanup 2024-12-18 10:09:18 -08:00
pablodanswer
64ca568feb migrate to markitdown 2024-12-18 10:09:18 -08:00
pablodanswer
298aca158d minor updates 2024-12-18 10:09:18 -08:00
6 changed files with 341 additions and 208 deletions

View File

@@ -4,6 +4,7 @@ from datetime import timezone
from googleapiclient.discovery import build # type: ignore
from googleapiclient.errors import HttpError # type: ignore
from markitdown import MarkItDown # type: ignore
from onyx.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
from onyx.configs.constants import DocumentSource
@@ -26,8 +27,8 @@ from onyx.file_processing.unstructured import get_unstructured_api_key
from onyx.file_processing.unstructured import unstructured_to_text
from onyx.utils.logger import setup_logger
logger = setup_logger()
logger = setup_logger()
# these errors don't represent a failure in the connector, but simply files
# that can't / shouldn't be indexed
@@ -38,177 +39,41 @@ ERRORS_TO_CONTINUE_ON = [
]
def _extract_sections_basic(
    file: dict[str, str], service: GoogleDriveService
) -> list[Section]:
    """Extract text sections from a Drive file based on its MIME type.

    Google Sheets are read per-sheet through the Sheets API; other
    Google-native formats are exported as text/csv, and office formats
    (docx/pptx/pdf) are downloaded and parsed. Any unexpected failure
    yields a single unsupported-content placeholder section.
    """
    mime_type = file["mimeType"]
    link = file["webViewLink"]

    if mime_type not in set(item.value for item in GDriveMimeType):
        # Unsupported file types can still have a title, finding this way is still useful
        return [Section(link=link, text=UNSUPPORTED_FILE_TYPE_CONTENT)]

    try:
        if mime_type == GDriveMimeType.SPREADSHEET.value:
            try:
                # Reuse the Drive service's credentials to build a Sheets client.
                sheets_service = build(
                    "sheets", "v4", credentials=service._http.credentials
                )
                spreadsheet = (
                    sheets_service.spreadsheets()
                    .get(spreadsheetId=file["id"])
                    .execute()
                )
                sections = []
                for sheet in spreadsheet["sheets"]:
                    sheet_name = sheet["properties"]["title"]
                    sheet_id = sheet["properties"]["sheetId"]
                    # Get sheet dimensions (defaults when gridProperties is absent)
                    grid_properties = sheet["properties"].get("gridProperties", {})
                    row_count = grid_properties.get("rowCount", 1000)
                    column_count = grid_properties.get("columnCount", 26)
                    # Convert column count to letter (e.g., 26 -> Z, 27 -> AA)
                    end_column = ""
                    while column_count:
                        column_count, remainder = divmod(column_count - 1, 26)
                        end_column = chr(65 + remainder) + end_column
                    range_name = f"'{sheet_name}'!A1:{end_column}{row_count}"
                    try:
                        result = (
                            sheets_service.spreadsheets()
                            .values()
                            .get(spreadsheetId=file["id"], range=range_name)
                            .execute()
                        )
                        values = result.get("values", [])
                        if values:
                            # One Section per non-empty sheet, rendered as
                            # tab-separated rows and linked to the sheet's gid.
                            text = f"Sheet: {sheet_name}\n"
                            for row in values:
                                text += "\t".join(str(cell) for cell in row) + "\n"
                            sections.append(
                                Section(
                                    link=f"{link}#gid={sheet_id}",
                                    text=text,
                                )
                            )
                    except HttpError as e:
                        # Skip only the failing sheet; keep extracting the rest.
                        logger.warning(
                            f"Error fetching data for sheet '{sheet_name}': {e}"
                        )
                        continue
                return sections
            except Exception as e:
                # Falls through to the export-based extraction branch below.
                logger.warning(
                    f"Ran into exception '{e}' when pulling data from Google Sheet '{file['name']}'."
                    " Falling back to basic extraction."
                )
        if mime_type in [
            GDriveMimeType.DOC.value,
            GDriveMimeType.PPT.value,
            GDriveMimeType.SPREADSHEET.value,
        ]:
            # Google-native formats must be exported rather than downloaded.
            export_mime_type = (
                "text/plain"
                if mime_type != GDriveMimeType.SPREADSHEET.value
                else "text/csv"
            )
            text = (
                service.files()
                .export(fileId=file["id"], mimeType=export_mime_type)
                .execute()
                .decode("utf-8")
            )
            return [Section(link=link, text=text)]
        elif mime_type in [
            GDriveMimeType.PLAIN_TEXT.value,
            GDriveMimeType.MARKDOWN.value,
        ]:
            # Plain text / markdown can be downloaded and decoded directly.
            return [
                Section(
                    link=link,
                    text=service.files()
                    .get_media(fileId=file["id"])
                    .execute()
                    .decode("utf-8"),
                )
            ]
        if mime_type in [
            GDriveMimeType.WORD_DOC.value,
            GDriveMimeType.POWERPOINT.value,
            GDriveMimeType.PDF.value,
        ]:
            response = service.files().get_media(fileId=file["id"]).execute()
            # Prefer the unstructured API whenever a key is configured.
            if get_unstructured_api_key():
                return [
                    Section(
                        link=link,
                        text=unstructured_to_text(
                            file=io.BytesIO(response),
                            file_name=file.get("name", file["id"]),
                        ),
                    )
                ]
            if mime_type == GDriveMimeType.WORD_DOC.value:
                return [
                    Section(link=link, text=docx_to_text(file=io.BytesIO(response)))
                ]
            elif mime_type == GDriveMimeType.PDF.value:
                text, _ = read_pdf_file(file=io.BytesIO(response))
                return [Section(link=link, text=text)]
            elif mime_type == GDriveMimeType.POWERPOINT.value:
                return [
                    Section(link=link, text=pptx_to_text(file=io.BytesIO(response)))
                ]
        return [Section(link=link, text=UNSUPPORTED_FILE_TYPE_CONTENT)]
    except Exception:
        # NOTE(review): broad swallow — any extraction error silently becomes
        # the unsupported-file placeholder; consider logging the exception.
        return [Section(link=link, text=UNSUPPORTED_FILE_TYPE_CONTENT)]
def convert_drive_item_to_document(
file: GoogleDriveFileType,
drive_service: GoogleDriveService,
docs_service: GoogleDocsService,
) -> Document | None:
"""
Converts a Google Drive file into an internal Document object, extracting
the text and organizing it into sections. Uses specialized methods for Google Docs
to preserve structure. Falls back to basic extraction for all other formats.
"""
try:
# Skip files that are shortcuts
# Skip shortcuts and folders
if file.get("mimeType") == DRIVE_SHORTCUT_TYPE:
logger.info("Ignoring Drive Shortcut Filetype")
return None
# Skip files that are folders
if file.get("mimeType") == DRIVE_FOLDER_TYPE:
logger.info("Ignoring Drive Folder Filetype")
return None
sections: list[Section] = []
# Special handling for Google Docs to preserve structure, link
# to headers
# Special handling for Google Docs to preserve structure
if file.get("mimeType") == GDriveMimeType.DOC.value:
try:
sections = get_document_sections(docs_service, file["id"])
except Exception as e:
logger.warning(
f"Ran into exception '{e}' when pulling sections from Google Doc '{file['name']}'."
" Falling back to basic extraction."
f"Exception '{e}' when pulling sections from Google Doc '{file['name']}'. "
"Falling back to basic extraction."
)
# NOTE: this will run for either (1) the above failed or (2) the file is not a Google Doc
# If not a GDoc or GDoc extraction failed
if not sections:
try:
# For all other file types just extract the text
sections = _extract_sections_basic(file, drive_service)
except HttpError as e:
reason = e.error_details[0]["reason"] if e.error_details else e.reason
message = e.error_details[0]["message"] if e.error_details else e.reason
@@ -217,8 +82,8 @@ def convert_drive_item_to_document(
f"Could not export file '{file['name']}' due to '{message}', skipping..."
)
return None
raise
if not sections:
return None
@@ -238,9 +103,248 @@ def convert_drive_item_to_document(
except Exception as e:
if not CONTINUE_ON_CONNECTOR_FAILURE:
raise e
logger.exception("Ran into exception when pulling a file from Google Drive")
return None
return None
def _extract_sections_basic(
    file: GoogleDriveFileType, service: GoogleDriveService
) -> list[Section]:
    """Extract text sections from a Drive file, dispatching on MIME type.

    Google Sheets get specialized per-sheet extraction; if that fails (or
    for every other supported MIME type) the general content extractor is
    used. Unsupported MIME types yield a single placeholder section.
    """
    file_mime = file["mimeType"]
    view_link = file["webViewLink"]

    # Placeholder for anything we do not know how to parse.
    known_mime_types = {supported.value for supported in GDriveMimeType}
    if file_mime not in known_mime_types:
        logger.debug(
            f"Unsupported MIME type '{file_mime}' for file '{file.get('name')}'"
        )
        return [Section(link=view_link, text=UNSUPPORTED_FILE_TYPE_CONTENT)]

    # Sheets-specific path; fall through to the general path on any failure.
    if file_mime == GDriveMimeType.SPREADSHEET.value:
        try:
            return _extract_google_sheets(file, service)
        except Exception as e:
            logger.warning(
                f"Error extracting data from Google Sheet '{file['name']}': {e}. "
                "Falling back to basic content extraction."
            )

    return _extract_general_content(file, service)
def _extract_google_sheets(
    file: dict[str, str], service: GoogleDriveService
) -> list[Section]:
    """Specialized extraction logic for Google Sheets.

    Builds a Sheets API client from the Drive service's credentials, walks
    every sheet of the spreadsheet, fetches its full value range, and emits
    one tab-separated Section per non-empty sheet, linked to that sheet's gid.
    A sheet whose values cannot be fetched is logged and skipped.
    """
    link = file["webViewLink"]
    file_id = file["id"]

    def to_column_letter(count: int) -> str:
        # Spreadsheet column numbering: 1 -> A, 26 -> Z, 27 -> AA, ...
        letters = ""
        remaining = count
        while remaining > 0:
            remaining, rem = divmod(remaining - 1, 26)
            letters = chr(65 + rem) + letters
        return letters

    sheets_service = build("sheets", "v4", credentials=service._http.credentials)
    spreadsheet = sheets_service.spreadsheets().get(spreadsheetId=file_id).execute()

    sections: list[Section] = []
    for sheet in spreadsheet.get("sheets", []):
        properties = sheet["properties"]
        sheet_name = properties["title"]
        sheet_id = properties["sheetId"]

        # Sheet dimensions, with defaults when gridProperties is missing.
        grid = properties.get("gridProperties", {})
        row_count = grid.get("rowCount", 1000)
        end_column = to_column_letter(grid.get("columnCount", 26))
        range_name = f"'{sheet_name}'!A1:{end_column}{row_count}"

        try:
            result = (
                sheets_service.spreadsheets()
                .values()
                .get(spreadsheetId=file_id, range=range_name)
                .execute()
            )
        except HttpError as e:
            logger.warning(
                f"Error fetching data for sheet '{sheet_name}' in '{file.get('name')}' : {e}"
            )
            continue

        values = result.get("values", [])
        if not values:
            continue

        rendered_rows = "".join(
            "\t".join(str(cell) for cell in row) + "\n" for row in values
        )
        sections.append(
            Section(
                link=f"{link}#gid={sheet_id}",
                text=f"Sheet: {sheet_name}\n" + rendered_rows,
            )
        )
    return sections
def _extract_general_content(
    file: dict[str, str], service: GoogleDriveService
) -> list[Section]:
    """Extract content from any Drive file other than a Google Sheet.

    Handling per MIME type:
      - Google Docs / Slides: exported via the Drive API as text/plain.
      - PDF: parsed with read_pdf_file.
      - DOCX / PPTX: unstructured -> native parser -> MarkItDown chain.
      - Plain text / Markdown: raw bytes decoded directly.
      - Anything else: generic MarkItDown/unstructured fallback.

    Any failure (or empty extraction) produces a single section holding the
    unsupported-content placeholder.
    """
    link = file["webViewLink"]
    mime_type = file["mimeType"]
    file_id = file["id"]
    file_name = file.get("name", file_id)

    try:
        if mime_type in (GDriveMimeType.DOC.value, GDriveMimeType.PPT.value):
            # Google-native formats cannot be downloaded; they must be exported.
            logger.debug(f"Extracting Google-native doc/presentation: {file_name}")
            exported = (
                service.files()
                .export(fileId=file_id, mimeType="text/plain")
                .execute()
            )
            text = exported.decode("utf-8", errors="replace").strip()
            if not text:
                logger.warning(
                    f"No text extracted from Google Docs/Slides file '{file_name}'."
                )
                text = UNSUPPORTED_FILE_TYPE_CONTENT
            return [Section(link=link, text=text)]

        # Every other format is downloaded as raw bytes first.
        raw_bytes = service.files().get_media(fileId=file_id).execute()

        if mime_type == GDriveMimeType.PDF.value:
            logger.debug(f"Extracting PDF content for '{file_name}'")
            text, _ = read_pdf_file(file=io.BytesIO(raw_bytes))
            if not text:
                logger.warning(
                    f"No text extracted from PDF '{file_name}' with read_pdf_file."
                )
                text = UNSUPPORTED_FILE_TYPE_CONTENT
            return [Section(link=link, text=text)]

        if mime_type == GDriveMimeType.WORD_DOC.value:
            logger.debug(f"Extracting DOCX content for '{file_name}'")
            extracted = _extract_docx_pptx_txt(raw_bytes, file, "docx")
            return [Section(link=link, text=extracted)]

        if mime_type == GDriveMimeType.POWERPOINT.value:
            logger.debug(f"Extracting PPTX content for '{file_name}'")
            extracted = _extract_docx_pptx_txt(raw_bytes, file, "pptx")
            return [Section(link=link, text=extracted)]

        if mime_type in (
            GDriveMimeType.PLAIN_TEXT.value,
            GDriveMimeType.MARKDOWN.value,
        ):
            logger.debug(f"Extracting plain text/markdown content for '{file_name}'")
            text = raw_bytes.decode("utf-8", errors="replace").strip()
            if not text:
                logger.warning(
                    f"No text extracted from TXT/MD '{file_name}'. Returning unsupported message."
                )
                text = UNSUPPORTED_FILE_TYPE_CONTENT
            return [Section(link=link, text=text)]

        # Remaining supported formats go through the generic fallback chain.
        logger.debug(f"Trying MarkItDown/unstructured fallback for '{file_name}'")
        fallback_text = _extract_docx_pptx_txt(raw_bytes, file, None)  # generic fallback
        return [Section(link=link, text=fallback_text)]
    except Exception as e:
        logger.error(
            f"Error extracting file content for '{file_name}': {e}", exc_info=True
        )
        return [Section(link=link, text=UNSUPPORTED_FILE_TYPE_CONTENT)]
def _extract_docx_pptx_txt(
    content: bytes, file: dict[str, str], file_type: str | None
) -> str:
    """Best-effort text extraction with a chain of fallbacks.

    1. unstructured, when an API key is configured.
    2. A format-specific parser (docx_to_text / pptx_to_text) when the
       caller identified the format via *file_type*.
    3. MarkItDown as the generic last resort.

    Returns the unsupported-file placeholder when every stage fails or
    yields only whitespace.
    """
    file_name = file.get("name", file["id"])

    # Stage 1: unstructured API (only when configured).
    if get_unstructured_api_key():
        try:
            logger.debug(f"Attempting unstructured extraction for '{file_name}'...")
            extracted = unstructured_to_text(io.BytesIO(content), file_name)
            if extracted.strip():
                return extracted
            logger.warning(f"Unstructured returned empty text for '{file_name}'.")
        except Exception as e:
            logger.warning(f"Unstructured extraction failed for '{file_name}': {e}")

    # Stage 2: native parser for formats we recognize explicitly.
    native_parsers = {
        "docx": ("docx_to_text", docx_to_text),
        "pptx": ("pptx_to_text", pptx_to_text),
    }
    if file_type in native_parsers:
        parser_name, parser = native_parsers[file_type]
        try:
            logger.debug(f"Trying {parser_name} for '{file_name}'...")
            extracted = parser(file=io.BytesIO(content))
            if extracted.strip():
                return extracted
            logger.warning(f"{parser_name} returned empty for '{file_name}'.")
        except Exception as e:
            logger.warning(f"{parser_name} failed for '{file_name}': {e}")

    # Stage 3: MarkItDown as the last automated attempt.
    try:
        logger.debug(f"Falling back to MarkItDown for '{file_name}'...")
        converted = MarkItDown().convert(io.BytesIO(content))
        if converted and converted.text_content and converted.text_content.strip():
            return converted.text_content
        logger.warning(f"MarkItDown returned empty text for '{file_name}'.")
    except Exception as e:
        logger.error(
            f"MarkItDown conversion failed for '{file_name}': {e}", exc_info=True
        )

    # Nothing produced usable text.
    logger.error(
        f"All extraction methods failed for '{file_name}', returning unsupported file message."
    )
    return UNSUPPORTED_FILE_TYPE_CONTENT
def build_slim_document(file: GoogleDriveFileType) -> SlimDocument | None:

View File

@@ -14,10 +14,9 @@ from typing import IO
import chardet
import docx # type: ignore
import openpyxl # type: ignore
import pptx # type: ignore
from docx import Document
from fastapi import UploadFile
from markitdown import MarkItDown # type: ignore
from pypdf import PdfReader
from pypdf.errors import PdfStreamError
@@ -60,6 +59,9 @@ VALID_FILE_EXTENSIONS = PLAIN_TEXT_FILE_EXTENSIONS + [
".html",
]
# These are the file extensions that we use markitdown for
MARKITDOWN_FILE_EXTENSIONS = [".docx", ".pptx", ".xlsx"]
def is_text_file_extension(file_name: str) -> bool:
    """Return True when *file_name* ends with a known plain-text extension."""
    return file_name.endswith(tuple(PLAIN_TEXT_FILE_EXTENSIONS))
@@ -74,6 +76,10 @@ def is_valid_file_ext(ext: str) -> bool:
return ext in VALID_FILE_EXTENSIONS
def is_markitdown_file_ext(ext: str) -> bool:
    """Return True when *ext* is one of the extensions routed through MarkItDown."""
    for markitdown_ext in MARKITDOWN_FILE_EXTENSIONS:
        if ext == markitdown_ext:
            return True
    return False
def is_text_file(file: IO[bytes]) -> bool:
"""
checks if the first 1024 bytes only contain printable or whitespace characters
@@ -185,13 +191,6 @@ def read_text_file(
return file_content_raw, metadata
def pdf_to_text(file: IO[Any], pdf_pass: str | None = None) -> str:
    """Extract text from a PDF file."""
    # Delegate to read_pdf_file and discard the metadata it also returns.
    extracted_text, _metadata = read_pdf_file(file, pdf_pass)
    return extracted_text
def read_pdf_file(
file: IO[Any],
pdf_pass: str | None = None,
@@ -299,16 +298,11 @@ def pptx_to_text(file: IO[Any]) -> str:
return TEXT_SECTION_SEPARATOR.join(text_content)
def xlsx_to_text(file: IO[Any]) -> str:
    """Extract text from an .xlsx workbook.

    Each worksheet is rendered as comma-separated lines (one line per row,
    cells stringified via ``str``), and the sheets are joined with
    ``TEXT_SECTION_SEPARATOR``.

    Fix: openpyxl read-only workbooks keep the underlying file handle open
    until explicitly closed, so close the workbook when done.
    """
    workbook = openpyxl.load_workbook(file, read_only=True)
    try:
        text_content = []
        for sheet in workbook.worksheets:
            sheet_string = "\n".join(
                ",".join(map(str, row))
                for row in sheet.iter_rows(min_row=1, values_only=True)
            )
            text_content.append(sheet_string)
        return TEXT_SECTION_SEPARATOR.join(text_content)
    finally:
        # Required in read_only mode to release the source file handle.
        workbook.close()
def pdf_to_text(file: IO[Any], pdf_pass: str | None = None) -> str:
    """Extract text from a PDF file.

    Thin wrapper over ``read_pdf_file`` that discards the second element of
    its result and keeps only the extracted text. ``pdf_pass`` is forwarded
    unchanged — presumably the password for encrypted PDFs; confirm against
    ``read_pdf_file``.
    """
    # Return only the extracted text from read_pdf_file
    text, _ = read_pdf_file(file, pdf_pass)
    return text
def eml_to_text(file: IO[Any]) -> str:
@@ -346,9 +340,6 @@ def extract_file_text(
) -> str:
extension_to_function: dict[str, Callable[[IO[Any]], str]] = {
".pdf": pdf_to_text,
".docx": docx_to_text,
".pptx": pptx_to_text,
".xlsx": xlsx_to_text,
".eml": eml_to_text,
".epub": epub_to_text,
".html": parse_html_page_basic,
@@ -358,6 +349,8 @@ def extract_file_text(
if get_unstructured_api_key():
return unstructured_to_text(file, file_name)
md = MarkItDown()
if file_name or extension:
if extension is not None:
final_extension = extension
@@ -365,6 +358,12 @@ def extract_file_text(
final_extension = get_file_ext(file_name)
if is_valid_file_ext(final_extension):
if is_markitdown_file_ext(final_extension):
with BytesIO(file.read()) as file_like_object:
result = md.convert_stream(
file_like_object, file_extension=final_extension
)
return result.text_content
return extension_to_function.get(final_extension, file_io_to_text)(file)
# Either the file somehow has no name or the extension is not one that we recognize
@@ -382,29 +381,37 @@ def extract_file_text(
return ""
def convert_docx_to_txt(
def convert_docx_to_markdown(
file: UploadFile, file_store: FileStore, file_path: str
) -> None:
file.file.seek(0)
docx_content = file.file.read()
doc = Document(BytesIO(docx_content))
try:
# Read the file content
file_content = file.file.read()
# Extract text from the document
full_text = []
for para in doc.paragraphs:
full_text.append(para.text)
if not file_content:
raise ValueError(f"File {file.filename} is empty")
# Join the extracted text
text_content = "\n".join(full_text)
# Reset the file pointer to the beginning
file.file.seek(0)
txt_file_path = docx_to_txt_filename(file_path)
file_store.save_file(
file_name=txt_file_path,
content=BytesIO(text_content.encode("utf-8")),
display_name=file.filename,
file_origin=FileOrigin.CONNECTOR,
file_type="text/plain",
)
text_content = extract_file_text(
file=file.file, file_name=file.filename or "", extension=".docx"
)
if not text_content:
raise ValueError(f"Failed to extract text from {file.filename}")
txt_file_path = docx_to_txt_filename(file_path)
file_store.save_file(
file_name=txt_file_path,
content=BytesIO(text_content.encode("utf-8")),
display_name=file.filename,
file_origin=FileOrigin.CONNECTOR,
file_type="text/plain",
)
except Exception as e:
logger.error(f"Error converting DOCX to Markdown: {str(e)}")
raise RuntimeError(f"Failed to process file {file.filename}: {str(e)}") from e
def docx_to_txt_filename(file_path: str) -> str:

View File

@@ -87,7 +87,7 @@ from onyx.db.models import SearchSettings
from onyx.db.models import User
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_secondary_search_settings
from onyx.file_processing.extract_file_text import convert_docx_to_txt
from onyx.file_processing.extract_file_text import convert_docx_to_markdown
from onyx.file_store.file_store import get_default_file_store
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.redis.redis_connector import RedisConnector
@@ -396,11 +396,12 @@ def upload_files(
file_origin=FileOrigin.CONNECTOR,
file_type=file.content_type or "text/plain",
)
file.file.seek(0)
if file.content_type and file.content_type.startswith(
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"
):
convert_docx_to_txt(file, file_store, file_path)
convert_docx_to_markdown(file, file_store, file_path)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))

View File

@@ -81,4 +81,5 @@ stripe==10.12.0
urllib3==2.2.3
mistune==0.8.4
sentry-sdk==2.14.0
prometheus_client==0.21.0
prometheus_client==0.21.0
markitdown==0.0.1a3

View File

@@ -6,6 +6,7 @@ import {
} from "@/app/chat/message/MemoizedTextComponents";
import React, { useMemo } from "react";
import ReactMarkdown from "react-markdown";
import rehypePrism from "rehype-prism-plus";
import remarkGfm from "remark-gfm";
interface MinimalMarkdownProps {
@@ -35,9 +36,10 @@ export const MinimalMarkdown: React.FC<MinimalMarkdownProps> = ({
return (
<ReactMarkdown
className={`w-full text-wrap break-word ${className}`}
className={`prose max-w-full text-base ${className}`}
components={markdownComponents}
remarkPlugins={[remarkGfm]}
rehypePlugins={[[rehypePrism, { ignoreMissing: true }]]}
>
{content}
</ReactMarkdown>

View File

@@ -21,11 +21,11 @@ export default function TextView({
onClose,
}: TextViewProps) {
const [zoom, setZoom] = useState(100);
const [fileContent, setFileContent] = useState<string>("");
const [fileUrl, setFileUrl] = useState<string>("");
const [fileName, setFileName] = useState<string>("");
const [fileContent, setFileContent] = useState("");
const [fileUrl, setFileUrl] = useState("");
const [fileName, setFileName] = useState("");
const [isLoading, setIsLoading] = useState(true);
const [fileType, setFileType] = useState<string>("application/octet-stream");
const [fileType, setFileType] = useState("application/octet-stream");
const isMarkdownFormat = (mimeType: string): boolean => {
const markdownFormats = [
@@ -51,18 +51,17 @@ export default function TextView({
const fetchFile = useCallback(async () => {
setIsLoading(true);
const fileId = presentingDocument.document_id.split("__")[1];
try {
const fileId = presentingDocument.document_id.split("__")[1];
const response = await fetch(
`/api/chat/file/${encodeURIComponent(fileId)}`,
{
method: "GET",
}
`/api/chat/file/${encodeURIComponent(fileId)}`
);
const blob = await response.blob();
const url = window.URL.createObjectURL(blob);
setFileUrl(url);
setFileName(presentingDocument.semantic_identifier || "document");
const contentType =
response.headers.get("Content-Type") || "application/octet-stream";
setFileType(contentType);
@@ -70,9 +69,28 @@ export default function TextView({
if (isMarkdownFormat(blob.type)) {
const text = await blob.text();
setFileContent(text);
} else if (blob.type === "application/octet-stream") {
try {
const text = await blob.text();
let nonPrintingCount = 0;
for (let i = 0; i < text.length; i++) {
const code = text.charCodeAt(i);
if (code < 32 && ![9, 10, 13].includes(code)) {
nonPrintingCount++;
}
}
const ratio = nonPrintingCount / text.length;
if (ratio < 0.05) {
setFileContent(text);
setFileType("text/plain");
}
} catch (err) {
console.error("Failed to parse octet-stream as text", err);
}
}
} catch (error) {
console.error("Error fetching file:", error);
} catch (err) {
console.error("Error fetching file:", err);
} finally {
setTimeout(() => {
setIsLoading(false);
@@ -137,7 +155,7 @@ export default function TextView({
</div>
) : (
<div
className={`w-full h-full transform origin-center transition-transform duration-300 ease-in-out`}
className="w-full h-full transform origin-center transition-transform duration-300 ease-in-out"
style={{ transform: `scale(${zoom / 100})` }}
>
{isSupportedIframeFormat(fileType) ? (
@@ -146,7 +164,7 @@ export default function TextView({
className="w-full h-full border-none"
title="File Viewer"
/>
) : isMarkdownFormat(fileType) ? (
) : isMarkdownFormat(fileType) || fileType === "text/plain" ? (
<div className="w-full h-full p-6 overflow-y-scroll overflow-x-hidden">
<MinimalMarkdown
content={fileContent}