Compare commits

..

3 Commits

Author SHA1 Message Date
pablonyx
61676620ab nit 2025-03-30 12:55:18 -07:00
pablonyx
edaca1f58b update 2025-03-30 12:13:31 -07:00
pablonyx
1f280bafca quick fix 2025-03-30 12:13:31 -07:00
6 changed files with 27 additions and 249 deletions

View File

@@ -25,14 +25,8 @@ from onyx.configs.constants import OnyxRedisLocks
from onyx.configs.constants import OnyxRedisSignals
from onyx.db.connector import fetch_connector_by_id
from onyx.db.connector_credential_pair import add_deletion_failure_message
from onyx.db.connector_credential_pair import (
delete_connector_credential_pair__no_commit,
)
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.connector_credential_pair import get_connector_credential_pairs
from onyx.db.document import (
delete_all_documents_by_connector_credential_pair__no_commit,
)
from onyx.db.document import get_document_ids_for_connector_credential_pair
from onyx.db.document_set import delete_document_set_cc_pair_relationship__no_commit
from onyx.db.engine import get_session_with_current_tenant
@@ -449,27 +443,15 @@ def monitor_connector_deletion_taskset(
connector_id_to_delete = cc_pair.connector_id
credential_id_to_delete = cc_pair.credential_id
# Explicitly delete document by connector credential pair records before deleting the connector
# This is needed because connector_id is a primary key in that table and cascading deletes won't work
delete_all_documents_by_connector_credential_pair__no_commit(
db_session=db_session,
connector_id=connector_id_to_delete,
credential_id=credential_id_to_delete,
)
# No need to explicitly delete DocumentByConnectorCredentialPair records anymore
# as we have proper cascade relationships set up in the models
# Flush to ensure document deletion happens before connector deletion
# Flush to ensure all operations happen in sequence
db_session.flush()
# Expire the cc_pair to ensure SQLAlchemy doesn't try to manage its state
# related to the deleted DocumentByConnectorCredentialPair during commit
db_session.expire(cc_pair)
# Delete the cc-pair directly
db_session.delete(cc_pair)
# finally, delete the cc-pair
delete_connector_credential_pair__no_commit(
db_session=db_session,
connector_id=connector_id_to_delete,
credential_id=credential_id_to_delete,
)
# if there are no credentials left, delete the connector
connector = fetch_connector_by_id(
db_session=db_session,

View File

@@ -555,28 +555,6 @@ def delete_documents_by_connector_credential_pair__no_commit(
db_session.execute(stmt)
def delete_all_documents_by_connector_credential_pair__no_commit(
    db_session: Session,
    connector_id: int,
    credential_id: int,
) -> None:
    """Remove every DocumentByConnectorCredentialPair row of one cc-pair.

    Executes a single DELETE statement filtered on both `connector_id` and
    `credential_id`.

    Intended for the connector deletion flow: the rows referencing the
    connector have to be gone before the Connector itself is deleted.
    `connector_id` is part of the primary key of
    DocumentByConnectorCredentialPair, so deleting the Connector first would
    try to set that foreign key to NULL, which fails for primary keys.

    NOTE: The transaction is not committed here; the caller must commit.
    """
    table = DocumentByConnectorCredentialPair

    # Both columns must match: only this exact connector/credential pairing
    # is targeted.
    pair_filter = and_(
        table.connector_id == connector_id,
        table.credential_id == credential_id,
    )

    db_session.execute(delete(table).where(pair_filter))
def delete_documents__no_commit(db_session: Session, document_ids: list[str]) -> None:
db_session.execute(delete(DbDocument).where(DbDocument.id.in_(document_ids)))

View File

@@ -694,7 +694,11 @@ class Connector(Base):
)
documents_by_connector: Mapped[
list["DocumentByConnectorCredentialPair"]
] = relationship("DocumentByConnectorCredentialPair", back_populates="connector")
] = relationship(
"DocumentByConnectorCredentialPair",
back_populates="connector",
cascade="all, delete-orphan",
)
# synchronize this validation logic with RefreshFrequencySchema etc on front end
# until we have a centralized validation schema
@@ -748,7 +752,11 @@ class Credential(Base):
)
documents_by_credential: Mapped[
list["DocumentByConnectorCredentialPair"]
] = relationship("DocumentByConnectorCredentialPair", back_populates="credential")
] = relationship(
"DocumentByConnectorCredentialPair",
back_populates="credential",
cascade="all, delete-orphan",
)
user: Mapped[User | None] = relationship("User", back_populates="credentials")

View File

@@ -450,6 +450,7 @@ export const AIMessage = ({
)}
</>
) : null}
{toolCall &&
!TOOLS_WITH_CUSTOM_HANDLING.includes(
toolCall.tool_name
@@ -466,10 +467,12 @@ export const AIMessage = ({
isRunning={!toolCall.tool_result || !content}
/>
)}
{toolCall &&
(!files || files.length == 0) &&
toolCall.tool_name === IMAGE_GENERATION_TOOL_NAME &&
!toolCall.tool_result && <GeneratingImageDisplay />}
{toolCall &&
toolCall.tool_name === INTERNET_SEARCH_TOOL_NAME && (
<ToolRunDisplay
@@ -484,6 +487,7 @@ export const AIMessage = ({
isRunning={!toolCall.tool_result}
/>
)}
{docs && docs.length > 0 && (
<div
className={`mobile:hidden ${
@@ -518,6 +522,7 @@ export const AIMessage = ({
</div>
</div>
)}
{content || files ? (
<>
<FileDisplay files={files || []} />
@@ -969,7 +974,7 @@ export const HumanMessage = ({
</div>
) : typeof content === "string" ? (
<>
<div className="ml-auto flex items-center mr-1 mt-2 h-fit mb-auto">
<div className="ml-auto flex items-center mr-1 h-fit my-auto">
{onEdit &&
isHovered &&
!isEditing &&

View File

@@ -1,133 +0,0 @@
import { preprocessLaTeX } from "./codeUtils";
// Jest suite for preprocessLaTeX (imported from "./codeUtils").
// Every case feeds one input string through preprocessLaTeX and checks the
// result with substring assertions (toContain / not.toContain), so only the
// asserted fragments are pinned, never the full output string.
// The groups cover: currency amounts, fenced and inline code, LaTeX
// delimiters, and shell-style "$" usage in prose.
describe("preprocessLaTeX", () => {
  // "$<amount>" in plain prose is expected to come back as "\$<amount>".
  describe("currency formatting", () => {
    it("should properly escape dollar signs in text with amounts", () => {
      const input =
        "Maria wants to buy a new laptop that costs $1,200. She has saved $800 so far. If she saves an additional $100 each month, how many months will it take her to have enough money to buy the laptop?";
      const processed = preprocessLaTeX(input);
      // Should escape all dollar signs in currency amounts
      expect(processed).toContain("costs \\$1,200");
      expect(processed).toContain("saved \\$800");
      expect(processed).toContain("additional \\$100");
      expect(processed).not.toContain("costs $1,200");
    });
    it("should handle dollar signs with backslashes already present", () => {
      const input =
        "Maria wants to buy a new laptop that costs \\$1,200. She has saved \\$800 so far.";
      const processed = preprocessLaTeX(input);
      // Should preserve the existing escaped dollar signs
      // NOTE(review): toContain is a substring match, so these assertions
      // would also pass if a second backslash were prepended; the test does
      // not guard against double-escaping.
      expect(processed).toContain("\\$1,200");
      expect(processed).toContain("\\$800");
    });
  });
  // "$" inside fenced code blocks or inline backticks must pass through
  // unchanged, while "$<amount>" outside of code is still escaped.
  describe("code block handling", () => {
    it("should not process dollar signs in code blocks", () => {
      // The whole input is a single fenced block.
      const input = "```plaintext\nThe total cost is $50.\n```";
      const processed = preprocessLaTeX(input);
      // Dollar sign in code block should remain untouched
      expect(processed).toContain("The total cost is $50.");
      expect(processed).not.toContain("The total cost is \\$50.");
    });
    it("should not process dollar signs in inline code", () => {
      const input =
        'Use the `printf "$%.2f" $amount` command to format currency.';
      const processed = preprocessLaTeX(input);
      // Dollar signs in inline code should remain untouched
      expect(processed).toContain('`printf "$%.2f" $amount`');
      expect(processed).not.toContain('`printf "\\$%.2f" \\$amount`');
    });
    it("should handle mixed content with code blocks and currency", () => {
      // Prose amounts surround a fenced block that itself contains "$50".
      const input =
        "The cost is $100.\n\n```javascript\nconst price = '$50';\n```\n\nThe remaining balance is $50.";
      const processed = preprocessLaTeX(input);
      // Dollar signs outside code blocks should be escaped
      expect(processed).toContain("The cost is \\$100");
      expect(processed).toContain("The remaining balance is \\$50");
      // Dollar sign in code block should be preserved
      expect(processed).toContain("const price = '$50';");
      expect(processed).not.toContain("const price = '\\$50';");
    });
  });
  // Delimiter conversion: "\[ ... \]" -> "$$ ... $$" and "\( ... \)" ->
  // "$ ... $"; existing "$ ... $" math is left as-is.
  describe("LaTeX handling", () => {
    it("should preserve proper LaTeX delimiters", () => {
      const input =
        "The formula $x^2 + y^2 = z^2$ represents the Pythagorean theorem.";
      const processed = preprocessLaTeX(input);
      // LaTeX delimiters should be preserved
      expect(processed).toContain("$x^2 + y^2 = z^2$");
    });
    it("should convert LaTeX block delimiters", () => {
      const input = "Consider the equation: \\[E = mc^2\\]";
      const processed = preprocessLaTeX(input);
      // Block LaTeX delimiters should be converted
      expect(processed).toContain("$$E = mc^2$$");
      expect(processed).not.toContain("\\[E = mc^2\\]");
    });
    it("should convert LaTeX inline delimiters", () => {
      const input =
        "The speed of light \\(c\\) is approximately 299,792,458 m/s.";
      const processed = preprocessLaTeX(input);
      // Inline LaTeX delimiters should be converted
      expect(processed).toContain("$c$");
      expect(processed).not.toContain("\\(c\\)");
    });
  });
  // Shell-style "$" usage appearing in prose, outside of backticks or fences.
  describe("special cases", () => {
    it("should handle shell variables in text", () => {
      const input =
        "In bash, you can access arguments with $1, $2, and use echo $HOME to print the home directory.";
      const processed = preprocessLaTeX(input);
      // Verify current behavior (numeric shell variables are being escaped)
      // This pins existing behavior rather than a desired one: "$1" / "$2"
      // in plain prose are treated like currency amounts.
      expect(processed).toContain("\\$1");
      expect(processed).toContain("\\$2");
      // But $HOME is not escaped (non-numeric)
      expect(processed).toContain("$HOME");
    });
    it("should handle shell commands with dollar signs", () => {
      // Unlike the bare "$1" / "$2" case above, "$2" inside the awk braces is
      // expected to stay unescaped.
      const input = "Use awk '{print $2}' to print the second column.";
      const processed = preprocessLaTeX(input);
      // Dollar sign in awk command should not be escaped
      expect(processed).toContain("{print $2}");
      expect(processed).not.toContain("{print \\$2}");
    });
    it("should handle Einstein's equation with mixed LaTeX and code blocks", () => {
      // One input combining inline "\( \)", a fenced ```latex block and a
      // block-level "\[ \]".
      const input =
        "Sure! The equation for Einstein's mass-energy equivalence, \\(E = mc^2\\), can be written in LaTeX as follows: ```latex\nE = mc^2\n``` When rendered, it looks like this: \\[ E = mc^2 \\]";
      const processed = preprocessLaTeX(input);
      // LaTeX inline delimiters should be converted
      expect(processed).toContain("equivalence, $E = mc^2$,");
      expect(processed).not.toContain("equivalence, \\(E = mc^2\\),");
      // LaTeX block delimiters should be converted
      expect(processed).toContain("it looks like this: $$ E = mc^2 $$");
      expect(processed).not.toContain("it looks like this: \\[ E = mc^2 \\]");
      // LaTeX within code blocks should remain untouched
      expect(processed).toContain("```latex\nE = mc^2\n```");
    });
  });
});

View File

@@ -59,82 +59,20 @@ export function extractCodeText(
return codeText || "";
}
// We must preprocess LaTeX in the LLM output to avoid improper formatting
export const preprocessLaTeX = (content: string) => {
// First detect if content is within a code block
const codeBlockRegex = /^```[\s\S]*?```$/;
const isCodeBlock = codeBlockRegex.test(content.trim());
// If the entire content is a code block, don't process LaTeX
if (isCodeBlock) {
return content;
}
// Extract code blocks and replace with placeholders
const codeBlocks: string[] = [];
const withCodeBlocksReplaced = content.replace(/```[\s\S]*?```/g, (match) => {
const placeholder = `___CODE_BLOCK_${codeBlocks.length}___`;
codeBlocks.push(match);
return placeholder;
});
// First, protect code-like expressions where $ is used for variables
const codeProtected = withCodeBlocksReplaced.replace(
/\b(\w+(?:\s*-\w+)*\s*(?:'[^']*')?)\s*\{[^}]*?\$\d+[^}]*?\}/g,
(match) => {
// Replace $ with a temporary placeholder in code contexts
return match.replace(/\$/g, "___DOLLAR_PLACEHOLDER___");
}
);
// Also protect common shell variable patterns like $1, $2, etc.
const shellProtected = codeProtected.replace(
/\b(?:print|echo|awk|sed|grep)\s+.*?\$\d+/g,
(match) => match.replace(/\$/g, "___DOLLAR_PLACEHOLDER___")
);
// Protect inline code blocks with backticks
const inlineCodeProtected = shellProtected.replace(/`[^`]+`/g, (match) => {
return match.replace(/\$/g, "___DOLLAR_PLACEHOLDER___");
});
// Process LaTeX expressions now that code is protected
// Valid LaTeX should have matching dollar signs with non-space chars surrounding content
const processedForLatex = inlineCodeProtected.replace(
/\$([^\s$][^$]*?[^\s$])\$/g,
(_, equation) => `$${equation}$`
);
// Escape currency mentions
const currencyEscaped = processedForLatex.replace(
/\$(\d+(?:\.\d*)?)/g,
(_, p1) => `\\$${p1}`
);
// Replace block-level LaTeX delimiters \[ \] with $$ $$
const blockProcessed = currencyEscaped.replace(
// 1) Replace block-level LaTeX delimiters \[ \] with $$ $$
const blockProcessedContent = content.replace(
/\\\[([\s\S]*?)\\\]/g,
(_, equation) => `$$${equation}$$`
);
// Replace inline LaTeX delimiters \( \) with $ $
const inlineProcessed = blockProcessed.replace(
// 2) Replace inline LaTeX delimiters \( \) with $ $
const inlineProcessedContent = blockProcessedContent.replace(
/\\\(([\s\S]*?)\\\)/g,
(_, equation) => `$${equation}$`
);
// Restore original dollar signs in code contexts
const restoredDollars = inlineProcessed.replace(
/___DOLLAR_PLACEHOLDER___/g,
"$"
);
// Restore code blocks
const restoredCodeBlocks = restoredDollars.replace(
/___CODE_BLOCK_(\d+)___/g,
(_, index) => codeBlocks[parseInt(index)]
);
return restoredCodeBlocks;
return inlineProcessedContent;
};