Compare commits

...

2 Commits

Author SHA1 Message Date
Weves
d1df258501 . 2026-01-10 16:11:24 -08:00
Evan Lohn
95b0ca42a2 fix: vertex prompt caching 2026-01-10 15:57:31 -08:00
3 changed files with 7 additions and 1 deletion

View File

@@ -48,7 +48,7 @@ class VertexAIPromptCacheProvider(PromptCacheProvider):
cacheable_prefix=cacheable_prefix,
suffix=suffix,
continuation=continuation,
transform_cacheable=_add_vertex_cache_control,
transform_cacheable=None, # TODO: support explicit caching
)
def extract_cache_metadata(
@@ -89,6 +89,10 @@ def _add_vertex_cache_control(
not at the message level. This function converts string content to the array format
and adds cache_control to the last content block in each cacheable message.
"""
# NOTE: unfortunately we need a much more sophisticated mechanism to support
# explicit caching with vertex in the presence of tools and system messages
# (since they're supposed to be stripped out when setting cache_control)
# so we're deferring this to a future PR.
updated: list[ChatCompletionMessage] = []
for message in messages:
mutated = dict(message)

View File

@@ -397,6 +397,7 @@ def test_anthropic_prompt_caching_reduces_costs(
not os.environ.get(VERTEX_LOCATION_ENV),
reason="VERTEX_LOCATION required for Vertex AI context caching (e.g., 'us-central1')",
)
@pytest.mark.skip(reason="Vertex AI prompt caching is disabled for now")
def test_google_genai_prompt_caching_reduces_costs(
db_session: Session,
) -> None:

1
web/package-lock.json generated
View File

@@ -10308,6 +10308,7 @@
},
"node_modules/fsevents": {
"version": "2.3.2",
"dev": true,
"license": "MIT",
"optional": true,
"os": [