Compare commits

...

2 Commits

Author SHA1 Message Date
Weves
d1df258501 . 2026-01-10 16:11:24 -08:00
Evan Lohn
95b0ca42a2 fix: vertex prompt caching 2026-01-10 15:57:31 -08:00
3 changed files with 7 additions and 1 deletion

View File

@@ -48,7 +48,7 @@ class VertexAIPromptCacheProvider(PromptCacheProvider):
cacheable_prefix=cacheable_prefix,
suffix=suffix,
continuation=continuation,
transform_cacheable=_add_vertex_cache_control,
transform_cacheable=None, # TODO: support explicit caching
)
def extract_cache_metadata(
@@ -89,6 +89,10 @@ def _add_vertex_cache_control(
not at the message level. This function converts string content to the array format
and adds cache_control to the last content block in each cacheable message.
"""
# NOTE: unfortunately we need a much more sophisticated mechanism to support
# explicit caching with vertex in the presence of tools and system messages
# (since they're supposed to be stripped out when setting cache_control)
# so we're deferring this to a future PR.
updated: list[ChatCompletionMessage] = []
for message in messages:
mutated = dict(message)

View File

@@ -397,6 +397,7 @@ def test_anthropic_prompt_caching_reduces_costs(
not os.environ.get(VERTEX_LOCATION_ENV),
reason="VERTEX_LOCATION required for Vertex AI context caching (e.g., 'us-central1')",
)
@pytest.mark.skip(reason="Vertex AI prompt caching is disabled for now")
def test_google_genai_prompt_caching_reduces_costs(
db_session: Session,
) -> None:

1
web/package-lock.json generated
View File

@@ -10308,6 +10308,7 @@
},
"node_modules/fsevents": {
"version": "2.3.2",
"dev": true,
"license": "MIT",
"optional": true,
"os": [