Compare commits

...

2 Commits

Author SHA1 Message Date
hagen-danswer
29befd4317 forgot semicolon (in python lol) 2024-08-12 11:24:24 -07:00
hagen-danswer
0600c91711 Added intfloat download to dockerfile 2024-08-12 11:19:52 -07:00
2 changed files with 4 additions and 1 deletion

View File

@@ -69,7 +69,8 @@ RUN apt-get update && \
# Pre-downloading models for setups with limited egress
RUN python -c "from tokenizers import Tokenizer; \
Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')"
Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1'); \
Tokenizer.from_pretrained('intfloat/e5-base-v2')"
# Pre-downloading NLTK for setups with limited egress

View File

@@ -26,9 +26,11 @@ AutoTokenizer.from_pretrained('distilbert-base-uncased', cache_folder='/root/.ca
AutoTokenizer.from_pretrained('mixedbread-ai/mxbai-rerank-xsmall-v1', cache_folder='/root/.cache/temp_huggingface/hub/'); \
from huggingface_hub import snapshot_download; \
snapshot_download(repo_id='danswer/hybrid-intent-token-classifier', revision='v1.0.3', cache_dir='/root/.cache/temp_huggingface/hub/'); \
snapshot_download('intfloat/e5-base-v2', cache_dir='/root/.cache/temp_huggingface/hub/'); \
snapshot_download('nomic-ai/nomic-embed-text-v1', cache_dir='/root/.cache/temp_huggingface/hub/'); \
snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1', cache_dir='/root/.cache/temp_huggingface/hub/'); \
from sentence_transformers import SentenceTransformer; \
SentenceTransformer(model_name_or_path='intfloat/e5-base-v2', trust_remote_code=True, cache_folder='/root/.cache/temp_huggingface/hub/'); \
SentenceTransformer(model_name_or_path='nomic-ai/nomic-embed-text-v1', trust_remote_code=True, cache_folder='/root/.cache/temp_huggingface/hub/');"
WORKDIR /app