Compare commits

...

5 Commits

Author SHA1 Message Date
-LAN-  43c2b869b3  chore(api): slim Dockerfile by installing wheels without venv  2025-12-05 12:59:38 +08:00
-LAN-  ae3caa6160  Address Dockerfile caching and dockerignore cleanup  2025-12-05 12:59:38 +08:00
-LAN-  849e86ee6a  Simplify Dockerfile download commands  2025-12-05 12:58:34 +08:00
-LAN-  ada9aafe3e  Fix Dockerfile heredoc continuation  2025-12-05 12:58:10 +08:00
-LAN-  cf51c9cb01  Optimize API Dockerfile size  2025-12-05 12:57:36 +08:00
2 changed files with 45 additions and 23 deletions

.dockerignore

@@ -1,6 +1,26 @@
.env
*.env.*
# VCS and editor noise
.git
.DS_Store
__pycache__/
*.pyc
*.pyo
*.swp
.vscode
# Large or irrelevant project files
docs/
dev/
images/
# Local caches
.pytest_cache
.ruff_cache
.mypy_cache
storage/generate_files/*
storage/privkeys/*
storage/tools/*
@@ -12,8 +32,6 @@ logs
# jetbrains
.idea
.mypy_cache
.ruff_cache
# venv
.venv

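The added ignore patterns keep VCS metadata, editor files, docs, and local caches out of the build context, which shrinks what the client sends to the daemon and avoids needless cache invalidation. One rough way to check the effect, assuming BuildKit (the default builder in current Docker) and an illustrative tag, is to compare the reported context size before and after the change:

# BuildKit prints how much build context it transfers to the daemon
docker build --progress=plain -t dify-api:ctx-check . 2>&1 | grep "transferring context"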
Dockerfile

@@ -19,11 +19,15 @@ RUN apt-get update \
# basic environment
g++ \
# for building gmpy2
libmpfr-dev libmpc-dev
libmpfr-dev libmpc-dev \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies
COPY pyproject.toml uv.lock ./
RUN uv sync --locked --no-dev
# Export without hashes because we'll build local wheels (hashes would mismatch)
RUN uv export --locked --no-dev --format requirements.txt --no-hashes --output-file /tmp/requirements.txt \
&& pip wheel --no-cache-dir -r /tmp/requirements.txt -w /wheels \
&& uv cache prune --ci
# production stage
FROM base AS production
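The packages stage no longer runs uv sync to build a virtualenv; instead it exports the locked, non-dev dependency set to a requirements file and pre-builds wheels, so the production stage can install them with plain pip. A sketch of the same flow outside Docker, assuming uv and pip are on PATH and with illustrative paths:

# Export the locked deps without hashes (locally built wheels would not match the recorded hashes)
uv export --locked --no-dev --format requirements.txt --no-hashes --output-file requirements.txt
# Build wheels once, then install them offline from the local wheel directory
pip wheel --no-cache-dir -r requirements.txt -w ./wheels
pip install --no-cache-dir --no-index --find-links ./wheels -r requirements.txt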
@@ -54,10 +58,10 @@ RUN groupadd -r -g ${dify_uid} dify && \
useradd -r -u ${dify_uid} -g ${dify_uid} -s /bin/bash dify && \
chown -R dify:dify /app
RUN \
apt-get update \
RUN set -eux; \
apt-get update; \
# Install dependencies
&& apt-get install -y --no-install-recommends \
apt-get install -y --no-install-recommends \
# basic environment
curl nodejs \
# for gmpy2 \
@@ -67,33 +71,33 @@ RUN \
# install fonts to support the use of tools like pypdfium2
fonts-noto-cjk \
# install a package to improve the accuracy of guessing mime type and file extension
media-types \
# install libmagic to support the use of python-magic guess MIMETYPE
libmagic1 \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/*
media-types \
# install libmagic to support the use of python-magic guess MIMETYPE
libmagic1; \
apt-get autoremove -y; \
rm -rf /var/lib/apt/lists/*
# Copy Python environment and packages
ENV VIRTUAL_ENV=/app/api/.venv
COPY --from=packages --chown=dify:dify ${VIRTUAL_ENV} ${VIRTUAL_ENV}
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
# Download nltk data
RUN mkdir -p /usr/local/share/nltk_data && NLTK_DATA=/usr/local/share/nltk_data python -c "import nltk; nltk.download('punkt'); nltk.download('averaged_perceptron_tagger'); nltk.download('stopwords')" \
&& chmod -R 755 /usr/local/share/nltk_data
# Install Python packages from prebuilt wheels (no virtualenv to avoid copying ~1.8GB layer)
COPY --from=packages /tmp/requirements.txt /tmp/requirements.txt
COPY --from=packages /wheels /wheels
RUN pip install --no-cache-dir --no-index --find-links /wheels -r /tmp/requirements.txt \
&& rm -rf /wheels /tmp/requirements.txt
ENV TIKTOKEN_CACHE_DIR=/app/api/.tiktoken_cache
RUN python -c "import tiktoken; tiktoken.encoding_for_model('gpt2')" \
# Pre-fetch NLTK data and warm tiktoken cache before copying source to maximize layer reuse
RUN set -eux; \
mkdir -p /usr/local/share/nltk_data; \
NLTK_DATA=/usr/local/share/nltk_data python -c "import nltk; nltk.download('punkt'); nltk.download('averaged_perceptron_tagger'); nltk.download('stopwords')"; \
chmod -R 755 /usr/local/share/nltk_data; \
python -c "import tiktoken; tiktoken.encoding_for_model('gpt2')" \
&& chown -R dify:dify ${TIKTOKEN_CACHE_DIR}
# Copy source code
COPY --chown=dify:dify . /app/api/
# Prepare entrypoint script
COPY --chown=dify:dify --chmod=755 docker/entrypoint.sh /entrypoint.sh
ARG COMMIT_SHA
ENV COMMIT_SHA=${COMMIT_SHA}
ENV NLTK_DATA=/usr/local/share/nltk_data
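As the comment in the diff notes, copying the packages stage's virtualenv previously produced a layer of roughly 1.8 GB; installing from prebuilt wheels and deleting them in the same RUN avoids that copy. A rough way to see where the size goes after rebuilding, with illustrative tags:

# Build the image and inspect per-layer sizes; the pip-install layer replaces the old venv COPY layer
docker build -t dify-api:slim -f Dockerfile .
docker image ls dify-api
docker history dify-api:slim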