mirror of
https://github.com/langgenius/dify.git
synced 2025-12-19 22:28:46 +00:00
improve: pooling httpx clients for requests to code sandbox and ssrf (#26052)
Some checks are pending
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/amd64, build-api-amd64) (push) Waiting to run
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/arm64, build-api-arm64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/amd64, build-web-amd64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/arm64, build-web-arm64) (push) Waiting to run
Build and Push API & Web / create-manifest (api, DIFY_API_IMAGE_NAME, merge-api-images) (push) Blocked by required conditions
Build and Push API & Web / create-manifest (web, DIFY_WEB_IMAGE_NAME, merge-web-images) (push) Blocked by required conditions
Main CI Pipeline / Check Changed Files (push) Waiting to run
Main CI Pipeline / API Tests (push) Blocked by required conditions
Main CI Pipeline / Web Tests (push) Blocked by required conditions
Main CI Pipeline / Style Check (push) Waiting to run
Main CI Pipeline / VDB Tests (push) Blocked by required conditions
Main CI Pipeline / DB Migration Test (push) Blocked by required conditions
Some checks are pending
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/amd64, build-api-amd64) (push) Waiting to run
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/arm64, build-api-arm64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/amd64, build-web-amd64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/arm64, build-web-arm64) (push) Waiting to run
Build and Push API & Web / create-manifest (api, DIFY_API_IMAGE_NAME, merge-api-images) (push) Blocked by required conditions
Build and Push API & Web / create-manifest (web, DIFY_WEB_IMAGE_NAME, merge-web-images) (push) Blocked by required conditions
Main CI Pipeline / Check Changed Files (push) Waiting to run
Main CI Pipeline / API Tests (push) Blocked by required conditions
Main CI Pipeline / Web Tests (push) Blocked by required conditions
Main CI Pipeline / Style Check (push) Waiting to run
Main CI Pipeline / VDB Tests (push) Blocked by required conditions
Main CI Pipeline / DB Migration Test (push) Blocked by required conditions
This commit is contained in:
@@ -408,6 +408,9 @@ SSRF_DEFAULT_TIME_OUT=5
|
||||
SSRF_DEFAULT_CONNECT_TIME_OUT=5
|
||||
SSRF_DEFAULT_READ_TIME_OUT=5
|
||||
SSRF_DEFAULT_WRITE_TIME_OUT=5
|
||||
SSRF_POOL_MAX_CONNECTIONS=100
|
||||
SSRF_POOL_MAX_KEEPALIVE_CONNECTIONS=20
|
||||
SSRF_POOL_KEEPALIVE_EXPIRY=5.0
|
||||
|
||||
BATCH_UPLOAD_LIMIT=10
|
||||
KEYWORD_DATA_SOURCE_TYPE=database
|
||||
@@ -418,6 +421,10 @@ WORKFLOW_FILE_UPLOAD_LIMIT=10
|
||||
# CODE EXECUTION CONFIGURATION
|
||||
CODE_EXECUTION_ENDPOINT=http://127.0.0.1:8194
|
||||
CODE_EXECUTION_API_KEY=dify-sandbox
|
||||
CODE_EXECUTION_SSL_VERIFY=True
|
||||
CODE_EXECUTION_POOL_MAX_CONNECTIONS=100
|
||||
CODE_EXECUTION_POOL_MAX_KEEPALIVE_CONNECTIONS=20
|
||||
CODE_EXECUTION_POOL_KEEPALIVE_EXPIRY=5.0
|
||||
CODE_MAX_NUMBER=9223372036854775807
|
||||
CODE_MIN_NUMBER=-9223372036854775808
|
||||
CODE_MAX_STRING_LENGTH=80000
|
||||
|
||||
@@ -113,6 +113,21 @@ class CodeExecutionSandboxConfig(BaseSettings):
|
||||
default=10.0,
|
||||
)
|
||||
|
||||
CODE_EXECUTION_POOL_MAX_CONNECTIONS: PositiveInt = Field(
|
||||
description="Maximum number of concurrent connections for the code execution HTTP client",
|
||||
default=100,
|
||||
)
|
||||
|
||||
CODE_EXECUTION_POOL_MAX_KEEPALIVE_CONNECTIONS: PositiveInt = Field(
|
||||
description="Maximum number of persistent keep-alive connections for the code execution HTTP client",
|
||||
default=20,
|
||||
)
|
||||
|
||||
CODE_EXECUTION_POOL_KEEPALIVE_EXPIRY: PositiveFloat | None = Field(
|
||||
description="Keep-alive expiry in seconds for idle connections (set to None to disable)",
|
||||
default=5.0,
|
||||
)
|
||||
|
||||
CODE_MAX_NUMBER: PositiveInt = Field(
|
||||
description="Maximum allowed numeric value in code execution",
|
||||
default=9223372036854775807,
|
||||
@@ -153,6 +168,11 @@ class CodeExecutionSandboxConfig(BaseSettings):
|
||||
default=1000,
|
||||
)
|
||||
|
||||
CODE_EXECUTION_SSL_VERIFY: bool = Field(
|
||||
description="Enable or disable SSL verification for code execution requests",
|
||||
default=True,
|
||||
)
|
||||
|
||||
|
||||
class PluginConfig(BaseSettings):
|
||||
"""
|
||||
@@ -404,6 +424,21 @@ class HttpConfig(BaseSettings):
|
||||
default=5,
|
||||
)
|
||||
|
||||
SSRF_POOL_MAX_CONNECTIONS: PositiveInt = Field(
|
||||
description="Maximum number of concurrent connections for the SSRF HTTP client",
|
||||
default=100,
|
||||
)
|
||||
|
||||
SSRF_POOL_MAX_KEEPALIVE_CONNECTIONS: PositiveInt = Field(
|
||||
description="Maximum number of persistent keep-alive connections for the SSRF HTTP client",
|
||||
default=20,
|
||||
)
|
||||
|
||||
SSRF_POOL_KEEPALIVE_EXPIRY: PositiveFloat | None = Field(
|
||||
description="Keep-alive expiry in seconds for idle SSRF connections (set to None to disable)",
|
||||
default=5.0,
|
||||
)
|
||||
|
||||
RESPECT_XFORWARD_HEADERS_ENABLED: bool = Field(
|
||||
description="Enable handling of X-Forwarded-For, X-Forwarded-Proto, and X-Forwarded-Port headers"
|
||||
" when the app is behind a single trusted reverse proxy.",
|
||||
|
||||
@@ -4,7 +4,7 @@ from enum import StrEnum
|
||||
from threading import Lock
|
||||
from typing import Any
|
||||
|
||||
from httpx import Timeout, post
|
||||
import httpx
|
||||
from pydantic import BaseModel
|
||||
from yarl import URL
|
||||
|
||||
@@ -13,9 +13,17 @@ from core.helper.code_executor.javascript.javascript_transformer import NodeJsTe
|
||||
from core.helper.code_executor.jinja2.jinja2_transformer import Jinja2TemplateTransformer
|
||||
from core.helper.code_executor.python3.python3_transformer import Python3TemplateTransformer
|
||||
from core.helper.code_executor.template_transformer import TemplateTransformer
|
||||
from core.helper.http_client_pooling import get_pooled_http_client
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
code_execution_endpoint_url = URL(str(dify_config.CODE_EXECUTION_ENDPOINT))
|
||||
CODE_EXECUTION_SSL_VERIFY = dify_config.CODE_EXECUTION_SSL_VERIFY
|
||||
_CODE_EXECUTOR_CLIENT_LIMITS = httpx.Limits(
|
||||
max_connections=dify_config.CODE_EXECUTION_POOL_MAX_CONNECTIONS,
|
||||
max_keepalive_connections=dify_config.CODE_EXECUTION_POOL_MAX_KEEPALIVE_CONNECTIONS,
|
||||
keepalive_expiry=dify_config.CODE_EXECUTION_POOL_KEEPALIVE_EXPIRY,
|
||||
)
|
||||
_CODE_EXECUTOR_CLIENT_KEY = "code_executor:http_client"
|
||||
|
||||
|
||||
class CodeExecutionError(Exception):
|
||||
@@ -38,6 +46,13 @@ class CodeLanguage(StrEnum):
|
||||
JAVASCRIPT = "javascript"
|
||||
|
||||
|
||||
def _build_code_executor_client() -> httpx.Client:
|
||||
return httpx.Client(
|
||||
verify=CODE_EXECUTION_SSL_VERIFY,
|
||||
limits=_CODE_EXECUTOR_CLIENT_LIMITS,
|
||||
)
|
||||
|
||||
|
||||
class CodeExecutor:
|
||||
dependencies_cache: dict[str, str] = {}
|
||||
dependencies_cache_lock = Lock()
|
||||
@@ -76,17 +91,21 @@ class CodeExecutor:
|
||||
"enable_network": True,
|
||||
}
|
||||
|
||||
timeout = httpx.Timeout(
|
||||
connect=dify_config.CODE_EXECUTION_CONNECT_TIMEOUT,
|
||||
read=dify_config.CODE_EXECUTION_READ_TIMEOUT,
|
||||
write=dify_config.CODE_EXECUTION_WRITE_TIMEOUT,
|
||||
pool=None,
|
||||
)
|
||||
|
||||
client = get_pooled_http_client(_CODE_EXECUTOR_CLIENT_KEY, _build_code_executor_client)
|
||||
|
||||
try:
|
||||
response = post(
|
||||
response = client.post(
|
||||
str(url),
|
||||
json=data,
|
||||
headers=headers,
|
||||
timeout=Timeout(
|
||||
connect=dify_config.CODE_EXECUTION_CONNECT_TIMEOUT,
|
||||
read=dify_config.CODE_EXECUTION_READ_TIMEOUT,
|
||||
write=dify_config.CODE_EXECUTION_WRITE_TIMEOUT,
|
||||
pool=None,
|
||||
),
|
||||
timeout=timeout,
|
||||
)
|
||||
if response.status_code == 503:
|
||||
raise CodeExecutionError("Code execution service is unavailable")
|
||||
@@ -106,8 +125,8 @@ class CodeExecutor:
|
||||
|
||||
try:
|
||||
response_data = response.json()
|
||||
except:
|
||||
raise CodeExecutionError("Failed to parse response")
|
||||
except Exception as e:
|
||||
raise CodeExecutionError("Failed to parse response") from e
|
||||
|
||||
if (code := response_data.get("code")) != 0:
|
||||
raise CodeExecutionError(f"Got error code: {code}. Got error msg: {response_data.get('message')}")
|
||||
|
||||
59
api/core/helper/http_client_pooling.py
Normal file
59
api/core/helper/http_client_pooling.py
Normal file
@@ -0,0 +1,59 @@
|
||||
"""HTTP client pooling utilities."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import atexit
|
||||
import threading
|
||||
from collections.abc import Callable
|
||||
|
||||
import httpx
|
||||
|
||||
ClientBuilder = Callable[[], httpx.Client]
|
||||
|
||||
|
||||
class HttpClientPoolFactory:
|
||||
"""Thread-safe factory that maintains reusable HTTP client instances."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._clients: dict[str, httpx.Client] = {}
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def get_or_create(self, key: str, builder: ClientBuilder) -> httpx.Client:
|
||||
"""Return a pooled client associated with ``key`` creating it on demand."""
|
||||
client = self._clients.get(key)
|
||||
if client is not None:
|
||||
return client
|
||||
|
||||
with self._lock:
|
||||
client = self._clients.get(key)
|
||||
if client is None:
|
||||
client = builder()
|
||||
self._clients[key] = client
|
||||
return client
|
||||
|
||||
def close_all(self) -> None:
|
||||
"""Close all pooled clients and clear the pool."""
|
||||
with self._lock:
|
||||
for client in self._clients.values():
|
||||
client.close()
|
||||
self._clients.clear()
|
||||
|
||||
|
||||
_factory = HttpClientPoolFactory()
|
||||
|
||||
|
||||
def get_pooled_http_client(key: str, builder: ClientBuilder) -> httpx.Client:
|
||||
"""Return a pooled client for the given ``key`` using ``builder`` when missing."""
|
||||
return _factory.get_or_create(key, builder)
|
||||
|
||||
|
||||
def close_all_pooled_clients() -> None:
|
||||
"""Close every client created through the pooling factory."""
|
||||
_factory.close_all()
|
||||
|
||||
|
||||
def _register_shutdown_hook() -> None:
|
||||
atexit.register(close_all_pooled_clients)
|
||||
|
||||
|
||||
_register_shutdown_hook()
|
||||
@@ -8,27 +8,23 @@ import time
|
||||
import httpx
|
||||
|
||||
from configs import dify_config
|
||||
from core.helper.http_client_pooling import get_pooled_http_client
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SSRF_DEFAULT_MAX_RETRIES = dify_config.SSRF_DEFAULT_MAX_RETRIES
|
||||
|
||||
http_request_node_ssl_verify = True # Default value for http_request_node_ssl_verify is True
|
||||
try:
|
||||
config_value = dify_config.HTTP_REQUEST_NODE_SSL_VERIFY
|
||||
http_request_node_ssl_verify_lower = str(config_value).lower()
|
||||
if http_request_node_ssl_verify_lower == "true":
|
||||
http_request_node_ssl_verify = True
|
||||
elif http_request_node_ssl_verify_lower == "false":
|
||||
http_request_node_ssl_verify = False
|
||||
else:
|
||||
raise ValueError("Invalid value. HTTP_REQUEST_NODE_SSL_VERIFY should be 'True' or 'False'")
|
||||
except NameError:
|
||||
http_request_node_ssl_verify = True
|
||||
|
||||
BACKOFF_FACTOR = 0.5
|
||||
STATUS_FORCELIST = [429, 500, 502, 503, 504]
|
||||
|
||||
_SSL_VERIFIED_POOL_KEY = "ssrf:verified"
|
||||
_SSL_UNVERIFIED_POOL_KEY = "ssrf:unverified"
|
||||
_SSRF_CLIENT_LIMITS = httpx.Limits(
|
||||
max_connections=dify_config.SSRF_POOL_MAX_CONNECTIONS,
|
||||
max_keepalive_connections=dify_config.SSRF_POOL_MAX_KEEPALIVE_CONNECTIONS,
|
||||
keepalive_expiry=dify_config.SSRF_POOL_KEEPALIVE_EXPIRY,
|
||||
)
|
||||
|
||||
|
||||
class MaxRetriesExceededError(ValueError):
|
||||
"""Raised when the maximum number of retries is exceeded."""
|
||||
@@ -36,6 +32,45 @@ class MaxRetriesExceededError(ValueError):
|
||||
pass
|
||||
|
||||
|
||||
def _create_proxy_mounts() -> dict[str, httpx.HTTPTransport]:
|
||||
return {
|
||||
"http://": httpx.HTTPTransport(
|
||||
proxy=dify_config.SSRF_PROXY_HTTP_URL,
|
||||
),
|
||||
"https://": httpx.HTTPTransport(
|
||||
proxy=dify_config.SSRF_PROXY_HTTPS_URL,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def _build_ssrf_client(verify: bool) -> httpx.Client:
|
||||
if dify_config.SSRF_PROXY_ALL_URL:
|
||||
return httpx.Client(
|
||||
proxy=dify_config.SSRF_PROXY_ALL_URL,
|
||||
verify=verify,
|
||||
limits=_SSRF_CLIENT_LIMITS,
|
||||
)
|
||||
|
||||
if dify_config.SSRF_PROXY_HTTP_URL and dify_config.SSRF_PROXY_HTTPS_URL:
|
||||
return httpx.Client(
|
||||
mounts=_create_proxy_mounts(),
|
||||
verify=verify,
|
||||
limits=_SSRF_CLIENT_LIMITS,
|
||||
)
|
||||
|
||||
return httpx.Client(verify=verify, limits=_SSRF_CLIENT_LIMITS)
|
||||
|
||||
|
||||
def _get_ssrf_client(ssl_verify_enabled: bool) -> httpx.Client:
|
||||
if not isinstance(ssl_verify_enabled, bool):
|
||||
raise ValueError("SSRF client verify flag must be a boolean")
|
||||
|
||||
return get_pooled_http_client(
|
||||
_SSL_VERIFIED_POOL_KEY if ssl_verify_enabled else _SSL_UNVERIFIED_POOL_KEY,
|
||||
lambda: _build_ssrf_client(verify=ssl_verify_enabled),
|
||||
)
|
||||
|
||||
|
||||
def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
|
||||
if "allow_redirects" in kwargs:
|
||||
allow_redirects = kwargs.pop("allow_redirects")
|
||||
@@ -50,33 +85,22 @@ def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
|
||||
write=dify_config.SSRF_DEFAULT_WRITE_TIME_OUT,
|
||||
)
|
||||
|
||||
if "ssl_verify" not in kwargs:
|
||||
kwargs["ssl_verify"] = http_request_node_ssl_verify
|
||||
|
||||
ssl_verify = kwargs.pop("ssl_verify")
|
||||
# prioritize per-call option, which can be switched on and off inside the HTTP node on the web UI
|
||||
verify_option = kwargs.pop("ssl_verify", dify_config.HTTP_REQUEST_NODE_SSL_VERIFY)
|
||||
client = _get_ssrf_client(verify_option)
|
||||
|
||||
retries = 0
|
||||
while retries <= max_retries:
|
||||
try:
|
||||
if dify_config.SSRF_PROXY_ALL_URL:
|
||||
with httpx.Client(proxy=dify_config.SSRF_PROXY_ALL_URL, verify=ssl_verify) as client:
|
||||
response = client.request(method=method, url=url, **kwargs)
|
||||
elif dify_config.SSRF_PROXY_HTTP_URL and dify_config.SSRF_PROXY_HTTPS_URL:
|
||||
proxy_mounts = {
|
||||
"http://": httpx.HTTPTransport(proxy=dify_config.SSRF_PROXY_HTTP_URL, verify=ssl_verify),
|
||||
"https://": httpx.HTTPTransport(proxy=dify_config.SSRF_PROXY_HTTPS_URL, verify=ssl_verify),
|
||||
}
|
||||
with httpx.Client(mounts=proxy_mounts, verify=ssl_verify) as client:
|
||||
response = client.request(method=method, url=url, **kwargs)
|
||||
else:
|
||||
with httpx.Client(verify=ssl_verify) as client:
|
||||
response = client.request(method=method, url=url, **kwargs)
|
||||
response = client.request(method=method, url=url, **kwargs)
|
||||
|
||||
if response.status_code not in STATUS_FORCELIST:
|
||||
return response
|
||||
else:
|
||||
logger.warning(
|
||||
"Received status code %s for URL %s which is in the force list", response.status_code, url
|
||||
"Received status code %s for URL %s which is in the force list",
|
||||
response.status_code,
|
||||
url,
|
||||
)
|
||||
|
||||
except httpx.RequestError as e:
|
||||
|
||||
@@ -859,6 +859,10 @@ OWNER_TRANSFER_TOKEN_EXPIRY_MINUTES=5
|
||||
# The sandbox service endpoint.
|
||||
CODE_EXECUTION_ENDPOINT=http://sandbox:8194
|
||||
CODE_EXECUTION_API_KEY=dify-sandbox
|
||||
CODE_EXECUTION_SSL_VERIFY=True
|
||||
CODE_EXECUTION_POOL_MAX_CONNECTIONS=100
|
||||
CODE_EXECUTION_POOL_MAX_KEEPALIVE_CONNECTIONS=20
|
||||
CODE_EXECUTION_POOL_KEEPALIVE_EXPIRY=5.0
|
||||
CODE_MAX_NUMBER=9223372036854775807
|
||||
CODE_MIN_NUMBER=-9223372036854775808
|
||||
CODE_MAX_DEPTH=5
|
||||
@@ -1134,6 +1138,9 @@ SSRF_DEFAULT_TIME_OUT=5
|
||||
SSRF_DEFAULT_CONNECT_TIME_OUT=5
|
||||
SSRF_DEFAULT_READ_TIME_OUT=5
|
||||
SSRF_DEFAULT_WRITE_TIME_OUT=5
|
||||
SSRF_POOL_MAX_CONNECTIONS=100
|
||||
SSRF_POOL_MAX_KEEPALIVE_CONNECTIONS=20
|
||||
SSRF_POOL_KEEPALIVE_EXPIRY=5.0
|
||||
|
||||
# ------------------------------
|
||||
# docker env var for specifying vector db type at startup
|
||||
|
||||
@@ -382,6 +382,10 @@ x-shared-env: &shared-api-worker-env
|
||||
OWNER_TRANSFER_TOKEN_EXPIRY_MINUTES: ${OWNER_TRANSFER_TOKEN_EXPIRY_MINUTES:-5}
|
||||
CODE_EXECUTION_ENDPOINT: ${CODE_EXECUTION_ENDPOINT:-http://sandbox:8194}
|
||||
CODE_EXECUTION_API_KEY: ${CODE_EXECUTION_API_KEY:-dify-sandbox}
|
||||
CODE_EXECUTION_SSL_VERIFY: ${CODE_EXECUTION_SSL_VERIFY:-True}
|
||||
CODE_EXECUTION_POOL_MAX_CONNECTIONS: ${CODE_EXECUTION_POOL_MAX_CONNECTIONS:-100}
|
||||
CODE_EXECUTION_POOL_MAX_KEEPALIVE_CONNECTIONS: ${CODE_EXECUTION_POOL_MAX_KEEPALIVE_CONNECTIONS:-20}
|
||||
CODE_EXECUTION_POOL_KEEPALIVE_EXPIRY: ${CODE_EXECUTION_POOL_KEEPALIVE_EXPIRY:-5.0}
|
||||
CODE_MAX_NUMBER: ${CODE_MAX_NUMBER:-9223372036854775807}
|
||||
CODE_MIN_NUMBER: ${CODE_MIN_NUMBER:--9223372036854775808}
|
||||
CODE_MAX_DEPTH: ${CODE_MAX_DEPTH:-5}
|
||||
@@ -497,6 +501,9 @@ x-shared-env: &shared-api-worker-env
|
||||
SSRF_DEFAULT_CONNECT_TIME_OUT: ${SSRF_DEFAULT_CONNECT_TIME_OUT:-5}
|
||||
SSRF_DEFAULT_READ_TIME_OUT: ${SSRF_DEFAULT_READ_TIME_OUT:-5}
|
||||
SSRF_DEFAULT_WRITE_TIME_OUT: ${SSRF_DEFAULT_WRITE_TIME_OUT:-5}
|
||||
SSRF_POOL_MAX_CONNECTIONS: ${SSRF_POOL_MAX_CONNECTIONS:-100}
|
||||
SSRF_POOL_MAX_KEEPALIVE_CONNECTIONS: ${SSRF_POOL_MAX_KEEPALIVE_CONNECTIONS:-20}
|
||||
SSRF_POOL_KEEPALIVE_EXPIRY: ${SSRF_POOL_KEEPALIVE_EXPIRY:-5.0}
|
||||
EXPOSE_NGINX_PORT: ${EXPOSE_NGINX_PORT:-80}
|
||||
EXPOSE_NGINX_SSL_PORT: ${EXPOSE_NGINX_SSL_PORT:-443}
|
||||
POSITION_TOOL_PINS: ${POSITION_TOOL_PINS:-}
|
||||
|
||||
Reference in New Issue
Block a user