Compare commits

...

10 Commits

Author SHA1 Message Date
takatost
ea35f1dce1 feat: bump version to 0.3.31-fix3 (#1606) 2023-11-22 19:40:52 +08:00
Jyong
a5b80c9d1f Fix/multi thread parameter (#1604) 2023-11-22 18:31:29 +08:00
Jyong
f704094a5f fix hybrid search when document is none (#1603)
Co-authored-by: jyong <jyong@dify.ai>
2023-11-22 17:53:42 +08:00
takatost
1f58f15bff feat: optimize db connections in thread (#1601) 2023-11-22 16:55:59 +08:00
Jyong
b930716745 fix weaviate hybrid search issue (#1600)
Co-authored-by: jyong <jyong@dify.ai>
2023-11-22 16:41:20 +08:00
zxhlyh
9587479b76 fix: chat token spent info style (#1597) 2023-11-22 15:22:50 +08:00
waltcow
3c0fbf3a6a fix sql transaction error in statistic API (#1586) 2023-11-22 14:28:21 +08:00
takatost
caa330c91f feat: bump version to 0.3.31-fix2 (#1592) 2023-11-22 01:53:40 +08:00
takatost
4a55d5729d feat: add anthropic claude-2.1 support (#1591) 2023-11-22 01:46:19 +08:00
Joel
d6a6697891 fix: safari can not in (#1590) 2023-11-21 20:25:23 +08:00
23 changed files with 159 additions and 149 deletions

View File

@@ -62,16 +62,15 @@ class DailyConversationStatistic(Resource):
sql_query += ' GROUP BY date order by date'
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query), arg_dict)
response_data = []
for i in rs:
response_data.append({
'date': str(i.date),
'conversation_count': i.conversation_count
})
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query), arg_dict)
for i in rs:
response_data.append({
'date': str(i.date),
'conversation_count': i.conversation_count
})
return jsonify({
'data': response_data
@@ -124,16 +123,15 @@ class DailyTerminalsStatistic(Resource):
sql_query += ' GROUP BY date order by date'
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query), arg_dict)
response_data = []
for i in rs:
response_data.append({
'date': str(i.date),
'terminal_count': i.terminal_count
})
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query), arg_dict)
for i in rs:
response_data.append({
'date': str(i.date),
'terminal_count': i.terminal_count
})
return jsonify({
'data': response_data
@@ -187,18 +185,17 @@ class DailyTokenCostStatistic(Resource):
sql_query += ' GROUP BY date order by date'
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query), arg_dict)
response_data = []
for i in rs:
response_data.append({
'date': str(i.date),
'token_count': i.token_count,
'total_price': i.total_price,
'currency': 'USD'
})
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query), arg_dict)
for i in rs:
response_data.append({
'date': str(i.date),
'token_count': i.token_count,
'total_price': i.total_price,
'currency': 'USD'
})
return jsonify({
'data': response_data
@@ -256,16 +253,15 @@ LEFT JOIN conversations c on c.id=subquery.conversation_id
GROUP BY date
ORDER BY date"""
response_data = []
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query), arg_dict)
response_data = []
for i in rs:
response_data.append({
'date': str(i.date),
'interactions': float(i.interactions.quantize(Decimal('0.01')))
})
for i in rs:
response_data.append({
'date': str(i.date),
'interactions': float(i.interactions.quantize(Decimal('0.01')))
})
return jsonify({
'data': response_data
@@ -320,20 +316,19 @@ class UserSatisfactionRateStatistic(Resource):
sql_query += ' GROUP BY date order by date'
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query), arg_dict)
response_data = []
for i in rs:
response_data.append({
'date': str(i.date),
'rate': round((i.feedback_count * 1000 / i.message_count) if i.message_count > 0 else 0, 2),
})
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query), arg_dict)
for i in rs:
response_data.append({
'date': str(i.date),
'rate': round((i.feedback_count * 1000 / i.message_count) if i.message_count > 0 else 0, 2),
})
return jsonify({
'data': response_data
})
'data': response_data
})
class AverageResponseTimeStatistic(Resource):
@@ -383,16 +378,15 @@ class AverageResponseTimeStatistic(Resource):
sql_query += ' GROUP BY date order by date'
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query), arg_dict)
response_data = []
for i in rs:
response_data.append({
'date': str(i.date),
'latency': round(i.latency * 1000, 4)
})
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query), arg_dict)
for i in rs:
response_data.append({
'date': str(i.date),
'latency': round(i.latency * 1000, 4)
})
return jsonify({
'data': response_data
@@ -447,16 +441,15 @@ WHERE app_id = :app_id'''
sql_query += ' GROUP BY date order by date'
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query), arg_dict)
response_data = []
for i in rs:
response_data.append({
'date': str(i.date),
'tps': round(i.tokens_per_second, 4)
})
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query), arg_dict)
for i in rs:
response_data.append({
'date': str(i.date),
'tps': round(i.tokens_per_second, 4)
})
return jsonify({
'data': response_data

View File

@@ -111,7 +111,7 @@ class WeaviateVectorIndex(BaseVectorIndex):
if self._vector_store:
return self._vector_store
attributes = ['doc_id', 'dataset_id', 'document_id']
attributes = ['doc_id', 'dataset_id', 'document_id', 'doc_hash']
if self._is_origin():
attributes = ['doc_id']

View File

@@ -32,9 +32,12 @@ class AnthropicProvider(BaseModelProvider):
if model_type == ModelType.TEXT_GENERATION:
return [
{
'id': 'claude-instant-1',
'name': 'claude-instant-1',
'id': 'claude-2.1',
'name': 'claude-2.1',
'mode': ModelMode.CHAT.value,
'features': [
ModelFeature.AGENT_THOUGHT.value
]
},
{
'id': 'claude-2',
@@ -44,6 +47,11 @@ class AnthropicProvider(BaseModelProvider):
ModelFeature.AGENT_THOUGHT.value
]
},
{
'id': 'claude-instant-1',
'name': 'claude-instant-1',
'mode': ModelMode.CHAT.value,
},
]
else:
return []
@@ -73,12 +81,18 @@ class AnthropicProvider(BaseModelProvider):
:param model_type:
:return:
"""
model_max_tokens = {
'claude-instant-1': 100000,
'claude-2': 100000,
'claude-2.1': 200000,
}
return ModelKwargsRules(
temperature=KwargRule[float](min=0, max=1, default=1, precision=2),
top_p=KwargRule[float](min=0, max=1, default=0.7, precision=2),
presence_penalty=KwargRule[float](enabled=False),
frequency_penalty=KwargRule[float](enabled=False),
max_tokens=KwargRule[int](alias="max_tokens_to_sample", min=10, max=100000, default=256, precision=0),
max_tokens=KwargRule[int](alias="max_tokens_to_sample", min=10, max=model_max_tokens.get(model_name, 100000), default=256, precision=0),
)
@classmethod

View File

@@ -23,8 +23,14 @@
"currency": "USD"
},
"claude-2": {
"prompt": "11.02",
"completion": "32.68",
"prompt": "8.00",
"completion": "24.00",
"unit": "0.000001",
"currency": "USD"
},
"claude-2.1": {
"prompt": "8.00",
"completion": "24.00",
"unit": "0.000001",
"currency": "USD"
}

View File

@@ -207,10 +207,10 @@ class OrchestratorRuleParser:
).first()
if not dataset:
return None
continue
if dataset and dataset.available_document_count == 0 and dataset.available_document_count == 0:
return None
continue
dataset_ids.append(dataset.id)
if retrieval_model == 'single':
retrieval_model = dataset.retrieval_model if dataset.retrieval_model else default_retrieval_model

View File

@@ -1,7 +1,7 @@
from typing import Dict
from httpx import Limits
from langchain.chat_models import ChatAnthropic
from langchain.schema import ChatMessage, BaseMessage, HumanMessage, AIMessage, SystemMessage
from langchain.utils import get_from_dict_or_env, check_package_version
from pydantic import root_validator
@@ -29,8 +29,7 @@ class AnthropicLLM(ChatAnthropic):
base_url=values["anthropic_api_url"],
api_key=values["anthropic_api_key"],
timeout=values["default_request_timeout"],
max_retries=0,
connection_pool_limits=Limits(max_connections=200, max_keepalive_connections=100),
max_retries=0
)
values["async_client"] = anthropic.AsyncAnthropic(
base_url=values["anthropic_api_url"],
@@ -46,3 +45,16 @@ class AnthropicLLM(ChatAnthropic):
"Please it install it with `pip install anthropic`."
)
return values
def _convert_one_message_to_text(self, message: BaseMessage) -> str:
if isinstance(message, ChatMessage):
message_text = f"\n\n{message.role.capitalize()}: {message.content}"
elif isinstance(message, HumanMessage):
message_text = f"{self.HUMAN_PROMPT} {message.content}"
elif isinstance(message, AIMessage):
message_text = f"{self.AI_PROMPT} {message.content}"
elif isinstance(message, SystemMessage):
message_text = f"{message.content}"
else:
raise ValueError(f"Got unknown type {message}")
return message_text

View File

@@ -192,7 +192,7 @@ class DatasetMultiRetrieverTool(BaseTool):
'search_method'] == 'hybrid_search':
embedding_thread = threading.Thread(target=RetrievalService.embedding_search, kwargs={
'flask_app': current_app._get_current_object(),
'dataset': dataset,
'dataset_id': str(dataset.id),
'query': query,
'top_k': self.top_k,
'score_threshold': self.score_threshold,
@@ -210,7 +210,7 @@ class DatasetMultiRetrieverTool(BaseTool):
full_text_index_thread = threading.Thread(target=RetrievalService.full_text_index_search,
kwargs={
'flask_app': current_app._get_current_object(),
'dataset': dataset,
'dataset_id': str(dataset.id),
'query': query,
'search_method': 'hybrid_search',
'embeddings': embeddings,

View File

@@ -106,7 +106,7 @@ class DatasetRetrieverTool(BaseTool):
if retrieval_model['search_method'] == 'semantic_search' or retrieval_model['search_method'] == 'hybrid_search':
embedding_thread = threading.Thread(target=RetrievalService.embedding_search, kwargs={
'flask_app': current_app._get_current_object(),
'dataset': dataset,
'dataset_id': str(dataset.id),
'query': query,
'top_k': self.top_k,
'score_threshold': retrieval_model['score_threshold'] if retrieval_model[
@@ -124,7 +124,7 @@ class DatasetRetrieverTool(BaseTool):
if retrieval_model['search_method'] == 'full_text_search' or retrieval_model['search_method'] == 'hybrid_search':
full_text_index_thread = threading.Thread(target=RetrievalService.full_text_index_search, kwargs={
'flask_app': current_app._get_current_object(),
'dataset': dataset,
'dataset_id': str(dataset.id),
'query': query,
'search_method': retrieval_model['search_method'],
'embeddings': embeddings,

View File

@@ -60,7 +60,7 @@ def _create_weaviate_client(**kwargs: Any) -> Any:
def _default_score_normalizer(val: float) -> float:
return 1 - 1 / (1 + np.exp(val))
return 1 - val
def _json_serializable(value: Any) -> Any:
@@ -243,7 +243,8 @@ class Weaviate(VectorStore):
query_obj = query_obj.with_where(kwargs.get("where_filter"))
if kwargs.get("additional"):
query_obj = query_obj.with_additional(kwargs.get("additional"))
result = query_obj.with_bm25(query=content).with_limit(k).do()
properties = ['text', 'dataset_id', 'doc_hash', 'doc_id', 'document_id']
result = query_obj.with_bm25(query=query, properties=properties).with_limit(k).do()
if "errors" in result:
raise ValueError(f"Error during query: {result['errors']}")
docs = []
@@ -380,14 +381,14 @@ class Weaviate(VectorStore):
result = (
query_obj.with_near_vector(vector)
.with_limit(k)
.with_additional("vector")
.with_additional(["vector", "distance"])
.do()
)
else:
result = (
query_obj.with_near_text(content)
.with_limit(k)
.with_additional("vector")
.with_additional(["vector", "distance"])
.do()
)
@@ -397,7 +398,7 @@ class Weaviate(VectorStore):
docs_and_scores = []
for res in result["data"]["Get"][self._index_name]:
text = res.pop(self._text_key)
score = np.dot(res["_additional"]["vector"], embedded_query)
score = res["_additional"]["distance"]
docs_and_scores.append((Document(page_content=text, metadata=res), score))
return docs_and_scores

View File

@@ -1,4 +1,4 @@
from langchain.vectorstores import Weaviate
from core.vector_store.vector.weaviate import Weaviate
class WeaviateVectorStore(Weaviate):

View File

@@ -35,7 +35,7 @@ docx2txt==0.8
pypdfium2==4.16.0
resend~=0.5.1
pyjwt~=2.6.0
anthropic~=0.3.4
anthropic~=0.7.2
newspaper3k==0.2.8
google-api-python-client==2.90.0
wikipedia==1.4.0

View File

@@ -232,7 +232,7 @@ class CompletionService:
logging.exception("Unknown Error in completion")
PubHandler.pub_error(user, generate_task_id, e)
finally:
db.session.commit()
db.session.remove()
@classmethod
def countdown_and_close(cls, flask_app: Flask, worker_thread, pubsub, detached_user,
@@ -242,22 +242,25 @@ class CompletionService:
def close_pubsub():
with flask_app.app_context():
user = db.session.merge(detached_user)
try:
user = db.session.merge(detached_user)
sleep_iterations = 0
while sleep_iterations < timeout and worker_thread.is_alive():
if sleep_iterations > 0 and sleep_iterations % 10 == 0:
PubHandler.ping(user, generate_task_id)
sleep_iterations = 0
while sleep_iterations < timeout and worker_thread.is_alive():
if sleep_iterations > 0 and sleep_iterations % 10 == 0:
PubHandler.ping(user, generate_task_id)
time.sleep(1)
sleep_iterations += 1
time.sleep(1)
sleep_iterations += 1
if worker_thread.is_alive():
PubHandler.stop(user, generate_task_id)
try:
pubsub.close()
except Exception:
pass
if worker_thread.is_alive():
PubHandler.stop(user, generate_task_id)
try:
pubsub.close()
except Exception:
pass
finally:
db.session.remove()
countdown_thread = threading.Thread(target=close_pubsub)
countdown_thread.start()
@@ -394,7 +397,7 @@ class CompletionService:
logging.exception(e)
raise
finally:
db.session.commit()
db.session.remove()
try:
pubsub.unsubscribe(generate_channel)
@@ -436,7 +439,7 @@ class CompletionService:
logging.exception(e)
raise
finally:
db.session.commit()
db.session.remove()
try:
pubsub.unsubscribe(generate_channel)

View File

@@ -61,7 +61,7 @@ class HitTestingService:
if retrieval_model['search_method'] == 'semantic_search' or retrieval_model['search_method'] == 'hybrid_search':
embedding_thread = threading.Thread(target=RetrievalService.embedding_search, kwargs={
'flask_app': current_app._get_current_object(),
'dataset': dataset,
'dataset_id': str(dataset.id),
'query': query,
'top_k': retrieval_model['top_k'],
'score_threshold': retrieval_model['score_threshold'] if retrieval_model['score_threshold_enable'] else None,
@@ -77,7 +77,7 @@ class HitTestingService:
if retrieval_model['search_method'] == 'full_text_search' or retrieval_model['search_method'] == 'hybrid_search':
full_text_index_thread = threading.Thread(target=RetrievalService.full_text_index_search, kwargs={
'flask_app': current_app._get_current_object(),
'dataset': dataset,
'dataset_id': str(dataset.id),
'query': query,
'search_method': retrieval_model['search_method'],
'embeddings': embeddings,

View File

@@ -4,6 +4,7 @@ from flask import current_app, Flask
from langchain.embeddings.base import Embeddings
from core.index.vector_index.vector_index import VectorIndex
from core.model_providers.model_factory import ModelFactory
from extensions.ext_database import db
from models.dataset import Dataset
default_retrieval_model = {
@@ -21,10 +22,13 @@ default_retrieval_model = {
class RetrievalService:
@classmethod
def embedding_search(cls, flask_app: Flask, dataset: Dataset, query: str,
def embedding_search(cls, flask_app: Flask, dataset_id: str, query: str,
top_k: int, score_threshold: Optional[float], reranking_model: Optional[dict],
all_documents: list, search_method: str, embeddings: Embeddings):
with flask_app.app_context():
dataset = db.session.query(Dataset).filter(
Dataset.id == dataset_id
).first()
vector_index = VectorIndex(
dataset=dataset,
@@ -56,10 +60,13 @@ class RetrievalService:
all_documents.extend(documents)
@classmethod
def full_text_index_search(cls, flask_app: Flask, dataset: Dataset, query: str,
def full_text_index_search(cls, flask_app: Flask, dataset_id: str, query: str,
top_k: int, score_threshold: Optional[float], reranking_model: Optional[dict],
all_documents: list, search_method: str, embeddings: Embeddings):
with flask_app.app_context():
dataset = db.session.query(Dataset).filter(
Dataset.id == dataset_id
).first()
vector_index = VectorIndex(
dataset=dataset,

View File

@@ -31,12 +31,12 @@ def mock_chat_generate_invalid(messages: List[BaseMessage],
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any):
raise anthropic.APIStatusError('Invalid credentials',
request=httpx._models.Request(
method='POST',
url='https://api.anthropic.com/v1/completions',
),
response=httpx._models.Response(
status_code=401,
request=httpx._models.Request(
method='POST',
url='https://api.anthropic.com/v1/completions',
)
),
body=None
)

View File

@@ -30,7 +30,7 @@ services:
# The Weaviate vector store.
weaviate:
image: semitechnologies/weaviate:1.18.4
image: semitechnologies/weaviate:1.19.0
restart: always
volumes:
# Mount the Weaviate data directory to the container.
@@ -63,4 +63,4 @@ services:
# environment:
# QDRANT__API_KEY: 'difyai123456'
# ports:
# - "6333:6333"
# - "6333:6333"

View File

@@ -2,7 +2,7 @@ version: '3.1'
services:
# API service
api:
image: langgenius/dify-api:0.3.31-fix1
image: langgenius/dify-api:0.3.31-fix3
restart: always
environment:
# Startup mode, 'api' starts the API server.
@@ -128,7 +128,7 @@ services:
# worker service
# The Celery worker for processing the queue.
worker:
image: langgenius/dify-api:0.3.31-fix1
image: langgenius/dify-api:0.3.31-fix3
restart: always
environment:
# Startup mode, 'worker' starts the Celery worker for processing the queue.
@@ -196,7 +196,7 @@ services:
# Frontend web application.
web:
image: langgenius/dify-web:0.3.31-fix1
image: langgenius/dify-web:0.3.31-fix3
restart: always
environment:
EDITION: SELF_HOSTED
@@ -253,7 +253,7 @@ services:
# The Weaviate vector store.
weaviate:
image: semitechnologies/weaviate:1.18.4
image: semitechnologies/weaviate:1.19.0
restart: always
volumes:
# Mount the Weaviate data directory to the container.

View File

@@ -194,7 +194,7 @@ const Answer: FC<IAnswerProps> = ({
</div>
)
}
<div className={cn(s.answerWrapWrap, 'chat-answer-container')}>
<div className={cn(s.answerWrapWrap, 'chat-answer-container group')}>
<div className={`${s.answerWrap} ${showEdit ? 'w-full' : ''}`}>
<div className={`${s.answer} relative text-sm text-gray-900`}>
<div className={'ml-2 py-3 px-4 bg-gray-100 rounded-tr-2xl rounded-b-2xl'}>
@@ -280,7 +280,7 @@ const Answer: FC<IAnswerProps> = ({
{!feedbackDisabled && renderFeedbackRating(feedback?.rating, !isHideFeedbackEdit, displayScene !== 'console')}
</div>
</div>
{more && <MoreInfo more={more} isQuestion={false} />}
{more && <MoreInfo className='hidden group-hover:block' more={more} isQuestion={false} />}
</div>
</div>
</div>

View File

@@ -5,11 +5,15 @@ import { useTranslation } from 'react-i18next'
import type { MessageMore } from '../type'
import { formatNumber } from '@/utils/format'
export type IMoreInfoProps = { more: MessageMore; isQuestion: boolean }
export type IMoreInfoProps = {
more: MessageMore
isQuestion: boolean
className?: string
}
const MoreInfo: FC<IMoreInfoProps> = ({ more, isQuestion }) => {
const MoreInfo: FC<IMoreInfoProps> = ({ more, isQuestion, className }) => {
const { t } = useTranslation()
return (<div className={`mt-1 w-full text-xs text-gray-400 !text-right ${isQuestion ? 'mr-2 text-right ' : 'ml-2 text-left float-right'}`}>
return (<div className={`mt-1 w-full text-xs text-gray-400 ${isQuestion ? 'mr-2 text-right ' : 'pl-2 text-left float-right'} ${className}`}>
<span>{`${t('appLog.detail.timeConsuming')} ${more.latency}${t('appLog.detail.second')}`}</span>
<span>{`${t('appLog.detail.tokenCost')} ${formatNumber(more.tokens)}`}</span>
<span>· </span>

4
web/global.d.ts vendored
View File

@@ -1,5 +1,3 @@
declare module 'lamejs';
declare module 'react-18-input-autosize';
declare module 'fetch-readablestream' {
export default function fetchReadableStream(url: string, options?: RequestInit): Promise<Response>
}

View File

@@ -36,7 +36,6 @@
"echarts": "^5.4.1",
"echarts-for-react": "^3.0.2",
"emoji-mart": "^5.5.2",
"fetch-readablestream": "^0.2.0",
"i18next": "^22.4.13",
"i18next-resources-to-backend": "^1.1.3",
"immer": "^9.0.19",

View File

@@ -1,11 +1,8 @@
import fetchStream from 'fetch-readablestream'
import { API_PREFIX, IS_CE_EDITION, PUBLIC_API_PREFIX } from '@/config'
import Toast from '@/app/components/base/toast'
import type { MessageEnd, MessageReplace, ThoughtItem } from '@/app/components/app/chat/type'
import { isSupportNativeFetchStream } from '@/utils/stream'
const TIME_OUT = 100000
const supportNativeFetchStream = isSupportNativeFetchStream()
const ContentType = {
json: 'application/json',
@@ -223,9 +220,6 @@ const baseFetch = <T>(
if (body && bodyStringify)
options.body = JSON.stringify(body)
// for those do not support native fetch stream, we use fetch-readablestream as polyfill
const doFetch = supportNativeFetchStream ? globalThis.fetch : fetchStream
// Handle timeout
return Promise.race([
new Promise((resolve, reject) => {
@@ -234,7 +228,7 @@ const baseFetch = <T>(
}, TIME_OUT)
}),
new Promise((resolve, reject) => {
doFetch(urlWithPrefix, options as RequestInit)
globalThis.fetch(urlWithPrefix, options as RequestInit)
.then((res) => {
const resClone = res.clone()
// Error handler

View File

@@ -1,21 +0,0 @@
// https://developer.chrome.com/articles/fetch-streaming-requests/#feature-detection
export const isSupportNativeFetchStream = () => {
const supportsRequestStreams = (() => {
let duplexAccessed = false
const params = {
body: new ReadableStream(),
method: 'POST',
get duplex() {
duplexAccessed = true
return 'half'
},
}
const hasContentType = new Request('', params).headers.has('Content-Type')
return duplexAccessed && !hasContentType
})()
return supportsRequestStreams
}