Mirror of https://github.com/langgenius/dify.git (synced 2026-01-14 18:59:49 +00:00)

Compare commits

91 Commits
| SHA1 |
|---|
| f7939c758f |
| bf7045566d |
| ebd11e7482 |
| 94626487db |
| 24bdedf802 |
| 0025ba4921 |
| 7c0676343f |
| 1fe4e3afde |
| 9dee9e7ade |
| 33901384c6 |
| 7a221d0858 |
| 60ee98f578 |
| 5b24d7129e |
| b8592ad412 |
| e696b72f08 |
| 344821ed35 |
| 126b4c332f |
| c32c177e15 |
| 853cdd741f |
| 69d42ae95b |
| 5ff701ca3f |
| 9f58912fd7 |
| 0c746f5c5a |
| a8cedea15a |
| 87832ede17 |
| 4d99c689f0 |
| 28b26f67e2 |
| b934232411 |
| 2f120786fd |
| 6075fee556 |
| de584807e1 |
| a1285cbf15 |
| cf1f6f3961 |
| f4d97ef9fa |
| 28883e80d4 |
| a0f74cdd9d |
| 296bf443a8 |
| af7be9bdd7 |
| 2cfd5568e1 |
| faf40a42bc |
| 97c972f14d |
| 3fa5204b0c |
| 5a756ca981 |
| 01f9feff9f |
| 2757494265 |
| b88a8f7bb1 |
| b4225bedb5 |
| a82b4d315a |
| 3d92784bd4 |
| c06e766d7e |
| 4a3d15b6de |
| a798dcfae9 |
| b4a170cb8a |
| 665318da3d |
| 66cdf577f5 |
| 891218615e |
| a938e1f184 |
| 7c7ee633c1 |
| 18af84e193 |
| 025b859c7e |
| 0e239a4f71 |
| ca85b0afbe |
| a0a9461f79 |
| 6a2eb5f442 |
| 0c5892bcb6 |
| 91ff07fcf7 |
| bb7af56e69 |
| 77f9e8ce0f |
| 5ca4c4a44d |
| a44022c388 |
| 6333cf43a8 |
| 91ee62d1ab |
| ede69b4659 |
| 61aaeff413 |
| 4e1cd75f6f |
| a8ff2e95da |
| 4d502ea44d |
| 66b3588897 |
| 9134849744 |
| fcf8512956 |
| ae975b10e9 |
| b43f1441a9 |
| 5a2aa83030 |
| 4de27d0404 |
| c6d59681ff |
| 3b668c0bb1 |
| 4aed1fe8a8 |
| 2381264a3f |
| 4562e83b24 |
| 7be77c19f5 |
| 82247c0f14 |
.github/ISSUE_TEMPLATE/bug_report.yml (vendored, 94 lines changed)

@@ -1,56 +1,56 @@
name: "🕷️ Bug report"
description: Report errors or unexpected behavior
labels:
  - bug
body:
  - type: checkboxes
    attributes:
      label: Self Checks
      description: "To make sure we get to you in time, please check the following :)"
      options:
        - label: I have searched for existing issues [search for existing issues](https://github.com/langgenius/dify/issues), including closed ones.
          required: true
        - label: I confirm that I am using English to file this report (我已阅读并同意 [Language Policy](https://github.com/langgenius/dify/issues/1542)).
          required: true

  - type: input
    attributes:
      label: Dify version
      placeholder: 0.3.21
      description: See about section in Dify console
    validations:
      required: true

  - type: dropdown
    attributes:
      label: Cloud or Self Hosted
      description: How / Where was Dify installed from?
      multiple: true
      options:
        - Cloud
        - Self Hosted (Docker)
        - Self Hosted (Source)
    validations:
      required: true

  - type: textarea
    attributes:
      label: Steps to reproduce
      description: We highly suggest including screenshots and a bug report log.
      placeholder: Having detailed steps helps us reproduce the bug.
    validations:
      required: true

  - type: textarea
    attributes:
      label: ✔️ Expected Behavior
      placeholder: What were you expecting?
    validations:
      required: false

  - type: textarea
    attributes:
      label: ❌ Actual Behavior
      placeholder: What happened instead?
    validations:
      required: false
.github/ISSUE_TEMPLATE/config.yml (vendored, 2 lines changed)

@@ -5,4 +5,4 @@ contact_links:
    about: Documentation for users of Dify
  - name: "\U0001F4DA Dify dev documentation"
    url: https://docs.dify.ai/getting-started/install-self-hosted
    about: Documentation for people interested in developing and contributing for Dify
.github/ISSUE_TEMPLATE/document_issue.yml (vendored, 33 lines changed)

@@ -1,19 +1,20 @@
 name: "📚 Documentation Issue"
 description: Report issues in our documentation
 labels:
   - ducumentation
 body:
   - type: checkboxes
     attributes:
       label: Self Checks
       description: "To make sure we get to you in time, please check the following :)"
+      options:
         - label: I have searched for existing issues [search for existing issues](https://github.com/langgenius/dify/issues), including closed ones.
           required: true
         - label: I confirm that I am using English to file this report (我已阅读并同意 [Language Policy](https://github.com/langgenius/dify/issues/1542)).
           required: true
   - type: textarea
     attributes:
       label: Provide a description of requested docs changes
       placeholder: Briefly describe which document needs to be corrected and why.
     validations:
       required: true
.github/ISSUE_TEMPLATE/feature_request.yml (vendored, 62 lines changed)

@@ -1,35 +1,35 @@
name: "⭐ Feature or enhancement request"
description: Propose something new.
labels:
  - enhancement
body:
  - type: checkboxes
    attributes:
      label: Self Checks
      description: "To make sure we get to you in time, please check the following :)"
      options:
        - label: I have searched for existing issues [search for existing issues](https://github.com/langgenius/dify/issues), including closed ones.
          required: true
        - label: I confirm that I am using English to file this report (我已阅读并同意 [Language Policy](https://github.com/langgenius/dify/issues/1542)).
          required: true
  - type: textarea
    attributes:
      label: Description of the new feature / enhancement
      placeholder: What is the expected behavior of the proposed feature?
    validations:
      required: true
  - type: textarea
    attributes:
      label: Scenario when this would be used?
      placeholder: What is the scenario this would be used? Why is this important to your workflow as a dify user?
    validations:
      required: true
  - type: textarea
    attributes:
      label: Supporting information
      placeholder: "Having additional evidence, data, tweets, blog posts, research, ... anything is extremely helpful. This information provides context to the scenario that may otherwise be lost."
    validations:
      required: false
  - type: markdown
    attributes:
      value: Please limit one request per issue.
.github/ISSUE_TEMPLATE/help_wanted.yml (vendored, 34 lines changed)

@@ -1,20 +1,20 @@
 name: "🤝 Help Wanted"
-description: "Request help from the community" [please use English :)]
+description: "Request help from the community [please use English :)]"
 labels:
   - help-wanted
 body:
   - type: checkboxes
     attributes:
       label: Self Checks
       description: "To make sure we get to you in time, please check the following :)"
       options:
         - label: I have searched for existing issues [search for existing issues](https://github.com/langgenius/dify/issues), including closed ones.
           required: true
         - label: I confirm that I am using English to file this report (我已阅读并同意 [Language Policy](https://github.com/langgenius/dify/issues/1542)).
           required: true
   - type: textarea
     attributes:
       label: Provide a description of the help you need
       placeholder: Briefly describe what you need help with.
     validations:
       required: true
.github/ISSUE_TEMPLATE/translation_issue.yml (vendored, 96 lines changed)

@@ -1,52 +1,52 @@
name: "🌐 Localization/Translation issue"
description: Report incorrect translations. [please use English :)]
labels:
  - translation
body:
  - type: checkboxes
    attributes:
      label: Self Checks
      description: "To make sure we get to you in time, please check the following :)"
      options:
        - label: I have searched for existing issues [search for existing issues](https://github.com/langgenius/dify/issues), including closed ones.
          required: true
        - label: I confirm that I am using English to file this report (我已阅读并同意 [Language Policy](https://github.com/langgenius/dify/issues/1542)).
          required: true
  - type: input
    attributes:
      label: Dify version
      placeholder: 0.3.21
      description: Hover over system tray icon or look at Settings
    validations:
      required: true
  - type: input
    attributes:
      label: Utility with translation issue
      placeholder: Some area
      description: Please input here the utility with the translation issue
    validations:
      required: true
  - type: input
    attributes:
      label: 🌐 Language affected
      placeholder: "German"
    validations:
      required: true
  - type: textarea
    attributes:
      label: ❌ Actual phrase(s)
      placeholder: What is there? Please include a screenshot as that is extremely helpful.
    validations:
      required: true
  - type: textarea
    attributes:
      label: ✔️ Expected phrase(s)
      placeholder: What was expected?
    validations:
      required: true
  - type: textarea
    attributes:
      label: ℹ Why is the current translation wrong
      placeholder: Why do you feel this is incorrect?
    validations:
      required: true
.github/linters/.hadolint.yaml (vendored, new file, 1 line)

@@ -0,0 +1 @@
+failure-threshold: "error"
.github/linters/.yaml-lint.yml (vendored, new file, 11 lines)

@@ -0,0 +1,11 @@
+---
+
+extends: default
+
+rules:
+  brackets:
+    max-spaces-inside: 1
+  comments-indentation: disable
+  document-start: disable
+  line-length: disable
+  truthy: disable
.github/workflows/api-model-runtime-tests.yml (vendored, 33 lines changed)

@@ -31,28 +31,19 @@ jobs:
       HUGGINGFACE_EMBEDDINGS_ENDPOINT_URL: c
       MOCK_SWITCH: true

     steps:
       - name: Checkout code
-        uses: actions/checkout@v2
+        uses: actions/checkout@v4

       - name: Set up Python
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v5
         with:
           python-version: '3.10'
+          cache: 'pip'
+          cache-dependency-path: ./api/requirements.txt

-      - name: Cache pip dependencies
-        uses: actions/cache@v2
-        with:
-          path: ~/.cache/pip
-          key: ${{ runner.os }}-pip-${{ hashFiles('api/requirements.txt') }}
-          restore-keys: ${{ runner.os }}-pip-
       - name: Install dependencies
-        run: pip install -r ./api/requirements.txt
+        run: |
+          python -m pip install --upgrade pip
+          pip install pytest
+          pip install -r api/requirements.txt

       - name: Run pytest
         run: pytest api/tests/integration_tests/model_runtime/anthropic api/tests/integration_tests/model_runtime/azure_openai api/tests/integration_tests/model_runtime/openai api/tests/integration_tests/model_runtime/chatglm api/tests/integration_tests/model_runtime/google api/tests/integration_tests/model_runtime/xinference api/tests/integration_tests/model_runtime/huggingface_hub/test_llm.py
.github/workflows/build-api-image.yml (vendored, 82 lines changed)

@@ -6,55 +6,55 @@ on:
       - 'main'
       - 'deploy/dev'
   release:
-    types: [published]
+    types: [ published ]

 jobs:
   build-and-push:
     runs-on: ubuntu-latest
     if: github.event.pull_request.draft == false
     steps:
       - name: Set up QEMU
         uses: docker/setup-qemu-action@v3

       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3

       - name: Login to Docker Hub
         uses: docker/login-action@v2
         with:
           username: ${{ secrets.DOCKERHUB_USER }}
           password: ${{ secrets.DOCKERHUB_TOKEN }}

       - name: Extract metadata (tags, labels) for Docker
         id: meta
         uses: docker/metadata-action@v5
         with:
           images: langgenius/dify-api
           tags: |
             type=raw,value=latest,enable=${{ startsWith(github.ref, 'refs/tags/') }}
             type=ref,event=branch
             type=sha,enable=true,priority=100,prefix=,suffix=,format=long
             type=raw,value=${{ github.ref_name }},enable=${{ startsWith(github.ref, 'refs/tags/') }}

       - name: Build and push
         uses: docker/build-push-action@v5
         with:
           context: "{{defaultContext}}:api"
           platforms: ${{ startsWith(github.ref, 'refs/tags/') && 'linux/amd64,linux/arm64' || 'linux/amd64' }}
           build-args: |
             COMMIT_SHA=${{ fromJSON(steps.meta.outputs.json).labels['org.opencontainers.image.revision'] }}
           push: true
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
           cache-from: type=gha
           cache-to: type=gha,mode=max

       - name: Deploy to server
         if: github.ref == 'refs/heads/deploy/dev'
         uses: appleboy/ssh-action@v0.1.8
         with:
           host: ${{ secrets.SSH_HOST }}
           username: ${{ secrets.SSH_USER }}
           key: ${{ secrets.SSH_PRIVATE_KEY }}
           script: |
             ${{ secrets.SSH_SCRIPT }}
.github/workflows/build-web-image.yml (vendored, 82 lines changed)

@@ -6,55 +6,55 @@ on:
       - 'main'
       - 'deploy/dev'
   release:
-    types: [published]
+    types: [ published ]

 jobs:
   build-and-push:
     runs-on: ubuntu-latest
     if: github.event.pull_request.draft == false
     steps:
       - name: Set up QEMU
         uses: docker/setup-qemu-action@v3

       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3

       - name: Login to Docker Hub
         uses: docker/login-action@v2
         with:
           username: ${{ secrets.DOCKERHUB_USER }}
           password: ${{ secrets.DOCKERHUB_TOKEN }}

       - name: Extract metadata (tags, labels) for Docker
         id: meta
         uses: docker/metadata-action@v5
         with:
           images: langgenius/dify-web
           tags: |
             type=raw,value=latest,enable=${{ startsWith(github.ref, 'refs/tags/') }}
             type=ref,event=branch
             type=sha,enable=true,priority=100,prefix=,suffix=,format=long
             type=raw,value=${{ github.ref_name }},enable=${{ startsWith(github.ref, 'refs/tags/') }}

       - name: Build and push
         uses: docker/build-push-action@v5
         with:
           context: "{{defaultContext}}:web"
           platforms: ${{ startsWith(github.ref, 'refs/tags/') && 'linux/amd64,linux/arm64' || 'linux/amd64' }}
           build-args: |
             COMMIT_SHA=${{ fromJSON(steps.meta.outputs.json).labels['org.opencontainers.image.revision'] }}
           push: true
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
           cache-from: type=gha
           cache-to: type=gha,mode=max

       - name: Deploy to server
         if: github.ref == 'refs/heads/deploy/dev'
         uses: appleboy/ssh-action@v0.1.8
         with:
           host: ${{ secrets.SSH_HOST }}
           username: ${{ secrets.SSH_USER }}
           key: ${{ secrets.SSH_PRIVATE_KEY }}
           script: |
             ${{ secrets.SSH_SCRIPT }}
.github/workflows/stale.yml (vendored, 22 lines changed)

@@ -7,7 +7,7 @@ name: Mark stale issues and pull requests

on:
  schedule:
    - cron: '0 3 * * *'

jobs:
  stale:

@@ -18,13 +18,13 @@ jobs:
      pull-requests: write

    steps:
      - uses: actions/stale@v5
        with:
          days-before-issue-stale: 15
          days-before-issue-close: 3
          repo-token: ${{ secrets.GITHUB_TOKEN }}
          stale-issue-message: "Close due to it's no longer active, if you have any questions, you can reopen it."
          stale-pr-message: "Close due to it's no longer active, if you have any questions, you can reopen it."
          stale-issue-label: 'no-issue-activity'
          stale-pr-label: 'no-pr-activity'
          any-of-labels: 'duplicate,question,invalid,wontfix,no-issue-activity,no-pr-activity,enhancement,cant-reproduce,help-wanted'
.github/workflows/style.yml (vendored, new file, 54 lines)

@@ -0,0 +1,54 @@
+name: Style check
+
+on:
+  pull_request:
+    branches:
+      - main
+  push:
+    branches:
+      - deploy/dev
+
+concurrency:
+  group: dep-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  test:
+    name: ESLint and SuperLinter
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Setup NodeJS
+        uses: actions/setup-node@v4
+        with:
+          node-version: 18
+          cache: yarn
+          cache-dependency-path: ./web/package.json
+
+      - name: Web dependencies
+        run: |
+          cd ./web
+          yarn install --frozen-lockfile
+
+      - name: Web style check
+        run: |
+          cd ./web
+          yarn run lint
+
+      - name: Super-linter
+        uses: super-linter/super-linter/slim@v5
+        env:
+          BASH_SEVERITY: warning
+          DEFAULT_BRANCH: main
+          ERROR_ON_MISSING_EXEC_BIT: true
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          IGNORE_GENERATED_FILES: true
+          IGNORE_GITIGNORED_FILES: true
+          VALIDATE_BASH: true
+          VALIDATE_BASH_EXEC: true
+          VALIDATE_GITHUB_ACTIONS: true
+          VALIDATE_DOCKERFILE_HADOLINT: true
+          VALIDATE_YAML: true
@@ -91,7 +91,7 @@ After running, you can access the Dify dashboard in your browser at [http://loca

 ### Helm Chart

-A big thanks to @BorisPolonsky for providing us with a [Helm Chart](https://helm.sh/) version, which allows Dify to be deployed on Kubernetes.
+Big thanks to @BorisPolonsky for providing us with a [Helm Chart](https://helm.sh/) version, which allows Dify to be deployed on Kubernetes.
 You can go to https://github.com/BorisPolonsky/dify-helm for deployment information.

 ### Configuration
@@ -65,6 +65,7 @@ WEAVIATE_BATCH_SIZE=100
 # Qdrant configuration, use `http://localhost:6333` for local mode or `https://your-qdrant-cluster-url.qdrant.io` for remote mode
 QDRANT_URL=http://localhost:6333
 QDRANT_API_KEY=difyai123456
+QDRANT_CLIENT_TIMEOUT=20

 # Milvus configuration
 MILVUS_HOST=127.0.0.1

@@ -85,6 +86,7 @@ MULTIMODAL_SEND_IMAGE_FORMAT=base64
 MAIL_TYPE=
 MAIL_DEFAULT_SEND_FROM=no-reply <no-reply@dify.ai>
 RESEND_API_KEY=
+RESEND_API_URL=https://api.resend.com

 # Sentry configuration
 SENTRY_DSN=
@@ -36,6 +36,7 @@ DEFAULTS = {
     'SENTRY_PROFILES_SAMPLE_RATE': 1.0,
     'WEAVIATE_GRPC_ENABLED': 'True',
     'WEAVIATE_BATCH_SIZE': 100,
+    'QDRANT_CLIENT_TIMEOUT': 20,
     'CELERY_BACKEND': 'database',
     'LOG_LEVEL': 'INFO',
     'HOSTED_OPENAI_QUOTA_LIMIT': 200,

@@ -87,7 +88,7 @@ class Config:
         # ------------------------
         # General Configurations.
         # ------------------------
-        self.CURRENT_VERSION = "0.4.0"
+        self.CURRENT_VERSION = "0.4.5"
         self.COMMIT_SHA = get_env('COMMIT_SHA')
         self.EDITION = "SELF_HOSTED"
         self.DEPLOY_ENV = get_env('DEPLOY_ENV')

@@ -197,6 +198,7 @@ class Config:
         # qdrant settings
         self.QDRANT_URL = get_env('QDRANT_URL')
         self.QDRANT_API_KEY = get_env('QDRANT_API_KEY')
+        self.QDRANT_CLIENT_TIMEOUT = get_env('QDRANT_CLIENT_TIMEOUT')

         # milvus / zilliz setting
         self.MILVUS_HOST = get_env('MILVUS_HOST')

@@ -217,6 +219,7 @@ class Config:
         self.MAIL_TYPE = get_env('MAIL_TYPE')
         self.MAIL_DEFAULT_SEND_FROM = get_env('MAIL_DEFAULT_SEND_FROM')
         self.RESEND_API_KEY = get_env('RESEND_API_KEY')
+        self.RESEND_API_URL = get_env('RESEND_API_URL')

         # ------------------------
         # Workpace Configurations.
@@ -141,15 +141,9 @@ class AppListApi(Resource):
                 model_type=ModelType.LLM
             )
         except ProviderTokenNotInitError:
-            raise ProviderNotInitializeError(
-                f"No Default System Reasoning Model available. Please configure "
-                f"in the Settings -> Model Provider.")
+            model_instance = None

-        if not model_instance:
-            raise ProviderNotInitializeError(
-                f"No Default System Reasoning Model available. Please configure "
-                f"in the Settings -> Model Provider.")
-        else:
+        if model_instance:
             model_dict = app_model_config.model_dict
             model_dict['provider'] = model_instance.provider
             model_dict['name'] = model_instance.model
@@ -58,7 +58,7 @@ class ChatMessageAudioApi(Resource):
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
         except InvokeError as e:
-            raise CompletionRequestError(str(e))
+            raise CompletionRequestError(e.description)
         except ValueError as e:
             raise e
         except Exception as e:

@@ -78,7 +78,7 @@ class CompletionMessageApi(Resource):
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
         except InvokeError as e:
-            raise CompletionRequestError(str(e))
+            raise CompletionRequestError(e.description)
         except ValueError as e:
             raise e
         except Exception as e:

@@ -153,7 +153,7 @@ class ChatMessageApi(Resource):
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
         except InvokeError as e:
-            raise CompletionRequestError(str(e))
+            raise CompletionRequestError(e.description)
         except ValueError as e:
             raise e
         except Exception as e:

@@ -38,7 +38,7 @@ class RuleGenerateApi(Resource):
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
         except InvokeError as e:
-            raise CompletionRequestError(str(e))
+            raise CompletionRequestError(e.description)

         return rules

@@ -228,7 +228,7 @@ class MessageMoreLikeThisApi(Resource):
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
         except InvokeError as e:
-            raise CompletionRequestError(str(e))
+            raise CompletionRequestError(e.description)
         except ValueError as e:
             raise e
         except Exception as e:

@@ -256,7 +256,7 @@ def compact_response(response: Union[dict, Generator]) -> Response:
             yield "data: " + json.dumps(
                 api.handle_error(ProviderModelCurrentlyNotSupportError()).get_json()) + "\n\n"
         except InvokeError as e:
-            yield "data: " + json.dumps(api.handle_error(CompletionRequestError(str(e))).get_json()) + "\n\n"
+            yield "data: " + json.dumps(api.handle_error(CompletionRequestError(e.description)).get_json()) + "\n\n"
         except ValueError as e:
             yield "data: " + json.dumps(api.handle_error(e).get_json()) + "\n\n"
         except Exception:

@@ -296,7 +296,7 @@ class MessageSuggestedQuestionApi(Resource):
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
         except InvokeError as e:
-            raise CompletionRequestError(str(e))
+            raise CompletionRequestError(e.description)
         except Exception:
             logging.exception("internal server error.")
             raise InternalServerError()
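The hunks above, and many below, apply the same one-line change: API error handlers stop serializing InvokeError with str(e) and surface e.description instead. A minimal self-contained sketch of the effect, with a toy InvokeError standing in for core.model_runtime.errors.invoke.InvokeError and assuming, as the new code does, that the exception carries a human-readable `description` attribute separate from its str() form:

```python
# Toy stand-in for core.model_runtime.errors.invoke.InvokeError: str(e) carries
# wrapper text, while e.description holds only the human-readable message.
class InvokeError(Exception):
    def __init__(self, description: str):
        super().__init__(f"[{type(self).__name__}] {description}")
        self.description = description

def invoke_model():
    # Hypothetical failure used only to exercise the handler below.
    raise InvokeError("Rate limit reached for the model")

try:
    invoke_model()
except InvokeError as e:
    print("old API payload:", str(e))          # includes the wrapper prefix
    print("new API payload:", e.description)   # just the message users should see
```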
@@ -156,6 +156,9 @@ class DatasetDocumentSegmentApi(Resource):
         if not segment:
             raise NotFound('Segment not found.')

+        if segment.status != 'completed':
+            raise NotFound('Segment is not completed, enable or disable function is not allowed')
+
         document_indexing_cache_key = 'document_{}_indexing'.format(segment.document_id)
         cache_result = redis_client.get(document_indexing_cache_key)
         if cache_result is not None:
@@ -1,6 +1,8 @@
 import logging

 from flask_login import current_user

+from core.model_runtime.errors.invoke import InvokeError
+from libs.login import login_required
 from flask_restful import Resource, reqparse, marshal
 from werkzeug.exceptions import InternalServerError, NotFound, Forbidden

@@ -8,7 +10,7 @@ from werkzeug.exceptions import InternalServerError, NotFound, Forbidden
 import services
 from controllers.console import api
 from controllers.console.app.error import ProviderNotInitializeError, ProviderQuotaExceededError, \
-    ProviderModelCurrentlyNotSupportError
+    ProviderModelCurrentlyNotSupportError, CompletionRequestError
 from controllers.console.datasets.error import HighQualityDatasetOnlyError, DatasetNotInitializedError
 from controllers.console.setup import setup_required
 from controllers.console.wraps import account_initialization_required

@@ -69,6 +71,8 @@ class HitTestingApi(Resource):
             raise ProviderNotInitializeError(
                 f"No Embedding Model or Reranking Model available. Please configure a valid provider "
                 f"in the Settings -> Model Provider.")
+        except InvokeError as e:
+            raise CompletionRequestError(e.description)
         except ValueError as e:
             raise ValueError(str(e))
         except Exception as e:
@@ -54,7 +54,7 @@ class ChatAudioApi(InstalledAppResource):
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
         except InvokeError as e:
-            raise CompletionRequestError(str(e))
+            raise CompletionRequestError(e.description)
         except ValueError as e:
             raise e
         except Exception as e:

@@ -70,7 +70,7 @@ class CompletionApi(InstalledAppResource):
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
         except InvokeError as e:
-            raise CompletionRequestError(str(e))
+            raise CompletionRequestError(e.description)
         except ValueError as e:
             raise e
         except Exception as e:

@@ -134,7 +134,7 @@ class ChatApi(InstalledAppResource):
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
         except InvokeError as e:
-            raise CompletionRequestError(str(e))
+            raise CompletionRequestError(e.description)
         except ValueError as e:
             raise e
         except Exception as e:

@@ -175,7 +175,7 @@ def compact_response(response: Union[dict, Generator]) -> Response:
         except ModelCurrentlyNotSupportError:
             yield "data: " + json.dumps(api.handle_error(ProviderModelCurrentlyNotSupportError()).get_json()) + "\n\n"
         except InvokeError as e:
-            yield "data: " + json.dumps(api.handle_error(CompletionRequestError(str(e))).get_json()) + "\n\n"
+            yield "data: " + json.dumps(api.handle_error(CompletionRequestError(e.description)).get_json()) + "\n\n"
         except ValueError as e:
             yield "data: " + json.dumps(api.handle_error(e).get_json()) + "\n\n"
         except Exception:

@@ -104,7 +104,7 @@ class MessageMoreLikeThisApi(InstalledAppResource):
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
         except InvokeError as e:
-            raise CompletionRequestError(str(e))
+            raise CompletionRequestError(e.description)
         except ValueError as e:
             raise e
         except Exception:

@@ -131,7 +131,7 @@ def compact_response(response: Union[dict, Generator]) -> Response:
         except ModelCurrentlyNotSupportError:
             yield "data: " + json.dumps(api.handle_error(ProviderModelCurrentlyNotSupportError()).get_json()) + "\n\n"
         except InvokeError as e:
-            yield "data: " + json.dumps(api.handle_error(CompletionRequestError(str(e))).get_json()) + "\n\n"
+            yield "data: " + json.dumps(api.handle_error(CompletionRequestError(e.description)).get_json()) + "\n\n"
         except ValueError as e:
             yield "data: " + json.dumps(api.handle_error(e).get_json()) + "\n\n"
         except Exception:

@@ -169,7 +169,7 @@ class MessageSuggestedQuestionApi(InstalledAppResource):
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
         except InvokeError as e:
-            raise CompletionRequestError(str(e))
+            raise CompletionRequestError(e.description)
         except Exception:
             logging.exception("internal server error.")
             raise InternalServerError()

@@ -54,7 +54,7 @@ class UniversalChatAudioApi(UniversalChatResource):
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
         except InvokeError as e:
-            raise CompletionRequestError(str(e))
+            raise CompletionRequestError(e.description)
         except ValueError as e:
             raise e
         except Exception as e:

@@ -89,7 +89,7 @@ class UniversalChatApi(UniversalChatResource):
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
         except InvokeError as e:
-            raise CompletionRequestError(str(e))
+            raise CompletionRequestError(e.description)
         except ValueError as e:
             raise e
         except Exception as e:

@@ -126,7 +126,7 @@ def compact_response(response: Union[dict, Generator]) -> Response:
         except ModelCurrentlyNotSupportError:
             yield "data: " + json.dumps(api.handle_error(ProviderModelCurrentlyNotSupportError()).get_json()) + "\n\n"
         except InvokeError as e:
-            yield "data: " + json.dumps(api.handle_error(CompletionRequestError(str(e))).get_json()) + "\n\n"
+            yield "data: " + json.dumps(api.handle_error(CompletionRequestError(e.description)).get_json()) + "\n\n"
         except ValueError as e:
             yield "data: " + json.dumps(api.handle_error(e).get_json()) + "\n\n"
         except Exception:

@@ -133,7 +133,7 @@ class UniversalChatMessageSuggestedQuestionApi(UniversalChatResource):
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
         except InvokeError as e:
-            raise CompletionRequestError(str(e))
+            raise CompletionRequestError(e.description)
         except Exception:
             logging.exception("internal server error.")
             raise InternalServerError()
@@ -50,7 +50,7 @@ class AudioApi(AppApiResource):
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
         except InvokeError as e:
-            raise CompletionRequestError(str(e))
+            raise CompletionRequestError(e.description)
         except ValueError as e:
             raise e
         except Exception as e:

@@ -31,7 +31,7 @@ class CompletionApi(AppApiResource):
         parser.add_argument('query', type=str, location='json', default='')
         parser.add_argument('files', type=list, required=False, location='json')
         parser.add_argument('response_mode', type=str, choices=['blocking', 'streaming'], location='json')
-        parser.add_argument('user', type=str, location='json')
+        parser.add_argument('user', required=True, nullable=False, type=str, location='json')
         parser.add_argument('retriever_from', type=str, required=False, default='dev', location='json')

         args = parser.parse_args()

@@ -67,7 +67,7 @@ class CompletionApi(AppApiResource):
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
         except InvokeError as e:
-            raise CompletionRequestError(str(e))
+            raise CompletionRequestError(e.description)
         except ValueError as e:
             raise e
         except Exception as e:

@@ -96,7 +96,7 @@ class ChatApi(AppApiResource):
         parser.add_argument('files', type=list, required=False, location='json')
         parser.add_argument('response_mode', type=str, choices=['blocking', 'streaming'], location='json')
         parser.add_argument('conversation_id', type=uuid_value, location='json')
-        parser.add_argument('user', type=str, location='json')
+        parser.add_argument('user', type=str, required=True, nullable=False, location='json')
         parser.add_argument('retriever_from', type=str, required=False, default='dev', location='json')
         parser.add_argument('auto_generate_name', type=bool, required=False, default=True, location='json')

@@ -131,7 +131,7 @@ class ChatApi(AppApiResource):
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
         except InvokeError as e:
-            raise CompletionRequestError(str(e))
+            raise CompletionRequestError(e.description)
         except ValueError as e:
             raise e
         except Exception as e:

@@ -171,7 +171,7 @@ def compact_response(response: Union[dict, Generator]) -> Response:
         except ModelCurrentlyNotSupportError:
             yield "data: " + json.dumps(api.handle_error(ProviderModelCurrentlyNotSupportError()).get_json()) + "\n\n"
         except InvokeError as e:
-            yield "data: " + json.dumps(api.handle_error(CompletionRequestError(str(e))).get_json()) + "\n\n"
+            yield "data: " + json.dumps(api.handle_error(CompletionRequestError(e.description)).get_json()) + "\n\n"
         except ValueError as e:
             yield "data: " + json.dumps(api.handle_error(e).get_json()) + "\n\n"
         except Exception:
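The parser changes above make the `user` field mandatory for the service-API completion and chat endpoints. A minimal sketch of the request-level effect, reusing the same flask_restful argument declaration as the new line in the diff; the route path and payload here are illustrative, not taken from the diff:

```python
from flask import Flask
from flask_restful import reqparse
from werkzeug.exceptions import BadRequest

app = Flask(__name__)

parser = reqparse.RequestParser()
# Same declaration as the new line in the diff above:
parser.add_argument('user', required=True, nullable=False, type=str, location='json')

# Simulate a POST that omits 'user': parse_args() now aborts with HTTP 400
# instead of silently yielding user=None.
with app.test_request_context('/completion-messages', method='POST', json={'query': 'hi'}):
    try:
        parser.parse_args()
    except BadRequest as e:
        # flask_restful attaches the per-field errors on e.data
        print('rejected with 400:', getattr(e, 'data', {}))
```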
@@ -52,7 +52,7 @@ class AudioApi(WebApiResource):
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
         except InvokeError as e:
-            raise CompletionRequestError(str(e))
+            raise CompletionRequestError(e.description)
         except ValueError as e:
             raise e
         except Exception as e:

@@ -64,7 +64,7 @@ class CompletionApi(WebApiResource):
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
         except InvokeError as e:
-            raise CompletionRequestError(str(e))
+            raise CompletionRequestError(e.description)
         except ValueError as e:
             raise e
         except Exception as e:

@@ -124,7 +124,7 @@ class ChatApi(WebApiResource):
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
         except InvokeError as e:
-            raise CompletionRequestError(str(e))
+            raise CompletionRequestError(e.description)
         except ValueError as e:
             raise e
         except Exception as e:

@@ -164,7 +164,7 @@ def compact_response(response: Union[dict, Generator]) -> Response:
         except ModelCurrentlyNotSupportError:
             yield "data: " + json.dumps(api.handle_error(ProviderModelCurrentlyNotSupportError()).get_json()) + "\n\n"
         except InvokeError as e:
-            yield "data: " + json.dumps(api.handle_error(CompletionRequestError(str(e))).get_json()) + "\n\n"
+            yield "data: " + json.dumps(api.handle_error(CompletionRequestError(e.description)).get_json()) + "\n\n"
         except ValueError as e:
             yield "data: " + json.dumps(api.handle_error(e).get_json()) + "\n\n"
         except Exception:

@@ -138,7 +138,7 @@ class MessageMoreLikeThisApi(WebApiResource):
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
         except InvokeError as e:
-            raise CompletionRequestError(str(e))
+            raise CompletionRequestError(e.description)
         except ValueError as e:
             raise e
         except Exception:

@@ -165,7 +165,7 @@ def compact_response(response: Union[dict, Generator]) -> Response:
         except ModelCurrentlyNotSupportError:
             yield "data: " + json.dumps(api.handle_error(ProviderModelCurrentlyNotSupportError()).get_json()) + "\n\n"
         except InvokeError as e:
-            yield "data: " + json.dumps(api.handle_error(CompletionRequestError(str(e))).get_json()) + "\n\n"
+            yield "data: " + json.dumps(api.handle_error(CompletionRequestError(e.description)).get_json()) + "\n\n"
         except ValueError as e:
             yield "data: " + json.dumps(api.handle_error(e).get_json()) + "\n\n"
         except Exception:

@@ -202,7 +202,7 @@ class MessageSuggestedQuestionApi(WebApiResource):
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
         except InvokeError as e:
-            raise CompletionRequestError(str(e))
+            raise CompletionRequestError(e.description)
         except Exception:
             logging.exception("internal server error.")
             raise InternalServerError()
@@ -75,7 +75,7 @@ class AgentApplicationRunner(AppRunner):
         # reorganize all inputs and template to prompt messages
         # Include: prompt template, inputs, query(optional), files(optional)
         # memory(optional)
-        prompt_messages, stop = self.originze_prompt_messages(
+        prompt_messages, stop = self.organize_prompt_messages(
             app_record=app_record,
             model_config=app_orchestration_config.model_config,
             prompt_template_entity=app_orchestration_config.prompt_template,

@@ -153,7 +153,7 @@ class AgentApplicationRunner(AppRunner):
         # reorganize all inputs and template to prompt messages
         # Include: prompt template, inputs, query(optional), files(optional)
         # memory(optional), external data, dataset context(optional)
-        prompt_messages, stop = self.originze_prompt_messages(
+        prompt_messages, stop = self.organize_prompt_messages(
             app_record=app_record,
             model_config=app_orchestration_config.model_config,
             prompt_template_entity=app_orchestration_config.prompt_template,

@@ -237,8 +237,8 @@ class AgentApplicationRunner(AppRunner):
         all_message_tokens = 0
         all_answer_tokens = 0
         for agent_thought in agent_thoughts:
-            all_message_tokens += agent_thought.message_tokens
-            all_answer_tokens += agent_thought.answer_tokens
+            all_message_tokens += agent_thought.message_token
+            all_answer_tokens += agent_thought.answer_token

         model_type_instance = model_config.provider_model_bundle.model_type_instance
         model_type_instance = cast(LargeLanguageModel, model_type_instance)
@@ -1,7 +1,7 @@
 import time
 from typing import cast, Optional, List, Tuple, Generator, Union

-from core.application_queue_manager import ApplicationQueueManager
+from core.application_queue_manager import ApplicationQueueManager, PublishFrom
 from core.entities.application_entities import ModelConfigEntity, PromptTemplateEntity, AppOrchestrationConfigEntity
 from core.file.file_obj import FileObj
 from core.memory.token_buffer_memory import TokenBufferMemory

@@ -50,7 +50,7 @@ class AppRunner:
         max_tokens = 0

         # get prompt messages without memory and context
-        prompt_messages, stop = self.originze_prompt_messages(
+        prompt_messages, stop = self.organize_prompt_messages(
             app_record=app_record,
             model_config=model_config,
             prompt_template_entity=prompt_template_entity,

@@ -107,7 +107,7 @@ class AppRunner:
                     or (parameter_rule.use_template and parameter_rule.use_template == 'max_tokens')):
                 model_config.parameters[parameter_rule.name] = max_tokens

-    def originze_prompt_messages(self, app_record: App,
+    def organize_prompt_messages(self, app_record: App,
                                  model_config: ModelConfigEntity,
                                  prompt_template_entity: PromptTemplateEntity,
                                  inputs: dict[str, str],

@@ -183,7 +183,7 @@ class AppRunner:
                     index=index,
                     message=AssistantPromptMessage(content=token)
                 )
-            ))
+            ), PublishFrom.APPLICATION_MANAGER)
             index += 1
             time.sleep(0.01)

@@ -193,7 +193,8 @@ class AppRunner:
                 prompt_messages=prompt_messages,
                 message=AssistantPromptMessage(content=text),
                 usage=usage if usage else LLMUsage.empty_usage()
-            )
+            ),
+            pub_from=PublishFrom.APPLICATION_MANAGER
         )

     def _handle_invoke_result(self, invoke_result: Union[LLMResult, Generator],

@@ -226,7 +227,8 @@ class AppRunner:
         :return:
         """
         queue_manager.publish_message_end(
-            llm_result=invoke_result
+            llm_result=invoke_result,
+            pub_from=PublishFrom.APPLICATION_MANAGER
         )

     def _handle_invoke_result_stream(self, invoke_result: Generator,

@@ -242,7 +244,7 @@ class AppRunner:
         text = ''
         usage = None
         for result in invoke_result:
-            queue_manager.publish_chunk_message(result)
+            queue_manager.publish_chunk_message(result, PublishFrom.APPLICATION_MANAGER)

             text += result.delta.message.content

@@ -263,5 +265,6 @@ class AppRunner:
         )

         queue_manager.publish_message_end(
-            llm_result=llm_result
+            llm_result=llm_result,
+            pub_from=PublishFrom.APPLICATION_MANAGER
         )
@@ -5,7 +5,7 @@ from core.app_runner.app_runner import AppRunner
 from core.callback_handler.index_tool_callback_handler import DatasetIndexToolCallbackHandler
 from core.entities.application_entities import ApplicationGenerateEntity, ModelConfigEntity, \
     AppOrchestrationConfigEntity, InvokeFrom, ExternalDataVariableEntity, DatasetEntity
-from core.application_queue_manager import ApplicationQueueManager
+from core.application_queue_manager import ApplicationQueueManager, PublishFrom
 from core.features.annotation_reply import AnnotationReplyFeature
 from core.features.dataset_retrieval import DatasetRetrievalFeature
 from core.features.external_data_fetch import ExternalDataFetchFeature

@@ -79,7 +79,7 @@ class BasicApplicationRunner(AppRunner):
         # organize all inputs and template to prompt messages
         # Include: prompt template, inputs, query(optional), files(optional)
         # memory(optional)
-        prompt_messages, stop = self.originze_prompt_messages(
+        prompt_messages, stop = self.organize_prompt_messages(
             app_record=app_record,
             model_config=app_orchestration_config.model_config,
             prompt_template_entity=app_orchestration_config.prompt_template,

@@ -121,7 +121,8 @@ class BasicApplicationRunner(AppRunner):

         if annotation_reply:
             queue_manager.publish_annotation_reply(
-                message_annotation_id=annotation_reply.id
+                message_annotation_id=annotation_reply.id,
+                pub_from=PublishFrom.APPLICATION_MANAGER
             )
             self.direct_output(
                 queue_manager=queue_manager,

@@ -132,16 +133,16 @@ class BasicApplicationRunner(AppRunner):
             )
             return

         # fill in variable inputs from external data tools if exists
         external_data_tools = app_orchestration_config.external_data_variables
         if external_data_tools:
             inputs = self.fill_in_inputs_from_external_data_tools(
                 tenant_id=app_record.tenant_id,
                 app_id=app_record.id,
                 external_data_tools=external_data_tools,
                 inputs=inputs,
                 query=query
             )

         # get context from datasets
         context = None

@@ -164,7 +165,7 @@ class BasicApplicationRunner(AppRunner):
         # reorganize all inputs and template to prompt messages
         # Include: prompt template, inputs, query(optional), files(optional)
         # memory(optional), external data, dataset context(optional)
-        prompt_messages, stop = self.originze_prompt_messages(
+        prompt_messages, stop = self.organize_prompt_messages(
             app_record=app_record,
             model_config=app_orchestration_config.model_config,
             prompt_template_entity=app_orchestration_config.prompt_template,
@@ -7,7 +7,7 @@ from pydantic import BaseModel

 from core.app_runner.moderation_handler import OutputModerationHandler, ModerationRule
 from core.entities.application_entities import ApplicationGenerateEntity
-from core.application_queue_manager import ApplicationQueueManager
+from core.application_queue_manager import ApplicationQueueManager, PublishFrom
 from core.entities.queue_entities import QueueErrorEvent, QueueStopEvent, QueueMessageEndEvent, \
     QueueRetrieverResourcesEvent, QueueAgentThoughtEvent, QueuePingEvent, QueueMessageEvent, QueueMessageReplaceEvent, \
     AnnotationReplyEvent

@@ -312,8 +312,11 @@ class GenerateTaskPipeline:
                             index=0,
                             message=AssistantPromptMessage(content=self._task_state.llm_result.message.content)
                         )
-                    ))
-                    self._queue_manager.publish(QueueStopEvent(stopped_by=QueueStopEvent.StopBy.OUTPUT_MODERATION))
+                    ), PublishFrom.TASK_PIPELINE)
+                    self._queue_manager.publish(
+                        QueueStopEvent(stopped_by=QueueStopEvent.StopBy.OUTPUT_MODERATION),
+                        PublishFrom.TASK_PIPELINE
+                    )
                     continue
                 else:
                     self._output_moderation_handler.append_new_token(delta_text)
@@ -6,6 +6,7 @@ from typing import Any, Optional, Dict
|
||||
from flask import current_app, Flask
|
||||
from pydantic import BaseModel
|
||||
|
||||
from core.application_queue_manager import PublishFrom
|
||||
from core.moderation.base import ModerationAction, ModerationOutputsResult
|
||||
from core.moderation.factory import ModerationFactory
|
||||
|
||||
@@ -66,7 +67,7 @@ class OutputModerationHandler(BaseModel):
|
||||
final_output = result.text
|
||||
|
||||
if public_event:
|
||||
self.on_message_replace_func(final_output)
|
||||
self.on_message_replace_func(final_output, PublishFrom.TASK_PIPELINE)
|
||||
|
||||
return final_output
|
||||
|
||||
|
||||
@@ -23,7 +23,7 @@ from core.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeErr
|
||||
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
|
||||
from core.prompt.prompt_template import PromptTemplateParser
|
||||
from core.provider_manager import ProviderManager
|
||||
from core.application_queue_manager import ApplicationQueueManager, ConversationTaskStoppedException
|
||||
from core.application_queue_manager import ApplicationQueueManager, ConversationTaskStoppedException, PublishFrom
|
||||
from extensions.ext_database import db
|
||||
from models.account import Account
|
||||
from models.model import EndUser, Conversation, Message, MessageFile, App
|
||||
@@ -169,15 +169,18 @@ class ApplicationManager:
|
||||
except ConversationTaskStoppedException:
|
||||
pass
|
||||
except InvokeAuthorizationError:
|
||||
queue_manager.publish_error(InvokeAuthorizationError('Incorrect API key provided'))
|
||||
queue_manager.publish_error(
|
||||
InvokeAuthorizationError('Incorrect API key provided'),
|
||||
PublishFrom.APPLICATION_MANAGER
|
||||
)
|
||||
except ValidationError as e:
|
||||
logger.exception("Validation Error when generating")
|
||||
queue_manager.publish_error(e)
|
||||
queue_manager.publish_error(e, PublishFrom.APPLICATION_MANAGER)
|
||||
except (ValueError, InvokeError) as e:
|
||||
queue_manager.publish_error(e)
|
||||
queue_manager.publish_error(e, PublishFrom.APPLICATION_MANAGER)
|
||||
except Exception as e:
|
||||
logger.exception("Unknown Error when generating")
|
||||
queue_manager.publish_error(e)
|
||||
queue_manager.publish_error(e, PublishFrom.APPLICATION_MANAGER)
|
||||
finally:
|
||||
db.session.remove()
|
||||
|
||||
@@ -376,7 +379,8 @@ class ApplicationManager:
|
||||
and 'enabled' in copy_app_model_config_dict['agent_mode'] and copy_app_model_config_dict['agent_mode'][
|
||||
'enabled']:
|
||||
agent_dict = copy_app_model_config_dict.get('agent_mode')
|
||||
if agent_dict['strategy'] in ['router', 'react_router']:
|
||||
agent_strategy = agent_dict.get('strategy', 'router')
|
||||
if agent_strategy in ['router', 'react_router']:
|
||||
dataset_ids = []
|
||||
for tool in agent_dict.get('tools', []):
|
||||
key = list(tool.keys())[0]
|
||||
@@ -402,7 +406,7 @@ class ApplicationManager:
|
||||
retrieve_strategy=DatasetRetrieveConfigEntity.RetrieveStrategy.value_of(
|
||||
dataset_configs['retrieval_model']
|
||||
),
|
||||
single_strategy=agent_dict['strategy']
|
||||
single_strategy=agent_strategy
|
||||
)
|
||||
)
|
||||
else:
|
||||
@@ -419,7 +423,7 @@ class ApplicationManager:
|
||||
)
|
||||
)
|
||||
else:
|
||||
if agent_dict['strategy'] == 'react':
|
||||
if agent_strategy == 'react':
|
||||
strategy = AgentEntity.Strategy.CHAIN_OF_THOUGHT
|
||||
else:
|
||||
strategy = AgentEntity.Strategy.FUNCTION_CALLING
|
||||
@@ -472,7 +476,7 @@ class ApplicationManager:
|
||||
more_like_this_dict = copy_app_model_config_dict.get('more_like_this')
|
||||
if more_like_this_dict:
|
||||
if 'enabled' in more_like_this_dict and more_like_this_dict['enabled']:
|
||||
properties['more_like_this'] = copy_app_model_config_dict.get('opening_statement')
|
||||
properties['more_like_this'] = True
|
||||
|
||||
# speech to text
|
||||
speech_to_text_dict = copy_app_model_config_dict.get('speech_to_text')
|
||||
|
||||
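The change that recurs throughout this compare view threads a PublishFrom value through every queue publication, so the queue manager can tell producer-side publishes apart from pipeline-side ones. A minimal sketch of the new error-publishing convention (simplified, not a verbatim excerpt from the diff):

    from core.application_queue_manager import ApplicationQueueManager, PublishFrom

    def run_app(queue_manager: ApplicationQueueManager) -> None:
        try:
            ...  # invoke the model and stream results
        except ValueError as e:
            # every publish call now declares where it originates
            queue_manager.publish_error(e, PublishFrom.APPLICATION_MANAGER)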
@@ -1,5 +1,6 @@
import queue
import time
from enum import Enum
from typing import Generator, Any

from sqlalchemy.orm import DeclarativeMeta
@@ -13,6 +14,11 @@ from extensions.ext_redis import redis_client
from models.model import MessageAgentThought


class PublishFrom(Enum):
APPLICATION_MANAGER = 1
TASK_PIPELINE = 2


class ApplicationQueueManager:
def __init__(self, task_id: str,
user_id: str,
@@ -61,11 +67,14 @@ class ApplicationQueueManager:
if elapsed_time >= listen_timeout or self._is_stopped():
# publish two messages to make sure the client can receive the stop signal
# and stop listening after the stop signal processed
self.publish(QueueStopEvent(stopped_by=QueueStopEvent.StopBy.USER_MANUAL))
self.publish(
QueueStopEvent(stopped_by=QueueStopEvent.StopBy.USER_MANUAL),
PublishFrom.TASK_PIPELINE
)
self.stop_listen()

if elapsed_time // 10 > last_ping_time:
self.publish(QueuePingEvent())
self.publish(QueuePingEvent(), PublishFrom.TASK_PIPELINE)
last_ping_time = elapsed_time // 10

def stop_listen(self) -> None:
@@ -75,76 +84,83 @@ class ApplicationQueueManager:
"""
self._q.put(None)

def publish_chunk_message(self, chunk: LLMResultChunk) -> None:
def publish_chunk_message(self, chunk: LLMResultChunk, pub_from: PublishFrom) -> None:
"""
Publish chunk message to channel

:param chunk: chunk
:param pub_from: publish from
:return:
"""
self.publish(QueueMessageEvent(
chunk=chunk
))
), pub_from)

def publish_message_replace(self, text: str) -> None:
def publish_message_replace(self, text: str, pub_from: PublishFrom) -> None:
"""
Publish message replace
:param text: text
:param pub_from: publish from
:return:
"""
self.publish(QueueMessageReplaceEvent(
text=text
))
), pub_from)

def publish_retriever_resources(self, retriever_resources: list[dict]) -> None:
def publish_retriever_resources(self, retriever_resources: list[dict], pub_from: PublishFrom) -> None:
"""
Publish retriever resources
:return:
"""
self.publish(QueueRetrieverResourcesEvent(retriever_resources=retriever_resources))
self.publish(QueueRetrieverResourcesEvent(retriever_resources=retriever_resources), pub_from)

def publish_annotation_reply(self, message_annotation_id: str) -> None:
def publish_annotation_reply(self, message_annotation_id: str, pub_from: PublishFrom) -> None:
"""
Publish annotation reply
:param message_annotation_id: message annotation id
:param pub_from: publish from
:return:
"""
self.publish(AnnotationReplyEvent(message_annotation_id=message_annotation_id))
self.publish(AnnotationReplyEvent(message_annotation_id=message_annotation_id), pub_from)

def publish_message_end(self, llm_result: LLMResult) -> None:
def publish_message_end(self, llm_result: LLMResult, pub_from: PublishFrom) -> None:
"""
Publish message end
:param llm_result: llm result
:param pub_from: publish from
:return:
"""
self.publish(QueueMessageEndEvent(llm_result=llm_result))
self.publish(QueueMessageEndEvent(llm_result=llm_result), pub_from)
self.stop_listen()

def publish_agent_thought(self, message_agent_thought: MessageAgentThought) -> None:
def publish_agent_thought(self, message_agent_thought: MessageAgentThought, pub_from: PublishFrom) -> None:
"""
Publish agent thought
:param message_agent_thought: message agent thought
:param pub_from: publish from
:return:
"""
self.publish(QueueAgentThoughtEvent(
agent_thought_id=message_agent_thought.id
))
), pub_from)

def publish_error(self, e) -> None:
def publish_error(self, e, pub_from: PublishFrom) -> None:
"""
Publish error
:param e: error
:param pub_from: publish from
:return:
"""
self.publish(QueueErrorEvent(
error=e
))
), pub_from)
self.stop_listen()

def publish(self, event: AppQueueEvent) -> None:
def publish(self, event: AppQueueEvent, pub_from: PublishFrom) -> None:
"""
Publish event to queue
:param event:
:param pub_from:
:return:
"""
self._check_for_sqlalchemy_models(event.dict())
@@ -162,6 +178,9 @@ class ApplicationQueueManager:
if isinstance(event, QueueStopEvent):
self.stop_listen()

if pub_from == PublishFrom.APPLICATION_MANAGER and self._is_stopped():
raise ConversationTaskStoppedException()

@classmethod
def set_stop_flag(cls, task_id: str, invoke_from: InvokeFrom, user_id: str) -> None:
"""
@@ -173,7 +192,7 @@ class ApplicationQueueManager:
return

user_prefix = 'account' if invoke_from in [InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER] else 'end-user'
if result != f"{user_prefix}-{user_id}":
if result.decode('utf-8') != f"{user_prefix}-{user_id}":
return

stopped_cache_key = cls._generate_stopped_cache_key(task_id)
@@ -187,7 +206,6 @@ class ApplicationQueueManager:
stopped_cache_key = ApplicationQueueManager._generate_stopped_cache_key(self._task_id)
result = redis_client.get(stopped_cache_key)
if result is not None:
redis_client.delete(stopped_cache_key)
return True

return False
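With the origin recorded, publish() can abort the producer as soon as a task is flagged stopped, while the task pipeline keeps draining events to the client. A rough sketch of the observable behavior (variable names illustrative):

    event = QueuePingEvent()
    queue_manager.publish(event, PublishFrom.TASK_PIPELINE)  # never raises on stop
    queue_manager.publish(event, PublishFrom.APPLICATION_MANAGER)
    # raises ConversationTaskStoppedException once the stop flag is set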
@@ -8,7 +8,7 @@ from langchain.agents import openai_functions_agent, openai_functions_multi_agen
from langchain.callbacks.base import BaseCallbackHandler
from langchain.schema import AgentAction, AgentFinish, LLMResult, ChatGeneration, BaseMessage

from core.application_queue_manager import ApplicationQueueManager
from core.application_queue_manager import ApplicationQueueManager, PublishFrom
from core.callback_handler.entity.agent_loop import AgentLoop
from core.entities.application_entities import ModelConfigEntity
from core.model_runtime.entities.llm_entities import LLMResult as RuntimeLLMResult
@@ -232,7 +232,7 @@ class AgentLoopGatherCallbackHandler(BaseCallbackHandler):
db.session.add(message_agent_thought)
db.session.commit()

self.queue_manager.publish_agent_thought(message_agent_thought)
self.queue_manager.publish_agent_thought(message_agent_thought, PublishFrom.APPLICATION_MANAGER)

return message_agent_thought


@@ -2,7 +2,7 @@ from typing import List, Union

from langchain.schema import Document

from core.application_queue_manager import ApplicationQueueManager
from core.application_queue_manager import ApplicationQueueManager, PublishFrom
from core.entities.application_entities import InvokeFrom
from extensions.ext_database import db
from models.dataset import DocumentSegment, DatasetQuery
@@ -80,4 +80,4 @@ class DatasetIndexToolCallbackHandler:
db.session.add(dataset_retriever_resource)
db.session.commit()

self._queue_manager.publish_retriever_resources(resource)
self._queue_manager.publish_retriever_resources(resource, PublishFrom.APPLICATION_MANAGER)

@@ -65,7 +65,8 @@ class FileExtractor:
elif file_extension == '.pdf':
loader = PdfLoader(file_path, upload_file=upload_file)
elif file_extension in ['.md', '.markdown']:
loader = UnstructuredMarkdownLoader(file_path, unstructured_api_url)
loader = UnstructuredMarkdownLoader(file_path, unstructured_api_url) if is_automatic \
else MarkdownLoader(file_path, autodetect_encoding=True)
elif file_extension in ['.htm', '.html']:
loader = HTMLLoader(file_path)
elif file_extension == '.docx':
@@ -84,7 +85,8 @@ class FileExtractor:
loader = UnstructuredXmlLoader(file_path, unstructured_api_url)
else:
# txt
loader = UnstructuredTextLoader(file_path, unstructured_api_url)
loader = UnstructuredTextLoader(file_path, unstructured_api_url) if is_automatic \
else TextLoader(file_path, autodetect_encoding=True)
else:
if file_extension == '.xlsx':
loader = ExcelLoader(file_path)
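The FileExtractor hunks above switch loader families on the new is_automatic flag: automatic processing keeps the Unstructured API loaders, while manual rules fall back to the lightweight local loaders. A condensed sketch of one branch (hypothetical helper, not the full method):

    def pick_markdown_loader(file_path: str, unstructured_api_url: str, is_automatic: bool):
        if is_automatic:
            return UnstructuredMarkdownLoader(file_path, unstructured_api_url)
        return MarkdownLoader(file_path, autodetect_encoding=True)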
@@ -1,6 +1,7 @@
import datetime
import json
import time
import logging

from json import JSONDecodeError
from typing import Optional, List, Dict, Tuple, Iterator

@@ -9,8 +10,10 @@ from pydantic import BaseModel
from core.entities.model_entities import ModelWithProviderEntity, ModelStatus, SimpleModelProviderEntity
from core.entities.provider_entities import SystemConfiguration, CustomConfiguration, SystemConfigurationStatus
from core.helper import encrypter
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.entities.provider_entities import ProviderEntity, CredentialFormSchema, FormType
from core.helper.model_provider_cache import ProviderCredentialsCache, ProviderCredentialsCacheType
from core.model_runtime.entities.model_entities import ModelType, FetchFrom
from core.model_runtime.entities.provider_entities import ProviderEntity, CredentialFormSchema, FormType, \
ConfigurateMethod
from core.model_runtime.model_providers import model_provider_factory
from core.model_runtime.model_providers.__base.ai_model import AIModel
from core.model_runtime.model_providers.__base.model_provider import ModelProvider
@@ -18,6 +21,10 @@ from core.model_runtime.utils import encoders
from extensions.ext_database import db
from models.provider import ProviderType, Provider, ProviderModel, TenantPreferredModelProvider

logger = logging.getLogger(__name__)

original_provider_configurate_methods = {}


class ProviderConfiguration(BaseModel):
"""
@@ -30,6 +37,20 @@ class ProviderConfiguration(BaseModel):
system_configuration: SystemConfiguration
custom_configuration: CustomConfiguration

def __init__(self, **data):
super().__init__(**data)

if self.provider.provider not in original_provider_configurate_methods:
original_provider_configurate_methods[self.provider.provider] = []
for configurate_method in self.provider.configurate_methods:
original_provider_configurate_methods[self.provider.provider].append(configurate_method)

if original_provider_configurate_methods[self.provider.provider] == [ConfigurateMethod.CUSTOMIZABLE_MODEL]:
if (any([len(quota_configuration.restrict_models) > 0
for quota_configuration in self.system_configuration.quota_configurations])
and ConfigurateMethod.PREDEFINED_MODEL not in self.provider.configurate_methods):
self.provider.configurate_methods.append(ConfigurateMethod.PREDEFINED_MODEL)

def get_current_credentials(self, model_type: ModelType, model: str) -> Optional[dict]:
"""
Get current credentials.
@@ -39,7 +60,22 @@ class ProviderConfiguration(BaseModel):
:return:
"""
if self.using_provider_type == ProviderType.SYSTEM:
return self.system_configuration.credentials
restrict_models = []
for quota_configuration in self.system_configuration.quota_configurations:
if self.system_configuration.current_quota_type != quota_configuration.quota_type:
continue

restrict_models = quota_configuration.restrict_models

copy_credentials = self.system_configuration.credentials.copy()
if restrict_models:
for restrict_model in restrict_models:
if (restrict_model.model_type == model_type
and restrict_model.model == model
and restrict_model.base_model_name):
copy_credentials['base_model_name'] = restrict_model.base_model_name

return copy_credentials
else:
if self.custom_configuration.models:
for model_configuration in self.custom_configuration.models:
@@ -119,7 +155,8 @@ class ProviderConfiguration(BaseModel):

if provider_record:
try:
original_credentials = json.loads(provider_record.encrypted_config) if provider_record.encrypted_config else {}
original_credentials = json.loads(
provider_record.encrypted_config) if provider_record.encrypted_config else {}
except JSONDecodeError:
original_credentials = {}

@@ -168,6 +205,14 @@ class ProviderConfiguration(BaseModel):
db.session.add(provider_record)
db.session.commit()

provider_model_credentials_cache = ProviderCredentialsCache(
tenant_id=self.tenant_id,
identity_id=provider_record.id,
cache_type=ProviderCredentialsCacheType.PROVIDER
)

provider_model_credentials_cache.delete()

self.switch_preferred_provider_type(ProviderType.CUSTOM)

def delete_custom_credentials(self) -> None:
@@ -190,6 +235,14 @@ class ProviderConfiguration(BaseModel):
db.session.delete(provider_record)
db.session.commit()

provider_model_credentials_cache = ProviderCredentialsCache(
tenant_id=self.tenant_id,
identity_id=provider_record.id,
cache_type=ProviderCredentialsCacheType.PROVIDER
)

provider_model_credentials_cache.delete()

def get_custom_model_credentials(self, model_type: ModelType, model: str, obfuscated: bool = False) \
-> Optional[dict]:
"""
@@ -245,7 +298,8 @@ class ProviderConfiguration(BaseModel):

if provider_model_record:
try:
original_credentials = json.loads(provider_model_record.encrypted_config) if provider_model_record.encrypted_config else {}
original_credentials = json.loads(
provider_model_record.encrypted_config) if provider_model_record.encrypted_config else {}
except JSONDecodeError:
original_credentials = {}

@@ -311,6 +365,14 @@ class ProviderConfiguration(BaseModel):
db.session.add(provider_model_record)
db.session.commit()

provider_model_credentials_cache = ProviderCredentialsCache(
tenant_id=self.tenant_id,
identity_id=provider_model_record.id,
cache_type=ProviderCredentialsCacheType.MODEL
)

provider_model_credentials_cache.delete()

def delete_custom_model_credentials(self, model_type: ModelType, model: str) -> None:
"""
Delete custom model credentials.
@@ -332,6 +394,14 @@ class ProviderConfiguration(BaseModel):
db.session.delete(provider_model_record)
db.session.commit()

provider_model_credentials_cache = ProviderCredentialsCache(
tenant_id=self.tenant_id,
identity_id=provider_model_record.id,
cache_type=ProviderCredentialsCacheType.MODEL
)

provider_model_credentials_cache.delete()

def get_provider_instance(self) -> ModelProvider:
"""
Get provider instance.
@@ -484,7 +554,13 @@ class ProviderConfiguration(BaseModel):
provider_models.extend(
[
ModelWithProviderEntity(
**m.dict(),
model=m.model,
label=m.label,
model_type=m.model_type,
features=m.features,
fetch_from=m.fetch_from,
model_properties=m.model_properties,
deprecated=m.deprecated,
provider=SimpleModelProviderEntity(self.provider),
status=ModelStatus.ACTIVE
)
@@ -492,21 +568,70 @@ class ProviderConfiguration(BaseModel):
]
)

if self.provider.provider not in original_provider_configurate_methods:
original_provider_configurate_methods[self.provider.provider] = []
for configurate_method in provider_instance.get_provider_schema().configurate_methods:
original_provider_configurate_methods[self.provider.provider].append(configurate_method)

should_use_custom_model = False
if original_provider_configurate_methods[self.provider.provider] == [ConfigurateMethod.CUSTOMIZABLE_MODEL]:
should_use_custom_model = True

for quota_configuration in self.system_configuration.quota_configurations:
if self.system_configuration.current_quota_type != quota_configuration.quota_type:
continue

restrict_llms = quota_configuration.restrict_llms
if not restrict_llms:
restrict_models = quota_configuration.restrict_models
if len(restrict_models) == 0:
break

if should_use_custom_model:
if original_provider_configurate_methods[self.provider.provider] == [ConfigurateMethod.CUSTOMIZABLE_MODEL]:
# only customizable model
for restrict_model in restrict_models:
copy_credentials = self.system_configuration.credentials.copy()
if restrict_model.base_model_name:
copy_credentials['base_model_name'] = restrict_model.base_model_name

try:
custom_model_schema = (
provider_instance.get_model_instance(restrict_model.model_type)
.get_customizable_model_schema_from_credentials(
restrict_model.model,
copy_credentials
)
)
except Exception as ex:
logger.warning(f'get custom model schema failed, {ex}')
continue

if not custom_model_schema:
continue

if custom_model_schema.model_type not in model_types:
continue

provider_models.append(
ModelWithProviderEntity(
model=custom_model_schema.model,
label=custom_model_schema.label,
model_type=custom_model_schema.model_type,
features=custom_model_schema.features,
fetch_from=FetchFrom.PREDEFINED_MODEL,
model_properties=custom_model_schema.model_properties,
deprecated=custom_model_schema.deprecated,
provider=SimpleModelProviderEntity(self.provider),
status=ModelStatus.ACTIVE
)
)

# if llm name not in restricted llm list, remove it
restrict_model_names = [rm.model for rm in restrict_models]
for m in provider_models:
if m.model_type == ModelType.LLM and m.model not in restrict_llms:
if m.model_type == ModelType.LLM and m.model not in restrict_model_names:
m.status = ModelStatus.NO_PERMISSION
elif not quota_configuration.is_valid:
m.status = ModelStatus.QUOTA_EXCEEDED

return provider_models

def _get_custom_provider_models(self,
@@ -533,7 +658,13 @@ class ProviderConfiguration(BaseModel):
for m in models:
provider_models.append(
ModelWithProviderEntity(
**m.dict(),
model=m.model,
label=m.label,
model_type=m.model_type,
features=m.features,
fetch_from=m.fetch_from,
model_properties=m.model_properties,
deprecated=m.deprecated,
provider=SimpleModelProviderEntity(self.provider),
status=ModelStatus.ACTIVE if credentials else ModelStatus.NO_CONFIGURE
)
@@ -544,20 +675,30 @@ class ProviderConfiguration(BaseModel):
if model_configuration.model_type not in model_types:
continue

custom_model_schema = (
provider_instance.get_model_instance(model_configuration.model_type)
.get_customizable_model_schema_from_credentials(
model_configuration.model,
model_configuration.credentials
try:
custom_model_schema = (
provider_instance.get_model_instance(model_configuration.model_type)
.get_customizable_model_schema_from_credentials(
model_configuration.model,
model_configuration.credentials
)
)
)
except Exception as ex:
logger.warning(f'get custom model schema failed, {ex}')
continue

if not custom_model_schema:
continue

provider_models.append(
ModelWithProviderEntity(
**custom_model_schema.dict(),
model=custom_model_schema.model,
label=custom_model_schema.label,
model_type=custom_model_schema.model_type,
features=custom_model_schema.features,
fetch_from=custom_model_schema.fetch_from,
model_properties=custom_model_schema.model_properties,
deprecated=custom_model_schema.deprecated,
provider=SimpleModelProviderEntity(self.provider),
status=ModelStatus.ACTIVE
)

@@ -21,6 +21,12 @@ class SystemConfigurationStatus(Enum):
UNSUPPORTED = 'unsupported'


class RestrictModel(BaseModel):
model: str
base_model_name: Optional[str] = None
model_type: ModelType


class QuotaConfiguration(BaseModel):
"""
Model class for provider quota configuration.
@@ -30,7 +36,7 @@ class QuotaConfiguration(BaseModel):
quota_limit: int
quota_used: int
is_valid: bool
restrict_llms: list[str] = []
restrict_models: list[RestrictModel] = []


class SystemConfiguration(BaseModel):
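restrict_llms, previously a bare list of model names, becomes restrict_models, a typed list whose entries can also carry the Azure base-model mapping. A hedged example of the new entries (values illustrative):

    rm = RestrictModel(model="gpt-4", base_model_name="gpt-4", model_type=ModelType.LLM)
    # permission checks now match on name and type rather than on a raw string list
    is_allowed = any(r.model == "gpt-4" and r.model_type == ModelType.LLM
                     for r in [rm])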
@@ -61,7 +61,7 @@ class Extensible:

builtin_file_path = os.path.join(subdir_path, '__builtin__')
if os.path.exists(builtin_file_path):
with open(builtin_file_path, 'r') as f:
with open(builtin_file_path, 'r', encoding='utf-8') as f:
position = int(f.read().strip())

if (extension_name + '.py') not in file_names:
@@ -93,7 +93,7 @@ class Extensible:
json_path = os.path.join(subdir_path, 'schema.json')
json_data = {}
if os.path.exists(json_path):
with open(json_path, 'r') as f:
with open(json_path, 'r', encoding='utf-8') as f:
json_data = json.load(f)

extensions[extension_name] = ModuleExtension(

@@ -58,7 +58,7 @@ class ApiExternalDataTool(ExternalDataTool):
if not api_based_extension:
raise ValueError("[External data tool] API query failed, variable: {}, "
"error: api_based_extension_id is invalid"
.format(self.config.get('variable')))
.format(self.variable))

# decrypt api_key
api_key = encrypter.decrypt_token(
@@ -74,7 +74,7 @@ class ApiExternalDataTool(ExternalDataTool):
)
except Exception as e:
raise ValueError("[External data tool] API query failed, variable: {}, error: {}".format(
self.config.get('variable'),
self.variable,
e
))

@@ -87,6 +87,10 @@ class ApiExternalDataTool(ExternalDataTool):

if 'result' not in response_json:
raise ValueError("[External data tool] API query failed, variable: {}, error: result not found in response"
.format(self.config.get('variable')))
.format(self.variable))

if not isinstance(response_json['result'], str):
raise ValueError("[External data tool] API query failed, variable: {}, error: result is not string"
.format(self.variable))

return response_json['result']

@@ -1,35 +0,0 @@
{
"label": {
"en-US": "Weather Search",
"zh-Hans": "天气查询"
},
"form_schema": [
{
"type": "select",
"label": {
"en-US": "Temperature Unit",
"zh-Hans": "温度单位"
},
"variable": "temperature_unit",
"required": true,
"options": [
{
"label": {
"en-US": "Fahrenheit",
"zh-Hans": "华氏度"
},
"value": "fahrenheit"
},
{
"label": {
"en-US": "Centigrade",
"zh-Hans": "摄氏度"
},
"value": "centigrade"
}
],
"default": "centigrade",
"placeholder": "Please select temperature unit"
}
]
}
@@ -1,45 +0,0 @@
from typing import Optional

from core.external_data_tool.base import ExternalDataTool


class WeatherSearch(ExternalDataTool):
"""
The name of custom type must be unique, keep the same with directory and file name.
"""
name: str = "weather_search"

@classmethod
def validate_config(cls, tenant_id: str, config: dict) -> None:
"""
schema.json validation. It will be called when user save the config.

Example:
.. code-block:: python
config = {
"temperature_unit": "centigrade"
}

:param tenant_id: the id of workspace
:param config: the variables of form config
:return:
"""

if not config.get('temperature_unit'):
raise ValueError('temperature unit is required')

def query(self, inputs: dict, query: Optional[str] = None) -> str:
"""
Query the external data tool.

:param inputs: user inputs
:param query: the query of chat app
:return: the tool query result
"""
city = inputs.get('city')
temperature_unit = self.config.get('temperature_unit')

if temperature_unit == 'fahrenheit':
return f'Weather in {city} is 32°F'
else:
return f'Weather in {city} is 0°C'

api/core/helper/model_provider_cache.py (new file, 51 lines)
@@ -0,0 +1,51 @@
import json
from enum import Enum
from json import JSONDecodeError
from typing import Optional

from extensions.ext_redis import redis_client


class ProviderCredentialsCacheType(Enum):
PROVIDER = "provider"
MODEL = "provider_model"


class ProviderCredentialsCache:
def __init__(self, tenant_id: str, identity_id: str, cache_type: ProviderCredentialsCacheType):
self.cache_key = f"{cache_type.value}_credentials:tenant_id:{tenant_id}:id:{identity_id}"

def get(self) -> Optional[dict]:
"""
Get cached model provider credentials.

:return:
"""
cached_provider_credentials = redis_client.get(self.cache_key)
if cached_provider_credentials:
try:
cached_provider_credentials = cached_provider_credentials.decode('utf-8')
cached_provider_credentials = json.loads(cached_provider_credentials)
except JSONDecodeError:
return None

return cached_provider_credentials
else:
return None

def set(self, credentials: dict) -> None:
"""
Cache model provider credentials.

:param credentials: provider credentials
:return:
"""
redis_client.setex(self.cache_key, 86400, json.dumps(credentials))

def delete(self) -> None:
"""
Delete cached model provider credentials.

:return:
"""
redis_client.delete(self.cache_key)
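The new ProviderCredentialsCache is a thin Redis wrapper keyed per tenant and record, with a 24-hour TTL on set(). Typical usage, sketched under the assumption that the ids come from the provider records shown above:

    cache = ProviderCredentialsCache(
        tenant_id="tenant-123",            # illustrative ids
        identity_id="provider-record-id",
        cache_type=ProviderCredentialsCacheType.PROVIDER,
    )
    credentials = cache.get()              # None on a miss or undecodable JSON
    if credentials is None:
        credentials = {"api_key": "..."}   # fall back to the database
        cache.set(credentials)             # cached for 86400 seconds
    cache.delete()                         # invalidate after credential updates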
@@ -4,13 +4,14 @@ from typing import Optional
from flask import Flask
from pydantic import BaseModel

from core.entities.provider_entities import QuotaUnit
from core.entities.provider_entities import QuotaUnit, RestrictModel
from core.model_runtime.entities.model_entities import ModelType
from models.provider import ProviderQuotaType


class HostingQuota(BaseModel):
quota_type: ProviderQuotaType
restrict_llms: list[str] = []
restrict_models: list[RestrictModel] = []


class TrialHostingQuota(HostingQuota):
@@ -47,10 +48,11 @@ class HostingConfiguration:
provider_map: dict[str, HostingProvider] = {}
moderation_config: HostedModerationConfig = None

def init_app(self, app: Flask):
def init_app(self, app: Flask) -> None:
if app.config.get('EDITION') != 'CLOUD':
return

self.provider_map["azure_openai"] = self.init_azure_openai()
self.provider_map["openai"] = self.init_openai()
self.provider_map["anthropic"] = self.init_anthropic()
self.provider_map["minimax"] = self.init_minimax()
@@ -59,6 +61,47 @@ class HostingConfiguration:

self.moderation_config = self.init_moderation_config()

def init_azure_openai(self) -> HostingProvider:
quota_unit = QuotaUnit.TIMES
if os.environ.get("HOSTED_AZURE_OPENAI_ENABLED") and os.environ.get("HOSTED_AZURE_OPENAI_ENABLED").lower() == 'true':
credentials = {
"openai_api_key": os.environ.get("HOSTED_AZURE_OPENAI_API_KEY"),
"openai_api_base": os.environ.get("HOSTED_AZURE_OPENAI_API_BASE"),
"base_model_name": "gpt-35-turbo"
}

quotas = []
hosted_quota_limit = int(os.environ.get("HOSTED_AZURE_OPENAI_QUOTA_LIMIT", "1000"))
if hosted_quota_limit != -1 or hosted_quota_limit > 0:
trial_quota = TrialHostingQuota(
quota_limit=hosted_quota_limit,
restrict_models=[
RestrictModel(model="gpt-4", base_model_name="gpt-4", model_type=ModelType.LLM),
RestrictModel(model="gpt-4-32k", base_model_name="gpt-4-32k", model_type=ModelType.LLM),
RestrictModel(model="gpt-4-1106-preview", base_model_name="gpt-4-1106-preview", model_type=ModelType.LLM),
RestrictModel(model="gpt-4-vision-preview", base_model_name="gpt-4-vision-preview", model_type=ModelType.LLM),
RestrictModel(model="gpt-35-turbo", base_model_name="gpt-35-turbo", model_type=ModelType.LLM),
RestrictModel(model="gpt-35-turbo-1106", base_model_name="gpt-35-turbo-1106", model_type=ModelType.LLM),
RestrictModel(model="gpt-35-turbo-instruct", base_model_name="gpt-35-turbo-instruct", model_type=ModelType.LLM),
RestrictModel(model="gpt-35-turbo-16k", base_model_name="gpt-35-turbo-16k", model_type=ModelType.LLM),
RestrictModel(model="text-davinci-003", base_model_name="text-davinci-003", model_type=ModelType.LLM),
RestrictModel(model="text-embedding-ada-002", base_model_name="text-embedding-ada-002", model_type=ModelType.TEXT_EMBEDDING),
]
)
quotas.append(trial_quota)

return HostingProvider(
enabled=True,
credentials=credentials,
quota_unit=quota_unit,
quotas=quotas
)

return HostingProvider(
enabled=False,
quota_unit=quota_unit,
)

def init_openai(self) -> HostingProvider:
quota_unit = QuotaUnit.TIMES
if os.environ.get("HOSTED_OPENAI_ENABLED") and os.environ.get("HOSTED_OPENAI_ENABLED").lower() == 'true':
@@ -77,12 +120,12 @@ class HostingConfiguration:
if hosted_quota_limit != -1 or hosted_quota_limit > 0:
trial_quota = TrialHostingQuota(
quota_limit=hosted_quota_limit,
restrict_llms=[
"gpt-3.5-turbo",
"gpt-3.5-turbo-1106",
"gpt-3.5-turbo-instruct",
"gpt-3.5-turbo-16k",
"text-davinci-003"
restrict_models=[
RestrictModel(model="gpt-3.5-turbo", model_type=ModelType.LLM),
RestrictModel(model="gpt-3.5-turbo-1106", model_type=ModelType.LLM),
RestrictModel(model="gpt-3.5-turbo-instruct", model_type=ModelType.LLM),
RestrictModel(model="gpt-3.5-turbo-16k", model_type=ModelType.LLM),
RestrictModel(model="text-davinci-003", model_type=ModelType.LLM),
]
)
quotas.append(trial_quota)

@@ -136,6 +136,7 @@ class KeywordTableIndex(BaseIndex):
page_content=segment.content,
metadata={
"doc_id": chunk_index,
"doc_hash": segment.index_node_hash,
"document_id": segment.document_id,
"dataset_id": segment.dataset_id,
}

@@ -18,6 +18,7 @@ from models.dataset import Dataset, DatasetCollectionBinding
class QdrantConfig(BaseModel):
endpoint: str
api_key: Optional[str]
timeout: float = 20
root_path: Optional[str]

def to_qdrant_params(self):
@@ -33,6 +34,7 @@ class QdrantConfig(BaseModel):
return {
'url': self.endpoint,
'api_key': self.api_key,
'timeout': self.timeout
}


@@ -49,7 +49,8 @@ class VectorIndex:
config=QdrantConfig(
endpoint=config.get('QDRANT_URL'),
api_key=config.get('QDRANT_API_KEY'),
root_path=current_app.root_path
root_path=current_app.root_path,
timeout=config.get('QDRANT_CLIENT_TIMEOUT')
),
embeddings=embeddings
)

@@ -5,12 +5,12 @@ import re
import threading
import time
import uuid
from typing import Optional, List, cast
from typing import Optional, List, cast, Type, Union, Literal, AbstractSet, Collection, Any

from flask import current_app, Flask
from flask_login import current_user
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter
from langchain.text_splitter import TextSplitter, TS, TokenTextSplitter
from sqlalchemy.orm.exc import ObjectDeletedError

from core.data_loader.file_extractor import FileExtractor
@@ -23,7 +23,8 @@ from core.errors.error import ProviderTokenNotInitError
from core.model_runtime.entities.model_entities import ModelType, PriceType
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
from core.spiltter.fixed_text_splitter import FixedRecursiveCharacterTextSplitter
from core.model_runtime.model_providers.__base.tokenizers.gpt2_tokenzier import GPT2Tokenizer
from core.spiltter.fixed_text_splitter import FixedRecursiveCharacterTextSplitter, EnhanceRecursiveCharacterTextSplitter
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from extensions.ext_storage import storage
@@ -58,7 +59,7 @@ class IndexingRunner:
first()

# load file
text_docs = self._load_data(dataset_document)
text_docs = self._load_data(dataset_document, processing_rule.mode == 'automatic')

# get splitter
splitter = self._get_splitter(processing_rule)
@@ -112,15 +113,14 @@ class IndexingRunner:
for document_segment in document_segments:
db.session.delete(document_segment)
db.session.commit()

# load file
text_docs = self._load_data(dataset_document)

# get the process rule
processing_rule = db.session.query(DatasetProcessRule). \
filter(DatasetProcessRule.id == dataset_document.dataset_process_rule_id). \
first()

# load file
text_docs = self._load_data(dataset_document, processing_rule.mode == 'automatic')

# get splitter
splitter = self._get_splitter(processing_rule)

@@ -221,12 +221,18 @@ class IndexingRunner:
if not dataset:
raise ValueError('Dataset not found.')
if dataset.indexing_technique == 'high_quality' or indexing_technique == 'high_quality':
embedding_model_instance = self.model_manager.get_model_instance(
tenant_id=tenant_id,
provider=dataset.embedding_model_provider,
model_type=ModelType.TEXT_EMBEDDING,
model=dataset.embedding_model
)
if dataset.embedding_model_provider:
embedding_model_instance = self.model_manager.get_model_instance(
tenant_id=tenant_id,
provider=dataset.embedding_model_provider,
model_type=ModelType.TEXT_EMBEDDING,
model=dataset.embedding_model
)
else:
embedding_model_instance = self.model_manager.get_default_model_instance(
tenant_id=tenant_id,
model_type=ModelType.TEXT_EMBEDDING,
)
else:
if indexing_technique == 'high_quality':
embedding_model_instance = self.model_manager.get_default_model_instance(
@@ -237,14 +243,15 @@ class IndexingRunner:
preview_texts = []
total_segments = 0
for file_detail in file_details:
# load data from file
text_docs = FileExtractor.load(file_detail)

processing_rule = DatasetProcessRule(
mode=tmp_processing_rule["mode"],
rules=json.dumps(tmp_processing_rule["rules"])
)

# load data from file
text_docs = FileExtractor.load(file_detail, is_automatic=processing_rule.mode == 'automatic')

# get splitter
splitter = self._get_splitter(processing_rule)

@@ -327,12 +334,18 @@ class IndexingRunner:
if not dataset:
raise ValueError('Dataset not found.')
if dataset.indexing_technique == 'high_quality' or indexing_technique == 'high_quality':
embedding_model_instance = self.model_manager.get_model_instance(
tenant_id=tenant_id,
provider=dataset.embedding_model_provider,
model_type=ModelType.TEXT_EMBEDDING,
model=dataset.embedding_model
)
if dataset.embedding_model_provider:
embedding_model_instance = self.model_manager.get_model_instance(
tenant_id=tenant_id,
provider=dataset.embedding_model_provider,
model_type=ModelType.TEXT_EMBEDDING,
model=dataset.embedding_model
)
else:
embedding_model_instance = self.model_manager.get_default_model_instance(
tenant_id=tenant_id,
model_type=ModelType.TEXT_EMBEDDING,
)
else:
if indexing_technique == 'high_quality':
embedding_model_instance = self.model_manager.get_default_model_instance(
@@ -381,13 +394,15 @@ class IndexingRunner:
)
total_segments += len(documents)

embedding_model_type_instance = embedding_model_instance.model_type_instance
embedding_model_type_instance = cast(TextEmbeddingModel, embedding_model_type_instance)
embedding_model_type_instance = None
if embedding_model_instance:
embedding_model_type_instance = embedding_model_instance.model_type_instance
embedding_model_type_instance = cast(TextEmbeddingModel, embedding_model_type_instance)

for document in documents:
if len(preview_texts) < 5:
preview_texts.append(document.page_content)
if indexing_technique == 'high_quality' or embedding_model_instance:
if indexing_technique == 'high_quality' and embedding_model_type_instance:
tokens += embedding_model_type_instance.get_num_tokens(
model=embedding_model_instance.model,
credentials=embedding_model_instance.credentials,
@@ -456,7 +471,7 @@ class IndexingRunner:
one_or_none()

if file_detail:
text_docs = FileExtractor.load(file_detail, is_automatic=True)
text_docs = FileExtractor.load(file_detail, is_automatic=automatic)
elif dataset_document.data_source_type == 'notion_import':
loader = NotionLoader.from_document(dataset_document)
text_docs = loader.load()
@@ -502,7 +517,8 @@ class IndexingRunner:
if separator:
separator = separator.replace('\\n', '\n')

character_splitter = FixedRecursiveCharacterTextSplitter.from_tiktoken_encoder(

character_splitter = FixedRecursiveCharacterTextSplitter.from_gpt2_encoder(
chunk_size=segmentation["max_tokens"],
chunk_overlap=0,
fixed_separator=separator,
@@ -510,7 +526,7 @@ class IndexingRunner:
)
else:
# Automatic segmentation
character_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
character_splitter = EnhanceRecursiveCharacterTextSplitter.from_gpt2_encoder(
chunk_size=DatasetProcessRule.AUTOMATIC_RULES['segmentation']['max_tokens'],
chunk_overlap=0,
separators=["\n\n", "。", ".", " ", ""]
|
||||
user=user
|
||||
)
|
||||
|
||||
def invoke_speech2text(self, file: IO[bytes], user: Optional[str] = None) \
|
||||
def invoke_speech2text(self, file: IO[bytes], user: Optional[str] = None, **params) \
|
||||
-> str:
|
||||
"""
|
||||
Invoke large language model
|
||||
@@ -161,7 +161,8 @@ class ModelInstance:
|
||||
model=self.model,
|
||||
credentials=self.credentials,
|
||||
file=file,
|
||||
user=user
|
||||
user=user,
|
||||
**params
|
||||
)
|
||||
|
||||
|
||||
@@ -178,6 +179,8 @@ class ModelManager:
|
||||
:param model: model name
|
||||
:return:
|
||||
"""
|
||||
if not provider:
|
||||
return self.get_default_model_instance(tenant_id, model_type)
|
||||
provider_model_bundle = self._provider_manager.get_provider_model_bundle(
|
||||
tenant_id=tenant_id,
|
||||
provider=provider,
|
||||
|
||||
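get_model_instance() now degrades gracefully: an empty provider falls through to the tenant's default model of the requested type instead of failing the provider lookup. For example (assumed call shape, based on the documented parameters):

    manager = ModelManager()
    instance = manager.get_model_instance(
        tenant_id=tenant_id,
        provider='',                      # empty: resolves via get_default_model_instance()
        model_type=ModelType.TEXT_EMBEDDING,
        model='text-embedding-ada-002',   # illustrative model name
    )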
@@ -30,7 +30,7 @@
```yaml
provider: xinference # the provider identifier
label: # provider display name; both en_US (English) and zh_Hans (Chinese) can be set, and zh_Hans falls back to en_US when unset
en_US: Xorbots Inference
en_US: Xorbits Inference
icon_small: # small icon; see other providers' icons for reference, stored in the _assets directory under the provider's implementation directory; same language strategy as label
en_US: icon_s_en.svg
icon_large: # large icon
@@ -260,7 +260,7 @@ provider_credential_schema:
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
model_type=model_type,
model_properties={
'mode': ModelType.LLM,
ModelPropertyKey.MODE: ModelType.LLM,
},
parameter_rules=rules
)

@@ -18,7 +18,7 @@ PARAMETER_RULE_TEMPLATE: Dict[DefaultParameterName, dict] = {
'default': 0.0,
'min': 0.0,
'max': 1.0,
'precision': 1,
'precision': 2,
},
DefaultParameterName.TOP_P: {
'label': {
@@ -34,7 +34,7 @@ PARAMETER_RULE_TEMPLATE: Dict[DefaultParameterName, dict] = {
'default': 1.0,
'min': 0.0,
'max': 1.0,
'precision': 1,
'precision': 2,
},
DefaultParameterName.PRESENCE_PENALTY: {
'label': {
@@ -50,7 +50,7 @@ PARAMETER_RULE_TEMPLATE: Dict[DefaultParameterName, dict] = {
'default': 0.0,
'min': 0.0,
'max': 1.0,
'precision': 1,
'precision': 2,
},
DefaultParameterName.FREQUENCY_PENALTY: {
'label': {
@@ -66,7 +66,7 @@ PARAMETER_RULE_TEMPLATE: Dict[DefaultParameterName, dict] = {
'default': 0.0,
'min': 0.0,
'max': 1.0,
'precision': 1,
'precision': 2,
},
DefaultParameterName.MAX_TOKENS: {
'label': {

@@ -32,7 +32,7 @@ class ModelType(Enum):
return cls.TEXT_EMBEDDING
elif origin_model_type == 'reranking' or origin_model_type == cls.RERANK.value:
return cls.RERANK
elif origin_model_type == cls.SPEECH2TEXT.value:
elif origin_model_type == 'speech2text' or origin_model_type == cls.SPEECH2TEXT.value:
return cls.SPEECH2TEXT
elif origin_model_type == cls.MODERATION.value:
return cls.MODERATION

@@ -8,6 +8,9 @@ class InvokeError(Exception):
def __init__(self, description: Optional[str] = None) -> None:
self.description = description

def __str__(self):
return self.description or self.__class__.__name__


class InvokeConnectionError(InvokeError):
"""Raised when the Invoke returns connection error."""

@@ -147,13 +147,15 @@ class AIModel(ABC):
# read _position.yaml file
position_map = {}
if os.path.exists(position_file_path):
with open(position_file_path, 'r') as f:
position_map = yaml.safe_load(f)
with open(position_file_path, 'r', encoding='utf-8') as f:
positions = yaml.safe_load(f)
# convert list to dict with key as model provider name, value as index
position_map = {position: index for index, position in enumerate(positions)}

# traverse all model_schema_yaml_paths
for model_schema_yaml_path in model_schema_yaml_paths:
# read yaml data from yaml file
with open(model_schema_yaml_path, 'r') as f:
with open(model_schema_yaml_path, 'r', encoding='utf-8') as f:
yaml_data = yaml.safe_load(f)

new_parameter_rules = []
@@ -236,16 +238,6 @@ class AIModel(ABC):
:param credentials: model credentials
:return: model schema
"""
if 'schema' in credentials:
schema_dict = json.loads(credentials['schema'])

try:
model_instance = AIModelEntity.parse_obj(schema_dict)
return model_instance
except ValidationError as e:
logging.exception(f"Invalid model schema for {model}")
return self._get_customizable_model_schema(model, credentials)

return self._get_customizable_model_schema(model, credentials)

def _get_customizable_model_schema(self, model: str, credentials: dict) -> Optional[AIModelEntity]:
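_position.yaml moves from an explicit name-to-index mapping to a plain ordered list (see the _position.yaml hunk further below); the loader now derives the indices itself. The conversion from the hunk above, in isolation:

    import yaml

    with open('_position.yaml', 'r', encoding='utf-8') as f:
        positions = yaml.safe_load(f)      # e.g. ['openai', 'anthropic', ...]
    position_map = {name: index for index, name in enumerate(positions)}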
@@ -132,8 +132,8 @@ class LargeLanguageModel(AIModel):
|
||||
system_fingerprint = None
|
||||
real_model = model
|
||||
|
||||
for chunk in result:
|
||||
try:
|
||||
try:
|
||||
for chunk in result:
|
||||
yield chunk
|
||||
|
||||
self._trigger_new_chunk_callbacks(
|
||||
@@ -156,8 +156,8 @@ class LargeLanguageModel(AIModel):
|
||||
|
||||
if chunk.system_fingerprint:
|
||||
system_fingerprint = chunk.system_fingerprint
|
||||
except Exception as e:
|
||||
raise self._transform_invoke_error(e)
|
||||
except Exception as e:
|
||||
raise self._transform_invoke_error(e)
|
||||
|
||||
self._trigger_after_invoke_callbacks(
|
||||
model=model,
|
||||
@@ -165,7 +165,7 @@ class LargeLanguageModel(AIModel):
|
||||
model=real_model,
|
||||
prompt_messages=prompt_messages,
|
||||
message=prompt_message,
|
||||
usage=usage,
|
||||
usage=usage if usage else LLMUsage.empty_usage(),
|
||||
system_fingerprint=system_fingerprint
|
||||
),
|
||||
credentials=credentials,
|
||||
|
||||
@@ -47,7 +47,7 @@ class ModelProvider(ABC):
|
||||
yaml_path = os.path.join(current_path, f'{provider_name}.yaml')
|
||||
yaml_data = {}
|
||||
if os.path.exists(yaml_path):
|
||||
with open(yaml_path, 'r') as f:
|
||||
with open(yaml_path, 'r', encoding='utf-8') as f:
|
||||
yaml_data = yaml.safe_load(f)
|
||||
|
||||
try:
|
||||
@@ -112,7 +112,7 @@ class ModelProvider(ABC):
|
||||
model_class = None
|
||||
for name, obj in vars(mod).items():
|
||||
if (isinstance(obj, type) and issubclass(obj, AIModel) and not obj.__abstractmethods__
|
||||
and obj != AIModel):
|
||||
and obj != AIModel and obj.__module__ == mod.__name__):
|
||||
model_class = obj
|
||||
break
|
||||
|
||||
|
||||
@@ -1,19 +1,20 @@
|
||||
openai: 0
|
||||
anthropic: 1
|
||||
azure_openai: 2
|
||||
google: 3
|
||||
replicate: 4
|
||||
huggingface_hub: 5
|
||||
cohere: 6
|
||||
zhipuai: 7
|
||||
baichuan: 8
|
||||
spark: 9
|
||||
minimax: 10
|
||||
tongyi: 11
|
||||
wenxin: 12
|
||||
jina: 13
|
||||
chatglm: 14
|
||||
xinference: 15
|
||||
openllm: 16
|
||||
localai: 17
|
||||
openai_api_compatible: 18
|
||||
- openai
|
||||
- anthropic
|
||||
- azure_openai
|
||||
- google
|
||||
- replicate
|
||||
- huggingface_hub
|
||||
- cohere
|
||||
- togetherai
|
||||
- zhipuai
|
||||
- baichuan
|
||||
- spark
|
||||
- minimax
|
||||
- tongyi
|
||||
- wenxin
|
||||
- jina
|
||||
- chatglm
|
||||
- xinference
|
||||
- openllm
|
||||
- localai
|
||||
- openai_api_compatible
|
||||
|
||||
@@ -16,24 +16,24 @@ help:
|
||||
url:
|
||||
en_US: https://console.anthropic.com/account/keys
|
||||
supported_model_types:
|
||||
- llm
|
||||
- llm
|
||||
configurate_methods:
|
||||
- predefined-model
|
||||
- predefined-model
|
||||
provider_credential_schema:
|
||||
credential_form_schemas:
|
||||
- variable: anthropic_api_key
|
||||
label:
|
||||
en_US: API Key
|
||||
type: secret-input
|
||||
required: true
|
||||
placeholder:
|
||||
zh_Hans: 在此输入您的 API Key
|
||||
en_US: Enter your API Key
|
||||
- variable: anthropic_api_url
|
||||
label:
|
||||
en_US: API URL
|
||||
type: text-input
|
||||
required: false
|
||||
placeholder:
|
||||
zh_Hans: 在此输入您的 API URL
|
||||
en_US: Enter your API URL
|
||||
- variable: anthropic_api_key
|
||||
label:
|
||||
en_US: API Key
|
||||
type: secret-input
|
||||
required: true
|
||||
placeholder:
|
||||
zh_Hans: 在此输入您的 API Key
|
||||
en_US: Enter your API Key
|
||||
- variable: anthropic_api_url
|
||||
label:
|
||||
en_US: API URL
|
||||
type: text-input
|
||||
required: false
|
||||
placeholder:
|
||||
zh_Hans: 在此输入您的 API URL
|
||||
en_US: Enter your API URL
|
||||
|
||||
@@ -3,32 +3,32 @@ label:
|
||||
en_US: claude-2.1
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_tokens_to_sample
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_tokens_to_sample
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
pricing:
|
||||
input: '8.00'
|
||||
output: '24.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
currency: USD
|
||||
|
||||
@@ -3,32 +3,32 @@ label:
|
||||
en_US: claude-2
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 100000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_tokens_to_sample
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_tokens_to_sample
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
pricing:
|
||||
input: '8.00'
|
||||
output: '24.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
currency: USD
|
||||
|
||||
@@ -2,32 +2,32 @@ model: claude-instant-1
label:
  en_US: claude-instant-1
model_type: llm
-features: []
+features: [ ]
model_properties:
  mode: chat
  context_size: 100000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens_to_sample
    use_template: max_tokens
    required: true
    default: 4096
    min: 1
    max: 4096
pricing:
  input: '1.63'
  output: '5.51'
  unit: '0.000001'
  currency: USD

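The pricing block is interpreted as price-per-token scaled by unit: with unit '0.000001', the input figure '1.63' above means 1.63 USD per million prompt tokens (and 5.51 USD per million completion tokens). A worked example of the cost arithmetic, using Decimal to avoid float drift; the token counts are invented for illustration:

from decimal import Decimal

unit = Decimal('0.000001')     # price figures apply per (1 / unit) tokens
input_price = Decimal('1.63')  # USD, from the claude-instant-1 block above
output_price = Decimal('5.51')

prompt_tokens, completion_tokens = 12_000, 800
cost = prompt_tokens * input_price * unit + completion_tokens * output_price * unit
print(f"{cost:.6f} USD")  # 0.023968 USD
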
@@ -252,6 +252,9 @@ class AnthropicLargeLanguageModel(LargeLanguageModel):
         :param messages: List of PromptMessage to combine.
         :return: Combined string with necessary human_prompt and ai_prompt tags.
         """
         if not messages:
             return ''

+        messages = messages.copy()  # don't mutate the original list
+        if not isinstance(messages[-1], AssistantPromptMessage):
+            messages.append(AssistantPromptMessage(content=""))

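The three added lines guarantee that the combined Anthropic prompt always ends with an empty assistant turn, which the legacy text-completion API needs so generation starts right after the final "\n\nAssistant:" tag. A minimal sketch of the idea; the message classes here are simplified stand-ins, not dify's real prompt entities:

from dataclasses import dataclass

@dataclass
class UserMessage:
    content: str

@dataclass
class AssistantMessage:
    content: str

HUMAN_PROMPT = "\n\nHuman:"
AI_PROMPT = "\n\nAssistant:"

def combine(messages: list) -> str:
    if not messages:
        return ''
    messages = messages.copy()  # never mutate the caller's list
    if not isinstance(messages[-1], AssistantMessage):
        # force a trailing empty assistant turn -> trailing "Assistant:" tag
        messages.append(AssistantMessage(content=""))
    parts = []
    for m in messages:
        tag = HUMAN_PROMPT if isinstance(m, UserMessage) else AI_PROMPT
        parts.append(f"{tag} {m.content}" if m.content else tag)
    return "".join(parts)

print(combine([UserMessage("Hello")]))
# "\n\nHuman: Hello\n\nAssistant:" -- the model continues from here
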
@@ -2,7 +2,7 @@ from pydantic import BaseModel

 from core.model_runtime.entities.llm_entities import LLMMode
 from core.model_runtime.entities.model_entities import ModelFeature, ModelType, FetchFrom, ParameterRule, \
-    DefaultParameterName, PriceConfig
+    DefaultParameterName, PriceConfig, ModelPropertyKey
 from core.model_runtime.entities.model_entities import AIModelEntity, I18nObject
 from core.model_runtime.entities.defaults import PARAMETER_RULE_TEMPLATE

@@ -40,8 +40,8 @@ LLM_BASE_MODELS = [
             ],
             fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
             model_properties={
-                'mode': LLMMode.CHAT.value,
-                'context_size': 4096,
+                ModelPropertyKey.MODE: LLMMode.CHAT.value,
+                ModelPropertyKey.CONTEXT_SIZE: 4096,
             },
             parameter_rules=[
                 ParameterRule(

@@ -84,8 +84,8 @@ LLM_BASE_MODELS = [
             ],
             fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
             model_properties={
-                'mode': LLMMode.CHAT.value,
-                'context_size': 16385,
+                ModelPropertyKey.MODE: LLMMode.CHAT.value,
+                ModelPropertyKey.CONTEXT_SIZE: 16385,
             },
             parameter_rules=[
                 ParameterRule(

@@ -128,8 +128,8 @@ LLM_BASE_MODELS = [
             ],
             fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
             model_properties={
-                'mode': LLMMode.CHAT.value,
-                'context_size': 8192,
+                ModelPropertyKey.MODE: LLMMode.CHAT.value,
+                ModelPropertyKey.CONTEXT_SIZE: 8192,
             },
             parameter_rules=[
                 ParameterRule(

@@ -202,8 +202,8 @@ LLM_BASE_MODELS = [
             ],
             fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
             model_properties={
-                'mode': LLMMode.CHAT.value,
-                'context_size': 32768,
+                ModelPropertyKey.MODE: LLMMode.CHAT.value,
+                ModelPropertyKey.CONTEXT_SIZE: 32768,
             },
             parameter_rules=[
                 ParameterRule(

@@ -276,8 +276,8 @@ LLM_BASE_MODELS = [
             ],
             fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
             model_properties={
-                'mode': LLMMode.CHAT.value,
-                'context_size': 128000,
+                ModelPropertyKey.MODE: LLMMode.CHAT.value,
+                ModelPropertyKey.CONTEXT_SIZE: 128000,
             },
             parameter_rules=[
                 ParameterRule(

@@ -296,7 +296,7 @@ LLM_BASE_MODELS = [
                     name='frequency_penalty',
                     **PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY],
                 ),
-                _get_max_tokens(default=512, min_val=1, max_val=128000),
+                _get_max_tokens(default=512, min_val=1, max_val=4096),
                 ParameterRule(
                     name='seed',
                     label=I18nObject(

@@ -349,8 +349,8 @@ LLM_BASE_MODELS = [
             ],
             fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
             model_properties={
-                'mode': LLMMode.CHAT.value,
-                'context_size': 128000,
+                ModelPropertyKey.MODE: LLMMode.CHAT.value,
+                ModelPropertyKey.CONTEXT_SIZE: 128000,
             },
             parameter_rules=[
                 ParameterRule(

@@ -369,7 +369,7 @@ LLM_BASE_MODELS = [
                     name='frequency_penalty',
                     **PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY],
                 ),
-                _get_max_tokens(default=512, min_val=1, max_val=128000),
+                _get_max_tokens(default=512, min_val=1, max_val=4096),
                 ParameterRule(
                     name='seed',
                     label=I18nObject(

@@ -419,8 +419,8 @@ LLM_BASE_MODELS = [
             model_type=ModelType.LLM,
             fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
             model_properties={
-                'mode': LLMMode.COMPLETION.value,
-                'context_size': 4096,
+                ModelPropertyKey.MODE: LLMMode.COMPLETION.value,
+                ModelPropertyKey.CONTEXT_SIZE: 4096,
             },
             parameter_rules=[
                 ParameterRule(

@@ -448,6 +448,46 @@ LLM_BASE_MODELS = [
                 currency='USD',
             )
         )
-    )
+    ),
+    AzureBaseModel(
+        base_model_name='text-davinci-003',
+        entity=AIModelEntity(
+            model='fake-deployment-name',
+            label=I18nObject(
+                en_US='fake-deployment-name-label',
+            ),
+            model_type=ModelType.LLM,
+            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
+            model_properties={
+                ModelPropertyKey.MODE: LLMMode.COMPLETION.value,
+                ModelPropertyKey.CONTEXT_SIZE: 4096,
+            },
+            parameter_rules=[
+                ParameterRule(
+                    name='temperature',
+                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
+                ),
+                ParameterRule(
+                    name='top_p',
+                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
+                ),
+                ParameterRule(
+                    name='presence_penalty',
+                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.PRESENCE_PENALTY],
+                ),
+                ParameterRule(
+                    name='frequency_penalty',
+                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY],
+                ),
+                _get_max_tokens(default=512, min_val=1, max_val=4096),
+            ],
+            pricing=PriceConfig(
+                input=0.02,
+                output=0.02,
+                unit=0.001,
+                currency='USD',
+            )
+        )
+    )
 ]

@@ -462,8 +502,8 @@ EMBEDDING_BASE_MODELS = [
         fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
         model_type=ModelType.TEXT_EMBEDDING,
         model_properties={
-            'context_size': 8097,
-            'max_chunks': 32,
+            ModelPropertyKey.CONTEXT_SIZE: 8097,
+            ModelPropertyKey.MAX_CHUNKS: 32,
         },
         pricing=PriceConfig(
             input=0.0001,

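These hunks replace raw string keys ('mode', 'context_size') in model_properties with members of the ModelPropertyKey enum, so a typo fails loudly at import time instead of becoming a silent lookup miss at runtime. A self-contained sketch of the pattern; the enum here only mirrors the shape of dify's ModelPropertyKey, not its full definition:

from enum import Enum

class ModelPropertyKey(Enum):
    MODE = "mode"
    CONTEXT_SIZE = "context_size"
    MAX_CHUNKS = "max_chunks"

# keyed by enum members, not by bare strings
model_properties = {
    ModelPropertyKey.MODE: "chat",
    ModelPropertyKey.CONTEXT_SIZE: 4096,
}

# a misspelled string key would just return None;
# a misspelled enum member raises AttributeError before the code even runs
print(model_properties.get(ModelPropertyKey.CONTEXT_SIZE))  # 4096
# print(model_properties[ModelPropertyKey.CONTEXT_SIZO])    # AttributeError
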
@@ -13,10 +13,10 @@ help:
  url:
    en_US: https://azure.microsoft.com/en-us/products/ai-services/openai-service
supported_model_types:
  - llm
  - text-embedding
configurate_methods:
  - customizable-model
model_credential_schema:
  model:
    label:

@@ -26,79 +26,79 @@ model_credential_schema:
      en_US: Enter your Deployment Name here, matching the Azure deployment name.
      zh_Hans: 在此输入您的部署名称,与 Azure 部署名称匹配。
  credential_form_schemas:
    - variable: openai_api_base
      label:
        en_US: API Endpoint URL
        zh_Hans: API 域名
      type: text-input
      required: true
      placeholder:
        zh_Hans: '在此输入您的 API 域名,如:https://example.com/xxx'
        en_US: 'Enter your API Endpoint, eg: https://example.com/xxx'
    - variable: openai_api_key
      label:
        en_US: API Key
        zh_Hans: API Key
      type: secret-input
      required: true
      placeholder:
        zh_Hans: 在此输入您的 API Key
        en_US: Enter your API key here
    - variable: base_model_name
      label:
        en_US: Base Model
        zh_Hans: 基础模型
      type: select
      required: true
      options:
        - label:
            en_US: gpt-35-turbo
          value: gpt-35-turbo
          show_on:
            - variable: __model_type
              value: llm
        - label:
            en_US: gpt-35-turbo-16k
          value: gpt-35-turbo-16k
          show_on:
            - variable: __model_type
              value: llm
        - label:
            en_US: gpt-4
          value: gpt-4
          show_on:
            - variable: __model_type
              value: llm
        - label:
            en_US: gpt-4-32k
          value: gpt-4-32k
          show_on:
            - variable: __model_type
              value: llm
        - label:
            en_US: gpt-4-1106-preview
          value: gpt-4-1106-preview
          show_on:
            - variable: __model_type
              value: llm
        - label:
            en_US: gpt-4-vision-preview
          value: gpt-4-vision-preview
          show_on:
            - variable: __model_type
              value: llm
        - label:
            en_US: gpt-35-turbo-instruct
          value: gpt-35-turbo-instruct
          show_on:
            - variable: __model_type
              value: llm
        - label:
            en_US: text-embedding-ada-002
          value: text-embedding-ada-002
          show_on:
            - variable: __model_type
              value: text-embedding
      placeholder:
        zh_Hans: 在此输入您的模型版本
        en_US: Enter your model version

@@ -30,7 +30,7 @@ class AzureOpenAILargeLanguageModel(_CommonAzureOpenAI, LargeLanguageModel):
                 stream: bool = True, user: Optional[str] = None) \
             -> Union[LLMResult, Generator]:

-        ai_model_entity = self._get_ai_model_entity(credentials['base_model_name'], model)
+        ai_model_entity = self._get_ai_model_entity(credentials.get('base_model_name'), model)

         if ai_model_entity.entity.model_properties.get(ModelPropertyKey.MODE) == LLMMode.CHAT.value:
             # chat model

@@ -59,7 +59,7 @@ class AzureOpenAILargeLanguageModel(_CommonAzureOpenAI, LargeLanguageModel):
     def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
                        tools: Optional[list[PromptMessageTool]] = None) -> int:

-        model_mode = self._get_ai_model_entity(credentials['base_model_name'], model).entity.model_properties.get(
+        model_mode = self._get_ai_model_entity(credentials.get('base_model_name'), model).entity.model_properties.get(
             ModelPropertyKey.MODE)

         if model_mode == LLMMode.CHAT.value:

@@ -79,7 +79,7 @@ class AzureOpenAILargeLanguageModel(_CommonAzureOpenAI, LargeLanguageModel):
         if 'base_model_name' not in credentials:
             raise CredentialsValidateFailedError('Base Model Name is required')

-        ai_model_entity = self._get_ai_model_entity(credentials['base_model_name'], model)
+        ai_model_entity = self._get_ai_model_entity(credentials.get('base_model_name'), model)

         if not ai_model_entity:
             raise CredentialsValidateFailedError(f'Base Model Name {credentials["base_model_name"]} is invalid')

@@ -109,8 +109,8 @@ class AzureOpenAILargeLanguageModel(_CommonAzureOpenAI, LargeLanguageModel):
             raise CredentialsValidateFailedError(str(ex))

     def get_customizable_model_schema(self, model: str, credentials: dict) -> Optional[AIModelEntity]:
-        ai_model_entity = self._get_ai_model_entity(credentials['base_model_name'], model)
-        return ai_model_entity.entity
+        ai_model_entity = self._get_ai_model_entity(credentials.get('base_model_name'), model)
+        return ai_model_entity.entity if ai_model_entity else None

     def _generate(self, model: str, credentials: dict,
                   prompt_messages: list[PromptMessage], model_parameters: dict, stop: Optional[List[str]] = None,

@@ -309,7 +309,7 @@ class AzureOpenAILargeLanguageModel(_CommonAzureOpenAI, LargeLanguageModel):

         # transform response
         response = LLMResult(
-            model=response.model,
+            model=response.model or model,
             prompt_messages=prompt_messages,
             message=assistant_prompt_message,
             usage=usage,

|
||||
_iter = range(0, len(tokens), max_chunks)
|
||||
|
||||
for i in _iter:
|
||||
embeddings, embedding_used_tokens = self._embedding_invoke(
|
||||
embeddings_batch, embedding_used_tokens = self._embedding_invoke(
|
||||
model=model,
|
||||
client=client,
|
||||
texts=tokens[i: i + max_chunks],
|
||||
@@ -62,7 +62,7 @@ class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel):
|
||||
)
|
||||
|
||||
used_tokens += embedding_used_tokens
|
||||
batched_embeddings += [data for data in embeddings]
|
||||
batched_embeddings += embeddings_batch
|
||||
|
||||
results: list[list[list[float]]] = [[] for _ in range(len(texts))]
|
||||
num_tokens_in_batch: list[list[int]] = [[] for _ in range(len(texts))]
|
||||
@@ -73,7 +73,7 @@ class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel):
|
||||
for i in range(len(texts)):
|
||||
_result = results[i]
|
||||
if len(_result) == 0:
|
||||
embeddings, embedding_used_tokens = self._embedding_invoke(
|
||||
embeddings_batch, embedding_used_tokens = self._embedding_invoke(
|
||||
model=model,
|
||||
client=client,
|
||||
texts=[""],
|
||||
@@ -81,7 +81,7 @@ class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel):
|
||||
)
|
||||
|
||||
used_tokens += embedding_used_tokens
|
||||
average = embeddings[0]
|
||||
average = embeddings_batch[0]
|
||||
else:
|
||||
average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
|
||||
embeddings[i] = (average / np.linalg.norm(average)).tolist()
|
||||
|
||||
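The rename from embeddings to embeddings_batch matters because the method also writes a final output list named embeddings (see the last line, embeddings[i] = ...); reusing the name for the per-batch result shadowed that output list. The surrounding logic is the usual trick for embedding inputs longer than the model window: embed each slice, then combine the slices with a token-count-weighted average and re-normalize. A self-contained sketch of that combination step, with a toy embed function in place of the real API call:

import numpy as np

def embed(texts: list[str]) -> list[np.ndarray]:
    # toy stand-in for the real embedding API: deterministic 4-dim vectors
    rng = np.random.default_rng(0)
    return [rng.random(4) for _ in texts]

def embed_long(text: str, window: int = 10) -> list[float]:
    # slice the input into window-sized chunks and remember each chunk's length
    chunks = [text[i:i + window] for i in range(0, len(text), window)]
    vectors = embed(chunks)
    weights = [len(c) for c in chunks]  # token counts in the real code
    average = np.average(vectors, axis=0, weights=weights)
    return (average / np.linalg.norm(average)).tolist()  # unit-length result

print(embed_long("a fairly long input that exceeds one window"))
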
@@ -8,30 +8,30 @@ icon_large:
background: "#FFF6F2"
help:
  title:
    en_US: Get your API Key from BAICHUAN AI
    zh_Hans: 从百川智能获取您的 API Key
  url:
    en_US: https://www.baichuan-ai.com
supported_model_types:
  - llm
  - text-embedding
configurate_methods:
  - predefined-model
provider_credential_schema:
  credential_form_schemas:
    - variable: api_key
      label:
        en_US: API Key
      type: secret-input
      required: true
      placeholder:
        zh_Hans: 在此输入您的 API Key
        en_US: Enter your API Key
    - variable: secret_key
      label:
        en_US: Secret Key
      type: secret-input
      required: false
      placeholder:
        zh_Hans: 在此输入您的 Secret Key
        en_US: Enter your Secret Key

@@ -3,40 +3,40 @@ label:
  en_US: Baichuan2-53B
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 4000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 1000
    min: 1
    max: 4000
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
    use_template: frequency_penalty
  - name: with_search_enhance
    label:
      zh_Hans: 搜索增强
      en_US: Search Enhance
    type: boolean
    help:
      zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。
      en_US: Allow the model to perform external search to enhance the generation results.
    required: false

@@ -3,40 +3,40 @@ label:
  en_US: Baichuan2-Turbo-192K
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 192000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 8000
    min: 1
    max: 192000
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
    use_template: frequency_penalty
  - name: with_search_enhance
    label:
      zh_Hans: 搜索增强
      en_US: Search Enhance
    type: boolean
    help:
      zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。
      en_US: Allow the model to perform external search to enhance the generation results.
    required: false

@@ -3,40 +3,40 @@ label:
  en_US: Baichuan2-Turbo
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 192000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 8000
    min: 1
    max: 192000
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
    use_template: frequency_penalty
  - name: with_search_enhance
    label:
      zh_Hans: 搜索增强
      en_US: Search Enhance
    type: boolean
    help:
      zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。
      en_US: Allow the model to perform external search to enhance the generation results.
    required: false

@@ -2,4 +2,4 @@ model: baichuan-text-embedding
model_type: text-embedding
model_properties:
  context_size: 512
  max_chunks: 16

@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Optional, Tuple

 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult, EmbeddingUsage

@@ -38,6 +38,50 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
             raise ValueError('Invalid model name')
         if not api_key:
             raise CredentialsValidateFailedError('api_key is required')

+        # split into chunks of batch size 16
+        chunks = []
+        for i in range(0, len(texts), 16):
+            chunks.append(texts[i:i + 16])
+
+        embeddings = []
+        token_usage = 0
+
+        for chunk in chunks:
+            # embed one chunk
+            chunk_embeddings, chunk_usage = self.embedding(
+                model=model,
+                api_key=api_key,
+                texts=chunk,
+                user=user
+            )
+
+            embeddings.extend(chunk_embeddings)
+            token_usage += chunk_usage
+
+        result = TextEmbeddingResult(
+            model=model,
+            embeddings=embeddings,
+            usage=self._calc_response_usage(
+                model=model,
+                credentials=credentials,
+                tokens=token_usage
+            )
+        )
+
+        return result
+
+    def embedding(self, model: str, api_key, texts: list[str], user: Optional[str] = None) \
+        -> Tuple[list[list[float]], int]:
+        """
+        Embed given texts
+
+        :param model: model name
+        :param api_key: API key
+        :param texts: texts to embed
+        :param user: unique user id
+        :return: embeddings and total token usage
+        """
         url = self.api_base
         headers = {
             'Authorization': 'Bearer ' + api_key,

@@ -69,9 +113,9 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
             raise InsufficientAccountBalance(msg)
         elif err == 'invalid_authentication':
             raise InvalidAuthenticationError(msg)
-        elif 'rate' in err:
+        elif err and 'rate' in err:
             raise RateLimitReachedError(msg)
-        elif 'internal' in err:
+        elif err and 'internal' in err:
             raise InternalServerError(msg)
         elif err == 'api_key_empty':
             raise InvalidAPIKeyError(msg)

@@ -85,17 +129,10 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
         except Exception as e:
             raise InternalServerError(f"Failed to convert response to json: {e} with text: {response.text}")

-        usage = self._calc_response_usage(model=model, credentials=credentials, tokens=usage['total_tokens'])
-
-        result = TextEmbeddingResult(
-            model=model,
-            embeddings=[[
-                float(data) for data in x['embedding']
-            ] for x in embeddings],
-            usage=usage
-        )
-
-        return result
+        return [
+            data['embedding'] for data in embeddings
+        ], usage['total_tokens']

     def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
         """

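The refactor above splits responsibilities: embedding() now returns a raw (vectors, token_count) tuple for one batch of at most 16 texts, while the caller chunks the input, sums usage, and builds the single TextEmbeddingResult. The chunking loop itself is generic; a self-contained sketch with a stubbed per-batch call:

from typing import List, Tuple

def embed_batch(texts: List[str]) -> Tuple[List[List[float]], int]:
    # stub for the per-batch API call: one vector per text plus token usage
    return [[float(len(t))] for t in texts], sum(len(t) for t in texts)

def embed_all(texts: List[str], batch_size: int = 16) -> Tuple[List[List[float]], int]:
    embeddings: List[List[float]] = []
    token_usage = 0
    for i in range(0, len(texts), batch_size):  # walk the input in fixed-size slices
        vectors, used = embed_batch(texts[i:i + batch_size])
        embeddings.extend(vectors)              # keep output order aligned with input
        token_usage += used
    return embeddings, token_usage

vecs, used = embed_all([f"text {n}" for n in range(40)])
print(len(vecs), used)  # 40 vectors, aggregated usage
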
@@ -13,16 +13,16 @@ help:
  url:
    en_US: https://github.com/THUDM/ChatGLM3
supported_model_types:
  - llm
configurate_methods:
  - predefined-model
provider_credential_schema:
  credential_form_schemas:
    - variable: api_base
      label:
        en_US: API URL
      type: text-input
      required: true
      placeholder:
        zh_Hans: 在此输入您的 API URL
        en_US: Enter your API URL

@@ -3,19 +3,19 @@ label:
  en_US: ChatGLM2-6B-32K
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 32000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
    required: false
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 2000
    min: 1
    max: 32000

@@ -3,19 +3,19 @@ label:
  en_US: ChatGLM2-6B
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 2000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
    required: false
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 256
    min: 1
    max: 2000

@@ -3,20 +3,20 @@ label:
  en_US: ChatGLM3-6B-32K
model_type: llm
features:
  - tool-call
  - agent-thought
model_properties:
  mode: chat
  context_size: 32000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
    required: false
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 8000
    min: 1
    max: 32000

@@ -3,20 +3,20 @@ label:
  en_US: ChatGLM3-6B
model_type: llm
features:
  - tool-call
  - agent-thought
model_properties:
  mode: chat
  context_size: 8000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
    required: false
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 256
    min: 1
    max: 8000

@@ -14,18 +14,18 @@ help:
  url:
    en_US: https://dashboard.cohere.com/api-keys
supported_model_types:
  - rerank
configurate_methods:
  - predefined-model
provider_credential_schema:
  credential_form_schemas:
    - variable: api_key
      label:
        zh_Hans: API Key
        en_US: API Key
      type: secret-input
      required: true
      placeholder:
        zh_Hans: 请填写 API Key
        en_US: Please fill in API Key
-      show_on: []
+      show_on: [ ]

@@ -1,4 +1,4 @@
model: rerank-multilingual-v2.0
model_type: rerank
model_properties:
  context_size: 5120

@@ -16,17 +16,16 @@ help:
  url:
    en_US: https://ai.google.dev/
supported_model_types:
  - llm
configurate_methods:
  - predefined-model
provider_credential_schema:
  credential_form_schemas:
    - variable: google_api_key
      label:
        en_US: API Key
      type: secret-input
      required: true
      placeholder:
        zh_Hans: 在此输入您的 API Key
        en_US: Enter your API Key

@@ -3,32 +3,32 @@ label:
  en_US: Gemini Pro Vision
model_type: llm
features:
  - vision
model_properties:
  mode: chat
  context_size: 12288
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens_to_sample
    use_template: max_tokens
    required: true
    default: 4096
    min: 1
    max: 4096
pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD

@@ -3,32 +3,32 @@ label:
  en_US: Gemini Pro
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 30720
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens_to_sample
    use_template: max_tokens
    required: true
    default: 2048
    min: 1
    max: 2048
pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD

@@ -3,6 +3,7 @@ from typing import Optional, Generator, Union, List
 import google.generativeai as genai
 import google.api_core.exceptions as exceptions
 import google.generativeai.client as client
+from google.generativeai.types import HarmCategory, HarmBlockThreshold

 from google.generativeai.types import GenerateContentResponse, ContentType
 from google.generativeai.types.content_types import to_part

@@ -124,7 +125,7 @@ class GoogleLargeLanguageModel(LargeLanguageModel):
             last_msg = prompt_messages[-1]
             content = self._format_message_to_glm_content(last_msg)
             history.append(content)
         else:
             for msg in prompt_messages:  # makes message roles strictly alternating
                 content = self._format_message_to_glm_content(msg)
                 if history and history[-1]["role"] == content["role"]:

@@ -139,13 +140,21 @@ class GoogleLargeLanguageModel(LargeLanguageModel):
         new_custom_client = new_client_manager.make_client("generative")

         google_model._client = new_custom_client

+        safety_settings = {
+            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
+            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
+            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
+            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
+        }
+
         response = google_model.generate_content(
             contents=history,
             generation_config=genai.types.GenerationConfig(
                 **config_kwargs
             ),
-            stream=stream
+            stream=stream,
+            safety_settings=safety_settings
         )

         if stream:

@@ -169,7 +178,6 @@ class GoogleLargeLanguageModel(LargeLanguageModel):
             content=response.text
         )

-
         # calculate num tokens
         prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
         completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message])

@@ -202,11 +210,11 @@ class GoogleLargeLanguageModel(LargeLanguageModel):
         for chunk in response:
             content = chunk.text
             index += 1

             assistant_prompt_message = AssistantPromptMessage(
                 content=content if content else '',
             )

             if not response._done:

                 # transform assistant message to prompt message

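The @@ -139 hunk wires explicit safety settings into the Gemini call: every harm category is set to BLOCK_NONE so that Gemini's default content filters do not silently block or truncate workflow output, leaving moderation to the application layer. A hedged, minimal sketch of the same call shape; it assumes the google-generativeai package of that era and a configured API key, and the prompt is purely illustrative:

import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold

genai.configure(api_key="YOUR_API_KEY")  # placeholder credential
model = genai.GenerativeModel("gemini-pro")

# relax every built-in filter; moderation then has to happen downstream
safety_settings = {
    HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
    HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
}

response = model.generate_content(
    "Summarize the plot of Hamlet in one sentence.",
    safety_settings=safety_settings,
    stream=False,
)
print(response.text)
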
@@ -2,9 +2,9 @@ provider: huggingface_hub
label:
  en_US: Hugging Face Model
icon_small:
  en_US: icon_s_en.svg
icon_large:
  en_US: icon_l_en.svg
background: "#FFF8DC"
help:
  title:

@@ -13,90 +13,90 @@ help:
  url:
    en_US: https://huggingface.co/settings/tokens
supported_model_types:
  - llm
  - text-embedding
configurate_methods:
  - customizable-model
model_credential_schema:
  model:
    label:
      en_US: Model Name
      zh_Hans: 模型名称
  credential_form_schemas:
    - variable: huggingfacehub_api_type
      label:
        en_US: Endpoint Type
        zh_Hans: 端点类型
      type: radio
      required: true
      default: hosted_inference_api
      options:
        - value: hosted_inference_api
          label:
            en_US: Hosted Inference API
        - value: inference_endpoints
          label:
            en_US: Inference Endpoints
    - variable: huggingfacehub_api_token
      label:
        en_US: API Token
        zh_Hans: API Token
      type: secret-input
      required: true
      placeholder:
        en_US: Enter your Hugging Face Hub API Token here
        zh_Hans: 在此输入您的 Hugging Face Hub API Token
    - variable: huggingface_namespace
      label:
        en_US: 'User Name / Organization Name'
        zh_Hans: '用户名 / 组织名称'
      type: text-input
      required: true
      placeholder:
        en_US: 'Enter your User Name / Organization Name here'
        zh_Hans: '在此输入您的用户名 / 组织名称'
      show_on:
        - variable: __model_type
          value: text-embedding
        - variable: huggingfacehub_api_type
          value: inference_endpoints
    - variable: huggingfacehub_endpoint_url
      label:
        en_US: Endpoint URL
        zh_Hans: 端点 URL
      type: text-input
      required: true
      placeholder:
        en_US: Enter your Endpoint URL here
        zh_Hans: 在此输入您的端点 URL
      show_on:
        - variable: huggingfacehub_api_type
          value: inference_endpoints
    - variable: task_type
      label:
        en_US: Task
        zh_Hans: Task
      type: select
      options:
        - value: text2text-generation
          label:
            en_US: Text-to-Text Generation
          show_on:
            - variable: __model_type
              value: llm
        - value: text-generation
          label:
            en_US: Text Generation
            zh_Hans: 文本生成
          show_on:
            - variable: __model_type
              value: llm
        - value: feature-extraction
          label:
            en_US: Feature Extraction
          show_on:
            - variable: __model_type
              value: text-embedding
      show_on:
        - variable: huggingfacehub_api_type
          value: inference_endpoints

@@ -10,7 +10,7 @@ from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk,
 from core.model_runtime.entities.message_entities import PromptMessage, PromptMessageTool, AssistantPromptMessage, \
     UserPromptMessage, SystemPromptMessage
 from core.model_runtime.entities.model_entities import ParameterRule, DefaultParameterName, AIModelEntity, ModelType, \
-    FetchFrom
+    FetchFrom, ModelPropertyKey
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
 from core.model_runtime.model_providers.huggingface_hub._common import _CommonHuggingfaceHub

@@ -97,7 +97,7 @@ class HuggingfaceHubLargeLanguageModel(_CommonHuggingfaceHub, LargeLanguageModel
             fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
             model_type=ModelType.LLM,
             model_properties={
-                'mode': LLMMode.COMPLETION.value
+                ModelPropertyKey.MODE: LLMMode.COMPLETION.value
             },
             parameter_rules=self._get_customizable_model_parameter_rules()
         )

@@ -154,20 +154,31 @@ class HuggingfaceHubLargeLanguageModel(_CommonHuggingfaceHub, LargeLanguageModel
                 content=chunk.token.text
             )

-            prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
-            completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message])
-
-            usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
-
-            yield LLMResultChunk(
-                model=model,
-                prompt_messages=prompt_messages,
-                delta=LLMResultChunkDelta(
-                    index=index,
-                    message=assistant_prompt_message,
-                    usage=usage,
-                ),
-            )
+            if chunk.details:
+                prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
+                completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message])
+
+                usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
+
+                yield LLMResultChunk(
+                    model=model,
+                    prompt_messages=prompt_messages,
+                    delta=LLMResultChunkDelta(
+                        index=index,
+                        message=assistant_prompt_message,
+                        usage=usage,
+                        finish_reason=chunk.details.finish_reason,
+                    ),
+                )
+            else:
+                yield LLMResultChunk(
+                    model=model,
+                    prompt_messages=prompt_messages,
+                    delta=LLMResultChunkDelta(
+                        index=index,
+                        message=assistant_prompt_message,
+                    ),
+                )

     def _handle_generate_response(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], response: any) -> LLMResult:
         if isinstance(response, str):

|
||||
label:
|
||||
en_US: Jina
|
||||
description:
|
||||
en_US: Embedding Model Supported
|
||||
en_US: Embedding Model Supported
|
||||
icon_small:
|
||||
en_US: icon_s_en.svg
|
||||
icon_large:
|
||||
@@ -15,16 +15,16 @@ help:
|
||||
url:
|
||||
en_US: https://jina.ai/embeddings/
|
||||
supported_model_types:
|
||||
- text-embedding
|
||||
- text-embedding
|
||||
configurate_methods:
|
||||
- predefined-model
|
||||
- predefined-model
|
||||
provider_credential_schema:
|
||||
credential_form_schemas:
|
||||
- variable: api_key
|
||||
label:
|
||||
en_US: API Key
|
||||
type: secret-input
|
||||
required: true
|
||||
placeholder:
|
||||
zh_Hans: 在此输入您的 API Key
|
||||
en_US: Enter your API Key
|
||||
- variable: api_key
|
||||
label:
|
||||
en_US: API Key
|
||||
type: secret-input
|
||||
required: true
|
||||
placeholder:
|
||||
zh_Hans: 在此输入您的 API Key
|
||||
en_US: Enter your API Key
|
||||
|
||||
@@ -6,4 +6,4 @@ model_properties:
pricing:
  input: '0.001'
  unit: '0.001'
  currency: USD

@@ -6,4 +6,4 @@ model_properties:
pricing:
  input: '0.001'
  unit: '0.001'
  currency: USD

@@ -1,7 +1,7 @@
 from typing import Generator, List, Optional, Union, cast
 from core.model_runtime.entities.llm_entities import LLMResult, LLMUsage, LLMResultChunk, LLMResultChunkDelta, LLMMode
 from core.model_runtime.entities.message_entities import PromptMessage, PromptMessageTool, AssistantPromptMessage, UserPromptMessage, SystemPromptMessage
-from core.model_runtime.entities.model_entities import AIModelEntity, ParameterRule, ParameterType, FetchFrom, ModelType
+from core.model_runtime.entities.model_entities import AIModelEntity, ParameterRule, ParameterType, FetchFrom, ModelType, ModelPropertyKey
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
 from core.model_runtime.errors.invoke import InvokeConnectionError, InvokeServerUnavailableError, InvokeRateLimitError, \

@@ -156,9 +156,9 @@ class LocalAILarguageModel(LargeLanguageModel):
     def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity | None:
         completion_model = None
         if credentials['completion_type'] == 'chat_completion':
-            completion_model = LLMMode.CHAT
+            completion_model = LLMMode.CHAT.value
         elif credentials['completion_type'] == 'completion':
-            completion_model = LLMMode.COMPLETION
+            completion_model = LLMMode.COMPLETION.value
         else:
             raise ValueError(f"Unknown completion type {credentials['completion_type']}")

@@ -202,7 +202,7 @@ class LocalAILarguageModel(LargeLanguageModel):
             ),
             fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
             model_type=ModelType.LLM,
-            model_properties={ 'mode': completion_model } if completion_model else {},
+            model_properties={ ModelPropertyKey.MODE: completion_model } if completion_model else {},
             parameter_rules=rules
         )

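The .value change matters because the rest of the runtime compares the stored mode against plain strings such as 'chat'; storing the enum member itself makes those comparisons fail even though the two print identically. A small sketch of the pitfall:

from enum import Enum

class LLMMode(Enum):
    CHAT = 'chat'
    COMPLETION = 'completion'

stored = LLMMode.CHAT          # the enum member
print(stored == 'chat')        # False: a plain Enum member never equals its raw value
print(stored.value == 'chat')  # True: compare the underlying string instead
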
@@ -13,10 +13,10 @@ help:
  url:
    en_US: https://github.com/go-skynet/LocalAI
supported_model_types:
  - llm
  - text-embedding
configurate_methods:
  - customizable-model
model_credential_schema:
  model:
    label:

@@ -26,33 +26,33 @@ model_credential_schema:
      en_US: Enter your model name
      zh_Hans: 输入模型名称
  credential_form_schemas:
    - variable: completion_type
      show_on:
        - variable: __model_type
          value: llm
      label:
        en_US: Completion type
      type: select
      required: false
      default: chat_completion
      placeholder:
        zh_Hans: 选择对话类型
        en_US: Select completion type
      options:
        - value: completion
          label:
            en_US: Completion
            zh_Hans: 补全
        - value: chat_completion
          label:
            en_US: ChatCompletion
            zh_Hans: 对话
    - variable: server_url
      label:
        zh_Hans: 服务器URL
        en_US: Server url
      type: text-input
      required: true
      placeholder:
        zh_Hans: 在此输入LocalAI的服务器地址,如 https://example.com/xxx
        en_US: Enter the url of your LocalAI, for example https://example.com/xxx

@@ -3,27 +3,27 @@ label:
  en_US: Abab5-Chat
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 6144
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 6144
    min: 1
    max: 6144
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
    use_template: frequency_penalty
pricing:
  input: '0.00'
  output: '0.015'
  unit: '0.001'
  currency: RMB

@@ -3,34 +3,34 @@ label:
  en_US: Abab5.5-Chat
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 16384
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 6144
    min: 1
    max: 16384
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
    use_template: frequency_penalty
  - name: plugin_web_search
    required: false
    default: false
    type: boolean
    label:
      en_US: Enable Web Search
      zh_Hans: 开启网页搜索
pricing:
  input: '0.00'
  output: '0.015'
  unit: '0.001'
  currency: RMB