Update CodeGen for GenAIComps Refactor (#1308)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Co-authored-by: WenjiaoYue <wenjiao.yue@intel.com>
This commit is contained in:
@@ -15,6 +15,12 @@ services:
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGING_FACE_HUB_TOKEN: ${CODEGEN_HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${CODEGEN_HUGGINGFACEHUB_API_TOKEN}
|
||||
host_ip: ${host_ip}
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f http://$host_ip:${CODEGEN_TGI_SERVICE_PORT:-8028}/health || exit 1"]
|
||||
interval: 10s
|
||||
timeout: 10s
|
||||
retries: 100
|
||||
shm_size: 1g
|
||||
devices:
|
||||
- /dev/kfd:/dev/kfd
|
||||
@@ -31,7 +37,8 @@ services:
|
||||
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
container_name: codegen-llm-server
|
||||
depends_on:
|
||||
- codegen-tgi-service
|
||||
codegen-tgi-service:
|
||||
condition: service_healthy
|
||||
ports:
|
||||
- "${CODEGEN_LLM_SERVICE_PORT:-9000}:9000"
|
||||
ipc: host
|
||||
@@ -39,7 +46,8 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: "http://codegen-tgi-service"
|
||||
LLM_ENDPOINT: "http://codegen-tgi-service"
|
||||
LLM_MODEL_ID: ${CODEGEN_LLM_MODEL_ID}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${CODEGEN_HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
codegen-backend-server:
|
||||
|
||||
@@ -15,12 +15,19 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
host_ip: ${host_ip}
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f http://$host_ip:8028/health || exit 1"]
|
||||
interval: 10s
|
||||
timeout: 10s
|
||||
retries: 100
|
||||
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
|
||||
llm:
|
||||
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
container_name: llm-tgi-server
|
||||
depends_on:
|
||||
- tgi-service
|
||||
tgi-service:
|
||||
condition: service_healthy
|
||||
ports:
|
||||
- "9000:9000"
|
||||
ipc: host
|
||||
@@ -28,7 +35,8 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
LLM_MODEL_ID: ${LLM_MODEL_ID}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
codegen-xeon-backend-server:
|
||||
|
||||
@@ -20,6 +20,11 @@ services:
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "sleep 500 && exit 0"]
|
||||
interval: 1s
|
||||
timeout: 505s
|
||||
retries: 1
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
@@ -29,7 +34,8 @@ services:
|
||||
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
container_name: llm-tgi-gaudi-server
|
||||
depends_on:
|
||||
- tgi-service
|
||||
tgi-service:
|
||||
condition: service_healthy
|
||||
ports:
|
||||
- "9000:9000"
|
||||
ipc: host
|
||||
@@ -37,7 +43,8 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
LLM_MODEL_ID: ${LLM_MODEL_ID}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
codegen-gaudi-backend-server:
|
||||
|
||||
@@ -34,6 +34,7 @@ function start_services() {
|
||||
export MEGA_SERVICE_HOST_IP=${ip_address}
|
||||
export LLM_SERVICE_HOST_IP=${ip_address}
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:7778/v1/codegen"
|
||||
export host_ip=${ip_address}
|
||||
|
||||
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
|
||||
|
||||
|
||||
@@ -39,6 +39,7 @@ function start_services() {
|
||||
export CODEGEN_BACKEND_SERVICE_PORT=7778
|
||||
export CODEGEN_BACKEND_SERVICE_URL="http://${ip_address}:${CODEGEN_BACKEND_SERVICE_PORT}/v1/codegen"
|
||||
export CODEGEN_UI_SERVICE_PORT=5173
|
||||
export host_ip=${ip_address}
|
||||
|
||||
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
|
||||
|
||||
|
||||
@@ -35,6 +35,7 @@ function start_services() {
|
||||
export MEGA_SERVICE_HOST_IP=${ip_address}
|
||||
export LLM_SERVICE_HOST_IP=${ip_address}
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:7778/v1/codegen"
|
||||
export host_ip=${ip_address}
|
||||
|
||||
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
|
||||
|
||||
|
||||
@@ -34,22 +34,21 @@
|
||||
const eventSource = await fetchTextStream(query);
|
||||
|
||||
eventSource.addEventListener("message", (e: any) => {
|
||||
let Msg = e.data;
|
||||
console.log("Msg", Msg);
|
||||
let res = e.data;
|
||||
|
||||
if (Msg.startsWith("b")) {
|
||||
const trimmedData = Msg.slice(2, -1);
|
||||
if (trimmedData.includes("'''")) {
|
||||
deleteFlag = true;
|
||||
} else if (deleteFlag && trimmedData.includes("\\n")) {
|
||||
deleteFlag = false;
|
||||
} else if (trimmedData !== "</s>" && !deleteFlag) {
|
||||
code_output += trimmedData.replace(/\\n/g, "\n");
|
||||
}
|
||||
} else if (Msg === "[DONE]") {
|
||||
if (res === "[DONE]") {
|
||||
deleteFlag = false;
|
||||
loading = false;
|
||||
query = '';
|
||||
} else {
|
||||
let Msg = JSON.parse(res).choices[0].text;
|
||||
if (Msg.includes("'''")) {
|
||||
deleteFlag = true;
|
||||
} else if (deleteFlag && Msg.includes("\\n")) {
|
||||
deleteFlag = false;
|
||||
} else if (Msg !== "</s>" && !deleteFlag) {
|
||||
code_output += Msg.replace(/\\n/g, "\n");
|
||||
}
|
||||
}
|
||||
});
|
||||
eventSource.stream();
|
||||
|
||||
Reference in New Issue
Block a user