diff --git a/CodeGen/README.md b/CodeGen/README.md
index 03288fb2d..013c31d37 100644
--- a/CodeGen/README.md
+++ b/CodeGen/README.md
@@ -85,12 +85,12 @@ Currently we support two ways of deploying ChatQnA services with docker compose:
 
 By default, the LLM model is set to a default value as listed below:
 
-| Service      | Model                                                                            |
-| ------------ | -------------------------------------------------------------------------------- |
-| LLM_MODEL_ID | [meta-llama/CodeLlama-7b-hf](https://huggingface.co/meta-llama/CodeLlama-7b-hf)   |
+| Service      | Model                                                                                    |
+| ------------ | ---------------------------------------------------------------------------------------- |
+| LLM_MODEL_ID | [Qwen/Qwen2.5-Coder-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct)   |
 
-[meta-llama/CodeLlama-7b-hf](https://huggingface.co/meta-llama/CodeLlama-7b-hf) is a gated model that requires submitting an access request through Hugging Face. You can replace it with another model.
-Change the `LLM_MODEL_ID` below for your needs, such as: [Qwen/CodeQwen1.5-7B-Chat](https://huggingface.co/Qwen/CodeQwen1.5-7B-Chat), [deepseek-ai/deepseek-coder-6.7b-instruct](https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct)
+[Qwen/Qwen2.5-Coder-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct) may be a gated model that requires submitting an access request through Hugging Face before it can be downloaded. You can replace it with another model if needed.
+Change the `LLM_MODEL_ID` below to suit your needs, for example: [deepseek-ai/deepseek-coder-6.7b-instruct](https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct)
 
 If you choose to use `meta-llama/CodeLlama-7b-hf` as LLM model, you will need to visit [here](https://huggingface.co/meta-llama/CodeLlama-7b-hf), click the `Expand to review and access` button to ask for model access.
 
diff --git a/CodeGen/docker_compose/intel/cpu/xeon/README.md b/CodeGen/docker_compose/intel/cpu/xeon/README.md
index 8bdde1f75..5332d719a 100644
--- a/CodeGen/docker_compose/intel/cpu/xeon/README.md
+++ b/CodeGen/docker_compose/intel/cpu/xeon/README.md
@@ -105,7 +105,7 @@ export your_no_proxy=${your_no_proxy},"External_Public_IP"
 export no_proxy=${your_no_proxy}
 export http_proxy=${your_http_proxy}
 export https_proxy=${your_http_proxy}
-export LLM_MODEL_ID="meta-llama/CodeLlama-7b-hf"
+export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
 export TGI_LLM_ENDPOINT="http://${host_ip}:8028"
 export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
 export MEGA_SERVICE_HOST_IP=${host_ip}
diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/README.md b/CodeGen/docker_compose/intel/hpu/gaudi/README.md
index 2a5040ea0..31cfad292 100644
--- a/CodeGen/docker_compose/intel/hpu/gaudi/README.md
+++ b/CodeGen/docker_compose/intel/hpu/gaudi/README.md
@@ -85,7 +85,7 @@ Since the `compose.yaml` will consume some environment variables, you need to se
 export no_proxy=${your_no_proxy}
 export http_proxy=${your_http_proxy}
 export https_proxy=${your_http_proxy}
-export LLM_MODEL_ID="meta-llama/CodeLlama-7b-hf"
+export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
 export TGI_LLM_ENDPOINT="http://${host_ip}:8028"
 export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
 export MEGA_SERVICE_HOST_IP=${host_ip}
diff --git a/CodeGen/docker_compose/set_env.sh b/CodeGen/docker_compose/set_env.sh
index d66a120af..dba717b64 100644
--- a/CodeGen/docker_compose/set_env.sh
+++ b/CodeGen/docker_compose/set_env.sh
@@ -4,7 +4,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
-export LLM_MODEL_ID="meta-llama/CodeLlama-7b-hf"
+export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
 export TGI_LLM_ENDPOINT="http://${host_ip}:8028"
 export MEGA_SERVICE_HOST_IP=${host_ip}
 export LLM_SERVICE_HOST_IP=${host_ip}
diff --git a/CodeGen/kubernetes/intel/README.md b/CodeGen/kubernetes/intel/README.md
index be18003b8..a4bb44681 100644
--- a/CodeGen/kubernetes/intel/README.md
+++ b/CodeGen/kubernetes/intel/README.md
@@ -14,7 +14,7 @@
 ```
 cd GenAIExamples/CodeGen/kubernetes/intel/cpu/xeon/manifests
 export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
-export MODEL_ID="meta-llama/CodeLlama-7b-hf"
+export MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
 sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" codegen.yaml
-sed -i "s/meta-llama\/CodeLlama-7b-hf/${MODEL_ID}/g" codegen.yaml
+sed -i "s|Qwen/Qwen2.5-Coder-7B-Instruct|${MODEL_ID}|g" codegen.yaml
 kubectl apply -f codegen.yaml
diff --git a/CodeGen/kubernetes/intel/cpu/xeon/gmc/codegen_xeon.yaml b/CodeGen/kubernetes/intel/cpu/xeon/gmc/codegen_xeon.yaml
index dd1675ce3..8dd3c2b57 100644
--- a/CodeGen/kubernetes/intel/cpu/xeon/gmc/codegen_xeon.yaml
+++ b/CodeGen/kubernetes/intel/cpu/xeon/gmc/codegen_xeon.yaml
@@ -29,6 +29,6 @@ spec:
         internalService:
           serviceName: tgi-service
           config:
-            MODEL_ID: meta-llama/CodeLlama-7b-hf
+            MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct
             endpoint: /generate
         isDownstreamService: true
diff --git a/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml b/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml
index 96cc68266..4e6d8f91c 100644
--- a/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml
+++ b/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml
@@ -64,7 +64,7 @@ metadata:
     app.kubernetes.io/version: "2.1.0"
     app.kubernetes.io/managed-by: Helm
 data:
-  MODEL_ID: "meta-llama/CodeLlama-7b-hf"
+  MODEL_ID: "Qwen/Qwen2.5-Coder-7B-Instruct"
   PORT: "2080"
   HF_TOKEN: "insert-your-huggingface-token-here"
   http_proxy: ""
diff --git a/CodeGen/kubernetes/intel/hpu/gaudi/gmc/codegen_gaudi.yaml b/CodeGen/kubernetes/intel/hpu/gaudi/gmc/codegen_gaudi.yaml
index 2e3782057..d9a927e5c 100644
--- a/CodeGen/kubernetes/intel/hpu/gaudi/gmc/codegen_gaudi.yaml
+++ b/CodeGen/kubernetes/intel/hpu/gaudi/gmc/codegen_gaudi.yaml
@@ -29,6 +29,6 @@ spec:
         internalService:
           serviceName: tgi-gaudi-svc
           config:
-            MODEL_ID: meta-llama/CodeLlama-7b-hf
+            MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct
             endpoint: /generate
         isDownstreamService: true
diff --git a/CodeGen/kubernetes/intel/hpu/gaudi/manifest/codegen.yaml b/CodeGen/kubernetes/intel/hpu/gaudi/manifest/codegen.yaml
index c4a43a7c3..b506d17d4 100644
--- a/CodeGen/kubernetes/intel/hpu/gaudi/manifest/codegen.yaml
+++ b/CodeGen/kubernetes/intel/hpu/gaudi/manifest/codegen.yaml
@@ -64,7 +64,7 @@ metadata:
     app.kubernetes.io/version: "2.1.0"
     app.kubernetes.io/managed-by: Helm
 data:
-  MODEL_ID: "meta-llama/CodeLlama-7b-hf"
+  MODEL_ID: "Qwen/Qwen2.5-Coder-7B-Instruct"
   PORT: "2080"
   HF_TOKEN: "insert-your-huggingface-token-here"
   http_proxy: ""
diff --git a/supported_examples.md b/supported_examples.md
index ec0624b1a..33b02f71d 100644
--- a/supported_examples.md
+++ b/supported_examples.md
@@ -63,9 +63,9 @@ This document introduces the supported examples of GenAIExamples. The supported
 
 [CodeGen](./CodeGen/README.md) is an example of copilot designed for code generation in Visual Studio Code.
 
-| Framework                                                                       | LLM                                                                              | Serving                                                          | HW          | Description |
-| ------------------------------------------------------------------------------- | --------------------------------------------------------------------------------- | ------------------------------------------------------------------ | ----------- | ----------- |
-| [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai)  | [meta-llama/CodeLlama-7b-hf](https://huggingface.co/meta-llama/CodeLlama-7b-hf)  | [TGI](https://github.com/huggingface/text-generation-inference) | Xeon/Gaudi2 | Copilot     |
+| Framework                                                                       | LLM                                                                                      | Serving                                                          | HW          | Description |
+| ------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------ | ------------------------------------------------------------------ | ----------- | ----------- |
+| [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai)  | [Qwen/Qwen2.5-Coder-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct)  | [TGI](https://github.com/huggingface/text-generation-inference) | Xeon/Gaudi2 | Copilot     |
 
 ### CodeTrans
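For reference, a minimal usage sketch of the new default on Xeon, based on the `set_env.sh` script and variable names touched by this patch. The compose file location (`CodeGen/docker_compose/intel/cpu/xeon/compose.yaml`) and the alternative model are illustrative assumptions; any TGI-servable code model can be substituted the same way.

```bash
# Sketch: deploy CodeGen with the new default model, or override it first.
# Assumes compose.yaml lives under CodeGen/docker_compose/intel/cpu/xeon/ (illustrative path).
cd CodeGen/docker_compose
export host_ip=$(hostname -I | awk '{print $1}')      # IP of the host running the services
export HUGGINGFACEHUB_API_TOKEN="your_hf_api_token"   # used to pull the model from Hugging Face
source set_env.sh                                     # sets LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" and TGI_LLM_ENDPOINT
# Optional: pick a different model, e.g. the alternative named in the README.
export LLM_MODEL_ID="deepseek-ai/deepseek-coder-6.7b-instruct"
cd intel/cpu/xeon
docker compose up -d
```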