Added QNA chat using Qdrant (#100)

Signed-off-by: Anush008 <anushshetty90@gmail.com>
Co-authored-by: lvliang-intel <liang1.lv@intel.com>
Author: Anush
Date: 2024-04-30 12:22:49 +05:30
Committed by: GitHub
parent 04c5e64287
commit f1b4aef062
15 changed files with 521 additions and 42 deletions

.gitignore

@@ -1,3 +1,5 @@
 **/node_modules
 **/.svelte-kit
 **/package-lock.json
+__pycache__/


@@ -113,25 +113,31 @@ curl 127.0.0.1:9090/embed \
 Note: If you want to integrate the TEI service into the LangChain application, you'll need to restart the LangChain backend service after launching the TEI service.
 
-## Launch Redis and LangChain Backend Service
+## Launch Vector Database and LangChain Backend Service
 
 Update the `HUGGINGFACEHUB_API_TOKEN` environment variable with your huggingface token in the `docker-compose.yml`
+
+By default, Redis is used as the vector store. To use Qdrant, use the `docker-compose-qdrant.yml` file instead.
 
 ```bash
 cd langchain/docker
 docker compose -f docker-compose.yml up -d
+# To use Qdrant, run
+# docker compose -f docker-compose-qdrant.yml up -d
 cd ../../
 ```
 
 > [!NOTE]
 > If you modified any files and want that change introduced in this step, add `--build` to the end of the command to build the container image instead of pulling it from dockerhub.
 
-## Ingest data into Redis
+## Ingest Data Into Vector Database
 
-Each time the Redis container is launched, data should be ingested into the container using the commands:
+Each time the vector database container is launched, data should be ingested into the container using the commands:
 
 ```bash
 docker exec -it qna-rag-redis-server bash
+# To use Qdrant, run
+# docker exec -it qna-rag-qdrant-server bash
 cd /ws
 python ingest.py
 ```
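After ingestion completes, you can sanity-check that the documents actually landed in the vector store. Below is a minimal sketch (not part of this commit), assuming the Qdrant defaults from `rag_qdrant/config.py` (host `localhost`, port `6333`, collection `rag-qdrant`):

```python
# Hypothetical sanity check: confirm the ingested Qdrant collection is populated.
from qdrant_client import QdrantClient

client = QdrantClient(host="localhost", port=6333)
info = client.get_collection("rag-qdrant")
print(f"'rag-qdrant' holds {info.points_count} points")  # expect > 0 after ingest.py
```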


@@ -0,0 +1,45 @@
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
services:
qdrant-vector-db:
image: qdrant/qdrant:v1.9.0
container_name: qdrant-vector-db
ports:
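      # 6333 is Qdrant's HTTP/REST API port; 6334 is its gRPC port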
- "6333:6333"
- "6334:6334"
qna-rag-qdrant-server:
build:
args:
https_proxy: ${https_proxy}
http_proxy: ${http_proxy}
dockerfile: Dockerfile
context: .
image: intel/gen-ai-examples:qna-rag-qdrant-server
container_name: qna-rag-qdrant-server
environment:
- https_proxy=${https_proxy}
- HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
- "EMBED_MODEL=BAAI/bge-base-en-v1.5"
- "VECTOR_DATABASE=QDRANT"
- "TGI_LLM_ENDPOINT=http://localhost:8080"
# "TEI_ENDPOINT="http://xxx.xxx.xxx.xxx:9090" - To use a custom TEI endpoint
ulimits:
memlock:
soft: -1 # Set memlock to unlimited (no soft or hard limit)
hard: -1
volumes:
- ../qdrant:/ws
- ../test:/test
network_mode: "host"


@@ -43,6 +43,7 @@ services:
- "REDIS_PORT=6379" - "REDIS_PORT=6379"
- "EMBED_MODEL=BAAI/bge-base-en-v1.5" - "EMBED_MODEL=BAAI/bge-base-en-v1.5"
- "REDIS_SCHEMA=schema_dim_768.yml" - "REDIS_SCHEMA=schema_dim_768.yml"
- "VECTOR_DATABASE=REDIS"
ulimits: ulimits:
memlock: memlock:
soft: -1 # Set memlock to unlimited (no soft or hard limit) soft: -1 # Set memlock to unlimited (no soft or hard limit)


@@ -23,15 +23,14 @@ from fastapi.responses import JSONResponse, RedirectResponse, StreamingResponse
 from guardrails import moderation_prompt_for_chat, unsafe_dict
 from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings
 from langchain_community.llms import HuggingFaceEndpoint
-from langchain_community.vectorstores import Redis
 from langchain_core.messages import HumanMessage
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnablePassthrough
 from langserve import add_routes
 from prompts import contextualize_q_prompt, prompt, qa_prompt
-from rag_redis.config import EMBED_MODEL, INDEX_NAME, INDEX_SCHEMA, REDIS_URL
 from starlette.middleware.cors import CORSMiddleware
 from utils import (
+    VECTOR_DATABASE,
     create_kb_folder,
     create_retriever_from_files,
     create_retriever_from_links,
@@ -40,6 +39,11 @@ from utils import (
     reload_retriever,
 )
 
+if VECTOR_DATABASE == "REDIS":
+    from rag_redis.config import INDEX_NAME
+elif VECTOR_DATABASE == "QDRANT":
+    from rag_qdrant.config import COLLECTION_NAME as INDEX_NAME
 
 parser = argparse.ArgumentParser(description="Server Configuration")
 parser.add_argument("--chathistory", action="store_true", help="Enable debug mode")
 args = parser.parse_args()
@@ -52,7 +56,6 @@ app.add_middleware(
 class RAGAPIRouter(APIRouter):
     def __init__(self, upload_dir, entrypoint, safety_guard_endpoint, tei_endpoint=None) -> None:
         super().__init__()
         self.upload_dir = upload_dir
@@ -93,15 +96,31 @@ class RAGAPIRouter(APIRouter):
             self.embeddings = HuggingFaceHubEmbeddings(model=tei_endpoint)
         else:
             # create embeddings using local embedding model
+            EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
             self.embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL)
 
-        rds = Redis.from_existing_index(
-            self.embeddings,
-            index_name=INDEX_NAME,
-            redis_url=REDIS_URL,
-            schema=INDEX_SCHEMA,
-        )
-        retriever = rds.as_retriever(search_type="mmr")
+        if VECTOR_DATABASE == "REDIS":
+            from langchain_community.vectorstores import Redis
+            from rag_redis.config import INDEX_SCHEMA, REDIS_URL
+
+            vdb = Redis.from_existing_index(
+                self.embeddings,
+                index_name=INDEX_NAME,
+                redis_url=REDIS_URL,
+                schema=INDEX_SCHEMA,
+            )
+        elif VECTOR_DATABASE == "QDRANT":
+            from langchain_community.vectorstores import Qdrant
+            from qdrant_client import QdrantClient
+            from rag_qdrant.config import QDRANT_HOST, QDRANT_PORT
+
+            client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
+            vdb = Qdrant(
+                embeddings=self.embeddings,
+                collection_name=INDEX_NAME,
+                client=client,
+            )
+        retriever = vdb.as_retriever(search_type="mmr")
 
         # Define contextualize chain
         self.contextualize_q_chain = contextualize_q_prompt | self.llm | StrOutputParser()


@@ -28,9 +28,13 @@ import requests
 from bs4 import BeautifulSoup
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.document_loaders import UnstructuredFileLoader
-from langchain_community.vectorstores import Redis
 from langchain_core.documents import Document
-from rag_redis.config import INDEX_SCHEMA, REDIS_URL
 
+SUPPORTED_VECTOR_DATABASES = ["REDIS", "QDRANT"]
+VECTOR_DATABASE = str(os.getenv("VECTOR_DATABASE", "redis")).upper()
+assert VECTOR_DATABASE in SUPPORTED_VECTOR_DATABASES, f"Invalid VECTOR_DATABASE: {VECTOR_DATABASE}"
 
 def get_current_beijing_time():
@@ -57,7 +61,6 @@ def create_kb_folder(upload_dir):
 class Crawler:
     def __init__(self, pool=None):
         if pool:
             assert isinstance(pool, (str, list, tuple)), "url pool should be str, list or tuple"
@@ -292,16 +295,33 @@ def create_retriever_from_files(doc, embeddings, index_name: str):
     loader = UnstructuredFileLoader(doc, mode="single", strategy="fast")
     chunks = loader.load_and_split(text_splitter)
 
-    rds = Redis.from_texts(
-        texts=[chunk.page_content for chunk in chunks],
-        metadatas=[chunk.metadata for chunk in chunks],
-        embedding=embeddings,
-        index_name=index_name,
-        redis_url=REDIS_URL,
-        index_schema=INDEX_SCHEMA,
-    )
-    retriever = rds.as_retriever(search_type="mmr")
+    if VECTOR_DATABASE == "REDIS":
+        from langchain_community.vectorstores import Redis
+        from rag_redis.config import INDEX_SCHEMA, REDIS_URL
+
+        vdb = Redis.from_texts(
+            texts=[chunk.page_content for chunk in chunks],
+            metadatas=[chunk.metadata for chunk in chunks],
+            embedding=embeddings,
+            index_name=index_name,
+            redis_url=REDIS_URL,
+            index_schema=INDEX_SCHEMA,
+        )
+    elif VECTOR_DATABASE == "QDRANT":
+        from langchain_community.vectorstores import Qdrant
+        from rag_qdrant.config import COLLECTION_NAME, QDRANT_HOST, QDRANT_PORT
+
+        vdb = Qdrant.from_texts(
+            texts=[chunk.page_content for chunk in chunks],
+            metadatas=[chunk.metadata for chunk in chunks],
+            embedding=embeddings,
+            collection_name=COLLECTION_NAME,
+            host=QDRANT_HOST,
+            port=QDRANT_PORT,
+        )
+    retriever = vdb.as_retriever(search_type="mmr")
 
     return retriever
@@ -315,29 +335,63 @@ def create_retriever_from_links(embeddings, link_list: list, index_name):
         texts.append(data)
         metadatas.append(metadata)
 
-    rds = Redis.from_texts(
-        texts=texts,
-        metadatas=metadatas,
-        embedding=embeddings,
-        index_name=index_name,
-        redis_url=REDIS_URL,
-        index_schema=INDEX_SCHEMA,
-    )
-    retriever = rds.as_retriever(search_type="mmr")
+    if VECTOR_DATABASE == "REDIS":
+        from langchain_community.vectorstores import Redis
+        from rag_redis.config import INDEX_SCHEMA, REDIS_URL
+
+        vdb = Redis.from_texts(
+            texts=texts,
+            metadatas=metadatas,
+            embedding=embeddings,
+            index_name=index_name,
+            redis_url=REDIS_URL,
+            index_schema=INDEX_SCHEMA,
+        )
+    elif VECTOR_DATABASE == "QDRANT":
+        from langchain_community.vectorstores import Qdrant
+        from rag_qdrant.config import COLLECTION_NAME, QDRANT_HOST, QDRANT_PORT
+
+        vdb = Qdrant.from_texts(
+            texts=texts,
+            metadatas=metadatas,
+            embedding=embeddings,
+            collection_name=COLLECTION_NAME,
+            host=QDRANT_HOST,
+            port=QDRANT_PORT,
+        )
+    retriever = vdb.as_retriever(search_type="mmr")
 
     return retriever
 
 
 def reload_retriever(embeddings, index_name):
     print(f"[rag - reload retriever] reload with index: {index_name}")
-    rds = Redis.from_existing_index(
-        embeddings,
-        index_name=index_name,
-        redis_url=REDIS_URL,
-        schema=INDEX_SCHEMA,
-    )
-    retriever = rds.as_retriever(search_type="mmr")
+    if VECTOR_DATABASE == "REDIS":
+        from langchain_community.vectorstores import Redis
+        from rag_redis.config import INDEX_SCHEMA, REDIS_URL
+
+        vdb = Redis.from_existing_index(
+            embeddings,
+            index_name=index_name,
+            redis_url=REDIS_URL,
+            schema=INDEX_SCHEMA,
+        )
+    elif VECTOR_DATABASE == "QDRANT":
+        from langchain_community.vectorstores import Qdrant
+        from qdrant_client import QdrantClient
+        from rag_qdrant.config import COLLECTION_NAME, QDRANT_HOST, QDRANT_PORT
+
+        client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
+        vdb = Qdrant(
+            embeddings=embeddings,
+            collection_name=COLLECTION_NAME,
+            client=client,
+        )
+    retriever = vdb.as_retriever(search_type="mmr")
 
     return retriever


@@ -12,6 +12,7 @@ poetry
 pyarrow
 pydantic==1.10.13
 pymupdf
+qdrant-client==1.9.0
 redis
 sentence-transformers
 unstructured


@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2023 LangChain, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

Binary file not shown.


@@ -0,0 +1,106 @@
#!/usr/bin/env python
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import os
import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings
from langchain_community.vectorstores import Qdrant
from PIL import Image
from rag_qdrant.config import COLLECTION_NAME, EMBED_MODEL, QDRANT_HOST, QDRANT_PORT, TEI_EMBEDDING_ENDPOINT
def pdf_loader(file_path):
try:
import easyocr
import fitz
except ImportError:
        raise ImportError(
            "`pymupdf` or `easyocr` package not found, please install them with "
            "`pip install pymupdf easyocr`."
        )
doc = fitz.open(file_path)
reader = easyocr.Reader(["en"])
result = ""
for i in range(doc.page_count):
page = doc.load_page(i)
pagetext = page.get_text().strip()
if pagetext:
result = result + pagetext
if len(doc.get_page_images(i)) > 0:
for img in doc.get_page_images(i):
if img:
pageimg = ""
xref = img[0]
img_data = doc.extract_image(xref)
img_bytes = img_data["image"]
pil_image = Image.open(io.BytesIO(img_bytes))
img = np.array(pil_image)
img_result = reader.readtext(img, paragraph=True, detail=0)
pageimg = pageimg + ", ".join(img_result).strip()
if pageimg.endswith("!") or pageimg.endswith("?") or pageimg.endswith("."):
pass
else:
pageimg = pageimg + "."
result = result + pageimg
return result
def ingest_documents():
"""Ingest PDF to Qdrant from the data/ directory that
contains Edgar 10k filings data for Nike."""
# Load list of pdfs
company_name = "Nike"
data_path = "data/"
doc_path = [os.path.join(data_path, file) for file in os.listdir(data_path)][0]
print("Parsing 10k filing doc for NIKE", doc_path)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=100, add_start_index=True)
content = pdf_loader(doc_path)
chunks = text_splitter.split_text(content)
print("Done preprocessing. Created ", len(chunks), " chunks of the original pdf")
# Create vectorstore
if TEI_EMBEDDING_ENDPOINT:
# create embeddings using TEI endpoint service
embedder = HuggingFaceHubEmbeddings(model=TEI_EMBEDDING_ENDPOINT)
else:
# create embeddings using local embedding model
embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL)
# Batch size
batch_size = 32
num_chunks = len(chunks)
for i in range(0, num_chunks, batch_size):
batch_chunks = chunks[i : i + batch_size]
batch_texts = [f"Company: {company_name}. " + chunk for chunk in batch_chunks]
_ = Qdrant.from_texts(
texts=batch_texts,
embedding=embedder,
collection_name=COLLECTION_NAME,
host=QDRANT_HOST,
port=QDRANT_PORT,
)
print(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}")
if __name__ == "__main__":
ingest_documents()


@@ -0,0 +1,94 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "fe1adb29",
"metadata": {},
"source": []
},
{
"cell_type": "markdown",
"id": "681a5d1e",
"metadata": {},
"source": [
"## Connect to RAG App\n",
"\n",
"Assuming you are already running this server:\n",
"```bash\n",
"langserve start\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "d774be2a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nike's revenue in 2023 was $51.2 billion. \n",
"\n",
"Source: 'data/nke-10k-2023.pdf', Start Index: '146100'\n"
]
}
],
"source": [
"from langserve.client import RemoteRunnable\n",
"\n",
"rag_qdrant = RemoteRunnable(\"http://localhost:8000/rag-qdrant\")\n",
"\n",
"print(rag_qdrant.invoke(\"What was Nike's revenue in 2023?\"))"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "07ae0005",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"As of May 31, 2023, Nike had approximately 83,700 employees worldwide. This information can be found in the first piece of context provided. (source: data/nke-10k-2023.pdf, start_index: 32532)\n"
]
}
],
"source": [
"print(rag_qdrant.invoke(\"How many employees work at Nike?\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4a6b9f00",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}


@@ -0,0 +1,13 @@
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


@@ -0,0 +1,80 @@
#!/usr/bin/env python
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.vectorstores import Qdrant
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from qdrant_client import QdrantClient
from rag_qdrant.config import COLLECTION_NAME, EMBED_MODEL, QDRANT_HOST, QDRANT_PORT, TGI_LLM_ENDPOINT
# Input schema for the chain; makes the request body explicit in the generated API docs.
class Question(BaseModel):
__root__: str
# Init Embeddings
embedder = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
# Connect to pre-loaded vectorstore
# run the ingest.py script to populate this
client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
vectorstore = Qdrant(embeddings=embedder, collection_name=COLLECTION_NAME, client=client)
# TODO allow user to change parameters
retriever = vectorstore.as_retriever(search_type="mmr")
# Define our prompt
template = """
Use the following pieces of context from the retrieved
dataset to answer the question. Do not make up an answer if there is no
context provided to help answer it. Include the 'source' and 'start_index'
from the metadata included in the context you used to answer the question.
Context:
---------
{context}
---------
Question: {question}
---------
Answer:
"""
prompt = ChatPromptTemplate.from_template(template)
# RAG Chain
model = HuggingFaceEndpoint(
endpoint_url=TGI_LLM_ENDPOINT,
max_new_tokens=512,
top_k=10,
top_p=0.95,
typical_p=0.95,
temperature=0.01,
repetition_penalty=1.03,
streaming=True,
truncate=1024,
)
chain = (
RunnableParallel({"context": retriever, "question": RunnablePassthrough()}) | prompt | model | StrOutputParser()
).with_types(input_type=Question)
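The notebook below invokes this chain at `/rag-qdrant`. A minimal way to mount it with LangServe (a sketch under assumed defaults; the actual server wiring is not part of this file) would be:

```python
# Hypothetical serving sketch: expose the chain over HTTP with LangServe.
from fastapi import FastAPI
from langserve import add_routes

from rag_qdrant.chain import chain

app = FastAPI()
add_routes(app, chain, path="/rag-qdrant")

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
```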


@@ -0,0 +1,28 @@
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
# Embedding model
EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
# Qdrant configuration
QDRANT_HOST = os.getenv("QDRANT_HOST", "localhost")
QDRANT_PORT = int(os.getenv("QDRANT_PORT", 6333))
COLLECTION_NAME = os.getenv("COLLECTION_NAME", "rag-qdrant")
# LLM/Embedding endpoints
TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080")
TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081")
TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_ENDPOINT")
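
As a quick connectivity check against these settings (an illustrative sketch, not part of the commit):

```python
# Hypothetical check: connect with the configured host/port and list collections.
from qdrant_client import QdrantClient

from rag_qdrant.config import QDRANT_HOST, QDRANT_PORT

client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
print(client.get_collections())  # empty on a fresh Qdrant instance
```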


@@ -66,6 +66,15 @@ All the examples are well-validated on Intel platforms. In addition, these examp
 <td>Gaudi2</td>
 <td>Chatbot</td>
 </tr>
+<tr>
+<td><a href="https://www.langchain.com">LangChain</a></td>
+<td><a href="https://huggingface.co/mistralai/Mistral-7B-v0.1">Mistral-7B</a></td>
+<td><a href="https://huggingface.co/BAAI/bge-base-en">BGE-Base</a></td>
+<td><a href="https://qdrant.tech/">Qdrant</a></td>
+<td><a href="https://github.com/huggingface/tgi-gaudi">TGI-Habana</a></td>
+<td>Gaudi2</td>
+<td>Chatbot</td>
+</tr>
 </tbody>
 </table>