Added QNA chat using Qdrant (#100)

Signed-off-by: Anush008 <anushshetty90@gmail.com>
Co-authored-by: lvliang-intel <liang1.lv@intel.com>
Author: Anush
Date: 2024-04-30 12:22:49 +05:30
Committed by: GitHub
parent 04c5e64287
commit f1b4aef062
15 changed files with 521 additions and 42 deletions

.gitignore

@@ -1,3 +1,5 @@
 **/node_modules
 **/.svelte-kit
 **/package-lock.json
+__pycache__/


@@ -113,25 +113,31 @@ curl 127.0.0.1:9090/embed \
 Note: If you want to integrate the TEI service into the LangChain application, you'll need to restart the LangChain backend service after launching the TEI service.
 
-## Launch Redis and LangChain Backend Service
+## Launch Vector Database and LangChain Backend Service
 
 Update the `HUGGINGFACEHUB_API_TOKEN` environment variable with your huggingface token in the `docker-compose.yml`
+
+By default, Redis is used as the vector store. To use Qdrant, use the `docker-compose-qdrant.yml` file instead.
 
 ```bash
 cd langchain/docker
 docker compose -f docker-compose.yml up -d
+# To use Qdrant, run
+# docker compose -f docker-compose-qdrant.yml up -d
 cd ../../
 ```
 
 > [!NOTE]
 > If you modified any files and want that change introduced in this step, add `--build` to the end of the command to build the container image instead of pulling it from dockerhub.
 
-## Ingest data into Redis
+## Ingest Data Into Vector Database
 
-Each time the Redis container is launched, data should be ingested into the container using the commands:
+Each time the vector database container is launched, data should be ingested into the container using the commands:
 
 ```bash
 docker exec -it qna-rag-redis-server bash
+# To use Qdrant, run
+# docker exec -it qna-rag-qdrant-server bash
 cd /ws
 python ingest.py
 ```
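After ingestion completes, you can sanity-check that the documents actually landed in the vector store. Below is a minimal sketch (not part of this commit), assuming the Qdrant defaults from `rag_qdrant/config.py` (host `localhost`, port `6333`, collection `rag-qdrant`):

```python
# Hypothetical sanity check: confirm the ingested Qdrant collection is populated.
from qdrant_client import QdrantClient

client = QdrantClient(host="localhost", port=6333)
info = client.get_collection("rag-qdrant")
print(f"'rag-qdrant' holds {info.points_count} points")  # expect > 0 after ingest.py
```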


@@ -0,0 +1,45 @@
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
services:
qdrant-vector-db:
image: qdrant/qdrant:v1.9.0
container_name: qdrant-vector-db
ports:
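      # 6333 is Qdrant's HTTP/REST API port; 6334 is its gRPC port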
- "6333:6333"
- "6334:6334"
qna-rag-qdrant-server:
build:
args:
https_proxy: ${https_proxy}
http_proxy: ${http_proxy}
dockerfile: Dockerfile
context: .
image: intel/gen-ai-examples:qna-rag-qdrant-server
container_name: qna-rag-qdrant-server
environment:
- https_proxy=${https_proxy}
- HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
- "EMBED_MODEL=BAAI/bge-base-en-v1.5"
- "VECTOR_DATABASE=QDRANT"
- "TGI_LLM_ENDPOINT=http://localhost:8080"
# "TEI_ENDPOINT="http://xxx.xxx.xxx.xxx:9090" - To use a custom TEI endpoint
ulimits:
memlock:
soft: -1 # Set memlock to unlimited (no soft or hard limit)
hard: -1
volumes:
- ../qdrant:/ws
- ../test:/test
network_mode: "host"


@@ -43,6 +43,7 @@ services:
- "REDIS_PORT=6379" - "REDIS_PORT=6379"
- "EMBED_MODEL=BAAI/bge-base-en-v1.5" - "EMBED_MODEL=BAAI/bge-base-en-v1.5"
- "REDIS_SCHEMA=schema_dim_768.yml" - "REDIS_SCHEMA=schema_dim_768.yml"
- "VECTOR_DATABASE=REDIS"
ulimits: ulimits:
memlock: memlock:
soft: -1 # Set memlock to unlimited (no soft or hard limit) soft: -1 # Set memlock to unlimited (no soft or hard limit)


@@ -23,15 +23,14 @@ from fastapi.responses import JSONResponse, RedirectResponse, StreamingResponse
 from guardrails import moderation_prompt_for_chat, unsafe_dict
 from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings
 from langchain_community.llms import HuggingFaceEndpoint
-from langchain_community.vectorstores import Redis
 from langchain_core.messages import HumanMessage
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnablePassthrough
 from langserve import add_routes
 from prompts import contextualize_q_prompt, prompt, qa_prompt
-from rag_redis.config import EMBED_MODEL, INDEX_NAME, INDEX_SCHEMA, REDIS_URL
 from starlette.middleware.cors import CORSMiddleware
 from utils import (
+    VECTOR_DATABASE,
     create_kb_folder,
     create_retriever_from_files,
     create_retriever_from_links,
@@ -40,6 +39,11 @@ from utils import (
     reload_retriever,
 )
 
+if VECTOR_DATABASE == "REDIS":
+    from rag_redis.config import INDEX_NAME
+elif VECTOR_DATABASE == "QDRANT":
+    from rag_qdrant.config import COLLECTION_NAME as INDEX_NAME
 
 parser = argparse.ArgumentParser(description="Server Configuration")
 parser.add_argument("--chathistory", action="store_true", help="Enable debug mode")
 args = parser.parse_args()
@@ -52,7 +56,6 @@ app.add_middleware(
 class RAGAPIRouter(APIRouter):
     def __init__(self, upload_dir, entrypoint, safety_guard_endpoint, tei_endpoint=None) -> None:
         super().__init__()
         self.upload_dir = upload_dir
@@ -93,15 +96,31 @@ class RAGAPIRouter(APIRouter):
             self.embeddings = HuggingFaceHubEmbeddings(model=tei_endpoint)
         else:
             # create embeddings using local embedding model
+            EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
             self.embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL)
 
-        rds = Redis.from_existing_index(
-            self.embeddings,
-            index_name=INDEX_NAME,
-            redis_url=REDIS_URL,
-            schema=INDEX_SCHEMA,
-        )
-        retriever = rds.as_retriever(search_type="mmr")
+        if VECTOR_DATABASE == "REDIS":
+            from langchain_community.vectorstores import Redis
+            from rag_redis.config import INDEX_SCHEMA, REDIS_URL
+
+            vdb = Redis.from_existing_index(
+                self.embeddings,
+                index_name=INDEX_NAME,
+                redis_url=REDIS_URL,
+                schema=INDEX_SCHEMA,
+            )
+        elif VECTOR_DATABASE == "QDRANT":
+            from langchain_community.vectorstores import Qdrant
+            from qdrant_client import QdrantClient
+            from rag_qdrant.config import QDRANT_HOST, QDRANT_PORT
+
+            client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
+            vdb = Qdrant(
+                embeddings=self.embeddings,
+                collection_name=INDEX_NAME,
+                client=client,
+            )
+        retriever = vdb.as_retriever(search_type="mmr")
 
         # Define contextualize chain
         self.contextualize_q_chain = contextualize_q_prompt | self.llm | StrOutputParser()


@@ -28,9 +28,13 @@ import requests
 from bs4 import BeautifulSoup
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.document_loaders import UnstructuredFileLoader
-from langchain_community.vectorstores import Redis
 from langchain_core.documents import Document
-from rag_redis.config import INDEX_SCHEMA, REDIS_URL
 
+SUPPORTED_VECTOR_DATABASES = ["REDIS", "QDRANT"]
+VECTOR_DATABASE = str(os.getenv("VECTOR_DATABASE", "redis")).upper()
+assert VECTOR_DATABASE in SUPPORTED_VECTOR_DATABASES, f"Invalid VECTOR_DATABASE: {VECTOR_DATABASE}"
 
 def get_current_beijing_time():
@@ -57,7 +61,6 @@ def create_kb_folder(upload_dir):
 class Crawler:
     def __init__(self, pool=None):
         if pool:
             assert isinstance(pool, (str, list, tuple)), "url pool should be str, list or tuple"
@@ -292,16 +295,33 @@ def create_retriever_from_files(doc, embeddings, index_name: str):
     loader = UnstructuredFileLoader(doc, mode="single", strategy="fast")
     chunks = loader.load_and_split(text_splitter)
 
-    rds = Redis.from_texts(
-        texts=[chunk.page_content for chunk in chunks],
-        metadatas=[chunk.metadata for chunk in chunks],
-        embedding=embeddings,
-        index_name=index_name,
-        redis_url=REDIS_URL,
-        index_schema=INDEX_SCHEMA,
-    )
-    retriever = rds.as_retriever(search_type="mmr")
+    if VECTOR_DATABASE == "REDIS":
+        from langchain_community.vectorstores import Redis
+        from rag_redis.config import INDEX_SCHEMA, REDIS_URL
+
+        vdb = Redis.from_texts(
+            texts=[chunk.page_content for chunk in chunks],
+            metadatas=[chunk.metadata for chunk in chunks],
+            embedding=embeddings,
+            index_name=index_name,
+            redis_url=REDIS_URL,
+            index_schema=INDEX_SCHEMA,
+        )
+    elif VECTOR_DATABASE == "QDRANT":
+        from langchain_community.vectorstores import Qdrant
+        from rag_qdrant.config import COLLECTION_NAME, QDRANT_HOST, QDRANT_PORT
+
+        vdb = Qdrant.from_texts(
+            texts=[chunk.page_content for chunk in chunks],
+            metadatas=[chunk.metadata for chunk in chunks],
+            embedding=embeddings,
+            collection_name=COLLECTION_NAME,
+            host=QDRANT_HOST,
+            port=QDRANT_PORT,
+        )
+    retriever = vdb.as_retriever(search_type="mmr")
 
     return retriever
@@ -315,29 +335,63 @@ def create_retriever_from_links(embeddings, link_list: list, index_name):
         texts.append(data)
         metadatas.append(metadata)
 
-    rds = Redis.from_texts(
-        texts=texts,
-        metadatas=metadatas,
-        embedding=embeddings,
-        index_name=index_name,
-        redis_url=REDIS_URL,
-        index_schema=INDEX_SCHEMA,
-    )
-    retriever = rds.as_retriever(search_type="mmr")
+    if VECTOR_DATABASE == "REDIS":
+        from langchain_community.vectorstores import Redis
+        from rag_redis.config import INDEX_SCHEMA, REDIS_URL
+
+        vdb = Redis.from_texts(
+            texts=texts,
+            metadatas=metadatas,
+            embedding=embeddings,
+            index_name=index_name,
+            redis_url=REDIS_URL,
+            index_schema=INDEX_SCHEMA,
+        )
+    elif VECTOR_DATABASE == "QDRANT":
+        from langchain_community.vectorstores import Qdrant
+        from rag_qdrant.config import COLLECTION_NAME, QDRANT_HOST, QDRANT_PORT
+
+        vdb = Qdrant.from_texts(
+            texts=texts,
+            metadatas=metadatas,
+            embedding=embeddings,
+            collection_name=COLLECTION_NAME,
+            host=QDRANT_HOST,
+            port=QDRANT_PORT,
+        )
+    retriever = vdb.as_retriever(search_type="mmr")
 
     return retriever
 
 
 def reload_retriever(embeddings, index_name):
     print(f"[rag - reload retriever] reload with index: {index_name}")
-    rds = Redis.from_existing_index(
-        embeddings,
-        index_name=index_name,
-        redis_url=REDIS_URL,
-        schema=INDEX_SCHEMA,
-    )
-    retriever = rds.as_retriever(search_type="mmr")
+    if VECTOR_DATABASE == "REDIS":
+        from langchain_community.vectorstores import Redis
+        from rag_redis.config import INDEX_SCHEMA, REDIS_URL
+
+        vdb = Redis.from_existing_index(
+            embeddings,
+            index_name=index_name,
+            redis_url=REDIS_URL,
+            schema=INDEX_SCHEMA,
+        )
+    elif VECTOR_DATABASE == "QDRANT":
+        from langchain_community.vectorstores import Qdrant
+        from qdrant_client import QdrantClient
+        from rag_qdrant.config import COLLECTION_NAME, QDRANT_HOST, QDRANT_PORT
+
+        client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
+        vdb = Qdrant(
+            embeddings=embeddings,
+            collection_name=COLLECTION_NAME,
+            client=client,
+        )
+    retriever = vdb.as_retriever(search_type="mmr")
 
     return retriever


@@ -12,6 +12,7 @@ poetry
 pyarrow
 pydantic==1.10.13
 pymupdf
+qdrant-client==1.9.0
 redis
 sentence-transformers
 unstructured


@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2023 LangChain, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

Binary file not shown.


@@ -0,0 +1,106 @@
#!/usr/bin/env python
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import os
import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings
from langchain_community.vectorstores import Qdrant
from PIL import Image
from rag_qdrant.config import COLLECTION_NAME, EMBED_MODEL, QDRANT_HOST, QDRANT_PORT, TEI_EMBEDDING_ENDPOINT
def pdf_loader(file_path):
try:
import easyocr
import fitz
except ImportError:
        raise ImportError(
            "`pymupdf` or `easyocr` package not found, please install them with "
            "`pip install pymupdf easyocr`."
        )
doc = fitz.open(file_path)
reader = easyocr.Reader(["en"])
result = ""
for i in range(doc.page_count):
page = doc.load_page(i)
pagetext = page.get_text().strip()
if pagetext:
result = result + pagetext
if len(doc.get_page_images(i)) > 0:
for img in doc.get_page_images(i):
if img:
pageimg = ""
xref = img[0]
img_data = doc.extract_image(xref)
img_bytes = img_data["image"]
pil_image = Image.open(io.BytesIO(img_bytes))
img = np.array(pil_image)
img_result = reader.readtext(img, paragraph=True, detail=0)
pageimg = pageimg + ", ".join(img_result).strip()
if pageimg.endswith("!") or pageimg.endswith("?") or pageimg.endswith("."):
pass
else:
pageimg = pageimg + "."
result = result + pageimg
return result
def ingest_documents():
"""Ingest PDF to Qdrant from the data/ directory that
contains Edgar 10k filings data for Nike."""
# Load list of pdfs
company_name = "Nike"
data_path = "data/"
doc_path = [os.path.join(data_path, file) for file in os.listdir(data_path)][0]
print("Parsing 10k filing doc for NIKE", doc_path)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=100, add_start_index=True)
content = pdf_loader(doc_path)
chunks = text_splitter.split_text(content)
print("Done preprocessing. Created ", len(chunks), " chunks of the original pdf")
# Create vectorstore
if TEI_EMBEDDING_ENDPOINT:
# create embeddings using TEI endpoint service
embedder = HuggingFaceHubEmbeddings(model=TEI_EMBEDDING_ENDPOINT)
else:
# create embeddings using local embedding model
embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL)
# Batch size
batch_size = 32
num_chunks = len(chunks)
for i in range(0, num_chunks, batch_size):
batch_chunks = chunks[i : i + batch_size]
batch_texts = [f"Company: {company_name}. " + chunk for chunk in batch_chunks]
_ = Qdrant.from_texts(
texts=batch_texts,
embedding=embedder,
collection_name=COLLECTION_NAME,
host=QDRANT_HOST,
port=QDRANT_PORT,
)
print(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}")
if __name__ == "__main__":
ingest_documents()


@@ -0,0 +1,94 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "fe1adb29",
"metadata": {},
"source": []
},
{
"cell_type": "markdown",
"id": "681a5d1e",
"metadata": {},
"source": [
"## Connect to RAG App\n",
"\n",
"Assuming you are already running this server:\n",
"```bash\n",
"langserve start\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "d774be2a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nike's revenue in 2023 was $51.2 billion. \n",
"\n",
"Source: 'data/nke-10k-2023.pdf', Start Index: '146100'\n"
]
}
],
"source": [
"from langserve.client import RemoteRunnable\n",
"\n",
"rag_qdrant = RemoteRunnable(\"http://localhost:8000/rag-qdrant\")\n",
"\n",
"print(rag_qdrant.invoke(\"What was Nike's revenue in 2023?\"))"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "07ae0005",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"As of May 31, 2023, Nike had approximately 83,700 employees worldwide. This information can be found in the first piece of context provided. (source: data/nke-10k-2023.pdf, start_index: 32532)\n"
]
}
],
"source": [
"print(rag_qdrant.invoke(\"How many employees work at Nike?\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4a6b9f00",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}


@@ -0,0 +1,13 @@
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


@@ -0,0 +1,80 @@
#!/usr/bin/env python
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.vectorstores import Qdrant
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from qdrant_client import QdrantClient
from rag_qdrant.config import COLLECTION_NAME, EMBED_MODEL, QDRANT_HOST, QDRANT_PORT, TGI_LLM_ENDPOINT
# Input schema for the chain; makes the request body explicit in the generated API docs.
class Question(BaseModel):
__root__: str
# Init Embeddings
embedder = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
# Connect to pre-loaded vectorstore
# run the ingest.py script to populate this
client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
vectorstore = Qdrant(embeddings=embedder, collection_name=COLLECTION_NAME, client=client)
# TODO allow user to change parameters
retriever = vectorstore.as_retriever(search_type="mmr")
# Define our prompt
template = """
Use the following pieces of context from the retrieved
dataset to answer the question. Do not make up an answer if there is no
context provided to help answer it. Include the 'source' and 'start_index'
from the metadata included in the context you used to answer the question.
Context:
---------
{context}
---------
Question: {question}
---------
Answer:
"""
prompt = ChatPromptTemplate.from_template(template)
# RAG Chain
model = HuggingFaceEndpoint(
endpoint_url=TGI_LLM_ENDPOINT,
max_new_tokens=512,
top_k=10,
top_p=0.95,
typical_p=0.95,
temperature=0.01,
repetition_penalty=1.03,
streaming=True,
truncate=1024,
)
chain = (
RunnableParallel({"context": retriever, "question": RunnablePassthrough()}) | prompt | model | StrOutputParser()
).with_types(input_type=Question)
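The notebook below invokes this chain at `/rag-qdrant`. A minimal way to mount it with LangServe (a sketch under assumed defaults; the actual server wiring is not part of this file) would be:

```python
# Hypothetical serving sketch: expose the chain over HTTP with LangServe.
from fastapi import FastAPI
from langserve import add_routes

from rag_qdrant.chain import chain

app = FastAPI()
add_routes(app, chain, path="/rag-qdrant")

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
```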


@@ -0,0 +1,28 @@
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
# Embedding model
EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
# Qdrant configuration
QDRANT_HOST = os.getenv("QDRANT_HOST", "localhost")
QDRANT_PORT = int(os.getenv("QDRANT_PORT", 6333))
COLLECTION_NAME = os.getenv("COLLECTION_NAME", "rag-qdrant")
# LLM/Embedding endpoints
TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080")
TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081")
TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_ENDPOINT")
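
As a quick connectivity check against these settings (an illustrative sketch, not part of the commit):

```python
# Hypothetical check: connect with the configured host/port and list collections.
from qdrant_client import QdrantClient

from rag_qdrant.config import QDRANT_HOST, QDRANT_PORT

client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
print(client.get_collections())  # empty on a fresh Qdrant instance
```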


@@ -66,6 +66,15 @@ All the examples are well-validated on Intel platforms. In addition, these examp
 <td>Gaudi2</td>
 <td>Chatbot</td>
 </tr>
+<tr>
+<td><a href="https://www.langchain.com">LangChain</a></td>
+<td><a href="https://huggingface.co/mistralai/Mistral-7B-v0.1">Mistral-7B</a></td>
+<td><a href="https://huggingface.co/BAAI/bge-base-en">BGE-Base</a></td>
+<td><a href="https://qdrant.tech/">Qdrant</a></td>
+<td><a href="https://github.com/huggingface/tgi-gaudi">TGI-Habana</a></td>
+<td>Gaudi2</td>
+<td>Chatbot</td>
+</tr>
 </tbody>
 </table>