add OpenTelemetry tracing into the OPEA DAG and a couple of ChatQnA-related microservice code paths (#1122)
* add OpenTelemetry tracing into the ChatQnA workflow
* handle stream/non-stream tracing
* pre-commit fix
* add a tag for async llm microservice execute
* add OTLP tracing for the retriever
* fix CI issue

Signed-off-by: Louie, Tsai <louie.tsai@intel.com>
Signed-off-by: louie-tsai <louie.tsai@intel.com>
Co-authored-by: Spycsh <sihan.chen@intel.com>
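For reviewers who want the tracing pattern in one place before reading the hunks: the diff below touches the ServiceOrchestrator DAG, the shared telemetry helper (comps.cores.telemetry.opea_telemetry), and the embedding, LLM, reranking, and retriever microservice wrappers. A minimal sketch of that pattern follows, assuming the stock opentelemetry-sdk and OTLP/HTTP exporter packages; the decorator name `traced` is illustrative only, while the patch's real decorator is `opea_telemetry`:

    # Illustrative sketch only (not the repo code): set up an OTLP/HTTP exporter
    # and a decorator that opens a span around sync or async functions.
    import inspect
    import os
    from functools import wraps

    from opentelemetry import trace
    from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
    from opentelemetry.sdk.resources import SERVICE_NAME, Resource
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import BatchSpanProcessor

    # 4318 is the default OTLP/HTTP collector port, matching TELEMETRY_ENDPOINT below.
    endpoint = os.environ.get("TELEMETRY_ENDPOINT", "http://localhost:4318/v1/traces")
    provider = TracerProvider(resource=Resource.create({SERVICE_NAME: "opea"}))
    provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter(endpoint=endpoint)))
    trace.set_tracer_provider(provider)
    tracer = trace.get_tracer(__name__)


    def traced(func):  # hypothetical name; the patch calls this opea_telemetry
        """Wrap a sync or async callable in a span named after the function."""
        if inspect.iscoroutinefunction(func):

            @wraps(func)
            async def wrapper(*args, **kwargs):
                with tracer.start_as_current_span(func.__name__):
                    return await func(*args, **kwargs)

        else:

            @wraps(func)
            def wrapper(*args, **kwargs):
                with tracer.start_as_current_span(func.__name__):
                    return func(*args, **kwargs)

        return wrapper

Stacking the decorator on a handler (the patch places `@opea_telemetry` above `@register_statistics`) produces one span per request, and the `tracer.start_as_current_span(...)` calls added inside the orchestrator nest the stream first-token and per-node generate spans under it.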
@@ -2,6 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import asyncio
+import contextlib
 import copy
 import json
 import os
@@ -16,6 +17,7 @@ from prometheus_client import Gauge, Histogram
 from pydantic import BaseModel
 
 from ..proto.docarray import LLMParams
+from ..telemetry.opea_telemetry import opea_telemetry, tracer
 from .constants import ServiceType
 from .dag import DAG
 from .logger import CustomLogger
@@ -80,6 +82,7 @@ class ServiceOrchestrator(DAG):
             logger.error(e)
             return False
 
+    @opea_telemetry
     async def schedule(self, initial_inputs: Dict | BaseModel, llm_parameters: LLMParams = LLMParams(), **kwargs):
         req_start = time.time()
         self.metrics.pending_update(True)
@@ -166,6 +169,26 @@ class ServiceOrchestrator(DAG):
             all_outputs.update(result_dict[prev_node])
         return all_outputs
 
+    def wrap_iterable(self, iterable, is_first=True):
+
+        with tracer.start_as_current_span("llm_generate_stream"):
+            while True:
+                with (
+                    tracer.start_as_current_span("llm_generate_stream_first_token")
+                    if is_first
+                    else contextlib.nullcontext()
+                ):  # else tracer.start_as_current_span(f"llm_generate_stream_next_token")
+                    try:
+                        token = next(iterable)
+                        yield token
+                        is_first = False
+                    except StopIteration:
+                        # Exiting the iterable loop cleanly
+                        break
+                    except Exception as e:
+                        raise e
+
+    @opea_telemetry
     async def execute(
         self,
         session: aiohttp.client.ClientSession,
@@ -193,14 +216,15 @@ class ServiceOrchestrator(DAG):
             # Still leave to sync requests.post for StreamingResponse
             if LOGFLAG:
                 logger.info(inputs)
-            response = requests.post(
-                url=endpoint,
-                data=json.dumps(inputs),
-                headers={"Content-type": "application/json"},
-                proxies={"http": None},
-                stream=True,
-                timeout=1000,
-            )
+            with tracer.start_as_current_span(f"{cur_node}_asyn_generate"):
+                response = requests.post(
+                    url=endpoint,
+                    data=json.dumps(inputs),
+                    headers={"Content-type": "application/json"},
+                    proxies={"http": None},
+                    stream=True,
+                    timeout=1000,
+                )
             downstream = runtime_graph.downstream(cur_node)
             if downstream:
                 assert len(downstream) == 1, "Not supported multiple stream downstreams yet!"
@@ -214,7 +238,9 @@ class ServiceOrchestrator(DAG):
                 # response.elapsed = time until first headers received
                 buffered_chunk_str = ""
                 is_first = True
-                for chunk in response.iter_content(chunk_size=None):
+
+                for chunk in self.wrap_iterable(response.iter_content(chunk_size=None)):
+
                     if chunk:
                         if downstream:
                             chunk = chunk.decode("utf-8")
@@ -240,6 +266,7 @@ class ServiceOrchestrator(DAG):
                             token_start = self.metrics.token_update(token_start, is_first)
                         yield chunk
                         is_first = False
+
                 self.metrics.request_update(req_start)
                 self.metrics.pending_update(False)
 
@@ -256,19 +283,18 @@ class ServiceOrchestrator(DAG):
                 input_data = {k: v for k, v in input_data.items() if v is not None}
             else:
                 input_data = inputs
-            async with session.post(endpoint, json=input_data) as response:
-                if response.content_type == "audio/wav":
-                    audio_data = await response.read()
-                    data = self.align_outputs(
-                        audio_data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs
-                    )
-                else:
-                    # Parse as JSON
-                    data = await response.json()
-                    # post process
-                    data = self.align_outputs(data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs)
+            with tracer.start_as_current_span(f"{cur_node}_generate"):
+                response = await session.post(endpoint, json=input_data)
+                if response.content_type == "audio/wav":
+                    audio_data = await response.read()
+                    data = self.align_outputs(audio_data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs)
+                else:
+                    # Parse as JSON
+                    data = await response.json()
+                    # post process
+                    data = self.align_outputs(data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs)
 
             return data, cur_node
 
     def align_inputs(self, inputs, *args, **kwargs):
         """Override this method in megaservice definition."""

@@ -6,12 +6,29 @@ import os
 from functools import wraps
 
 from opentelemetry import trace
+from opentelemetry.context.contextvars_context import ContextVarsRuntimeContext
 from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as HTTPSpanExporter
 from opentelemetry.sdk.resources import SERVICE_NAME, Resource
 from opentelemetry.sdk.trace import TracerProvider
 from opentelemetry.sdk.trace.export import BatchSpanProcessor
 from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
 
+
+def detach_ignore_err(self, token: object) -> None:
+    """Resets Context to a previous value.
+
+    Args:
+        token: A reference to a previous Context.
+    """
+    try:
+        self._current_context.reset(token)  # type: ignore
+    except Exception as e:
+        pass
+
+
+# bypass the ValueError that ContextVar context was created in a different Context from StreamingResponse
+ContextVarsRuntimeContext.detach = detach_ignore_err
+
 telemetry_endpoint = os.environ.get("TELEMETRY_ENDPOINT", "http://localhost:4318/v1/traces")
 
 resource = Resource.create({SERVICE_NAME: "opea"})
@@ -26,7 +43,6 @@ tracer = trace.get_tracer(__name__)
 
 
 def opea_telemetry(func):
-    print(f"[*** telemetry ***] {func.__name__} under telemetry.")
     if inspect.iscoroutinefunction(func):
 
         @wraps(func)

@@ -17,6 +17,7 @@ from comps import (
     statistics_dict,
 )
 from comps.cores.proto.api_protocol import EmbeddingRequest, EmbeddingResponse
+from comps.cores.telemetry.opea_telemetry import opea_telemetry
 
 logger = CustomLogger("opea_embedding_microservice")
 logflag = os.getenv("LOGFLAG", False)
@@ -36,6 +37,7 @@ loader = OpeaComponentLoader(
     host="0.0.0.0",
     port=6000,
 )
+@opea_telemetry
 @register_statistics(names=["opea_service@embedding"])
 async def embedding(input: EmbeddingRequest) -> EmbeddingResponse:
     start = time.time()

@@ -17,6 +17,7 @@ from comps import (
     statistics_dict,
 )
 from comps.cores.proto.api_protocol import ChatCompletionRequest
+from comps.cores.telemetry.opea_telemetry import opea_telemetry
 
 logger = CustomLogger("llm")
 logflag = os.getenv("LOGFLAG", False)
@@ -42,6 +43,7 @@ loader = OpeaComponentLoader(llm_component_name, description=f"OPEA LLM Componen
     host="0.0.0.0",
     port=9000,
 )
+@opea_telemetry
 @register_statistics(names=["opea_service@llm"])
 async def llm_generate(input: Union[LLMParamsDoc, ChatCompletionRequest, SearchedDoc]):
     start = time.time()

@@ -19,6 +19,7 @@ from comps import (
 )
 from comps.cores.proto.api_protocol import ChatCompletionRequest, RerankingRequest, RerankingResponse
 from comps.cores.proto.docarray import LLMParamsDoc, LVMVideoDoc, RerankedDoc, SearchedDoc, SearchedMultimodalDoc
+from comps.cores.telemetry.opea_telemetry import opea_telemetry
 
 logger = CustomLogger("opea_reranking_microservice")
 logflag = os.getenv("LOGFLAG", False)
@@ -35,6 +36,7 @@ loader = OpeaComponentLoader(rerank_component_name, description=f"OPEA RERANK Co
     host="0.0.0.0",
     port=8000,
 )
+@opea_telemetry
 @register_statistics(names=["opea_service@reranking"])
 async def reranking(
     input: Union[SearchedMultimodalDoc, SearchedDoc, RerankingRequest, ChatCompletionRequest]

@@ -37,6 +37,7 @@ from comps.cores.proto.api_protocol import (
     RetrievalResponse,
     RetrievalResponseData,
 )
+from comps.cores.telemetry.opea_telemetry import opea_telemetry
 
 logger = CustomLogger("opea_retrievers_microservice")
 logflag = os.getenv("LOGFLAG", False)
@@ -56,6 +57,7 @@ loader = OpeaComponentLoader(
     host="0.0.0.0",
     port=7000,
 )
+@opea_telemetry
 @register_statistics(names=["opea_service@retrievers"])
 async def ingest_files(
     input: Union[EmbedDoc, EmbedMultimodalDoc, RetrievalRequest, ChatCompletionRequest]