Co-authored-by: root <root@idc708073.jf.intel.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
36 lines
1.2 KiB
Python
36 lines
1.2 KiB
Python
# Copyright (C) 2024 Intel Corporation
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
import torch
|
|
from datasets import load_dataset
|
|
from evaluate import load
|
|
from transformers import WhisperForConditionalGeneration, WhisperProcessor
|
|
|
|
# Run on the GPU when torch reports CUDA as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Checkpoint identifier shared by the processor and the model below.
MODEL_NAME = "openai/whisper-large-v2"

# "clean" configuration, "test" split of the evaluation dataset.
librispeech_test_clean = load_dataset(
    "andreagasparini/librispeech_test_only",
    "clean",
    split="test",
    trust_remote_code=True,
)

# The processor and the model are both loaded from the same checkpoint name;
# the model is then moved to the selected device.
processor = WhisperProcessor.from_pretrained(MODEL_NAME)
model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME)
model = model.to(device)
|
|
|
|
|
|
def map_to_pred(batch):
    """Transcribe a dataset record and attach normalized reference/prediction text.

    Args:
        batch: Mapping-like record. Its ``"audio"`` entry (providing ``"array"``
            and ``"sampling_rate"``) and its ``"text"`` entry are read.

    Returns:
        The same ``batch`` object with two keys added: ``"reference"`` (the
        normalized ``"text"``) and ``"prediction"`` (the normalized model
        transcription).
    """
    sample = batch["audio"]
    features = processor(
        sample["array"],
        sampling_rate=sample["sampling_rate"],
        return_tensors="pt",
    ).input_features

    # NOTE(review): `_normalize` is underscore-prefixed, i.e. non-public by
    # convention -- confirm it exists in the transformers version in use.
    normalize = processor.tokenizer._normalize
    batch["reference"] = normalize(batch["text"])

    # Generation is inference-only, so gradient tracking is disabled.
    with torch.no_grad():
        # Keep only the first generated sequence.
        token_ids = model.generate(features.to(device))[0]

    batch["prediction"] = normalize(processor.decode(token_ids))
    return batch
|
|
|
|
|
|
# Apply map_to_pred across the dataset to obtain "reference"/"prediction" columns.
result = librispeech_test_clean.map(map_to_pred)

# Load the "wer" metric and score the predictions against the references.
wer = load("wer")
wer_score = wer.compute(
    references=result["reference"],
    predictions=result["prediction"],
)

# The computed score is multiplied by 100 before being printed.
print(100 * wer_score)
|