1. Anuncie Aqui ! Entre em contato fdantas@4each.com.br

[Python] How can I implement audio transcription from a streamed audio feed through a Django WebSocket by...

Discussão em 'Python' iniciado por Stack, Setembro 28, 2024 às 11:22.

  1. Stack

    Stack Membro Participativo

    I tried to implement this, but transcripts are not generating. implemented backend code below.

    class ReadingConsumer(AsyncWebsocketConsumer):
    def __init__(self,*args, **kwargs):
    super().__init__(*args, **kwargs)
    credentials = "my_google_credential"
    self.client = speech.SpeechClient(credentials=credentials)

    self.config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=16000,
    language_code="en-US",
    )
    self.streaming_config = speech.StreamingRecognitionConfig(
    config=self.config,
    interim_results=True
    )
    self.closed = False
    self.audio_chunks = asyncio.Queue()
    async def connect(self):
    await self.accept()
    #self.audio_chunks = queue.Queue()
    self.transcription_thread = None
    self.transcription_started = False
    #self.audio_queue = queue.Queue()
    print("Streaming task started")


    async def disconnect(self, close_code):
    print(f"WebSocket disconnected with code: {close_code}")
    self.closed = True
    await asyncio.sleep(5)
    if self.transcription_thread and self.transcription_thread.is_alive():
    self.audio_chunks.put(None) # Signal to stop transcription
    self.transcription_thread.join()

    async def receive(self, text_data=None, bytes_data=None):
    if bytes_data:
    print(f"Received bytes data size: {len(bytes_data)}")
    await self.audio_chunks.put(bytes_data)
    print(f'audio chunk initial queue: {self.audio_chunks.qsize()}')
    #self.streaming_task.send(bytes_data)
    if not self.transcription_started:
    print('Thread run')
    self.transcription_started = True
    self.transcription_thread = threading.Thread(target=self.stream_audio)
    self.transcription_thread.start()

    def sync_audio_stream_generator(self):
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    while not self.closed:
    #chunk = loop.run_until_complete(self.fetch_audio_data())#self.audio_chunks.get()
    #if chunk is None:
    # return
    #data = [chunk]
    #while True:
    try:
    # Get the next chunk from the queue
    print(f"Synchronous generator yielding: {self.audio_chunks.qsize()}")
    chunk = loop.run_until_complete(self.fetch_audio_data())#self.audio_chunks.get(block=False)
    #chunk = self.audio_chunks.get()#block=False
    if chunk is None:
    print("No more audio chunks. Stopping the generator.")
    return
    print(f'inside generator data returned')
    yield chunk
    except queue.Empty:
    print("Queue is empty, break")
    break
    except Exception as e:
    print(f'exception sync_audio_stream_generator: {e}')
    break
    #yield b"".join(data)
    async def fetch_audio_data(self):
    """
    Asynchronous helper function to fetch data from the queue.
    This runs inside the sync generator using the event loop.
    """
    try:
    # Wait for audio data to be put in the queue, timeout after 1 second
    return await asyncio.wait_for(self.audio_chunks.get(), timeout=1)
    except asyncio.TimeoutError:
    # If no data, return None to keep the transcription stream going
    return None
    def stream_audio(self):
    print("Starting audio streaming to Google Speech-to-Text API")
    try:
    audio_generator = self.sync_audio_stream_generator()
    print(f"generator: {audio_generator}")
    #time.sleep(2)
    #await asyncio.sleep(1)
    requests = (speech.StreamingRecognizeRequest(audio_content=content) for content in audio_generator)
    print(f'requests: {requests}')
    #time.sleep(2)
    responses = self.client.streaming_recognize(self.streaming_config, requests)
    print(f'responses: {responses}')

    for response in responses:
    print(f'response results: {response}')
    if not response.results:
    continue
    result = response.results[0]
    if not result.alternatives:
    continue

    # Display the transcription of the top alternative.
    transcript = result.alternatives[0].transcript
    #await self.compare_and_send_word_by_word(transcript)
    print(f'transcript: {transcript}')
    asyncio.run(self.send_transcript(transcript))

    print('last line')
    except Exception as e:
    #traceback.print_exc()
    print(f"Error in stream_audio: {str(e)}")
    traceback.print_exc()
    async def send_transcript(self, transcript):
    # Send the transcript back to the WebSocket client asynchronously
    #await self.send(text_data=json.dumps({"transcription": "hello....", "is_final": True}))
    await self.send(text_data=transcript)


    The frontend implementation code is also given below. The frontend code captures 1-channel mono audio, as required by Google STT.

    // Start capturing microphone audio on button click and stream the
    // recorded chunks to the backend transcription WebSocket.
    document.getElementById('startBtn').addEventListener('click', () => {
    navigator.mediaDevices.getUserMedia({ audio: {
    deviceId: "default",
    channelCount: 1, // Mono audio
    sampleRate: 16000, // 16 kHz sampleRate :{ideal: 16000}
    sampleSize: 16,
    } }).then((stream) => {
    if (!MediaRecorder.isTypeSupported('audio/webm'))
    return alert('Browser not supported')

    // NOTE(review): 'audio/webm' is an Opus-compressed container, not raw
    // LINEAR16 PCM — confirm this matches what the server-side speech
    // RecognitionConfig expects, otherwise no transcripts will come back.
    mediaRecorder = new MediaRecorder(stream, {
    mimeType: 'audio/webm',
    })

    socket = new WebSocket(`ws://localhost:8000/ws/transcribe/${chapterId}/`);

    socket.onopen = () => {
    document.querySelector('#status').textContent = 'Connected'
    console.log({ event: 'onopen' })

    // Forward each non-empty chunk only while the socket is OPEN
    // (readyState == 1); chunks arriving after close are dropped.
    mediaRecorder.addEventListener('dataavailable', async (event) => {
    if (event.data.size > 0 && socket.readyState == 1) {
    socket.send(event.data)
    }
    })
    // Fire a 'dataavailable' event for every 250 ms of recorded audio.
    mediaRecorder.start(250)
    })})


    What is the actual issue with the above code? Could someone please provide a working implementation? I debugged the code and did not get any exception or error — the code runs perfectly and receives audio chunks continuously. I used threading because while executing the code the WebSocket blocks for some time, which causes a Google timeout error: streaming_recognize expects a continuous audio stream and waits for a maximum of about 4 seconds.

    Continue reading...

Compartilhe esta Página