1. Anuncie Aqui ! Entre em contato fdantas@4each.com.br

[Python] How can I implement audio transcription from a streamed audio feed through a Django WebSocket by...

Discussão em 'Python' iniciado por Stack, Setembro 28, 2024 às 11:22.

  1. Stack

    Stack Membro Participativo

    I tried to implement this, but transcripts are not generating. implemented backend code below.

    class ReadingConsumer(AsyncWebsocketConsumer):
    def __init__(self,*args, **kwargs):
    super().__init__(*args, **kwargs)
    credentials = "my_google_credential"
    self.client = speech.SpeechClient(credentials=credentials)

    self.config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=16000,
    language_code="en-US",
    )
    self.streaming_config = speech.StreamingRecognitionConfig(
    config=self.config,
    interim_results=True
    )
    self.closed = False
    self.audio_chunks = asyncio.Queue()
    async def connect(self):
    await self.accept()
    #self.audio_chunks = queue.Queue()
    self.transcription_thread = None
    self.transcription_started = False
    #self.audio_queue = queue.Queue()
    print("Streaming task started")


    async def disconnect(self, close_code):
    print(f"WebSocket disconnected with code: {close_code}")
    self.closed = True
    await asyncio.sleep(5)
    if self.transcription_thread and self.transcription_thread.is_alive():
    self.audio_chunks.put(None) # Signal to stop transcription
    self.transcription_thread.join()

    async def receive(self, text_data=None, bytes_data=None):
    if bytes_data:
    print(f"Received bytes data size: {len(bytes_data)}")
    await self.audio_chunks.put(bytes_data)
    print(f'audio chunk initial queue: {self.audio_chunks.qsize()}')
    #self.streaming_task.send(bytes_data)
    if not self.transcription_started:
    print('Thread run')
    self.transcription_started = True
    self.transcription_thread = threading.Thread(target=self.stream_audio)
    self.transcription_thread.start()

    def sync_audio_stream_generator(self):
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    while not self.closed:
    #chunk = loop.run_until_complete(self.fetch_audio_data())#self.audio_chunks.get()
    #if chunk is None:
    # return
    #data = [chunk]
    #while True:
    try:
    # Get the next chunk from the queue
    print(f"Synchronous generator yielding: {self.audio_chunks.qsize()}")
    chunk = loop.run_until_complete(self.fetch_audio_data())#self.audio_chunks.get(block=False)
    #chunk = self.audio_chunks.get()#block=False
    if chunk is None:
    print("No more audio chunks. Stopping the generator.")
    return
    print(f'inside generator data returned')
    yield chunk
    except queue.Empty:
    print("Queue is empty, break")
    break
    except Exception as e:
    print(f'exception sync_audio_stream_generator: {e}')
    break
    #yield b"".join(data)
    async def fetch_audio_data(self):
    """
    Asynchronous helper function to fetch data from the queue.
    This runs inside the sync generator using the event loop.
    """
    try:
    # Wait for audio data to be put in the queue, timeout after 1 second
    return await asyncio.wait_for(self.audio_chunks.get(), timeout=1)
    except asyncio.TimeoutError:
    # If no data, return None to keep the transcription stream going
    return None
    def stream_audio(self):
    print("Starting audio streaming to Google Speech-to-Text API")
    try:
    audio_generator = self.sync_audio_stream_generator()
    print(f"generator: {audio_generator}")
    #time.sleep(2)
    #await asyncio.sleep(1)
    requests = (speech.StreamingRecognizeRequest(audio_content=content) for content in audio_generator)
    print(f'requests: {requests}')
    #time.sleep(2)
    responses = self.client.streaming_recognize(self.streaming_config, requests)
    print(f'responses: {responses}')

    for response in responses:
    print(f'response results: {response}')
    if not response.results:
    continue
    result = response.results[0]
    if not result.alternatives:
    continue

    # Display the transcription of the top alternative.
    transcript = result.alternatives[0].transcript
    #await self.compare_and_send_word_by_word(transcript)
    print(f'transcript: {transcript}')
    asyncio.run(self.send_transcript(transcript))

    print('last line')
    except Exception as e:
    #traceback.print_exc()
    print(f"Error in stream_audio: {str(e)}")
    traceback.print_exc()
    async def send_transcript(self, transcript):
    # Send the transcript back to the WebSocket client asynchronously
    #await self.send(text_data=json.dumps({"transcription": "hello....", "is_final": True}))
    await self.send(text_data=transcript)


    The frontend implementation code is also given below. The frontend code captures 1-channel mono audio, as required by Google STT.

    // Start capturing microphone audio on button click and stream the
    // recorded chunks to the backend transcription WebSocket.
    document.getElementById('startBtn').addEventListener('click', () => {
    navigator.mediaDevices.getUserMedia({ audio: {
    deviceId: "default",
    channelCount: 1, // Mono audio
    sampleRate: 16000, // 16 kHz sampleRate :{ideal: 16000}
    sampleSize: 16,
    } }).then((stream) => {
    if (!MediaRecorder.isTypeSupported('audio/webm'))
    return alert('Browser not supported')

    // NOTE(review): 'audio/webm' is an Opus-compressed container, not raw
    // LINEAR16 PCM — confirm this matches what the server-side speech
    // RecognitionConfig expects, otherwise no transcripts will come back.
    mediaRecorder = new MediaRecorder(stream, {
    mimeType: 'audio/webm',
    })

    socket = new WebSocket(`ws://localhost:8000/ws/transcribe/${chapterId}/`);

    socket.onopen = () => {
    document.querySelector('#status').textContent = 'Connected'
    console.log({ event: 'onopen' })

    // Forward each non-empty chunk only while the socket is OPEN
    // (readyState == 1); chunks arriving after close are dropped.
    mediaRecorder.addEventListener('dataavailable', async (event) => {
    if (event.data.size > 0 && socket.readyState == 1) {
    socket.send(event.data)
    }
    })
    // Fire a 'dataavailable' event for every 250 ms of recorded audio.
    mediaRecorder.start(250)
    })})


    What is the actual issue with the above code? Could someone please provide a working implementation? I debugged the code and did not get any exception or error — the code runs perfectly and receives audio chunks continuously. I used threading because while executing the code the WebSocket blocks for some time, which causes a Google timeout error: streaming_recognize expects a continuous audio stream and waits for a maximum of about 4 seconds.

    Continue reading...

Compartilhe esta Página