1. Anuncie Aqui ! Entre em contato fdantas@4each.com.br

[Python] How to implement voice activity detection and stop Twilio voice bot on customer...

Discussão em 'Python' iniciado por Stack, Outubro 7, 2024.

  1. Stack

    Stack Membro Participativo

    I am building a voice bot using Twilio and Django. I have set up an outbound call with media streaming, which is working fine. However, I want to implement voice activity detection so that if the customer interrupts while the voice bot is speaking, the bot stops speaking immediately. I am looking for guidance on how to achieve this using Twilio's Media Streams feature.

    I have set up media streaming for the Twilio outbound call using the <Stream> verb in the TwiML response. This is my view:

    class StartOutboundCallingView(APIView):
        """API view that starts an outbound Twilio call.

        The call is created with the TwiML returned by ``generate_twiml()``.
        """

        # Twilio REST client, created once when the class body is executed.
        # ``sid`` and ``token`` are expected to be defined elsewhere in the module.
        client = Client(sid, token)

        def post(self, request, format=None):
            """Create an outbound call from the posted ``from`` and ``to`` values.

            Args:
                request: Incoming request; ``request.data`` must contain the
                    keys ``"from"`` and ``"to"``.
                format: Unused here; kept so the handler signature stays the same.

            Returns:
                Response: HTTP 400 with an explanatory payload when either key
                is missing, otherwise HTTP 200 with the SID of the created call
                under ``"call_id"``.
            """
            # Validate first so nothing is read from an incomplete payload.
            if "from" not in request.data or "to" not in request.data:
                return Response(
                    {"Required Field": "from and to are required fields"},
                    status=status.HTTP_400_BAD_REQUEST,
                )

            from_ = request.data["from"]
            to = request.data["to"]

            call = self.client.calls.create(
                twiml=generate_twiml(),
                to=to,
                from_=from_,
            )
            return Response({"call_id": call.sid}, status=status.HTTP_200_OK)


    This is how I am generating the TwiML:

    def generate_twiml(stream_url=None):
        """Build the TwiML that connects a call to a media stream.

        Args:
            stream_url: URL written into the ``url`` attribute of the
                ``<Stream>`` element. When omitted, the module-level ``url``
                is used, which is what the function did before this
                parameter existed.

        Returns:
            str: The TwiML document as a string.
        """
        # Function-scope import so the block does not depend on a file-level import.
        from xml.sax.saxutils import quoteattr

        if stream_url is None:
            stream_url = url

        # quoteattr() escapes &, < and > and adds the surrounding quotes, so a
        # URL containing those characters still yields well-formed XML.
        stream_attr = quoteattr(str(stream_url))
        return f'''
    <Response>
    <Connect>
    <Stream url={stream_attr}/>
    </Connect>
    </Response>
    '''


    This is my WebSocket consumer:

    class TwilioWS(WebsocketConsumer):
        """WebSocket consumer for a Twilio media stream.

        Incoming "media" messages are base64-decoded and handed to a
        ``DeepgramSTT`` transcriber on a single worker thread; audio passed to
        ``handle_transcribed_audio`` is sent back over the same socket.
        """

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)

            self.groq_ai = None
            self.call_id = None
            # ``streamSid`` is the attribute the rest of the class reads and
            # writes; ``stream_id`` is kept (and mirrored on "start") in case
            # collaborators use that spelling.
            self.stream_id = None
            self.streamSid = None
            self.transcriber = None
            self.tts = None
            # One worker, so audio chunks are submitted and processed in order.
            self.thread_pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
            self.loop = asyncio.get_event_loop()

        def connect(self):
            """Start the transcriber and the Groq helper, then accept the socket."""
            self.transcriber: DeepgramSTT = DeepgramSTT(self)
            self.transcriber.start_transcription()
            asyncio.run(self.init_groq())
            return super().connect()

        async def init_groq(self) -> None:
            """Create and initialise the ``GroqAI`` helper bound to this consumer."""
            self.groq_ai: GroqAI = GroqAI(self)
            self.groq_ai.initialize()

        def disconnect(self, code=None) -> None:
            """Release per-connection resources when the socket closes.

            Args:
                code: WebSocket close code passed by Channels. Optional so that
                    existing zero-argument calls keep working.
            """
            # The transcriber is only created in connect(); guard against a
            # close that arrives before that happened.
            if self.transcriber is not None:
                self.transcriber.disconnect()
            # Do not block the close on chunks that are still queued.
            self.thread_pool.shutdown(wait=False)

        def receive(self, text_data=None, bytes_data=None) -> None:
            """Handle one incoming WebSocket frame.

            Args:
                text_data (str): JSON-encoded message with an ``event`` key.
                    "start" carries the stream and call SIDs, "media" carries a
                    base64 audio payload, and "stop" ends the stream.
                bytes_data (bytes): Binary frame content; not used. Accepted so
                    a binary frame does not raise ``TypeError``.

            Returns:
                None
            """
            # A frame without text stops transcription, as an empty text frame
            # always did.
            if not text_data:
                self.transcriber.disconnect()
                return

            data: dict = json.loads(text_data)
            event = data.get('event')

            if event == "start":
                start = data['start']
                self.streamSid = start['streamSid']
                self.stream_id = self.streamSid
                self.call_id = start['callSid']
            elif event == "media":
                self.handle_transcription(
                    base64.b64decode(data["media"]["payload"]),
                    self.call_id,
                )
            elif event == "stop":
                self.transcriber.disconnect()

        def handle_transcription(self, chunk: bytes, call_id: str) -> None:
            """Queue one audio chunk for transcription on the worker thread.

            Args:
                chunk (bytes): The audio chunk to transcribe.
                call_id (str): The ID of the call.
            """
            self.thread_pool.submit(
                self.transcriber.transcribe, chunk, call_id
            )

        # Return response audio to Twilio.
        def handle_transcribed_audio(self, audio_data):
            """Send audio back over the socket as a "media" message.

            Args:
                audio_data (bytes): Raw audio; it is base64-encoded before
                    sending. NOTE(review): the caller is not visible here --
                    presumably the TTS component; confirm.
            """
            encoded_audio = base64.b64encode(audio_data).decode('UTF-8')
            self.send(json.dumps({
                'streamSid': self.streamSid,
                'event': 'media',
                'media': {
                    'payload': encoded_audio,
                },
            }))

    Continue reading...

Compartilhe esta Página