Skip to content

Commit e9e7a51

Browse files
committed
fix: don't listen to the user's input until the agent has stopped talking
1 parent 56a25a5 commit e9e7a51

2 files changed

Lines changed: 19 additions & 13 deletions

File tree

custom/composables/useAgentAudio.ts

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ function restartStandByAudio() {
4949

5050
export const useAgentAudio = defineStore('agentAudio', () => {
5151
const agentStore = useAgentStore();
52-
const agentAudioMode = ref<'transcribing' | 'streaming' | 'fetchingAudio' | 'playingAgentResponse' | null>(null);
52+
const agentAudioMode = ref<'transcribing' | 'streaming' | 'fetchingAudio' | 'playingAgentResponse' | 'readyToRespond' >('readyToRespond');
5353
const isStreamingResponse = ref(false);
5454

5555
let currentAbortController: AbortController | null = null;
@@ -59,15 +59,21 @@ export const useAgentAudio = defineStore('agentAudio', () => {
5959
let currentStreamingAudio: StreamingAudioState | null = null;
6060
let bufferedAudioChunks: ArrayBuffer[] = [];
6161
let bufferedAudioMimeType = 'audio/mpeg';
62+
let wasAudioResponseReceived = false;
6263

6364
function stopGenerationAndAudio() {
64-
agentAudioMode.value = null;
65+
setAudioModeReadyToRespond();
6566
stopCurrentAudioPlayback();
6667
currentAbortController?.abort();
6768
}
6869

70+
function setAudioModeReadyToRespond() {
71+
agentAudioMode.value = 'readyToRespond';
72+
}
73+
6974
async function sendAudioToServerAndHandleResponse(blob: Blob) {
7075
currentAbortController = new AbortController();
76+
wasAudioResponseReceived = false;
7177
const formData = new FormData();
7278
formData.append('file', blob, 'user_prompt.wav');
7379
formData.append('sessionId', agentStore.activeSessionId);
@@ -101,7 +107,9 @@ export const useAgentAudio = defineStore('agentAudio', () => {
101107
}
102108
} finally {
103109
isStreamingResponse.value = false;
104-
agentAudioMode.value = null;
110+
if (!wasAudioResponseReceived) {
111+
setAudioModeReadyToRespond();
112+
}
105113
}
106114
}
107115

@@ -177,14 +185,15 @@ export const useAgentAudio = defineStore('agentAudio', () => {
177185
stopStandByAudio();
178186
agentStore.setCurrentChatStatus('ready');
179187
agentStore.addAgentMessage(event.data.response.text);
180-
agentAudioMode.value = 'playingAgentResponse';
181188
return;
182189
}
183190

184191
if (event.type === 'audio-start') {
192+
wasAudioResponseReceived = true;
185193
isStreamingResponse.value = false;
186194
agentAudioMode.value = 'fetchingAudio';
187195
initializeAudioStream(event.data.mimeType);
196+
agentAudioMode.value = 'playingAgentResponse';
188197
return;
189198
}
190199

@@ -348,12 +357,12 @@ export const useAgentAudio = defineStore('agentAudio', () => {
348357
detachStreamingAudio();
349358
destroyCurrentAudioElement();
350359
if (!dontResetMode) {
351-
agentAudioMode.value = null;
360+
setAudioModeReadyToRespond();
352361
}
353362
}
354363

355364
function handleAudioEnded() {
356-
agentAudioMode.value = null;
365+
setAudioModeReadyToRespond();
357366
stopCurrentAudioPlayback();
358367
}
359368

custom/speech_recognition_frontend/MicrophoneButon.vue

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ onMounted(() => {
6363
});
6464
});
6565
66-
watch(agentAudioMode, (newVal) => {
66+
watch(agentAudioMode, async (newVal) => {
6767
if(newVal === 'streaming') {
6868
stopCurrentAudioPlayback(true);
6969
microphoneButtonMode.value = 'generating';
@@ -72,10 +72,11 @@ watch(agentAudioMode, (newVal) => {
7272
} else if (newVal === 'fetchingAudio') {
7373
//Generation is done, waiting for audio to be ready
7474
} else if (newVal === 'playingAgentResponse') {
75-
// Audio is playing
76-
} else {
75+
// Audio is playing'
76+
} else if (newVal === 'readyToRespond') {
7777
if(isAudioChatMode.value) {
7878
microphoneButtonMode.value = 'listen';
79+
await requestMicAndStartVAD(saidSomething, stopRecording, onAnySound);
7980
} else {
8081
microphoneButtonMode.value = 'off';
8182
}
@@ -142,12 +143,8 @@ async function sendRecordForTranscription() {
142143
showAudioWavesAnimation.value = false;
143144
const recordBlob = await getRecord();
144145
if (recordBlob) {
145-
console.log('Audio recorded, sending to server for transcription. Audio Blob size:', recordBlob.size, recordBlob.type);
146146
onStopRecording();
147147
await sendAudioToServerAndHandleResponse(recordBlob);
148-
if (agentStore.isAudioChatMode) {
149-
await requestMicAndStartVAD(saidSomething, stopRecording, onAnySound);
150-
}
151148
} else {
152149
console.error('No audio recorded');
153150
}

0 commit comments

Comments
 (0)