diff --git a/camera_hub/Cargo.lock b/camera_hub/Cargo.lock
index 7663255..dd8c92f 100644
--- a/camera_hub/Cargo.lock
+++ b/camera_hub/Cargo.lock
@@ -2801,6 +2801,24 @@ version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe"
 
+[[package]]
+name = "opusic-c"
+version = "1.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "89f8e9c909466f15e60277212cc4fec082c68a5e1c9f6e373eee716fec2fed47"
+dependencies = [
+ "opusic-sys",
+]
+
+[[package]]
+name = "opusic-sys"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2804e694ef0de3b4cbb254de565053b7cb48d3398df7fd60c6c62bed40c5372a"
+dependencies = [
+ "cmake",
+]
+
 [[package]]
 name = "order-stat"
 version = "0.1.3"
@@ -3714,6 +3732,7 @@ dependencies = [
  "log",
  "ndarray 0.17.2",
  "openmls",
+ "opusic-c",
  "rand 0.9.4",
  "reqwest",
  "retina",
diff --git a/camera_hub/Cargo.toml b/camera_hub/Cargo.toml
index fe625e2..4c8f5b3 100644
--- a/camera_hub/Cargo.toml
+++ b/camera_hub/Cargo.toml
@@ -8,7 +8,7 @@ authors = ["Ardalan Amiri Sani <arrdalan@gmail.com>"]
 default = ["logging"]
 logging = ["log"]
 ip = ["dep:rpassword", "dep:reqwest", "dep:http-auth", "dep:linfa", "dep:linfa-clustering", "dep:retina", "dep:serde_yaml2", "dep:ndarray", "dep:futures", "secluso-client-lib/camera_secret_qrcode"]
-raspberry = ["dep:secluso-motion-ai"]
+raspberry = ["dep:secluso-motion-ai", "dep:opusic-c"]
 manual = []
 telemetry = [] # todo: dep on the motion_ai crate
 test = []
@@ -46,3 +46,4 @@ linfa-clustering = { version = "0.8.1", optional = true }
 
 # Raspberry Specific Dependencies
 secluso-motion-ai = { path = "../motion_ai/pipeline", optional = true, default-features = false }
+opusic-c = { version = "1.6.1", optional = true }
diff --git a/camera_hub/src/mp4.rs b/camera_hub/src/mp4.rs
index 70c8335..2ebd401 100644
--- a/camera_hub/src/mp4.rs
+++ b/camera_hub/src/mp4.rs
@@ -70,13 +70,14 @@ pub struct TrakTrackerCore {
     /// is calculated using the PTS of the following sample.
     pub durations: Vec<(u32, u32)>,
     pub last_pts: Option<u64>,
+    pub last_duration: Option<u32>,
     pub tot_duration: u64,
 }
 
 impl TrakTrackerCore {
     pub fn finish(&mut self) {
         if self.last_pts.is_some() {
-            self.durations.push((1, 0));
+            self.durations.push((1, self.last_duration.unwrap_or(0)));
         }
     }
 
@@ -172,6 +173,7 @@ impl TrakTracker {
             };
             self.core.tot_duration += duration;
             let duration = u32::try_from(duration)?;
+            self.core.last_duration = Some(duration);
             match self.core.durations.last_mut() {
                 Some((s, d)) if *d == duration => *s += 1,
                 _ => self.core.durations.push((1, duration)),
@@ -350,26 +352,24 @@ impl<W: AsyncWrite + Unpin, V: CodecParameters, A: CodecParameters> Mp4WriterCor
                             self.audio_params.write_codec_box(buf)?;
                         });
                         audio_trak_core.write_common_stbl_parts(buf)?;
-
-                        // AAC requires two samples (really, each is a set of 960 or 1024 samples)
-                        // to decode accurately. See
-                        // https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFAppenG/QTFFAppenG.html .
-                        write_box!(buf, b"sgpd", {
-                            // BMFF section 8.9.3: SampleGroupDescriptionBox
-                            buf.put_u32(0); // version
-                            buf.extend_from_slice(b"roll"); // grouping type
-                            buf.put_u32(1); // entry_count
-                                            // BMFF section 10.1: AudioRollRecoveryEntry
-                            buf.put_i16(-1); // roll_distance
-                        });
-                        write_box!(buf, b"sbgp", {
-                            // BMFF section 8.9.2: SampleToGroupBox
-                            buf.put_u32(0); // version
-                            buf.extend_from_slice(b"roll"); // grouping type
-                            buf.put_u32(1); // entry_count
-                            buf.put_u32(audio_trak_core.samples);
-                            buf.put_u32(1); // group_description_index
-                        });
+                        if let Some(roll_distance) = self.audio_params.audio_roll_distance() {
+                            write_box!(buf, b"sgpd", {
+                                // BMFF section 8.9.3: SampleGroupDescriptionBox
+                                buf.put_u32(0); // version
+                                buf.extend_from_slice(b"roll"); // grouping type
+                                buf.put_u32(1); // entry_count
+                                                // BMFF section 10.1: AudioRollRecoveryEntry
+                                buf.put_i16(roll_distance);
+                            });
+                            write_box!(buf, b"sbgp", {
+                                // BMFF section 8.9.2: SampleToGroupBox
+                                buf.put_u32(0); // version
+                                buf.extend_from_slice(b"roll"); // grouping type
+                                buf.put_u32(1); // entry_count
+                                buf.put_u32(audio_trak_core.samples);
+                                buf.put_u32(1); // group_description_index
+                            });
+                        }
                     });
                 });
             });
@@ -400,6 +400,7 @@ impl<W: AsyncWrite + AsyncSeek + Send + Unpin, V: CodecParameters, A: CodecParam
                 b'i', b's', b'o', b'm', // major_brand
                 0, 0, 0, 0, // minor_version
                 b'i', b's', b'o', b'm', // compatible_brands[0]
+                b'i', b's', b'o', b'2', // compatible_brands[1]
             ]);
         });
         buf.extend_from_slice(&b"\0\0\0\0mdat"[..]);
@@ -416,6 +417,14 @@ impl<W: AsyncWrite + AsyncSeek + Send + Unpin, V: CodecParameters, A: CodecParam
     pub async fn finish(mut self) -> Result<(), Error> {
         self.video_trak.core.finish();
         self.audio_trak.core.finish();
+        let movie_timescale = 90_000u64;
+        let movie_duration = self.video_trak.core.tot_duration.max(
+            self.audio_trak
+                .core
+                .tot_duration
+                .saturating_mul(movie_timescale)
+                / u64::from(self.core.audio_params.get_clock_rate().max(1)),
+        );
         let mut buf = BytesMut::with_capacity(
             1024 + self.video_trak.core.size_estimate()
                 + self.audio_trak.core.size_estimate()
@@ -426,8 +435,8 @@ impl<W: AsyncWrite + AsyncSeek + Send + Unpin, V: CodecParameters, A: CodecParam
                 buf.put_u32(1 << 24); // version
                 buf.put_u64(0); // creation_time
                 buf.put_u64(0); // modification_time
-                buf.put_u32(90000); // timescale
-                buf.put_u64(self.video_trak.core.tot_duration);
+                buf.put_u32(movie_timescale as u32); // timescale
+                buf.put_u64(movie_duration);
                 buf.put_u32(0x00010000); // rate
                 buf.put_u16(0x0100); // volume
                 buf.put_u16(0); // reserved
@@ -438,7 +447,7 @@ impl<W: AsyncWrite + AsyncSeek + Send + Unpin, V: CodecParameters, A: CodecParam
                 for _ in 0..6 {
                     buf.put_u32(0); // pre_defined
                 }
-                buf.put_u32(2); // next_track_id
+                buf.put_u32(3); // next_track_id
             });
             if self.video_trak.core.samples > 0 {
                 self.core
diff --git a/camera_hub/src/raspberry_pi/rpi_camera.rs b/camera_hub/src/raspberry_pi/rpi_camera.rs
index 0b8d28b..e148521 100644
--- a/camera_hub/src/raspberry_pi/rpi_camera.rs
+++ b/camera_hub/src/raspberry_pi/rpi_camera.rs
@@ -36,6 +36,16 @@ use tokio::runtime::Runtime;
 const TOTAL_FRAME_RATE: usize = 10;
 const I_FRAME_INTERVAL: usize = TOTAL_FRAME_RATE; // 1-second fragments
 
+// We currently standardize Raspberry Pi audio on mono 48 kHz Opus.
+// 20 ms packets are the common low-latency Opus framing and map to 960 PCM samples at 48 kHz.
+pub const AUDIO_SAMPLE_RATE: u32 = 48_000;
+pub const AUDIO_CHANNELS: u16 = 1;
+pub const OPUS_FRAME_SAMPLES: u32 = 960; // 20 ms at 48 kHz
+pub const OPUS_BITRATE_BPS: u32 = 32_000;
+pub const OPUS_COMPLEXITY: u8 = 3;
+// 80 ms is the recommended preroll for random access and is = to 3840 samples at 48 kHz.
+pub const OPUS_PREROLL_SAMPLES: u16 = 3_840; // 80 ms at 48 kHz
+
 //These are for our local SPS/PPS channel
 #[derive(PartialEq, Debug, Clone)]
 pub enum FrameKind {
@@ -141,7 +151,8 @@ impl RaspberryPiCamera {
             debug!("Exited controller tick loop");
         });
 
-        let resolution: CameraResolution = Self::fetch_resolution().expect("A supported camera module was not found");
+        let resolution: CameraResolution =
+            Self::fetch_resolution().expect("A supported camera module was not found");
 
         // Start the new shared stream.
         rpi_dual_stream::start(
@@ -154,7 +165,7 @@ impl RaspberryPiCamera {
             ps_tx,
             motion_fps as u8,
         )
-            .expect("Failed to start shared stream");
+        .expect("Failed to start shared stream");
 
         rpi_dual_stream::start_audio(Arc::clone(&frame_queue))
             .expect("Failed to start audio stream");
@@ -199,6 +210,7 @@ impl RaspberryPiCamera {
         mp4: &mut M,
         duration: Option<u64>,
         frame_queue: Arc<Mutex<VecDeque<Frame>>>,
+        include_audio: bool,
     ) -> Result<(), Error> {
         let recording_window = duration.map(|secs| Duration::new(secs, 0));
         let recording_start_time = Instant::now();
@@ -236,9 +248,13 @@ impl RaspberryPiCamera {
 
             if started {
                 if frame.kind == FrameKind::Audio {
+                    // TODO: Livestream fMP4 audio is disabled for now.
+                    if !include_audio {
+                        continue;
+                    }
                     let ts_audio = audio_sample_count; // 48k timescale
                     mp4.audio(&frame.data, ts_audio).await?;
-                    audio_sample_count += 1024; // AAC-LC fixed
+                    audio_sample_count += u64::from(OPUS_FRAME_SAMPLES);
                     continue;
                 } else {
                     let ts = video_frame_count * ticks_per_video_frame;
@@ -315,10 +331,10 @@ impl RaspberryPiCamera {
             RpiCameraAudioParameters::new(),
             file,
         )
-            .await?;
+        .await?;
 
         // Process the rest of the frames, writing both to the MP4 writer and to the raw file.
-        Self::copy(&mut mp4, Some(duration), frame_queue).await?;
+        Self::copy(&mut mp4, Some(duration), frame_queue, true).await?;
         mp4.finish().await?;
 
         Ok(())
@@ -435,10 +451,10 @@ impl RaspberryPiCamera {
             RpiCameraAudioParameters::new(),
             livestream_writer,
         )
-            .await?;
+        .await?;
         fmp4.finish_header(None).await?;
 
-        Self::copy(&mut fmp4, None, frame_queue).await?;
+        Self::copy(&mut fmp4, None, frame_queue, false).await?;
 
         Ok(())
     }
@@ -556,7 +572,7 @@ impl RaspberryPiCamera {
             })
         } else {
             None
-        }
+        };
     }
 }
 
@@ -608,7 +624,7 @@ impl Camera for RaspberryPiCamera {
             Arc::clone(&self.frame_queue),
             self.sps_frame.clone(),
             self.pps_frame.clone(),
-            self.resolution.clone()
+            self.resolution.clone(),
         );
 
         rt.block_on(future).unwrap();
@@ -634,7 +650,7 @@ impl Camera for RaspberryPiCamera {
                 frame_queue_clone,
                 sps_frame_clone,
                 pps_frame_clone,
-                resolution_clone
+                resolution_clone,
             );
             if let Err(e) = rt.block_on(future) {
                 eprintln!("[Livestream] write_fmp4 error: {e:?}");
@@ -669,7 +685,11 @@ struct RpiCameraVideoParameters {
 
 impl RpiCameraVideoParameters {
     pub fn new(sps: Vec<u8>, pps: Vec<u8>, dimensions: CameraResolution) -> Self {
-        Self { sps, pps, dimensions }
+        Self {
+            sps,
+            pps,
+            dimensions,
+        }
     }
 }
 
@@ -757,95 +777,50 @@ impl CodecParameters for RpiCameraVideoParameters {
     }
 
     fn get_dimensions(&self) -> (u32, u32) {
-        ((self.dimensions.width as u32) << 16, (self.dimensions.height as u32) << 16)
+        (
+            (self.dimensions.width as u32) << 16,
+            (self.dimensions.height as u32) << 16,
+        )
     }
 }
 
 struct RpiCameraAudioParameters {
-    sample_rate: u32, // 48000
-    channels: u16,    // 1
-    asc: [u8; 2],     // AudioSpecificConfig
+    sample_rate: u32,
+    channels: u16,
 }
 
 impl RpiCameraAudioParameters {
     fn new() -> Self {
         Self {
-            sample_rate: 48_000,
-            channels: 1,
-            asc: [0x11, 0x88], // AAC-LC, 48kHz (idx=3), mono (1)
+            sample_rate: AUDIO_SAMPLE_RATE,
+            channels: AUDIO_CHANNELS,
         }
     }
 }
 
 impl CodecParameters for RpiCameraAudioParameters {
     fn write_codec_box(&self, buf: &mut BytesMut) -> Result<(), Error> {
-        write_box!(buf, b"mp4a", {
-            // 6 reserved bytes
-            buf.put_u8(0);
-            buf.put_u8(0);
-            buf.put_u8(0);
-            buf.put_u8(0);
-            buf.put_u8(0);
-            buf.put_u8(0);
-
-            // data_reference_index
+        write_box!(buf, b"Opus", {
+            // AudioSampleEntry layout mirrors mp4a
+            // however, Opus-specific decoder metadata goes in the dOps child box defined by the Opus-in-ISOBMFF mapping.
+            buf.extend_from_slice(&[0; 6]);
             buf.put_u16(1);
-
-            // AudioSampleEntry
-            buf.put_u16(0); // version
-            buf.put_u16(0); // revision
-            buf.put_u32(0); // vendor
-
-            buf.put_u16(self.channels); // channelcount
-            buf.put_u16(16); // samplesize
-            buf.put_u16(0); // compressionid
-            buf.put_u16(0); // packetsize
-            buf.put_u32(self.sample_rate << 16); // samplerate 16.16
-
-            // ES Descriptor box
-            write_box!(buf, b"esds", {
-                // FullBox: version(1) + flags(3)
-                buf.put_u32(0);
-
-                let asc = &self.asc;
-
-                // Build the descriptor payload in a temp vec, then write with size.
-                let mut d = Vec::new();
-
-                // ES_Descriptor
-                d.push(0x03);
-                let mut es_payload = Vec::new();
-                es_payload.extend_from_slice(&0x0002u16.to_be_bytes()); // ES_ID
-                es_payload.push(0x00); // flags
-
-                // DecoderConfigDescriptor (tag 0x04) ---
-                let mut dec_payload = Vec::new();
-                dec_payload.push(0x40); // objectTypeIndication = MPEG-4 Audio
-                dec_payload.push(0x15); // streamType=5 (AudioStream) <<2 | 1 reserved
-                dec_payload.extend_from_slice(&[0x00, 0x00, 0x00]); // bufferSizeDB (unknown)
-                dec_payload.extend_from_slice(&0u32.to_be_bytes()); // maxBitrate
-                dec_payload.extend_from_slice(&0u32.to_be_bytes()); // avgBitrate
-
-                // DecoderSpecificInfo (tag 0x05) inside DecoderConfigDescriptor
-                dec_payload.push(0x05);
-                dec_payload.push(asc.len() as u8);
-                dec_payload.extend_from_slice(asc);
-
-                // Write DecoderConfigDescriptor: tag + length + payload
-                es_payload.push(0x04);
-                es_payload.extend_from_slice(&write_desc_len(dec_payload.len()));
-                es_payload.extend_from_slice(&dec_payload);
-
-                // SLConfigDescriptor (tag 0x06) is a sibling of 0x04 inside ES_Descriptor
-                es_payload.push(0x06);
-                es_payload.push(0x01);
-                es_payload.push(0x02);
-
-                // Write length for ES_Descriptor
-                d.extend_from_slice(&write_desc_len(es_payload.len()));
-                d.extend_from_slice(&es_payload);
-
-                buf.extend_from_slice(&d);
+            buf.put_u16(0);
+            buf.put_u16(0);
+            buf.put_u32(0);
+            buf.put_u16(self.channels);
+            buf.put_u16(16);
+            buf.put_u16(0);
+            buf.put_u16(0);
+            buf.put_u32(self.sample_rate << 16);
+
+            write_box!(buf, b"dOps", {
+                buf.put_u8(0); // version
+                buf.put_u8(self.channels as u8);
+                buf.put_u16(OPUS_PREROLL_SAMPLES); // pre_skip / decoder preroll in PCM samples
+                buf.put_u32(self.sample_rate); // original input sample rate
+                buf.put_i16(0); // output gain
+                buf.put_u8(0); // channel mapping family
             });
         });
 
@@ -859,28 +834,8 @@ impl CodecParameters for RpiCameraAudioParameters {
     fn get_dimensions(&self) -> (u32, u32) {
         (0, 0)
     }
-}
 
-// Size encoding: 7-bit continuation
-fn write_desc_len(mut len: usize) -> Vec<u8> {
-    // Up to 4 bytes
-    let mut out = vec![0u8; 4];
-    for i in (0..4).rev() {
-        out[i] = (len & 0x7F) as u8;
-        len >>= 7;
-    }
-    // Set continuation bit on first 3
-    out[0] |= 0x80;
-    out[1] |= 0x80;
-    out[2] |= 0x80;
-
-    // Trim leading 0x80 chunks if possible
-    while out.len() > 1 && out[0] == 0x80 && (out[1] & 0x80) != 0 {
-        out.remove(0);
-    }
-    // If needed, also trim if the very first byte is 0
-    while out.len() > 1 && out[0] == 0 {
-        out.remove(0);
+    fn audio_roll_distance(&self) -> Option<i16> {
+        Some(-(OPUS_PREROLL_SAMPLES as i16))
     }
-    out
 }
diff --git a/camera_hub/src/raspberry_pi/rpi_dual_stream.rs b/camera_hub/src/raspberry_pi/rpi_dual_stream.rs
index 7b3b50b..fbfb062 100644
--- a/camera_hub/src/raspberry_pi/rpi_dual_stream.rs
+++ b/camera_hub/src/raspberry_pi/rpi_dual_stream.rs
@@ -7,22 +7,55 @@ use std::collections::VecDeque;
 use std::os::unix::net::UnixStream;
 use std::sync::{Arc, Mutex};
 use std::thread::sleep;
-use std::time::{SystemTime};
+use std::time::SystemTime;
 use std::{
     io::{BufReader, Read, Write},
-    process::{Command, Stdio},
+    process::{Child, ChildStdout, Command, Stdio},
     thread,
     time::Duration,
 };
-use bytes::Buf;
 
-use crate::raspberry_pi::rpi_camera::{Frame, FrameKind};
+use crate::raspberry_pi::rpi_camera::{
+    Frame, FrameKind, OPUS_BITRATE_BPS, OPUS_COMPLEXITY, OPUS_FRAME_SAMPLES,
+};
 use anyhow::anyhow;
 use bytes::BytesMut;
 use crossbeam_channel::Sender;
+use opusic_c::{Application, Bitrate, Channels, Encoder, Signal};
 use secluso_motion_ai::frame::RawFrame;
 use secluso_motion_ai::logic::pipeline::PipelineController;
 
+const AUDIO_PROBE_PACKETS: usize = 50;
+const AUDIO_SIGNAL_RMS_THRESHOLD: f64 = 8.0;
+const AUDIO_SIGNAL_PEAK_THRESHOLD: i16 = 32;
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+enum AudioCaptureMode {
+    I2s32StereoLeft,
+    Mono16,
+}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+struct AudioCaptureCandidate {
+    device: String,
+    mode: AudioCaptureMode,
+    forced: bool,
+}
+
+#[derive(Clone, Copy, Debug)]
+enum I2sDecodeMode {
+    LeftShift16,
+    LeftShift8,
+    RightShift16,
+    RightShift8,
+}
+
+#[derive(Clone, Copy, Debug)]
+struct AudioProbeStats {
+    rms: f64,
+    peak: i16,
+}
+
 /// Provides two channels: one for raw YUV420 frames from rpicam‑vid (for motion detection), one for H.264 frames converted by rpicam-vid.
 #[allow(clippy::too_many_arguments)]
 pub fn start(
@@ -274,45 +307,209 @@ fn extract_h264_frame(buffer: &mut BytesMut) -> anyhow::Result<Option<Frame>> {
     Ok(Some(Frame::new(nal_unit.to_vec(), kind)))
 }
 
-fn adts_frame_len(header: &[u8]) -> Option<usize> {
-    if header.len() < 7 { return None; }
-    // syncword 0xFFF
-    if header[0] != 0xFF || (header[1] & 0xF0) != 0xF0 { return None; }
-    let protection_absent = header[1] & 0x01;
-    let hdr_len = if protection_absent == 1 { 7 } else { 9 };
+pub fn start_audio(
+    frame_queue: Arc<Mutex<VecDeque<Frame>>>,
+) -> Result<(), Box<dyn std::error::Error>> {
+    // Spawn a thread to continuously
+    // (1) look for a usable ALSA capture device &
+    // (2) encode one 20 ms Opus packet at a time from whichever source is available.
+    thread::spawn(move || loop {
+        let candidates = audio_device_candidates();
+        let mut used_device = false;
+
+        for candidate in candidates {
+            match spawn_arecord(&candidate) {
+                Ok((mut child, stdout)) => {
+                    eprintln!(
+                        "Using audio input device: {} ({:?}, forced={})",
+                        candidate.device, candidate.mode, candidate.forced
+                    );
+                    used_device = encode_audio_stream(stdout, &candidate, Arc::clone(&frame_queue));
+                    let _ = child.wait();
+
+                    if used_device {
+                        eprintln!(
+                            "Audio input device ended: {}. Retrying discovery.",
+                            candidate.device
+                        );
+                        break;
+                    }
+
+                    eprintln!(
+                        "Audio input device produced no audio: {}. Trying next candidate.",
+                        candidate.device
+                    );
+                }
+                Err(err) => {
+                    eprintln!(
+                        "Failed to start audio input device {} ({:?}): {:?}",
+                        candidate.device, candidate.mode, err
+                    );
+                }
+            }
+        }
 
-    let frame_length = (((header[3] & 0x03) as usize) << 11)
-        | ((header[4] as usize) << 3)
-        | (((header[5] & 0xE0) as usize) >> 5);
+        if !used_device {
+            eprintln!("No usable audio input device found. Retrying in 5 seconds.");
+            thread::sleep(Duration::from_secs(5));
+        } else {
+            thread::sleep(Duration::from_secs(1));
+        }
+    });
 
-    if frame_length < hdr_len { return None; }
-    Some(frame_length)
+    Ok(())
 }
 
-fn strip_adts(frame: &[u8]) -> Option<&[u8]> {
-    if frame.len() < 7 { return None; }
-    if frame[0] != 0xFF || (frame[1] & 0xF0) != 0xF0 { return None; }
-    let protection_absent = frame[1] & 0x01;
-    let hdr_len = if protection_absent == 1 { 7 } else { 9 };
-    if frame.len() < hdr_len { return None; }
-    Some(&frame[hdr_len..])
+fn audio_device_candidates() -> Vec<AudioCaptureCandidate> {
+    let mut devices = Vec::new();
+
+    if let Ok(device) = std::env::var("SECLUSO_AUDIO_DEVICE") {
+        let device = device.trim();
+        if !device.is_empty() {
+            eprintln!(
+                "SECLUSO_AUDIO_DEVICE is set. Forcing audio capture attempts on {}.",
+                device
+            );
+            devices.push(AudioCaptureCandidate {
+                device: device.to_string(),
+                mode: AudioCaptureMode::I2s32StereoLeft,
+                forced: true,
+            });
+            devices.push(AudioCaptureCandidate {
+                device: device.to_string(),
+                mode: AudioCaptureMode::Mono16,
+                forced: true,
+            });
+            eprintln!(
+                "Audio candidate list: {} ({:?}, forced), {} ({:?}, forced).",
+                device,
+                AudioCaptureMode::I2s32StereoLeft,
+                device,
+                AudioCaptureMode::Mono16
+            );
+            return devices;
+        }
+    }
+
+    devices.push(AudioCaptureCandidate {
+        device: "plughw:ICS43432Mic,0".to_string(),
+        mode: AudioCaptureMode::I2s32StereoLeft,
+        forced: false,
+    });
+    // We prioritize the HAT capture device first because that is our default config
+    eprintln!(
+        "Audio candidate list starts with HAT device {} ({:?}).",
+        devices[0].device, devices[0].mode
+    );
+
+    for device in detect_usb_capture_devices() {
+        let candidate = AudioCaptureCandidate {
+            device,
+            mode: AudioCaptureMode::Mono16,
+            forced: false,
+        };
+        if !devices.iter().any(|existing| existing == &candidate) {
+            eprintln!(
+                "Discovered USB audio candidate {} ({:?}).",
+                candidate.device, candidate.mode
+            );
+            devices.push(candidate);
+        }
+    }
+
+    eprintln!("Final audio candidate count: {}.", devices.len());
+    devices
 }
 
-pub fn start_audio(
-    frame_queue: Arc<Mutex<VecDeque<Frame>>>,
-) -> Result<(), Box<dyn std::error::Error>> {
+fn detect_usb_capture_devices() -> Vec<String> {
+    let output = match Command::new("arecord")
+        .arg("-l")
+        .stdout(Stdio::piped())
+        .stderr(Stdio::null())
+        .output()
+    {
+        Ok(output) => output,
+        Err(err) => {
+            eprintln!("Failed to enumerate ALSA capture devices: {:?}", err);
+            return Vec::new();
+        }
+    };
 
-    let cmd = "\
-        arecord -D plughw:0,0 -f S16_LE -r 48000 -c 1 -t raw | \
-        sox -t raw -b 16 -e signed-integer -r 48000 -c 1 - \
-            -t raw -b 16 -e signed-integer -r 48000 -c 1 - \
-            highpass 100 lowpass 7000 gain 20 | \
-        fdkaac --raw --raw-channels 1 --raw-rate 48000 \
-                --bitrate 96k --transport-format 2 -o - -";
+    let listing = String::from_utf8_lossy(&output.stdout);
+    let mut devices = Vec::new();
 
-    let mut child = Command::new("sh")
-        .arg("-c")
-        .arg(cmd)
+    for line in listing.lines() {
+        let lower = line.to_ascii_lowercase();
+        // Catch the class-compliant USB microphone devices that have been tested (and not unrelated ALSA hardware)
+        if !lower.contains("usb") && !lower.contains("c-media") {
+            continue;
+        }
+
+        if let Some((card, device)) = parse_arecord_card_and_device(line) {
+            eprintln!("Matched USB ALSA capture line: {}", line.trim());
+            devices.push(format!("plughw:{},{}", card, device));
+        }
+    }
+
+    if devices.is_empty() {
+        eprintln!("No USB ALSA capture devices matched arecord -l output.");
+    }
+
+    devices
+}
+
+fn parse_arecord_card_and_device(line: &str) -> Option<(u32, u32)> {
+    let card_start = line.find("card ")? + "card ".len();
+    let card_end = line[card_start..].find(':')? + card_start;
+    let device_marker = ", device ";
+    let device_start = line.find(device_marker)? + device_marker.len();
+    let device_end = line[device_start..].find(':')? + device_start;
+
+    let card = line[card_start..card_end].trim().parse().ok()?;
+    let device = line[device_start..device_end].trim().parse().ok()?;
+
+    Some((card, device))
+}
+
+fn spawn_arecord(
+    candidate: &AudioCaptureCandidate,
+) -> Result<(Child, ChildStdout), Box<dyn std::error::Error>> {
+    let args: &[&str] = match candidate.mode {
+        AudioCaptureMode::I2s32StereoLeft => &[
+            "-D",
+            &candidate.device,
+            "-f",
+            "S32_LE",
+            "-r",
+            "48000",
+            "-c",
+            "2",
+            "-t",
+            "raw",
+        ],
+        AudioCaptureMode::Mono16 => &[
+            "-D",
+            &candidate.device,
+            "-f",
+            "S16_LE",
+            "-r",
+            "48000",
+            "-c",
+            "1",
+            "-t",
+            "raw",
+        ],
+    };
+
+    eprintln!(
+        "Launching arecord for {} with mode {:?}: arecord {}",
+        candidate.device,
+        candidate.mode,
+        args.join(" ")
+    );
+
+    let mut child = Command::new("arecord")
+        .args(args)
         .stdout(Stdio::piped())
         .stderr(Stdio::null())
         .spawn()?;
@@ -321,47 +518,300 @@ pub fn start_audio(
         .take()
         .ok_or_else(|| anyhow!("Failed to capture stdout"))?;
 
-    // Spawn a thread to read arecord|sox's stdout and extract audio frames.
-    {
-        thread::spawn(move || {
-            let mut r = BufReader::new(stdout);
-            let mut buf = BytesMut::with_capacity(64 * 1024);
-            let mut tmp = [0u8; 4096];
+    Ok((child, stdout))
+}
 
-            loop {
-                match r.read(&mut tmp) {
-                    Ok(0) => break,
-                    Ok(n) => {
-                        buf.extend_from_slice(&tmp[..n]);
-
-                        loop {
-                            if buf.len() < 7 { break; }
-                            let len = match adts_frame_len(&buf[..7]) {
-                                Some(l) => l,
-                                None => {
-                                    // resync: drop 1 byte
-                                    buf.advance(1);
-                                    continue;
-                                }
-                            };
-                            if buf.len() < len { break; }
-
-                            let adts = buf.split_to(len).to_vec();
-                            if let Some(aac_au) = strip_adts(&adts) {
-                                let frame = Frame {
-                                    data: aac_au.to_vec(),
-                                    kind: FrameKind::Audio,
-                                    timestamp: SystemTime::now(),
-                                };
-                                add_frame_and_drop_old(Arc::clone(&frame_queue), frame);
-                            }
-                        }
-                    }
-                    Err(_) => break,
+fn encode_audio_stream(
+    stdout: ChildStdout,
+    candidate: &AudioCaptureCandidate,
+    frame_queue: Arc<Mutex<VecDeque<Frame>>>,
+) -> bool {
+    let mut r = BufReader::new(stdout);
+    let mut encoder = match Encoder::new(
+        Channels::Mono,
+        opusic_c::SampleRate::Hz48000,
+        Application::Audio,
+    ) {
+        Ok(encoder) => encoder,
+        Err(err) => {
+            eprintln!("Failed to initialize Opus encoder: {:?}", err);
+            return false;
+        }
+    };
+    if let Err(err) = encoder.set_bitrate(Bitrate::Value(OPUS_BITRATE_BPS)) {
+        eprintln!("Failed to set Opus bitrate: {:?}", err);
+        return false;
+    }
+    if let Err(err) = encoder.set_vbr(true) {
+        eprintln!("Failed to enable Opus VBR: {:?}", err);
+        return false;
+    }
+    if let Err(err) = encoder.set_complexity(OPUS_COMPLEXITY) {
+        eprintln!("Failed to set Opus complexity: {:?}", err);
+        return false;
+    }
+    if let Err(err) = encoder.set_signal(Signal::Voice) {
+        eprintln!("Failed to set Opus signal mode: {:?}", err);
+        return false;
+    }
+
+    let bytes_per_frame = match candidate.mode {
+        AudioCaptureMode::I2s32StereoLeft => (OPUS_FRAME_SAMPLES as usize) * 8,
+        AudioCaptureMode::Mono16 => (OPUS_FRAME_SAMPLES as usize) * 2,
+    };
+    eprintln!(
+        "Audio encoder configured for {}: opus_frame_samples={} bytes_per_capture_frame={} bitrate_bps={} complexity={}.",
+        candidate.device,
+        OPUS_FRAME_SAMPLES,
+        bytes_per_frame,
+        OPUS_BITRATE_BPS,
+        OPUS_COMPLEXITY
+    );
+    let mut pcm_bytes = vec![0u8; bytes_per_frame];
+    let mut pcm_samples = vec![0u16; OPUS_FRAME_SAMPLES as usize];
+    let mut opus_packet = vec![0u8; 1_500];
+    let mut encoded_any = false;
+    let mut probe_raw = Vec::with_capacity(bytes_per_frame * AUDIO_PROBE_PACKETS);
+
+    // We collect an initial probe window before committing to this device (basically a sanity check)
+    // That gives us enough PCM to both detect obvious silence and, for I2S microphones, infer which slot/shift interpretation contains the live samples.
+    for _ in 0..AUDIO_PROBE_PACKETS {
+        if r.read_exact(&mut pcm_bytes).is_err() {
+            break;
+        }
+        probe_raw.extend_from_slice(&pcm_bytes);
+    }
+
+    if probe_raw.is_empty() {
+        eprintln!(
+            "Audio probe failed: device {} opened but no PCM bytes arrived.",
+            candidate.device
+        );
+        return false;
+    }
+
+    let i2s_decode_mode = match candidate.mode {
+        AudioCaptureMode::I2s32StereoLeft => {
+            let mut best_mode = I2sDecodeMode::LeftShift16;
+            let mut best_stats = AudioProbeStats { rms: 0.0, peak: 0 };
+
+            // The I2S microphone has not always presented its usable 16-bit audio samples in the same byte position / slot arrangement across our boards and overlays.
+            // Thus, we score a few plausible interpretations and keep whichever one produces the strongest signal.
+            for mode in [
+                I2sDecodeMode::LeftShift16,
+                I2sDecodeMode::LeftShift8,
+                I2sDecodeMode::RightShift16,
+                I2sDecodeMode::RightShift8,
+            ] {
+                let stats = probe_i2s_mode(&probe_raw, mode);
+                if stats.rms > best_stats.rms
+                    || (stats.rms == best_stats.rms && stats.peak > best_stats.peak)
+                {
+                    best_mode = mode;
+                    best_stats = stats;
                 }
             }
-        });
+
+            eprintln!(
+                "Audio probe for {} picked {:?} with rms={:.2} peak={}.",
+                candidate.device, best_mode, best_stats.rms, best_stats.peak
+            );
+            Some(best_mode)
+        }
+        AudioCaptureMode::Mono16 => {
+            let stats = probe_mono16(&probe_raw);
+            eprintln!(
+                "Audio probe for {} mono16 rms={:.2} peak={}.",
+                candidate.device, stats.rms, stats.peak
+            );
+            None
+        }
+    };
+
+    let probe_stats = match candidate.mode {
+        AudioCaptureMode::I2s32StereoLeft => probe_i2s_mode(
+            &probe_raw,
+            i2s_decode_mode.expect("i2s decode mode must be selected"),
+        ),
+        AudioCaptureMode::Mono16 => probe_mono16(&probe_raw),
+    };
+    let signal_present = probe_stats.rms >= AUDIO_SIGNAL_RMS_THRESHOLD
+        || probe_stats.peak >= AUDIO_SIGNAL_PEAK_THRESHOLD;
+    eprintln!(
+        "Audio signal status for {}: pcm_bytes={} signal_present={} rms={:.2} peak={} thresholds(rms>={:.2}, peak>={}).",
+        candidate.device,
+        probe_raw.len(),
+        signal_present,
+        probe_stats.rms,
+        probe_stats.peak,
+        AUDIO_SIGNAL_RMS_THRESHOLD,
+        AUDIO_SIGNAL_PEAK_THRESHOLD
+    );
+
+    if !signal_present && !candidate.forced {
+        // For auto-discovered devices, a silent probe means we should probably go to the next candidate on the list
+        // Allows us to skip a dead/default ALSA source and keep searching for a live microphone.
+        eprintln!(
+            "Audio probe saw no meaningful signal on {}. Trying another candidate.",
+            candidate.device
+        );
+        return false;
     }
 
-    Ok(())
-}
\ No newline at end of file
+    eprintln!(
+        "Beginning steady-state audio encode for {} using mode {:?} and decode {:?}.",
+        candidate.device, candidate.mode, i2s_decode_mode
+    );
+    let start_time = SystemTime::now();
+    let mut packets_encoded = 0usize;
+    let mut audio_bytes_encoded = 0usize;
+
+    for frame in probe_raw.chunks_exact(bytes_per_frame) {
+        fill_pcm_samples(frame, candidate.mode, i2s_decode_mode, &mut pcm_samples);
+        if let Some(encoded_len) =
+            encode_packet(&mut encoder, &pcm_samples, &mut opus_packet, &frame_queue)
+        {
+            encoded_any = true;
+            packets_encoded += 1;
+            audio_bytes_encoded += encoded_len;
+        }
+    }
+
+    loop {
+        if r.read_exact(&mut pcm_bytes).is_err() {
+            break;
+        }
+
+        fill_pcm_samples(
+            &pcm_bytes,
+            candidate.mode,
+            i2s_decode_mode,
+            &mut pcm_samples,
+        );
+
+        if let Some(encoded_len) =
+            encode_packet(&mut encoder, &pcm_samples, &mut opus_packet, &frame_queue)
+        {
+            encoded_any = true;
+            packets_encoded += 1;
+            audio_bytes_encoded += encoded_len;
+            if packets_encoded % 250 == 0 {
+                let elapsed = start_time.elapsed().unwrap_or_default();
+                eprintln!(
+                    "Audio encode progress for {}: packets={} opus_bytes={} elapsed_ms={}.",
+                    candidate.device,
+                    packets_encoded,
+                    audio_bytes_encoded,
+                    elapsed.as_millis()
+                );
+            }
+        }
+    }
+
+    let elapsed = start_time.elapsed().unwrap_or_default();
+    eprintln!(
+        "Audio encode loop finished for {}: encoded_any={} packets={} opus_bytes={} elapsed_ms={}.",
+        candidate.device,
+        encoded_any,
+        packets_encoded,
+        audio_bytes_encoded,
+        elapsed.as_millis()
+    );
+
+    encoded_any
+}
+
+fn encode_packet(
+    encoder: &mut Encoder,
+    pcm_samples: &[u16],
+    opus_packet: &mut [u8],
+    frame_queue: &Arc<Mutex<VecDeque<Frame>>>,
+) -> Option<usize> {
+    match encoder.encode_to_slice(pcm_samples, opus_packet) {
+        Ok(encoded_len) if encoded_len > 0 => {
+            let frame = Frame {
+                data: opus_packet[..encoded_len].to_vec(),
+                kind: FrameKind::Audio,
+                timestamp: SystemTime::now(),
+            };
+            add_frame_and_drop_old(Arc::clone(frame_queue), frame);
+            Some(encoded_len)
+        }
+        Ok(_) => None,
+        Err(err) => {
+            eprintln!("Failed to encode Opus audio: {:?}", err);
+            None
+        }
+    }
+}
+
+fn fill_pcm_samples(
+    bytes: &[u8],
+    mode: AudioCaptureMode,
+    i2s_decode_mode: Option<I2sDecodeMode>,
+    pcm_samples: &mut [u16],
+) {
+    match mode {
+        AudioCaptureMode::I2s32StereoLeft => {
+            let decode_mode = i2s_decode_mode.expect("i2s decode mode must be selected");
+            for (dst, frame) in pcm_samples.iter_mut().zip(bytes.chunks_exact(8)) {
+                *dst = decode_i2s_sample(frame, decode_mode);
+            }
+        }
+        AudioCaptureMode::Mono16 => {
+            for (dst, chunk) in pcm_samples.iter_mut().zip(bytes.chunks_exact(2)) {
+                *dst = u16::from_le_bytes([chunk[0], chunk[1]]);
+            }
+        }
+    }
+}
+
+fn probe_mono16(bytes: &[u8]) -> AudioProbeStats {
+    let samples = bytes
+        .chunks_exact(2)
+        .map(|chunk| i16::from_le_bytes([chunk[0], chunk[1]]));
+    compute_probe_stats(samples)
+}
+
+fn probe_i2s_mode(bytes: &[u8], mode: I2sDecodeMode) -> AudioProbeStats {
+    let samples = bytes
+        .chunks_exact(8)
+        .map(|frame| i16::from_ne_bytes(decode_i2s_sample(frame, mode).to_ne_bytes()));
+    compute_probe_stats(samples)
+}
+
+fn compute_probe_stats(samples: impl Iterator<Item = i16>) -> AudioProbeStats {
+    let mut sum_sq = 0.0f64;
+    let mut count = 0usize;
+    let mut peak = 0i16;
+
+    for sample in samples {
+        let abs = sample.saturating_abs();
+        if abs > peak {
+            peak = abs;
+        }
+        let sample_f = f64::from(sample);
+        sum_sq += sample_f * sample_f;
+        count += 1;
+    }
+
+    let rms = if count == 0 {
+        0.0
+    } else {
+        (sum_sq / count as f64).sqrt()
+    };
+
+    AudioProbeStats { rms, peak }
+}
+
+fn decode_i2s_sample(frame: &[u8], mode: I2sDecodeMode) -> u16 {
+    let left = i32::from_le_bytes([frame[0], frame[1], frame[2], frame[3]]);
+    let right = i32::from_le_bytes([frame[4], frame[5], frame[6], frame[7]]);
+    let sample_i16 = match mode {
+        I2sDecodeMode::LeftShift16 => (left >> 16) as i16,
+        I2sDecodeMode::LeftShift8 => (left >> 8) as i16,
+        I2sDecodeMode::RightShift16 => (right >> 16) as i16,
+        I2sDecodeMode::RightShift8 => (right >> 8) as i16,
+    };
+    sample_i16 as u16
+}
diff --git a/camera_hub/src/traits.rs b/camera_hub/src/traits.rs
index 189bfd3..78a910d 100644
--- a/camera_hub/src/traits.rs
+++ b/camera_hub/src/traits.rs
@@ -16,6 +16,9 @@ pub trait CodecParameters {
     fn write_codec_box(&self, buf: &mut BytesMut) -> Result<(), Error>;
     fn get_clock_rate(&self) -> u32;
     fn get_dimensions(&self) -> (u32, u32);
+    fn audio_roll_distance(&self) -> Option<i16> {
+        None
+    }
 }
 
 #[cfg(any(feature = "raspberry", feature = "ip"))]