Unexpected AudioSocket TCP payload length: 320 bytes instead of expected 160 bytes (RNNoise integration)

Hi everyone,

I’m working on integrating RNNoise into an AudioSocket TCP server for Asterisk to denoise audio frames in real-time. I’m using the AudioSocket protocol standard (AudioSocket - Asterisk Documentation) and expect audio data packets (type 0x10) to have a payload length of 160 bytes (80 samples at 8 kHz, 16-bit PCM mono).

Problem:
RNNoise expects the audio at 48,000 Hz.
However, my TCP server is receiving packets with a payload length of 320 bytes (twice as large), which leads to errors like this during processing:

Unexpected payload length: 320, expected 160 bytes for 80 samples at 8kHz

So RNNoise expects the audio at 48,000 Hz…

My assumption:
It seems like the sender (Asterisk) is bundling 2 audio frames (each 80 samples / 160 bytes) into a single packet.

What I’ve done so far:
I’ve written the code expecting exactly 160 bytes per packet to process them with RNNoise.

Additionally, I tested another version of my Node.js script that runs, but the processed audio sounds heavily distorted. So I suspect the resampling or frame handling isn’t done correctly.

Question for the community:
How should one best handle such “double” packets? Is it common that AudioSocket sends multiple frames in a single packet? Should I adapt my implementation to parse and process multiple 160-byte audio frames individually?

Here’s a snippet of my receiving code:

while (buffer.length >= 3) {
  const type = buffer.readUInt8(0);
  const length = buffer.readUInt16BE(1);

  if (buffer.length < 3 + length) break;

  const payload = buffer.slice(3, 3 + length);
  buffer = buffer.slice(3 + length);

  if (type === 0x10) {
    if (payload.length !== 160) {
      console.error(`Unexpected payload length: ${payload.length}, expected 160 bytes for 80 samples at 8kHz`);
      return;
    }
    processAudioFrame(payload);
  }
}

Would it be advisable to extend this to split longer payloads (320, 480 bytes, etc.) into 160-byte chunks and process each chunk individually?

Thanks in advance for your help!

Kind Regards

FULL Code:

import { createRNNWasmModule } from '/etc/asterisk/scripts/node_modules/@bigbinary/rnnoise-wasm/index.js';
import { fileURLToPath } from 'url';
import path from 'path';
import fs from 'fs';
import net from 'net';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

const wasmFilePath = path.join(__dirname, 'rnnoise-wasm', 'dist', 'rnnoise.wasm');

async function startServer() {
  console.log('Starting RNNoise WASM Module loading...');
  let rnnoiseModule;

  try {
    const wasmBinary = fs.readFileSync(wasmFilePath);

    rnnoiseModule = await createRNNWasmModule({
      wasmBinary,
      locateFile: (path) => path,
    });

    console.log('RNNoise WASM Module successfully loaded and initialized.');
  } catch (error) {
    console.error('Error loading or initializing RNNoise WASM Module:', error);
    process.exit(1);
  }

  const RNNOISE_FRAME_SIZE = 480;  // 10ms bei 48kHz
  const rnnoiseStatePtr = rnnoiseModule._rnnoise_create();
  if (rnnoiseStatePtr === 0) {
    console.error('Could not create RNNoise state.');
    process.exit(1);
  }

  // Resample 8kHz 16bit mono Buffer -> Int16Array mit 48kHz Samples
  function resample8kTo48k(inputBuffer) {
    // inputBuffer: Buffer mit 80 Samples (160 Bytes) 8kHz
    const inputSamples = new Int16Array(
      inputBuffer.buffer,
      inputBuffer.byteOffset,
      inputBuffer.length / 2
    );
    const outputSamples = new Int16Array(480); // 10ms bei 48kHz

    for (let i = 0; i < 480; i++) {
      const pos = i * (inputSamples.length / 480);
      const idx = Math.floor(pos);
      const frac = pos - idx;

      const sample1 = inputSamples[idx] || 0;
      const sample2 = inputSamples[idx + 1] || 0;
      outputSamples[i] = sample1 + frac * (sample2 - sample1);
    }
    return outputSamples;
  }

  // Resample 48kHz Int16Array -> 8kHz Buffer (80 Samples)
  function resample48kTo8k(inputSamples) {
    const outputSamples = new Int16Array(80);
    for (let i = 0; i < 80; i++) {
      const pos = i * (inputSamples.length / 80);
      const idx = Math.floor(pos);
      const frac = pos - idx;

      const sample1 = inputSamples[idx] || 0;
      const sample2 = inputSamples[idx + 1] || 0;
      outputSamples[i] = sample1 + frac * (sample2 - sample1);
    }
    return Buffer.from(outputSamples.buffer);
  }

  function processAudioFrame(payload) {
    // Payload ist 8kHz 16bit PCM (mono)
    // Erwartet 80 Samples * 2 Byte = 160 Bytes pro Frame
    if (payload.length !== 160) {
      console.error(`Unexpected payload length: ${payload.length}, expected 160 bytes for 80 samples at 8kHz`);
      return;
    }

    // Richtig resamplen: 8kHz -> 48kHz
    const input48kSamples = resample8kTo48k(payload);
    const sampleCount = input48kSamples.length; // 480
    const byteLength = sampleCount * 2;         // Bytes

    const ptrIn = rnnoiseModule._malloc(byteLength);
    const ptrOut = rnnoiseModule._malloc(byteLength);

    if (ptrIn === 0 || ptrOut === 0) {
      console.error('malloc failed');
      if (ptrIn !== 0) rnnoiseModule._free(ptrIn);
      if (ptrOut !== 0) rnnoiseModule._free(ptrOut);
      return;
    }

    console.log('ptrIn:', ptrIn, 'ptrOut:', ptrOut);
    console.log('HEAP8.byteLength:', rnnoiseModule.HEAP8.byteLength);
    // Prüfe Speichergrenzen im Byte-Bereich (HEAP8 byteLength ist Gesamtspeichergröße)
    if (ptrIn < 0 || (ptrIn + byteLength) > rnnoiseModule.HEAP8.byteLength ||
        ptrOut < 0 || (ptrOut + byteLength) > rnnoiseModule.HEAP8.byteLength) {
      console.error('Pointer liegt außerhalb des Heaps!');
      rnnoiseModule._free(ptrIn);
      rnnoiseModule._free(ptrOut);
      return;
    }

    try {
      rnnoiseModule.HEAP16.set(input48kSamples, ptrIn >> 1);
      rnnoiseModule._rnnoise_process_frame(rnnoiseStatePtr, ptrOut, ptrIn);

      const denoisedSamples = new Int16Array(rnnoiseModule.HEAP16.buffer, ptrOut, sampleCount);
      const output8kBuffer = resample48kTo8k(denoisedSamples);

      console.log('Processed one audio frame');
      return output8kBuffer;
    } finally {
      console.log(`Freeing ptrIn: ${ptrIn}, ptrOut: ${ptrOut}`);
      rnnoiseModule._free(ptrIn);
      rnnoiseModule._free(ptrOut);
    }
  }

  // TCP-Server starten
  const server = net.createServer((socket) => {
    console.log('Client connected');

    let buffer = Buffer.alloc(0);

    socket.setNoDelay(true);

    socket.on('data', (data) => {
        console.log('Raw TCP data received:', data.toString('hex'));

      buffer = Buffer.concat([buffer, data]);

      while (buffer.length >= 3) {
        const type = buffer.readUInt8(0);
        const length = buffer.readUInt16BE(1);

        if (buffer.length < 3 + length) break; // Warte auf kompletten Payload

        const payload = buffer.slice(3, 3 + length);
        buffer = buffer.slice(3 + length);

        switch(type) {
          case 0x00:
            console.log('Connection Terminate');
            socket.end();
            break;
          case 0x01:
            console.log('UUID:', payload.toString('hex'));
            break;
          case 0x03:
            console.log('DTMF:', payload.toString());
            break;
          case 0x10:
            // Audio 8kHz 16-bit PCM Mono
            const processed = processAudioFrame(payload);
            if (processed) {
              // Hier kannst du den verarbeiteten Buffer senden oder weiterverarbeiten
              // Beispiel: socket.write(processed);
            }
            break;
          case 0xff:
            console.error('Error:', payload);
            break;
          default:
            console.warn('Unbekannter Typ:', type);
        }
      }
    });

    socket.on('close', () => {
      console.log('Client disconnected');
    });

    socket.on('error', (err) => {
      console.error('Socket error:', err);
    });
  });

  server.listen(7070, '127.0.0.1', () => {
    console.log('AudioSocket TCP server listening on 127.0.0.1:7070');
  });

  process.on('SIGINT', () => {
    console.log('Cleaning up RNNoise state...');
    rnnoiseModule._rnnoise_destroy(rnnoiseStatePtr);
    server.close(() => {
      console.log('TCP server closed');
      process.exit();
    });
  });
}

startServer();