Hi everyone,
I’m working on integrating RNNoise into an AudioSocket TCP server for Asterisk to denoise audio frames in real-time. I’m using the AudioSocket protocol standard (AudioSocket - Asterisk Documentation) and expect audio data packets (type 0x10) to have a payload length of 160 bytes (80 samples at 8 kHz, 16-bit PCM mono).
Problem:
RNNoise expects the audio at 48,000 Hz.
However, my TCP server is receiving packets with a payload length of 320 bytes (twice as large), which leads to errors like this during processing:
Unexpected payload length: 320, expected 160 bytes for 80 samples at 8kHz
So RNNoise expects the audio at 48,000 Hz…
My assumption:
It seems like the sender (Asterisk) is bundling 2 audio frames (each 80 samples / 160 bytes) into a single packet.
What I’ve done so far:
I’ve written the code expecting exactly 160 bytes per packet to process them with RNNoise.
Additionally, I tested another version of my Node.js script that runs, but the processed audio sounds heavily distorted. So I suspect the resampling or frame handling isn’t done correctly.
Question for the community:
How should one best handle such “double” packets? Is it common that AudioSocket sends multiple frames in a single packet? Should I adapt my implementation to parse and process multiple 160-byte audio frames individually?
Here’s a snippet of my receiving code:
while (buffer.length >= 3) {
const type = buffer.readUInt8(0);
const length = buffer.readUInt16BE(1);
if (buffer.length < 3 + length) break;
const payload = buffer.slice(3, 3 + length);
buffer = buffer.slice(3 + length);
if (type === 0x10) {
if (payload.length !== 160) {
console.error(`Unexpected payload length: ${payload.length}, expected 160 bytes for 80 samples at 8kHz`);
return;
}
processAudioFrame(payload);
}
}
Would it be advisable to extend this to split longer payloads (320, 480 bytes, etc.) into 160-byte chunks and process each chunk individually?
Thanks in advance for your help!
Kind Regards
–
FULL Code:
import { createRNNWasmModule } from '/etc/asterisk/scripts/node_modules/@bigbinary/rnnoise-wasm/index.js';
import { fileURLToPath } from 'url';
import path from 'path';
import fs from 'fs';
import net from 'net';
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const wasmFilePath = path.join(__dirname, 'rnnoise-wasm', 'dist', 'rnnoise.wasm');
async function startServer() {
console.log('Starting RNNoise WASM Module loading...');
let rnnoiseModule;
try {
const wasmBinary = fs.readFileSync(wasmFilePath);
rnnoiseModule = await createRNNWasmModule({
wasmBinary,
locateFile: (path) => path,
});
console.log('RNNoise WASM Module successfully loaded and initialized.');
} catch (error) {
console.error('Error loading or initializing RNNoise WASM Module:', error);
process.exit(1);
}
const RNNOISE_FRAME_SIZE = 480; // 10ms bei 48kHz
const rnnoiseStatePtr = rnnoiseModule._rnnoise_create();
if (rnnoiseStatePtr === 0) {
console.error('Could not create RNNoise state.');
process.exit(1);
}
// Resample 8kHz 16bit mono Buffer -> Int16Array mit 48kHz Samples
function resample8kTo48k(inputBuffer) {
// inputBuffer: Buffer mit 80 Samples (160 Bytes) 8kHz
const inputSamples = new Int16Array(
inputBuffer.buffer,
inputBuffer.byteOffset,
inputBuffer.length / 2
);
const outputSamples = new Int16Array(480); // 10ms bei 48kHz
for (let i = 0; i < 480; i++) {
const pos = i * (inputSamples.length / 480);
const idx = Math.floor(pos);
const frac = pos - idx;
const sample1 = inputSamples[idx] || 0;
const sample2 = inputSamples[idx + 1] || 0;
outputSamples[i] = sample1 + frac * (sample2 - sample1);
}
return outputSamples;
}
// Resample 48kHz Int16Array -> 8kHz Buffer (80 Samples)
function resample48kTo8k(inputSamples) {
const outputSamples = new Int16Array(80);
for (let i = 0; i < 80; i++) {
const pos = i * (inputSamples.length / 80);
const idx = Math.floor(pos);
const frac = pos - idx;
const sample1 = inputSamples[idx] || 0;
const sample2 = inputSamples[idx + 1] || 0;
outputSamples[i] = sample1 + frac * (sample2 - sample1);
}
return Buffer.from(outputSamples.buffer);
}
function processAudioFrame(payload) {
// Payload ist 8kHz 16bit PCM (mono)
// Erwartet 80 Samples * 2 Byte = 160 Bytes pro Frame
if (payload.length !== 160) {
console.error(`Unexpected payload length: ${payload.length}, expected 160 bytes for 80 samples at 8kHz`);
return;
}
// Richtig resamplen: 8kHz -> 48kHz
const input48kSamples = resample8kTo48k(payload);
const sampleCount = input48kSamples.length; // 480
const byteLength = sampleCount * 2; // Bytes
const ptrIn = rnnoiseModule._malloc(byteLength);
const ptrOut = rnnoiseModule._malloc(byteLength);
if (ptrIn === 0 || ptrOut === 0) {
console.error('malloc failed');
if (ptrIn !== 0) rnnoiseModule._free(ptrIn);
if (ptrOut !== 0) rnnoiseModule._free(ptrOut);
return;
}
console.log('ptrIn:', ptrIn, 'ptrOut:', ptrOut);
console.log('HEAP8.byteLength:', rnnoiseModule.HEAP8.byteLength);
// Prüfe Speichergrenzen im Byte-Bereich (HEAP8 byteLength ist Gesamtspeichergröße)
if (ptrIn < 0 || (ptrIn + byteLength) > rnnoiseModule.HEAP8.byteLength ||
ptrOut < 0 || (ptrOut + byteLength) > rnnoiseModule.HEAP8.byteLength) {
console.error('Pointer liegt außerhalb des Heaps!');
rnnoiseModule._free(ptrIn);
rnnoiseModule._free(ptrOut);
return;
}
try {
rnnoiseModule.HEAP16.set(input48kSamples, ptrIn >> 1);
rnnoiseModule._rnnoise_process_frame(rnnoiseStatePtr, ptrOut, ptrIn);
const denoisedSamples = new Int16Array(rnnoiseModule.HEAP16.buffer, ptrOut, sampleCount);
const output8kBuffer = resample48kTo8k(denoisedSamples);
console.log('Processed one audio frame');
return output8kBuffer;
} finally {
console.log(`Freeing ptrIn: ${ptrIn}, ptrOut: ${ptrOut}`);
rnnoiseModule._free(ptrIn);
rnnoiseModule._free(ptrOut);
}
}
// TCP-Server starten
const server = net.createServer((socket) => {
console.log('Client connected');
let buffer = Buffer.alloc(0);
socket.setNoDelay(true);
socket.on('data', (data) => {
console.log('Raw TCP data received:', data.toString('hex'));
buffer = Buffer.concat([buffer, data]);
while (buffer.length >= 3) {
const type = buffer.readUInt8(0);
const length = buffer.readUInt16BE(1);
if (buffer.length < 3 + length) break; // Warte auf kompletten Payload
const payload = buffer.slice(3, 3 + length);
buffer = buffer.slice(3 + length);
switch(type) {
case 0x00:
console.log('Connection Terminate');
socket.end();
break;
case 0x01:
console.log('UUID:', payload.toString('hex'));
break;
case 0x03:
console.log('DTMF:', payload.toString());
break;
case 0x10:
// Audio 8kHz 16-bit PCM Mono
const processed = processAudioFrame(payload);
if (processed) {
// Hier kannst du den verarbeiteten Buffer senden oder weiterverarbeiten
// Beispiel: socket.write(processed);
}
break;
case 0xff:
console.error('Error:', payload);
break;
default:
console.warn('Unbekannter Typ:', type);
}
}
});
socket.on('close', () => {
console.log('Client disconnected');
});
socket.on('error', (err) => {
console.error('Socket error:', err);
});
});
server.listen(7070, '127.0.0.1', () => {
console.log('AudioSocket TCP server listening on 127.0.0.1:7070');
});
process.on('SIGINT', () => {
console.log('Cleaning up RNNoise state...');
rnnoiseModule._rnnoise_destroy(rnnoiseStatePtr);
server.close(() => {
console.log('TCP server closed');
process.exit();
});
});
}
startServer();