[wip] never ask how this works

Signed-off-by: Maksim Sukharev <antreesy.web@gmail.com>
This commit is contained in:
Maksim Sukharev 2025-11-28 14:32:13 +01:00
parent b124a6796d
commit f9d7737f3a
15 changed files with 1620 additions and 1 deletions

View file

@ -162,6 +162,12 @@ module.exports = defineConfig((env) => {
},
type: 'javascript/auto',
},
{
test: /\.m?js/,
resolve: {
fullySpecified: false
}
},
{
test: /\.(png|jpe?g|gif|svg|webp)$/i,
type: 'asset',
@ -182,6 +188,10 @@ module.exports = defineConfig((env) => {
resourceQuery: /raw/,
type: 'asset/source',
},
{
resourceQuery: /url$/,
type: 'asset/resource',
},
],
},

View file

@ -56,6 +56,13 @@ import IconMicrophoneOutline from 'vue-material-design-icons/MicrophoneOutline.v
import { useAudioEncoder } from '../../composables/useAudioEncoder.ts'
import { useGetToken } from '../../composables/useGetToken.ts'
import { mediaDevicesManager } from '../../utils/webrtc/index.js'
import { NoiseSuppressorWorklet_Name } from "@timephy/rnnoise-wasm"
// This is an example how to get the script path using Vite, may be different when using other build tools
// NOTE: `?worker&url` is important (`worker` to generate a working script, `url` to get its url to load it)
// import NoiseSuppressorWorklet = new Worker(new URL('@timephy/rnnoise-wasm/NoiseSuppressorWorklet', import.meta.url))
// import NoiseSuppressorWorklet from "../../utils/noise/NoiseSuppressorWorklet?url"
// console.log('local', NoiseSuppressorWorklet)
export default {
name: 'NewMessageAudioRecorder',
@ -190,18 +197,43 @@ export default {
// Create a media recorder to capture the stream
try {
const audioContext = new AudioContext()
// Load the NoiseSuppressorWorklet into the AudioContext
// Load the NoiseSuppressorWorklet into the AudioContext
// Fetch the module and use a blob URL to avoid CORS / import-url issues
let workletModuleUrl = undefined
if (typeof workletModuleUrl !== 'string') {
workletModuleUrl = new URL('../../utils/noise/NoiseSuppressorWorkletBundle.js', import.meta.url).href
}
const resp = await fetch(workletModuleUrl)
if (!resp.ok) {
throw new Error('Failed to fetch worklet module: ' + resp.status)
}
const scriptText = await resp.text()
const blob = new Blob([scriptText], { type: 'application/javascript' })
const blobUrl = URL.createObjectURL(blob)
try {
await audioContext.audioWorklet.addModule(blobUrl)
} finally {
URL.revokeObjectURL(blobUrl)
}
// await audioContext.audioWorklet.addModule(NoiseSuppressorWorklet)
// Instantiate the Worklet as a Node
const noiseSuppressionNode = new AudioWorkletNode(audioContext, NoiseSuppressorWorklet_Name)
const mediaStreamAudioSourceNode = audioContext.createMediaStreamSource(this.audioStream)
const mediaStreamAudioDestinationNode = audioContext.createMediaStreamDestination()
mediaStreamAudioSourceNode
.connect(noiseSuppressionNode) // pass audio through noise suppression
.connect(mediaStreamAudioDestinationNode) // playback audio on output device
this.mediaRecorder = new this.MediaRecorder(mediaStreamAudioDestinationNode.stream, {
mimeType: 'audio/wav',
})
} catch (exception) {
console.debug(exception)
console.error(exception)
this.killStreams()
this.audioStream = null
showError(t('spreed', 'Error while recording audio'))

View file

@ -0,0 +1 @@
import "./polyfills";

View file

@ -0,0 +1,149 @@
// https://github.com/jitsi/jitsi-meet/blob/270cdd017ddab7f72896c4194a474ddc7e0d4bf4/react/features/stream-effects/noise-suppression/NoiseSuppressorWorklet.ts
// NOTE: polyfills for `atob`+`self.location.href` used by `rnnoise-wasm` but not available in `AudioWorkletGlobalScope`
import "./polyfills";
import RnnoiseProcessor from "./RnnoiseProcessor";
import createRNNWasmModuleSync from "./generated/rnnoise-sync";
import { NoiseSuppressorWorklet_Name } from "./index";
import { leastCommonMultiple } from "./math";
/**
* Audio worklet processor which denoises the targeted audio stream using rnnoise.
*
* `process()` receives audio in blocks of `_procNodeSampleRate` samples, while rnnoise works on frames of
* `_denoiseSampleSize` samples. Incoming audio is therefore staged in a circular buffer, denoised in place
* frame by frame, and copied to the output one block at a time once enough denoised data is available.
*/
class NoiseSuppressorWorklet extends AudioWorkletProcessor {
/**
* RnnoiseProcessor instance which performs the actual denoising.
*/
_denoiseProcessor;
/**
* Number of samples per channel expected in every `process()` call.
* Despite the name this is a block size (samples per call), not a sampling frequency.
*/
_procNodeSampleRate = 128;
/**
* Number of PCM samples per frame expected by the denoise processor
* (read from `RnnoiseProcessor.getSampleLength()` in the constructor).
*/
_denoiseSampleSize;
/**
* Circular buffer data used for efficient memory operations.
* `_circularBufferLength` is the least common multiple of the block size and the rnnoise frame size.
*/
_circularBufferLength;
_circularBuffer;
/**
* The circular buffer uses a couple of indexes to track data segments. Input data from the stream is
* copied to the circular buffer as it comes in, one `_procNodeSampleRate` sized block at a time.
* `_inputBufferLength` denotes the current length of all gathered raw audio segments, i.e. the write
* position for the next incoming block.
*/
_inputBufferLength = 0;
/**
* Denoising is done directly on the circular buffer using subarray views, but because
* `_procNodeSampleRate` and `_denoiseSampleSize` have different sizes, denoised samples lag behind
* the current gathered raw audio samples so we need a different index, `_denoisedBufferLength`.
* Everything in the buffer before this index has already been denoised.
*/
_denoisedBufferLength = 0;
/**
* Once enough data has been denoised (one block of `_procNodeSampleRate` samples) it's sent to the
* output buffer. `_denoisedBufferIndx` indicates the start index on the circular buffer
* of denoised data not yet sent.
*/
_denoisedBufferIndx = 0;
/**
* Creates the rnnoise processor and allocates the circular buffer.
*/
constructor() {
super();
/**
* The wasm module needs to be compiled to load synchronously as the audio worklet `addModule()`
* initialization process does not wait for the resolution of promises in the AudioWorkletGlobalScope.
*/
this._denoiseProcessor = new RnnoiseProcessor(createRNNWasmModuleSync());
/**
* PCM frame size (in samples) expected by the denoise processor.
*/
this._denoiseSampleSize = this._denoiseProcessor.getSampleLength();
/**
* In order to avoid unnecessary memory related operations a circular buffer is used.
* Because the audio worklet input array does not match the frame size required by rnnoise two cases can occur:
* 1. There is not enough data, in which case we buffer it.
* 2. There is enough data but some residue remains after the call to `processAudioFrame`, so it is buffered
* for the next call.
* A problem arises when the circular buffer reaches the end and a rollover is required, namely
* the residue could potentially be split between the end of the buffer and the beginning and would
* require some complicated logic to handle. Using the lcm as the size of the buffer
* guarantees that the buffer length is a whole multiple of both the input block size and the rnnoise
* frame size, so neither a block nor a frame is ever split across the end of the buffer.
*/
this._circularBufferLength = leastCommonMultiple(this._procNodeSampleRate, this._denoiseSampleSize);
this._circularBuffer = new Float32Array(this._circularBufferLength);
}
/**
* Worklet interface process method. The inputs parameter contains PCM audio that is then sent to rnnoise.
* Rnnoise only accepts frames of `_denoiseSampleSize` PCM samples whereas `process` handles blocks of
* 128 samples; we take this into account using a circular buffer.
*
* Until the first rnnoise frame has been gathered and denoised nothing is written to the output, so the
* first few output blocks are left as they were passed in.
*
* @param {Float32Array[][]} inputs - Array of inputs connected to the node, each of them with their associated
* array of channels. Each channel is an array of 128 pcm samples.
* @param {Float32Array[][]} outputs - Array of outputs similar to the inputs parameter structure, expected to be
* filled during the execution of `process`. By default each channel is zero filled.
* @returns {boolean} - Boolean value that returns whether or not the processor should remain active. Returning
* false will terminate it. This implementation always returns true.
*/
process(inputs, outputs) {
// NOTE: We expect the incoming track to be mono.
// NOTE: If a stereo track is passed only the first channel will get denoised and sent back (as stereo).
const inData = inputs[0][0];
// NOTE(review): `outData` is not guarded below; this assumes the first output always has a channel - confirm.
const outData = outputs[0][0];
// Exit out early if there is no input data (input node not connected/disconnected)
// as the rest of the worklet would crash otherwise.
if (!inData) {
return true;
}
// Append the new raw PCM block at the current write position.
this._circularBuffer.set(inData, this._inputBufferLength);
this._inputBufferLength += inData.length;
// New raw samples were just added, start denoising frames. _denoisedBufferLength gives us
// the position at which the previous denoise iteration ended, basically it takes into account
// residue data. The loop runs once per complete rnnoise frame that is available.
for (; this._denoisedBufferLength + this._denoiseSampleSize <= this._inputBufferLength; this._denoisedBufferLength += this._denoiseSampleSize) {
// Create a view of the circular buffer so it can be modified in place, removing the need for
// extra copies.
const denoiseFrame = this._circularBuffer.subarray(this._denoisedBufferLength, this._denoisedBufferLength + this._denoiseSampleSize);
this._denoiseProcessor.processAudioFrame(denoiseFrame, true);
}
// Determine how much denoised audio is available. If the start index of the unsent denoised samples is
// greater than _denoisedBufferLength, the denoise index has already rolled over to the start of the
// buffer, so the unsent data is whatever remains up to the end of the buffer.
let unsentDenoisedDataLength;
if (this._denoisedBufferIndx > this._denoisedBufferLength) {
unsentDenoisedDataLength = this._circularBufferLength - this._denoisedBufferIndx;
}
else {
unsentDenoisedDataLength = this._denoisedBufferLength - this._denoisedBufferIndx;
}
// Only copy denoised data to the output when there's enough of it to fill the exact output block length.
// Illustrative example: if the block size were 1024 samples but only 960 had been denoised (this happens on
// the first iteration) nothing happens, then on the next iteration 1920 samples will be denoised so we send
// 1024 which leaves 896 for the next iteration and so on.
if (unsentDenoisedDataLength >= outData.length) {
const denoisedFrame = this._circularBuffer.subarray(this._denoisedBufferIndx, this._denoisedBufferIndx + outData.length);
// // NOTE: To output on both output channels (added by Copilot)
// // Copy denoised frame to each channel of the output
// for (let i = 0; i < outputs[0].length; i++) {
// outputs[0][i].set(denoisedFrame, 0)
// }
// // NOTE: end
outData.set(denoisedFrame, 0);
this._denoisedBufferIndx += outData.length;
}
// When the end of the circular buffer has been reached, start from the beginning. By the time the index
// starts over, the data from the beginning is stale (has already been processed) and can be safely
// overwritten.
if (this._denoisedBufferIndx === this._circularBufferLength) {
this._denoisedBufferIndx = 0;
}
// Because the circular buffer's length is the lcm of both the input block size and the processor's frame
// size, by the time we reach the end with the input index the denoise length index will be there as well.
if (this._inputBufferLength === this._circularBufferLength) {
this._inputBufferLength = 0;
this._denoisedBufferLength = 0;
}
return true;
}
}
// Register the processor class under the shared name exported from `./index`, so that code creating the
// node can refer to it by that same name.
registerProcessor(NoiseSuppressorWorklet_Name, NoiseSuppressorWorklet);

File diff suppressed because one or more lines are too long

88
src/utils/noise/RnnoiseProcessor.d.ts vendored Normal file
View file

@ -0,0 +1,88 @@
/**
 * Emscripten module of the rnnoise wasm build, extended with the exported rnnoise entry points.
 * NOTE(review): the numeric arguments/results look like handles or addresses inside the wasm heap - confirm
 * against the wasm build.
 */
export interface IRnnoiseModule extends EmscriptenModule {
/** Creates an rnnoise context and returns a number identifying it. */
_rnnoise_create: () => number;
/** Destroys a context previously returned by `_rnnoise_create`. */
_rnnoise_destroy: (context: number) => void;
/** Processes one frame read from `input`, writing the result to `output`; returns a number (used as the VAD score by `RnnoiseProcessor`). */
_rnnoise_process_frame: (context: number, input: number, output: number) => number;
}
/**
* Constant. Rnnoise default sample size, samples of different size won't work.
*/
export declare const RNNOISE_SAMPLE_LENGTH = 480;
/**
 * Represents an adaptor for the rnnoise library compiled to webassembly. The class takes care of webassembly
 * memory management and exposes rnnoise functionality such as PCM audio denoising and VAD (voice activity
 * detection) scores.
 *
 * An instance owns wasm-side resources; call `destroy()` when it is no longer needed.
 */
export default class RnnoiseProcessor {
/**
 * Rnnoise context object needed to perform the audio processing.
 */
private _context;
/**
 * State flag, true once the instance was destroyed.
 */
private _destroyed;
/**
 * WASM interface through which calls to rnnoise are made.
 */
private _wasmInterface;
/**
 * WASM dynamic memory buffer used as input (and output) for the rnnoise processing method.
 */
private _wasmPcmInput;
/**
 * The Float32Array index representing the start point in the wasm heap of the _wasmPcmInput buffer.
 */
private _wasmPcmInputF32Index;
/**
 * Constructor.
 *
 * @class
 * @param {IRnnoiseModule} wasmInterface - WebAssembly module interface that exposes rnnoise functionality.
 */
constructor(wasmInterface: IRnnoiseModule);
/**
 * Release resources associated with the wasm context. If something goes downhill here
 * i.e. an exception is thrown, there is not much we can do.
 * Prefer `destroy()`, which guards against releasing twice.
 *
 * @returns {void}
 */
_releaseWasmResources(): void;
/**
 * Rnnoise can only operate on a certain PCM array size.
 *
 * @returns {number} - The PCM frame size in samples as required by rnnoise (`RNNOISE_SAMPLE_LENGTH`).
 */
getSampleLength(): number;
/**
 * Rnnoise can only operate on a certain format of PCM sample, namely float 32 at 44.1 kHz.
 *
 * @returns {number} - PCM sample frequency in Hz as required by rnnoise.
 */
getRequiredPCMFrequency(): number;
/**
 * Release any resources required by the rnnoise context. This needs to be called
 * before discarding any object that uses the processor. Calling it more than once is harmless.
 *
 * @returns {void}
 */
destroy(): void;
/**
 * Calculate the Voice Activity Detection score for a raw Float32 PCM sample array.
 * The size of the array must be exactly 480 samples, this constraint comes from the rnnoise library.
 *
 * @param {Float32Array} pcmFrame - Array containing 32 bit float PCM samples.
 * @returns {number} VAD score in the interval 0 - 1, e.g. 0.90.
 */
calculateAudioFrameVAD(pcmFrame: Float32Array): number;
/**
 * Process an audio frame, optionally denoising the input pcmFrame in place, and return the Voice Activity
 * Detection score for the raw Float32 PCM sample array.
 * The size of the array must be exactly 480 samples, this constraint comes from the rnnoise library.
 *
 * @param {Float32Array} pcmFrame - Array containing 32 bit float PCM samples. Parameter is also used as output
 * when {@code shouldDenoise} is true.
 * @param {boolean} shouldDenoise - Should the denoised frame be written back into pcmFrame.
 * @returns {number} VAD score in the interval 0 - 1, e.g. 0.90.
 */
processAudioFrame(pcmFrame: Float32Array, shouldDenoise?: boolean): number;
}

View file

@ -0,0 +1,153 @@
// https://github.com/jitsi/jitsi-meet/blob/270cdd017ddab7f72896c4194a474ddc7e0d4bf4/react/features/stream-effects/rnnoise/RnnoiseProcessor.ts
/**
 * Constant. Number of PCM samples rnnoise consumes per frame; frames of a different size won't work.
 */
export const RNNOISE_SAMPLE_LENGTH = 480;
/**
 * Constant. Size in bytes of one rnnoise frame: 480 float32 samples, 4 bytes each.
 */
const RNNOISE_BUFFER_SIZE = RNNOISE_SAMPLE_LENGTH * 4;
/**
 * Constant. PCM sample frequency (Hz) reported by `getRequiredPCMFrequency()`.
 * NOTE(review): rnnoise is commonly described as operating on 48 kHz audio - confirm this value.
 */
const PCM_FREQUENCY = 44100;
/**
 * Scale factor (2^15). Samples are multiplied by it before being handed to rnnoise and divided by it
 * afterwards, i.e. rnnoise sees float values in the 16 bit PCM amplitude range.
 */
const SHIFT_16_BIT_NR = 32768;
/**
 * Represents an adaptor for the rnnoise library compiled to webassembly. The class takes care of webassembly
 * memory management and exposes rnnoise functionality such as PCM audio denoising and VAD (voice activity
 * detection) scores.
 */
export default class RnnoiseProcessor {
    /**
     * Rnnoise context object needed to perform the audio processing.
     */
    _context;
    /**
     * State flag, true once the instance was destroyed.
     */
    _destroyed = false;
    /**
     * WASM interface through which calls to rnnoise are made.
     */
    _wasmInterface;
    /**
     * WASM dynamic memory buffer used as input (and output) for the rnnoise processing method.
     */
    _wasmPcmInput;
    /**
     * The Float32Array index representing the start point in the wasm heap of the _wasmPcmInput buffer.
     */
    _wasmPcmInputF32Index;
    /**
     * Constructor. Allocates the wasm frame buffer and creates the rnnoise context.
     *
     * @class
     * @param {Object} wasmInterface - WebAssembly module interface that exposes rnnoise functionality.
     * @throws {Error} If the frame buffer or the rnnoise context cannot be created. Anything that was
     * allocated up to that point is released before the error propagates.
     */
    constructor(wasmInterface) {
        // Considering that we deal with dynamically allocated memory, employ the strong exception safety
        // guarantee, i.e. in case of an exception there are no side effects.
        try {
            this._wasmInterface = wasmInterface;
            // Allocate the frame buffer once and reuse it for every processed frame.
            this._wasmPcmInput = this._wasmInterface._malloc(RNNOISE_BUFFER_SIZE);
            if (!this._wasmPcmInput) {
                throw new Error("Failed to create wasm input memory buffer!");
            }
            // Byte address -> index into the Float32 view of the heap (4 bytes per element).
            this._wasmPcmInputF32Index = this._wasmPcmInput >> 2;
            this._context = this._wasmInterface._rnnoise_create();
            if (!this._context) {
                throw new Error("Failed to create rnnoise context!");
            }
        }
        catch (error) {
            // Release can be called even if not all the components were initialized.
            this.destroy();
            throw error;
        }
    }
    /**
     * Release resources associated with the wasm context. If something goes downhill here
     * i.e. an exception is thrown, there is not much we can do.
     * Released handles are cleared so that calling this twice never frees the same memory twice.
     *
     * @returns {void}
     */
    _releaseWasmResources() {
        if (this._wasmPcmInput) {
            this._wasmInterface._free(this._wasmPcmInput);
            this._wasmPcmInput = undefined;
            this._wasmPcmInputF32Index = undefined;
        }
        if (this._context) {
            this._wasmInterface._rnnoise_destroy(this._context);
            this._context = undefined;
        }
    }
    /**
     * Rnnoise can only operate on a certain PCM array size.
     *
     * @returns {number} - The PCM frame size in samples as required by rnnoise.
     */
    getSampleLength() {
        return RNNOISE_SAMPLE_LENGTH;
    }
    /**
     * Rnnoise can only operate on a certain format of PCM sample, namely float 32 at 44.1 kHz.
     *
     * @returns {number} - PCM sample frequency in Hz as required by rnnoise.
     */
    getRequiredPCMFrequency() {
        return PCM_FREQUENCY;
    }
    /**
     * Release any resources required by the rnnoise context. This needs to be called
     * before discarding any object that uses the processor. Calling it again is a no-op.
     *
     * @returns {void}
     */
    destroy() {
        // Already released, do nothing.
        if (this._destroyed) {
            return;
        }
        this._releaseWasmResources();
        this._destroyed = true;
    }
    /**
     * Calculate the Voice Activity Detection score for a raw Float32 PCM sample array.
     * The size of the array must be exactly 480 samples, this constraint comes from the rnnoise library.
     *
     * @param {Float32Array} pcmFrame - Array containing 32 bit float PCM samples. It is not modified.
     * @returns {number} VAD score in the interval 0 - 1, e.g. 0.90.
     * @throws {Error} If the processor has already been destroyed.
     */
    calculateAudioFrameVAD(pcmFrame) {
        return this.processAudioFrame(pcmFrame);
    }
    /**
     * Process an audio frame, optionally denoising the input pcmFrame in place, and return the Voice Activity
     * Detection score for the raw Float32 PCM sample array.
     * The size of the array must be exactly 480 samples, this constraint comes from the rnnoise library.
     *
     * @param {Float32Array} pcmFrame - Array containing 32 bit float PCM samples. Parameter is also used as output
     * when {@code shouldDenoise} is true.
     * @param {boolean} shouldDenoise - Should the denoised frame be written back into pcmFrame.
     * @returns {number} VAD score in the interval 0 - 1, e.g. 0.90.
     * @throws {Error} If the processor has already been destroyed (its wasm buffers are gone).
     */
    processAudioFrame(pcmFrame, shouldDenoise = false) {
        // After destroy() the wasm buffer and context have been freed; touching them would corrupt the heap.
        if (this._destroyed) {
            throw new Error("RnnoiseProcessor instance is destroyed, please create another one!");
        }
        // Read the heap view on every call: it is looked up through the module rather than cached.
        const heap = this._wasmInterface.HEAPF32;
        const base = this._wasmPcmInputF32Index;
        // Scale the float samples up to the 16 bit PCM amplitude range, as that's what rnnoise accepts as input.
        for (let i = 0; i < RNNOISE_SAMPLE_LENGTH; i++) {
            heap[base + i] = pcmFrame[i] * SHIFT_16_BIT_NR;
        }
        // Use the same buffer for input/output, rnnoise supports this behavior.
        const vadScore = this._wasmInterface._rnnoise_process_frame(this._context, this._wasmPcmInput, this._wasmPcmInput);
        // Rnnoise denoises the frame by default but we can avoid unnecessary operations if the calling
        // client doesn't use the denoised frame.
        if (shouldDenoise) {
            // Scale back down to the original float range.
            for (let i = 0; i < RNNOISE_SAMPLE_LENGTH; i++) {
                pcmFrame[i] = heap[base + i] / SHIFT_16_BIT_NR;
            }
        }
        return vadScore;
    }
}

View file

@ -0,0 +1,2 @@
export default createRNNWasmModuleSync;
/**
 * Factory for the rnnoise wasm module. The worklet passes its return value to `RnnoiseProcessor` as the
 * wasm interface.
 * NOTE(review): both the argument and the return value are typed as `{}` here; the returned object is
 * presumably an `IRnnoiseModule` - confirm against the generated module.
 */
declare function createRNNWasmModuleSync(moduleArg?: {}): {};

File diff suppressed because one or more lines are too long

1
src/utils/noise/index.d.ts vendored Normal file
View file

@ -0,0 +1 @@
export declare const NoiseSuppressorWorklet_Name = "NoiseSuppressorWorklet";

1
src/utils/noise/index.js Normal file
View file

@ -0,0 +1 @@
/**
 * Name under which the noise suppressor worklet registers its processor (see `registerProcessor` in
 * `NoiseSuppressorWorklet`).
 */
export const NoiseSuppressorWorklet_Name = "NoiseSuppressorWorklet";

16
src/utils/noise/math.d.ts vendored Normal file
View file

@ -0,0 +1,16 @@
/**
 * Compute the greatest common divisor using Euclid's algorithm.
 * Intended for positive integers.
 *
 * @param {number} num1 - First number.
 * @param {number} num2 - Second number.
 * @returns {number} The greatest common divisor of both numbers.
 */
export declare function greatestCommonDivisor(num1: number, num2: number): number;
/**
 * Calculate least common multiple using gcd.
 * Intended for positive integers.
 *
 * @param {number} num1 - First number.
 * @param {number} num2 - Second number.
 * @returns {number} The least common multiple of both numbers.
 */
export declare function leastCommonMultiple(num1: number, num2: number): number;

34
src/utils/noise/math.js Normal file
View file

@ -0,0 +1,34 @@
// https://github.com/jitsi/jitsi-meet/blob/270cdd017ddab7f72896c4194a474ddc7e0d4bf4/react/features/base/util/math.ts#L30
/**
 * Compute the greatest common divisor using Euclid's algorithm (remainder form).
 *
 * Intended for integers. The sign of the arguments is ignored and `gcd(n, 0)` is `|n|`, so the function
 * always terminates; the previous subtraction-based loop never finished when an argument was zero,
 * negative or NaN.
 *
 * @param {number} num1 - First number.
 * @param {number} num2 - Second number.
 * @returns {number} The non-negative greatest common divisor, or NaN if either argument is not finite.
 */
export function greatestCommonDivisor(num1, num2) {
    if (!Number.isFinite(num1) || !Number.isFinite(num2)) {
        return NaN;
    }
    let dividend = Math.abs(num1);
    let divisor = Math.abs(num2);
    // Each step replaces the pair (a, b) with (b, a mod b); the remainder shrinks until it hits zero.
    while (divisor !== 0) {
        const remainder = dividend % divisor;
        dividend = divisor;
        divisor = remainder;
    }
    return dividend;
}
/**
 * Calculate least common multiple using gcd.
 *
 * Intended for integers. The sign of the arguments is ignored, and the result is 0 when either argument
 * is 0. These cases are handled before the gcd helper is called, so it only ever receives positive values.
 *
 * @param {number} num1 - First number.
 * @param {number} num2 - Second number.
 * @returns {number} The non-negative least common multiple, or NaN if either argument is not finite.
 */
export function leastCommonMultiple(num1, num2) {
    if (!Number.isFinite(num1) || !Number.isFinite(num2)) {
        return NaN;
    }
    if (num1 === 0 || num2 === 0) {
        return 0;
    }
    const number1 = Math.abs(num1);
    const number2 = Math.abs(num2);
    const gcd = greatestCommonDivisor(number1, number2);
    // Divide before multiplying so the intermediate value stays as small as possible.
    return (number1 / gcd) * number2;
}

2
src/utils/noise/polyfills.d.ts vendored Normal file
View file

@ -0,0 +1,2 @@
declare const b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
declare const b64re: RegExp;

View file

@ -0,0 +1,63 @@
"use strict";
// https://github.com/MaxArt2501/base64-js/blob/master/base64.js
/* ============================================================================================== */
// base64 character set, plus padding character (=)
const b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
// Regular expression to check formal correctness of base64 encoded strings
// eslint-disable-next-line no-useless-escape
const b64re = /^(?:[A-Za-z\d+\/]{4})*?(?:[A-Za-z\d+\/]{2}(?:==)?|[A-Za-z\d+\/]{3}=?)?$/;
// globalThis.btoa = function (string) {
// string = String(string)
// let bitmap,
// a,
// b,
// c,
// result = "",
// i = 0,
// rest = string.length % 3 // To determine the final padding
// for (; i < string.length; ) {
// if (
// (a = string.charCodeAt(i++)) > 255 ||
// (b = string.charCodeAt(i++)) > 255 ||
// (c = string.charCodeAt(i++)) > 255
// )
// throw new TypeError(
// "Failed to execute 'btoa' on 'Window': The string to be encoded contains characters outside of the Latin1 range.",
// )
// bitmap = (a << 16) | (b << 8) | c
// result +=
// b64.charAt((bitmap >> 18) & 63) +
// b64.charAt((bitmap >> 12) & 63) +
// b64.charAt((bitmap >> 6) & 63) +
// b64.charAt(bitmap & 63)
// }
// // If there's need of padding, replace the last 'A's with equal signs
// return rest ? result.slice(0, rest - 3) + "===".substring(rest) : result
// }
/**
 * Decode a base64 encoded string into a binary string (one character per decoded byte).
 *
 * @param {string} string - Base64 input; it is converted with `String()` first. Missing padding is tolerated.
 * @returns {string} The decoded binary string.
 * @throws {TypeError} If the input is not correctly encoded base64.
 */
function atobPolyfill(string) {
    // atob can work with strings with whitespaces, even inside the encoded part,
    // but only \t, \n, \f, \r and ' ', which can be stripped.
    const stripped = String(string).replace(/[\t\n\f\r ]+/g, "");
    if (!b64re.test(stripped)) {
        throw new TypeError("Failed to execute 'atob' on 'Window': The string to be decoded is not correctly encoded.");
    }
    // Add the padding if missing, for simplicity: every group below is then exactly 4 characters long.
    const padded = stripped + "==".slice(2 - (stripped.length & 3));
    let result = "";
    for (let i = 0; i < padded.length; i += 4) {
        // Index 64 in the character set is the padding character.
        const sextet1 = b64.indexOf(padded.charAt(i));
        const sextet2 = b64.indexOf(padded.charAt(i + 1));
        const sextet3 = b64.indexOf(padded.charAt(i + 2));
        const sextet4 = b64.indexOf(padded.charAt(i + 3));
        const bitmap = (sextet1 << 18) | (sextet2 << 12) | (sextet3 << 6) | sextet4;
        if (sextet3 === 64) {
            // "xx==": only one byte is encoded in this group.
            result += String.fromCharCode((bitmap >> 16) & 255);
        }
        else if (sextet4 === 64) {
            // "xxx=": two bytes are encoded in this group.
            result += String.fromCharCode((bitmap >> 16) & 255, (bitmap >> 8) & 255);
        }
        else {
            result += String.fromCharCode((bitmap >> 16) & 255, (bitmap >> 8) & 255, bitmap & 255);
        }
    }
    return result;
}
// Only install the polyfill where `atob` is missing, so that a native implementation is never replaced
// if this module gets evaluated outside of an `AudioWorkletGlobalScope`.
if (typeof globalThis.atob !== "function") {
    globalThis.atob = atobPolyfill;
}
/* ============================================================================================== */
// Provide a minimal `self.location.href` where `self` is missing; an existing `self` is left untouched.
if (typeof globalThis.self === "undefined") {
    // @ts-expect-error does not exist in `AudioWorkletGlobalScope`
    globalThis.self = { location: { href: "" } };
}