From b125bff7c77ed3a73ef7a9d9767779ec976cfb99 Mon Sep 17 00:00:00 2001 From: Andrei Gavrilescu Date: Wed, 6 Nov 2019 11:25:08 +0200 Subject: [PATCH] refactor / enable VAD talk while muted --- conference.js | 2 +- react/features/base/devices/services/index.js | 1 - .../devices/services/vad-reporter/Events.js | 15 +- .../services/vad-reporter/TrackVADEmitter.js | 258 ------------------ .../vad-reporter/VADReportingService.js | 68 ++--- react/features/base/lib-jitsi-meet/index.js | 1 + react/features/rnnoise/functions.js | 17 -- .../rnnoise/RnnoiseProcessor.js | 18 +- webpack.config.js | 8 +- 9 files changed, 63 insertions(+), 325 deletions(-) delete mode 100644 react/features/base/devices/services/vad-reporter/TrackVADEmitter.js diff --git a/conference.js b/conference.js index 257a0d943f..e9afc4dd19 100644 --- a/conference.js +++ b/conference.js @@ -1268,7 +1268,7 @@ export default { options.applicationName = interfaceConfig.APP_NAME; options.getWiFiStatsMethod = this._getWiFiStatsMethod; options.confID = `${locationURL.host}${locationURL.pathname}`; - options.vadProcessor = createRnnoiseProcessorPromise; + options.createVADProcessor = createRnnoiseProcessorPromise; return options; }, diff --git a/react/features/base/devices/services/index.js b/react/features/base/devices/services/index.js index 2bb9f3a17f..3b21e44539 100644 --- a/react/features/base/devices/services/index.js +++ b/react/features/base/devices/services/index.js @@ -1,5 +1,4 @@ export * from './device-detect/ActiveDeviceDetector'; export * from './device-detect/Events'; export * from './vad-reporter/Events'; -export * from './vad-reporter/TrackVADEmitter'; export * from './vad-reporter/VADReportingService'; diff --git a/react/features/base/devices/services/vad-reporter/Events.js b/react/features/base/devices/services/vad-reporter/Events.js index ff16eeeb94..da110cfdee 100644 --- a/react/features/base/devices/services/vad-reporter/Events.js +++ 
b/react/features/base/devices/services/vad-reporter/Events.js @@ -1,7 +1,10 @@ -// Event generated by a TrackVADEmitter when it emits a VAD score from rnnoise. -// The generated objects are of type VADScore -export const VAD_SCORE_PUBLISHED = 'vad-score-published'; - -// Event generated by VADReportingService when if finishes creating a VAD report for the monitored devices. -// The generated objects are of type Array, one score for each monitored device. +/** + * Event generated by VADReportingService when it finishes creating a VAD report for the monitored devices. + * The generated objects are of type Array, one score for each monitored device. + * @event VAD_REPORT_PUBLISHED + * @type Array with the following structure: + * @property {Date} timestamp - Timestamp at which the compute took place. + * @property {number} avgVAD - Average VAD score over monitored period of time. + * @property {string} deviceId - Associated local audio device ID. + */ export const VAD_REPORT_PUBLISHED = 'vad-report-published'; diff --git a/react/features/base/devices/services/vad-reporter/TrackVADEmitter.js b/react/features/base/devices/services/vad-reporter/TrackVADEmitter.js deleted file mode 100644 index eeb40ecf41..0000000000 --- a/react/features/base/devices/services/vad-reporter/TrackVADEmitter.js +++ /dev/null @@ -1,258 +0,0 @@ -// @flow - -import { createRnnoiseProcessorPromise, getSampleLength } from '../../../../rnnoise'; -import EventEmitter from 'events'; -import JitsiMeetJS from '../../../lib-jitsi-meet'; -import logger from '../../logger'; -import { VAD_SCORE_PUBLISHED } from './Events'; - -/** - * The structure used by TrackVADEmitter to relay a score - */ -export type VADScore = { - - /** - * Device ID associated with the VAD score - */ - deviceId: string, - - /** - * The PCM score from 0 - 1 i.e. 
0.60 - */ - score: number, - - /** - * Epoch time at which PCM was recorded - */ - timestamp: number - -}; - -/** - * Connects an audio JitsiLocalTrack to a RnnoiseProcessor using WebAudio ScriptProcessorNode. - * Once an object is created audio from the local track flows through the ScriptProcessorNode as raw PCM. - * The PCM is processed by the rnnoise module and a VAD (voice activity detection) score is obtained, the - * score is published to consumers via an EventEmitter. - * After work is done with this service the destroy method needs to be called for a proper cleanup. - */ -export default class TrackVADEmitter extends EventEmitter { - /** - * The AudioContext instance. - */ - _audioContext: AudioContext; - - /** - * The MediaStreamAudioSourceNode instance. - */ - _audioSource: MediaStreamAudioSourceNode; - - /** - * The ScriptProcessorNode instance. - */ - _audioProcessingNode: ScriptProcessorNode; - - /** - * Buffer to hold residue PCM resulting after a ScriptProcessorNode callback - */ - _bufferResidue: Float32Array; - - /** - * State flag, check if the instance was destroyed - */ - _destroyed: boolean = false; - - /** - * The JitsiLocalTrack instance. - */ - _localTrack: Object; - - /** - * Device ID of the target microphone. - */ - _micDeviceId: string; - - /** - * Callback function that will be called by the ScriptProcessNode with raw PCM data, depending on the set sample - * rate. - */ - _onAudioProcess: (audioEvent: Object) => void; - - /** - * Sample rate of the ScriptProcessorNode. - */ - _procNodeSampleRate: number; - - /** - * Rnnoise adapter that allows us to calculate VAD score for PCM samples - */ - _rnnoiseProcessor: Object; - - /** - * PCM Sample size expected by the RnnoiseProcessor instance. - */ - _rnnoiseSampleSize: number; - - /** - * Constructor. - * - * @param {number} procNodeSampleRate - Sample rate of the ScriptProcessorNode. Possible values 256, 512, 1024, - * 2048, 4096, 8192, 16384. 
Passing other values will default to closes neighbor. - * @param {Object} rnnoiseProcessor - Rnnoise adapter that allows us to calculate VAD score - * for PCM samples. - * @param {Object} jitsiLocalTrack - JitsiLocalTrack corresponding to micDeviceId. - */ - constructor(procNodeSampleRate: number, rnnoiseProcessor: Object, jitsiLocalTrack: Object) { - super(); - this._procNodeSampleRate = procNodeSampleRate; - this._rnnoiseProcessor = rnnoiseProcessor; - this._localTrack = jitsiLocalTrack; - this._micDeviceId = jitsiLocalTrack.getDeviceId(); - this._bufferResidue = new Float32Array([]); - this._audioContext = new AudioContext(); - this._rnnoiseSampleSize = getSampleLength(); - this._onAudioProcess = this._onAudioProcess.bind(this); - - this._initializeAudioContext(); - this._connectAudioGraph(); - - logger.log(`Constructed VAD emitter for device: ${this._micDeviceId}`); - } - - /** - * Factory method that sets up all the necessary components for the creation of the TrackVADEmitter. - * - * @param {string} micDeviceId - Target microphone device id. - * @param {number} procNodeSampleRate - Sample rate of the proc node. - * @returns {Promise} - Promise resolving in a new instance of TrackVADEmitter. - */ - static async create(micDeviceId: string, procNodeSampleRate: number) { - let rnnoiseProcessor = null; - let localTrack = null; - - try { - logger.log(`Initializing TrackVADEmitter for device: ${micDeviceId}`); - - rnnoiseProcessor = await createRnnoiseProcessorPromise(); - localTrack = await JitsiMeetJS.createLocalTracks({ - devices: [ 'audio' ], - micDeviceId - }); - - // We only expect one audio track when specifying a device id. 
- if (!localTrack[0]) { - throw new Error(`Failed to create jitsi local track for device id: ${micDeviceId}`); - } - - return new TrackVADEmitter(procNodeSampleRate, rnnoiseProcessor, localTrack[0]); - } catch (error) { - logger.error(`Failed to create TrackVADEmitter for ${micDeviceId} with error: ${error}`); - - if (rnnoiseProcessor) { - rnnoiseProcessor.destroy(); - } - - if (localTrack) { - localTrack.stopStream(); - } - - throw error; - } - } - - /** - * Sets up the audio graph in the AudioContext. - * - * @returns {Promise} - */ - _initializeAudioContext() { - this._audioSource = this._audioContext.createMediaStreamSource(this._localTrack.stream); - - // TODO AudioProcessingNode is deprecated check and replace with alternative. - // We don't need stereo for determining the VAD score so we create a single chanel processing node. - this._audioProcessingNode = this._audioContext.createScriptProcessor(this._procNodeSampleRate, 1, 1); - this._audioProcessingNode.onaudioprocess = this._onAudioProcess; - } - - /** - * ScriptProcessorNode callback, the input parameters contains the PCM audio that is then sent to rnnoise. - * Rnnoise only accepts PCM samples of 480 bytes whereas the webaudio processor node can't sample at a multiple - * of 480 thus after each _onAudioProcess callback there will remain and PCM buffer residue equal - * to _procNodeSampleRate / 480 which will be added to the next sample buffer and so on. - * - * @param {AudioProcessingEvent} audioEvent - Audio event. - * @returns {void} - */ - _onAudioProcess(audioEvent: Object) { - // Prepend the residue PCM buffer from the previous process callback. 
- const inData = audioEvent.inputBuffer.getChannelData(0); - const completeInData = [ ...this._bufferResidue, ...inData ]; - const sampleTimestamp = Date.now(); - - let i = 0; - - for (; i + this._rnnoiseSampleSize < completeInData.length; i += this._rnnoiseSampleSize) { - const pcmSample = completeInData.slice(i, i + this._rnnoiseSampleSize); - const vadScore = this._rnnoiseProcessor.calculateAudioFrameVAD(pcmSample); - - this.emit(VAD_SCORE_PUBLISHED, { - timestamp: sampleTimestamp, - score: vadScore, - deviceId: this._micDeviceId - }); - } - - this._bufferResidue = completeInData.slice(i, completeInData.length); - } - - /** - * Connects the nodes in the AudioContext to start the flow of audio data. - * - * @returns {void} - */ - _connectAudioGraph() { - this._audioSource.connect(this._audioProcessingNode); - this._audioProcessingNode.connect(this._audioContext.destination); - } - - /** - * Disconnects the nodes in the AudioContext. - * - * @returns {void} - */ - _disconnectAudioGraph() { - // Even thought we disconnect the processing node it seems that some callbacks remain queued, - // resulting in calls with and uninitialized context. - // eslint-disable-next-line no-empty-function - this._audioProcessingNode.onaudioprocess = () => {}; - this._audioProcessingNode.disconnect(); - this._audioSource.disconnect(); - } - - /** - * Cleanup potentially acquired resources. - * - * @returns {void} - */ - _cleanupResources() { - logger.debug(`Cleaning up resources for device ${this._micDeviceId}!`); - - this._disconnectAudioGraph(); - this._localTrack.stopStream(); - this._rnnoiseProcessor.destroy(); - } - - /** - * Destroy TrackVADEmitter instance (release resources and stop callbacks). 
- * - * @returns {void} - */ - destroy() { - if (this._destroyed) { - return; - } - - logger.log(`Destroying TrackVADEmitter for mic: ${this._micDeviceId}`); - this._cleanupResources(); - this._destroyed = true; - } -} diff --git a/react/features/base/devices/services/vad-reporter/VADReportingService.js b/react/features/base/devices/services/vad-reporter/VADReportingService.js index 99c6123a8e..e1fde360f0 100644 --- a/react/features/base/devices/services/vad-reporter/VADReportingService.js +++ b/react/features/base/devices/services/vad-reporter/VADReportingService.js @@ -1,11 +1,10 @@ // @flow +import { createRnnoiseProcessorPromise } from '../../../../rnnoise'; import EventEmitter from 'events'; import logger from '../../logger'; -import TrackVADEmitter from './TrackVADEmitter'; -import { VAD_SCORE_PUBLISHED, VAD_REPORT_PUBLISHED } from './Events'; -import type { VADScore } from './TrackVADEmitter'; -export type { VADScore }; +import JitsiMeetJS, { JitsiDetectionEvents } from '../../../lib-jitsi-meet'; +import { VAD_REPORT_PUBLISHED } from './Events'; /** * Sample rate used by TrackVADEmitter, this value determines how often the ScriptProcessorNode is going to call the @@ -30,36 +29,14 @@ type VADDeviceContext = { /** * Array with VAD scores publish from the emitter. */ - scoreArray: Array, + scoreArray: Array, /** * TrackVADEmitter associated with media device */ - vadEmitter: TrackVADEmitter + vadEmitter: Object }; -/** - * The structure used by VADReportingService to relay a score report - */ -export type VADReportScore = { - - /** - * Device ID associated with the VAD score - */ - deviceId: string, - - /** - * The PCM score from 0 - 1 i.e. 0.60 - */ - score: number, - - /** - * Epoch time at which PCM was recorded - */ - timestamp: number -}; - - /** * Voice activity detection reporting service. The service create TrackVADEmitters for the provided devices and * publishes an average of their VAD score over the specified interval via EventEmitter. 
@@ -111,7 +88,7 @@ export default class VADReportingService extends EventEmitter { * * @returns {Promise} */ - static create(micDeviceList: Array, intervalDelay: number) { + static async create(micDeviceList: Array, intervalDelay: number) { const vadReportingService = new VADReportingService(intervalDelay); const emitterPromiseArray = []; @@ -125,8 +102,17 @@ export default class VADReportingService extends EventEmitter { logger.log(`Initializing VAD context for mic: ${micDevice.label} -> ${micDevice.deviceId}`); - const emitterPromise = TrackVADEmitter.create(micDevice.deviceId, SCRIPT_NODE_SAMPLE_RATE).then(emitter => { - emitter.on(VAD_SCORE_PUBLISHED, vadReportingService._devicePublishVADScore.bind(vadReportingService)); + const rnnoiseProcessor = await createRnnoiseProcessorPromise(); + + const emitterPromise = JitsiMeetJS.createTrackVADEmitter( + micDevice.deviceId, + SCRIPT_NODE_SAMPLE_RATE, + rnnoiseProcessor + ).then(emitter => { + emitter.on( + JitsiDetectionEvents.VAD_SCORE_PUBLISHED, + vadReportingService._devicePublishVADScore.bind(vadReportingService) + ); return { vadEmitter: emitter, @@ -216,6 +202,7 @@ export default class VADReportingService extends EventEmitter { * Function called at set interval with selected compute. The result will be published on the set callback. * * @returns {void} + * @fires VAD_REPORT_PUBLISHED */ _reportVadScore() { const vadComputeScoreArray = []; @@ -243,16 +230,30 @@ export default class VADReportingService extends EventEmitter { }); } + /** + * Once the computation for all the tracked devices is done, fire an event containing all the necessary + * information. + * + * @event VAD_REPORT_PUBLISHED + * @type Array with the following structure: + * @property {Date} timestamp - Timestamp at which the compute took place. + * @property {number} avgVAD - Average VAD score over monitored period of time. + * @property {string} deviceId - Associated local audio device ID. 
+ */ this.emit(VAD_REPORT_PUBLISHED, vadComputeScoreArray); } /** * Callback method passed to vad emitters in order to publish their score. * - * @param {VADScore} vadScore - Mic publishing the score. + * @param {Object} vadScore - VAD score emitted by a track VAD emitter. + * @param {Date} vadScore.timestamp - Exact time at which processed PCM sample was generated. + * @param {number} vadScore.score - VAD score on a scale from 0 to 1 (e.g. 0.7). + * @param {string} vadScore.deviceId - Device id of the associated track. * @returns {void} + * @listens VAD_SCORE_PUBLISHED */ - _devicePublishVADScore(vadScore: VADScore) { + _devicePublishVADScore(vadScore: Object) { const context = this._contextMap.get(vadScore.deviceId); if (context) { @@ -280,5 +281,4 @@ export default class VADReportingService extends EventEmitter { this._clearContextMap(); this._destroyed = true; } - } diff --git a/react/features/base/lib-jitsi-meet/index.js b/react/features/base/lib-jitsi-meet/index.js index 841c2134ee..5d46e65579 100644 --- a/react/features/base/lib-jitsi-meet/index.js +++ b/react/features/base/lib-jitsi-meet/index.js @@ -14,6 +14,7 @@ export const JitsiConnectionErrors = JitsiMeetJS.errors.connection; export const JitsiConnectionEvents = JitsiMeetJS.events.connection; export const JitsiConnectionQualityEvents = JitsiMeetJS.events.connectionQuality; +export const JitsiDetectionEvents = JitsiMeetJS.events.detection; export const JitsiE2ePingEvents = JitsiMeetJS.events.e2eping; export const JitsiMediaDevicesEvents = JitsiMeetJS.events.mediaDevices; export const JitsiParticipantConnectionStatus diff --git a/react/features/rnnoise/functions.js b/react/features/rnnoise/functions.js index 3dad56cd64..9d18696026 100644 --- a/react/features/rnnoise/functions.js +++ b/react/features/rnnoise/functions.js @@ -25,20 +25,3 @@ export function createRnnoiseProcessorPromise() { throw new Error('Rnnoise module binding createRnnoiseProcessor not found!'); }); } - -/** - * Get the accepted sample length for the 
rnnoise library. We might want to expose it with flow libdefs. - * - * @returns {number} - */ -export function getSampleLength() { - const ns = getJitsiMeetGlobalNS(); - - const rnnoiseSample = ns?.effects?.rnnoise?.RNNOISE_SAMPLE_LENGTH; - - if (!rnnoiseSample) { - throw new Error('Please call createRnnoiseProcessorPromise first or wait for promise to resolve!'); - } - - return rnnoiseSample; -} diff --git a/react/features/stream-effects/rnnoise/RnnoiseProcessor.js b/react/features/stream-effects/rnnoise/RnnoiseProcessor.js index 0f8becce4c..58acb35f7e 100644 --- a/react/features/stream-effects/rnnoise/RnnoiseProcessor.js +++ b/react/features/stream-effects/rnnoise/RnnoiseProcessor.js @@ -10,6 +10,11 @@ export const RNNOISE_SAMPLE_LENGTH: number = 480; */ const RNNOISE_BUFFER_SIZE: number = RNNOISE_SAMPLE_LENGTH * 4; +/** + * Constant. Rnnoise only operates on 44.1 kHz float 32 little endian PCM. + */ +const PCM_FREQUENCY: number = 44100; + /** * Represents an adaptor for the rnnoise library compiled to webassembly. The class takes care of webassembly * memory management and exposes rnnoise functionality such as PCM audio denoising and VAD (voice activity @@ -132,14 +137,23 @@ export default class RnnoiseProcessor { } /** - * Such comment very wow. + * Rnnoise can only operate on a certain PCM array size. * - * @returns {number} + * @returns {number} - The PCM sample array size as required by rnnoise. */ getSampleLength() { return RNNOISE_SAMPLE_LENGTH; } + /** + * Rnnoise can only operate on a certain format of PCM sample namely float 32 44.1 kHz. + * + * @returns {number} - PCM sample frequency as required by rnnoise. + */ + getRequiredPCMFrequency() { + return PCM_FREQUENCY; + } + /** * Release any resources required by the rnnoise context this needs to be called * before destroying any context that uses the processor. 
diff --git a/webpack.config.js b/webpack.config.js index f159685563..43d92e4083 100644 --- a/webpack.config.js +++ b/webpack.config.js @@ -33,14 +33,12 @@ function getPerformanceHints(size) { const config = { devServer: { https: true, - logLevel: 'debug', inline: true, proxy: { '/': { bypass: devServerProxyBypass, secure: false, - target: devServerProxyTarget, - logLevel: 'debug' + target: devServerProxyTarget } } }, @@ -278,13 +276,11 @@ module.exports = [ * target, undefined; otherwise, the path to the local file to be served. */ function devServerProxyBypass({ path }) { - console.log('Fetching path: ', path); if (path.startsWith('/css/') || path.startsWith('/doc/') || path.startsWith('/fonts/') || path.startsWith('/images/') || path.startsWith('/sounds/') || path.startsWith('/static/') - || path.endsWith('.wasm') - || path.startsWith('/libs/lib-jitsi-meet')) { + || path.endsWith('.wasm')) { return path; }