Compare commits

...

2 Commits

Author | SHA1 | Message | Date
Jaya Allamsetty | ec553f77d0 | fix(UT) Add audio chunking and use audio effect for continuous translation | 2025-07-08 15:01:21 -04:00
Jaya Allamsetty | 6edeca020e | feat(universal-translator) Initial commit | 2025-07-07 23:14:48 -04:00
33 changed files with 4936 additions and 5 deletions

View File

@@ -76,5 +76,6 @@ $flagsImagePath: "../images/";
@import 'participants-pane';
@import 'reactions-menu';
@import 'plan-limit';
@import 'universal-translator/main';
/* Modules END */

View File

@@ -0,0 +1,333 @@
/**
* Universal Translator Styles
*/
.universal-translator-dialog {
padding: 20px;
max-width: 800px;
min-height: 600px;
h3 {
margin: 20px 0 10px 0;
color: #1a1a1a;
font-size: 16px;
font-weight: 600;
}
/* Language Selection */
.language-selection {
margin-bottom: 30px;
.language-selectors {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 20px;
margin-top: 15px;
}
.language-selector {
display: flex;
flex-direction: column;
gap: 8px;
label {
font-weight: 500;
color: #333;
}
select {
padding: 10px;
border: 2px solid #e0e0e0;
border-radius: 8px;
font-size: 14px;
background: white;
&:focus {
outline: none;
border-color: #007acc;
}
}
}
}
/* Service Selection */
.service-selection {
margin-bottom: 30px;
.service-group {
display: flex;
flex-direction: column;
gap: 8px;
margin-bottom: 15px;
label {
font-weight: 500;
color: #333;
}
select {
padding: 10px;
border: 2px solid #e0e0e0;
border-radius: 8px;
font-size: 14px;
background: white;
&:focus {
outline: none;
border-color: #007acc;
}
}
}
}
/* API Keys Section */
.api-keys-section {
margin-bottom: 30px;
.api-keys-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
gap: 15px;
margin-top: 15px;
}
.api-key-input {
display: flex;
flex-direction: column;
gap: 5px;
label {
font-weight: 500;
color: #333;
font-size: 12px;
text-transform: uppercase;
}
input {
padding: 8px;
border: 2px solid #e0e0e0;
border-radius: 6px;
font-size: 12px;
&:focus {
outline: none;
border-color: #007acc;
}
&::placeholder {
color: #999;
}
}
}
}
/* Translation Status */
.translation-status {
margin-bottom: 30px;
.status-info {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 15px;
}
.status-indicator {
display: flex;
align-items: center;
gap: 10px;
}
.status-dot {
width: 12px;
height: 12px;
border-radius: 50%;
background: #ccc;
&.recording {
background: #ff4444;
animation: pulse 1s infinite;
}
&.processing {
background: #ff9800;
animation: pulse 1s infinite;
}
&.completed {
background: #4caf50;
}
&.error {
background: #f44336;
}
}
.status-text {
font-weight: 500;
color: #333;
}
.latency-info {
font-size: 14px;
color: #007acc;
font-weight: 600;
}
.error-message {
background: #ffebee;
border: 1px solid #f44336;
border-radius: 6px;
padding: 10px;
margin: 10px 0;
display: flex;
justify-content: space-between;
align-items: center;
span {
color: #c62828;
font-size: 14px;
}
button {
background: #f44336;
color: white;
border: none;
padding: 5px 10px;
border-radius: 4px;
cursor: pointer;
font-size: 12px;
&:hover {
background: #d32f2f;
}
}
}
.transcription-result,
.translation-result {
background: #f8f9fa;
border: 1px solid #e9ecef;
border-radius: 8px;
padding: 15px;
margin: 10px 0;
h4 {
margin: 0 0 10px 0;
color: #495057;
font-size: 14px;
font-weight: 600;
}
p {
margin: 0;
color: #212529;
line-height: 1.5;
}
}
}
/* Performance Metrics */
.performance-metrics {
margin-bottom: 30px;
.metrics-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
gap: 15px;
margin-top: 15px;
}
.metric {
display: flex;
flex-direction: column;
gap: 5px;
text-align: center;
padding: 15px;
background: #f8f9fa;
border-radius: 8px;
label {
font-size: 12px;
color: #6c757d;
font-weight: 500;
}
span {
font-size: 18px;
font-weight: 600;
color: #007acc;
}
}
}
/* Control Buttons */
.control-buttons {
display: flex;
justify-content: center;
gap: 15px;
margin-top: 30px;
button {
padding: 12px 24px;
border: none;
border-radius: 8px;
font-size: 16px;
font-weight: 600;
cursor: pointer;
min-width: 150px;
transition: all 0.2s ease;
&.record-button {
background: #4caf50;
color: white;
&:hover:not(:disabled) {
background: #45a049;
transform: translateY(-1px);
}
&:disabled {
background: #cccccc;
cursor: not-allowed;
}
}
&.stop-button {
background: #f44336;
color: white;
&:hover {
background: #da190b;
transform: translateY(-1px);
}
}
}
}
}
/* Animations */
@keyframes pulse {
0% { opacity: 1; }
50% { opacity: 0.5; }
100% { opacity: 1; }
}
/* Responsive */
@media (max-width: 768px) {
.universal-translator-dialog {
padding: 15px;
.language-selectors {
grid-template-columns: 1fr;
}
.api-keys-grid {
grid-template-columns: 1fr;
}
.metrics-grid {
grid-template-columns: repeat(2, 1fr);
}
}
}

View File

@@ -0,0 +1,70 @@
{
"universalTranslator": {
"title": "Universal Translator",
"tooltip": "Open Universal Translator",
"recording": "Translation Recording Active",
"accessibilityLabel": "Universal Translator",
"languages": {
"from": "Translate from:",
"to": "Translate to:",
"autoDetect": "Auto-detect"
},
"providers": {
"stt": "Speech-to-Text Service:",
"translation": "Translation Service:",
"tts": "Text-to-Speech Service:"
},
"status": {
"idle": "Ready to translate",
"recording": "Recording audio...",
"processing": "Processing translation...",
"transcribing": "Converting speech to text...",
"translating": "Translating text...",
"synthesizing": "Generating speech...",
"playing": "Playing translated audio...",
"completed": "Translation complete",
"error": "Translation error"
},
"buttons": {
"start": "🎤 Start Translation",
"stop": "⏹️ Stop Recording",
"clear": "Clear",
"close": "Close"
},
"metrics": {
"title": "Performance Metrics",
"sttLatency": "STT Latency:",
"translationLatency": "Translation Latency:",
"ttsLatency": "TTS Latency:",
"totalLatency": "Total Latency:",
"totalRequests": "Total Requests:",
"successRate": "Success Rate:"
},
"apiKeys": {
"title": "API Keys",
"openai": "OpenAI API Key",
"groq": "Groq API Key",
"deepgram": "Deepgram API Key",
"assemblyai": "AssemblyAI API Key",
"cartesia": "Cartesia API Key",
"elevenlabs": "ElevenLabs API Key",
"azure": "Azure API Key",
"google": "Google API Key",
"microsoft": "Microsoft API Key"
},
"results": {
"transcription": "Transcription:",
"translation": "Translation:"
},
"errors": {
"notInitialized": "Universal Translator not initialized",
"recordingFailed": "Failed to start recording",
"transcriptionFailed": "Speech recognition failed",
"translationFailed": "Translation failed",
"synthesisFailed": "Speech synthesis failed",
"apiKeyMissing": "API key required for selected service",
"microphonePermission": "Microphone permission required",
"unsupportedBrowser": "Browser not supported"
}
}
}

View File

@@ -19,6 +19,7 @@ import '../web-hid/middleware';
import '../settings/middleware';
import '../talk-while-muted/middleware';
import '../toolbox/middleware';
import '../universal-translator/middleware';
import '../face-landmarks/middleware';
import '../gifs/middleware';
import '../whiteboard/middleware.web';

View File

@@ -15,6 +15,7 @@ import '../screen-share/reducer';
import '../noise-suppression/reducer';
import '../screenshot-capture/reducer';
import '../talk-while-muted/reducer';
import '../universal-translator/reducer';
import '../virtual-background/reducer';
import '../web-hid/reducer';
import '../file-sharing/reducer';

View File

@@ -76,6 +76,7 @@ import { ISubtitlesState } from '../subtitles/reducer';
import { ITalkWhileMutedState } from '../talk-while-muted/reducer';
import { IToolboxState } from '../toolbox/reducer';
import { ITranscribingState } from '../transcribing/reducer';
import { IUniversalTranslatorState } from '../universal-translator/reducer';
import { IVideoLayoutState } from '../video-layout/reducer';
import { IVideoQualityPersistedState, IVideoQualityState } from '../video-quality/reducer';
import { IVideoSipGW } from '../videosipgw/reducer';
@@ -168,6 +169,7 @@ export interface IReduxState {
'features/testing': ITestingState;
'features/toolbox': IToolboxState;
'features/transcribing': ITranscribingState;
'features/universal-translator': IUniversalTranslatorState;
'features/video-layout': IVideoLayoutState;
'features/video-quality': IVideoQualityState;
'features/video-quality-persistent-storage': IVideoQualityPersistedState;

View File

@@ -88,6 +88,7 @@ import { default as IconRemoteControlStop } from './stop-remote-control.svg';
import { default as IconStop } from './stop.svg';
import { default as IconSubtitles } from './subtitles.svg';
import { default as IconTileView } from './tile-view.svg';
import { default as IconTranslate } from './translate.svg';
import { default as IconTrash } from './trash.svg';
import { default as IconUserDeleted } from './user-deleted.svg';
import { default as IconUser } from './user.svg';
@@ -218,5 +219,6 @@ export const DEFAULT_ICON: Record<string, any> = {
IconWifi1Bar,
IconWifi2Bars,
IconWifi3Bars,
IconYahoo
IconYahoo,
IconTranslate
};

View File

@@ -110,7 +110,8 @@ const {
IconWifi1Bar,
IconWifi2Bars,
IconWifi3Bars,
IconYahoo
IconYahoo,
IconTranslate
} = Object.keys(DEFAULT_ICON).reduce((exportedIcons: Record<string, any>, key) => {
return {
...exportedIcons,
@@ -229,5 +230,6 @@ export {
IconWifi1Bar,
IconWifi2Bars,
IconWifi3Bars,
IconYahoo
IconYahoo,
IconTranslate
};

View File

@@ -0,0 +1,5 @@
<svg width="20" height="20" viewBox="0 0 20 20" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M7.5 10H12.5C13.3284 10 14 10.6716 14 11.5C14 12.3284 13.3284 13 12.5 13H7.5C6.67157 13 6 12.3284 6 11.5C6 10.6716 6.67157 10 7.5 10Z" fill="currentColor"/>
<path d="M4 4C2.89543 4 2 4.89543 2 6V14C2 15.1046 2.89543 16 4 16H7.1L9.5 18.5L11.9 16H16C17.1046 16 18 15.1046 18 14V6C18 4.89543 17.1046 4 16 4H4ZM4 6H16V14H11.5L9.5 16.2L7.5 14H4V6Z" fill="currentColor"/>
<path d="M8 7.5C8 7.22386 8.22386 7 8.5 7H11.5C11.7761 7 12 7.22386 12 7.5C12 7.77614 11.7761 8 11.5 8H8.5C8.22386 8 8 7.77614 8 7.5Z" fill="currentColor"/>
</svg>


View File

@@ -0,0 +1,264 @@
/**
* Class implementing the effect interface expected by a JitsiLocalTrack.
* The UniversalTranslatorEffect replaces the original audio stream with translated audio
* while maintaining the same interface as other stream effects.
*/
export class UniversalTranslatorEffect {
/**
* Original MediaStream from the JitsiLocalTrack that uses this effect.
*/
_originalStream: MediaStream | null = null;
/**
* MediaStreamTrack obtained from the original MediaStream.
*/
_originalTrack: MediaStreamTrack | null = null;
/**
* Translated audio stream that will replace the original.
*/
_translatedStream: MediaStream | null = null;
/**
* MediaStreamTrack obtained from the translated stream.
*/
_translatedTrack: MediaStreamTrack | null = null;
/**
* Audio context for creating the translated audio stream.
*/
_audioContext: AudioContext | null = null;
/**
* Media stream destination for routing translated audio.
*/
_streamDestination: MediaStreamAudioDestinationNode | null = null;
/**
* Whether the effect is currently active.
*/
_isActive: boolean = false;
/**
* Queue of translated audio buffers to be played.
*/
_audioQueue: AudioBuffer[] = [];
/**
* Whether audio is currently being processed.
*/
_isProcessing: boolean = false;
/**
* Creates UniversalTranslatorEffect.
*/
constructor() {
// Initialize audio context
this._audioContext = new AudioContext({
sampleRate: 48000,
latencyHint: 'interactive'
});
// Create destination for translated audio
this._streamDestination = this._audioContext.createMediaStreamDestination();
this._translatedStream = this._streamDestination.stream;
// Keep a reference to the generated track so setMuted() and isMuted() can toggle it later.
this._translatedTrack = this._translatedStream.getAudioTracks()[0];
}
/**
* Checks if the JitsiLocalTrack supports this effect.
*
* @param {JitsiLocalTrack} sourceLocalTrack - Track to which the effect will be applied.
* @returns {boolean} - Returns true if this effect can run on the specified track, false otherwise.
*/
isEnabled(sourceLocalTrack: any): boolean {
// Only works with audio tracks
return sourceLocalTrack.isAudioTrack();
}
/**
* Effect interface called by source JitsiLocalTrack.
* Returns the translated audio stream instead of the original.
*
* @param {MediaStream} audioStream - Original audio stream from microphone.
* @returns {MediaStream} - MediaStream containing translated audio.
*/
startEffect(audioStream: MediaStream): MediaStream {
this._originalStream = audioStream;
this._originalTrack = audioStream.getTracks()[0];
this._isActive = true;
console.log('UniversalTranslatorEffect: Started effect with translated stream');
// Return the translated stream instead of the original
return this._translatedStream!;
}
/**
* Stop the translator effect.
*
* @returns {void}
*/
stopEffect(): void {
this._isActive = false;
this._audioQueue = [];
this._isProcessing = false;
console.log('UniversalTranslatorEffect: Stopped effect');
}
/**
* Change the muted state of the effect.
*
* @param {boolean} muted - Should effect be muted or not.
* @returns {void}
*/
setMuted(muted: boolean): void {
if (this._translatedTrack) {
this._translatedTrack.enabled = !muted;
}
}
/**
* Check whether or not this effect is muted.
*
* @returns {boolean}
*/
isMuted(): boolean {
return this._translatedTrack ? !this._translatedTrack.enabled : false;
}
/**
* Add translated audio to be played through the effect.
*
* @param {AudioBuffer} audioBuffer - Translated audio buffer to play.
* @returns {Promise<void>}
*/
async playTranslatedAudio(audioBuffer: AudioBuffer): Promise<void> {
if (!this._isActive || !this._audioContext || !this._streamDestination) {
console.warn('UniversalTranslatorEffect: Effect not active, cannot play audio');
return;
}
// Add to queue and process
this._audioQueue.push(audioBuffer);
if (!this._isProcessing) {
this._processAudioQueue();
}
}
/**
* Process queued translated audio buffers.
*
* @returns {Promise<void>}
*/
private async _processAudioQueue(): Promise<void> {
if (this._isProcessing || !this._audioContext || !this._streamDestination) {
return;
}
this._isProcessing = true;
while (this._audioQueue.length > 0 && this._isActive) {
const audioBuffer = this._audioQueue.shift()!;
await this._playAudioBuffer(audioBuffer);
}
this._isProcessing = false;
}
/**
* Play a single audio buffer through the translated stream.
*
* @param {AudioBuffer} audioBuffer - Audio buffer to play.
* @returns {Promise<void>}
*/
private async _playAudioBuffer(audioBuffer: AudioBuffer): Promise<void> {
if (!this._audioContext || !this._streamDestination) {
return;
}
try {
const source = this._audioContext.createBufferSource();
source.buffer = audioBuffer;
source.connect(this._streamDestination);
source.start();
// Wait for the audio to finish playing
return new Promise(resolve => {
source.onended = () => resolve();
// Fallback timeout
setTimeout(resolve, audioBuffer.duration * 1000 + 100);
});
} catch (error) {
console.error('UniversalTranslatorEffect: Error playing audio buffer:', error);
}
}
/**
* Get the original audio stream for translation processing.
*
* @returns {MediaStream | null} - Original microphone stream.
*/
getOriginalStream(): MediaStream | null {
return this._originalStream;
}
/**
* Get the translated audio stream.
*
* @returns {MediaStream | null} - Stream containing translated audio.
*/
getTranslatedStream(): MediaStream | null {
return this._translatedStream;
}
/**
* Check if the effect is currently active.
*
* @returns {boolean} - Whether the effect is active.
*/
isActive(): boolean {
return this._isActive;
}
/**
* Create an audio buffer from a blob.
*
* @param {Blob} audioBlob - Audio blob to convert.
* @returns {Promise<AudioBuffer>} - Converted audio buffer.
*/
async createAudioBufferFromBlob(audioBlob: Blob): Promise<AudioBuffer> {
if (!this._audioContext) {
throw new Error('Audio context not initialized');
}
const arrayBuffer = await audioBlob.arrayBuffer();
return await this._audioContext.decodeAudioData(arrayBuffer);
}
/**
* Cleanup resources when effect is destroyed.
*
* @returns {void}
*/
dispose(): void {
this.stopEffect();
if (this._audioContext) {
this._audioContext.close();
this._audioContext = null;
}
this._streamDestination = null;
this._translatedStream = null;
this._originalStream = null;
this._originalTrack = null;
this._translatedTrack = null;
}
}
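As a rough usage sketch, a caller might attach this effect to a local audio track and feed it translated audio; the `setEffect` call on the track and the `translatedBlob` input are assumptions for illustration, not part of this commit:

```typescript
import { UniversalTranslatorEffect } from './UniversalTranslatorEffect';

async function attachTranslator(localAudioTrack: any, translatedBlob: Blob) {
    const effect = new UniversalTranslatorEffect();

    // Assumed JitsiLocalTrack API: setEffect() calls startEffect() and swaps in the translated stream.
    await localAudioTrack.setEffect(effect);

    // Decode a translated audio clip and queue it for playback into the conference.
    const buffer = await effect.createAudioBufferFromBlob(translatedBlob);
    await effect.playTranslatedAudio(buffer);

    // Later: detach the effect and release audio resources.
    await localAudioTrack.setEffect(undefined);
    effect.dispose();
}
```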

View File

@@ -0,0 +1 @@
export { UniversalTranslatorEffect } from './UniversalTranslatorEffect';

View File

@@ -121,7 +121,8 @@ export const MAIN_TOOLBAR_BUTTONS_PRIORITY = [
'embedmeeting',
'feedback',
'download',
'help'
'help',
'universal-translator'
];
export const TOOLBAR_TIMEOUT = 4000;
@@ -176,6 +177,7 @@ export const TOOLBAR_BUTTONS: ToolbarButton[] = [
'stats',
'tileview',
'toggle-camera',
'universal-translator',
'videoquality',
'whiteboard'
];

View File

@@ -50,6 +50,7 @@ import SpeakerStats from '../speaker-stats/components/web/SpeakerStats';
import { isSpeakerStatsDisabled } from '../speaker-stats/functions';
import { useSpeakerStatsButton } from '../speaker-stats/hooks.web';
import { useClosedCaptionButton } from '../subtitles/hooks.web';
import { UniversalTranslatorButton } from '../universal-translator/components';
import { toggleTileView } from '../video-layout/actions.any';
import { shouldDisplayTileView } from '../video-layout/functions.web';
import { useTileViewButton } from '../video-layout/hooks';
@@ -166,12 +167,19 @@ const download = {
group: 4
};
const universalTranslator = {
key: 'universal-translator',
Content: UniversalTranslatorButton,
group: 3
};
const help = {
key: 'help',
Content: HelpButton,
group: 4
};
/**
* A hook that returns the toggle camera button if it is enabled and undefined otherwise.
*
@@ -324,7 +332,8 @@ export function useToolboxButtons(
embedmeeting: embed,
feedback,
download: _download,
help: _help
help: _help,
'universal-translator': universalTranslator
};
const buttonKeys = Object.keys(buttons) as ToolbarButton[];

View File

@@ -51,6 +51,7 @@ export type ToolbarButton = 'camera' |
'stats' |
'tileview' |
'toggle-camera' |
'universal-translator' |
'videoquality' |
'whiteboard' |
'__end';

View File

@@ -0,0 +1,121 @@
# Universal Translator Feature for Jitsi Meet
This feature provides real-time speech translation capabilities directly within Jitsi Meet meetings.
## Overview
The Universal Translator allows participants to:
- Translate speech from one language to another in real-time
- Compare multiple STT/TTS service providers for optimal latency
- Use BlackHole virtual audio device for seamless integration
- Monitor performance metrics for different service combinations
## Architecture
### Core Components
- **STT Providers**: Whisper (local), Groq, Deepgram, AssemblyAI
- **Translation Providers**: OpenAI GPT-4, Google Translate, Microsoft Translator
- **TTS Providers**: Cartesia Sonic, ElevenLabs, Deepgram Aura, Web Speech API
- **Audio Routing**: BlackHole virtual audio device integration
### Performance Targets
- Total end-to-end latency: <650ms
- STT processing: <300ms
- Translation: <200ms
- TTS generation: <100ms
- Audio routing: <50ms

The four stage budgets (300 + 200 + 100 + 50 ms) sum to the 650ms end-to-end target.
## Usage
### Accessing the Feature
1. Click the translate button (🌐) in the Jitsi Meet toolbar
2. Configure your preferred service providers
3. Add API keys for external services
4. Select source and target languages
5. Start recording to begin real-time translation
### Service Configuration
The feature supports multiple providers for comparison:
**STT Services:**
- Whisper (Local) - Free, privacy-focused, ~200ms latency
- Groq Whisper - Ultra-fast, ~100ms latency
- Deepgram Nova-2 - Real-time streaming, ~100ms latency
- AssemblyAI Universal-2 - Highest accuracy, ~150ms latency
**TTS Services:**
- Cartesia Sonic - Ultra-low latency, ~40ms
- ElevenLabs - Highest quality, ~300ms latency
- Deepgram Aura - Streaming capable, ~400ms latency
- Web Speech API - Browser native, ~50ms latency
### API Key Requirements
External services require API keys:
- OpenAI (for GPT-4 translation)
- Groq (for ultra-fast STT)
- Deepgram (for STT and TTS)
- AssemblyAI (for high-accuracy STT)
- Cartesia (for low-latency TTS)
- ElevenLabs (for high-quality TTS)
- Azure/Google/Microsoft (for enterprise services)
## BlackHole Integration
For optimal audio routing on macOS (an availability-check sketch follows these steps):
1. Install BlackHole: https://existential.audio/blackhole/
2. Set BlackHole as your audio input device
3. The feature will automatically detect and use BlackHole for routing
4. Translated audio will be output through BlackHole for real-time playback
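A hedged sketch of the detection step, using the standard `enumerateDevices()` API; the label matching is an assumption, and device labels are only populated after microphone permission has been granted:

```typescript
async function hasBlackHoleInput(): Promise<boolean> {
    // Labels are empty until the user grants microphone access.
    const devices = await navigator.mediaDevices.enumerateDevices();

    return devices.some(device =>
        device.kind === 'audioinput'
        && device.label.toLowerCase().includes('blackhole'));
}
```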
## Development
### Adding New Service Providers
1. Create provider class in appropriate service directory
2. Implement the required interface methods (see the provider sketch after this list)
3. Add to provider factory
4. Update UI configuration options
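A rough sketch of what such a provider could look like; the `ISTTProvider` shape, the endpoint URL, and the `transcribe()` signature are illustrative assumptions rather than the interface defined in this commit:

```typescript
// Hypothetical provider shape; the real interface lives in the services directory of this PR.
interface ISTTProvider {
    name: string;
    transcribe(audio: Blob, language?: string): Promise<{ text: string; latencyMs: number }>;
}

class ExampleSTTProvider implements ISTTProvider {
    name = 'example-stt';

    constructor(private apiKey: string) {}

    async transcribe(audio: Blob, language = 'en'): Promise<{ text: string; latencyMs: number }> {
        const start = performance.now();

        // Placeholder endpoint; replace with the real service URL.
        const response = await fetch(`https://api.example.com/v1/transcribe?lang=${language}`, {
            method: 'POST',
            headers: { Authorization: `Bearer ${this.apiKey}` },
            body: audio
        });
        const { text } = await response.json();

        return { text, latencyMs: performance.now() - start };
    }
}
```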
### Testing Latency
The feature includes built-in latency monitoring (a timing-helper sketch follows this list):
- Individual service latencies
- End-to-end pipeline performance
- Success rate tracking
- Request count statistics
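As a rough illustration of per-stage timing, any pipeline stage (STT, translation, TTS) can be wrapped and measured; the stage names and logging are assumptions, and the PR's own metrics live in the reducer's `latencyMetrics`:

```typescript
// Run one pipeline stage and report how long it took.
async function timeStage<T>(stage: string, run: () => Promise<T>): Promise<{ result: T; latencyMs: number }> {
    const start = performance.now();
    const result = await run();
    const latencyMs = performance.now() - start;

    console.log(`${stage} took ${Math.round(latencyMs)}ms`);

    return { result, latencyMs };
}
```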
## Configuration
The feature can be configured via Jitsi Meet config:
```javascript
// config.js
const config = {
// ... other config
universalTranslator: {
enabled: true,
defaultSTTProvider: 'whisper',
defaultTTSProvider: 'cartesia',
defaultTranslationProvider: 'openai',
defaultSourceLanguage: 'en',
defaultTargetLanguage: 'es'
}
};
```
## Limitations
- Requires modern browser with MediaRecorder API
- External services require internet connection
- API costs apply for cloud-based providers
- BlackHole is macOS-only (fallback to default audio on other platforms)
## Future Enhancements
- Support for Windows virtual audio devices
- Additional language pairs
- Speaker identification
- Conversation history
- Export/sharing capabilities
- Integration with meeting recordings

View File

@@ -0,0 +1,98 @@
/**
* Action types for universal translator feature.
*/
/**
* Action to initialize the universal translator.
*/
export const INIT_UNIVERSAL_TRANSLATOR = 'INIT_UNIVERSAL_TRANSLATOR';
/**
* Action to set the STT provider.
*/
export const SET_STT_PROVIDER = 'SET_STT_PROVIDER';
/**
* Action to set the TTS provider.
*/
export const SET_TTS_PROVIDER = 'SET_TTS_PROVIDER';
/**
* Action to set the translation provider.
*/
export const SET_TRANSLATION_PROVIDER = 'SET_TRANSLATION_PROVIDER';
/**
* Action to set the source language.
*/
export const SET_SOURCE_LANGUAGE = 'SET_SOURCE_LANGUAGE';
/**
* Action to set the target language.
*/
export const SET_TARGET_LANGUAGE = 'SET_TARGET_LANGUAGE';
/**
* Action to start translation recording.
*/
export const START_TRANSLATION_RECORDING = 'START_TRANSLATION_RECORDING';
/**
* Action to stop translation recording.
*/
export const STOP_TRANSLATION_RECORDING = 'STOP_TRANSLATION_RECORDING';
/**
* Action to update translation status.
*/
export const UPDATE_TRANSLATION_STATUS = 'UPDATE_TRANSLATION_STATUS';
/**
* Action to set transcription result.
*/
export const SET_TRANSCRIPTION_RESULT = 'SET_TRANSCRIPTION_RESULT';
/**
* Action to set translation result.
*/
export const SET_TRANSLATION_RESULT = 'SET_TRANSLATION_RESULT';
/**
* Action to update latency metrics.
*/
export const UPDATE_LATENCY_METRICS = 'UPDATE_LATENCY_METRICS';
/**
* Action to set translation error.
*/
export const SET_TRANSLATION_ERROR = 'SET_TRANSLATION_ERROR';
/**
* Action to clear translation error.
*/
export const CLEAR_TRANSLATION_ERROR = 'CLEAR_TRANSLATION_ERROR';
/**
* Action to toggle universal translator UI.
*/
export const TOGGLE_UNIVERSAL_TRANSLATOR = 'TOGGLE_UNIVERSAL_TRANSLATOR';
/**
* Action to set API keys for services.
*/
export const SET_API_KEYS = 'SET_API_KEYS';
/**
* Action to update processing step.
*/
export const UPDATE_PROCESSING_STEP = 'UPDATE_PROCESSING_STEP';
/**
* Action type to enable universal translator effect.
*/
export const ENABLE_UNIVERSAL_TRANSLATOR_EFFECT = 'ENABLE_UNIVERSAL_TRANSLATOR_EFFECT';
/**
* Action type to disable universal translator effect.
*/
export const DISABLE_UNIVERSAL_TRANSLATOR_EFFECT = 'DISABLE_UNIVERSAL_TRANSLATOR_EFFECT';

View File

@@ -0,0 +1,299 @@
import { IStore } from '../app/types';
import { hideDialog, openDialog } from '../base/dialog/actions';
import {
CLEAR_TRANSLATION_ERROR,
DISABLE_UNIVERSAL_TRANSLATOR_EFFECT,
ENABLE_UNIVERSAL_TRANSLATOR_EFFECT,
INIT_UNIVERSAL_TRANSLATOR,
SET_API_KEYS,
SET_SOURCE_LANGUAGE,
SET_STT_PROVIDER,
SET_TARGET_LANGUAGE,
SET_TRANSCRIPTION_RESULT,
SET_TRANSLATION_ERROR,
SET_TRANSLATION_PROVIDER,
SET_TRANSLATION_RESULT,
SET_TTS_PROVIDER,
START_TRANSLATION_RECORDING,
STOP_TRANSLATION_RECORDING,
TOGGLE_UNIVERSAL_TRANSLATOR,
UPDATE_LATENCY_METRICS,
UPDATE_PROCESSING_STEP,
UPDATE_TRANSLATION_STATUS
} from './actionTypes';
import { UniversalTranslatorDialog } from './components';
/**
* Initializes the universal translator with service providers.
*
* @param {Object} config - Configuration for service providers.
* @returns {Object} Redux action.
*/
export function initUniversalTranslator(config: any) {
return {
type: INIT_UNIVERSAL_TRANSLATOR,
config
};
}
/**
* Sets the STT (Speech-to-Text) provider.
*
* @param {string} provider - The STT provider name.
* @returns {Object} Redux action.
*/
export function setSTTProvider(provider: string) {
return {
type: SET_STT_PROVIDER,
provider
};
}
/**
* Sets the TTS (Text-to-Speech) provider.
*
* @param {string} provider - The TTS provider name.
* @returns {Object} Redux action.
*/
export function setTTSProvider(provider: string) {
return {
type: SET_TTS_PROVIDER,
provider
};
}
/**
* Sets the translation provider.
*
* @param {string} provider - The translation provider name.
* @returns {Object} Redux action.
*/
export function setTranslationProvider(provider: string) {
return {
type: SET_TRANSLATION_PROVIDER,
provider
};
}
/**
* Sets the source language for translation.
*
* @param {string} language - The source language code.
* @returns {Object} Redux action.
*/
export function setSourceLanguage(language: string) {
return {
type: SET_SOURCE_LANGUAGE,
language
};
}
/**
* Sets the target language for translation.
*
* @param {string} language - The target language code.
* @returns {Object} Redux action.
*/
export function setTargetLanguage(language: string) {
return {
type: SET_TARGET_LANGUAGE,
language
};
}
/**
* Starts translation recording.
*
* @returns {Object} Redux action.
*/
export function startTranslationRecording() {
return {
type: START_TRANSLATION_RECORDING
};
}
/**
* Stops translation recording.
*
* @returns {Object} Redux action.
*/
export function stopTranslationRecording() {
return {
type: STOP_TRANSLATION_RECORDING
};
}
/**
* Updates the translation status.
*
* @param {string} status - The current status.
* @returns {Object} Redux action.
*/
export function updateTranslationStatus(status: string) {
return {
type: UPDATE_TRANSLATION_STATUS,
status
};
}
/**
* Updates the current processing step.
*
* @param {string} step - The current processing step.
* @returns {Object} Redux action.
*/
export function updateProcessingStep(step: string) {
return {
type: UPDATE_PROCESSING_STEP,
step
};
}
/**
* Sets the transcription result.
*
* @param {Object} result - The transcription result.
* @returns {Object} Redux action.
*/
export function setTranscriptionResult(result: any) {
return {
type: SET_TRANSCRIPTION_RESULT,
result
};
}
/**
* Sets the translation result.
*
* @param {Object} result - The translation result.
* @returns {Object} Redux action.
*/
export function setTranslationResult(result: any) {
return {
type: SET_TRANSLATION_RESULT,
result
};
}
/**
* Updates latency metrics for different services.
*
* @param {Object} metrics - The latency metrics.
* @returns {Object} Redux action.
*/
export function updateLatencyMetrics(metrics: any) {
return {
type: UPDATE_LATENCY_METRICS,
metrics
};
}
/**
* Sets a translation error.
*
* @param {string} error - The error message.
* @returns {Object} Redux action.
*/
export function setTranslationError(error: string) {
return {
type: SET_TRANSLATION_ERROR,
error
};
}
/**
* Clears the translation error.
*
* @returns {Object} Redux action.
*/
export function clearTranslationError() {
return {
type: CLEAR_TRANSLATION_ERROR
};
}
/**
* Sets API keys for various services.
*
* @param {Object} keys - Object containing API keys for different services.
* @returns {Object} Redux action.
*/
export function setApiKeys(keys: any) {
return {
type: SET_API_KEYS,
keys
};
}
/**
* Toggles the universal translator dialog.
*
* @returns {Function} Redux thunk action.
*/
export function toggleUniversalTranslator() {
return (dispatch: IStore['dispatch'], getState: IStore['getState']) => {
const state = getState();
const universalTranslator = state['features/universal-translator'];
if (universalTranslator?.showDialog) {
dispatch(hideDialog(UniversalTranslatorDialog));
} else {
dispatch(openDialog(UniversalTranslatorDialog));
}
dispatch({
type: TOGGLE_UNIVERSAL_TRANSLATOR
});
};
}
/**
* Shows the universal translator dialog.
*
* @returns {Function} Redux thunk action.
*/
export function showUniversalTranslatorDialog() {
return (dispatch: IStore['dispatch']) => {
dispatch(openDialog(UniversalTranslatorDialog));
dispatch({
type: TOGGLE_UNIVERSAL_TRANSLATOR
});
};
}
/**
* Hides the universal translator dialog.
*
* @returns {Function} Redux thunk action.
*/
export function hideUniversalTranslatorDialog() {
return (dispatch: IStore['dispatch']) => {
dispatch(hideDialog(UniversalTranslatorDialog));
dispatch({
type: TOGGLE_UNIVERSAL_TRANSLATOR
});
};
}
/**
* Enables the universal translator effect on the audio track.
*
* @returns {Object} Redux action.
*/
export function enableUniversalTranslatorEffect() {
return {
type: ENABLE_UNIVERSAL_TRANSLATOR_EFFECT
};
}
/**
* Disables the universal translator effect on the audio track.
*
* @returns {Object} Redux action.
*/
export function disableUniversalTranslatorEffect() {
return {
type: DISABLE_UNIVERSAL_TRANSLATOR_EFFECT
};
}

View File

@@ -0,0 +1,217 @@
/**
* Audio utility functions for processing and format conversion.
* Adapted from standalone-meeting-assist.
*/
/**
* Convert WebM audio blob to Float32Array for Whisper processing.
*
* @param {Blob} webmBlob - The WebM audio blob to convert.
* @returns {Promise<Float32Array>} Promise resolving to converted audio data.
*/
export function convertWebMToFloat32(webmBlob) {
return new Promise((resolve, reject) => {
const fileReader = new FileReader();
fileReader.onloadend = async () => {
try {
const audioContext = new AudioContext({ sampleRate: 16000 });
const arrayBuffer = fileReader.result;
const decoded = await audioContext.decodeAudioData(arrayBuffer);
let audio;
if (decoded.numberOfChannels === 2) {
// Convert stereo to mono
const SCALING_FACTOR = Math.sqrt(2);
const left = decoded.getChannelData(0);
const right = decoded.getChannelData(1);
audio = new Float32Array(left.length);
for (let i = 0; i < decoded.length; ++i) {
audio[i] = SCALING_FACTOR * (left[i] + right[i]) / 2;
}
} else {
// Use first channel for mono
audio = decoded.getChannelData(0);
}
resolve(audio);
} catch (error) {
reject(error);
}
};
fileReader.onerror = () => reject(new Error('FileReader error'));
fileReader.readAsArrayBuffer(webmBlob);
});
}
/**
* Create a MediaRecorder for audio capture.
*
* @param {MediaStream} stream - The audio stream to record.
* @param {Object} options - Recording options.
* @returns {MediaRecorder} The configured MediaRecorder instance.
*/
export function createAudioRecorder(stream, options = {}) {
const {
mimeType = 'audio/webm;codecs=opus',
audioBitsPerSecond = 128000
} = options;
try {
return new MediaRecorder(stream, {
mimeType,
audioBitsPerSecond
});
} catch (error) {
console.warn('Falling back to default MediaRecorder options:', error);
return new MediaRecorder(stream);
}
}
/**
* Get user media with optimal settings for speech recognition.
*
* @param {string|null} deviceId - Optional device ID to use.
* @returns {Promise<MediaStream>} Promise resolving to the media stream.
*/
export async function getUserMediaForSpeech(deviceId = null) {
const constraints = {
audio: {
deviceId: deviceId ? { exact: deviceId } : undefined,
sampleRate: 16000,
channelCount: 1,
echoCancellation: true,
noiseSuppression: true,
autoGainControl: true
}
};
try {
return await navigator.mediaDevices.getUserMedia(constraints);
} catch (error) {
console.warn('Failed to get media with optimal settings, falling back:', error);
// Fallback to basic audio capture
return await navigator.mediaDevices.getUserMedia({ audio: true });
}
}
/**
* Get available audio input devices.
*
* @returns {Promise<Array>} Promise resolving to array of audio input devices.
*/
export async function getAudioInputDevices() {
try {
const devices = await navigator.mediaDevices.enumerateDevices();
return devices.filter(device => device.kind === 'audioinput');
} catch (error) {
console.error('Error enumerating audio devices:', error);
return [];
}
}
/**
* Get available audio output devices.
*
* @returns {Promise<Array>} Promise resolving to array of audio output devices.
*/
export async function getAudioOutputDevices() {
try {
const devices = await navigator.mediaDevices.enumerateDevices();
return devices.filter(device => device.kind === 'audiooutput');
} catch (error) {
console.error('Error enumerating audio output devices:', error);
return [];
}
}
/**
* Create an audio context for processing.
*
* @param {number} sampleRate - The sample rate for the audio context.
* @returns {AudioContext} The created audio context.
*/
export function createAudioContext(sampleRate = 16000) {
return new AudioContext({ sampleRate });
}
/**
* Convert Float32Array to audio blob.
*
* @param {Float32Array} float32Array - The audio data to convert.
* @param {number} sampleRate - The sample rate of the audio data.
* @returns {Blob} The resulting audio blob.
*/
export function float32ArrayToBlob(float32Array, sampleRate = 16000) {
// Create a 16-bit PCM WAV file
const length = float32Array.length;
const buffer = new ArrayBuffer(44 + (length * 2));
const view = new DataView(buffer);
// WAV header
const writeString = (offset, string) => {
for (let i = 0; i < string.length; i++) {
view.setUint8(offset + i, string.charCodeAt(i));
}
};
writeString(0, 'RIFF');
view.setUint32(4, 36 + (length * 2), true);
writeString(8, 'WAVE');
writeString(12, 'fmt ');
view.setUint32(16, 16, true);
view.setUint16(20, 1, true);
view.setUint16(22, 1, true);
view.setUint32(24, sampleRate, true);
view.setUint32(28, sampleRate * 2, true);
view.setUint16(32, 2, true);
view.setUint16(34, 16, true);
writeString(36, 'data');
view.setUint32(40, length * 2, true);
// Convert float32 to int16
let offset = 44;
for (let i = 0; i < length; i++) {
const sample = Math.max(-1, Math.min(1, float32Array[i]));
view.setInt16(offset, sample < 0 ? sample * 0x8000 : sample * 0x7FFF, true);
offset += 2;
}
return new Blob([ buffer ], { type: 'audio/wav' });
}
/**
* Play audio from Float32Array.
*
* @param {Float32Array} float32Array - The audio data to play.
* @param {number} sampleRate - The sample rate of the audio data.
* @returns {Promise<void>} Promise that resolves when audio finishes playing.
*/
export function playAudioFromFloat32(float32Array, sampleRate = 16000) {
const audioContext = createAudioContext(sampleRate);
const audioBuffer = audioContext.createBuffer(1, float32Array.length, sampleRate);
audioBuffer.copyToChannel(float32Array, 0);
const source = audioContext.createBufferSource();
source.buffer = audioBuffer;
source.connect(audioContext.destination);
source.start();
return new Promise(resolve => {
source.onended = resolve;
});
}
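A hedged sketch of how these helpers could be chained for a single capture-and-transcribe pass; the two-second chunk size is an arbitrary choice for illustration, and the helpers are assumed to be imported from this module:

```typescript
// Assumes getUserMediaForSpeech, createAudioRecorder and convertWebMToFloat32 come from this module.
async function captureChunkForWhisper(): Promise<Float32Array> {
    const stream = await getUserMediaForSpeech();
    const recorder = createAudioRecorder(stream);
    const chunks: Blob[] = [];

    recorder.ondataavailable = event => chunks.push(event.data);

    await new Promise<void>(resolve => {
        recorder.onstop = () => resolve();
        recorder.start();

        // Record roughly two seconds of audio, then stop.
        setTimeout(() => recorder.stop(), 2000);
    });

    stream.getTracks().forEach(track => track.stop());

    // Convert the WebM chunk into 16 kHz mono Float32 samples for Whisper.
    return convertWebMToFloat32(new Blob(chunks, { type: 'audio/webm' }));
}
```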

View File

@@ -0,0 +1,365 @@
/**
* BlackHole virtual audio device integration.
* Handles audio routing for the macOS BlackHole virtual audio driver and
* supports routing translated audio back into the Jitsi Meet stream.
*/
export class BlackHoleRouter {
constructor() {
this.inputStream = null;
this.outputContext = null;
this.outputDestination = null;
this.isRouting = false;
this.audioQueue = [];
this.processingQueue = false;
this.blackHoleInputDevice = null;
this.blackHoleOutputDevice = null;
this.mixerNode = null;
this.gainNode = null;
this.isInitialized = false;
}
/**
* Initialize BlackHole audio routing with enhanced device detection.
*/
async initialize() {
try {
console.log('Initializing BlackHole audio routing...');
// Get all available devices
const devices = await navigator.mediaDevices.enumerateDevices();
// Find BlackHole input and output devices
this.blackHoleInputDevice = devices.find(device =>
device.kind === 'audioinput'
&& (device.label.toLowerCase().includes('blackhole')
|| device.label.toLowerCase().includes('aggregate device'))
);
this.blackHoleOutputDevice = devices.find(device =>
device.kind === 'audiooutput'
&& device.label.toLowerCase().includes('blackhole')
);
if (!this.blackHoleInputDevice) {
console.warn('BlackHole input device not found. Available devices:',
devices.filter(d => d.kind === 'audioinput').map(d => d.label));
return await this._initializeDefaultAudio();
}
console.log('Found BlackHole devices:', {
input: this.blackHoleInputDevice.label,
output: this.blackHoleOutputDevice?.label || 'Using default output'
});
await this._initializeBlackHoleRouting();
this.isInitialized = true;
return {
inputDevice: this.blackHoleInputDevice,
outputDevice: this.blackHoleOutputDevice,
deviceType: 'blackhole'
};
} catch (error) {
console.error('Error initializing BlackHole:', error);
throw error;
}
}
/**
* Initialize BlackHole routing with proper audio context setup.
*/
async _initializeBlackHoleRouting() {
// Create audio context with appropriate sample rate
this.outputContext = new AudioContext({
sampleRate: 48000, // Higher quality for better translation output
latencyHint: 'interactive' // Low latency for real-time translation
});
// Create gain node for volume control
this.gainNode = this.outputContext.createGain();
this.gainNode.gain.value = 0.8; // Slightly lower to prevent clipping
// Create destination for BlackHole output
this.outputDestination = this.outputContext.createMediaStreamDestination();
this.gainNode.connect(this.outputDestination);
console.log('BlackHole routing initialized successfully');
}
/**
* Initialize with default audio device as fallback.
*/
async _initializeDefaultAudio() {
const constraints = {
audio: {
sampleRate: 16000,
channelCount: 1,
echoCancellation: true,
noiseSuppression: true,
autoGainControl: true
}
};
this.inputStream = await navigator.mediaDevices.getUserMedia(constraints);
this.outputContext = new AudioContext({ sampleRate: 16000 });
console.log('Default audio initialized as fallback');
return {
inputStream: this.inputStream,
outputContext: this.outputContext,
deviceType: 'default'
};
}
/**
* Start audio routing.
*/
async startRouting() {
if (this.isRouting) {
console.warn('Audio routing already started');
return;
}
if (!this.inputStream || !this.outputContext) {
throw new Error('BlackHole not initialized');
}
this.isRouting = true;
console.log('Started audio routing');
}
/**
* Stop audio routing.
*/
async stopRouting() {
if (!this.isRouting) {
return;
}
this.isRouting = false;
if (this.inputStream) {
this.inputStream.getTracks().forEach(track => track.stop());
}
if (this.outputContext) {
await this.outputContext.close();
}
console.log('Stopped audio routing');
}
/**
* Route translated audio to output.
*/
async routeTranslatedAudio(audioBuffer) {
if (!this.outputContext || !this.isRouting) {
console.warn('Audio routing not active');
return;
}
try {
// Add to queue for processing
this.audioQueue.push(audioBuffer);
if (!this.processingQueue) {
this._processAudioQueue();
}
} catch (error) {
console.error('Error routing translated audio:', error);
}
}
/**
* Process queued audio buffers.
*/
async _processAudioQueue() {
if (this.processingQueue) {
return;
}
this.processingQueue = true;
while (this.audioQueue.length > 0 && this.isRouting) {
const audioBuffer = this.audioQueue.shift();
await this._playAudioBuffer(audioBuffer);
}
this.processingQueue = false;
}
/**
* Play audio buffer through BlackHole.
*/
async _playAudioBuffer(audioBuffer) {
if (!this.outputContext) {
return;
}
try {
const source = this.outputContext.createBufferSource();
source.buffer = audioBuffer;
source.connect(this.outputContext.destination);
source.start();
// Return promise that resolves when audio finishes playing
return new Promise(resolve => {
source.onended = resolve;
// Also resolve after buffer duration as fallback
setTimeout(resolve, audioBuffer.duration * 1000 + 100);
});
} catch (error) {
console.error('Error playing audio buffer:', error);
}
}
/**
* Create audio buffer from Float32Array.
*/
createAudioBuffer(float32Array, sampleRate = 16000) {
if (!this.outputContext) {
throw new Error('Output context not initialized');
}
const audioBuffer = this.outputContext.createBuffer(
1,
float32Array.length,
sampleRate
);
audioBuffer.copyToChannel(float32Array, 0);
return audioBuffer;
}
/**
* Get input stream for recording.
*/
getInputStream() {
return this.inputStream;
}
/**
* Get output context for audio processing.
*/
getOutputContext() {
return this.outputContext;
}
/**
* Check if routing is active.
*/
isActive() {
return this.isRouting;
}
/**
* Get available BlackHole devices.
*/
static async getBlackHoleDevices() {
try {
const devices = await navigator.mediaDevices.enumerateDevices();
return devices.filter(device =>
device.label.toLowerCase().includes('blackhole')
);
} catch (error) {
console.error('Error getting BlackHole devices:', error);
return [];
}
}
/**
* Check if BlackHole is available.
*/
static async isBlackHoleAvailable() {
const devices = await BlackHoleRouter.getBlackHoleDevices();
return devices.length > 0;
}
/**
* Create a virtual microphone stream that Jitsi Meet can use.
* This stream will contain the translated audio.
*/
createVirtualMicrophone() {
if (!this.outputDestination) {
throw new Error('BlackHole not properly initialized');
}
// Return the MediaStream that contains translated audio
// This can be used by Jitsi Meet as the microphone input
return this.outputDestination.stream;
}
/**
* Set up audio routing to feed translated audio into Jitsi Meet.
* Call this method after translation is complete.
*/
async routeToJitsiMeet(audioBuffer) {
if (!this.isInitialized) {
console.warn('BlackHole not initialized, cannot route audio');
return;
}
try {
// Create buffer source
const source = this.outputContext.createBufferSource();
source.buffer = audioBuffer;
// Connect through gain control to destination
source.connect(this.gainNode);
// Start playback
source.start();
console.log('Routed translated audio to BlackHole for Jitsi Meet');
return new Promise(resolve => {
source.onended = resolve;
setTimeout(resolve, audioBuffer.duration * 1000 + 100);
});
} catch (error) {
console.error('Error routing audio to Jitsi Meet:', error);
throw error;
}
}
/**
* Get the MediaStream for Jitsi Meet to use as microphone input.
*/
getJitsiMeetInputStream() {
if (!this.outputDestination) {
throw new Error('BlackHole not initialized');
}
return this.outputDestination.stream;
}
/**
* Set gain for translated audio output.
*/
setOutputGain(value) {
if (this.gainNode) {
this.gainNode.gain.value = Math.max(0, Math.min(1, value));
}
}
/**
* Get current output gain.
*/
getOutputGain() {
return this.gainNode ? this.gainNode.gain.value : 0;
}
}
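A hedged usage sketch: initialize the router, route a translated buffer, and expose the router's output stream as a virtual microphone. It assumes a BlackHole device is present so `initialize()` completes the BlackHole path, and that the `Float32Array` comes from the TTS stage:

```typescript
async function routeTranslation(router: BlackHoleRouter, ttsSamples: Float32Array): Promise<MediaStream> {
    await router.initialize();

    // Wrap the synthesized samples and play them through the gain-controlled path.
    const buffer = router.createAudioBuffer(ttsSamples, 16000);
    await router.routeToJitsiMeet(buffer);

    // Stream that Jitsi Meet could use in place of the raw microphone input.
    return router.getJitsiMeetInputStream();
}
```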

View File

@@ -0,0 +1,125 @@
import { env, pipeline } from '@xenova/transformers';
/**
* Whisper-based speech-to-text processor.
* Adapted from standalone-meeting-assist for universal translation.
*/
export class WhisperProcessor {
constructor() {
this.transcriber = null;
this.isModelLoading = true;
this.modelLoadingPromise = null;
}
/**
* Initialize the Whisper model.
*/
async initializeModel() {
if (this.modelLoadingPromise) {
return this.modelLoadingPromise;
}
this.modelLoadingPromise = this._loadModel();
return this.modelLoadingPromise;
}
async _loadModel() {
try {
console.log('Loading Whisper model...');
env.allowLocalModels = false;
env.useBrowserCache = false;
this.transcriber = await pipeline(
'automatic-speech-recognition',
'Xenova/whisper-base'
);
this.isModelLoading = false;
console.log('Whisper model loaded successfully');
return this.transcriber;
} catch (error) {
console.error('Error loading Whisper model:', error);
this.isModelLoading = false;
throw error;
}
}
/**
* Process audio data and return transcription with language detection.
*/
async processAudio(audioData, options = {}) {
if (!this.transcriber) {
throw new Error('Whisper model not initialized');
}
const {
language = 'auto',
chunkLengthS = 30,
strideLengthS = 5,
returnTimestamps = false
} = options;
try {
const result = await this.transcriber(audioData, {
chunk_length_s: chunkLengthS,
stride_length_s: strideLengthS,
language: language === 'auto' ? undefined : language,
return_timestamps: returnTimestamps
});
return {
text: result.text,
language: this._detectLanguage(result.text),
confidence: result.confidence || 0.95,
timestamps: result.chunks || []
};
} catch (error) {
console.error('Transcription error:', error);
throw error;
}
}
/**
* Simple language detection based on text patterns.
* In production, use a proper language detection service.
*/
_detectLanguage(text) {
// Simple heuristic-based language detection
// This should be replaced with a proper language detection service
const languagePatterns = {
'en': /^[a-zA-Z\s.,!?'"()-]+$/,
'es': /[ñáéíóúüÑÁÉÍÓÚÜ]/,
'fr': /[àâäçéèêëîïôöùûüÿÀÂÄÇÉÈÊËÎÏÔÖÙÛÜŸ]/,
'de': /[äöüßÄÖÜ]/,
'pt': /[ãõçÃÕÇ]/,
'it': /[àèéìíîòóù]/
};
for (const [ lang, pattern ] of Object.entries(languagePatterns)) {
if (pattern.test(text)) {
return lang;
}
}
return 'en'; // Default to English
}
/**
* Check if the model is ready for processing.
*/
isReady() {
return !this.isModelLoading && this.transcriber !== null;
}
/**
* Get model loading status.
*/
getLoadingStatus() {
return {
isLoading: this.isModelLoading,
isReady: this.isReady()
};
}
}
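A minimal usage sketch; the `Float32Array` input is assumed to come from `convertWebMToFloat32` in the audio utilities above:

```typescript
async function transcribeLocally(audio: Float32Array): Promise<string> {
    const processor = new WhisperProcessor();

    // Downloads and caches the Xenova/whisper-base model on first use.
    await processor.initializeModel();

    const { text, language } = await processor.processAudio(audio, { language: 'auto' });

    console.log(`Detected ${language}: ${text}`);

    return text;
}
```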

View File

@@ -0,0 +1,49 @@
import React, { useCallback } from 'react';
import { WithTranslation, withTranslation } from 'react-i18next';
import { useDispatch, useSelector } from 'react-redux';
import { IReduxState } from '../../app/types';
import { IconTranslate } from '../../base/icons/svg';
import ToolboxItem from '../../base/toolbox/components/ToolboxItem';
import { toggleUniversalTranslator } from '../actions';
import { IUniversalTranslatorState } from '../reducer';
/**
* Universal Translator toolbar button component.
*/
const UniversalTranslatorButton = ({ t, tReady, i18n }: WithTranslation) => {
const dispatch = useDispatch();
const universalTranslator: IUniversalTranslatorState = useSelector(
(state: IReduxState) => state['features/universal-translator']
);
const handleClick = useCallback(() => {
dispatch(toggleUniversalTranslator());
}, [ dispatch ]);
const handleKeyDown = useCallback((e?: React.KeyboardEvent) => {
if (e?.key === 'Enter' || e?.key === ' ') {
handleClick();
}
}, [ handleClick ]);
const isActive = universalTranslator?.showDialog || universalTranslator?.isRecording;
const tooltip = universalTranslator?.isRecording
? 'universalTranslator.recording'
: 'universalTranslator.tooltip';
return (
<ToolboxItem
accessibilityLabel = 'universalTranslator.accessibilityLabel'
i18n = { i18n }
icon = { IconTranslate }
labelProps = {{}}
onClick = { handleClick }
onKeyDown = { handleKeyDown }
tReady = { tReady }
toggled = { isActive }
tooltip = { tooltip } />
);
};
export default withTranslation()(UniversalTranslatorButton);

View File

@@ -0,0 +1,396 @@
import React, { useCallback, useEffect, useState } from 'react';
import { useDispatch, useSelector } from 'react-redux';
import { IReduxState } from '../../app/types';
import { hideDialog } from '../../base/dialog/actions';
import Dialog from '../../base/ui/components/web/Dialog';
import {
clearTranslationError,
disableUniversalTranslatorEffect,
enableUniversalTranslatorEffect,
initUniversalTranslator,
setApiKeys,
setSTTProvider,
setSourceLanguage,
setTTSProvider,
setTargetLanguage,
setTranslationError,
setTranslationProvider,
startTranslationRecording,
stopTranslationRecording
} from '../actions';
import { IUniversalTranslatorState } from '../reducer';
// Language options
const languages = [
{ code: 'en', name: 'English', flag: '🇺🇸' },
{ code: 'es', name: 'Spanish', flag: '🇪🇸' },
{ code: 'fr', name: 'French', flag: '🇫🇷' },
{ code: 'de', name: 'German', flag: '🇩🇪' },
{ code: 'it', name: 'Italian', flag: '🇮🇹' },
{ code: 'pt', name: 'Portuguese', flag: '🇵🇹' },
{ code: 'ro', name: 'Romanian', flag: '🇷🇴' },
{ code: 'ja', name: 'Japanese', flag: '🇯🇵' },
{ code: 'ko', name: 'Korean', flag: '🇰🇷' },
{ code: 'zh', name: 'Chinese', flag: '🇨🇳' }
];
// Service options
const sttOptions = [
{ id: 'whisper', name: 'Whisper (Local)', latency: '~200ms' },
{ id: 'groq', name: 'Groq Whisper', latency: '~100ms' },
{ id: 'deepgram', name: 'Deepgram Nova-2', latency: '~100ms' },
{ id: 'assemblyai', name: 'AssemblyAI Universal-2', latency: '~150ms' }
];
const ttsOptions = [
{ id: 'cartesia', name: 'Cartesia Sonic', latency: '~40ms' },
{ id: 'elevenlabs', name: 'ElevenLabs', latency: '~300ms' },
{ id: 'deepgram', name: 'Deepgram Aura', latency: '~400ms' },
{ id: 'webspeech', name: 'Web Speech API', latency: '~50ms' }
];
const translationOptions = [
{ id: 'openai', name: 'OpenAI GPT-4', latency: '~200ms' },
{ id: 'google', name: 'Google Translate', latency: '~150ms' },
{ id: 'microsoft', name: 'Microsoft Translator', latency: '~180ms' }
];
/**
* Universal Translator Dialog component.
*/
export const UniversalTranslatorDialog = () => {
const dispatch = useDispatch();
const universalTranslator: IUniversalTranslatorState = useSelector(
(state: IReduxState) => state['features/universal-translator']
);
const [ localApiKeys, setLocalApiKeys ] = useState(universalTranslator?.apiKeys || {});
const [ saveIndicator, setSaveIndicator ] = useState<string | null>(null);
useEffect(() => {
if (universalTranslator?.apiKeys) {
setLocalApiKeys(universalTranslator.apiKeys);
}
}, [ universalTranslator?.apiKeys ]);
// Initialize the universal translator service when dialog opens
useEffect(() => {
if (!universalTranslator?.isInitialized) {
console.log('Initializing Universal Translator service...');
dispatch(initUniversalTranslator({
sttProvider: universalTranslator?.sttProvider || 'deepgram',
ttsProvider: universalTranslator?.ttsProvider || 'cartesia',
translationProvider: universalTranslator?.translationProvider || 'openai',
apiKeys: universalTranslator?.apiKeys || {}
}));
}
}, [ dispatch, universalTranslator?.isInitialized ]);
const handleClose = useCallback(() => {
dispatch(hideDialog());
}, [ dispatch ]);
const handleSTTProviderChange = useCallback((provider: string) => {
dispatch(setSTTProvider(provider));
}, [ dispatch ]);
const handleTTSProviderChange = useCallback((provider: string) => {
dispatch(setTTSProvider(provider));
}, [ dispatch ]);
const handleTranslationProviderChange = useCallback((provider: string) => {
dispatch(setTranslationProvider(provider));
}, [ dispatch ]);
const handleSourceLanguageChange = useCallback((language: string) => {
dispatch(setSourceLanguage(language));
}, [ dispatch ]);
const handleTargetLanguageChange = useCallback((language: string) => {
dispatch(setTargetLanguage(language));
}, [ dispatch ]);
const handleStartTranslation = useCallback(() => {
// Validate API keys before starting
const requiredKeys: Record<string, boolean> = {
deepgram: universalTranslator?.sttProvider === 'deepgram',
openai: universalTranslator?.translationProvider === 'openai',
cartesia: universalTranslator?.ttsProvider === 'cartesia'
};
const missingKeys = Object.entries(requiredKeys)
.filter(([ key, required ]) => required && !localApiKeys[key as keyof typeof localApiKeys])
.map(([ key ]) => key);
if (missingKeys.length > 0) {
console.error('Missing API keys:', missingKeys);
dispatch(setTranslationError(`Missing API keys: ${missingKeys.join(', ')}`));
return;
}
console.log('Starting real-time translation with providers:', {
stt: universalTranslator?.sttProvider,
translation: universalTranslator?.translationProvider,
tts: universalTranslator?.ttsProvider
});
dispatch(startTranslationRecording());
}, [ dispatch, localApiKeys, universalTranslator ]);
const handleStopTranslation = useCallback(() => {
dispatch(stopTranslationRecording());
}, [ dispatch ]);
const handleApiKeyChange = useCallback((service: string, value: string) => {
const newKeys = { ...localApiKeys, [service]: value };
setLocalApiKeys(newKeys);
dispatch(setApiKeys(newKeys));
// Show save indicator
if (value.trim()) {
setSaveIndicator(service);
setTimeout(() => setSaveIndicator(null), 2000);
}
}, [ localApiKeys, dispatch ]);
const handleClearError = useCallback(() => {
dispatch(clearTranslationError());
}, [ dispatch ]);
const handleEffectToggle = useCallback((enabled: boolean) => {
if (enabled) {
dispatch(enableUniversalTranslatorEffect());
} else {
dispatch(disableUniversalTranslatorEffect());
}
}, [ dispatch ]);
const formatLatency = (latency: number) => {
return latency ? `${Math.round(latency)}ms` : '-';
};
const getTotalLatency = () => {
const { stt, translation, tts } = universalTranslator?.latencyMetrics || { stt: {}, translation: {}, tts: {} };
return (stt.lastLatency || 0) + (translation.lastLatency || 0) + (tts.lastLatency || 0);
};
return (
<Dialog
cancel = {{ hidden: true }}
ok = {{ hidden: true }}
onCancel = { handleClose }
size = 'large'
titleKey = 'universalTranslator.title'>
<div className = 'universal-translator-dialog'>
{/* Language Selection */}
<div className = 'language-selection'>
<h3>Language Settings</h3>
<div className = 'language-selectors'>
<div className = 'language-selector'>
<label>From:</label>
<select
onChange = { e => handleSourceLanguageChange(e.target.value) }
value = { universalTranslator?.sourceLanguage || 'en' }>
{languages.map(lang => (
<option
key = { lang.code }
value = { lang.code }>
{lang.flag} {lang.name}
</option>
))}
</select>
</div>
<div className = 'language-selector'>
<label>To:</label>
<select
onChange = { e => handleTargetLanguageChange(e.target.value) }
value = { universalTranslator?.targetLanguage || 'es' }>
{languages.map(lang => (
<option
key = { lang.code }
value = { lang.code }>
{lang.flag} {lang.name}
</option>
))}
</select>
</div>
</div>
</div>
{/* Service Selection */}
<div className = 'service-selection'>
<h3>Service Providers</h3>
<div className = 'service-group'>
<label>Speech-to-Text:</label>
<select
onChange = { e => handleSTTProviderChange(e.target.value) }
value = { universalTranslator?.sttProvider || 'whisper' }>
{sttOptions.map(option => (
<option
key = { option.id }
value = { option.id }>
{option.name} ({option.latency})
</option>
))}
</select>
</div>
<div className = 'service-group'>
<label>Translation:</label>
<select
onChange = { e => handleTranslationProviderChange(e.target.value) }
value = { universalTranslator?.translationProvider || 'openai' }>
{translationOptions.map(option => (
<option
key = { option.id }
value = { option.id }>
{option.name} ({option.latency})
</option>
))}
</select>
</div>
<div className = 'service-group'>
<label>Text-to-Speech:</label>
<select
onChange = { e => handleTTSProviderChange(e.target.value) }
value = { universalTranslator?.ttsProvider || 'cartesia' }>
{ttsOptions.map(option => (
<option
key = { option.id }
value = { option.id }>
{option.name} ({option.latency})
</option>
))}
</select>
</div>
<div className = 'service-group'>
<label>
<input
checked = { universalTranslator?.effectEnabled || false }
onChange = { e => handleEffectToggle(e.target.checked) }
type = 'checkbox' />
Route translated audio to conference (replaces your microphone)
</label>
</div>
</div>
{/* API Keys */}
<div className = 'api-keys-section'>
<h3>API Keys</h3>
<p className = 'persistence-note'>
API keys and preferences are automatically saved locally and will be remembered across sessions.
</p>
<div className = 'api-keys-grid'>
{Object.entries(localApiKeys).map(([ service, key ]) => (
<div
className = 'api-key-input'
key = { service }>
<label>{service.charAt(0).toUpperCase() + service.slice(1)}:</label>
<input
onChange = { e => handleApiKeyChange(service, e.target.value) }
placeholder = { `Enter ${service} API key` }
type = 'password'
value = { key } />
{saveIndicator === service && (
<span className = 'save-indicator'> Saved</span>
)}
</div>
))}
</div>
</div>
{/* Translation Status */}
<div className = 'translation-status'>
<h3>Translation Status</h3>
<div className = 'status-info'>
<div className = 'status-indicator'>
<span className = { `status-dot ${universalTranslator?.status || 'idle'}` } />
<span className = 'status-text'>
{universalTranslator?.isRecording ? 'Translating in real-time...'
: universalTranslator?.status === 'processing' ? 'Processing...'
: universalTranslator?.status === 'completed' ? 'Translation Complete'
: universalTranslator?.status === 'error' ? 'Error' : 'Ready'}
</span>
</div>
{getTotalLatency() > 0 && (
<div className = 'latency-info'>
Total Latency: {formatLatency(getTotalLatency())}
</div>
)}
</div>
{universalTranslator?.error && (
<div className = 'error-message'>
<span>{universalTranslator?.error}</span>
<button onClick = { handleClearError }>Clear</button>
</div>
)}
{universalTranslator?.transcriptionResult && (
<div className = 'transcription-result'>
<h4>Transcription:</h4>
<p>{universalTranslator?.transcriptionResult?.text}</p>
</div>
)}
{universalTranslator?.translationResult && (
<div className = 'translation-result'>
<h4>Translation:</h4>
<p>{universalTranslator?.translationResult?.translatedText}</p>
</div>
)}
</div>
{/* Performance Metrics */}
{universalTranslator?.status === 'completed' && (
<div className = 'performance-metrics'>
<h3>Performance Metrics</h3>
<div className = 'metrics-grid'>
<div className = 'metric'>
<label>STT Latency:</label>
<span>{formatLatency(universalTranslator?.latencyMetrics?.stt?.lastLatency)}</span>
</div>
<div className = 'metric'>
<label>Translation Latency:</label>
<span>{formatLatency(universalTranslator?.latencyMetrics?.translation?.lastLatency)}</span>
</div>
<div className = 'metric'>
<label>TTS Latency:</label>
<span>{formatLatency(universalTranslator?.latencyMetrics?.tts?.lastLatency)}</span>
</div>
<div className = 'metric'>
<label>Total Requests:</label>
<span>{universalTranslator?.latencyMetrics?.stt?.requestCount || 0}</span>
</div>
</div>
</div>
)}
{/* Control Buttons */}
<div className = 'control-buttons'>
{!universalTranslator?.isRecording ? (
<button
className = 'record-button'
disabled = { universalTranslator?.status === 'processing' }
onClick = { handleStartTranslation }>
🗣 Start Real-time Translation
</button>
) : (
<button
className = 'stop-button'
onClick = { handleStopTranslation }>
Stop Translation
</button>
)}
</div>
</div>
</Dialog>
);
};

View File

@@ -0,0 +1,2 @@
export { UniversalTranslatorDialog } from './UniversalTranslatorDialog';
export { default as UniversalTranslatorButton } from './UniversalTranslatorButton';

View File

@@ -0,0 +1,160 @@
import { IReduxState } from '../app/types';
import { IUniversalTranslatorState } from './reducer';
/**
* Gets the universal translator state from Redux store.
*
* @param {Object} state - The Redux state.
* @returns {Object} The universal translator state.
*/
export function getUniversalTranslatorState(state: IReduxState): IUniversalTranslatorState {
return state['features/universal-translator'];
}
/**
* Checks if the universal translator is available/enabled.
*
* @param {Object} state - The Redux state.
* @returns {boolean} True if universal translator is available.
*/
export function isUniversalTranslatorAvailable(state: IReduxState): boolean {
const translatorState = getUniversalTranslatorState(state);
return Boolean(translatorState?.isInitialized);
}
/**
* Checks if universal translator is currently recording.
*
* @param {Object} state - The Redux state.
* @returns {boolean} True if recording is active.
*/
export function isUniversalTranslatorRecording(state: IReduxState): boolean {
const translatorState = getUniversalTranslatorState(state);
return Boolean(translatorState?.isRecording);
}
/**
* Gets the current translation status.
*
* @param {Object} state - The Redux state.
* @returns {string} The current status.
*/
export function getTranslationStatus(state: IReduxState): string {
const translatorState = getUniversalTranslatorState(state);
return translatorState?.status || 'idle';
}
/**
* Checks if universal translator dialog is open.
*
* @param {Object} state - The Redux state.
* @returns {boolean} True if dialog is open.
*/
export function isUniversalTranslatorDialogOpen(state: IReduxState): boolean {
const translatorState = getUniversalTranslatorState(state);
return Boolean(translatorState?.showDialog);
}
/**
* Gets the current latency metrics.
*
* @param {Object} state - The Redux state.
* @returns {Object} The latency metrics.
*/
export function getLatencyMetrics(state: IReduxState) {
const translatorState = getUniversalTranslatorState(state);
return translatorState?.latencyMetrics || {
stt: { averageLatency: 0, lastLatency: 0, requestCount: 0 },
translation: { averageLatency: 0, lastLatency: 0, requestCount: 0 },
tts: { averageLatency: 0, lastLatency: 0, requestCount: 0 }
};
}
/**
* Gets the total end-to-end latency.
*
* @param {Object} state - The Redux state.
* @returns {number} Total latency in milliseconds.
*/
export function getTotalLatency(state: IReduxState): number {
const metrics = getLatencyMetrics(state);
return (metrics.stt.lastLatency || 0)
+ (metrics.translation.lastLatency || 0)
+ (metrics.tts.lastLatency || 0);
}
/**
* Checks if all required API keys are configured for current providers.
*
* @param {Object} state - The Redux state.
* @returns {boolean} True if all required keys are present.
*/
export function areApiKeysConfigured(state: IReduxState): boolean {
const translatorState = getUniversalTranslatorState(state);
if (!translatorState) {
return false;
}
const { sttProvider, ttsProvider, translationProvider, apiKeys } = translatorState;
// Check if required API keys are present (skip local/free providers)
const requiredKeys = [];
if (sttProvider !== 'whisper') {
requiredKeys.push(sttProvider);
}
if (ttsProvider !== 'webspeech') {
requiredKeys.push(ttsProvider);
}
requiredKeys.push(translationProvider);
return requiredKeys.every(provider =>
apiKeys[provider as keyof typeof apiKeys] && apiKeys[provider as keyof typeof apiKeys].length > 0
);
}
/**
* Gets supported languages for the current configuration.
*
* @param {Object} state - The Redux state.
* @returns {Array} Array of supported language codes.
*/
export function getSupportedLanguages(state: IReduxState): string[] {
// Common languages supported by most providers
return [
'en', 'es', 'fr', 'de', 'it', 'pt', 'ru',
'ja', 'ko', 'zh', 'ar', 'hi', 'tr', 'pl', 'nl'
];
}
/**
* Gets the current provider configuration summary.
*
* @param {Object} state - The Redux state.
* @returns {Object} Provider configuration summary.
*/
export function getProviderConfiguration(state: IReduxState) {
const translatorState = getUniversalTranslatorState(state);
if (!translatorState) {
return null;
}
return {
stt: translatorState.sttProvider,
translation: translatorState.translationProvider,
tts: translatorState.ttsProvider,
sourceLanguage: translatorState.sourceLanguage,
targetLanguage: translatorState.targetLanguage
};
}
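
A minimal sketch of how UI code might read these selectors with react-redux; the component name and the relative './functions' import path are assumptions for illustration, not part of this change:

import React from 'react';
import { useSelector } from 'react-redux';

import { IReduxState } from '../app/types';
import {
    areApiKeysConfigured,
    getTotalLatency,
    getTranslationStatus
} from './functions';

// Hypothetical read-only status badge built on the selectors above.
const TranslatorStatusBadge = () => {
    const status = useSelector((state: IReduxState) => getTranslationStatus(state));
    const keysReady = useSelector(areApiKeysConfigured);
    const totalLatency = useSelector(getTotalLatency);

    return (
        <span>
            { status }{ keysReady ? '' : ' (API keys missing)' }{ totalLatency > 0 ? ` ${Math.round(totalLatency)}ms` : '' }
        </span>
    );
};

export default TranslatorStatusBadge;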

View File

@@ -0,0 +1,3 @@
import './middleware';
import './middleware/index';
import './reducer';

View File

@@ -0,0 +1,6 @@
import { getLogger } from '../base/logging/functions';
/**
* The logger for the universal translator feature.
*/
export default getLogger('features/universal-translator');
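
A brief, illustrative usage of the logger defined above (the message strings are made up):

import logger from './logger';

logger.info('Universal Translator Service initialized');
logger.error('Failed to start translation recording');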

View File

@@ -0,0 +1,462 @@
import { IStore } from '../app/types';
import MiddlewareRegistry from '../base/redux/MiddlewareRegistry';
import {
INIT_UNIVERSAL_TRANSLATOR,
START_TRANSLATION_RECORDING,
STOP_TRANSLATION_RECORDING
} from './actionTypes';
import {
setTranscriptionResult,
setTranslationError,
setTranslationResult,
updateLatencyMetrics,
updateProcessingStep,
updateTranslationStatus
} from './actions';
// @ts-ignore - whisper-processor is a .js file without types
// import { WhisperProcessor } from './audio/whisper-processor';
// @ts-ignore - audio-utils is a .js file without types
import { convertWebMToFloat32, createAudioRecorder, getUserMediaForSpeech, float32ArrayToBlob } from './audio/audio-utils';
// @ts-ignore - blackhole-router is a .js file without types
import { BlackHoleRouter } from './audio/blackhole-router';
import { getUniversalTranslatorEffect } from './middleware/streamEffectMiddleware';
import { STTProviderFactory } from './services/stt-providers';
import { TranslationProviderFactory } from './services/translation';
import { TTSProviderFactory } from './services/tts-providers';
/**
* Universal translator service instance.
*/
let translatorService: UniversalTranslatorService | null = null;
/**
* Universal Translator Service class that orchestrates the translation pipeline.
*/
class UniversalTranslatorService {
// private whisperProcessor: WhisperProcessor;
private sttProviders: Map<string, any> = new Map();
private ttsProviders: Map<string, any> = new Map();
private translationProviders: Map<string, any> = new Map();
private blackHoleRouter: BlackHoleRouter;
private mediaRecorder: MediaRecorder | null = null;
private audioChunks: Blob[] = [];
private stream: MediaStream | null = null;
private processingInterval: number = 3000; // Process every 3 seconds
private isRecordingContinuously: boolean = false;
private isProcessingChunk: boolean = false; // Prevent overlapping processing
private intervalId: any = null;
private dispatch: IStore['dispatch'];
private getState: IStore['getState'];
constructor(dispatch: IStore['dispatch'], getState: IStore['getState']) {
this.dispatch = dispatch;
this.getState = getState;
// this.whisperProcessor = new WhisperProcessor();
this.blackHoleRouter = new BlackHoleRouter();
}
/**
* Initialize the translation service.
*/
async initialize(config: any) {
try {
console.log('Initializing Universal Translator Service...');
// Whisper processor initialization is currently disabled (missing @xenova/transformers dependency)
// await this.whisperProcessor.initializeModel();
// Initialize BlackHole router
await this.blackHoleRouter.initialize();
console.log('Universal Translator Service initialized successfully');
} catch (error) {
console.error('Failed to initialize Universal Translator Service:', error);
this.dispatch(setTranslationError(`Initialization failed: ${error instanceof Error ? error.message : 'Unknown error'}`));
}
}
/**
* Start translation recording.
*/
async startRecording() {
try {
console.log('Starting universal translator real-time translation...');
this.dispatch(updateProcessingStep('recording'));
this.dispatch(updateTranslationStatus('recording'));
// Get audio stream (prefer BlackHole if available)
this.stream = this.blackHoleRouter.getInputStream() || await getUserMediaForSpeech();
console.log('Audio stream acquired');
// Create media recorder for continuous recording without time slicing
// This will record continuously and we'll process complete recordings at intervals
this.mediaRecorder = createAudioRecorder(this.stream, {
mimeType: 'audio/webm;codecs=opus'
});
this.audioChunks = [];
this.isRecordingContinuously = true;
if (this.mediaRecorder) {
this.mediaRecorder.ondataavailable = async (event) => {
if (event.data.size > 0 && this.isRecordingContinuously) {
console.log(`Complete audio recording received: ${event.data.size} bytes`);
if (!this.isProcessingChunk) {
this.isProcessingChunk = true;
try {
// Convert complete WebM file to WAV
const float32Array = await convertWebMToFloat32(event.data);
const wavBlob = float32ArrayToBlob(float32Array);
await this.processAudioChunk(wavBlob);
} catch (conversionError) {
console.warn('WebM conversion failed:', conversionError);
} finally {
this.isProcessingChunk = false;
}
}
}
};
this.mediaRecorder.onstop = () => {
console.log('MediaRecorder stopped');
if (this.isRecordingContinuously) {
// Restart recording if we're still supposed to be recording
setTimeout(() => {
if (this.mediaRecorder && this.isRecordingContinuously) {
this.mediaRecorder.start();
console.log('MediaRecorder restarted for next interval');
}
}, 100);
}
};
this.mediaRecorder.onerror = (event) => {
console.error('MediaRecorder error:', event);
};
this.mediaRecorder.onstart = () => {
console.log('MediaRecorder started successfully');
};
}
// Start recording without time slicing (will record until manually stopped)
this.mediaRecorder?.start();
// Set up interval to stop and restart recording every few seconds for processing
this.intervalId = setInterval(() => {
if (this.mediaRecorder && this.mediaRecorder.state === 'recording' && !this.isProcessingChunk) {
console.log(`Stopping recording for processing (${this.processingInterval}ms interval)`);
this.mediaRecorder.stop();
}
}, this.processingInterval);
console.log(`Real-time translation started - processing every ${this.processingInterval}ms`);
} catch (error) {
console.error('Failed to start recording:', error);
this.dispatch(setTranslationError(`Recording failed: ${error instanceof Error ? error.message : 'Unknown error'}`));
}
}
/**
* Stop translation recording.
*/
async stopRecording() {
this.isRecordingContinuously = false;
this.isProcessingChunk = false;
// Clear the interval
if (this.intervalId) {
clearInterval(this.intervalId);
this.intervalId = null;
}
// Clear any remaining chunks
this.audioChunks = [];
if (this.mediaRecorder && this.mediaRecorder.state === 'recording') {
this.mediaRecorder.stop();
}
// Always release the microphone, even if the recorder was not actively recording
if (this.stream) {
this.stream.getTracks().forEach(track => track.stop());
this.stream = null;
}
}
/**
* Process a single audio chunk for real-time translation.
*/
private async processAudioChunk(audioChunk: Blob) {
try {
const state = this.getState();
const universalTranslator = state['features/universal-translator'];
if (!universalTranslator || !this.isRecordingContinuously) {
return;
}
// Check if audio chunk is large enough (minimum 1KB)
if (audioChunk.size < 1024) {
console.log('Skipping chunk - too small:', audioChunk.size, 'bytes');
return;
}
console.log('Processing audio chunk for real-time translation...', audioChunk.size, 'bytes');
// Step 1: Speech-to-Text
const transcriptionResult = await this.performSTT(audioChunk, universalTranslator);
// Skip if no meaningful transcription
if (!transcriptionResult.text || transcriptionResult.text.trim().length < 2) {
console.log('Skipping chunk - no meaningful speech detected:', transcriptionResult.text);
return;
}
this.dispatch(setTranscriptionResult(transcriptionResult));
// Step 2: Translation
const translationResult = await this.performTranslation(
transcriptionResult.text,
universalTranslator.sourceLanguage,
universalTranslator.targetLanguage,
universalTranslator
);
this.dispatch(setTranslationResult(translationResult));
// Step 3: Text-to-Speech
const ttsResult = await this.performTTS(
translationResult.translatedText,
universalTranslator.targetLanguage,
universalTranslator
);
// Step 4: Audio Playback
await this.playTranslatedAudio(ttsResult.audioBlob);
console.log('Real-time translation chunk completed successfully');
} catch (error) {
console.error('Failed to process audio chunk:', error);
// Don't dispatch error for individual chunks to avoid spam
}
}
/**
* Perform speech-to-text conversion.
*/
private async performSTT(audioBlob: Blob, config: any) {
const startTime = performance.now();
try {
let result;
if (config.sttProvider === 'whisper') {
// Use local Whisper processing (currently disabled - missing @xenova/transformers dependency)
// const audioData = await convertWebMToFloat32(audioBlob);
// result = await this.whisperProcessor.processAudio(audioData);
throw new Error('Local Whisper processing is not available. Please use an external STT provider.');
} else {
// Use external STT provider
const provider = await this.getOrCreateSTTProvider(config.sttProvider, config.apiKeys);
result = await provider.transcribe(audioBlob);
}
const endTime = performance.now();
const latency = endTime - startTime;
this.dispatch(updateLatencyMetrics({
stt: {
lastLatency: latency,
averageLatency: latency, // Will be properly calculated by provider
requestCount: 1
}
}));
return result;
} catch (error) {
throw new Error(`STT failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
}
}
/**
* Perform text translation.
*/
private async performTranslation(text: string, sourceLang: string, targetLang: string, config: any) {
const startTime = performance.now();
try {
const provider = await this.getOrCreateTranslationProvider(config.translationProvider, config.apiKeys);
const result = await provider.translate(text, sourceLang, targetLang);
const endTime = performance.now();
const latency = endTime - startTime;
this.dispatch(updateLatencyMetrics({
translation: {
lastLatency: latency,
averageLatency: latency,
requestCount: 1
}
}));
return result;
} catch (error) {
throw new Error(`Translation failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
}
}
/**
* Perform text-to-speech synthesis.
*/
private async performTTS(text: string, language: string, config: any) {
const startTime = performance.now();
try {
const provider = await this.getOrCreateTTSProvider(config.ttsProvider, config.apiKeys);
const result = await provider.synthesize(text, language);
const endTime = performance.now();
const latency = endTime - startTime;
this.dispatch(updateLatencyMetrics({
tts: {
lastLatency: latency,
averageLatency: latency,
requestCount: 1
}
}));
return result;
} catch (error) {
throw new Error(`TTS failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
}
}
/**
* Play translated audio through Universal Translator Effect or fallback methods.
*/
private async playTranslatedAudio(audioBlob: Blob) {
try {
const effect = getUniversalTranslatorEffect();
if (effect && effect.isActive()) {
// Convert blob to audio buffer and route through the effect
const audioBuffer = await effect.createAudioBufferFromBlob(audioBlob);
await effect.playTranslatedAudio(audioBuffer);
console.log('Translated audio routed to Jitsi Meet via UniversalTranslatorEffect');
} else if (this.blackHoleRouter.isActive()) {
// Fallback to BlackHole if effect is not active
const audioContext = this.blackHoleRouter.getOutputContext();
const arrayBuffer = await audioBlob.arrayBuffer();
const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
await this.blackHoleRouter.routeToJitsiMeet(audioBuffer);
console.log('Translated audio routed to Jitsi Meet via BlackHole (fallback)');
} else {
// Final fallback to regular audio playback
const audio = new Audio(URL.createObjectURL(audioBlob));
await audio.play();
console.log('Translated audio played via default output (fallback)');
}
} catch (error) {
console.warn('Audio playback failed:', error);
// Non-critical error, don't throw
}
}
/**
* Get or create STT provider instance.
*/
private async getOrCreateSTTProvider(providerName: string, apiKeys: any) {
if (!this.sttProviders.has(providerName)) {
console.log(`Creating STT provider: ${providerName} with API key: ${apiKeys[providerName] ? 'present' : 'missing'}`);
const provider = STTProviderFactory.create(providerName, {
apiKey: apiKeys[providerName]
});
await provider.initialize();
this.sttProviders.set(providerName, provider);
console.log(`STT provider ${providerName} initialized successfully`);
}
return this.sttProviders.get(providerName);
}
/**
* Get or create TTS provider instance.
*/
private async getOrCreateTTSProvider(providerName: string, apiKeys: any) {
if (!this.ttsProviders.has(providerName)) {
console.log(`Creating TTS provider: ${providerName} with API key: ${apiKeys[providerName] ? 'present' : 'missing'}`);
const provider = TTSProviderFactory.create(providerName, {
apiKey: apiKeys[providerName]
});
await provider.initialize();
this.ttsProviders.set(providerName, provider);
console.log(`TTS provider ${providerName} initialized successfully`);
}
return this.ttsProviders.get(providerName);
}
/**
* Get or create translation provider instance.
*/
private async getOrCreateTranslationProvider(providerName: string, apiKeys: any) {
if (!this.translationProviders.has(providerName)) {
console.log(`Creating translation provider: ${providerName} with API key: ${apiKeys[providerName] ? 'present' : 'missing'}`);
const provider = TranslationProviderFactory.create(providerName, {
apiKey: apiKeys[providerName]
});
await provider.initialize();
this.translationProviders.set(providerName, provider);
console.log(`Translation provider ${providerName} initialized successfully`);
}
return this.translationProviders.get(providerName);
}
}
/**
* Middleware to handle universal translator actions.
*/
MiddlewareRegistry.register((store: IStore) => (next: Function) => (action: any) => {
const { dispatch, getState } = store;
switch (action.type) {
case INIT_UNIVERSAL_TRANSLATOR:
if (!translatorService) {
translatorService = new UniversalTranslatorService(dispatch, getState);
translatorService.initialize(action.config);
}
break;
case START_TRANSLATION_RECORDING:
if (translatorService) {
translatorService.startRecording();
} else {
dispatch(setTranslationError('Translator service not initialized'));
}
break;
case STOP_TRANSLATION_RECORDING:
if (translatorService) {
translatorService.stopRecording();
}
break;
}
return next(action);
});
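
A minimal sketch of driving this middleware from UI code by dispatching the three action types it handles; the feature presumably exposes action creators in ./actions for this, so the plain action objects and the hook name below are only illustrative:

import { useDispatch } from 'react-redux';

import {
    INIT_UNIVERSAL_TRANSLATOR,
    START_TRANSLATION_RECORDING,
    STOP_TRANSLATION_RECORDING
} from './actionTypes';

// Hypothetical hook wrapping the translator lifecycle.
export function useTranslatorControls() {
    const dispatch = useDispatch();

    return {
        init: (config: any = {}) => dispatch({ type: INIT_UNIVERSAL_TRANSLATOR, config }),
        start: () => dispatch({ type: START_TRANSLATION_RECORDING }),
        stop: () => dispatch({ type: STOP_TRANSLATION_RECORDING })
    };
}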

View File

@@ -0,0 +1 @@
import './streamEffectMiddleware';

View File

@@ -0,0 +1,137 @@
import { IStore } from '../../app/types';
import { CONFERENCE_JOINED } from '../../base/conference/actionTypes';
import MiddlewareRegistry from '../../base/redux/MiddlewareRegistry';
import { getLocalJitsiAudioTrack } from '../../base/tracks/functions.any';
import { UniversalTranslatorEffect } from '../../stream-effects/universal-translator';
import {
DISABLE_UNIVERSAL_TRANSLATOR_EFFECT,
ENABLE_UNIVERSAL_TRANSLATOR_EFFECT,
START_TRANSLATION_RECORDING
} from '../actionTypes';
import {
disableUniversalTranslatorEffect,
enableUniversalTranslatorEffect
} from '../actions';
/**
* Global reference to the Universal Translator effect instance.
*/
let universalTranslatorEffect: UniversalTranslatorEffect | null = null;
/**
* Middleware to handle Universal Translator stream effect integration.
*/
MiddlewareRegistry.register((store: IStore) => (next: Function) => (action: any) => {
const { dispatch, getState } = store;
switch (action.type) {
case CONFERENCE_JOINED:
// Initialize effect when conference is joined
_initializeUniversalTranslatorEffect(store);
break;
case START_TRANSLATION_RECORDING:
// Enable effect when translation starts
if (!getState()['features/universal-translator']?.effectEnabled) {
dispatch(enableUniversalTranslatorEffect());
}
break;
case ENABLE_UNIVERSAL_TRANSLATOR_EFFECT:
_enableEffect(store);
break;
case DISABLE_UNIVERSAL_TRANSLATOR_EFFECT:
_disableEffect(store);
break;
}
return next(action);
});
/**
* Initialize the Universal Translator effect.
*/
async function _initializeUniversalTranslatorEffect(store: IStore) {
const { getState } = store;
try {
// Create effect instance if it doesn't exist
if (!universalTranslatorEffect) {
universalTranslatorEffect = new UniversalTranslatorEffect();
console.log('UniversalTranslatorEffect: Effect instance created');
}
} catch (error) {
console.error('Failed to initialize Universal Translator effect:', error);
}
}
/**
* Enable the Universal Translator effect on the local audio track.
*/
async function _enableEffect(store: IStore) {
const { getState } = store;
if (!universalTranslatorEffect) {
console.warn('Universal Translator effect not initialized');
return;
}
try {
const state = getState();
const conference = state['features/base/conference'].conference;
const localAudioTrack = getLocalJitsiAudioTrack(state);
if (!conference || !localAudioTrack) {
console.warn('Conference or local audio track not available');
return;
}
// Apply the effect to the local audio track
if (localAudioTrack && localAudioTrack.setEffect) {
await localAudioTrack.setEffect(universalTranslatorEffect);
} else {
console.warn('Local audio track does not support effects');
return;
}
console.log('UniversalTranslatorEffect: Effect enabled on local audio track');
} catch (error) {
console.error('Failed to enable Universal Translator effect:', error);
}
}
/**
* Disable the Universal Translator effect on the local audio track.
*/
async function _disableEffect(store: IStore) {
const { getState } = store;
if (!universalTranslatorEffect) {
return;
}
try {
const state = getState();
const localAudioTrack = getLocalJitsiAudioTrack(state);
if (localAudioTrack && localAudioTrack.setEffect) {
// Remove the effect from the local audio track
await localAudioTrack.setEffect(undefined);
}
console.log('UniversalTranslatorEffect: Effect disabled on local audio track');
} catch (error) {
console.error('Failed to disable Universal Translator effect:', error);
}
}
/**
* Get the Universal Translator effect instance.
* This is used by the translation middleware to send translated audio to the effect.
*/
export function getUniversalTranslatorEffect(): UniversalTranslatorEffect | null {
return universalTranslatorEffect;
}
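
A small sketch of the consumer side of getUniversalTranslatorEffect(): code holding a translated audio Blob can route it into the conference while the effect is active. The helper below is hypothetical; the effect methods it calls are the same ones used by the translation middleware in this change:

import { getUniversalTranslatorEffect } from './middleware/streamEffectMiddleware';

// Hypothetical helper: returns true if the blob was routed through the effect,
// false if the caller should fall back to regular playback.
async function routeTranslatedBlob(audioBlob: Blob): Promise<boolean> {
    const effect = getUniversalTranslatorEffect();

    if (!effect || !effect.isActive()) {
        return false;
    }

    const buffer = await effect.createAudioBufferFromBlob(audioBlob);

    await effect.playTranslatedAudio(buffer);

    return true;
}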

View File

@@ -0,0 +1,268 @@
import ReducerRegistry from '../base/redux/ReducerRegistry';
import PersistenceRegistry from '../base/redux/PersistenceRegistry';
import {
CLEAR_TRANSLATION_ERROR,
DISABLE_UNIVERSAL_TRANSLATOR_EFFECT,
ENABLE_UNIVERSAL_TRANSLATOR_EFFECT,
INIT_UNIVERSAL_TRANSLATOR,
SET_API_KEYS,
SET_SOURCE_LANGUAGE,
SET_STT_PROVIDER,
SET_TARGET_LANGUAGE,
SET_TRANSCRIPTION_RESULT,
SET_TRANSLATION_ERROR,
SET_TRANSLATION_PROVIDER,
SET_TRANSLATION_RESULT,
SET_TTS_PROVIDER,
START_TRANSLATION_RECORDING,
STOP_TRANSLATION_RECORDING,
TOGGLE_UNIVERSAL_TRANSLATOR,
UPDATE_LATENCY_METRICS,
UPDATE_PROCESSING_STEP,
UPDATE_TRANSLATION_STATUS
} from './actionTypes';
/**
* Initial state for the universal translator feature.
*/
const DEFAULT_STATE = {
isInitialized: false,
isRecording: false,
showDialog: false,
status: 'idle',
currentStep: null,
sttProvider: 'deepgram',
ttsProvider: 'cartesia',
translationProvider: 'openai',
sourceLanguage: 'en',
targetLanguage: 'es',
transcriptionResult: null,
translationResult: null,
latencyMetrics: {
stt: { averageLatency: 0, lastLatency: 0, requestCount: 0 },
translation: { averageLatency: 0, lastLatency: 0, requestCount: 0 },
tts: { averageLatency: 0, lastLatency: 0, requestCount: 0 }
},
error: null,
apiKeys: {
openai: '',
groq: '',
deepgram: '',
assemblyai: '',
cartesia: '',
elevenlabs: '',
azure: '',
google: '',
microsoft: ''
},
config: null,
effectEnabled: false
};
export interface IUniversalTranslatorState {
apiKeys: {
assemblyai: string;
azure: string;
cartesia: string;
deepgram: string;
elevenlabs: string;
google: string;
groq: string;
microsoft: string;
openai: string;
};
config: any;
currentStep: string | null;
effectEnabled: boolean;
error: string | null;
isInitialized: boolean;
isRecording: boolean;
latencyMetrics: {
stt: { averageLatency: number; lastLatency: number; requestCount: number; };
translation: { averageLatency: number; lastLatency: number; requestCount: number; };
tts: { averageLatency: number; lastLatency: number; requestCount: number; };
};
showDialog: boolean;
sourceLanguage: string;
status: string;
sttProvider: string;
targetLanguage: string;
transcriptionResult: any;
translationProvider: string;
translationResult: any;
ttsProvider: string;
}
/**
* Reduces redux actions for the universal translator feature.
*
* @param {Object} state - The current state.
* @param {Object} action - The redux action.
* @returns {Object} The new state after applying the action.
*/
ReducerRegistry.register<IUniversalTranslatorState>('features/universal-translator',
(state = DEFAULT_STATE, action): IUniversalTranslatorState => {
switch (action.type) {
case INIT_UNIVERSAL_TRANSLATOR:
return {
...state,
isInitialized: true,
config: action.config
};
case SET_STT_PROVIDER:
return {
...state,
sttProvider: action.provider
};
case SET_TTS_PROVIDER:
return {
...state,
ttsProvider: action.provider
};
case SET_TRANSLATION_PROVIDER:
return {
...state,
translationProvider: action.provider
};
case SET_SOURCE_LANGUAGE:
return {
...state,
sourceLanguage: action.language
};
case SET_TARGET_LANGUAGE:
return {
...state,
targetLanguage: action.language
};
case START_TRANSLATION_RECORDING:
return {
...state,
isRecording: true,
status: 'recording',
error: null,
transcriptionResult: null,
translationResult: null
};
case STOP_TRANSLATION_RECORDING:
return {
...state,
isRecording: false,
status: 'processing'
};
case UPDATE_TRANSLATION_STATUS:
return {
...state,
status: action.status
};
case UPDATE_PROCESSING_STEP:
return {
...state,
currentStep: action.step
};
case SET_TRANSCRIPTION_RESULT:
return {
...state,
transcriptionResult: action.result
};
case SET_TRANSLATION_RESULT:
return {
...state,
translationResult: action.result,
status: 'completed'
};
case UPDATE_LATENCY_METRICS:
return {
...state,
latencyMetrics: {
...state.latencyMetrics,
...action.metrics
}
};
case SET_TRANSLATION_ERROR:
return {
...state,
error: action.error,
status: 'error',
isRecording: false
};
case CLEAR_TRANSLATION_ERROR:
return {
...state,
error: null
};
case SET_API_KEYS:
return {
...state,
apiKeys: {
...state.apiKeys,
...action.keys
}
};
case TOGGLE_UNIVERSAL_TRANSLATOR:
return {
...state,
showDialog: !state.showDialog
};
case ENABLE_UNIVERSAL_TRANSLATOR_EFFECT:
return {
...state,
effectEnabled: true
};
case DISABLE_UNIVERSAL_TRANSLATOR_EFFECT:
return {
...state,
effectEnabled: false
};
default:
return state;
}
});
/**
* Register for persistence to save API keys and user preferences across sessions.
* Only persist configuration data, not temporary state like recording status or results.
*/
PersistenceRegistry.register('features/universal-translator', {
// Persist API keys - the most important for user experience
apiKeys: true,
// Persist user preferences
sttProvider: true,
ttsProvider: true,
translationProvider: true,
sourceLanguage: true,
targetLanguage: true,
effectEnabled: true,
// Don't persist temporary state
isInitialized: false,
isRecording: false,
showDialog: false,
status: false,
currentStep: false,
transcriptionResult: false,
translationResult: false,
latencyMetrics: false,
error: false,
config: false
}, DEFAULT_STATE);
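
A small, hypothetical helper illustrating the persistence behaviour: because 'apiKeys' is listed as true above, a key merged in via SET_API_KEYS survives a page reload, while fields marked false (status, results, metrics) reset to DEFAULT_STATE:

import { IStore } from '../app/types';
import { SET_API_KEYS } from './actionTypes';

// Hypothetical helper: merge a single provider key into the persisted apiKeys map.
function saveDeepgramApiKey(dispatch: IStore['dispatch'], key: string) {
    dispatch({
        type: SET_API_KEYS,
        keys: { deepgram: key }
    });
}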

View File

@@ -0,0 +1,390 @@
// @ts-nocheck
/**
* Speech-to-Text service providers for latency comparison
* Supports multiple STT services for benchmarking.
*/
// import { WhisperProcessor } from '../audio/whisper-processor.js';
/**
* Base STT Provider class.
*/
export class STTProvider {
constructor(name, config = {}) {
this.name = name;
this.config = config;
this.isInitialized = false;
}
async initialize() {
throw new Error('initialize() must be implemented by subclass');
}
async transcribe(audioData) {
throw new Error('transcribe() must be implemented by subclass');
}
getLatencyMetrics() {
return {
averageLatency: 0,
lastLatency: 0,
requestCount: 0
};
}
}
/**
* Whisper (local) STT Provider.
*/
export class WhisperSTTProvider extends STTProvider {
constructor(config = {}) {
super('Whisper (Local)', config);
// this.processor = new WhisperProcessor();
this.latencyMetrics = {
averageLatency: 0,
lastLatency: 0,
requestCount: 0
};
}
async initialize() {
try {
// await this.processor.initializeModel();
// this.isInitialized = true;
// console.log('Whisper STT Provider initialized');
throw new Error('Local Whisper processing is not available. Missing @xenova/transformers dependency.');
} catch (error) {
console.error('Failed to initialize Whisper STT:', error);
throw error;
}
}
async transcribe(audioData) {
// if (!this.isInitialized) {
// throw new Error('Whisper STT not initialized');
// }
const startTime = performance.now();
try {
// const result = await this.processor.processAudio(audioData);
throw new Error('Local Whisper processing is not available. Please use an external STT provider.');
} catch (error) {
console.error('Whisper transcription error:', error);
throw error;
}
}
_updateLatencyMetrics(latency) {
this.latencyMetrics.requestCount++;
this.latencyMetrics.lastLatency = latency;
this.latencyMetrics.averageLatency
= (this.latencyMetrics.averageLatency * (this.latencyMetrics.requestCount - 1) + latency)
/ this.latencyMetrics.requestCount;
}
getLatencyMetrics() {
return { ...this.latencyMetrics };
}
}
/**
* Groq STT Provider.
*/
export class GroqSTTProvider extends STTProvider {
constructor(config = {}) {
super('Groq Whisper', config);
this.apiKey = config.apiKey;
this.model = config.model || 'whisper-large-v3-turbo';
this.baseUrl = 'https://api.groq.com/openai/v1/audio/transcriptions';
this.latencyMetrics = {
averageLatency: 0,
lastLatency: 0,
requestCount: 0
};
}
async initialize() {
if (!this.apiKey) {
throw new Error('Groq API key is required');
}
this.isInitialized = true;
console.log('Groq STT Provider initialized');
}
async transcribe(audioBlob) {
if (!this.isInitialized) {
throw new Error('Groq STT not initialized');
}
const startTime = performance.now();
try {
const formData = new FormData();
formData.append('file', audioBlob, 'audio.webm');
formData.append('model', this.model);
formData.append('response_format', 'verbose_json');
formData.append('language', 'auto');
const response = await fetch(this.baseUrl, {
method: 'POST',
headers: {
'Authorization': `Bearer ${this.apiKey}`
},
body: formData
});
if (!response.ok) {
throw new Error(`Groq API error: ${response.status}`);
}
const result = await response.json();
const endTime = performance.now();
const latency = endTime - startTime;
this._updateLatencyMetrics(latency);
return {
text: result.text,
language: result.language || 'en',
confidence: result.confidence || 0.95,
provider: this.name,
latency
};
} catch (error) {
console.error('Groq transcription error:', error);
throw error;
}
}
_updateLatencyMetrics(latency) {
this.latencyMetrics.requestCount++;
this.latencyMetrics.lastLatency = latency;
this.latencyMetrics.averageLatency
= (this.latencyMetrics.averageLatency * (this.latencyMetrics.requestCount - 1) + latency)
/ this.latencyMetrics.requestCount;
}
getLatencyMetrics() {
return { ...this.latencyMetrics };
}
}
/**
* Deepgram STT Provider.
*/
export class DeepgramSTTProvider extends STTProvider {
constructor(config = {}) {
super('Deepgram Nova-2', config);
this.apiKey = config.apiKey;
this.model = config.model || 'nova-2';
this.baseUrl = 'https://api.deepgram.com/v1/listen';
this.latencyMetrics = {
averageLatency: 0,
lastLatency: 0,
requestCount: 0
};
}
async initialize() {
if (!this.apiKey) {
throw new Error('Deepgram API key is required');
}
this.isInitialized = true;
console.log('Deepgram STT Provider initialized');
}
async transcribe(audioBlob) {
if (!this.isInitialized) {
throw new Error('Deepgram STT not initialized');
}
const startTime = performance.now();
try {
// Log audio blob info for debugging
console.log('Deepgram: Processing audio blob', audioBlob.size, 'bytes, type:', audioBlob.type);
const url = `${this.baseUrl}?model=${this.model}&smart_format=true&detect_language=true&punctuate=true&diarize=false`;
const response = await fetch(url, {
method: 'POST',
headers: {
'Authorization': `Token ${this.apiKey}`,
'Content-Type': audioBlob.type || 'audio/wav'
},
body: audioBlob
});
if (!response.ok) {
const errorText = await response.text();
console.error('Deepgram API error response:', response.status, errorText);
throw new Error(`Deepgram API error: ${response.status} - ${errorText}`);
}
const result = await response.json();
const endTime = performance.now();
const latency = endTime - startTime;
this._updateLatencyMetrics(latency);
const transcript = result.results?.channels?.[0]?.alternatives?.[0];
const transcriptText = transcript?.transcript || '';
console.log('Deepgram result:', transcriptText);
return {
text: transcriptText,
language: result.results?.channels?.[0]?.detected_language || 'en',
confidence: transcript?.confidence || 0.95,
provider: this.name,
latency
};
} catch (error) {
console.error('Deepgram transcription error:', error);
throw error;
}
}
_updateLatencyMetrics(latency) {
this.latencyMetrics.requestCount++;
this.latencyMetrics.lastLatency = latency;
this.latencyMetrics.averageLatency
= (this.latencyMetrics.averageLatency * (this.latencyMetrics.requestCount - 1) + latency)
/ this.latencyMetrics.requestCount;
}
getLatencyMetrics() {
return { ...this.latencyMetrics };
}
}
/**
* AssemblyAI STT Provider.
*/
export class AssemblyAISTTProvider extends STTProvider {
constructor(config = {}) {
super('AssemblyAI Universal-2', config);
this.apiKey = config.apiKey;
this.baseUrl = 'https://api.assemblyai.com/v2/transcript';
this.latencyMetrics = {
averageLatency: 0,
lastLatency: 0,
requestCount: 0
};
}
async initialize() {
if (!this.apiKey) {
throw new Error('AssemblyAI API key is required');
}
this.isInitialized = true;
console.log('AssemblyAI STT Provider initialized');
}
async transcribe(audioBlob) {
if (!this.isInitialized) {
throw new Error('AssemblyAI STT not initialized');
}
const startTime = performance.now();
try {
// First, upload the audio file
const uploadResponse = await fetch('https://api.assemblyai.com/v2/upload', {
method: 'POST',
headers: {
'Authorization': this.apiKey,
'Content-Type': 'application/octet-stream'
},
body: audioBlob
});
if (!uploadResponse.ok) {
throw new Error(`AssemblyAI upload error: ${uploadResponse.status}`);
}
const uploadResult = await uploadResponse.json();
// Then, request transcription
const transcriptionResponse = await fetch(this.baseUrl, {
method: 'POST',
headers: {
'Authorization': this.apiKey,
'Content-Type': 'application/json'
},
body: JSON.stringify({
audio_url: uploadResult.upload_url,
language_detection: true
})
});
if (!transcriptionResponse.ok) {
throw new Error(`AssemblyAI transcription error: ${transcriptionResponse.status}`);
}
const result = await transcriptionResponse.json();
const endTime = performance.now();
const latency = endTime - startTime;
this._updateLatencyMetrics(latency);
return {
text: result.text || '',
language: result.language_code || 'en',
confidence: result.confidence || 0.95,
provider: this.name,
latency
};
} catch (error) {
console.error('AssemblyAI transcription error:', error);
throw error;
}
}
_updateLatencyMetrics(latency) {
this.latencyMetrics.requestCount++;
this.latencyMetrics.lastLatency = latency;
this.latencyMetrics.averageLatency
= (this.latencyMetrics.averageLatency * (this.latencyMetrics.requestCount - 1) + latency)
/ this.latencyMetrics.requestCount;
}
getLatencyMetrics() {
return { ...this.latencyMetrics };
}
}
/**
* STT Provider Factory.
*/
export class STTProviderFactory {
static create(providerName, config = {}) {
switch (providerName.toLowerCase()) {
case 'whisper':
return new WhisperSTTProvider(config);
case 'groq':
return new GroqSTTProvider(config);
case 'deepgram':
return new DeepgramSTTProvider(config);
case 'assemblyai':
return new AssemblyAISTTProvider(config);
default:
throw new Error(`Unknown STT provider: ${providerName}`);
}
}
static getAvailableProviders() {
return [
'whisper',
'groq',
'deepgram',
'assemblyai'
];
}
}
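
A hypothetical benchmarking snippet for the providers above. Each provider accumulates a running mean, avg_n = (avg_{n-1} * (n - 1) + x_n) / n, so getLatencyMetrics() reflects every transcription made through the instance; the API key value below is a placeholder:

import { STTProviderFactory } from './services/stt-providers';

// Transcribe one recorded chunk and inspect the accumulated latency metrics.
async function benchmarkDeepgram(chunk: Blob) {
    const provider = STTProviderFactory.create('deepgram', { apiKey: '<DEEPGRAM_API_KEY>' });

    await provider.initialize();

    const result = await provider.transcribe(chunk);

    console.log(result.text, `${Math.round(result.latency)}ms`, provider.getLatencyMetrics());
}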

View File

@@ -0,0 +1,569 @@
// @ts-nocheck
/**
* Translation service for converting text between languages
* Supports multiple translation providers for comparison.
*/
/**
* Base Translation Provider class.
*/
export class TranslationProvider {
constructor(name, config = {}) {
this.name = name;
this.config = config;
this.isInitialized = false;
this.supportedLanguages = [];
}
async initialize() {
throw new Error('initialize() must be implemented by subclass');
}
async translate(text, sourceLang, targetLang) {
throw new Error('translate() must be implemented by subclass');
}
async detectLanguage(text) {
throw new Error('detectLanguage() must be implemented by subclass');
}
getSupportedLanguages() {
return this.supportedLanguages;
}
getLatencyMetrics() {
return {
averageLatency: 0,
lastLatency: 0,
requestCount: 0
};
}
}
/**
* OpenAI GPT-4 Translation Provider.
*/
export class OpenAITranslationProvider extends TranslationProvider {
constructor(config = {}) {
super('OpenAI GPT-4', config);
this.apiKey = config.apiKey;
this.model = config.model || 'gpt-4-turbo-preview';
this.baseUrl = 'https://api.openai.com/v1/chat/completions';
this.latencyMetrics = {
averageLatency: 0,
lastLatency: 0,
requestCount: 0
};
this.supportedLanguages = [
'en', 'es', 'fr', 'de', 'it', 'pt', 'ro', 'ru', 'ja', 'ko', 'zh',
'ar', 'hi', 'tr', 'pl', 'nl', 'sv', 'da', 'no', 'fi'
];
this.languageNames = {
'en': 'English',
'es': 'Spanish',
'fr': 'French',
'de': 'German',
'it': 'Italian',
'pt': 'Portuguese',
'ro': 'Romanian',
'ru': 'Russian',
'ja': 'Japanese',
'ko': 'Korean',
'zh': 'Chinese',
'ar': 'Arabic',
'hi': 'Hindi',
'tr': 'Turkish',
'pl': 'Polish',
'nl': 'Dutch',
'sv': 'Swedish',
'da': 'Danish',
'no': 'Norwegian',
'fi': 'Finnish'
};
}
async initialize() {
if (!this.apiKey) {
throw new Error('OpenAI API key is required');
}
this.isInitialized = true;
console.log('OpenAI Translation Provider initialized');
}
async translate(text, sourceLang, targetLang) {
if (!this.isInitialized) {
throw new Error('OpenAI Translation Provider not initialized');
}
const startTime = performance.now();
try {
const sourceLanguage = this.languageNames[sourceLang] || sourceLang;
const targetLanguage = this.languageNames[targetLang] || targetLang;
const response = await fetch(this.baseUrl, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${this.apiKey}`
},
body: JSON.stringify({
model: this.model,
messages: [
{
role: 'system',
content: `You are a professional translator. Translate the given text from ${sourceLanguage} to ${targetLanguage}. Return only the translation without any additional text or explanations. Maintain the tone and context of the original text.`
},
{
role: 'user',
content: text
}
],
max_tokens: 500,
temperature: 0.1
})
});
if (!response.ok) {
throw new Error(`OpenAI API error: ${response.status}`);
}
const result = await response.json();
const endTime = performance.now();
const latency = endTime - startTime;
this._updateLatencyMetrics(latency);
return {
translatedText: result.choices[0].message.content.trim(),
sourceLang,
targetLang,
provider: this.name,
latency,
confidence: 0.95
};
} catch (error) {
console.error('OpenAI translation error:', error);
throw error;
}
}
async detectLanguage(text) {
if (!this.isInitialized) {
throw new Error('OpenAI Translation Provider not initialized');
}
try {
const response = await fetch(this.baseUrl, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${this.apiKey}`
},
body: JSON.stringify({
model: this.model,
messages: [
{
role: 'system',
content: 'Detect the language of the given text and return only the ISO 639-1 language code (e.g., "en", "es", "fr"). Return only the code.'
},
{
role: 'user',
content: text
}
],
max_tokens: 10,
temperature: 0
})
});
if (!response.ok) {
throw new Error(`OpenAI API error: ${response.status}`);
}
const result = await response.json();
const detectedLang = result.choices[0].message.content.trim().toLowerCase();
return {
language: detectedLang,
confidence: 0.95,
provider: this.name
};
} catch (error) {
console.error('OpenAI language detection error:', error);
return {
language: 'en',
confidence: 0.5,
provider: this.name
};
}
}
_updateLatencyMetrics(latency) {
this.latencyMetrics.requestCount++;
this.latencyMetrics.lastLatency = latency;
this.latencyMetrics.averageLatency
= (this.latencyMetrics.averageLatency * (this.latencyMetrics.requestCount - 1) + latency)
/ this.latencyMetrics.requestCount;
}
getLatencyMetrics() {
return { ...this.latencyMetrics };
}
}
/**
* Google Translate Provider.
*/
export class GoogleTranslateProvider extends TranslationProvider {
constructor(config = {}) {
super('Google Translate', config);
this.apiKey = config.apiKey;
this.baseUrl = 'https://translation.googleapis.com/language/translate/v2';
this.latencyMetrics = {
averageLatency: 0,
lastLatency: 0,
requestCount: 0
};
this.supportedLanguages = [
'en', 'es', 'fr', 'de', 'it', 'pt', 'ro', 'ru', 'ja', 'ko', 'zh',
'ar', 'hi', 'tr', 'pl', 'nl', 'sv', 'da', 'no', 'fi', 'he',
'th', 'vi', 'id', 'ms', 'tl', 'cy', 'ga', 'mt', 'is'
];
}
async initialize() {
if (!this.apiKey) {
throw new Error('Google Translate API key is required');
}
this.isInitialized = true;
console.log('Google Translate Provider initialized');
}
async translate(text, sourceLang, targetLang) {
if (!this.isInitialized) {
throw new Error('Google Translate Provider not initialized');
}
const startTime = performance.now();
try {
const params = new URLSearchParams({
key: this.apiKey,
q: text,
source: sourceLang,
target: targetLang,
format: 'text'
});
const response = await fetch(`${this.baseUrl}?${params}`, {
method: 'POST',
headers: {
'Content-Type': 'application/x-www-form-urlencoded'
}
});
if (!response.ok) {
throw new Error(`Google Translate API error: ${response.status}`);
}
const result = await response.json();
const endTime = performance.now();
const latency = endTime - startTime;
this._updateLatencyMetrics(latency);
return {
translatedText: result.data.translations[0].translatedText,
sourceLang,
targetLang,
provider: this.name,
latency,
confidence: 0.98
};
} catch (error) {
console.error('Google Translate error:', error);
throw error;
}
}
async detectLanguage(text) {
if (!this.isInitialized) {
throw new Error('Google Translate Provider not initialized');
}
try {
const params = new URLSearchParams({
key: this.apiKey,
q: text
});
const response = await fetch(`https://translation.googleapis.com/language/translate/v2/detect?${params}`, {
method: 'POST',
headers: {
'Content-Type': 'application/x-www-form-urlencoded'
}
});
if (!response.ok) {
throw new Error(`Google Translate API error: ${response.status}`);
}
const result = await response.json();
const detection = result.data.detections[0][0];
return {
language: detection.language,
confidence: detection.confidence,
provider: this.name
};
} catch (error) {
console.error('Google language detection error:', error);
return {
language: 'en',
confidence: 0.5,
provider: this.name
};
}
}
_updateLatencyMetrics(latency) {
this.latencyMetrics.requestCount++;
this.latencyMetrics.lastLatency = latency;
this.latencyMetrics.averageLatency
= (this.latencyMetrics.averageLatency * (this.latencyMetrics.requestCount - 1) + latency)
/ this.latencyMetrics.requestCount;
}
getLatencyMetrics() {
return { ...this.latencyMetrics };
}
}
/**
* Microsoft Translator Provider.
*/
export class MicrosoftTranslatorProvider extends TranslationProvider {
constructor(config = {}) {
super('Microsoft Translator', config);
this.apiKey = config.apiKey;
this.region = config.region || 'eastus';
this.baseUrl = 'https://api.cognitive.microsofttranslator.com';
this.latencyMetrics = {
averageLatency: 0,
lastLatency: 0,
requestCount: 0
};
this.supportedLanguages = [
'en', 'es', 'fr', 'de', 'it', 'pt', 'ro', 'ru', 'ja', 'ko', 'zh',
'ar', 'hi', 'tr', 'pl', 'nl', 'sv', 'da', 'no', 'fi'
];
}
async initialize() {
if (!this.apiKey) {
throw new Error('Microsoft Translator API key is required');
}
this.isInitialized = true;
console.log('Microsoft Translator Provider initialized');
}
async translate(text, sourceLang, targetLang) {
if (!this.isInitialized) {
throw new Error('Microsoft Translator Provider not initialized');
}
const startTime = performance.now();
try {
const response = await fetch(`${this.baseUrl}/translate?api-version=3.0&from=${sourceLang}&to=${targetLang}`, {
method: 'POST',
headers: {
'Ocp-Apim-Subscription-Key': this.apiKey,
'Ocp-Apim-Subscription-Region': this.region,
'Content-Type': 'application/json'
},
body: JSON.stringify([ { text } ])
});
if (!response.ok) {
throw new Error(`Microsoft Translator API error: ${response.status}`);
}
const result = await response.json();
const endTime = performance.now();
const latency = endTime - startTime;
this._updateLatencyMetrics(latency);
return {
translatedText: result[0].translations[0].text,
sourceLang,
targetLang,
provider: this.name,
latency,
confidence: result[0].translations[0].confidence || 0.95
};
} catch (error) {
console.error('Microsoft Translator error:', error);
throw error;
}
}
async detectLanguage(text) {
if (!this.isInitialized) {
throw new Error('Microsoft Translator Provider not initialized');
}
try {
const response = await fetch(`${this.baseUrl}/detect?api-version=3.0`, {
method: 'POST',
headers: {
'Ocp-Apim-Subscription-Key': this.apiKey,
'Ocp-Apim-Subscription-Region': this.region,
'Content-Type': 'application/json'
},
body: JSON.stringify([ { text } ])
});
if (!response.ok) {
throw new Error(`Microsoft Translator API error: ${response.status}`);
}
const result = await response.json();
return {
language: result[0].language,
confidence: result[0].score,
provider: this.name
};
} catch (error) {
console.error('Microsoft language detection error:', error);
return {
language: 'en',
confidence: 0.5,
provider: this.name
};
}
}
_updateLatencyMetrics(latency) {
this.latencyMetrics.requestCount++;
this.latencyMetrics.lastLatency = latency;
this.latencyMetrics.averageLatency
= (this.latencyMetrics.averageLatency * (this.latencyMetrics.requestCount - 1) + latency)
/ this.latencyMetrics.requestCount;
}
getLatencyMetrics() {
return { ...this.latencyMetrics };
}
}
/**
* Translation Service Factory.
*/
export class TranslationProviderFactory {
static create(providerName, config = {}) {
switch (providerName.toLowerCase()) {
case 'openai':
return new OpenAITranslationProvider(config);
case 'google':
return new GoogleTranslateProvider(config);
case 'microsoft':
return new MicrosoftTranslatorProvider(config);
default:
throw new Error(`Unknown translation provider: ${providerName}`);
}
}
static getAvailableProviders() {
return [
'openai',
'google',
'microsoft'
];
}
}
/**
* Universal Translation Service
* Orchestrates the complete translation pipeline.
*/
export class UniversalTranslationService {
constructor(config = {}) {
this.translationProvider = null;
this.fallbackProvider = null;
this.config = config;
}
async initialize(primaryProvider, fallbackProvider = null) {
this.translationProvider = TranslationProviderFactory.create(primaryProvider, this.config[primaryProvider]);
await this.translationProvider.initialize();
if (fallbackProvider) {
this.fallbackProvider = TranslationProviderFactory.create(fallbackProvider, this.config[fallbackProvider]);
await this.fallbackProvider.initialize();
}
console.log('Universal Translation Service initialized');
}
async translateText(text, sourceLang, targetLang) {
try {
return await this.translationProvider.translate(text, sourceLang, targetLang);
} catch (error) {
console.warn('Primary translation provider failed, trying fallback:', error);
if (this.fallbackProvider) {
try {
return await this.fallbackProvider.translate(text, sourceLang, targetLang);
} catch (fallbackError) {
console.error('Fallback translation provider also failed:', fallbackError);
throw fallbackError;
}
}
throw error;
}
}
async detectLanguage(text) {
try {
return await this.translationProvider.detectLanguage(text);
} catch (error) {
if (this.fallbackProvider) {
try {
return await this.fallbackProvider.detectLanguage(text);
} catch (fallbackError) {
console.error('Language detection failed on both providers');
return { language: 'en',
confidence: 0.5,
provider: 'fallback' };
}
}
return { language: 'en',
confidence: 0.5,
provider: 'fallback' };
}
}
getLatencyMetrics() {
return {
primary: this.translationProvider?.getLatencyMetrics(),
fallback: this.fallbackProvider?.getLatencyMetrics()
};
}
getSupportedLanguages() {
return this.translationProvider?.getSupportedLanguages() || [];
}
}
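
A minimal sketch of the primary/fallback orchestration provided by UniversalTranslationService; the provider names follow the factory above and the API keys are placeholders:

import { UniversalTranslationService } from './services/translation';

// Hypothetical setup: OpenAI as primary, Google Translate as fallback.
async function translateWithFallback(text: string) {
    const service = new UniversalTranslationService({
        openai: { apiKey: '<OPENAI_API_KEY>' },
        google: { apiKey: '<GOOGLE_API_KEY>' }
    });

    await service.initialize('openai', 'google');

    const result = await service.translateText(text, 'en', 'es');

    console.log(result.translatedText, result.provider, `${Math.round(result.latency)}ms`);
}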

View File

@@ -0,0 +1,569 @@
// @ts-nocheck
/**
* Text-to-Speech service providers for latency comparison
* Supports multiple TTS services for benchmarking.
*/
/**
* Base TTS Provider class.
*/
export class TTSProvider {
constructor(name, config = {}) {
this.name = name;
this.config = config;
this.isInitialized = false;
}
async initialize() {
throw new Error('initialize() must be implemented by subclass');
}
async synthesize(text, language = 'en', voice = null) {
throw new Error('synthesize() must be implemented by subclass');
}
getLatencyMetrics() {
return {
averageLatency: 0,
lastLatency: 0,
requestCount: 0
};
}
getAvailableVoices(language = 'en') {
throw new Error('getAvailableVoices() must be implemented by subclass');
}
}
/**
* Cartesia TTS Provider (Sonic).
*/
export class CartesiaTTSProvider extends TTSProvider {
constructor(config = {}) {
super('Cartesia Sonic', config);
this.apiKey = config.apiKey;
this.baseUrl = 'https://api.cartesia.ai/tts/bytes';
this.model = config.model || 'sonic-english';
this.latencyMetrics = {
averageLatency: 0,
lastLatency: 0,
requestCount: 0
};
// Updated with actual Cartesia voice IDs
this.voiceMap = {
'en': 'a0e99841-438c-4a64-b679-ae501e7d6091', // Barbershop Man
'es': '846d6cb0-2301-48b6-9683-48f5618ea2f6', // Spanish voice
'fr': 'f114a467-c40a-4db8-964d-aaba89cd08fa', // French voice
'de': '2b568345-1d48-4047-b25f-7baccf842eb0', // German voice
'ro': 'a0e99841-438c-4a64-b679-ae501e7d6091' // Romanian (using default voice - update if specific Romanian voice ID available)
};
}
async initialize() {
if (!this.apiKey) {
throw new Error('Cartesia API key is required');
}
this.isInitialized = true;
console.log('Cartesia TTS Provider initialized');
}
async synthesize(text, language = 'en', voice = null) {
if (!this.isInitialized) {
throw new Error('Cartesia TTS not initialized');
}
const startTime = performance.now();
try {
const voiceId = voice || this.voiceMap[language] || this.voiceMap.en;
const requestBody = {
model_id: this.model,
transcript: text,
voice: {
mode: 'id',
id: voiceId
},
output_format: {
container: 'wav',
encoding: 'pcm_s16le',
sample_rate: 22050
}
};
console.log('Cartesia TTS request:', {
url: this.baseUrl,
model: this.model,
voice: voiceId,
textLength: text.length
});
const response = await fetch(this.baseUrl, {
method: 'POST',
headers: {
'X-API-Key': this.apiKey,
'Content-Type': 'application/json',
'Cartesia-Version': '2024-06-10'
},
body: JSON.stringify(requestBody)
});
if (!response.ok) {
const errorText = await response.text();
console.error('Cartesia API error response:', errorText);
throw new Error(`Cartesia API error: ${response.status} - ${errorText}`);
}
const audioBlob = await response.blob();
const endTime = performance.now();
const latency = endTime - startTime;
this._updateLatencyMetrics(latency);
return {
audioBlob,
provider: this.name,
latency,
language,
voice: voiceId
};
} catch (error) {
console.error('Cartesia TTS error:', error);
throw error;
}
}
_updateLatencyMetrics(latency) {
this.latencyMetrics.requestCount++;
this.latencyMetrics.lastLatency = latency;
this.latencyMetrics.averageLatency
= (this.latencyMetrics.averageLatency * (this.latencyMetrics.requestCount - 1) + latency)
/ this.latencyMetrics.requestCount;
}
getLatencyMetrics() {
return { ...this.latencyMetrics };
}
getAvailableVoices(language = 'en') {
return Object.keys(this.voiceMap);
}
}
/**
* ElevenLabs TTS Provider.
*/
export class ElevenLabsTTSProvider extends TTSProvider {
constructor(config = {}) {
super('ElevenLabs', config);
this.apiKey = config.apiKey;
this.baseUrl = 'https://api.elevenlabs.io/v1/text-to-speech';
this.latencyMetrics = {
averageLatency: 0,
lastLatency: 0,
requestCount: 0
};
this.voiceMap = {
'en': 'EXAVITQu4vr4xnSDxMaL', // Bella - English
'es': '9BWtsMINqrJLrRacOk9x', // Spanish voice
'fr': 'Xb7hH8MSUJpSbSDYk0k2', // French voice
'de': 'N2lVS1w4EtoT3dr4eOWO' // German voice
};
}
async initialize() {
if (!this.apiKey) {
throw new Error('ElevenLabs API key is required');
}
this.isInitialized = true;
console.log('ElevenLabs TTS Provider initialized');
}
async synthesize(text, language = 'en', voice = null) {
if (!this.isInitialized) {
throw new Error('ElevenLabs TTS not initialized');
}
const startTime = performance.now();
try {
const voiceId = voice || this.voiceMap[language] || this.voiceMap.en;
const response = await fetch(`${this.baseUrl}/${voiceId}`, {
method: 'POST',
headers: {
'Accept': 'audio/mpeg',
'Content-Type': 'application/json',
'xi-api-key': this.apiKey
},
body: JSON.stringify({
text,
model_id: 'eleven_multilingual_v2',
voice_settings: {
stability: 0.5,
similarity_boost: 0.75
}
})
});
if (!response.ok) {
throw new Error(`ElevenLabs API error: ${response.status}`);
}
const audioBlob = await response.blob();
const endTime = performance.now();
const latency = endTime - startTime;
this._updateLatencyMetrics(latency);
return {
audioBlob,
provider: this.name,
latency,
language,
voice: voiceId
};
} catch (error) {
console.error('ElevenLabs TTS error:', error);
throw error;
}
}
_updateLatencyMetrics(latency) {
this.latencyMetrics.requestCount++;
this.latencyMetrics.lastLatency = latency;
this.latencyMetrics.averageLatency
= (this.latencyMetrics.averageLatency * (this.latencyMetrics.requestCount - 1) + latency)
/ this.latencyMetrics.requestCount;
}
getLatencyMetrics() {
return { ...this.latencyMetrics };
}
getAvailableVoices(language = 'en') {
return Object.keys(this.voiceMap);
}
}
/**
* Azure Speech TTS Provider.
*/
export class AzureTTSProvider extends TTSProvider {
constructor(config = {}) {
super('Azure Speech', config);
this.apiKey = config.apiKey;
this.region = config.region || 'eastus';
this.baseUrl = `https://${this.region}.tts.speech.microsoft.com/cognitiveservices/v1`;
this.latencyMetrics = {
averageLatency: 0,
lastLatency: 0,
requestCount: 0
};
this.voiceMap = {
'en': 'en-US-JennyNeural',
'es': 'es-ES-ElviraNeural',
'fr': 'fr-FR-DeniseNeural',
'de': 'de-DE-KatjaNeural'
};
}
async initialize() {
if (!this.apiKey) {
throw new Error('Azure Speech API key is required');
}
this.isInitialized = true;
console.log('Azure TTS Provider initialized');
}
async synthesize(text, language = 'en', voice = null) {
if (!this.isInitialized) {
throw new Error('Azure TTS not initialized');
}
const startTime = performance.now();
try {
const voiceName = voice || this.voiceMap[language] || this.voiceMap.en;
const ssml = `
<speak version='1.0' xml:lang='${language}'>
<voice xml:lang='${language}' name='${voiceName}'>
${text}
</voice>
</speak>
`;
const response = await fetch(this.baseUrl, {
method: 'POST',
headers: {
'Ocp-Apim-Subscription-Key': this.apiKey,
'Content-Type': 'application/ssml+xml',
'X-Microsoft-OutputFormat': 'riff-16khz-16bit-mono-pcm'
},
body: ssml
});
if (!response.ok) {
throw new Error(`Azure TTS API error: ${response.status}`);
}
const audioBlob = await response.blob();
const endTime = performance.now();
const latency = endTime - startTime;
this._updateLatencyMetrics(latency);
return {
audioBlob,
provider: this.name,
latency,
language,
voice: voiceName
};
} catch (error) {
console.error('Azure TTS error:', error);
throw error;
}
}
_updateLatencyMetrics(latency) {
this.latencyMetrics.requestCount++;
this.latencyMetrics.lastLatency = latency;
this.latencyMetrics.averageLatency
= (this.latencyMetrics.averageLatency * (this.latencyMetrics.requestCount - 1) + latency)
/ this.latencyMetrics.requestCount;
}
getLatencyMetrics() {
return { ...this.latencyMetrics };
}
getAvailableVoices(language = 'en') {
return Object.keys(this.voiceMap);
}
}
/**
* Deepgram TTS Provider (Aura).
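*
* A minimal usage sketch (hedged; DEEPGRAM_API_KEY is a placeholder, and the
* model falls back to the English Aura voice when the requested language has
* no entry in the voice map):
*
* @example
* const tts = new DeepgramTTSProvider({ apiKey: DEEPGRAM_API_KEY });
* await tts.initialize();
* const { audioBlob, voice } = await tts.synthesize('Hello from Aura', 'en');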
*/
export class DeepgramTTSProvider extends TTSProvider {
constructor(config = {}) {
super('Deepgram Aura', config);
this.apiKey = config.apiKey;
this.baseUrl = 'https://api.deepgram.com/v1/speak';
this.latencyMetrics = {
averageLatency: 0,
lastLatency: 0,
requestCount: 0
};
this.voiceMap = {
'en': 'aura-asteria-en',
'es': 'aura-luna-es',
'fr': 'aura-stella-fr',
'de': 'aura-hera-de'
};
}
async initialize() {
if (!this.apiKey) {
throw new Error('Deepgram API key is required');
}
this.isInitialized = true;
console.log('Deepgram TTS Provider initialized');
}
async synthesize(text, language = 'en', voice = null) {
if (!this.isInitialized) {
throw new Error('Deepgram TTS not initialized');
}
const startTime = performance.now();
try {
const voiceName = voice || this.voiceMap[language] || this.voiceMap.en;
const response = await fetch(`${this.baseUrl}?model=${voiceName}`, {
method: 'POST',
headers: {
'Authorization': `Token ${this.apiKey}`,
'Content-Type': 'application/json'
},
body: JSON.stringify({
text
})
});
if (!response.ok) {
throw new Error(`Deepgram TTS API error: ${response.status}`);
}
const audioBlob = await response.blob();
const endTime = performance.now();
const latency = endTime - startTime;
this._updateLatencyMetrics(latency);
return {
audioBlob,
provider: this.name,
latency,
language,
voice: voiceName
};
} catch (error) {
console.error('Deepgram TTS error:', error);
throw error;
}
}
_updateLatencyMetrics(latency) {
this.latencyMetrics.requestCount++;
this.latencyMetrics.lastLatency = latency;
this.latencyMetrics.averageLatency
= (this.latencyMetrics.averageLatency * (this.latencyMetrics.requestCount - 1) + latency)
/ this.latencyMetrics.requestCount;
}
getLatencyMetrics() {
return { ...this.latencyMetrics };
}
getAvailableVoices(language = 'en') {
return Object.keys(this.voiceMap);
}
}
/**
* Browser Web Speech API TTS Provider (Fallback).
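*
* A minimal usage sketch (hedged; no API key is needed, the browser speaks the
* text directly, and no audio blob is returned):
*
* @example
* const tts = new WebSpeechTTSProvider();
* await tts.initialize();
* const { audioBlob } = await tts.synthesize('Hello', 'en'); // audioBlob is null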
*/
export class WebSpeechTTSProvider extends TTSProvider {
constructor(config = {}) {
super('Web Speech API', config);
this.speechSynthesis = window.speechSynthesis;
this.latencyMetrics = {
averageLatency: 0,
lastLatency: 0,
requestCount: 0
};
}
async initialize() {
if (!this.speechSynthesis) {
throw new Error('Web Speech API not supported');
}
this.isInitialized = true;
console.log('Web Speech TTS Provider initialized');
}
async synthesize(text, language = 'en', voice = null) {
if (!this.isInitialized) {
throw new Error('Web Speech TTS not initialized');
}
const startTime = performance.now();
return new Promise((resolve, reject) => {
try {
const utterance = new SpeechSynthesisUtterance(text);
utterance.lang = language;
if (voice) {
const voices = this.speechSynthesis.getVoices();
const selectedVoice = voices.find(v => v.name === voice || v.lang.startsWith(language));
if (selectedVoice) {
utterance.voice = selectedVoice;
}
}
utterance.onend = () => {
const endTime = performance.now();
// onend fires only after playback finishes, so this latency includes the
// full speaking time and is not directly comparable to the HTTP providers above.
const latency = endTime - startTime;
this._updateLatencyMetrics(latency);
// Note: Web Speech API doesn't provide audio blob directly
resolve({
audioBlob: null,
provider: this.name,
latency,
language,
voice: utterance.voice?.name || 'default'
});
};
utterance.onerror = error => {
reject(new Error(`Web Speech TTS error: ${error.error}`));
};
this.speechSynthesis.speak(utterance);
} catch (error) {
reject(error);
}
});
}
_updateLatencyMetrics(latency) {
this.latencyMetrics.requestCount++;
this.latencyMetrics.lastLatency = latency;
this.latencyMetrics.averageLatency
= (this.latencyMetrics.averageLatency * (this.latencyMetrics.requestCount - 1) + latency)
/ this.latencyMetrics.requestCount;
}
getLatencyMetrics() {
return { ...this.latencyMetrics };
}
getAvailableVoices(language = 'en') {
if (!this.speechSynthesis) {
return [];
}
const voices = this.speechSynthesis.getVoices();
return voices
.filter(voice => voice.lang.startsWith(language))
.map(voice => voice.name);
}
}
/**
* TTS Provider Factory.
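*
* A minimal usage sketch (hedged; the provider name is case-insensitive and an
* unknown name throws; DEEPGRAM_API_KEY is a placeholder):
*
* @example
* const provider = TTSProviderFactory.create('deepgram', { apiKey: DEEPGRAM_API_KEY });
* await provider.initialize();
* console.log(TTSProviderFactory.getAvailableProviders());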
*/
export class TTSProviderFactory {
static create(providerName, config = {}) {
switch (providerName.toLowerCase()) {
case 'cartesia':
return new CartesiaTTSProvider(config);
case 'elevenlabs':
return new ElevenLabsTTSProvider(config);
case 'azure':
return new AzureTTSProvider(config);
case 'deepgram':
return new DeepgramTTSProvider(config);
case 'webspeech':
return new WebSpeechTTSProvider(config);
default:
throw new Error(`Unknown TTS provider: ${providerName}`);
}
}
static getAvailableProviders() {
return [
'cartesia',
'elevenlabs',
'azure',
'deepgram',
'webspeech'
];
}
}
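/*
* Fallback sketch (hedged, illustrative only): because WebSpeechTTSProvider is
* the browser-local fallback, a caller could try a configured cloud provider
* first and drop to 'webspeech' if initialization or synthesis fails.
*
* async function synthesizeWithFallback(text, language, config) {
*     try {
*         const primary = TTSProviderFactory.create(config.provider, config);
*
*         await primary.initialize();
*
*         return await primary.synthesize(text, language);
*     } catch (error) {
*         const fallback = TTSProviderFactory.create('webspeech');
*
*         await fallback.initialize();
*
*         return fallback.synthesize(text, language);
*     }
* }
*/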