mirror of https://gitcode.com/GitHub_Trending/ji/jitsi-meet.git
synced 2026-01-27 09:00:19 +00:00

Compare commits: 2 commits (dependabot… ... universal-…)

| Author | SHA1 | Date |
|---|---|---|
| | ec553f77d0 | |
| | 6edeca020e | |
@@ -76,5 +76,6 @@ $flagsImagePath: "../images/";
 @import 'participants-pane';
 @import 'reactions-menu';
 @import 'plan-limit';
+@import 'universal-translator/main';
 
 /* Modules END */
333 · css/universal-translator/_main.scss · Normal file
@@ -0,0 +1,333 @@
/**
 * Universal Translator Styles
 */

.universal-translator-dialog {
    padding: 20px;
    max-width: 800px;
    min-height: 600px;

    h3 {
        margin: 20px 0 10px 0;
        color: #1a1a1a;
        font-size: 16px;
        font-weight: 600;
    }

    /* Language Selection */
    .language-selection {
        margin-bottom: 30px;

        .language-selectors {
            display: grid;
            grid-template-columns: 1fr 1fr;
            gap: 20px;
            margin-top: 15px;
        }

        .language-selector {
            display: flex;
            flex-direction: column;
            gap: 8px;

            label {
                font-weight: 500;
                color: #333;
            }

            select {
                padding: 10px;
                border: 2px solid #e0e0e0;
                border-radius: 8px;
                font-size: 14px;
                background: white;

                &:focus {
                    outline: none;
                    border-color: #007acc;
                }
            }
        }
    }

    /* Service Selection */
    .service-selection {
        margin-bottom: 30px;

        .service-group {
            display: flex;
            flex-direction: column;
            gap: 8px;
            margin-bottom: 15px;

            label {
                font-weight: 500;
                color: #333;
            }

            select {
                padding: 10px;
                border: 2px solid #e0e0e0;
                border-radius: 8px;
                font-size: 14px;
                background: white;

                &:focus {
                    outline: none;
                    border-color: #007acc;
                }
            }
        }
    }

    /* API Keys Section */
    .api-keys-section {
        margin-bottom: 30px;

        .api-keys-grid {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
            gap: 15px;
            margin-top: 15px;
        }

        .api-key-input {
            display: flex;
            flex-direction: column;
            gap: 5px;

            label {
                font-weight: 500;
                color: #333;
                font-size: 12px;
                text-transform: uppercase;
            }

            input {
                padding: 8px;
                border: 2px solid #e0e0e0;
                border-radius: 6px;
                font-size: 12px;

                &:focus {
                    outline: none;
                    border-color: #007acc;
                }

                &::placeholder {
                    color: #999;
                }
            }
        }
    }

    /* Translation Status */
    .translation-status {
        margin-bottom: 30px;

        .status-info {
            display: flex;
            justify-content: space-between;
            align-items: center;
            margin-bottom: 15px;
        }

        .status-indicator {
            display: flex;
            align-items: center;
            gap: 10px;
        }

        .status-dot {
            width: 12px;
            height: 12px;
            border-radius: 50%;
            background: #ccc;

            &.recording {
                background: #ff4444;
                animation: pulse 1s infinite;
            }

            &.processing {
                background: #ff9800;
                animation: pulse 1s infinite;
            }

            &.completed {
                background: #4caf50;
            }

            &.error {
                background: #f44336;
            }
        }

        .status-text {
            font-weight: 500;
            color: #333;
        }

        .latency-info {
            font-size: 14px;
            color: #007acc;
            font-weight: 600;
        }

        .error-message {
            background: #ffebee;
            border: 1px solid #f44336;
            border-radius: 6px;
            padding: 10px;
            margin: 10px 0;
            display: flex;
            justify-content: space-between;
            align-items: center;

            span {
                color: #c62828;
                font-size: 14px;
            }

            button {
                background: #f44336;
                color: white;
                border: none;
                padding: 5px 10px;
                border-radius: 4px;
                cursor: pointer;
                font-size: 12px;

                &:hover {
                    background: #d32f2f;
                }
            }
        }

        .transcription-result,
        .translation-result {
            background: #f8f9fa;
            border: 1px solid #e9ecef;
            border-radius: 8px;
            padding: 15px;
            margin: 10px 0;

            h4 {
                margin: 0 0 10px 0;
                color: #495057;
                font-size: 14px;
                font-weight: 600;
            }

            p {
                margin: 0;
                color: #212529;
                line-height: 1.5;
            }
        }
    }

    /* Performance Metrics */
    .performance-metrics {
        margin-bottom: 30px;

        .metrics-grid {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
            gap: 15px;
            margin-top: 15px;
        }

        .metric {
            display: flex;
            flex-direction: column;
            gap: 5px;
            text-align: center;
            padding: 15px;
            background: #f8f9fa;
            border-radius: 8px;

            label {
                font-size: 12px;
                color: #6c757d;
                font-weight: 500;
            }

            span {
                font-size: 18px;
                font-weight: 600;
                color: #007acc;
            }
        }
    }

    /* Control Buttons */
    .control-buttons {
        display: flex;
        justify-content: center;
        gap: 15px;
        margin-top: 30px;

        button {
            padding: 12px 24px;
            border: none;
            border-radius: 8px;
            font-size: 16px;
            font-weight: 600;
            cursor: pointer;
            min-width: 150px;
            transition: all 0.2s ease;

            &.record-button {
                background: #4caf50;
                color: white;

                &:hover:not(:disabled) {
                    background: #45a049;
                    transform: translateY(-1px);
                }

                &:disabled {
                    background: #cccccc;
                    cursor: not-allowed;
                }
            }

            &.stop-button {
                background: #f44336;
                color: white;

                &:hover {
                    background: #da190b;
                    transform: translateY(-1px);
                }
            }
        }
    }
}

/* Animations */
@keyframes pulse {
    0% { opacity: 1; }
    50% { opacity: 0.5; }
    100% { opacity: 1; }
}

/* Responsive */
@media (max-width: 768px) {
    .universal-translator-dialog {
        padding: 15px;

        .language-selectors {
            grid-template-columns: 1fr;
        }

        .api-keys-grid {
            grid-template-columns: 1fr;
        }

        .metrics-grid {
            grid-template-columns: repeat(2, 1fr);
        }
    }
}
70 · lang/universal-translator-en.json · Normal file
@@ -0,0 +1,70 @@
{
    "universalTranslator": {
        "title": "Universal Translator",
        "tooltip": "Open Universal Translator",
        "recording": "Translation Recording Active",
        "accessibilityLabel": "Universal Translator",
        "languages": {
            "from": "Translate from:",
            "to": "Translate to:",
            "autoDetect": "Auto-detect"
        },
        "providers": {
            "stt": "Speech-to-Text Service:",
            "translation": "Translation Service:",
            "tts": "Text-to-Speech Service:"
        },
        "status": {
            "idle": "Ready to translate",
            "recording": "Recording audio...",
            "processing": "Processing translation...",
            "transcribing": "Converting speech to text...",
            "translating": "Translating text...",
            "synthesizing": "Generating speech...",
            "playing": "Playing translated audio...",
            "completed": "Translation complete",
            "error": "Translation error"
        },
        "buttons": {
            "start": "🎤 Start Translation",
            "stop": "⏹️ Stop Recording",
            "clear": "Clear",
            "close": "Close"
        },
        "metrics": {
            "title": "Performance Metrics",
            "sttLatency": "STT Latency:",
            "translationLatency": "Translation Latency:",
            "ttsLatency": "TTS Latency:",
            "totalLatency": "Total Latency:",
            "totalRequests": "Total Requests:",
            "successRate": "Success Rate:"
        },
        "apiKeys": {
            "title": "API Keys",
            "openai": "OpenAI API Key",
            "groq": "Groq API Key",
            "deepgram": "Deepgram API Key",
            "assemblyai": "AssemblyAI API Key",
            "cartesia": "Cartesia API Key",
            "elevenlabs": "ElevenLabs API Key",
            "azure": "Azure API Key",
            "google": "Google API Key",
            "microsoft": "Microsoft API Key"
        },
        "results": {
            "transcription": "Transcription:",
            "translation": "Translation:"
        },
        "errors": {
            "notInitialized": "Universal Translator not initialized",
            "recordingFailed": "Failed to start recording",
            "transcriptionFailed": "Speech recognition failed",
            "translationFailed": "Translation failed",
            "synthesisFailed": "Speech synthesis failed",
            "apiKeyMissing": "API key required for selected service",
            "microphonePermission": "Microphone permission required",
            "unsupportedBrowser": "Browser not supported"
        }
    }
}
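A hedged note, not part of the diff itself: jitsi-meet's web UI resolves language keys like these through react-i18next, so the strings above would be consumed roughly as follows. The component below is hypothetical and only illustrates key lookup.

```typescript
import React from 'react';
import { useTranslation } from 'react-i18next';

// Hypothetical component reading one of the keys defined above.
function StartTranslationLabel() {
    const { t } = useTranslation();

    return <span>{ t('universalTranslator.buttons.start') }</span>;
}
```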
@@ -19,6 +19,7 @@ import '../web-hid/middleware';
 import '../settings/middleware';
 import '../talk-while-muted/middleware';
 import '../toolbox/middleware';
+import '../universal-translator/middleware';
 import '../face-landmarks/middleware';
 import '../gifs/middleware';
 import '../whiteboard/middleware.web';
@@ -15,6 +15,7 @@ import '../screen-share/reducer';
 import '../noise-suppression/reducer';
 import '../screenshot-capture/reducer';
 import '../talk-while-muted/reducer';
+import '../universal-translator/reducer';
 import '../virtual-background/reducer';
 import '../web-hid/reducer';
 import '../file-sharing/reducer';
@@ -76,6 +76,7 @@ import { ISubtitlesState } from '../subtitles/reducer';
 import { ITalkWhileMutedState } from '../talk-while-muted/reducer';
 import { IToolboxState } from '../toolbox/reducer';
 import { ITranscribingState } from '../transcribing/reducer';
+import { IUniversalTranslatorState } from '../universal-translator/reducer';
 import { IVideoLayoutState } from '../video-layout/reducer';
 import { IVideoQualityPersistedState, IVideoQualityState } from '../video-quality/reducer';
 import { IVideoSipGW } from '../videosipgw/reducer';
@@ -168,6 +169,7 @@ export interface IReduxState {
     'features/testing': ITestingState;
     'features/toolbox': IToolboxState;
     'features/transcribing': ITranscribingState;
+    'features/universal-translator': IUniversalTranslatorState;
     'features/video-layout': IVideoLayoutState;
     'features/video-quality': IVideoQualityState;
     'features/video-quality-persistent-storage': IVideoQualityPersistedState;
@@ -88,6 +88,7 @@ import { default as IconRemoteControlStop } from './stop-remote-control.svg';
 import { default as IconStop } from './stop.svg';
 import { default as IconSubtitles } from './subtitles.svg';
 import { default as IconTileView } from './tile-view.svg';
+import { default as IconTranslate } from './translate.svg';
 import { default as IconTrash } from './trash.svg';
 import { default as IconUserDeleted } from './user-deleted.svg';
 import { default as IconUser } from './user.svg';
@@ -218,5 +219,6 @@ export const DEFAULT_ICON: Record<string, any> = {
     IconWifi1Bar,
     IconWifi2Bars,
     IconWifi3Bars,
-    IconYahoo
+    IconYahoo,
+    IconTranslate
 };
@@ -110,7 +110,8 @@ const {
     IconWifi1Bar,
     IconWifi2Bars,
     IconWifi3Bars,
-    IconYahoo
+    IconYahoo,
+    IconTranslate
 } = Object.keys(DEFAULT_ICON).reduce((exportedIcons: Record<string, any>, key) => {
     return {
         ...exportedIcons,
@@ -229,5 +230,6 @@ export {
     IconWifi1Bar,
     IconWifi2Bars,
     IconWifi3Bars,
-    IconYahoo
+    IconYahoo,
+    IconTranslate
 };
5 · react/features/base/icons/svg/translate.svg · Normal file
@@ -0,0 +1,5 @@
<svg width="20" height="20" viewBox="0 0 20 20" fill="none" xmlns="http://www.w3.org/2000/svg">
    <path d="M7.5 10H12.5C13.3284 10 14 10.6716 14 11.5C14 12.3284 13.3284 13 12.5 13H7.5C6.67157 13 6 12.3284 6 11.5C6 10.6716 6.67157 10 7.5 10Z" fill="currentColor"/>
    <path d="M4 4C2.89543 4 2 4.89543 2 6V14C2 15.1046 2.89543 16 4 16H7.1L9.5 18.5L11.9 16H16C17.1046 16 18 15.1046 18 14V6C18 4.89543 17.1046 4 16 4H4ZM4 6H16V14H11.5L9.5 16.2L7.5 14H4V6Z" fill="currentColor"/>
    <path d="M8 7.5C8 7.22386 8.22386 7 8.5 7H11.5C11.7761 7 12 7.22386 12 7.5C12 7.77614 11.7761 8 11.5 8H8.5C8.22386 8 8 7.77614 8 7.5Z" fill="currentColor"/>
</svg>
@@ -0,0 +1,264 @@

/**
 * Class implementing the effect interface expected by a JitsiLocalTrack.
 * The UniversalTranslatorEffect replaces the original audio stream with translated audio
 * while maintaining the same interface as other stream effects.
 */
export class UniversalTranslatorEffect {
    /**
     * Original MediaStream from the JitsiLocalTrack that uses this effect.
     */
    _originalStream: MediaStream | null = null;

    /**
     * MediaStreamTrack obtained from the original MediaStream.
     */
    _originalTrack: MediaStreamTrack | null = null;

    /**
     * Translated audio stream that will replace the original.
     */
    _translatedStream: MediaStream | null = null;

    /**
     * MediaStreamTrack obtained from the translated stream.
     */
    _translatedTrack: MediaStreamTrack | null = null;

    /**
     * Audio context for creating the translated audio stream.
     */
    _audioContext: AudioContext | null = null;

    /**
     * Media stream destination for routing translated audio.
     */
    _streamDestination: MediaStreamAudioDestinationNode | null = null;

    /**
     * Whether the effect is currently active.
     */
    _isActive: boolean = false;

    /**
     * Queue of translated audio buffers to be played.
     */
    _audioQueue: AudioBuffer[] = [];

    /**
     * Whether audio is currently being processed.
     */
    _isProcessing: boolean = false;

    /**
     * Creates UniversalTranslatorEffect.
     */
    constructor() {
        // Initialize audio context
        this._audioContext = new AudioContext({
            sampleRate: 48000,
            latencyHint: 'interactive'
        });

        // Create destination for translated audio
        this._streamDestination = this._audioContext.createMediaStreamDestination();
        this._translatedStream = this._streamDestination.stream;
    }

    /**
     * Checks if the JitsiLocalTrack supports this effect.
     *
     * @param {JitsiLocalTrack} sourceLocalTrack - Track to which the effect will be applied.
     * @returns {boolean} - Returns true if this effect can run on the specified track, false otherwise.
     */
    isEnabled(sourceLocalTrack: any): boolean {
        // Only works with audio tracks
        return sourceLocalTrack.isAudioTrack();
    }

    /**
     * Effect interface called by source JitsiLocalTrack.
     * Returns the translated audio stream instead of the original.
     *
     * @param {MediaStream} audioStream - Original audio stream from microphone.
     * @returns {MediaStream} - MediaStream containing translated audio.
     */
    startEffect(audioStream: MediaStream): MediaStream {
        this._originalStream = audioStream;
        this._originalTrack = audioStream.getTracks()[0];
        this._isActive = true;

        console.log('UniversalTranslatorEffect: Started effect with translated stream');

        // Return the translated stream instead of the original
        return this._translatedStream!;
    }

    /**
     * Stop the translator effect.
     *
     * @returns {void}
     */
    stopEffect(): void {
        this._isActive = false;
        this._audioQueue = [];
        this._isProcessing = false;

        console.log('UniversalTranslatorEffect: Stopped effect');
    }

    /**
     * Change the muted state of the effect.
     *
     * @param {boolean} muted - Should effect be muted or not.
     * @returns {void}
     */
    setMuted(muted: boolean): void {
        if (this._translatedTrack) {
            this._translatedTrack.enabled = !muted;
        }
    }

    /**
     * Check whether or not this effect is muted.
     *
     * @returns {boolean}
     */
    isMuted(): boolean {
        return this._translatedTrack ? !this._translatedTrack.enabled : false;
    }

    /**
     * Add translated audio to be played through the effect.
     *
     * @param {AudioBuffer} audioBuffer - Translated audio buffer to play.
     * @returns {Promise<void>}
     */
    async playTranslatedAudio(audioBuffer: AudioBuffer): Promise<void> {
        if (!this._isActive || !this._audioContext || !this._streamDestination) {
            console.warn('UniversalTranslatorEffect: Effect not active, cannot play audio');

            return;
        }

        // Add to queue and process
        this._audioQueue.push(audioBuffer);

        if (!this._isProcessing) {
            this._processAudioQueue();
        }
    }

    /**
     * Process queued translated audio buffers.
     *
     * @returns {Promise<void>}
     */
    private async _processAudioQueue(): Promise<void> {
        if (this._isProcessing || !this._audioContext || !this._streamDestination) {
            return;
        }

        this._isProcessing = true;

        while (this._audioQueue.length > 0 && this._isActive) {
            const audioBuffer = this._audioQueue.shift()!;

            await this._playAudioBuffer(audioBuffer);
        }

        this._isProcessing = false;
    }

    /**
     * Play a single audio buffer through the translated stream.
     *
     * @param {AudioBuffer} audioBuffer - Audio buffer to play.
     * @returns {Promise<void>}
     */
    private async _playAudioBuffer(audioBuffer: AudioBuffer): Promise<void> {
        if (!this._audioContext || !this._streamDestination) {
            return;
        }

        try {
            const source = this._audioContext.createBufferSource();

            source.buffer = audioBuffer;
            source.connect(this._streamDestination);
            source.start();

            // Wait for the audio to finish playing
            return new Promise(resolve => {
                source.onended = () => resolve();

                // Fallback timeout
                setTimeout(resolve, audioBuffer.duration * 1000 + 100);
            });
        } catch (error) {
            console.error('UniversalTranslatorEffect: Error playing audio buffer:', error);
        }
    }

    /**
     * Get the original audio stream for translation processing.
     *
     * @returns {MediaStream | null} - Original microphone stream.
     */
    getOriginalStream(): MediaStream | null {
        return this._originalStream;
    }

    /**
     * Get the translated audio stream.
     *
     * @returns {MediaStream | null} - Stream containing translated audio.
     */
    getTranslatedStream(): MediaStream | null {
        return this._translatedStream;
    }

    /**
     * Check if the effect is currently active.
     *
     * @returns {boolean} - Whether the effect is active.
     */
    isActive(): boolean {
        return this._isActive;
    }

    /**
     * Create an audio buffer from a blob.
     *
     * @param {Blob} audioBlob - Audio blob to convert.
     * @returns {Promise<AudioBuffer>} - Converted audio buffer.
     */
    async createAudioBufferFromBlob(audioBlob: Blob): Promise<AudioBuffer> {
        if (!this._audioContext) {
            throw new Error('Audio context not initialized');
        }

        const arrayBuffer = await audioBlob.arrayBuffer();

        return await this._audioContext.decodeAudioData(arrayBuffer);
    }

    /**
     * Cleanup resources when effect is destroyed.
     *
     * @returns {void}
     */
    dispose(): void {
        this.stopEffect();

        if (this._audioContext) {
            this._audioContext.close();
            this._audioContext = null;
        }

        this._streamDestination = null;
        this._translatedStream = null;
        this._originalStream = null;
        this._originalTrack = null;
        this._translatedTrack = null;
    }
}
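A hedged usage sketch, not part of the diff: in lib-jitsi-meet, a `JitsiLocalTrack` applies stream effects through its `setEffect` method, which is what ends up calling `isEnabled()`/`startEffect()` above. The `localAudioTrack` and `translatedBlob` values here are assumptions for illustration.

```typescript
import { UniversalTranslatorEffect } from './UniversalTranslatorEffect';

// Hypothetical wiring: replace the outgoing microphone stream with the
// effect's translated stream, then queue decoded TTS audio into it.
async function applyTranslatorEffect(localAudioTrack: any, translatedBlob: Blob) {
    const effect = new UniversalTranslatorEffect();

    // lib-jitsi-meet swaps the track's stream for the one startEffect() returns.
    await localAudioTrack.setEffect(effect);

    // Decode TTS output and queue it for playback into the outgoing stream.
    const buffer = await effect.createAudioBufferFromBlob(translatedBlob);

    await effect.playTranslatedAudio(buffer);
}
```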
@@ -0,0 +1 @@
export { UniversalTranslatorEffect } from './UniversalTranslatorEffect';
@@ -121,7 +121,8 @@ export const MAIN_TOOLBAR_BUTTONS_PRIORITY = [
     'embedmeeting',
     'feedback',
     'download',
-    'help'
+    'help',
+    'universal-translator'
 ];
 
 export const TOOLBAR_TIMEOUT = 4000;
@@ -176,6 +177,7 @@ export const TOOLBAR_BUTTONS: ToolbarButton[] = [
     'stats',
     'tileview',
     'toggle-camera',
+    'universal-translator',
     'videoquality',
     'whiteboard'
 ];
@@ -50,6 +50,7 @@ import SpeakerStats from '../speaker-stats/components/web/SpeakerStats';
 import { isSpeakerStatsDisabled } from '../speaker-stats/functions';
 import { useSpeakerStatsButton } from '../speaker-stats/hooks.web';
 import { useClosedCaptionButton } from '../subtitles/hooks.web';
+import { UniversalTranslatorButton } from '../universal-translator/components';
 import { toggleTileView } from '../video-layout/actions.any';
 import { shouldDisplayTileView } from '../video-layout/functions.web';
 import { useTileViewButton } from '../video-layout/hooks';
@@ -166,12 +167,19 @@ const download = {
     group: 4
 };
 
+const universalTranslator = {
+    key: 'universal-translator',
+    Content: UniversalTranslatorButton,
+    group: 3
+};
+
 const help = {
     key: 'help',
     Content: HelpButton,
     group: 4
 };
 
 
 /**
  * A hook that returns the toggle camera button if it is enabled and undefined otherwise.
  *
@@ -324,7 +332,8 @@ export function useToolboxButtons(
         embedmeeting: embed,
         feedback,
         download: _download,
-        help: _help
+        help: _help,
+        'universal-translator': universalTranslator
     };
     const buttonKeys = Object.keys(buttons) as ToolbarButton[];
 
@@ -51,6 +51,7 @@ export type ToolbarButton = 'camera' |
     'stats' |
     'tileview' |
     'toggle-camera' |
+    'universal-translator' |
     'videoquality' |
     'whiteboard' |
     '__end';
121 · react/features/universal-translator/README.md · Normal file
@@ -0,0 +1,121 @@
# Universal Translator Feature for Jitsi Meet

This feature provides real-time speech translation capabilities directly within Jitsi Meet meetings.

## Overview

The Universal Translator allows participants to:

- Translate speech from one language to another in real time
- Compare multiple STT/TTS service providers for optimal latency
- Use the BlackHole virtual audio device for seamless integration
- Monitor performance metrics for different service combinations

## Architecture

### Core Components

- **STT Providers**: Whisper (local), Groq, Deepgram, AssemblyAI
- **Translation Providers**: OpenAI GPT-4, Google Translate, Microsoft Translator
- **TTS Providers**: Cartesia Sonic, ElevenLabs, Deepgram Aura, Web Speech API
- **Audio Routing**: BlackHole virtual audio device integration

### Performance Targets

- Total end-to-end latency: <650ms
- STT processing: <300ms
- Translation: <200ms
- TTS generation: <100ms
- Audio routing: <50ms

## Usage

### Accessing the Feature

1. Click the translate button (🌐) in the Jitsi Meet toolbar
2. Configure your preferred service providers
3. Add API keys for external services
4. Select source and target languages
5. Start recording to begin real-time translation

### Service Configuration

The feature supports multiple providers for comparison:

**STT Services:**
- Whisper (Local) - free, privacy-focused, ~200ms latency
- Groq Whisper - ultra-fast, ~100ms latency
- Deepgram Nova-2 - real-time streaming, ~100ms latency
- AssemblyAI Universal-2 - highest accuracy, ~150ms latency

**TTS Services:**
- Cartesia Sonic - ultra-low latency, ~40ms
- ElevenLabs - highest quality, ~300ms latency
- Deepgram Aura - streaming capable, ~400ms latency
- Web Speech API - browser native, ~50ms latency

### API Key Requirements

External services require API keys:
- OpenAI (for GPT-4 translation)
- Groq (for ultra-fast STT)
- Deepgram (for STT and TTS)
- AssemblyAI (for high-accuracy STT)
- Cartesia (for low-latency TTS)
- ElevenLabs (for high-quality TTS)
- Azure/Google/Microsoft (for enterprise services)

## BlackHole Integration

For optimal audio routing on macOS:

1. Install BlackHole: https://existential.audio/blackhole/
2. Set BlackHole as your audio input device
3. The feature will automatically detect and use BlackHole for routing
4. Translated audio will be output through BlackHole for real-time playback (see the sketch after this list)
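The following is a hedged usage sketch, not part of the diff itself: it drives the `BlackHoleRouter` class added under `audio/blackhole-router.js` later in this change, and `translatedSamples` stands in for decoded TTS output. Note that `routeToJitsiMeet` only does real work when a BlackHole device was detected during `initialize()`.

```typescript
import { BlackHoleRouter } from './audio/blackhole-router';

// Illustrative only: play translated samples into the MediaStream that
// Jitsi Meet can consume as its microphone input.
async function playThroughBlackHole(translatedSamples: Float32Array) {
    const router = new BlackHoleRouter();

    // Detects BlackHole devices and falls back to default audio otherwise.
    const { deviceType } = await router.initialize();

    if (deviceType === 'blackhole') {
        // Wrap the raw samples in an AudioBuffer at the expected sample rate.
        const buffer = router.createAudioBuffer(translatedSamples, 16000);

        // Feed the buffer through the gain node into the virtual microphone.
        await router.routeToJitsiMeet(buffer);
    }
}
```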
## Development

### Adding New Service Providers

1. Create a provider class in the appropriate service directory
2. Implement the required interface methods
3. Add it to the provider factory
4. Update the UI configuration options (a minimal provider sketch follows this list)
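The provider interface itself is not shown in this diff, so the sketch below is an assumption about its shape: the `ISTTProvider` name, the `MySTTProvider` class, and the placeholder endpoint are all hypothetical, loosely modeled on the result object that `WhisperProcessor.processAudio` returns later in this change.

```typescript
// Hypothetical provider shape; the real interface is not part of this diff.
interface ISTTProvider {
    name: string;

    // Transcribe an audio blob, reporting text plus measured latency.
    transcribe(audio: Blob, language: string): Promise<{ latencyMs: number; text: string; }>;
}

class MySTTProvider implements ISTTProvider {
    name = 'my-stt';

    async transcribe(audio: Blob, language: string) {
        const start = performance.now();

        // Placeholder endpoint; substitute the real service call here.
        const response = await fetch(`https://example.invalid/stt?lang=${language}`, {
            body: audio,
            headers: { 'Content-Type': audio.type },
            method: 'POST'
        });
        const { text } = await response.json();

        return { latencyMs: performance.now() - start, text };
    }
}
```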
### Testing Latency

The feature includes built-in latency monitoring:

- Individual service latencies
- End-to-end pipeline performance
- Success rate tracking
- Request count statistics

A sketch of taking such a measurement is shown below.
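A minimal sketch, assuming only the standard `performance.now()` API plus the `updateLatencyMetrics` action creator from this diff's `actions.ts`; the `dispatch` and `runStt` parameters are stand-ins for real wiring.

```typescript
import { updateLatencyMetrics } from './actions';

// Time one pipeline stage and report the measurement to the Redux store.
async function measureSttLatency(dispatch: Function, runStt: () => Promise<string>) {
    const start = performance.now();
    const text = await runStt();
    const sttLatency = performance.now() - start;

    dispatch(updateLatencyMetrics({ sttLatency }));

    return text;
}
```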
## Configuration

The feature can be configured via the Jitsi Meet config:

```javascript
// config.js
const config = {
    // ... other config
    universalTranslator: {
        enabled: true,
        defaultSTTProvider: 'whisper',
        defaultTTSProvider: 'cartesia',
        defaultTranslationProvider: 'openai',
        defaultSourceLanguage: 'en',
        defaultTargetLanguage: 'es'
    }
};
```

## Limitations

- Requires a modern browser with the MediaRecorder API
- External services require an internet connection
- API costs apply for cloud-based providers
- BlackHole is macOS-only (falls back to default audio on other platforms)

## Future Enhancements

- Support for Windows virtual audio devices
- Additional language pairs
- Speaker identification
- Conversation history
- Export/sharing capabilities
- Integration with meeting recordings
98 · react/features/universal-translator/actionTypes.ts · Normal file
@@ -0,0 +1,98 @@
/**
 * Action types for universal translator feature.
 */

/**
 * Action to initialize the universal translator.
 */
export const INIT_UNIVERSAL_TRANSLATOR = 'INIT_UNIVERSAL_TRANSLATOR';

/**
 * Action to set the STT provider.
 */
export const SET_STT_PROVIDER = 'SET_STT_PROVIDER';

/**
 * Action to set the TTS provider.
 */
export const SET_TTS_PROVIDER = 'SET_TTS_PROVIDER';

/**
 * Action to set the translation provider.
 */
export const SET_TRANSLATION_PROVIDER = 'SET_TRANSLATION_PROVIDER';

/**
 * Action to set the source language.
 */
export const SET_SOURCE_LANGUAGE = 'SET_SOURCE_LANGUAGE';

/**
 * Action to set the target language.
 */
export const SET_TARGET_LANGUAGE = 'SET_TARGET_LANGUAGE';

/**
 * Action to start translation recording.
 */
export const START_TRANSLATION_RECORDING = 'START_TRANSLATION_RECORDING';

/**
 * Action to stop translation recording.
 */
export const STOP_TRANSLATION_RECORDING = 'STOP_TRANSLATION_RECORDING';

/**
 * Action to update translation status.
 */
export const UPDATE_TRANSLATION_STATUS = 'UPDATE_TRANSLATION_STATUS';

/**
 * Action to set the transcription result.
 */
export const SET_TRANSCRIPTION_RESULT = 'SET_TRANSCRIPTION_RESULT';

/**
 * Action to set the translation result.
 */
export const SET_TRANSLATION_RESULT = 'SET_TRANSLATION_RESULT';

/**
 * Action to update latency metrics.
 */
export const UPDATE_LATENCY_METRICS = 'UPDATE_LATENCY_METRICS';

/**
 * Action to set a translation error.
 */
export const SET_TRANSLATION_ERROR = 'SET_TRANSLATION_ERROR';

/**
 * Action to clear the translation error.
 */
export const CLEAR_TRANSLATION_ERROR = 'CLEAR_TRANSLATION_ERROR';

/**
 * Action to toggle the universal translator UI.
 */
export const TOGGLE_UNIVERSAL_TRANSLATOR = 'TOGGLE_UNIVERSAL_TRANSLATOR';

/**
 * Action to set API keys for services.
 */
export const SET_API_KEYS = 'SET_API_KEYS';

/**
 * Action to update the processing step.
 */
export const UPDATE_PROCESSING_STEP = 'UPDATE_PROCESSING_STEP';

/**
 * Action type to enable the universal translator effect.
 */
export const ENABLE_UNIVERSAL_TRANSLATOR_EFFECT = 'ENABLE_UNIVERSAL_TRANSLATOR_EFFECT';

/**
 * Action type to disable the universal translator effect.
 */
export const DISABLE_UNIVERSAL_TRANSLATOR_EFFECT = 'DISABLE_UNIVERSAL_TRANSLATOR_EFFECT';
299 · react/features/universal-translator/actions.ts · Normal file
@@ -0,0 +1,299 @@
import { IStore } from '../app/types';
import { hideDialog, openDialog } from '../base/dialog/actions';

import {
    CLEAR_TRANSLATION_ERROR,
    DISABLE_UNIVERSAL_TRANSLATOR_EFFECT,
    ENABLE_UNIVERSAL_TRANSLATOR_EFFECT,
    INIT_UNIVERSAL_TRANSLATOR,
    SET_API_KEYS,
    SET_SOURCE_LANGUAGE,
    SET_STT_PROVIDER,
    SET_TARGET_LANGUAGE,
    SET_TRANSCRIPTION_RESULT,
    SET_TRANSLATION_ERROR,
    SET_TRANSLATION_PROVIDER,
    SET_TRANSLATION_RESULT,
    SET_TTS_PROVIDER,
    START_TRANSLATION_RECORDING,
    STOP_TRANSLATION_RECORDING,
    TOGGLE_UNIVERSAL_TRANSLATOR,
    UPDATE_LATENCY_METRICS,
    UPDATE_PROCESSING_STEP,
    UPDATE_TRANSLATION_STATUS
} from './actionTypes';
import { UniversalTranslatorDialog } from './components';

/**
 * Initializes the universal translator with service providers.
 *
 * @param {Object} config - Configuration for service providers.
 * @returns {Object} Redux action.
 */
export function initUniversalTranslator(config: any) {
    return {
        type: INIT_UNIVERSAL_TRANSLATOR,
        config
    };
}

/**
 * Sets the STT (Speech-to-Text) provider.
 *
 * @param {string} provider - The STT provider name.
 * @returns {Object} Redux action.
 */
export function setSTTProvider(provider: string) {
    return {
        type: SET_STT_PROVIDER,
        provider
    };
}

/**
 * Sets the TTS (Text-to-Speech) provider.
 *
 * @param {string} provider - The TTS provider name.
 * @returns {Object} Redux action.
 */
export function setTTSProvider(provider: string) {
    return {
        type: SET_TTS_PROVIDER,
        provider
    };
}

/**
 * Sets the translation provider.
 *
 * @param {string} provider - The translation provider name.
 * @returns {Object} Redux action.
 */
export function setTranslationProvider(provider: string) {
    return {
        type: SET_TRANSLATION_PROVIDER,
        provider
    };
}

/**
 * Sets the source language for translation.
 *
 * @param {string} language - The source language code.
 * @returns {Object} Redux action.
 */
export function setSourceLanguage(language: string) {
    return {
        type: SET_SOURCE_LANGUAGE,
        language
    };
}

/**
 * Sets the target language for translation.
 *
 * @param {string} language - The target language code.
 * @returns {Object} Redux action.
 */
export function setTargetLanguage(language: string) {
    return {
        type: SET_TARGET_LANGUAGE,
        language
    };
}

/**
 * Starts translation recording.
 *
 * @returns {Object} Redux action.
 */
export function startTranslationRecording() {
    return {
        type: START_TRANSLATION_RECORDING
    };
}

/**
 * Stops translation recording.
 *
 * @returns {Object} Redux action.
 */
export function stopTranslationRecording() {
    return {
        type: STOP_TRANSLATION_RECORDING
    };
}

/**
 * Updates the translation status.
 *
 * @param {string} status - The current status.
 * @returns {Object} Redux action.
 */
export function updateTranslationStatus(status: string) {
    return {
        type: UPDATE_TRANSLATION_STATUS,
        status
    };
}

/**
 * Updates the current processing step.
 *
 * @param {string} step - The current processing step.
 * @returns {Object} Redux action.
 */
export function updateProcessingStep(step: string) {
    return {
        type: UPDATE_PROCESSING_STEP,
        step
    };
}

/**
 * Sets the transcription result.
 *
 * @param {Object} result - The transcription result.
 * @returns {Object} Redux action.
 */
export function setTranscriptionResult(result: any) {
    return {
        type: SET_TRANSCRIPTION_RESULT,
        result
    };
}

/**
 * Sets the translation result.
 *
 * @param {Object} result - The translation result.
 * @returns {Object} Redux action.
 */
export function setTranslationResult(result: any) {
    return {
        type: SET_TRANSLATION_RESULT,
        result
    };
}

/**
 * Updates latency metrics for different services.
 *
 * @param {Object} metrics - The latency metrics.
 * @returns {Object} Redux action.
 */
export function updateLatencyMetrics(metrics: any) {
    return {
        type: UPDATE_LATENCY_METRICS,
        metrics
    };
}

/**
 * Sets a translation error.
 *
 * @param {string} error - The error message.
 * @returns {Object} Redux action.
 */
export function setTranslationError(error: string) {
    return {
        type: SET_TRANSLATION_ERROR,
        error
    };
}

/**
 * Clears the translation error.
 *
 * @returns {Object} Redux action.
 */
export function clearTranslationError() {
    return {
        type: CLEAR_TRANSLATION_ERROR
    };
}

/**
 * Sets API keys for various services.
 *
 * @param {Object} keys - Object containing API keys for different services.
 * @returns {Object} Redux action.
 */
export function setApiKeys(keys: any) {
    return {
        type: SET_API_KEYS,
        keys
    };
}

/**
 * Toggles the universal translator dialog.
 *
 * @returns {Function} Redux thunk action.
 */
export function toggleUniversalTranslator() {
    return (dispatch: IStore['dispatch'], getState: IStore['getState']) => {
        const state = getState();
        const universalTranslator = state['features/universal-translator'];

        if (universalTranslator?.showDialog) {
            dispatch(hideDialog(UniversalTranslatorDialog));
        } else {
            dispatch(openDialog(UniversalTranslatorDialog));
        }

        dispatch({
            type: TOGGLE_UNIVERSAL_TRANSLATOR
        });
    };
}

/**
 * Shows the universal translator dialog.
 *
 * @returns {Function} Redux thunk action.
 */
export function showUniversalTranslatorDialog() {
    return (dispatch: IStore['dispatch']) => {
        dispatch(openDialog(UniversalTranslatorDialog));
        dispatch({
            type: TOGGLE_UNIVERSAL_TRANSLATOR
        });
    };
}

/**
 * Hides the universal translator dialog.
 *
 * @returns {Function} Redux thunk action.
 */
export function hideUniversalTranslatorDialog() {
    return (dispatch: IStore['dispatch']) => {
        dispatch(hideDialog(UniversalTranslatorDialog));
        dispatch({
            type: TOGGLE_UNIVERSAL_TRANSLATOR
        });
    };
}

/**
 * Enables the universal translator effect on the audio track.
 *
 * @returns {Object} Redux action.
 */
export function enableUniversalTranslatorEffect() {
    return {
        type: ENABLE_UNIVERSAL_TRANSLATOR_EFFECT
    };
}

/**
 * Disables the universal translator effect on the audio track.
 *
 * @returns {Object} Redux action.
 */
export function disableUniversalTranslatorEffect() {
    return {
        type: DISABLE_UNIVERSAL_TRANSLATOR_EFFECT
    };
}
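A hedged usage sketch, not part of the diff: how UI code might chain these creators. The helper name is hypothetical; the dispatch type mirrors the `IStore` import used above.

```typescript
import { IStore } from '../app/types';

import {
    setSourceLanguage,
    setTargetLanguage,
    startTranslationRecording
} from './actions';

// Hypothetical helper: pick a language pair, then start a recording session.
function beginEnglishToSpanish(dispatch: IStore['dispatch']) {
    dispatch(setSourceLanguage('en'));
    dispatch(setTargetLanguage('es'));
    dispatch(startTranslationRecording());
}
```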
217 · react/features/universal-translator/audio/audio-utils.js · Normal file
@@ -0,0 +1,217 @@
/**
 * Audio utility functions for processing and format conversion.
 * Adapted from standalone-meeting-assist.
 */

/**
 * Convert a WebM audio blob to a Float32Array for Whisper processing.
 *
 * @param {Blob} webmBlob - The WebM audio blob to convert.
 * @returns {Promise<Float32Array>} Promise resolving to converted audio data.
 */
export function convertWebMToFloat32(webmBlob) {
    return new Promise((resolve, reject) => {
        const fileReader = new FileReader();

        fileReader.onloadend = async () => {
            try {
                const audioContext = new AudioContext({ sampleRate: 16000 });
                const arrayBuffer = fileReader.result;
                const decoded = await audioContext.decodeAudioData(arrayBuffer);

                let audio;

                if (decoded.numberOfChannels === 2) {
                    // Convert stereo to mono
                    const SCALING_FACTOR = Math.sqrt(2);
                    const left = decoded.getChannelData(0);
                    const right = decoded.getChannelData(1);

                    audio = new Float32Array(left.length);
                    for (let i = 0; i < decoded.length; ++i) {
                        audio[i] = SCALING_FACTOR * (left[i] + right[i]) / 2;
                    }
                } else {
                    // Use the first channel for mono
                    audio = decoded.getChannelData(0);
                }

                resolve(audio);
            } catch (error) {
                reject(error);
            }
        };

        fileReader.onerror = () => reject(new Error('FileReader error'));
        fileReader.readAsArrayBuffer(webmBlob);
    });
}

/**
 * Create a MediaRecorder for audio capture.
 *
 * @param {MediaStream} stream - The audio stream to record.
 * @param {Object} options - Recording options.
 * @returns {MediaRecorder} The configured MediaRecorder instance.
 */
export function createAudioRecorder(stream, options = {}) {
    const {
        mimeType = 'audio/webm;codecs=opus',
        audioBitsPerSecond = 128000
    } = options;

    try {
        return new MediaRecorder(stream, {
            mimeType,
            audioBitsPerSecond
        });
    } catch (error) {
        console.warn('Falling back to default MediaRecorder options:', error);

        return new MediaRecorder(stream);
    }
}

/**
 * Get user media with optimal settings for speech recognition.
 *
 * @param {string|null} deviceId - Optional device ID to use.
 * @returns {Promise<MediaStream>} Promise resolving to the media stream.
 */
export async function getUserMediaForSpeech(deviceId = null) {
    const constraints = {
        audio: {
            deviceId: deviceId ? { exact: deviceId } : undefined,
            sampleRate: 16000,
            channelCount: 1,
            echoCancellation: true,
            noiseSuppression: true,
            autoGainControl: true
        }
    };

    try {
        return await navigator.mediaDevices.getUserMedia(constraints);
    } catch (error) {
        console.warn('Failed to get media with optimal settings, falling back:', error);

        // Fallback to basic audio capture
        return await navigator.mediaDevices.getUserMedia({ audio: true });
    }
}

/**
 * Get available audio input devices.
 *
 * @returns {Promise<Array>} Promise resolving to an array of audio input devices.
 */
export async function getAudioInputDevices() {
    try {
        const devices = await navigator.mediaDevices.enumerateDevices();

        return devices.filter(device => device.kind === 'audioinput');
    } catch (error) {
        console.error('Error enumerating audio devices:', error);

        return [];
    }
}

/**
 * Get available audio output devices.
 *
 * @returns {Promise<Array>} Promise resolving to an array of audio output devices.
 */
export async function getAudioOutputDevices() {
    try {
        const devices = await navigator.mediaDevices.enumerateDevices();

        return devices.filter(device => device.kind === 'audiooutput');
    } catch (error) {
        console.error('Error enumerating audio output devices:', error);

        return [];
    }
}

/**
 * Create an audio context for processing.
 *
 * @param {number} sampleRate - The sample rate for the audio context.
 * @returns {AudioContext} The created audio context.
 */
export function createAudioContext(sampleRate = 16000) {
    return new AudioContext({ sampleRate });
}

/**
 * Convert a Float32Array to an audio blob.
 *
 * @param {Float32Array} float32Array - The audio data to convert.
 * @param {number} sampleRate - The sample rate of the audio data.
 * @returns {Blob} The resulting audio blob.
 */
export function float32ArrayToBlob(float32Array, sampleRate = 16000) {
    // Create a 16-bit PCM WAV file
    const length = float32Array.length;
    const buffer = new ArrayBuffer(44 + (length * 2));
    const view = new DataView(buffer);

    // WAV header
    const writeString = (offset, string) => {
        for (let i = 0; i < string.length; i++) {
            view.setUint8(offset + i, string.charCodeAt(i));
        }
    };

    writeString(0, 'RIFF');
    view.setUint32(4, 36 + (length * 2), true);
    writeString(8, 'WAVE');
    writeString(12, 'fmt ');
    view.setUint32(16, 16, true);
    view.setUint16(20, 1, true);
    view.setUint16(22, 1, true);
    view.setUint32(24, sampleRate, true);
    view.setUint32(28, sampleRate * 2, true);
    view.setUint16(32, 2, true);
    view.setUint16(34, 16, true);
    writeString(36, 'data');
    view.setUint32(40, length * 2, true);

    // Convert float32 to int16
    let offset = 44;

    for (let i = 0; i < length; i++) {
        const sample = Math.max(-1, Math.min(1, float32Array[i]));

        view.setInt16(offset, sample < 0 ? sample * 0x8000 : sample * 0x7FFF, true);
        offset += 2;
    }

    return new Blob([ buffer ], { type: 'audio/wav' });
}

/**
 * Play audio from a Float32Array.
 *
 * @param {Float32Array} float32Array - The audio data to play.
 * @param {number} sampleRate - The sample rate of the audio data.
 * @returns {Promise<void>} Promise that resolves when the audio finishes playing.
 */
export function playAudioFromFloat32(float32Array, sampleRate = 16000) {
    const audioContext = createAudioContext(sampleRate);
    const audioBuffer = audioContext.createBuffer(1, float32Array.length, sampleRate);

    audioBuffer.copyToChannel(float32Array, 0);

    const source = audioContext.createBufferSource();

    source.buffer = audioBuffer;
    source.connect(audioContext.destination);
    source.start();

    return new Promise(resolve => {
        source.onended = resolve;
    });
}
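A hedged end-to-end sketch using only the helpers above: record a short clip, down-mix it to mono 16 kHz samples, and repackage it as WAV (the shape an STT upload typically wants). The `durationMs` parameter and helper name are assumptions for illustration.

```typescript
import {
    convertWebMToFloat32,
    createAudioRecorder,
    float32ArrayToBlob,
    getUserMediaForSpeech
} from './audio-utils';

// Record `durationMs` of microphone audio and return it as a 16 kHz WAV blob.
async function recordWavClip(durationMs: number): Promise<Blob> {
    const stream = await getUserMediaForSpeech();
    const recorder = createAudioRecorder(stream);
    const chunks: Blob[] = [];

    recorder.ondataavailable = (e: BlobEvent) => chunks.push(e.data);

    const stopped = new Promise<void>(resolve => {
        recorder.onstop = () => resolve();
    });

    recorder.start();
    setTimeout(() => recorder.stop(), durationMs);
    await stopped;

    // Decode the compressed WebM/Opus recording into raw samples, then wrap
    // them in a 16-bit PCM WAV container via the helper above.
    const samples = await convertWebMToFloat32(new Blob(chunks, { type: 'audio/webm' }));

    return float32ArrayToBlob(samples, 16000);
}
```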
365 · react/features/universal-translator/audio/blackhole-router.js · Normal file
@@ -0,0 +1,365 @@
/**
|
||||
* BlackHole virtual audio device integration
|
||||
* Handles audio routing for macOS BlackHole virtual audio driver
|
||||
* Supports routing translated audio back into Jitsi Meet stream.
|
||||
*/
|
||||
|
||||
export class BlackHoleRouter {
|
||||
constructor() {
|
||||
this.inputStream = null;
|
||||
this.outputContext = null;
|
||||
this.outputDestination = null;
|
||||
this.isRouting = false;
|
||||
this.audioQueue = [];
|
||||
this.processingQueue = false;
|
||||
this.blackHoleInputDevice = null;
|
||||
this.blackHoleOutputDevice = null;
|
||||
this.mixerNode = null;
|
||||
this.gainNode = null;
|
||||
this.isInitialized = false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize BlackHole audio routing with enhanced device detection.
|
||||
*/
|
||||
async initialize() {
|
||||
try {
|
||||
console.log('Initializing BlackHole audio routing...');
|
||||
|
||||
// Get all available devices
|
||||
const devices = await navigator.mediaDevices.enumerateDevices();
|
||||
|
||||
// Find BlackHole input and output devices
|
||||
this.blackHoleInputDevice = devices.find(device =>
|
||||
device.kind === 'audioinput'
|
||||
&& (device.label.toLowerCase().includes('blackhole')
|
||||
|| device.label.toLowerCase().includes('aggregate device'))
|
||||
);
|
||||
|
||||
this.blackHoleOutputDevice = devices.find(device =>
|
||||
device.kind === 'audiooutput'
|
||||
&& device.label.toLowerCase().includes('blackhole')
|
||||
);
|
||||
|
||||
if (!this.blackHoleInputDevice) {
|
||||
console.warn('BlackHole input device not found. Available devices:',
|
||||
devices.filter(d => d.kind === 'audioinput').map(d => d.label));
|
||||
|
||||
return await this._initializeDefaultAudio();
|
||||
}
|
||||
|
||||
console.log('Found BlackHole devices:', {
|
||||
input: this.blackHoleInputDevice.label,
|
||||
output: this.blackHoleOutputDevice?.label || 'Using default output'
|
||||
});
|
||||
|
||||
await this._initializeBlackHoleRouting();
|
||||
this.isInitialized = true;
|
||||
|
||||
return {
|
||||
inputDevice: this.blackHoleInputDevice,
|
||||
outputDevice: this.blackHoleOutputDevice,
|
||||
deviceType: 'blackhole'
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('Error initializing BlackHole:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize BlackHole routing with proper audio context setup.
|
||||
*/
|
||||
async _initializeBlackHoleRouting() {
|
||||
// Create audio context with appropriate sample rate
|
||||
this.outputContext = new AudioContext({
|
||||
sampleRate: 48000, // Higher quality for better translation output
|
||||
latencyHint: 'interactive' // Low latency for real-time translation
|
||||
});
|
||||
|
||||
// Create gain node for volume control
|
||||
this.gainNode = this.outputContext.createGain();
|
||||
this.gainNode.gain.value = 0.8; // Slightly lower to prevent clipping
|
||||
|
||||
// Create destination for BlackHole output
|
||||
this.outputDestination = this.outputContext.createMediaStreamDestination();
|
||||
this.gainNode.connect(this.outputDestination);
|
||||
|
||||
console.log('BlackHole routing initialized successfully');
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize with default audio device as fallback.
|
||||
*/
|
||||
async _initializeDefaultAudio() {
|
||||
const constraints = {
|
||||
audio: {
|
||||
sampleRate: 16000,
|
||||
channelCount: 1,
|
||||
echoCancellation: true,
|
||||
noiseSuppression: true,
|
||||
autoGainControl: true
|
||||
}
|
||||
};
|
||||
|
||||
this.inputStream = await navigator.mediaDevices.getUserMedia(constraints);
|
||||
this.outputContext = new AudioContext({ sampleRate: 16000 });
|
||||
|
||||
console.log('Default audio initialized as fallback');
|
||||
|
||||
return {
|
||||
inputStream: this.inputStream,
|
||||
outputContext: this.outputContext,
|
||||
deviceType: 'default'
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Start audio routing.
|
||||
*/
|
||||
async startRouting() {
|
||||
if (this.isRouting) {
|
||||
console.warn('Audio routing already started');
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (!this.inputStream || !this.outputContext) {
|
||||
throw new Error('BlackHole not initialized');
|
||||
}
|
||||
|
||||
this.isRouting = true;
|
||||
console.log('Started audio routing');
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop audio routing.
|
||||
*/
|
||||
async stopRouting() {
|
||||
if (!this.isRouting) {
|
||||
return;
|
||||
}
|
||||
|
||||
this.isRouting = false;
|
||||
|
||||
if (this.inputStream) {
|
||||
this.inputStream.getTracks().forEach(track => track.stop());
|
||||
}
|
||||
|
||||
if (this.outputContext) {
|
||||
await this.outputContext.close();
|
||||
}
|
||||
|
||||
console.log('Stopped audio routing');
|
||||
}
|
||||
|
||||
/**
|
||||
* Route translated audio to output.
|
||||
*/
|
||||
async routeTranslatedAudio(audioBuffer) {
|
||||
if (!this.outputContext || !this.isRouting) {
|
||||
console.warn('Audio routing not active');
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
// Add to queue for processing
|
||||
this.audioQueue.push(audioBuffer);
|
||||
|
||||
if (!this.processingQueue) {
|
||||
this._processAudioQueue();
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error routing translated audio:', error);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Process queued audio buffers.
|
||||
*/
|
||||
async _processAudioQueue() {
|
||||
if (this.processingQueue) {
|
||||
return;
|
||||
}
|
||||
|
||||
this.processingQueue = true;
|
||||
|
||||
while (this.audioQueue.length > 0 && this.isRouting) {
|
||||
const audioBuffer = this.audioQueue.shift();
|
||||
|
||||
await this._playAudioBuffer(audioBuffer);
|
||||
}
|
||||
|
||||
this.processingQueue = false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Play audio buffer through BlackHole.
|
||||
*/
|
||||
async _playAudioBuffer(audioBuffer) {
|
||||
if (!this.outputContext) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const source = this.outputContext.createBufferSource();
|
||||
|
||||
source.buffer = audioBuffer;
|
||||
source.connect(this.outputContext.destination);
|
||||
source.start();
|
||||
|
||||
// Return promise that resolves when audio finishes playing
|
||||
return new Promise(resolve => {
|
||||
source.onended = resolve;
|
||||
|
||||
// Also resolve after buffer duration as fallback
|
||||
setTimeout(resolve, audioBuffer.duration * 1000 + 100);
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('Error playing audio buffer:', error);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create audio buffer from Float32Array.
|
||||
*/
|
||||
createAudioBuffer(float32Array, sampleRate = 16000) {
|
||||
if (!this.outputContext) {
|
||||
throw new Error('Output context not initialized');
|
||||
}
|
||||
|
||||
const audioBuffer = this.outputContext.createBuffer(
|
||||
1,
|
||||
float32Array.length,
|
||||
sampleRate
|
||||
);
|
||||
|
||||
audioBuffer.copyToChannel(float32Array, 0);
|
||||
|
||||
return audioBuffer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get input stream for recording.
|
||||
*/
|
||||
getInputStream() {
|
||||
return this.inputStream;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get output context for audio processing.
|
||||
*/
|
||||
getOutputContext() {
|
||||
return this.outputContext;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if routing is active.
|
||||
*/
|
||||
isActive() {
|
||||
return this.isRouting;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get available BlackHole devices.
|
||||
*/
|
||||
static async getBlackHoleDevices() {
|
||||
try {
|
||||
const devices = await navigator.mediaDevices.enumerateDevices();
|
||||
|
||||
return devices.filter(device =>
|
||||
device.label.toLowerCase().includes('blackhole')
|
||||
);
|
||||
} catch (error) {
|
||||
console.error('Error getting BlackHole devices:', error);
|
||||
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if BlackHole is available.
|
||||
*/
|
||||
static async isBlackHoleAvailable() {
|
||||
const devices = await BlackHoleRouter.getBlackHoleDevices();
|
||||
|
||||
return devices.length > 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a virtual microphone stream that Jitsi Meet can use
|
||||
* This stream will contain the translated audio.
|
||||
*/
|
||||
createVirtualMicrophone() {
|
||||
if (!this.outputDestination) {
|
||||
throw new Error('BlackHole not properly initialized');
|
||||
}
|
||||
|
||||
// Return the MediaStream that contains translated audio
|
||||
// This can be used by Jitsi Meet as the microphone input
|
||||
return this.outputDestination.stream;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up audio routing to feed translated audio into Jitsi Meet
|
||||
* Call this method after translation is complete.
|
||||
*/
|
||||
async routeToJitsiMeet(audioBuffer) {
|
||||
if (!this.isInitialized) {
|
||||
console.warn('BlackHole not initialized, cannot route audio');
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
// Create buffer source
|
||||
const source = this.outputContext.createBufferSource();
|
||||
|
||||
source.buffer = audioBuffer;
|
||||
|
||||
// Connect through gain control to destination
|
||||
source.connect(this.gainNode);
|
||||
|
||||
// Start playback
|
||||
source.start();
|
||||
|
||||
console.log('Routed translated audio to BlackHole for Jitsi Meet');
            return new Promise(resolve => {
                source.onended = resolve;
                setTimeout(resolve, audioBuffer.duration * 1000 + 100);
            });
        } catch (error) {
            console.error('Error routing audio to Jitsi Meet:', error);
            throw error;
        }
    }

    /**
     * Get the MediaStream for Jitsi Meet to use as microphone input.
     */
    getJitsiMeetInputStream() {
        if (!this.outputDestination) {
            throw new Error('BlackHole not initialized');
        }

        return this.outputDestination.stream;
    }

    /**
     * Set gain for translated audio output.
     */
    setOutputGain(value) {
        if (this.gainNode) {
            this.gainNode.gain.value = Math.max(0, Math.min(1, value));
        }
    }

    /**
     * Get current output gain.
     */
    getOutputGain() {
        return this.gainNode ? this.gainNode.gain.value : 0;
    }
}

125
react/features/universal-translator/audio/whisper-processor.js
Normal file
@@ -0,0 +1,125 @@
import { env, pipeline } from '@xenova/transformers';

/**
 * Whisper-based speech-to-text processor
 * Adapted from standalone-meeting-assist for universal translation.
 */
export class WhisperProcessor {
    constructor() {
        this.transcriber = null;
        this.isModelLoading = true;
        this.modelLoadingPromise = null;
    }

    /**
     * Initialize the Whisper model.
     */
    async initializeModel() {
        if (this.modelLoadingPromise) {
            return this.modelLoadingPromise;
        }

        this.modelLoadingPromise = this._loadModel();

        return this.modelLoadingPromise;
    }

    async _loadModel() {
        try {
            console.log('Loading Whisper model...');
            env.allowLocalModels = false;
            env.useBrowserCache = false;

            this.transcriber = await pipeline(
                'automatic-speech-recognition',
                'Xenova/whisper-base'
            );

            this.isModelLoading = false;
            console.log('Whisper model loaded successfully');

            return this.transcriber;
        } catch (error) {
            console.error('Error loading Whisper model:', error);
            this.isModelLoading = false;
            throw error;
        }
    }
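
    // Note: with allowLocalModels disabled and the browser cache turned off,
    // transformers.js re-fetches the Whisper ONNX weights from the Hugging
    // Face hub on every page load, so the first transcription after a reload
    // is expected to be slow.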

    /**
     * Process audio data and return transcription with language detection.
     */
    async processAudio(audioData, options = {}) {
        if (!this.transcriber) {
            throw new Error('Whisper model not initialized');
        }

        const {
            language = 'auto',
            chunkLengthS = 30,
            strideLengthS = 5,
            returnTimestamps = false
        } = options;

        try {
            const result = await this.transcriber(audioData, {
                chunk_length_s: chunkLengthS,
                stride_length_s: strideLengthS,
                language: language === 'auto' ? undefined : language,
                return_timestamps: returnTimestamps
            });

            return {
                text: result.text,
                language: this._detectLanguage(result.text),
                confidence: result.confidence || 0.95,
                timestamps: result.chunks || []
            };
        } catch (error) {
            console.error('Transcription error:', error);
            throw error;
        }
    }

    /**
     * Simple language detection based on text patterns
     * In production, use a proper language detection service.
     */
    _detectLanguage(text) {
        // Simple heuristic-based language detection
        // This should be replaced with a proper language detection service
        const languagePatterns = {
            'en': /^[a-zA-Z\s.,!?'"()-]+$/,
            'es': /[ñáéíóúüÑÁÉÍÓÚÜ]/,
            'fr': /[àâäçéèêëîïôöùûüÿÀÂÄÇÉÈÊËÎÏÔÖÙÛÜŸ]/,
            'de': /[äöüßÄÖÜ]/,
            'pt': /[ãõçÃÕÇ]/,
            'it': /[àèéìíîòóù]/
        };

        for (const [ lang, pattern ] of Object.entries(languagePatterns)) {
            if (pattern.test(text)) {
                return lang;
            }
        }

        return 'en'; // Default to English
    }
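
    // Caveat: the English pattern is tested first and matches any pure-ASCII
    // string, so diacritic-free text in other Latin-script languages
    // (e.g. "hola como estas") is reported as 'en'.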

    /**
     * Check if the model is ready for processing.
     */
    isReady() {
        return !this.isModelLoading && this.transcriber !== null;
    }

    /**
     * Get model loading status.
     */
    getLoadingStatus() {
        return {
            isLoading: this.isModelLoading,
            isReady: this.isReady()
        };
    }
}
@@ -0,0 +1,49 @@
import React, { useCallback } from 'react';
import { WithTranslation, withTranslation } from 'react-i18next';
import { useDispatch, useSelector } from 'react-redux';

import { IReduxState } from '../../app/types';
import { IconTranslate } from '../../base/icons/svg';
import ToolboxItem from '../../base/toolbox/components/ToolboxItem';
import { toggleUniversalTranslator } from '../actions';
import { IUniversalTranslatorState } from '../reducer';

/**
 * Universal Translator toolbar button component.
 */
const UniversalTranslatorButton = ({ t, tReady, i18n }: WithTranslation) => {
    const dispatch = useDispatch();
    const universalTranslator: IUniversalTranslatorState = useSelector(
        (state: IReduxState) => state['features/universal-translator']
    );

    const handleClick = useCallback(() => {
        dispatch(toggleUniversalTranslator());
    }, [ dispatch ]);

    const handleKeyDown = useCallback((e?: React.KeyboardEvent) => {
        if (e?.key === 'Enter' || e?.key === ' ') {
            handleClick();
        }
    }, [ handleClick ]);

    const isActive = universalTranslator?.showDialog || universalTranslator?.isRecording;
    const tooltip = universalTranslator?.isRecording
        ? 'universalTranslator.recording'
        : 'universalTranslator.tooltip';

    return (
        <ToolboxItem
            accessibilityLabel = 'universalTranslator.accessibilityLabel'
            i18n = { i18n }
            icon = { IconTranslate }
            labelProps = {{}}
            onClick = { handleClick }
            onKeyDown = { handleKeyDown }
            tReady = { tReady }
            toggled = { isActive }
            tooltip = { tooltip } />
    );
};

export default withTranslation()(UniversalTranslatorButton);
@@ -0,0 +1,396 @@
import React, { useCallback, useEffect, useState } from 'react';
import { useDispatch, useSelector } from 'react-redux';

import { IReduxState } from '../../app/types';
import { hideDialog } from '../../base/dialog/actions';
import Dialog from '../../base/ui/components/web/Dialog';
import {
    clearTranslationError,
    disableUniversalTranslatorEffect,
    enableUniversalTranslatorEffect,
    initUniversalTranslator,
    setApiKeys,
    setSTTProvider,
    setSourceLanguage,
    setTTSProvider,
    setTargetLanguage,
    setTranslationError,
    setTranslationProvider,
    startTranslationRecording,
    stopTranslationRecording
} from '../actions';
import { IUniversalTranslatorState } from '../reducer';

// Language options
const languages = [
    { code: 'en', name: 'English', flag: '🇺🇸' },
    { code: 'es', name: 'Spanish', flag: '🇪🇸' },
    { code: 'fr', name: 'French', flag: '🇫🇷' },
    { code: 'de', name: 'German', flag: '🇩🇪' },
    { code: 'it', name: 'Italian', flag: '🇮🇹' },
    { code: 'pt', name: 'Portuguese', flag: '🇵🇹' },
    { code: 'ro', name: 'Romanian', flag: '🇷🇴' },
    { code: 'ja', name: 'Japanese', flag: '🇯🇵' },
    { code: 'ko', name: 'Korean', flag: '🇰🇷' },
    { code: 'zh', name: 'Chinese', flag: '🇨🇳' }
];

// Service options
const sttOptions = [
    { id: 'whisper', name: 'Whisper (Local)', latency: '~200ms' },
    { id: 'groq', name: 'Groq Whisper', latency: '~100ms' },
    { id: 'deepgram', name: 'Deepgram Nova-2', latency: '~100ms' },
    { id: 'assemblyai', name: 'AssemblyAI Universal-2', latency: '~150ms' }
];

const ttsOptions = [
    { id: 'cartesia', name: 'Cartesia Sonic', latency: '~40ms' },
    { id: 'elevenlabs', name: 'ElevenLabs', latency: '~300ms' },
    { id: 'deepgram', name: 'Deepgram Aura', latency: '~400ms' },
    { id: 'webspeech', name: 'Web Speech API', latency: '~50ms' }
];

const translationOptions = [
    { id: 'openai', name: 'OpenAI GPT-4', latency: '~200ms' },
    { id: 'google', name: 'Google Translate', latency: '~150ms' },
    { id: 'microsoft', name: 'Microsoft Translator', latency: '~180ms' }
];

/**
 * Universal Translator Dialog component.
 */
export const UniversalTranslatorDialog = () => {
    const dispatch = useDispatch();
    const universalTranslator: IUniversalTranslatorState = useSelector(
        (state: IReduxState) => state['features/universal-translator']
    );

    const [ localApiKeys, setLocalApiKeys ] = useState(universalTranslator?.apiKeys || {});
    const [ saveIndicator, setSaveIndicator ] = useState<string | null>(null);

    useEffect(() => {
        if (universalTranslator?.apiKeys) {
            setLocalApiKeys(universalTranslator.apiKeys);
        }
    }, [ universalTranslator?.apiKeys ]);

    // Initialize the universal translator service when dialog opens
    useEffect(() => {
        if (!universalTranslator?.isInitialized) {
            console.log('Initializing Universal Translator service...');
            dispatch(initUniversalTranslator({
                sttProvider: universalTranslator?.sttProvider || 'deepgram',
                ttsProvider: universalTranslator?.ttsProvider || 'cartesia',
                translationProvider: universalTranslator?.translationProvider || 'openai',
                apiKeys: universalTranslator?.apiKeys || {}
            }));
        }
    }, [ dispatch, universalTranslator?.isInitialized ]);
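
    // The effect above keys only on `isInitialized`, so initialization runs
    // once per session even though it reads the current provider selection.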

    const handleClose = useCallback(() => {
        dispatch(hideDialog());
    }, [ dispatch ]);

    const handleSTTProviderChange = useCallback((provider: string) => {
        dispatch(setSTTProvider(provider));
    }, [ dispatch ]);

    const handleTTSProviderChange = useCallback((provider: string) => {
        dispatch(setTTSProvider(provider));
    }, [ dispatch ]);

    const handleTranslationProviderChange = useCallback((provider: string) => {
        dispatch(setTranslationProvider(provider));
    }, [ dispatch ]);

    const handleSourceLanguageChange = useCallback((language: string) => {
        dispatch(setSourceLanguage(language));
    }, [ dispatch ]);

    const handleTargetLanguageChange = useCallback((language: string) => {
        dispatch(setTargetLanguage(language));
    }, [ dispatch ]);

    const handleStartTranslation = useCallback(() => {
        // Validate API keys before starting
        const requiredKeys: Record<string, boolean> = {
            deepgram: universalTranslator?.sttProvider === 'deepgram',
            openai: universalTranslator?.translationProvider === 'openai',
            cartesia: universalTranslator?.ttsProvider === 'cartesia'
        };

        const missingKeys = Object.entries(requiredKeys)
            .filter(([ key, required ]) => required && !localApiKeys[key as keyof typeof localApiKeys])
            .map(([ key ]) => key);

        if (missingKeys.length > 0) {
            console.error('Missing API keys:', missingKeys);
            dispatch(setTranslationError(`Missing API keys: ${missingKeys.join(', ')}`));

            return;
        }

        console.log('Starting real-time translation with providers:', {
            stt: universalTranslator?.sttProvider,
            translation: universalTranslator?.translationProvider,
            tts: universalTranslator?.ttsProvider
        });

        dispatch(startTranslationRecording());
    }, [ dispatch, localApiKeys, universalTranslator ]);

    const handleStopTranslation = useCallback(() => {
        dispatch(stopTranslationRecording());
    }, [ dispatch ]);

    const handleApiKeyChange = useCallback((service: string, value: string) => {
        const newKeys = { ...localApiKeys, [service]: value };

        setLocalApiKeys(newKeys);
        dispatch(setApiKeys(newKeys));

        // Show save indicator
        if (value.trim()) {
            setSaveIndicator(service);
            setTimeout(() => setSaveIndicator(null), 2000);
        }
    }, [ localApiKeys, dispatch ]);

    const handleClearError = useCallback(() => {
        dispatch(clearTranslationError());
    }, [ dispatch ]);

    const handleEffectToggle = useCallback((enabled: boolean) => {
        if (enabled) {
            dispatch(enableUniversalTranslatorEffect());
        } else {
            dispatch(disableUniversalTranslatorEffect());
        }
    }, [ dispatch ]);

    const formatLatency = (latency: number) => {
        return latency ? `${Math.round(latency)}ms` : '-';
    };

    const getTotalLatency = () => {
        const { stt, translation, tts } = universalTranslator?.latencyMetrics || { stt: {}, translation: {}, tts: {} };

        return (stt.lastLatency || 0) + (translation.lastLatency || 0) + (tts.lastLatency || 0);
    };

    return (
        <Dialog
            cancel = {{ hidden: true }}
            ok = {{ hidden: true }}
            onCancel = { handleClose }
            size = 'large'
            titleKey = 'universalTranslator.title'>
            <div className = 'universal-translator-dialog'>
                {/* Language Selection */}
                <div className = 'language-selection'>
                    <h3>Language Settings</h3>
                    <div className = 'language-selectors'>
                        <div className = 'language-selector'>
                            <label>From:</label>
                            <select
                                onChange = { e => handleSourceLanguageChange(e.target.value) }
                                value = { universalTranslator?.sourceLanguage || 'en' }>
                                {languages.map(lang => (
                                    <option
                                        key = { lang.code }
                                        value = { lang.code }>
                                        {lang.flag} {lang.name}
                                    </option>
                                ))}
                            </select>
                        </div>
                        <div className = 'language-selector'>
                            <label>To:</label>
                            <select
                                onChange = { e => handleTargetLanguageChange(e.target.value) }
                                value = { universalTranslator?.targetLanguage || 'es' }>
                                {languages.map(lang => (
                                    <option
                                        key = { lang.code }
                                        value = { lang.code }>
                                        {lang.flag} {lang.name}
                                    </option>
                                ))}
                            </select>
                        </div>
                    </div>
                </div>

                {/* Service Selection */}
                <div className = 'service-selection'>
                    <h3>Service Providers</h3>

                    <div className = 'service-group'>
                        <label>Speech-to-Text:</label>
                        <select
                            onChange = { e => handleSTTProviderChange(e.target.value) }
                            value = { universalTranslator?.sttProvider || 'whisper' }>
                            {sttOptions.map(option => (
                                <option
                                    key = { option.id }
                                    value = { option.id }>
                                    {option.name} ({option.latency})
                                </option>
                            ))}
                        </select>
                    </div>

                    <div className = 'service-group'>
                        <label>Translation:</label>
                        <select
                            onChange = { e => handleTranslationProviderChange(e.target.value) }
                            value = { universalTranslator?.translationProvider || 'openai' }>
                            {translationOptions.map(option => (
                                <option
                                    key = { option.id }
                                    value = { option.id }>
                                    {option.name} ({option.latency})
                                </option>
                            ))}
                        </select>
                    </div>

                    <div className = 'service-group'>
                        <label>Text-to-Speech:</label>
                        <select
                            onChange = { e => handleTTSProviderChange(e.target.value) }
                            value = { universalTranslator?.ttsProvider || 'cartesia' }>
                            {ttsOptions.map(option => (
                                <option
                                    key = { option.id }
                                    value = { option.id }>
                                    {option.name} ({option.latency})
                                </option>
                            ))}
                        </select>
                    </div>

                    <div className = 'service-group'>
                        <label>
                            <input
                                checked = { universalTranslator?.effectEnabled || false }
                                onChange = { e => handleEffectToggle(e.target.checked) }
                                type = 'checkbox' />
                            Route translated audio to conference (replaces your microphone)
                        </label>
                    </div>
                </div>

                {/* API Keys */}
                <div className = 'api-keys-section'>
                    <h3>API Keys</h3>
                    <p className = 'persistence-note'>
                        API keys and preferences are automatically saved locally and will be remembered across sessions.
                    </p>
                    <div className = 'api-keys-grid'>
                        {Object.entries(localApiKeys).map(([ service, key ]) => (
                            <div
                                className = 'api-key-input'
                                key = { service }>
                                <label>{service.charAt(0).toUpperCase() + service.slice(1)}:</label>
                                <input
                                    onChange = { e => handleApiKeyChange(service, e.target.value) }
                                    placeholder = { `Enter ${service} API key` }
                                    type = 'password'
                                    value = { key } />
                                {saveIndicator === service && (
                                    <span className = 'save-indicator'>✓ Saved</span>
                                )}
                            </div>
                        ))}
                    </div>
                </div>

                {/* Translation Status */}
                <div className = 'translation-status'>
                    <h3>Translation Status</h3>
                    <div className = 'status-info'>
                        <div className = 'status-indicator'>
                            <span className = { `status-dot ${universalTranslator?.status || 'idle'}` } />
                            <span className = 'status-text'>
                                {universalTranslator?.isRecording ? 'Translating in real-time...'
                                    : universalTranslator?.status === 'processing' ? 'Processing...'
                                    : universalTranslator?.status === 'completed' ? 'Translation Complete'
                                    : universalTranslator?.status === 'error' ? 'Error' : 'Ready'}
                            </span>
                        </div>
                        {getTotalLatency() > 0 && (
                            <div className = 'latency-info'>
                                Total Latency: {formatLatency(getTotalLatency())}
                            </div>
                        )}
                    </div>

                    {universalTranslator?.error && (
                        <div className = 'error-message'>
                            <span>{universalTranslator?.error}</span>
                            <button onClick = { handleClearError }>Clear</button>
                        </div>
                    )}

                    {universalTranslator?.transcriptionResult && (
                        <div className = 'transcription-result'>
                            <h4>Transcription:</h4>
                            <p>{universalTranslator?.transcriptionResult?.text}</p>
                        </div>
                    )}

                    {universalTranslator?.translationResult && (
                        <div className = 'translation-result'>
                            <h4>Translation:</h4>
                            <p>{universalTranslator?.translationResult?.translatedText}</p>
                        </div>
                    )}
                </div>

                {/* Performance Metrics */}
                {universalTranslator?.status === 'completed' && (
                    <div className = 'performance-metrics'>
                        <h3>Performance Metrics</h3>
                        <div className = 'metrics-grid'>
                            <div className = 'metric'>
                                <label>STT Latency:</label>
                                <span>{formatLatency(universalTranslator?.latencyMetrics?.stt?.lastLatency)}</span>
                            </div>
                            <div className = 'metric'>
                                <label>Translation Latency:</label>
                                <span>{formatLatency(universalTranslator?.latencyMetrics?.translation?.lastLatency)}</span>
                            </div>
                            <div className = 'metric'>
                                <label>TTS Latency:</label>
                                <span>{formatLatency(universalTranslator?.latencyMetrics?.tts?.lastLatency)}</span>
                            </div>
                            <div className = 'metric'>
                                <label>Total Requests:</label>
                                <span>{universalTranslator?.latencyMetrics?.stt?.requestCount || 0}</span>
                            </div>
                        </div>
                    </div>
                )}

                {/* Control Buttons */}
                <div className = 'control-buttons'>
                    {!universalTranslator?.isRecording ? (
                        <button
                            className = 'record-button'
                            disabled = { universalTranslator?.status === 'processing' }
                            onClick = { handleStartTranslation }>
                            🗣️ Start Real-time Translation
                        </button>
                    ) : (
                        <button
                            className = 'stop-button'
                            onClick = { handleStopTranslation }>
                            ⏹️ Stop Translation
                        </button>
                    )}
                </div>
            </div>
        </Dialog>
    );
};

2
react/features/universal-translator/components/index.ts
Normal file
@@ -0,0 +1,2 @@
export { UniversalTranslatorDialog } from './UniversalTranslatorDialog';
export { default as UniversalTranslatorButton } from './UniversalTranslatorButton';

160
react/features/universal-translator/functions.ts
Normal file
@@ -0,0 +1,160 @@
import { IReduxState } from '../app/types';

import { IUniversalTranslatorState } from './reducer';

/**
 * Gets the universal translator state from the Redux store.
 *
 * @param {Object} state - The Redux state.
 * @returns {Object} The universal translator state.
 */
export function getUniversalTranslatorState(state: IReduxState): IUniversalTranslatorState {
    return state['features/universal-translator'];
}

/**
 * Checks if the universal translator is available/enabled.
 *
 * @param {Object} state - The Redux state.
 * @returns {boolean} True if the universal translator is available.
 */
export function isUniversalTranslatorAvailable(state: IReduxState): boolean {
    const translatorState = getUniversalTranslatorState(state);

    return Boolean(translatorState?.isInitialized);
}

/**
 * Checks if the universal translator is currently recording.
 *
 * @param {Object} state - The Redux state.
 * @returns {boolean} True if recording is active.
 */
export function isUniversalTranslatorRecording(state: IReduxState): boolean {
    const translatorState = getUniversalTranslatorState(state);

    return Boolean(translatorState?.isRecording);
}

/**
 * Gets the current translation status.
 *
 * @param {Object} state - The Redux state.
 * @returns {string} The current status.
 */
export function getTranslationStatus(state: IReduxState): string {
    const translatorState = getUniversalTranslatorState(state);

    return translatorState?.status || 'idle';
}

/**
 * Checks if the universal translator dialog is open.
 *
 * @param {Object} state - The Redux state.
 * @returns {boolean} True if the dialog is open.
 */
export function isUniversalTranslatorDialogOpen(state: IReduxState): boolean {
    const translatorState = getUniversalTranslatorState(state);

    return Boolean(translatorState?.showDialog);
}

/**
 * Gets the current latency metrics.
 *
 * @param {Object} state - The Redux state.
 * @returns {Object} The latency metrics.
 */
export function getLatencyMetrics(state: IReduxState) {
    const translatorState = getUniversalTranslatorState(state);

    return translatorState?.latencyMetrics || {
        stt: { averageLatency: 0, lastLatency: 0, requestCount: 0 },
        translation: { averageLatency: 0, lastLatency: 0, requestCount: 0 },
        tts: { averageLatency: 0, lastLatency: 0, requestCount: 0 }
    };
}

/**
 * Gets the total end-to-end latency.
 *
 * @param {Object} state - The Redux state.
 * @returns {number} Total latency in milliseconds.
 */
export function getTotalLatency(state: IReduxState): number {
    const metrics = getLatencyMetrics(state);

    return (metrics.stt.lastLatency || 0)
        + (metrics.translation.lastLatency || 0)
        + (metrics.tts.lastLatency || 0);
}

/**
 * Checks if all required API keys are configured for the current providers.
 *
 * @param {Object} state - The Redux state.
 * @returns {boolean} True if all required keys are present.
 */
export function areApiKeysConfigured(state: IReduxState): boolean {
    const translatorState = getUniversalTranslatorState(state);

    if (!translatorState) {
        return false;
    }

    const { sttProvider, ttsProvider, translationProvider, apiKeys } = translatorState;

    // Check if required API keys are present (skip local/free providers)
    const requiredKeys = [];

    if (sttProvider !== 'whisper') {
        requiredKeys.push(sttProvider);
    }

    if (ttsProvider !== 'webspeech') {
        requiredKeys.push(ttsProvider);
    }

    requiredKeys.push(translationProvider);

    return requiredKeys.every(provider =>
        apiKeys[provider as keyof typeof apiKeys] && apiKeys[provider as keyof typeof apiKeys].length > 0
    );
}
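
// Usage sketch: these selectors are plain (state) => value functions, so in
// a component they compose with react-redux as usual, e.g.:
//     const ready = useSelector(areApiKeysConfigured);
//     const latencyMs = useSelector(getTotalLatency);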

/**
 * Gets supported languages for the current configuration.
 *
 * @param {Object} state - The Redux state.
 * @returns {Array} Array of supported language codes.
 */
export function getSupportedLanguages(state: IReduxState): string[] {
    // Common languages supported by most providers
    return [
        'en', 'es', 'fr', 'de', 'it', 'pt', 'ru',
        'ja', 'ko', 'zh', 'ar', 'hi', 'tr', 'pl', 'nl'
    ];
}

/**
 * Gets the current provider configuration summary.
 *
 * @param {Object} state - The Redux state.
 * @returns {Object} Provider configuration summary.
 */
export function getProviderConfiguration(state: IReduxState) {
    const translatorState = getUniversalTranslatorState(state);

    if (!translatorState) {
        return null;
    }

    return {
        stt: translatorState.sttProvider,
        translation: translatorState.translationProvider,
        tts: translatorState.ttsProvider,
        sourceLanguage: translatorState.sourceLanguage,
        targetLanguage: translatorState.targetLanguage
    };
}

3
react/features/universal-translator/index.ts
Normal file
@@ -0,0 +1,3 @@
import './middleware';
import './middleware/index';
import './reducer';

6
react/features/universal-translator/logger.ts
Normal file
@@ -0,0 +1,6 @@
import { getLogger } from '../base/logging/functions';

/**
 * The logger for the universal translator feature.
 */
export default getLogger('features/universal-translator');

462
react/features/universal-translator/middleware.ts
Normal file
@@ -0,0 +1,462 @@
import { IStore } from '../app/types';
import MiddlewareRegistry from '../base/redux/MiddlewareRegistry';
import { UniversalTranslatorEffect } from '../stream-effects/universal-translator';

import {
    INIT_UNIVERSAL_TRANSLATOR,
    START_TRANSLATION_RECORDING,
    STOP_TRANSLATION_RECORDING
} from './actionTypes';
import {
    setTranscriptionResult,
    setTranslationError,
    setTranslationResult,
    updateLatencyMetrics,
    updateProcessingStep,
    updateTranslationStatus
} from './actions';
// @ts-ignore - whisper-processor is a .js file without types
// import { WhisperProcessor } from './audio/whisper-processor';
// @ts-ignore - audio-utils is a .js file without types
import { convertWebMToFloat32, createAudioRecorder, float32ArrayToBlob, getUserMediaForSpeech } from './audio/audio-utils';
// @ts-ignore - blackhole-router is a .js file without types
import { BlackHoleRouter } from './audio/blackhole-router';
import { getUniversalTranslatorEffect } from './middleware/streamEffectMiddleware';
import { STTProviderFactory } from './services/stt-providers';
import { TranslationProviderFactory } from './services/translation';
import { TTSProviderFactory } from './services/tts-providers';

/**
 * Universal translator service instance.
 */
let translatorService: UniversalTranslatorService | null = null;

/**
 * Universal Translator Service class that orchestrates the translation pipeline.
 */
class UniversalTranslatorService {
    // private whisperProcessor: WhisperProcessor;
    private sttProviders: Map<string, any> = new Map();
    private ttsProviders: Map<string, any> = new Map();
    private translationProviders: Map<string, any> = new Map();
    private blackHoleRouter: BlackHoleRouter;
    private mediaRecorder: MediaRecorder | null = null;
    private audioChunks: Blob[] = [];
    private stream: MediaStream | null = null;
    private processingInterval: number = 3000; // Process every 3 seconds
    private isRecordingContinuously: boolean = false;
    private isProcessingChunk: boolean = false; // Prevent overlapping processing
    private intervalId: any = null;
    private dispatch: IStore['dispatch'];
    private getState: IStore['getState'];

    constructor(dispatch: IStore['dispatch'], getState: IStore['getState']) {
        this.dispatch = dispatch;
        this.getState = getState;
        // this.whisperProcessor = new WhisperProcessor();
        this.blackHoleRouter = new BlackHoleRouter();
    }

    /**
     * Initialize the translation service.
     */
    async initialize(config: any) {
        try {
            console.log('Initializing Universal Translator Service...');

            // Initialize Whisper processor
            // await this.whisperProcessor.initializeModel();

            // Initialize BlackHole router
            await this.blackHoleRouter.initialize();

            console.log('Universal Translator Service initialized successfully');
        } catch (error) {
            console.error('Failed to initialize Universal Translator Service:', error);
            this.dispatch(setTranslationError(`Initialization failed: ${error instanceof Error ? error.message : 'Unknown error'}`));
        }
    }

    /**
     * Start translation recording.
     */
    async startRecording() {
        try {
            console.log('Starting universal translator real-time translation...');
            this.dispatch(updateProcessingStep('recording'));
            this.dispatch(updateTranslationStatus('recording'));

            // Get audio stream (prefer BlackHole if available)
            this.stream = this.blackHoleRouter.getInputStream() || await getUserMediaForSpeech();
            console.log('Audio stream acquired');

            // Create media recorder for continuous recording without time slicing
            // This will record continuously and we'll process complete recordings at intervals
            this.mediaRecorder = createAudioRecorder(this.stream, {
                mimeType: 'audio/webm;codecs=opus'
            });
            this.audioChunks = [];
            this.isRecordingContinuously = true;

            if (this.mediaRecorder) {
                this.mediaRecorder.ondataavailable = async event => {
                    if (event.data.size > 0 && this.isRecordingContinuously) {
                        console.log(`Complete audio recording received: ${event.data.size} bytes`);

                        if (!this.isProcessingChunk) {
                            this.isProcessingChunk = true;

                            try {
                                // Convert complete WebM file to WAV
                                const float32Array = await convertWebMToFloat32(event.data);
                                const wavBlob = float32ArrayToBlob(float32Array);

                                await this.processAudioChunk(wavBlob);
                            } catch (conversionError) {
                                console.warn('WebM conversion failed:', conversionError);
                            } finally {
                                this.isProcessingChunk = false;
                            }
                        }
                    }
                };

                this.mediaRecorder.onstop = () => {
                    console.log('MediaRecorder stopped');
                    if (this.isRecordingContinuously) {
                        // Restart recording if we're still supposed to be recording
                        setTimeout(() => {
                            if (this.mediaRecorder && this.isRecordingContinuously) {
                                this.mediaRecorder.start();
                                console.log('MediaRecorder restarted for next interval');
                            }
                        }, 100);
                    }
                };

                this.mediaRecorder.onerror = event => {
                    console.error('MediaRecorder error:', event);
                };

                this.mediaRecorder.onstart = () => {
                    console.log('MediaRecorder started successfully');
                };
            }

            // Start recording without time slicing (will record until manually stopped)
            this.mediaRecorder?.start();

            // Set up interval to stop and restart recording every few seconds for processing
            this.intervalId = setInterval(() => {
                if (this.mediaRecorder && this.mediaRecorder.state === 'recording' && !this.isProcessingChunk) {
                    console.log(`Stopping recording for processing (${this.processingInterval}ms interval)`);
                    this.mediaRecorder.stop();
                }
            }, this.processingInterval);
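
            // Rationale: stopping the recorder forces a complete,
            // self-contained WebM file through ondataavailable (time-sliced
            // MediaRecorder chunks are not independently decodable), and the
            // onstop handler above restarts recording for the next interval.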

            console.log(`Real-time translation started - processing every ${this.processingInterval}ms`);
        } catch (error) {
            console.error('Failed to start recording:', error);
            this.dispatch(setTranslationError(`Recording failed: ${error instanceof Error ? error.message : 'Unknown error'}`));
        }
    }

    /**
     * Stop translation recording.
     */
    async stopRecording() {
        this.isRecordingContinuously = false;
        this.isProcessingChunk = false;

        // Clear the interval
        if (this.intervalId) {
            clearInterval(this.intervalId);
            this.intervalId = null;
        }

        // Clear any remaining chunks
        this.audioChunks = [];

        if (this.mediaRecorder && this.mediaRecorder.state === 'recording') {
            this.mediaRecorder.stop();
        }

        // Stop the tracks unconditionally: the recorder may be in its brief
        // restart window (state 'inactive') when the user stops, and the
        // stream would otherwise be leaked.
        if (this.stream) {
            this.stream.getTracks().forEach(track => track.stop());
        }
    }

    /**
     * Process a single audio chunk for real-time translation.
     */
    private async processAudioChunk(audioChunk: Blob) {
        try {
            const state = this.getState();
            const universalTranslator = state['features/universal-translator'];

            if (!universalTranslator || !this.isRecordingContinuously) {
                return;
            }

            // Check if audio chunk is large enough (minimum 1KB)
            if (audioChunk.size < 1024) {
                console.log('Skipping chunk - too small:', audioChunk.size, 'bytes');

                return;
            }

            console.log('Processing audio chunk for real-time translation...', audioChunk.size, 'bytes');

            // Step 1: Speech-to-Text
            const transcriptionResult = await this.performSTT(audioChunk, universalTranslator);

            // Skip if no meaningful transcription
            if (!transcriptionResult.text || transcriptionResult.text.trim().length < 2) {
                console.log('Skipping chunk - no meaningful speech detected:', transcriptionResult.text);

                return;
            }

            this.dispatch(setTranscriptionResult(transcriptionResult));

            // Step 2: Translation
            const translationResult = await this.performTranslation(
                transcriptionResult.text,
                universalTranslator.sourceLanguage,
                universalTranslator.targetLanguage,
                universalTranslator
            );

            this.dispatch(setTranslationResult(translationResult));

            // Step 3: Text-to-Speech
            const ttsResult = await this.performTTS(
                translationResult.translatedText,
                universalTranslator.targetLanguage,
                universalTranslator
            );

            // Step 4: Audio Playback
            await this.playTranslatedAudio(ttsResult.audioBlob);

            console.log('Real-time translation chunk completed successfully');
        } catch (error) {
            console.error('Failed to process audio chunk:', error);

            // Don't dispatch error for individual chunks to avoid spam
        }
    }

    /**
     * Perform speech-to-text conversion.
     */
    private async performSTT(audioBlob: Blob, config: any) {
        const startTime = performance.now();

        try {
            let result;

            if (config.sttProvider === 'whisper') {
                // Use local Whisper processing (currently disabled - missing @xenova/transformers dependency)
                // const audioData = await convertWebMToFloat32(audioBlob);
                // result = await this.whisperProcessor.processAudio(audioData);
                throw new Error('Local Whisper processing is not available. Please use an external STT provider.');
            } else {
                // Use external STT provider
                const provider = await this.getOrCreateSTTProvider(config.sttProvider, config.apiKeys);

                result = await provider.transcribe(audioBlob);
            }

            const endTime = performance.now();
            const latency = endTime - startTime;

            this.dispatch(updateLatencyMetrics({
                stt: {
                    lastLatency: latency,
                    averageLatency: latency, // Will be properly calculated by provider
                    requestCount: 1
                }
            }));

            return result;
        } catch (error) {
            throw new Error(`STT failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
    }

    /**
     * Perform text translation.
     */
    private async performTranslation(text: string, sourceLang: string, targetLang: string, config: any) {
        const startTime = performance.now();

        try {
            const provider = await this.getOrCreateTranslationProvider(config.translationProvider, config.apiKeys);
            const result = await provider.translate(text, sourceLang, targetLang);

            const endTime = performance.now();
            const latency = endTime - startTime;

            this.dispatch(updateLatencyMetrics({
                translation: {
                    lastLatency: latency,
                    averageLatency: latency,
                    requestCount: 1
                }
            }));

            return result;
        } catch (error) {
            throw new Error(`Translation failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
    }

    /**
     * Perform text-to-speech synthesis.
     */
    private async performTTS(text: string, language: string, config: any) {
        const startTime = performance.now();

        try {
            const provider = await this.getOrCreateTTSProvider(config.ttsProvider, config.apiKeys);
            const result = await provider.synthesize(text, language);

            const endTime = performance.now();
            const latency = endTime - startTime;

            this.dispatch(updateLatencyMetrics({
                tts: {
                    lastLatency: latency,
                    averageLatency: latency,
                    requestCount: 1
                }
            }));

            return result;
        } catch (error) {
            throw new Error(`TTS failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
    }

    /**
     * Play translated audio through Universal Translator Effect or fallback methods.
     */
    private async playTranslatedAudio(audioBlob: Blob) {
        try {
            const effect = getUniversalTranslatorEffect();

            if (effect && effect.isActive()) {
                // Convert blob to audio buffer and route through the effect
                const audioBuffer = await effect.createAudioBufferFromBlob(audioBlob);

                await effect.playTranslatedAudio(audioBuffer);

                console.log('Translated audio routed to Jitsi Meet via UniversalTranslatorEffect');
            } else if (this.blackHoleRouter.isActive()) {
                // Fallback to BlackHole if effect is not active
                const audioContext = this.blackHoleRouter.getOutputContext();
                const arrayBuffer = await audioBlob.arrayBuffer();
                const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);

                await this.blackHoleRouter.routeToJitsiMeet(audioBuffer);

                console.log('Translated audio routed to Jitsi Meet via BlackHole (fallback)');
            } else {
                // Final fallback to regular audio playback
                const audio = new Audio(URL.createObjectURL(audioBlob));

                await audio.play();
                console.log('Translated audio played via default output (fallback)');
            }
        } catch (error) {
            console.warn('Audio playback failed:', error);

            // Non-critical error, don't throw
        }
    }

    /**
     * Get or create STT provider instance.
     */
    private async getOrCreateSTTProvider(providerName: string, apiKeys: any) {
        if (!this.sttProviders.has(providerName)) {
            console.log(`Creating STT provider: ${providerName} with API key: ${apiKeys[providerName] ? 'present' : 'missing'}`);
            const provider = STTProviderFactory.create(providerName, {
                apiKey: apiKeys[providerName]
            });

            await provider.initialize();
            this.sttProviders.set(providerName, provider);
            console.log(`STT provider ${providerName} initialized successfully`);
        }

        return this.sttProviders.get(providerName);
    }

    /**
     * Get or create TTS provider instance.
     */
    private async getOrCreateTTSProvider(providerName: string, apiKeys: any) {
        if (!this.ttsProviders.has(providerName)) {
            console.log(`Creating TTS provider: ${providerName} with API key: ${apiKeys[providerName] ? 'present' : 'missing'}`);
            const provider = TTSProviderFactory.create(providerName, {
                apiKey: apiKeys[providerName]
            });

            await provider.initialize();
            this.ttsProviders.set(providerName, provider);
            console.log(`TTS provider ${providerName} initialized successfully`);
        }

        return this.ttsProviders.get(providerName);
    }

    /**
     * Get or create translation provider instance.
     */
    private async getOrCreateTranslationProvider(providerName: string, apiKeys: any) {
        if (!this.translationProviders.has(providerName)) {
            console.log(`Creating translation provider: ${providerName} with API key: ${apiKeys[providerName] ? 'present' : 'missing'}`);
            const provider = TranslationProviderFactory.create(providerName, {
                apiKey: apiKeys[providerName]
            });

            await provider.initialize();
            this.translationProviders.set(providerName, provider);
            console.log(`Translation provider ${providerName} initialized successfully`);
        }

        return this.translationProviders.get(providerName);
    }
}

/**
 * Middleware to handle universal translator actions.
 */
MiddlewareRegistry.register((store: IStore) => (next: Function) => (action: any) => {
    const { dispatch, getState } = store;

    switch (action.type) {
    case INIT_UNIVERSAL_TRANSLATOR:
        if (!translatorService) {
            translatorService = new UniversalTranslatorService(dispatch, getState);
            translatorService.initialize(action.config);
        }
        break;

    case START_TRANSLATION_RECORDING:
        if (translatorService) {
            translatorService.startRecording();
        } else {
            dispatch(setTranslationError('Translator service not initialized'));
        }
        break;

    case STOP_TRANSLATION_RECORDING:
        if (translatorService) {
            translatorService.stopRecording();
        }
        break;
    }

    return next(action);
});

1
react/features/universal-translator/middleware/index.ts
Normal file
@@ -0,0 +1 @@
import './streamEffectMiddleware';

@@ -0,0 +1,137 @@
import { IStore } from '../../app/types';
import { CONFERENCE_JOINED } from '../../base/conference/actionTypes';
import MiddlewareRegistry from '../../base/redux/MiddlewareRegistry';
import { getLocalAudioTrack, getLocalJitsiAudioTrack } from '../../base/tracks/functions.any';
import { UniversalTranslatorEffect } from '../../stream-effects/universal-translator';

import {
    DISABLE_UNIVERSAL_TRANSLATOR_EFFECT,
    ENABLE_UNIVERSAL_TRANSLATOR_EFFECT,
    START_TRANSLATION_RECORDING
} from '../actionTypes';
import {
    disableUniversalTranslatorEffect,
    enableUniversalTranslatorEffect
} from '../actions';

/**
 * Global reference to the Universal Translator effect instance.
 */
let universalTranslatorEffect: UniversalTranslatorEffect | null = null;

/**
 * Middleware to handle Universal Translator stream effect integration.
 */
MiddlewareRegistry.register((store: IStore) => (next: Function) => (action: any) => {
    const { dispatch, getState } = store;

    switch (action.type) {
    case CONFERENCE_JOINED:
        // Initialize effect when conference is joined
        _initializeUniversalTranslatorEffect(store);
        break;

    case START_TRANSLATION_RECORDING:
        // Enable effect when translation starts
        if (!getState()['features/universal-translator']?.effectEnabled) {
            dispatch(enableUniversalTranslatorEffect());
        }
        break;

    case ENABLE_UNIVERSAL_TRANSLATOR_EFFECT:
        _enableEffect(store);
        break;

    case DISABLE_UNIVERSAL_TRANSLATOR_EFFECT:
        _disableEffect(store);
        break;
    }

    return next(action);
});

/**
 * Initialize the Universal Translator effect.
 */
async function _initializeUniversalTranslatorEffect(store: IStore) {
    const { getState } = store;

    try {
        // Create effect instance if it doesn't exist
        if (!universalTranslatorEffect) {
            universalTranslatorEffect = new UniversalTranslatorEffect();
            console.log('UniversalTranslatorEffect: Effect instance created');
        }
    } catch (error) {
        console.error('Failed to initialize Universal Translator effect:', error);
    }
}

/**
 * Enable the Universal Translator effect on the local audio track.
 */
async function _enableEffect(store: IStore) {
    const { getState } = store;

    if (!universalTranslatorEffect) {
        console.warn('Universal Translator effect not initialized');

        return;
    }

    try {
        const state = getState();
        const conference = state['features/base/conference'].conference;
        const localAudioTrack = getLocalJitsiAudioTrack(state);

        if (!conference || !localAudioTrack) {
            console.warn('Conference or local audio track not available');

            return;
        }

        // Apply the effect to the local audio track
        if (localAudioTrack && localAudioTrack.setEffect) {
            await localAudioTrack.setEffect(universalTranslatorEffect);
        } else {
            console.warn('Local audio track does not support effects');

            return;
        }

        console.log('UniversalTranslatorEffect: Effect enabled on local audio track');
    } catch (error) {
        console.error('Failed to enable Universal Translator effect:', error);
    }
}

/**
 * Disable the Universal Translator effect on the local audio track.
 */
async function _disableEffect(store: IStore) {
    const { getState } = store;

    if (!universalTranslatorEffect) {
        return;
    }

    try {
        const state = getState();
        const localAudioTrack = getLocalJitsiAudioTrack(state);

        if (localAudioTrack && localAudioTrack.setEffect) {
            // Remove the effect from the local audio track
            await localAudioTrack.setEffect(undefined);
        }

        console.log('UniversalTranslatorEffect: Effect disabled on local audio track');
    } catch (error) {
        console.error('Failed to disable Universal Translator effect:', error);
    }
}

/**
 * Get the Universal Translator effect instance.
 * This is used by the translation middleware to send translated audio to the effect.
 */
export function getUniversalTranslatorEffect(): UniversalTranslatorEffect | null {
    return universalTranslatorEffect;
}

268
react/features/universal-translator/reducer.ts
Normal file
@@ -0,0 +1,268 @@
import PersistenceRegistry from '../base/redux/PersistenceRegistry';
import ReducerRegistry from '../base/redux/ReducerRegistry';

import {
    CLEAR_TRANSLATION_ERROR,
    DISABLE_UNIVERSAL_TRANSLATOR_EFFECT,
    ENABLE_UNIVERSAL_TRANSLATOR_EFFECT,
    INIT_UNIVERSAL_TRANSLATOR,
    SET_API_KEYS,
    SET_SOURCE_LANGUAGE,
    SET_STT_PROVIDER,
    SET_TARGET_LANGUAGE,
    SET_TRANSCRIPTION_RESULT,
    SET_TRANSLATION_ERROR,
    SET_TRANSLATION_PROVIDER,
    SET_TRANSLATION_RESULT,
    SET_TTS_PROVIDER,
    START_TRANSLATION_RECORDING,
    STOP_TRANSLATION_RECORDING,
    TOGGLE_UNIVERSAL_TRANSLATOR,
    UPDATE_LATENCY_METRICS,
    UPDATE_PROCESSING_STEP,
    UPDATE_TRANSLATION_STATUS
} from './actionTypes';

/**
 * Initial state for the universal translator feature.
 */
const DEFAULT_STATE = {
    isInitialized: false,
    isRecording: false,
    showDialog: false,
    status: 'idle',
    currentStep: null,
    sttProvider: 'deepgram',
    ttsProvider: 'cartesia',
    translationProvider: 'openai',
    sourceLanguage: 'en',
    targetLanguage: 'es',
    transcriptionResult: null,
    translationResult: null,
    latencyMetrics: {
        stt: { averageLatency: 0, lastLatency: 0, requestCount: 0 },
        translation: { averageLatency: 0, lastLatency: 0, requestCount: 0 },
        tts: { averageLatency: 0, lastLatency: 0, requestCount: 0 }
    },
    error: null,
    apiKeys: {
        openai: '',
        groq: '',
        deepgram: '',
        assemblyai: '',
        cartesia: '',
        elevenlabs: '',
        azure: '',
        google: '',
        microsoft: ''
    },
    config: null,
    effectEnabled: false
};

export interface IUniversalTranslatorState {
    apiKeys: {
        assemblyai: string;
        azure: string;
        cartesia: string;
        deepgram: string;
        elevenlabs: string;
        google: string;
        groq: string;
        microsoft: string;
        openai: string;
    };
    config: any;
    currentStep: string | null;
    effectEnabled: boolean;
    error: string | null;
    isInitialized: boolean;
    isRecording: boolean;
    latencyMetrics: {
        stt: { averageLatency: number; lastLatency: number; requestCount: number; };
        translation: { averageLatency: number; lastLatency: number; requestCount: number; };
        tts: { averageLatency: number; lastLatency: number; requestCount: number; };
    };
    showDialog: boolean;
    sourceLanguage: string;
    status: string;
    sttProvider: string;
    targetLanguage: string;
    transcriptionResult: any;
    translationProvider: string;
    translationResult: any;
    ttsProvider: string;
}

/**
 * Reduces redux actions for the universal translator feature.
 *
 * @param {Object} state - The current state.
 * @param {Object} action - The redux action.
 * @returns {Object} The new state after applying the action.
 */
ReducerRegistry.register<IUniversalTranslatorState>('features/universal-translator',
    (state = DEFAULT_STATE, action): IUniversalTranslatorState => {
        switch (action.type) {
        case INIT_UNIVERSAL_TRANSLATOR:
            return {
                ...state,
                isInitialized: true,
                config: action.config
            };

        case SET_STT_PROVIDER:
            return {
                ...state,
                sttProvider: action.provider
            };

        case SET_TTS_PROVIDER:
            return {
                ...state,
                ttsProvider: action.provider
            };

        case SET_TRANSLATION_PROVIDER:
            return {
                ...state,
                translationProvider: action.provider
            };

        case SET_SOURCE_LANGUAGE:
            return {
                ...state,
                sourceLanguage: action.language
            };

        case SET_TARGET_LANGUAGE:
            return {
                ...state,
                targetLanguage: action.language
            };

        case START_TRANSLATION_RECORDING:
            return {
                ...state,
                isRecording: true,
                status: 'recording',
                error: null,
                transcriptionResult: null,
                translationResult: null
            };

        case STOP_TRANSLATION_RECORDING:
            return {
                ...state,
                isRecording: false,
                status: 'processing'
            };

        case UPDATE_TRANSLATION_STATUS:
            return {
                ...state,
                status: action.status
            };

        case UPDATE_PROCESSING_STEP:
            return {
                ...state,
                currentStep: action.step
            };

        case SET_TRANSCRIPTION_RESULT:
            return {
                ...state,
                transcriptionResult: action.result
            };

        case SET_TRANSLATION_RESULT:
            return {
                ...state,
                translationResult: action.result,
                status: 'completed'
            };

        case UPDATE_LATENCY_METRICS:
            return {
                ...state,
                latencyMetrics: {
                    ...state.latencyMetrics,
                    ...action.metrics
                }
            };

        case SET_TRANSLATION_ERROR:
            return {
                ...state,
                error: action.error,
                status: 'error',
                isRecording: false
            };

        case CLEAR_TRANSLATION_ERROR:
            return {
                ...state,
                error: null
            };

        case SET_API_KEYS:
            return {
                ...state,
                apiKeys: {
                    ...state.apiKeys,
                    ...action.keys
                }
            };

        case TOGGLE_UNIVERSAL_TRANSLATOR:
            return {
                ...state,
                showDialog: !state.showDialog
            };

        case ENABLE_UNIVERSAL_TRANSLATOR_EFFECT:
            return {
                ...state,
                effectEnabled: true
            };

        case DISABLE_UNIVERSAL_TRANSLATOR_EFFECT:
            return {
                ...state,
                effectEnabled: false
            };

        default:
            return state;
        }
    });

/**
 * Register for persistence to save API keys and user preferences across sessions.
 * Only persist configuration data, not temporary state like recording status or results.
 */
PersistenceRegistry.register('features/universal-translator', {
    // Persist API keys - the most important for user experience
    apiKeys: true,

    // Persist user preferences
    sttProvider: true,
    ttsProvider: true,
    translationProvider: true,
    sourceLanguage: true,
    targetLanguage: true,
    effectEnabled: true,

    // Don't persist temporary state
    isInitialized: false,
    isRecording: false,
    showDialog: false,
    status: false,
    currentStep: false,
    transcriptionResult: false,
    translationResult: false,
    latencyMetrics: false,
    error: false,
    config: false
}, DEFAULT_STATE);
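
// Note: the persistence registry serializes this subset to the browser's
// local storage, so the saved API keys end up unencrypted on the user's
// machine; convenient for development, worth weighing for shared machines.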

390
react/features/universal-translator/services/stt-providers.ts
Normal file
@@ -0,0 +1,390 @@
// @ts-nocheck

/**
 * Speech-to-Text service providers for latency comparison
 * Supports multiple STT services for benchmarking.
 */

// import { WhisperProcessor } from '../audio/whisper-processor.js';

/**
 * Base STT Provider class.
 */
export class STTProvider {
    constructor(name, config = {}) {
        this.name = name;
        this.config = config;
        this.isInitialized = false;
    }

    async initialize() {
        throw new Error('initialize() must be implemented by subclass');
    }

    async transcribe(audioData) {
        throw new Error('transcribe() must be implemented by subclass');
    }

    getLatencyMetrics() {
        return {
            averageLatency: 0,
            lastLatency: 0,
            requestCount: 0
        };
    }
}
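
// Concrete providers below implement initialize() and transcribe(); each
// transcribe() resolves to a common result shape so callers can swap
// providers freely:
//     { text, language, confidence, provider, latency }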
|
||||
|
||||
/**
|
||||
* Whisper (local) STT Provider.
|
||||
*/
|
||||
export class WhisperSTTProvider extends STTProvider {
|
||||
constructor(config = {}) {
|
||||
super('Whisper (Local)', config);
|
||||
|
||||
// this.processor = new WhisperProcessor();
|
||||
this.latencyMetrics = {
|
||||
averageLatency: 0,
|
||||
lastLatency: 0,
|
||||
requestCount: 0
|
||||
};
|
||||
}
|
||||
|
||||
async initialize() {
|
||||
try {
|
||||
// await this.processor.initializeModel();
|
||||
// this.isInitialized = true;
|
||||
// console.log('Whisper STT Provider initialized');
|
||||
throw new Error('Local Whisper processing is not available. Missing @xenova/transformers dependency.');
|
||||
} catch (error) {
|
||||
console.error('Failed to initialize Whisper STT:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
async transcribe(audioData) {
|
||||
// if (!this.isInitialized) {
|
||||
// throw new Error('Whisper STT not initialized');
|
||||
// }
|
||||
|
||||
const startTime = performance.now();
|
||||
|
||||
try {
|
||||
// const result = await this.processor.processAudio(audioData);
|
||||
throw new Error('Local Whisper processing is not available. Please use an external STT provider.');
|
||||
} catch (error) {
|
||||
console.error('Whisper transcription error:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
_updateLatencyMetrics(latency) {
|
||||
this.latencyMetrics.requestCount++;
|
||||
this.latencyMetrics.lastLatency = latency;
|
||||
this.latencyMetrics.averageLatency
|
||||
= (this.latencyMetrics.averageLatency * (this.latencyMetrics.requestCount - 1) + latency)
|
||||
/ this.latencyMetrics.requestCount;
|
||||
}
|
||||
|
||||
getLatencyMetrics() {
|
||||
return { ...this.latencyMetrics };
|
||||
}
|
||||
}
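The `_updateLatencyMetrics` helper, repeated verbatim in each provider below, maintains a running mean without storing individual samples: with request count n and previous mean m, the mean after a new sample x is m' = (m * (n - 1) + x) / n. A standalone check with made-up latency values:

// Standalone check of the incremental mean used by _updateLatencyMetrics.
// The sample latencies are arbitrary illustrations.
const samples = [ 120, 80, 100 ];
let requestCount = 0;
let averageLatency = 0;

for (const latency of samples) {
    requestCount++;
    averageLatency = (averageLatency * (requestCount - 1) + latency) / requestCount;
}

console.log(averageLatency); // 100 - identical to (120 + 80 + 100) / 3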
/**
 * Groq STT Provider.
 */
export class GroqSTTProvider extends STTProvider {
    constructor(config = {}) {
        super('Groq Whisper', config);
        this.apiKey = config.apiKey;
        this.model = config.model || 'whisper-large-v3-turbo';
        this.baseUrl = 'https://api.groq.com/openai/v1/audio/transcriptions';
        this.latencyMetrics = {
            averageLatency: 0,
            lastLatency: 0,
            requestCount: 0
        };
    }

    async initialize() {
        if (!this.apiKey) {
            throw new Error('Groq API key is required');
        }
        this.isInitialized = true;
        console.log('Groq STT Provider initialized');
    }

    async transcribe(audioBlob) {
        if (!this.isInitialized) {
            throw new Error('Groq STT not initialized');
        }

        const startTime = performance.now();

        try {
            const formData = new FormData();

            formData.append('file', audioBlob, 'audio.webm');
            formData.append('model', this.model);
            formData.append('response_format', 'verbose_json');

            // Caveat: the OpenAI-compatible transcription API documents `language`
            // as an ISO-639-1 code; if 'auto' is rejected, omit the field to let
            // the service detect the language.
            formData.append('language', 'auto');

            const response = await fetch(this.baseUrl, {
                method: 'POST',
                headers: {
                    'Authorization': `Bearer ${this.apiKey}`
                },
                body: formData
            });

            if (!response.ok) {
                throw new Error(`Groq API error: ${response.status}`);
            }

            const result = await response.json();

            const endTime = performance.now();
            const latency = endTime - startTime;

            this._updateLatencyMetrics(latency);

            return {
                text: result.text,
                language: result.language || 'en',
                confidence: result.confidence || 0.95,
                provider: this.name,
                latency
            };
        } catch (error) {
            console.error('Groq transcription error:', error);
            throw error;
        }
    }

    _updateLatencyMetrics(latency) {
        this.latencyMetrics.requestCount++;
        this.latencyMetrics.lastLatency = latency;
        this.latencyMetrics.averageLatency
            = (this.latencyMetrics.averageLatency * (this.latencyMetrics.requestCount - 1) + latency)
                / this.latencyMetrics.requestCount;
    }

    getLatencyMetrics() {
        return { ...this.latencyMetrics };
    }
}
/**
 * Deepgram STT Provider.
 */
export class DeepgramSTTProvider extends STTProvider {
    constructor(config = {}) {
        super('Deepgram Nova-2', config);
        this.apiKey = config.apiKey;
        this.model = config.model || 'nova-2';
        this.baseUrl = 'https://api.deepgram.com/v1/listen';
        this.latencyMetrics = {
            averageLatency: 0,
            lastLatency: 0,
            requestCount: 0
        };
    }

    async initialize() {
        if (!this.apiKey) {
            throw new Error('Deepgram API key is required');
        }
        this.isInitialized = true;
        console.log('Deepgram STT Provider initialized');
    }

    async transcribe(audioBlob) {
        if (!this.isInitialized) {
            throw new Error('Deepgram STT not initialized');
        }

        const startTime = performance.now();

        try {
            // Log audio blob info for debugging
            console.log('Deepgram: Processing audio blob', audioBlob.size, 'bytes, type:', audioBlob.type);

            const url = `${this.baseUrl}?model=${this.model}&smart_format=true&detect_language=true&punctuate=true&diarize=false`;

            const response = await fetch(url, {
                method: 'POST',
                headers: {
                    'Authorization': `Token ${this.apiKey}`,
                    'Content-Type': audioBlob.type || 'audio/wav'
                },
                body: audioBlob
            });

            if (!response.ok) {
                const errorText = await response.text();

                console.error('Deepgram API error response:', response.status, errorText);
                throw new Error(`Deepgram API error: ${response.status} - ${errorText}`);
            }

            const result = await response.json();

            const endTime = performance.now();
            const latency = endTime - startTime;

            this._updateLatencyMetrics(latency);

            const transcript = result.results?.channels?.[0]?.alternatives?.[0];
            const transcriptText = transcript?.transcript || '';

            console.log('Deepgram result:', transcriptText);

            return {
                text: transcriptText,
                language: result.results?.channels?.[0]?.detected_language || 'en',
                confidence: transcript?.confidence || 0.95,
                provider: this.name,
                latency
            };
        } catch (error) {
            console.error('Deepgram transcription error:', error);
            throw error;
        }
    }

    _updateLatencyMetrics(latency) {
        this.latencyMetrics.requestCount++;
        this.latencyMetrics.lastLatency = latency;
        this.latencyMetrics.averageLatency
            = (this.latencyMetrics.averageLatency * (this.latencyMetrics.requestCount - 1) + latency)
                / this.latencyMetrics.requestCount;
    }

    getLatencyMetrics() {
        return { ...this.latencyMetrics };
    }
}
/**
 * AssemblyAI STT Provider.
 */
export class AssemblyAISTTProvider extends STTProvider {
    constructor(config = {}) {
        super('AssemblyAI Universal-2', config);
        this.apiKey = config.apiKey;
        this.baseUrl = 'https://api.assemblyai.com/v2/transcript';
        this.latencyMetrics = {
            averageLatency: 0,
            lastLatency: 0,
            requestCount: 0
        };
    }

    async initialize() {
        if (!this.apiKey) {
            throw new Error('AssemblyAI API key is required');
        }
        this.isInitialized = true;
        console.log('AssemblyAI STT Provider initialized');
    }

    async transcribe(audioBlob) {
        if (!this.isInitialized) {
            throw new Error('AssemblyAI STT not initialized');
        }

        const startTime = performance.now();

        try {
            // First, upload the audio file
            const uploadResponse = await fetch('https://api.assemblyai.com/v2/upload', {
                method: 'POST',
                headers: {
                    'Authorization': this.apiKey,
                    'Content-Type': 'application/octet-stream'
                },
                body: audioBlob
            });

            if (!uploadResponse.ok) {
                throw new Error(`AssemblyAI upload error: ${uploadResponse.status}`);
            }

            const uploadResult = await uploadResponse.json();

            // Then, request transcription
            const transcriptionResponse = await fetch(this.baseUrl, {
                method: 'POST',
                headers: {
                    'Authorization': this.apiKey,
                    'Content-Type': 'application/json'
                },
                body: JSON.stringify({
                    audio_url: uploadResult.upload_url,
                    language_detection: true
                })
            });

            if (!transcriptionResponse.ok) {
                throw new Error(`AssemblyAI transcription error: ${transcriptionResponse.status}`);
            }

            // The transcript endpoint is asynchronous: the POST only queues the
            // job, so poll GET /v2/transcript/{id} until it completes or errors.
            let result = await transcriptionResponse.json();

            while (result.status === 'queued' || result.status === 'processing') {
                await new Promise(resolve => setTimeout(resolve, 1000));

                const pollResponse = await fetch(`${this.baseUrl}/${result.id}`, {
                    headers: {
                        'Authorization': this.apiKey
                    }
                });

                if (!pollResponse.ok) {
                    throw new Error(`AssemblyAI polling error: ${pollResponse.status}`);
                }

                result = await pollResponse.json();
            }

            if (result.status === 'error') {
                throw new Error(`AssemblyAI transcription failed: ${result.error}`);
            }

            const endTime = performance.now();
            const latency = endTime - startTime;

            this._updateLatencyMetrics(latency);

            return {
                text: result.text || '',
                language: result.language_code || 'en',
                confidence: result.confidence || 0.95,
                provider: this.name,
                latency
            };
        } catch (error) {
            console.error('AssemblyAI transcription error:', error);
            throw error;
        }
    }

    _updateLatencyMetrics(latency) {
        this.latencyMetrics.requestCount++;
        this.latencyMetrics.lastLatency = latency;
        this.latencyMetrics.averageLatency
            = (this.latencyMetrics.averageLatency * (this.latencyMetrics.requestCount - 1) + latency)
                / this.latencyMetrics.requestCount;
    }

    getLatencyMetrics() {
        return { ...this.latencyMetrics };
    }
}
/**
 * STT Provider Factory.
 */
export class STTProviderFactory {
    static create(providerName, config = {}) {
        switch (providerName.toLowerCase()) {
        case 'whisper':
            return new WhisperSTTProvider(config);
        case 'groq':
            return new GroqSTTProvider(config);
        case 'deepgram':
            return new DeepgramSTTProvider(config);
        case 'assemblyai':
            return new AssemblyAISTTProvider(config);
        default:
            throw new Error(`Unknown STT provider: ${providerName}`);
        }
    }

    static getAvailableProviders() {
        return [
            'whisper',
            'groq',
            'deepgram',
            'assemblyai'
        ];
    }
}
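A minimal usage sketch of the factory, not part of the committed file; the provider name, API key, and audio blob below are placeholders:

// Hypothetical usage - 'GROQ_API_KEY' and the recording blob are stand-ins.
async function demoTranscription(recording: Blob): Promise<void> {
    const stt = STTProviderFactory.create('groq', { apiKey: 'GROQ_API_KEY' });

    await stt.initialize();

    const { text, language, latency } = await stt.transcribe(recording);

    console.log(`[${language}] ${text} (${latency.toFixed(0)} ms)`);
    console.log(stt.getLatencyMetrics());
}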
569
react/features/universal-translator/services/translation.ts
Normal file
@@ -0,0 +1,569 @@
// @ts-nocheck

/**
 * Translation service for converting text between languages.
 * Supports multiple translation providers for comparison.
 */

/**
 * Base Translation Provider class.
 */
export class TranslationProvider {
    constructor(name, config = {}) {
        this.name = name;
        this.config = config;
        this.isInitialized = false;
        this.supportedLanguages = [];
    }

    async initialize() {
        throw new Error('initialize() must be implemented by subclass');
    }

    async translate(text, sourceLang, targetLang) {
        throw new Error('translate() must be implemented by subclass');
    }

    async detectLanguage(text) {
        throw new Error('detectLanguage() must be implemented by subclass');
    }

    getSupportedLanguages() {
        return this.supportedLanguages;
    }

    getLatencyMetrics() {
        return {
            averageLatency: 0,
            lastLatency: 0,
            requestCount: 0
        };
    }
}

/**
 * OpenAI GPT-4 Translation Provider.
 */
export class OpenAITranslationProvider extends TranslationProvider {
    constructor(config = {}) {
        super('OpenAI GPT-4', config);
        this.apiKey = config.apiKey;
        this.model = config.model || 'gpt-4-turbo-preview';
        this.baseUrl = 'https://api.openai.com/v1/chat/completions';
        this.latencyMetrics = {
            averageLatency: 0,
            lastLatency: 0,
            requestCount: 0
        };
        this.supportedLanguages = [
            'en', 'es', 'fr', 'de', 'it', 'pt', 'ro', 'ru', 'ja', 'ko', 'zh',
            'ar', 'hi', 'tr', 'pl', 'nl', 'sv', 'da', 'no', 'fi'
        ];
        this.languageNames = {
            'en': 'English',
            'es': 'Spanish',
            'fr': 'French',
            'de': 'German',
            'it': 'Italian',
            'pt': 'Portuguese',
            'ro': 'Romanian',
            'ru': 'Russian',
            'ja': 'Japanese',
            'ko': 'Korean',
            'zh': 'Chinese',
            'ar': 'Arabic',
            'hi': 'Hindi',
            'tr': 'Turkish',
            'pl': 'Polish',
            'nl': 'Dutch',
            'sv': 'Swedish',
            'da': 'Danish',
            'no': 'Norwegian',
            'fi': 'Finnish'
        };
    }

    async initialize() {
        if (!this.apiKey) {
            throw new Error('OpenAI API key is required');
        }
        this.isInitialized = true;
        console.log('OpenAI Translation Provider initialized');
    }

    async translate(text, sourceLang, targetLang) {
        if (!this.isInitialized) {
            throw new Error('OpenAI Translation Provider not initialized');
        }

        const startTime = performance.now();

        try {
            const sourceLanguage = this.languageNames[sourceLang] || sourceLang;
            const targetLanguage = this.languageNames[targetLang] || targetLang;

            const response = await fetch(this.baseUrl, {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                    'Authorization': `Bearer ${this.apiKey}`
                },
                body: JSON.stringify({
                    model: this.model,
                    messages: [
                        {
                            role: 'system',
                            content: `You are a professional translator. Translate the given text from ${sourceLanguage} to ${targetLanguage}. Return only the translation without any additional text or explanations. Maintain the tone and context of the original text.`
                        },
                        {
                            role: 'user',
                            content: text
                        }
                    ],
                    max_tokens: 500,
                    temperature: 0.1
                })
            });

            if (!response.ok) {
                throw new Error(`OpenAI API error: ${response.status}`);
            }

            const result = await response.json();

            const endTime = performance.now();
            const latency = endTime - startTime;

            this._updateLatencyMetrics(latency);

            return {
                translatedText: result.choices[0].message.content.trim(),
                sourceLang,
                targetLang,
                provider: this.name,
                latency,
                confidence: 0.95
            };
        } catch (error) {
            console.error('OpenAI translation error:', error);
            throw error;
        }
    }

    async detectLanguage(text) {
        if (!this.isInitialized) {
            throw new Error('OpenAI Translation Provider not initialized');
        }

        try {
            const response = await fetch(this.baseUrl, {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                    'Authorization': `Bearer ${this.apiKey}`
                },
                body: JSON.stringify({
                    model: this.model,
                    messages: [
                        {
                            role: 'system',
                            content: 'Detect the language of the given text and return only the ISO 639-1 language code (e.g., "en", "es", "fr"). Return only the code.'
                        },
                        {
                            role: 'user',
                            content: text
                        }
                    ],
                    max_tokens: 10,
                    temperature: 0
                })
            });

            if (!response.ok) {
                throw new Error(`OpenAI API error: ${response.status}`);
            }

            const result = await response.json();
            const detectedLang = result.choices[0].message.content.trim().toLowerCase();

            return {
                language: detectedLang,
                confidence: 0.95,
                provider: this.name
            };
        } catch (error) {
            console.error('OpenAI language detection error:', error);

            return {
                language: 'en',
                confidence: 0.5,
                provider: this.name
            };
        }
    }

    _updateLatencyMetrics(latency) {
        this.latencyMetrics.requestCount++;
        this.latencyMetrics.lastLatency = latency;
        this.latencyMetrics.averageLatency
            = (this.latencyMetrics.averageLatency * (this.latencyMetrics.requestCount - 1) + latency)
                / this.latencyMetrics.requestCount;
    }

    getLatencyMetrics() {
        return { ...this.latencyMetrics };
    }
}
/**
 * Google Translate Provider.
 */
export class GoogleTranslateProvider extends TranslationProvider {
    constructor(config = {}) {
        super('Google Translate', config);
        this.apiKey = config.apiKey;
        this.baseUrl = 'https://translation.googleapis.com/language/translate/v2';
        this.latencyMetrics = {
            averageLatency: 0,
            lastLatency: 0,
            requestCount: 0
        };
        this.supportedLanguages = [
            'en', 'es', 'fr', 'de', 'it', 'pt', 'ro', 'ru', 'ja', 'ko', 'zh',
            'ar', 'hi', 'tr', 'pl', 'nl', 'sv', 'da', 'no', 'fi', 'he',
            'th', 'vi', 'id', 'ms', 'tl', 'cy', 'ga', 'mt', 'is'
        ];
    }

    async initialize() {
        if (!this.apiKey) {
            throw new Error('Google Translate API key is required');
        }
        this.isInitialized = true;
        console.log('Google Translate Provider initialized');
    }

    async translate(text, sourceLang, targetLang) {
        if (!this.isInitialized) {
            throw new Error('Google Translate Provider not initialized');
        }

        const startTime = performance.now();

        try {
            const params = new URLSearchParams({
                key: this.apiKey,
                q: text,
                source: sourceLang,
                target: targetLang,
                format: 'text'
            });

            const response = await fetch(`${this.baseUrl}?${params}`, {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/x-www-form-urlencoded'
                }
            });

            if (!response.ok) {
                throw new Error(`Google Translate API error: ${response.status}`);
            }

            const result = await response.json();

            const endTime = performance.now();
            const latency = endTime - startTime;

            this._updateLatencyMetrics(latency);

            return {
                translatedText: result.data.translations[0].translatedText,
                sourceLang,
                targetLang,
                provider: this.name,
                latency,
                confidence: 0.98
            };
        } catch (error) {
            console.error('Google Translate error:', error);
            throw error;
        }
    }

    async detectLanguage(text) {
        if (!this.isInitialized) {
            throw new Error('Google Translate Provider not initialized');
        }

        try {
            const params = new URLSearchParams({
                key: this.apiKey,
                q: text
            });

            const response = await fetch(`https://translation.googleapis.com/language/translate/v2/detect?${params}`, {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/x-www-form-urlencoded'
                }
            });

            if (!response.ok) {
                throw new Error(`Google Translate API error: ${response.status}`);
            }

            const result = await response.json();
            const detection = result.data.detections[0][0];

            return {
                language: detection.language,
                confidence: detection.confidence,
                provider: this.name
            };
        } catch (error) {
            console.error('Google language detection error:', error);

            return {
                language: 'en',
                confidence: 0.5,
                provider: this.name
            };
        }
    }

    _updateLatencyMetrics(latency) {
        this.latencyMetrics.requestCount++;
        this.latencyMetrics.lastLatency = latency;
        this.latencyMetrics.averageLatency
            = (this.latencyMetrics.averageLatency * (this.latencyMetrics.requestCount - 1) + latency)
                / this.latencyMetrics.requestCount;
    }

    getLatencyMetrics() {
        return { ...this.latencyMetrics };
    }
}
/**
 * Microsoft Translator Provider.
 */
export class MicrosoftTranslatorProvider extends TranslationProvider {
    constructor(config = {}) {
        super('Microsoft Translator', config);
        this.apiKey = config.apiKey;
        this.region = config.region || 'eastus';
        this.baseUrl = 'https://api.cognitive.microsofttranslator.com';
        this.latencyMetrics = {
            averageLatency: 0,
            lastLatency: 0,
            requestCount: 0
        };
        this.supportedLanguages = [
            'en', 'es', 'fr', 'de', 'it', 'pt', 'ro', 'ru', 'ja', 'ko', 'zh',
            'ar', 'hi', 'tr', 'pl', 'nl', 'sv', 'da', 'no', 'fi'
        ];
    }

    async initialize() {
        if (!this.apiKey) {
            throw new Error('Microsoft Translator API key is required');
        }
        this.isInitialized = true;
        console.log('Microsoft Translator Provider initialized');
    }

    async translate(text, sourceLang, targetLang) {
        if (!this.isInitialized) {
            throw new Error('Microsoft Translator Provider not initialized');
        }

        const startTime = performance.now();

        try {
            const response = await fetch(`${this.baseUrl}/translate?api-version=3.0&from=${sourceLang}&to=${targetLang}`, {
                method: 'POST',
                headers: {
                    'Ocp-Apim-Subscription-Key': this.apiKey,
                    'Ocp-Apim-Subscription-Region': this.region,
                    'Content-Type': 'application/json'
                },
                body: JSON.stringify([ { text } ])
            });

            if (!response.ok) {
                throw new Error(`Microsoft Translator API error: ${response.status}`);
            }

            const result = await response.json();

            const endTime = performance.now();
            const latency = endTime - startTime;

            this._updateLatencyMetrics(latency);

            return {
                translatedText: result[0].translations[0].text,
                sourceLang,
                targetLang,
                provider: this.name,
                latency,
                confidence: result[0].translations[0].confidence || 0.95
            };
        } catch (error) {
            console.error('Microsoft Translator error:', error);
            throw error;
        }
    }

    async detectLanguage(text) {
        if (!this.isInitialized) {
            throw new Error('Microsoft Translator Provider not initialized');
        }

        try {
            const response = await fetch(`${this.baseUrl}/detect?api-version=3.0`, {
                method: 'POST',
                headers: {
                    'Ocp-Apim-Subscription-Key': this.apiKey,
                    'Ocp-Apim-Subscription-Region': this.region,
                    'Content-Type': 'application/json'
                },
                body: JSON.stringify([ { text } ])
            });

            if (!response.ok) {
                throw new Error(`Microsoft Translator API error: ${response.status}`);
            }

            const result = await response.json();

            return {
                language: result[0].language,
                confidence: result[0].score,
                provider: this.name
            };
        } catch (error) {
            console.error('Microsoft language detection error:', error);

            return {
                language: 'en',
                confidence: 0.5,
                provider: this.name
            };
        }
    }

    _updateLatencyMetrics(latency) {
        this.latencyMetrics.requestCount++;
        this.latencyMetrics.lastLatency = latency;
        this.latencyMetrics.averageLatency
            = (this.latencyMetrics.averageLatency * (this.latencyMetrics.requestCount - 1) + latency)
                / this.latencyMetrics.requestCount;
    }

    getLatencyMetrics() {
        return { ...this.latencyMetrics };
    }
}
/**
 * Translation Service Factory.
 */
export class TranslationProviderFactory {
    static create(providerName, config = {}) {
        switch (providerName.toLowerCase()) {
        case 'openai':
            return new OpenAITranslationProvider(config);
        case 'google':
            return new GoogleTranslateProvider(config);
        case 'microsoft':
            return new MicrosoftTranslatorProvider(config);
        default:
            throw new Error(`Unknown translation provider: ${providerName}`);
        }
    }

    static getAvailableProviders() {
        return [
            'openai',
            'google',
            'microsoft'
        ];
    }
}
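A minimal usage sketch of the translation factory, not part of the committed file; the key and sample output are placeholders:

// Hypothetical usage - 'OPENAI_API_KEY' is a stand-in.
async function demoTranslation(): Promise<void> {
    const provider = TranslationProviderFactory.create('openai', { apiKey: 'OPENAI_API_KEY' });

    await provider.initialize();

    const result = await provider.translate('Hello, world!', 'en', 'es');

    console.log(result.translatedText); // e.g. 'Hola, mundo!'
    console.log(provider.getLatencyMetrics());
}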
/**
 * Universal Translation Service.
 * Orchestrates the complete translation pipeline.
 */
export class UniversalTranslationService {
    constructor(config = {}) {
        this.translationProvider = null;
        this.fallbackProvider = null;
        this.config = config;
    }

    async initialize(primaryProvider, fallbackProvider = null) {
        this.translationProvider = TranslationProviderFactory.create(primaryProvider, this.config[primaryProvider]);
        await this.translationProvider.initialize();

        if (fallbackProvider) {
            this.fallbackProvider = TranslationProviderFactory.create(fallbackProvider, this.config[fallbackProvider]);
            await this.fallbackProvider.initialize();
        }

        console.log('Universal Translation Service initialized');
    }

    async translateText(text, sourceLang, targetLang) {
        try {
            return await this.translationProvider.translate(text, sourceLang, targetLang);
        } catch (error) {
            console.warn('Primary translation provider failed, trying fallback:', error);

            if (this.fallbackProvider) {
                try {
                    return await this.fallbackProvider.translate(text, sourceLang, targetLang);
                } catch (fallbackError) {
                    console.error('Fallback translation provider also failed:', fallbackError);
                    throw fallbackError;
                }
            }

            throw error;
        }
    }

    async detectLanguage(text) {
        try {
            return await this.translationProvider.detectLanguage(text);
        } catch (error) {
            if (this.fallbackProvider) {
                try {
                    return await this.fallbackProvider.detectLanguage(text);
                } catch (fallbackError) {
                    console.error('Language detection failed on both providers');

                    return {
                        language: 'en',
                        confidence: 0.5,
                        provider: 'fallback'
                    };
                }
            }

            return {
                language: 'en',
                confidence: 0.5,
                provider: 'fallback'
            };
        }
    }

    getLatencyMetrics() {
        return {
            primary: this.translationProvider?.getLatencyMetrics(),
            fallback: this.fallbackProvider?.getLatencyMetrics()
        };
    }

    getSupportedLanguages() {
        return this.translationProvider?.getSupportedLanguages() || [];
    }
}
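An end-to-end sketch of the service with a fallback provider, not part of the committed file; the API keys are placeholders. Note how the per-provider config objects are keyed by provider name, matching the `this.config[primaryProvider]` lookup in initialize():

// Hypothetical end-to-end usage - both keys are stand-ins.
async function demoService(): Promise<void> {
    const service = new UniversalTranslationService({
        openai: { apiKey: 'OPENAI_API_KEY' },
        google: { apiKey: 'GOOGLE_API_KEY' }
    });

    await service.initialize('openai', 'google');

    const { language } = await service.detectLanguage('Bonjour tout le monde');
    const { translatedText, provider } = await service.translateText(
        'Bonjour tout le monde', language, 'en');

    console.log(`${provider}: ${translatedText}`);
}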
569
react/features/universal-translator/services/tts-providers.ts
Normal file
@@ -0,0 +1,569 @@
// @ts-nocheck

/**
 * Text-to-Speech service providers for latency comparison.
 * Supports multiple TTS services for benchmarking.
 */

/**
 * Base TTS Provider class.
 */
export class TTSProvider {
    constructor(name, config = {}) {
        this.name = name;
        this.config = config;
        this.isInitialized = false;
    }

    async initialize() {
        throw new Error('initialize() must be implemented by subclass');
    }

    async synthesize(text, language = 'en', voice = null) {
        throw new Error('synthesize() must be implemented by subclass');
    }

    getLatencyMetrics() {
        return {
            averageLatency: 0,
            lastLatency: 0,
            requestCount: 0
        };
    }

    getAvailableVoices(language = 'en') {
        throw new Error('getAvailableVoices() must be implemented by subclass');
    }
}

/**
 * Cartesia TTS Provider (Sonic).
 */
export class CartesiaTTSProvider extends TTSProvider {
    constructor(config = {}) {
        super('Cartesia Sonic', config);
        this.apiKey = config.apiKey;
        this.baseUrl = 'https://api.cartesia.ai/tts/bytes';
        this.model = config.model || 'sonic-english';
        this.latencyMetrics = {
            averageLatency: 0,
            lastLatency: 0,
            requestCount: 0
        };

        // Updated with actual Cartesia voice IDs
        this.voiceMap = {
            'en': 'a0e99841-438c-4a64-b679-ae501e7d6091', // Barbershop Man
            'es': '846d6cb0-2301-48b6-9683-48f5618ea2f6', // Spanish voice
            'fr': 'f114a467-c40a-4db8-964d-aaba89cd08fa', // French voice
            'de': '2b568345-1d48-4047-b25f-7baccf842eb0', // German voice
            'ro': 'a0e99841-438c-4a64-b679-ae501e7d6091' // Romanian (using default voice - update if specific Romanian voice ID available)
        };
    }

    async initialize() {
        if (!this.apiKey) {
            throw new Error('Cartesia API key is required');
        }
        this.isInitialized = true;
        console.log('Cartesia TTS Provider initialized');
    }

    async synthesize(text, language = 'en', voice = null) {
        if (!this.isInitialized) {
            throw new Error('Cartesia TTS not initialized');
        }

        const startTime = performance.now();

        try {
            const voiceId = voice || this.voiceMap[language] || this.voiceMap.en;

            const requestBody = {
                model_id: this.model,
                transcript: text,
                voice: {
                    mode: 'id',
                    id: voiceId
                },
                output_format: {
                    container: 'wav',
                    encoding: 'pcm_s16le',
                    sample_rate: 22050
                }
            };

            console.log('Cartesia TTS request:', {
                url: this.baseUrl,
                model: this.model,
                voice: voiceId,
                textLength: text.length
            });

            const response = await fetch(this.baseUrl, {
                method: 'POST',
                headers: {
                    'X-API-Key': this.apiKey,
                    'Content-Type': 'application/json',
                    'Cartesia-Version': '2024-06-10'
                },
                body: JSON.stringify(requestBody)
            });

            if (!response.ok) {
                const errorText = await response.text();

                console.error('Cartesia API error response:', errorText);
                throw new Error(`Cartesia API error: ${response.status} - ${errorText}`);
            }

            const audioBlob = await response.blob();

            const endTime = performance.now();
            const latency = endTime - startTime;

            this._updateLatencyMetrics(latency);

            return {
                audioBlob,
                provider: this.name,
                latency,
                language,
                voice: voiceId
            };
        } catch (error) {
            console.error('Cartesia TTS error:', error);
            throw error;
        }
    }

    _updateLatencyMetrics(latency) {
        this.latencyMetrics.requestCount++;
        this.latencyMetrics.lastLatency = latency;
        this.latencyMetrics.averageLatency
            = (this.latencyMetrics.averageLatency * (this.latencyMetrics.requestCount - 1) + latency)
                / this.latencyMetrics.requestCount;
    }

    getLatencyMetrics() {
        return { ...this.latencyMetrics };
    }

    getAvailableVoices(language = 'en') {
        return Object.keys(this.voiceMap);
    }
}
/**
 * ElevenLabs TTS Provider.
 */
export class ElevenLabsTTSProvider extends TTSProvider {
    constructor(config = {}) {
        super('ElevenLabs', config);
        this.apiKey = config.apiKey;
        this.baseUrl = 'https://api.elevenlabs.io/v1/text-to-speech';
        this.latencyMetrics = {
            averageLatency: 0,
            lastLatency: 0,
            requestCount: 0
        };
        this.voiceMap = {
            'en': 'EXAVITQu4vr4xnSDxMaL', // Bella - English
            'es': '9BWtsMINqrJLrRacOk9x', // Spanish voice
            'fr': 'Xb7hH8MSUJpSbSDYk0k2', // French voice
            'de': 'N2lVS1w4EtoT3dr4eOWO' // German voice
        };
    }

    async initialize() {
        if (!this.apiKey) {
            throw new Error('ElevenLabs API key is required');
        }
        this.isInitialized = true;
        console.log('ElevenLabs TTS Provider initialized');
    }

    async synthesize(text, language = 'en', voice = null) {
        if (!this.isInitialized) {
            throw new Error('ElevenLabs TTS not initialized');
        }

        const startTime = performance.now();

        try {
            const voiceId = voice || this.voiceMap[language] || this.voiceMap.en;

            const response = await fetch(`${this.baseUrl}/${voiceId}`, {
                method: 'POST',
                headers: {
                    'Accept': 'audio/mpeg',
                    'Content-Type': 'application/json',
                    'xi-api-key': this.apiKey
                },
                body: JSON.stringify({
                    text,
                    model_id: 'eleven_multilingual_v2',
                    voice_settings: {
                        stability: 0.5,
                        similarity_boost: 0.75
                    }
                })
            });

            if (!response.ok) {
                throw new Error(`ElevenLabs API error: ${response.status}`);
            }

            const audioBlob = await response.blob();

            const endTime = performance.now();
            const latency = endTime - startTime;

            this._updateLatencyMetrics(latency);

            return {
                audioBlob,
                provider: this.name,
                latency,
                language,
                voice: voiceId
            };
        } catch (error) {
            console.error('ElevenLabs TTS error:', error);
            throw error;
        }
    }

    _updateLatencyMetrics(latency) {
        this.latencyMetrics.requestCount++;
        this.latencyMetrics.lastLatency = latency;
        this.latencyMetrics.averageLatency
            = (this.latencyMetrics.averageLatency * (this.latencyMetrics.requestCount - 1) + latency)
                / this.latencyMetrics.requestCount;
    }

    getLatencyMetrics() {
        return { ...this.latencyMetrics };
    }

    getAvailableVoices(language = 'en') {
        return Object.keys(this.voiceMap);
    }
}
/**
 * Azure Speech TTS Provider.
 */
export class AzureTTSProvider extends TTSProvider {
    constructor(config = {}) {
        super('Azure Speech', config);
        this.apiKey = config.apiKey;
        this.region = config.region || 'eastus';
        this.baseUrl = `https://${this.region}.tts.speech.microsoft.com/cognitiveservices/v1`;
        this.latencyMetrics = {
            averageLatency: 0,
            lastLatency: 0,
            requestCount: 0
        };
        this.voiceMap = {
            'en': 'en-US-JennyNeural',
            'es': 'es-ES-ElviraNeural',
            'fr': 'fr-FR-DeniseNeural',
            'de': 'de-DE-KatjaNeural'
        };
    }

    async initialize() {
        if (!this.apiKey) {
            throw new Error('Azure Speech API key is required');
        }
        this.isInitialized = true;
        console.log('Azure TTS Provider initialized');
    }

    async synthesize(text, language = 'en', voice = null) {
        if (!this.isInitialized) {
            throw new Error('Azure TTS not initialized');
        }

        const startTime = performance.now();

        try {
            const voiceName = voice || this.voiceMap[language] || this.voiceMap.en;

            // Caveat: `text` is interpolated into the SSML verbatim; XML-special
            // characters (&, <, >) should be escaped before synthesis.
            const ssml = `
                <speak version='1.0' xml:lang='${language}'>
                    <voice xml:lang='${language}' name='${voiceName}'>
                        ${text}
                    </voice>
                </speak>
            `;

            const response = await fetch(this.baseUrl, {
                method: 'POST',
                headers: {
                    'Ocp-Apim-Subscription-Key': this.apiKey,
                    'Content-Type': 'application/ssml+xml',
                    'X-Microsoft-OutputFormat': 'riff-16khz-16bit-mono-pcm'
                },
                body: ssml
            });

            if (!response.ok) {
                throw new Error(`Azure TTS API error: ${response.status}`);
            }

            const audioBlob = await response.blob();

            const endTime = performance.now();
            const latency = endTime - startTime;

            this._updateLatencyMetrics(latency);

            return {
                audioBlob,
                provider: this.name,
                latency,
                language,
                voice: voiceName
            };
        } catch (error) {
            console.error('Azure TTS error:', error);
            throw error;
        }
    }

    _updateLatencyMetrics(latency) {
        this.latencyMetrics.requestCount++;
        this.latencyMetrics.lastLatency = latency;
        this.latencyMetrics.averageLatency
            = (this.latencyMetrics.averageLatency * (this.latencyMetrics.requestCount - 1) + latency)
                / this.latencyMetrics.requestCount;
    }

    getLatencyMetrics() {
        return { ...this.latencyMetrics };
    }

    getAvailableVoices(language = 'en') {
        return Object.keys(this.voiceMap);
    }
}
/**
 * Deepgram TTS Provider (Aura).
 */
export class DeepgramTTSProvider extends TTSProvider {
    constructor(config = {}) {
        super('Deepgram Aura', config);
        this.apiKey = config.apiKey;
        this.baseUrl = 'https://api.deepgram.com/v1/speak';
        this.latencyMetrics = {
            averageLatency: 0,
            lastLatency: 0,
            requestCount: 0
        };
        this.voiceMap = {
            'en': 'aura-asteria-en',
            'es': 'aura-luna-es',
            'fr': 'aura-stella-fr',
            'de': 'aura-hera-de'
        };
    }

    async initialize() {
        if (!this.apiKey) {
            throw new Error('Deepgram API key is required');
        }
        this.isInitialized = true;
        console.log('Deepgram TTS Provider initialized');
    }

    async synthesize(text, language = 'en', voice = null) {
        if (!this.isInitialized) {
            throw new Error('Deepgram TTS not initialized');
        }

        const startTime = performance.now();

        try {
            const voiceName = voice || this.voiceMap[language] || this.voiceMap.en;

            const response = await fetch(`${this.baseUrl}?model=${voiceName}`, {
                method: 'POST',
                headers: {
                    'Authorization': `Token ${this.apiKey}`,
                    'Content-Type': 'application/json'
                },
                body: JSON.stringify({
                    text
                })
            });

            if (!response.ok) {
                throw new Error(`Deepgram TTS API error: ${response.status}`);
            }

            const audioBlob = await response.blob();

            const endTime = performance.now();
            const latency = endTime - startTime;

            this._updateLatencyMetrics(latency);

            return {
                audioBlob,
                provider: this.name,
                latency,
                language,
                voice: voiceName
            };
        } catch (error) {
            console.error('Deepgram TTS error:', error);
            throw error;
        }
    }

    _updateLatencyMetrics(latency) {
        this.latencyMetrics.requestCount++;
        this.latencyMetrics.lastLatency = latency;
        this.latencyMetrics.averageLatency
            = (this.latencyMetrics.averageLatency * (this.latencyMetrics.requestCount - 1) + latency)
                / this.latencyMetrics.requestCount;
    }

    getLatencyMetrics() {
        return { ...this.latencyMetrics };
    }

    getAvailableVoices(language = 'en') {
        return Object.keys(this.voiceMap);
    }
}
/**
 * Browser Web Speech API TTS Provider (Fallback).
 */
export class WebSpeechTTSProvider extends TTSProvider {
    constructor(config = {}) {
        super('Web Speech API', config);
        this.speechSynthesis = window.speechSynthesis;
        this.latencyMetrics = {
            averageLatency: 0,
            lastLatency: 0,
            requestCount: 0
        };
    }

    async initialize() {
        if (!this.speechSynthesis) {
            throw new Error('Web Speech API not supported');
        }
        this.isInitialized = true;
        console.log('Web Speech TTS Provider initialized');
    }

    async synthesize(text, language = 'en', voice = null) {
        if (!this.isInitialized) {
            throw new Error('Web Speech TTS not initialized');
        }

        const startTime = performance.now();

        return new Promise((resolve, reject) => {
            try {
                const utterance = new SpeechSynthesisUtterance(text);

                utterance.lang = language;

                if (voice) {
                    const voices = this.speechSynthesis.getVoices();
                    const selectedVoice = voices.find(v => v.name === voice || v.lang.startsWith(language));

                    if (selectedVoice) {
                        utterance.voice = selectedVoice;
                    }
                }

                utterance.onend = () => {
                    const endTime = performance.now();

                    // Note: onend fires when playback finishes, so this latency
                    // includes speaking time, not just synthesis time.
                    const latency = endTime - startTime;

                    this._updateLatencyMetrics(latency);

                    // Note: Web Speech API doesn't provide audio blob directly
                    resolve({
                        audioBlob: null,
                        provider: this.name,
                        latency,
                        language,
                        voice: utterance.voice?.name || 'default'
                    });
                };

                utterance.onerror = error => {
                    reject(new Error(`Web Speech TTS error: ${error.error}`));
                };

                this.speechSynthesis.speak(utterance);
            } catch (error) {
                reject(error);
            }
        });
    }

    _updateLatencyMetrics(latency) {
        this.latencyMetrics.requestCount++;
        this.latencyMetrics.lastLatency = latency;
        this.latencyMetrics.averageLatency
            = (this.latencyMetrics.averageLatency * (this.latencyMetrics.requestCount - 1) + latency)
                / this.latencyMetrics.requestCount;
    }

    getLatencyMetrics() {
        return { ...this.latencyMetrics };
    }

    getAvailableVoices(language = 'en') {
        if (!this.speechSynthesis) {
            return [];
        }

        const voices = this.speechSynthesis.getVoices();

        return voices
            .filter(voice => voice.lang.startsWith(language))
            .map(voice => voice.name);
    }
}
/**
 * TTS Provider Factory.
 */
export class TTSProviderFactory {
    static create(providerName, config = {}) {
        switch (providerName.toLowerCase()) {
        case 'cartesia':
            return new CartesiaTTSProvider(config);
        case 'elevenlabs':
            return new ElevenLabsTTSProvider(config);
        case 'azure':
            return new AzureTTSProvider(config);
        case 'deepgram':
            return new DeepgramTTSProvider(config);
        case 'webspeech':
            return new WebSpeechTTSProvider(config);
        default:
            throw new Error(`Unknown TTS provider: ${providerName}`);
        }
    }

    static getAvailableProviders() {
        return [
            'cartesia',
            'elevenlabs',
            'azure',
            'deepgram',
            'webspeech'
        ];
    }
}
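A minimal usage sketch of the TTS factory, not part of the committed file; the key is a placeholder and playback assumes a browser context. The audioBlob null-check matters because the Web Speech fallback resolves without a blob:

// Hypothetical usage - 'CARTESIA_API_KEY' is a stand-in.
async function demoSpeech(): Promise<void> {
    const tts = TTSProviderFactory.create('cartesia', { apiKey: 'CARTESIA_API_KEY' });

    await tts.initialize();

    const { audioBlob, latency } = await tts.synthesize('Hello from the translator', 'en');

    console.log(`synthesis latency: ${latency.toFixed(0)} ms`);

    if (audioBlob) {
        await new Audio(URL.createObjectURL(audioBlob)).play();
    }
}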