// cheat-exam/src/utils/openai-realtime.js
// OpenAI Realtime API implementation
// Documentation: https://platform.openai.com/docs/api-reference/realtime
const { BrowserWindow } = require('electron');
const WebSocket = require('ws');

// Connection state
let ws = null;
let isUserClosing = false;
let sessionParams = null;
let reconnectAttempts = 0;
const MAX_RECONNECT_ATTEMPTS = 3;
const RECONNECT_DELAY = 2000; // ms between reconnect attempts (fixed, no backoff)

// Buffers for accumulating the streamed response and the user's transcription
let messageBuffer = '';
let currentTranscription = '';

// Forward an event to the renderer; only the first window receives it.
function sendToRenderer(channel, data) {
  const windows = BrowserWindow.getAllWindows();
  if (windows.length > 0) {
    windows[0].webContents.send(channel, data);
  }
}

// Build a single user message that replays recent history after a reconnect.
function buildContextMessage(conversationHistory) {
  const lastTurns = conversationHistory.slice(-20);
  const validTurns = lastTurns.filter(turn => turn.transcription?.trim() && turn.ai_response?.trim());
  if (validTurns.length === 0) return null;
  const contextLines = validTurns.map(turn => `User: ${turn.transcription.trim()}\nAssistant: ${turn.ai_response.trim()}`);
  return `Session reconnected. Here's the conversation so far:\n\n${contextLines.join('\n\n')}\n\nContinue from here.`;
}
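
// Example history entry (shape inferred from the filter above; values illustrative):
// { transcription: 'What is a mutex?', ai_response: 'A mutex is a lock that...' }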

async function initializeOpenAISession(config, conversationHistory = []) {
  const { apiKey, baseUrl, systemPrompt, model, language, isReconnect } = config;

  // Remember the params for automatic reconnection, but only on the first connect
  if (!isReconnect) {
    sessionParams = config;
    reconnectAttempts = 0;
    sendToRenderer('session-initializing', true);
  }

  // Use the custom base URL or the default OpenAI Realtime endpoint
  const wsUrl = baseUrl || 'wss://api.openai.com/v1/realtime';
  const fullUrl = `${wsUrl}?model=${model || 'gpt-4o-realtime-preview-2024-12-17'}`;

  return new Promise((resolve, reject) => {
    try {
      ws = new WebSocket(fullUrl, {
        headers: {
          Authorization: `Bearer ${apiKey}`,
          'OpenAI-Beta': 'realtime=v1',
        },
      });

      ws.on('open', () => {
        console.log('OpenAI Realtime connection established');

        // Configure the session: text + audio in/out, Whisper transcription of
        // the user's speech, and server-side voice activity detection
        const sessionConfig = {
          type: 'session.update',
          session: {
            modalities: ['text', 'audio'],
            instructions: systemPrompt,
            voice: 'alloy',
            input_audio_format: 'pcm16',
            output_audio_format: 'pcm16',
            input_audio_transcription: {
              model: 'whisper-1',
            },
            turn_detection: {
              type: 'server_vad',
              threshold: 0.5,
              prefix_padding_ms: 300,
              silence_duration_ms: 500,
            },
            temperature: 0.8,
            max_response_output_tokens: 4096,
          },
        };
        ws.send(JSON.stringify(sessionConfig));

        // Restore context if reconnecting
        if (isReconnect && conversationHistory.length > 0) {
          const contextMessage = buildContextMessage(conversationHistory);
          if (contextMessage) {
            ws.send(
              JSON.stringify({
                type: 'conversation.item.create',
                item: {
                  type: 'message',
                  role: 'user',
                  content: [{ type: 'input_text', text: contextMessage }],
                },
              })
            );
            ws.send(JSON.stringify({ type: 'response.create' }));
          }
        }

        sendToRenderer('update-status', 'Connected to OpenAI');
        if (!isReconnect) {
          sendToRenderer('session-initializing', false);
        }
        resolve(ws);
      });

      ws.on('message', data => {
        try {
          const event = JSON.parse(data.toString());
          handleOpenAIEvent(event);
        } catch (error) {
          console.error('Error parsing OpenAI message:', error);
        }
      });

      ws.on('error', error => {
        console.error('OpenAI WebSocket error:', error);
        sendToRenderer('update-status', 'Error: ' + error.message);
        reject(error);
      });

      ws.on('close', (code, reason) => {
        // `reason` is a Buffer in the ws library
        console.log(`OpenAI WebSocket closed: ${code} - ${reason.toString()}`);
        if (isUserClosing) {
          isUserClosing = false;
          sendToRenderer('update-status', 'Session closed');
          return;
        }
        // Attempt reconnection unless the user closed the session
        if (sessionParams && reconnectAttempts < MAX_RECONNECT_ATTEMPTS) {
          attemptReconnect(conversationHistory);
        } else {
          sendToRenderer('update-status', 'Session closed');
        }
      });
    } catch (error) {
      console.error('Failed to initialize OpenAI session:', error);
      if (!isReconnect) {
        sendToRenderer('session-initializing', false);
      }
      reject(error);
    }
  });
}
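
// Example config for initializeOpenAISession (shape inferred from the
// destructuring at the top of the function; values illustrative):
// {
//   apiKey: 'sk-...',
//   baseUrl: 'wss://api.openai.com/v1/realtime',  // optional, defaults to this
//   systemPrompt: 'You are a concise interview assistant.',
//   model: 'gpt-4o-realtime-preview-2024-12-17',  // optional, defaults to this
//   language: 'en',                               // destructured but unused here
//   isReconnect: false,                           // set internally by attemptReconnect
// }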

// Dispatch a single Realtime API server event.
function handleOpenAIEvent(event) {
  console.log('OpenAI event:', event.type);
  switch (event.type) {
    case 'session.created':
      console.log('Session created:', event.session.id);
      break;
    case 'session.updated':
      console.log('Session updated');
      sendToRenderer('update-status', 'Listening...');
      break;
    case 'input_audio_buffer.speech_started':
      console.log('Speech started');
      break;
    case 'input_audio_buffer.speech_stopped':
      console.log('Speech stopped');
      break;
    case 'conversation.item.input_audio_transcription.completed':
      // Whisper transcription of the user's speech
      if (event.transcript) {
        currentTranscription += event.transcript;
        console.log('Transcription:', event.transcript);
      }
      break;
    case 'response.audio_transcript.delta':
    case 'response.text.delta':
      // Streamed model output: the transcript of spoken audio, or plain text.
      // The first delta of a turn opens a new response bubble in the renderer.
      if (event.delta) {
        const isNewResponse = messageBuffer === '';
        messageBuffer += event.delta;
        sendToRenderer(isNewResponse ? 'new-response' : 'update-response', messageBuffer);
      }
      break;
    case 'response.audio_transcript.done':
      console.log('Audio transcript complete');
      break;
    case 'response.done':
      if (messageBuffer.trim() !== '') {
        sendToRenderer('update-response', messageBuffer);
        // Send the completed conversation turn to be saved
        if (currentTranscription) {
          sendToRenderer('save-conversation-turn-data', {
            transcription: currentTranscription,
            response: messageBuffer,
          });
          currentTranscription = '';
        }
      }
      messageBuffer = '';
      sendToRenderer('update-status', 'Listening...');
      break;
    case 'error':
      console.error('OpenAI error:', event.error);
      sendToRenderer('update-status', 'Error: ' + event.error.message);
      break;
    default:
      // Other event types (rate limits, audio buffer acks, ...) are ignored
      break;
  }
}
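
// Rough event order for one spoken turn, matching the cases handled above:
// speech_started -> speech_stopped -> input_audio_transcription.completed ->
// response.audio_transcript.delta (repeated) -> response.audio_transcript.done -> response.done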

// Try to re-establish the session, retrying up to MAX_RECONNECT_ATTEMPTS times
// with a fixed RECONNECT_DELAY between attempts.
async function attemptReconnect(conversationHistory) {
  reconnectAttempts++;
  console.log(`Reconnection attempt ${reconnectAttempts}/${MAX_RECONNECT_ATTEMPTS}`);

  // Drop any partial output from the dead connection
  messageBuffer = '';
  currentTranscription = '';
  sendToRenderer('update-status', `Reconnecting... (${reconnectAttempts}/${MAX_RECONNECT_ATTEMPTS})`);

  await new Promise(resolve => setTimeout(resolve, RECONNECT_DELAY));

  try {
    const newConfig = { ...sessionParams, isReconnect: true };
    ws = await initializeOpenAISession(newConfig, conversationHistory);
    sendToRenderer('update-status', 'Reconnected! Listening...');
    console.log('OpenAI session reconnected successfully');
    return true;
  } catch (error) {
    console.error(`Reconnection attempt ${reconnectAttempts} failed:`, error);
    if (reconnectAttempts < MAX_RECONNECT_ATTEMPTS) {
      return attemptReconnect(conversationHistory);
    }
    console.log('Max reconnection attempts reached');
    sendToRenderer('reconnect-failed', {
      message: `Tried ${MAX_RECONNECT_ATTEMPTS} times to reconnect to OpenAI. Check your connection and API key.`,
    });
    sessionParams = null;
    return false;
  }
}

// Append a base64-encoded audio chunk to the server-side input buffer.
async function sendAudioToOpenAI(base64Data) {
  if (!ws || ws.readyState !== WebSocket.OPEN) {
    console.error('WebSocket not connected');
    return { success: false, error: 'No active connection' };
  }
  try {
    ws.send(
      JSON.stringify({
        type: 'input_audio_buffer.append',
        audio: base64Data,
      })
    );
    return { success: true };
  } catch (error) {
    console.error('Error sending audio to OpenAI:', error);
    return { success: false, error: error.message };
  }
}
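
// The payload must match input_audio_format from the session.update above:
// base64-encoded raw 16-bit PCM ('pcm16'; the Realtime API docs specify 24 kHz mono).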

// Send a typed user message and ask the model to respond.
async function sendTextToOpenAI(text) {
  if (!ws || ws.readyState !== WebSocket.OPEN) {
    console.error('WebSocket not connected');
    return { success: false, error: 'No active connection' };
  }
  try {
    // Create a conversation item with the user's text
    ws.send(
      JSON.stringify({
        type: 'conversation.item.create',
        item: {
          type: 'message',
          role: 'user',
          content: [{ type: 'input_text', text: text }],
        },
      })
    );
    // Trigger response generation; the result arrives as events on the same
    // socket and is handled by handleOpenAIEvent
    ws.send(JSON.stringify({ type: 'response.create' }));
    return { success: true };
  } catch (error) {
    console.error('Error sending text to OpenAI:', error);
    return { success: false, error: error.message };
  }
}

// Vision requests go over HTTPS: the Realtime API doesn't accept images, so this
// falls back to the standard Chat Completions endpoint with streaming enabled.
async function sendImageToOpenAI(base64Data, prompt, config) {
  const { apiKey, baseUrl, model } = config;

  // Derive the HTTPS endpoint from the custom WebSocket base URL, if any
  const apiEndpoint = baseUrl
    ? `${baseUrl.replace('wss://', 'https://').replace('/v1/realtime', '')}/v1/chat/completions`
    : 'https://api.openai.com/v1/chat/completions';

  try {
    const response = await fetch(apiEndpoint, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify({
        model: model || 'gpt-4o',
        messages: [
          {
            role: 'user',
            content: [
              { type: 'text', text: prompt },
              {
                type: 'image_url',
                image_url: {
                  url: `data:image/jpeg;base64,${base64Data}`,
                },
              },
            ],
          },
        ],
        max_tokens: 4096,
        stream: true,
      }),
    });

    if (!response.ok) {
      const error = await response.text();
      throw new Error(`OpenAI API error: ${response.status} - ${error}`);
    }

    // Parse the server-sent event stream chunk by chunk
    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let fullText = '';
    let isFirst = true;
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      // stream: true keeps multi-byte characters split across chunks intact
      const chunk = decoder.decode(value, { stream: true });
      const lines = chunk.split('\n').filter(line => line.trim().startsWith('data: '));
      for (const line of lines) {
        const data = line.replace('data: ', '').trim();
        if (data === '[DONE]') continue;
        try {
          const json = JSON.parse(data);
          const content = json.choices[0]?.delta?.content;
          if (content) {
            fullText += content;
            sendToRenderer(isFirst ? 'new-response' : 'update-response', fullText);
            isFirst = false;
          }
        } catch (e) {
          // Skip incomplete or invalid JSON lines
        }
      }
    }
    return { success: true, text: fullText, model: model || 'gpt-4o' };
  } catch (error) {
    console.error('Error sending image to OpenAI:', error);
    return { success: false, error: error.message };
  }
}
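
// Caveat: the parser above assumes each read() chunk contains whole `data:` lines;
// a JSON payload split across two chunks is dropped by the catch block. A more
// robust version would buffer the tail of each chunk until the next newline.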

// Close the session on user request; clearing sessionParams disables auto-reconnect.
function closeOpenAISession() {
  isUserClosing = true;
  sessionParams = null;
  if (ws) {
    ws.close();
    ws = null;
  }
}

module.exports = {
  initializeOpenAISession,
  sendAudioToOpenAI,
  sendTextToOpenAI,
  sendImageToOpenAI,
  closeOpenAISession,
};
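
/*
 * Hypothetical wiring from the Electron main process (channel names here are
 * illustrative, not taken from this file):
 *
 *   const { ipcMain } = require('electron');
 *   const openai = require('./utils/openai-realtime');
 *
 *   ipcMain.handle('openai-start', (_e, config, history) => openai.initializeOpenAISession(config, history));
 *   ipcMain.handle('openai-audio', (_e, chunk) => openai.sendAudioToOpenAI(chunk));
 *   ipcMain.handle('openai-text', (_e, text) => openai.sendTextToOpenAI(text));
 *   ipcMain.handle('openai-image', (_e, img, prompt, config) => openai.sendImageToOpenAI(img, prompt, config));
 *   ipcMain.handle('openai-stop', () => openai.closeOpenAISession());
 */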