diff --git a/src/index.js b/src/index.js index 0bfed4a..e301bb5 100644 --- a/src/index.js +++ b/src/index.js @@ -1,299 +1,336 @@ -if (require('electron-squirrel-startup')) { - process.exit(0); +if (require("electron-squirrel-startup")) { + process.exit(0); } -const { app, BrowserWindow, shell, ipcMain } = require('electron'); -const { createWindow, updateGlobalShortcuts } = require('./utils/window'); -const { setupGeminiIpcHandlers, stopMacOSAudioCapture, sendToRenderer } = require('./utils/gemini'); -const storage = require('./storage'); +// ── Global crash handlers to prevent silent process termination ── +process.on("uncaughtException", (error) => { + console.error("[FATAL] Uncaught exception:", error); + try { + const { sendToRenderer } = require("./utils/gemini"); + sendToRenderer( + "update-status", + "Fatal error: " + (error?.message || "unknown"), + ); + } catch (_) { + // sendToRenderer may not be available yet + } +}); + +process.on("unhandledRejection", (reason) => { + console.error("[FATAL] Unhandled promise rejection:", reason); + try { + const { sendToRenderer } = require("./utils/gemini"); + sendToRenderer( + "update-status", + "Unhandled error: " + + (reason instanceof Error ? reason.message : String(reason)), + ); + } catch (_) { + // sendToRenderer may not be available yet + } +}); + +const { app, BrowserWindow, shell, ipcMain } = require("electron"); +const { createWindow, updateGlobalShortcuts } = require("./utils/window"); +const { + setupGeminiIpcHandlers, + stopMacOSAudioCapture, + sendToRenderer, +} = require("./utils/gemini"); +const storage = require("./storage"); const geminiSessionRef = { current: null }; let mainWindow = null; function createMainWindow() { - mainWindow = createWindow(sendToRenderer, geminiSessionRef); - return mainWindow; + mainWindow = createWindow(sendToRenderer, geminiSessionRef); + return mainWindow; } app.whenReady().then(async () => { - // Initialize storage (checks version, resets if needed) - storage.initializeStorage(); + // Initialize storage (checks version, resets if needed) + storage.initializeStorage(); - // Trigger screen recording permission prompt on macOS if not already granted - if (process.platform === 'darwin') { - const { desktopCapturer } = require('electron'); - desktopCapturer.getSources({ types: ['screen'] }).catch(() => {}); - } + // Trigger screen recording permission prompt on macOS if not already granted + if (process.platform === "darwin") { + const { desktopCapturer } = require("electron"); + desktopCapturer.getSources({ types: ["screen"] }).catch(() => {}); + } + createMainWindow(); + setupGeminiIpcHandlers(geminiSessionRef); + setupStorageIpcHandlers(); + setupGeneralIpcHandlers(); +}); + +app.on("window-all-closed", () => { + stopMacOSAudioCapture(); + if (process.platform !== "darwin") { + app.quit(); + } +}); + +app.on("before-quit", () => { + stopMacOSAudioCapture(); +}); + +app.on("activate", () => { + if (BrowserWindow.getAllWindows().length === 0) { createMainWindow(); - setupGeminiIpcHandlers(geminiSessionRef); - setupStorageIpcHandlers(); - setupGeneralIpcHandlers(); -}); - -app.on('window-all-closed', () => { - stopMacOSAudioCapture(); - if (process.platform !== 'darwin') { - app.quit(); - } -}); - -app.on('before-quit', () => { - stopMacOSAudioCapture(); -}); - -app.on('activate', () => { - if (BrowserWindow.getAllWindows().length === 0) { - createMainWindow(); - } + } }); function setupStorageIpcHandlers() { - // ============ CONFIG ============ - ipcMain.handle('storage:get-config', async () => { - try { - return { success: true, data: storage.getConfig() }; - } catch (error) { - console.error('Error getting config:', error); - return { success: false, error: error.message }; - } - }); + // ============ CONFIG ============ + ipcMain.handle("storage:get-config", async () => { + try { + return { success: true, data: storage.getConfig() }; + } catch (error) { + console.error("Error getting config:", error); + return { success: false, error: error.message }; + } + }); - ipcMain.handle('storage:set-config', async (event, config) => { - try { - storage.setConfig(config); - return { success: true }; - } catch (error) { - console.error('Error setting config:', error); - return { success: false, error: error.message }; - } - }); + ipcMain.handle("storage:set-config", async (event, config) => { + try { + storage.setConfig(config); + return { success: true }; + } catch (error) { + console.error("Error setting config:", error); + return { success: false, error: error.message }; + } + }); - ipcMain.handle('storage:update-config', async (event, key, value) => { - try { - storage.updateConfig(key, value); - return { success: true }; - } catch (error) { - console.error('Error updating config:', error); - return { success: false, error: error.message }; - } - }); + ipcMain.handle("storage:update-config", async (event, key, value) => { + try { + storage.updateConfig(key, value); + return { success: true }; + } catch (error) { + console.error("Error updating config:", error); + return { success: false, error: error.message }; + } + }); - // ============ CREDENTIALS ============ - ipcMain.handle('storage:get-credentials', async () => { - try { - return { success: true, data: storage.getCredentials() }; - } catch (error) { - console.error('Error getting credentials:', error); - return { success: false, error: error.message }; - } - }); + // ============ CREDENTIALS ============ + ipcMain.handle("storage:get-credentials", async () => { + try { + return { success: true, data: storage.getCredentials() }; + } catch (error) { + console.error("Error getting credentials:", error); + return { success: false, error: error.message }; + } + }); - ipcMain.handle('storage:set-credentials', async (event, credentials) => { - try { - storage.setCredentials(credentials); - return { success: true }; - } catch (error) { - console.error('Error setting credentials:', error); - return { success: false, error: error.message }; - } - }); + ipcMain.handle("storage:set-credentials", async (event, credentials) => { + try { + storage.setCredentials(credentials); + return { success: true }; + } catch (error) { + console.error("Error setting credentials:", error); + return { success: false, error: error.message }; + } + }); - ipcMain.handle('storage:get-api-key', async () => { - try { - return { success: true, data: storage.getApiKey() }; - } catch (error) { - console.error('Error getting API key:', error); - return { success: false, error: error.message }; - } - }); + ipcMain.handle("storage:get-api-key", async () => { + try { + return { success: true, data: storage.getApiKey() }; + } catch (error) { + console.error("Error getting API key:", error); + return { success: false, error: error.message }; + } + }); - ipcMain.handle('storage:set-api-key', async (event, apiKey) => { - try { - storage.setApiKey(apiKey); - return { success: true }; - } catch (error) { - console.error('Error setting API key:', error); - return { success: false, error: error.message }; - } - }); + ipcMain.handle("storage:set-api-key", async (event, apiKey) => { + try { + storage.setApiKey(apiKey); + return { success: true }; + } catch (error) { + console.error("Error setting API key:", error); + return { success: false, error: error.message }; + } + }); - ipcMain.handle('storage:get-groq-api-key', async () => { - try { - return { success: true, data: storage.getGroqApiKey() }; - } catch (error) { - console.error('Error getting Groq API key:', error); - return { success: false, error: error.message }; - } - }); + ipcMain.handle("storage:get-groq-api-key", async () => { + try { + return { success: true, data: storage.getGroqApiKey() }; + } catch (error) { + console.error("Error getting Groq API key:", error); + return { success: false, error: error.message }; + } + }); - ipcMain.handle('storage:set-groq-api-key', async (event, groqApiKey) => { - try { - storage.setGroqApiKey(groqApiKey); - return { success: true }; - } catch (error) { - console.error('Error setting Groq API key:', error); - return { success: false, error: error.message }; - } - }); + ipcMain.handle("storage:set-groq-api-key", async (event, groqApiKey) => { + try { + storage.setGroqApiKey(groqApiKey); + return { success: true }; + } catch (error) { + console.error("Error setting Groq API key:", error); + return { success: false, error: error.message }; + } + }); - // ============ PREFERENCES ============ - ipcMain.handle('storage:get-preferences', async () => { - try { - return { success: true, data: storage.getPreferences() }; - } catch (error) { - console.error('Error getting preferences:', error); - return { success: false, error: error.message }; - } - }); + // ============ PREFERENCES ============ + ipcMain.handle("storage:get-preferences", async () => { + try { + return { success: true, data: storage.getPreferences() }; + } catch (error) { + console.error("Error getting preferences:", error); + return { success: false, error: error.message }; + } + }); - ipcMain.handle('storage:set-preferences', async (event, preferences) => { - try { - storage.setPreferences(preferences); - return { success: true }; - } catch (error) { - console.error('Error setting preferences:', error); - return { success: false, error: error.message }; - } - }); + ipcMain.handle("storage:set-preferences", async (event, preferences) => { + try { + storage.setPreferences(preferences); + return { success: true }; + } catch (error) { + console.error("Error setting preferences:", error); + return { success: false, error: error.message }; + } + }); - ipcMain.handle('storage:update-preference', async (event, key, value) => { - try { - storage.updatePreference(key, value); - return { success: true }; - } catch (error) { - console.error('Error updating preference:', error); - return { success: false, error: error.message }; - } - }); + ipcMain.handle("storage:update-preference", async (event, key, value) => { + try { + storage.updatePreference(key, value); + return { success: true }; + } catch (error) { + console.error("Error updating preference:", error); + return { success: false, error: error.message }; + } + }); - // ============ KEYBINDS ============ - ipcMain.handle('storage:get-keybinds', async () => { - try { - return { success: true, data: storage.getKeybinds() }; - } catch (error) { - console.error('Error getting keybinds:', error); - return { success: false, error: error.message }; - } - }); + // ============ KEYBINDS ============ + ipcMain.handle("storage:get-keybinds", async () => { + try { + return { success: true, data: storage.getKeybinds() }; + } catch (error) { + console.error("Error getting keybinds:", error); + return { success: false, error: error.message }; + } + }); - ipcMain.handle('storage:set-keybinds', async (event, keybinds) => { - try { - storage.setKeybinds(keybinds); - return { success: true }; - } catch (error) { - console.error('Error setting keybinds:', error); - return { success: false, error: error.message }; - } - }); + ipcMain.handle("storage:set-keybinds", async (event, keybinds) => { + try { + storage.setKeybinds(keybinds); + return { success: true }; + } catch (error) { + console.error("Error setting keybinds:", error); + return { success: false, error: error.message }; + } + }); - // ============ HISTORY ============ - ipcMain.handle('storage:get-all-sessions', async () => { - try { - return { success: true, data: storage.getAllSessions() }; - } catch (error) { - console.error('Error getting sessions:', error); - return { success: false, error: error.message }; - } - }); + // ============ HISTORY ============ + ipcMain.handle("storage:get-all-sessions", async () => { + try { + return { success: true, data: storage.getAllSessions() }; + } catch (error) { + console.error("Error getting sessions:", error); + return { success: false, error: error.message }; + } + }); - ipcMain.handle('storage:get-session', async (event, sessionId) => { - try { - return { success: true, data: storage.getSession(sessionId) }; - } catch (error) { - console.error('Error getting session:', error); - return { success: false, error: error.message }; - } - }); + ipcMain.handle("storage:get-session", async (event, sessionId) => { + try { + return { success: true, data: storage.getSession(sessionId) }; + } catch (error) { + console.error("Error getting session:", error); + return { success: false, error: error.message }; + } + }); - ipcMain.handle('storage:save-session', async (event, sessionId, data) => { - try { - storage.saveSession(sessionId, data); - return { success: true }; - } catch (error) { - console.error('Error saving session:', error); - return { success: false, error: error.message }; - } - }); + ipcMain.handle("storage:save-session", async (event, sessionId, data) => { + try { + storage.saveSession(sessionId, data); + return { success: true }; + } catch (error) { + console.error("Error saving session:", error); + return { success: false, error: error.message }; + } + }); - ipcMain.handle('storage:delete-session', async (event, sessionId) => { - try { - storage.deleteSession(sessionId); - return { success: true }; - } catch (error) { - console.error('Error deleting session:', error); - return { success: false, error: error.message }; - } - }); + ipcMain.handle("storage:delete-session", async (event, sessionId) => { + try { + storage.deleteSession(sessionId); + return { success: true }; + } catch (error) { + console.error("Error deleting session:", error); + return { success: false, error: error.message }; + } + }); - ipcMain.handle('storage:delete-all-sessions', async () => { - try { - storage.deleteAllSessions(); - return { success: true }; - } catch (error) { - console.error('Error deleting all sessions:', error); - return { success: false, error: error.message }; - } - }); + ipcMain.handle("storage:delete-all-sessions", async () => { + try { + storage.deleteAllSessions(); + return { success: true }; + } catch (error) { + console.error("Error deleting all sessions:", error); + return { success: false, error: error.message }; + } + }); - // ============ LIMITS ============ - ipcMain.handle('storage:get-today-limits', async () => { - try { - return { success: true, data: storage.getTodayLimits() }; - } catch (error) { - console.error('Error getting today limits:', error); - return { success: false, error: error.message }; - } - }); + // ============ LIMITS ============ + ipcMain.handle("storage:get-today-limits", async () => { + try { + return { success: true, data: storage.getTodayLimits() }; + } catch (error) { + console.error("Error getting today limits:", error); + return { success: false, error: error.message }; + } + }); - // ============ CLEAR ALL ============ - ipcMain.handle('storage:clear-all', async () => { - try { - storage.clearAllData(); - return { success: true }; - } catch (error) { - console.error('Error clearing all data:', error); - return { success: false, error: error.message }; - } - }); + // ============ CLEAR ALL ============ + ipcMain.handle("storage:clear-all", async () => { + try { + storage.clearAllData(); + return { success: true }; + } catch (error) { + console.error("Error clearing all data:", error); + return { success: false, error: error.message }; + } + }); } function setupGeneralIpcHandlers() { - ipcMain.handle('get-app-version', async () => { - return app.getVersion(); - }); + ipcMain.handle("get-app-version", async () => { + return app.getVersion(); + }); - ipcMain.handle('quit-application', async event => { - try { - stopMacOSAudioCapture(); - app.quit(); - return { success: true }; - } catch (error) { - console.error('Error quitting application:', error); - return { success: false, error: error.message }; - } - }); + ipcMain.handle("quit-application", async (event) => { + try { + stopMacOSAudioCapture(); + app.quit(); + return { success: true }; + } catch (error) { + console.error("Error quitting application:", error); + return { success: false, error: error.message }; + } + }); - ipcMain.handle('open-external', async (event, url) => { - try { - await shell.openExternal(url); - return { success: true }; - } catch (error) { - console.error('Error opening external URL:', error); - return { success: false, error: error.message }; - } - }); + ipcMain.handle("open-external", async (event, url) => { + try { + await shell.openExternal(url); + return { success: true }; + } catch (error) { + console.error("Error opening external URL:", error); + return { success: false, error: error.message }; + } + }); - ipcMain.on('update-keybinds', (event, newKeybinds) => { - if (mainWindow) { - // Also save to storage - storage.setKeybinds(newKeybinds); - updateGlobalShortcuts(newKeybinds, mainWindow, sendToRenderer, geminiSessionRef); - } - }); + ipcMain.on("update-keybinds", (event, newKeybinds) => { + if (mainWindow) { + // Also save to storage + storage.setKeybinds(newKeybinds); + updateGlobalShortcuts( + newKeybinds, + mainWindow, + sendToRenderer, + geminiSessionRef, + ); + } + }); - // Debug logging from renderer - ipcMain.on('log-message', (event, msg) => { - console.log(msg); - }); + // Debug logging from renderer + ipcMain.on("log-message", (event, msg) => { + console.log(msg); + }); } diff --git a/src/utils/localai.js b/src/utils/localai.js index cc6583f..800656a 100644 --- a/src/utils/localai.js +++ b/src/utils/localai.js @@ -1,17 +1,27 @@ -const { Ollama } = require('ollama'); -const { getSystemPrompt } = require('./prompts'); -const { sendToRenderer, initializeNewSession, saveConversationTurn } = require('./gemini'); +const { Ollama } = require("ollama"); +const { getSystemPrompt } = require("./prompts"); +const { + sendToRenderer, + initializeNewSession, + saveConversationTurn, +} = require("./gemini"); +const { fork } = require("child_process"); +const path = require("path"); // ── State ── let ollamaClient = null; let ollamaModel = null; -let whisperPipeline = null; +let whisperWorker = null; let isWhisperLoading = false; +let whisperReady = false; let localConversationHistory = []; let currentSystemPrompt = null; let isLocalActive = false; +// Pending transcription callback (one at a time) +let pendingTranscribe = null; + // VAD state let isSpeaking = false; let speechBuffers = []; @@ -20,418 +30,678 @@ let speechFrameCount = 0; // VAD configuration const VAD_MODES = { - NORMAL: { energyThreshold: 0.01, speechFramesRequired: 3, silenceFramesRequired: 30 }, - LOW_BITRATE: { energyThreshold: 0.008, speechFramesRequired: 4, silenceFramesRequired: 35 }, - AGGRESSIVE: { energyThreshold: 0.015, speechFramesRequired: 2, silenceFramesRequired: 20 }, - VERY_AGGRESSIVE: { energyThreshold: 0.02, speechFramesRequired: 2, silenceFramesRequired: 15 }, + NORMAL: { + energyThreshold: 0.01, + speechFramesRequired: 3, + silenceFramesRequired: 30, + }, + LOW_BITRATE: { + energyThreshold: 0.008, + speechFramesRequired: 4, + silenceFramesRequired: 35, + }, + AGGRESSIVE: { + energyThreshold: 0.015, + speechFramesRequired: 2, + silenceFramesRequired: 20, + }, + VERY_AGGRESSIVE: { + energyThreshold: 0.02, + speechFramesRequired: 2, + silenceFramesRequired: 15, + }, }; let vadConfig = VAD_MODES.VERY_AGGRESSIVE; +// Maximum speech buffer size: ~30 seconds at 16kHz, 16-bit mono +const MAX_SPEECH_BUFFER_BYTES = 16000 * 2 * 30; // 960,000 bytes + // Audio resampling buffer let resampleRemainder = Buffer.alloc(0); // ── Audio Resampling (24kHz → 16kHz) ── function resample24kTo16k(inputBuffer) { - // Combine with any leftover samples from previous call - const combined = Buffer.concat([resampleRemainder, inputBuffer]); - const inputSamples = Math.floor(combined.length / 2); // 16-bit = 2 bytes per sample - // Ratio: 16000/24000 = 2/3, so for every 3 input samples we produce 2 output samples - const outputSamples = Math.floor((inputSamples * 2) / 3); - const outputBuffer = Buffer.alloc(outputSamples * 2); + // Combine with any leftover samples from previous call + const combined = Buffer.concat([resampleRemainder, inputBuffer]); + const inputSamples = Math.floor(combined.length / 2); // 16-bit = 2 bytes per sample + // Ratio: 16000/24000 = 2/3, so for every 3 input samples we produce 2 output samples + const outputSamples = Math.floor((inputSamples * 2) / 3); + const outputBuffer = Buffer.alloc(outputSamples * 2); - for (let i = 0; i < outputSamples; i++) { - // Map output sample index to input position - const srcPos = (i * 3) / 2; - const srcIndex = Math.floor(srcPos); - const frac = srcPos - srcIndex; + for (let i = 0; i < outputSamples; i++) { + // Map output sample index to input position + const srcPos = (i * 3) / 2; + const srcIndex = Math.floor(srcPos); + const frac = srcPos - srcIndex; - const s0 = combined.readInt16LE(srcIndex * 2); - const s1 = srcIndex + 1 < inputSamples ? combined.readInt16LE((srcIndex + 1) * 2) : s0; - const interpolated = Math.round(s0 + frac * (s1 - s0)); - outputBuffer.writeInt16LE(Math.max(-32768, Math.min(32767, interpolated)), i * 2); - } + const s0 = combined.readInt16LE(srcIndex * 2); + const s1 = + srcIndex + 1 < inputSamples + ? combined.readInt16LE((srcIndex + 1) * 2) + : s0; + const interpolated = Math.round(s0 + frac * (s1 - s0)); + outputBuffer.writeInt16LE( + Math.max(-32768, Math.min(32767, interpolated)), + i * 2, + ); + } - // Store remainder for next call - const consumedInputSamples = Math.ceil((outputSamples * 3) / 2); - const remainderStart = consumedInputSamples * 2; - resampleRemainder = remainderStart < combined.length ? combined.slice(remainderStart) : Buffer.alloc(0); + // Store remainder for next call + const consumedInputSamples = Math.ceil((outputSamples * 3) / 2); + const remainderStart = consumedInputSamples * 2; + resampleRemainder = + remainderStart < combined.length + ? combined.slice(remainderStart) + : Buffer.alloc(0); - return outputBuffer; + return outputBuffer; } // ── VAD (Voice Activity Detection) ── function calculateRMS(pcm16Buffer) { - const samples = pcm16Buffer.length / 2; - if (samples === 0) return 0; - let sumSquares = 0; - for (let i = 0; i < samples; i++) { - const sample = pcm16Buffer.readInt16LE(i * 2) / 32768; - sumSquares += sample * sample; - } - return Math.sqrt(sumSquares / samples); + const samples = pcm16Buffer.length / 2; + if (samples === 0) return 0; + let sumSquares = 0; + for (let i = 0; i < samples; i++) { + const sample = pcm16Buffer.readInt16LE(i * 2) / 32768; + sumSquares += sample * sample; + } + return Math.sqrt(sumSquares / samples); } function processVAD(pcm16kBuffer) { - const rms = calculateRMS(pcm16kBuffer); - const isVoice = rms > vadConfig.energyThreshold; + const rms = calculateRMS(pcm16kBuffer); + const isVoice = rms > vadConfig.energyThreshold; - if (isVoice) { - speechFrameCount++; - silenceFrameCount = 0; + if (isVoice) { + speechFrameCount++; + silenceFrameCount = 0; - if (!isSpeaking && speechFrameCount >= vadConfig.speechFramesRequired) { - isSpeaking = true; - speechBuffers = []; - console.log('[LocalAI] Speech started (RMS:', rms.toFixed(4), ')'); - sendToRenderer('update-status', 'Listening... (speech detected)'); - } - } else { - silenceFrameCount++; - speechFrameCount = 0; - - if (isSpeaking && silenceFrameCount >= vadConfig.silenceFramesRequired) { - isSpeaking = false; - console.log('[LocalAI] Speech ended, accumulated', speechBuffers.length, 'chunks'); - sendToRenderer('update-status', 'Transcribing...'); - - // Trigger transcription with accumulated audio - const audioData = Buffer.concat(speechBuffers); - speechBuffers = []; - handleSpeechEnd(audioData); - return; - } + if (!isSpeaking && speechFrameCount >= vadConfig.speechFramesRequired) { + isSpeaking = true; + speechBuffers = []; + console.log("[LocalAI] Speech started (RMS:", rms.toFixed(4), ")"); + sendToRenderer("update-status", "Listening... (speech detected)"); } + } else { + silenceFrameCount++; + speechFrameCount = 0; - // Accumulate audio during speech - if (isSpeaking) { - speechBuffers.push(Buffer.from(pcm16kBuffer)); + if (isSpeaking && silenceFrameCount >= vadConfig.silenceFramesRequired) { + isSpeaking = false; + console.log( + "[LocalAI] Speech ended, accumulated", + speechBuffers.length, + "chunks", + ); + sendToRenderer("update-status", "Transcribing..."); + + // Trigger transcription with accumulated audio + const audioData = Buffer.concat(speechBuffers); + speechBuffers = []; + handleSpeechEnd(audioData).catch((err) => { + console.error("[LocalAI] handleSpeechEnd crashed:", err); + sendToRenderer( + "update-status", + "Transcription error: " + (err?.message || "unknown"), + ); + }); + return; } + } + + // Accumulate audio during speech + if (isSpeaking) { + speechBuffers.push(Buffer.from(pcm16kBuffer)); + + // Cap buffer at ~30 seconds to prevent OOM and ONNX tensor overflow + const totalBytes = speechBuffers.reduce((sum, b) => sum + b.length, 0); + if (totalBytes >= MAX_SPEECH_BUFFER_BYTES) { + isSpeaking = false; + console.log( + "[LocalAI] Speech buffer limit reached (" + + totalBytes + + " bytes), forcing transcription", + ); + sendToRenderer("update-status", "Transcribing (max length reached)..."); + const audioData = Buffer.concat(speechBuffers); + speechBuffers = []; + silenceFrameCount = 0; + speechFrameCount = 0; + handleSpeechEnd(audioData).catch((err) => { + console.error("[LocalAI] handleSpeechEnd crashed:", err); + sendToRenderer( + "update-status", + "Transcription error: " + (err?.message || "unknown"), + ); + }); + } + } } -// ── Whisper Transcription ── +// ── Whisper Worker (isolated child process) ── + +function spawnWhisperWorker() { + if (whisperWorker) return; + + const workerPath = path.join(__dirname, "whisperWorker.js"); + console.log("[LocalAI] Spawning Whisper worker:", workerPath); + + whisperWorker = fork(workerPath, [], { + stdio: ["pipe", "pipe", "pipe", "ipc"], + // ELECTRON_RUN_AS_NODE makes the Electron binary behave as plain Node.js, + // which is required for child_process.fork() in packaged Electron apps. + env: { ...process.env, ELECTRON_RUN_AS_NODE: "1" }, + }); + + whisperWorker.stdout.on("data", (data) => { + console.log("[WhisperWorker stdout]", data.toString().trim()); + }); + whisperWorker.stderr.on("data", (data) => { + console.error("[WhisperWorker stderr]", data.toString().trim()); + }); + + whisperWorker.on("message", (msg) => { + switch (msg.type) { + case "ready": + console.log("[LocalAI] Whisper worker ready"); + break; + case "load-result": + handleWorkerLoadResult(msg); + break; + case "transcribe-result": + handleWorkerTranscribeResult(msg); + break; + case "status": + sendToRenderer("update-status", msg.message); + break; + } + }); + + whisperWorker.on("exit", (code, signal) => { + console.error( + "[LocalAI] Whisper worker exited — code:", + code, + "signal:", + signal, + ); + whisperWorker = null; + whisperReady = false; + + // Reject any pending transcription + if (pendingTranscribe) { + pendingTranscribe.reject( + new Error( + "Whisper worker crashed (code: " + code + ", signal: " + signal + ")", + ), + ); + pendingTranscribe = null; + } + + // If session is still active, inform the user and respawn + if (isLocalActive) { + sendToRenderer( + "update-status", + "Whisper crashed (signal: " + + (signal || code) + + "). Respawning worker...", + ); + setTimeout(() => { + if (isLocalActive) { + respawnWhisperWorker(); + } + }, 2000); + } + }); + + whisperWorker.on("error", (err) => { + console.error("[LocalAI] Whisper worker error:", err); + whisperWorker = null; + whisperReady = false; + }); +} + +let pendingLoad = null; + +function handleWorkerLoadResult(msg) { + if (msg.success) { + console.log("[LocalAI] Whisper model loaded successfully (in worker)"); + whisperReady = true; + sendToRenderer("whisper-downloading", false); + isWhisperLoading = false; + if (pendingLoad) { + pendingLoad.resolve(true); + pendingLoad = null; + } + } else { + console.error("[LocalAI] Whisper worker failed to load model:", msg.error); + sendToRenderer("whisper-downloading", false); + sendToRenderer( + "update-status", + "Failed to load Whisper model: " + msg.error, + ); + isWhisperLoading = false; + if (pendingLoad) { + pendingLoad.resolve(false); + pendingLoad = null; + } + } +} + +function handleWorkerTranscribeResult(msg) { + if (!pendingTranscribe) return; + if (msg.success) { + console.log("[LocalAI] Transcription:", msg.text); + pendingTranscribe.resolve(msg.text || null); + } else { + console.error("[LocalAI] Worker transcription error:", msg.error); + pendingTranscribe.resolve(null); + } + pendingTranscribe = null; +} + +function respawnWhisperWorker() { + killWhisperWorker(); + spawnWhisperWorker(); + const { app } = require("electron"); + const cacheDir = path.join(app.getPath("userData"), "whisper-models"); + const modelName = + require("../storage").getPreferences().whisperModel || + "Xenova/whisper-small"; + sendToRenderer("whisper-downloading", true); + isWhisperLoading = true; + whisperWorker.send({ type: "load", modelName, cacheDir }); +} + +function killWhisperWorker() { + if (whisperWorker) { + try { + whisperWorker.removeAllListeners(); + whisperWorker.kill(); + } catch (_) { + // Already dead + } + whisperWorker = null; + whisperReady = false; + } +} async function loadWhisperPipeline(modelName) { - if (whisperPipeline) return whisperPipeline; - if (isWhisperLoading) return null; + if (whisperReady) return true; + if (isWhisperLoading) return null; - isWhisperLoading = true; - console.log('[LocalAI] Loading Whisper model:', modelName); - sendToRenderer('whisper-downloading', true); - sendToRenderer('update-status', 'Loading Whisper model (first time may take a while)...'); + isWhisperLoading = true; + console.log("[LocalAI] Loading Whisper model via worker:", modelName); + sendToRenderer("whisper-downloading", true); + sendToRenderer( + "update-status", + "Loading Whisper model (first time may take a while)...", + ); - try { - // Dynamic import for ESM module - const { pipeline, env } = await import('@huggingface/transformers'); - // Cache models outside the asar archive so ONNX runtime can load them - const { app } = require('electron'); - const path = require('path'); - env.cacheDir = path.join(app.getPath('userData'), 'whisper-models'); - whisperPipeline = await pipeline('automatic-speech-recognition', modelName, { - dtype: 'q8', - device: 'auto', - }); - console.log('[LocalAI] Whisper model loaded successfully'); - sendToRenderer('whisper-downloading', false); - isWhisperLoading = false; - return whisperPipeline; - } catch (error) { - console.error('[LocalAI] Failed to load Whisper model:', error); - sendToRenderer('whisper-downloading', false); - sendToRenderer('update-status', 'Failed to load Whisper model: ' + error.message); - isWhisperLoading = false; - return null; - } -} + spawnWhisperWorker(); -function pcm16ToFloat32(pcm16Buffer) { - const samples = pcm16Buffer.length / 2; - const float32 = new Float32Array(samples); - for (let i = 0; i < samples; i++) { - float32[i] = pcm16Buffer.readInt16LE(i * 2) / 32768; - } - return float32; + const { app } = require("electron"); + const cacheDir = path.join(app.getPath("userData"), "whisper-models"); + + return new Promise((resolve) => { + pendingLoad = { resolve }; + whisperWorker.send({ type: "load", modelName, cacheDir }); + }); } async function transcribeAudio(pcm16kBuffer) { - if (!whisperPipeline) { - console.error('[LocalAI] Whisper pipeline not loaded'); - return null; - } + if (!whisperReady || !whisperWorker) { + console.error("[LocalAI] Whisper worker not ready"); + return null; + } + + if (!pcm16kBuffer || pcm16kBuffer.length < 2) { + console.error("[LocalAI] Invalid audio buffer:", pcm16kBuffer?.length); + return null; + } + + console.log( + "[LocalAI] Starting transcription, audio length:", + pcm16kBuffer.length, + "bytes", + ); + + // Send audio to worker as base64 (IPC serialization) + const audioBase64 = pcm16kBuffer.toString("base64"); + + return new Promise((resolve, reject) => { + // Timeout: if worker takes > 60s, assume it's stuck + const timeout = setTimeout(() => { + console.error("[LocalAI] Transcription timed out after 60s"); + if (pendingTranscribe) { + pendingTranscribe = null; + resolve(null); + } + }, 60000); + + pendingTranscribe = { + resolve: (val) => { + clearTimeout(timeout); + resolve(val); + }, + reject: (err) => { + clearTimeout(timeout); + reject(err); + }, + }; try { - const float32Audio = pcm16ToFloat32(pcm16kBuffer); - - // Whisper expects audio at 16kHz which is what we have - const result = await whisperPipeline(float32Audio, { - sampling_rate: 16000, - language: 'en', - task: 'transcribe', - }); - - const text = result.text?.trim(); - console.log('[LocalAI] Transcription:', text); - return text; - } catch (error) { - console.error('[LocalAI] Transcription error:', error); - return null; + whisperWorker.send({ type: "transcribe", audioBase64 }); + } catch (err) { + clearTimeout(timeout); + pendingTranscribe = null; + console.error("[LocalAI] Failed to send to worker:", err); + resolve(null); } + }); } // ── Speech End Handler ── async function handleSpeechEnd(audioData) { - if (!isLocalActive) return; + if (!isLocalActive) return; - // Minimum audio length check (~0.5 seconds at 16kHz, 16-bit) - if (audioData.length < 16000) { - console.log('[LocalAI] Audio too short, skipping'); - sendToRenderer('update-status', 'Listening...'); - return; - } + // Minimum audio length check (~0.5 seconds at 16kHz, 16-bit) + if (audioData.length < 16000) { + console.log("[LocalAI] Audio too short, skipping"); + sendToRenderer("update-status", "Listening..."); + return; + } + console.log("[LocalAI] Processing audio:", audioData.length, "bytes"); + + try { const transcription = await transcribeAudio(audioData); - if (!transcription || transcription.trim() === '' || transcription.trim().length < 2) { - console.log('[LocalAI] Empty transcription, skipping'); - sendToRenderer('update-status', 'Listening...'); - return; + if ( + !transcription || + transcription.trim() === "" || + transcription.trim().length < 2 + ) { + console.log("[LocalAI] Empty transcription, skipping"); + sendToRenderer("update-status", "Listening..."); + return; } - sendToRenderer('update-status', 'Generating response...'); + sendToRenderer("update-status", "Generating response..."); await sendToOllama(transcription); + } catch (error) { + console.error("[LocalAI] handleSpeechEnd error:", error); + sendToRenderer( + "update-status", + "Error: " + (error?.message || "transcription failed"), + ); + } } // ── Ollama Chat ── async function sendToOllama(transcription) { - if (!ollamaClient || !ollamaModel) { - console.error('[LocalAI] Ollama not configured'); - return; - } + if (!ollamaClient || !ollamaModel) { + console.error("[LocalAI] Ollama not configured"); + return; + } - console.log('[LocalAI] Sending to Ollama:', transcription.substring(0, 100) + '...'); + console.log( + "[LocalAI] Sending to Ollama:", + transcription.substring(0, 100) + "...", + ); - localConversationHistory.push({ - role: 'user', - content: transcription.trim(), + localConversationHistory.push({ + role: "user", + content: transcription.trim(), + }); + + // Keep history manageable + if (localConversationHistory.length > 20) { + localConversationHistory = localConversationHistory.slice(-20); + } + + try { + const messages = [ + { + role: "system", + content: currentSystemPrompt || "You are a helpful assistant.", + }, + ...localConversationHistory, + ]; + + const response = await ollamaClient.chat({ + model: ollamaModel, + messages, + stream: true, }); - // Keep history manageable - if (localConversationHistory.length > 20) { - localConversationHistory = localConversationHistory.slice(-20); + let fullText = ""; + let isFirst = true; + + for await (const part of response) { + const token = part.message?.content || ""; + if (token) { + fullText += token; + sendToRenderer(isFirst ? "new-response" : "update-response", fullText); + isFirst = false; + } } - try { - const messages = [ - { role: 'system', content: currentSystemPrompt || 'You are a helpful assistant.' }, - ...localConversationHistory, - ]; + if (fullText.trim()) { + localConversationHistory.push({ + role: "assistant", + content: fullText.trim(), + }); - const response = await ollamaClient.chat({ - model: ollamaModel, - messages, - stream: true, - }); - - let fullText = ''; - let isFirst = true; - - for await (const part of response) { - const token = part.message?.content || ''; - if (token) { - fullText += token; - sendToRenderer(isFirst ? 'new-response' : 'update-response', fullText); - isFirst = false; - } - } - - if (fullText.trim()) { - localConversationHistory.push({ - role: 'assistant', - content: fullText.trim(), - }); - - saveConversationTurn(transcription, fullText); - } - - console.log('[LocalAI] Ollama response completed'); - sendToRenderer('update-status', 'Listening...'); - } catch (error) { - console.error('[LocalAI] Ollama error:', error); - sendToRenderer('update-status', 'Ollama error: ' + error.message); + saveConversationTurn(transcription, fullText); } + + console.log("[LocalAI] Ollama response completed"); + sendToRenderer("update-status", "Listening..."); + } catch (error) { + console.error("[LocalAI] Ollama error:", error); + sendToRenderer("update-status", "Ollama error: " + error.message); + } } // ── Public API ── -async function initializeLocalSession(ollamaHost, model, whisperModel, profile, customPrompt) { - console.log('[LocalAI] Initializing local session:', { ollamaHost, model, whisperModel, profile }); +async function initializeLocalSession( + ollamaHost, + model, + whisperModel, + profile, + customPrompt, +) { + console.log("[LocalAI] Initializing local session:", { + ollamaHost, + model, + whisperModel, + profile, + }); - sendToRenderer('session-initializing', true); + sendToRenderer("session-initializing", true); + try { + // Setup system prompt + currentSystemPrompt = getSystemPrompt(profile, customPrompt, false); + + // Initialize Ollama client + ollamaClient = new Ollama({ host: ollamaHost }); + ollamaModel = model; + + // Test Ollama connection try { - // Setup system prompt - currentSystemPrompt = getSystemPrompt(profile, customPrompt, false); - - // Initialize Ollama client - ollamaClient = new Ollama({ host: ollamaHost }); - ollamaModel = model; - - // Test Ollama connection - try { - await ollamaClient.list(); - console.log('[LocalAI] Ollama connection verified'); - } catch (error) { - console.error('[LocalAI] Cannot connect to Ollama at', ollamaHost, ':', error.message); - sendToRenderer('session-initializing', false); - sendToRenderer('update-status', 'Cannot connect to Ollama at ' + ollamaHost); - return false; - } - - // Load Whisper model - const pipeline = await loadWhisperPipeline(whisperModel); - if (!pipeline) { - sendToRenderer('session-initializing', false); - return false; - } - - // Reset VAD state - isSpeaking = false; - speechBuffers = []; - silenceFrameCount = 0; - speechFrameCount = 0; - resampleRemainder = Buffer.alloc(0); - localConversationHistory = []; - - // Initialize conversation session - initializeNewSession(profile, customPrompt); - - isLocalActive = true; - sendToRenderer('session-initializing', false); - sendToRenderer('update-status', 'Local AI ready - Listening...'); - - console.log('[LocalAI] Session initialized successfully'); - return true; + await ollamaClient.list(); + console.log("[LocalAI] Ollama connection verified"); } catch (error) { - console.error('[LocalAI] Initialization error:', error); - sendToRenderer('session-initializing', false); - sendToRenderer('update-status', 'Local AI error: ' + error.message); - return false; + console.error( + "[LocalAI] Cannot connect to Ollama at", + ollamaHost, + ":", + error.message, + ); + sendToRenderer("session-initializing", false); + sendToRenderer( + "update-status", + "Cannot connect to Ollama at " + ollamaHost, + ); + return false; } -} -function processLocalAudio(monoChunk24k) { - if (!isLocalActive) return; - - // Resample from 24kHz to 16kHz - const pcm16k = resample24kTo16k(monoChunk24k); - if (pcm16k.length > 0) { - processVAD(pcm16k); + // Load Whisper model + const pipeline = await loadWhisperPipeline(whisperModel); + if (!pipeline) { + sendToRenderer("session-initializing", false); + return false; } -} -function closeLocalSession() { - console.log('[LocalAI] Closing local session'); - isLocalActive = false; + // Reset VAD state isSpeaking = false; speechBuffers = []; silenceFrameCount = 0; speechFrameCount = 0; resampleRemainder = Buffer.alloc(0); localConversationHistory = []; - ollamaClient = null; - ollamaModel = null; - currentSystemPrompt = null; - // Note: whisperPipeline is kept loaded to avoid reloading on next session + + // Initialize conversation session + initializeNewSession(profile, customPrompt); + + isLocalActive = true; + sendToRenderer("session-initializing", false); + sendToRenderer("update-status", "Local AI ready - Listening..."); + + console.log("[LocalAI] Session initialized successfully"); + return true; + } catch (error) { + console.error("[LocalAI] Initialization error:", error); + sendToRenderer("session-initializing", false); + sendToRenderer("update-status", "Local AI error: " + error.message); + return false; + } +} + +function processLocalAudio(monoChunk24k) { + if (!isLocalActive) return; + + // Resample from 24kHz to 16kHz + const pcm16k = resample24kTo16k(monoChunk24k); + if (pcm16k.length > 0) { + processVAD(pcm16k); + } +} + +function closeLocalSession() { + console.log("[LocalAI] Closing local session"); + isLocalActive = false; + isSpeaking = false; + speechBuffers = []; + silenceFrameCount = 0; + speechFrameCount = 0; + resampleRemainder = Buffer.alloc(0); + localConversationHistory = []; + ollamaClient = null; + ollamaModel = null; + currentSystemPrompt = null; + // Note: whisperWorker is kept alive to avoid reloading model on next session + // To fully clean up, call killWhisperWorker() } function isLocalSessionActive() { - return isLocalActive; + return isLocalActive; } // ── Send text directly to Ollama (for manual text input) ── async function sendLocalText(text) { - if (!isLocalActive || !ollamaClient) { - return { success: false, error: 'No active local session' }; - } + if (!isLocalActive || !ollamaClient) { + return { success: false, error: "No active local session" }; + } - try { - await sendToOllama(text); - return { success: true }; - } catch (error) { - return { success: false, error: error.message }; - } + try { + await sendToOllama(text); + return { success: true }; + } catch (error) { + return { success: false, error: error.message }; + } } async function sendLocalImage(base64Data, prompt) { - if (!isLocalActive || !ollamaClient) { - return { success: false, error: 'No active local session' }; + if (!isLocalActive || !ollamaClient) { + return { success: false, error: "No active local session" }; + } + + try { + console.log("[LocalAI] Sending image to Ollama"); + sendToRenderer("update-status", "Analyzing image..."); + + const userMessage = { + role: "user", + content: prompt, + images: [base64Data], + }; + + // Store text-only version in history + localConversationHistory.push({ role: "user", content: prompt }); + + if (localConversationHistory.length > 20) { + localConversationHistory = localConversationHistory.slice(-20); } - try { - console.log('[LocalAI] Sending image to Ollama'); - sendToRenderer('update-status', 'Analyzing image...'); + const messages = [ + { + role: "system", + content: currentSystemPrompt || "You are a helpful assistant.", + }, + ...localConversationHistory.slice(0, -1), + userMessage, + ]; - const userMessage = { - role: 'user', - content: prompt, - images: [base64Data], - }; + const response = await ollamaClient.chat({ + model: ollamaModel, + messages, + stream: true, + }); - // Store text-only version in history - localConversationHistory.push({ role: 'user', content: prompt }); + let fullText = ""; + let isFirst = true; - if (localConversationHistory.length > 20) { - localConversationHistory = localConversationHistory.slice(-20); - } - - const messages = [ - { role: 'system', content: currentSystemPrompt || 'You are a helpful assistant.' }, - ...localConversationHistory.slice(0, -1), - userMessage, - ]; - - const response = await ollamaClient.chat({ - model: ollamaModel, - messages, - stream: true, - }); - - let fullText = ''; - let isFirst = true; - - for await (const part of response) { - const token = part.message?.content || ''; - if (token) { - fullText += token; - sendToRenderer(isFirst ? 'new-response' : 'update-response', fullText); - isFirst = false; - } - } - - if (fullText.trim()) { - localConversationHistory.push({ role: 'assistant', content: fullText.trim() }); - saveConversationTurn(prompt, fullText); - } - - console.log('[LocalAI] Image response completed'); - sendToRenderer('update-status', 'Listening...'); - return { success: true, text: fullText, model: ollamaModel }; - } catch (error) { - console.error('[LocalAI] Image error:', error); - sendToRenderer('update-status', 'Ollama error: ' + error.message); - return { success: false, error: error.message }; + for await (const part of response) { + const token = part.message?.content || ""; + if (token) { + fullText += token; + sendToRenderer(isFirst ? "new-response" : "update-response", fullText); + isFirst = false; + } } + + if (fullText.trim()) { + localConversationHistory.push({ + role: "assistant", + content: fullText.trim(), + }); + saveConversationTurn(prompt, fullText); + } + + console.log("[LocalAI] Image response completed"); + sendToRenderer("update-status", "Listening..."); + return { success: true, text: fullText, model: ollamaModel }; + } catch (error) { + console.error("[LocalAI] Image error:", error); + sendToRenderer("update-status", "Ollama error: " + error.message); + return { success: false, error: error.message }; + } } module.exports = { - initializeLocalSession, - processLocalAudio, - closeLocalSession, - isLocalSessionActive, - sendLocalText, - sendLocalImage, + initializeLocalSession, + processLocalAudio, + closeLocalSession, + isLocalSessionActive, + sendLocalText, + sendLocalImage, }; diff --git a/src/utils/whisperWorker.js b/src/utils/whisperWorker.js new file mode 100644 index 0000000..c2482e0 --- /dev/null +++ b/src/utils/whisperWorker.js @@ -0,0 +1,150 @@ +/** + * Whisper Worker — runs ONNX Runtime in an isolated child process. + * + * The main Electron process forks this file and communicates via IPC messages. + * If ONNX Runtime crashes (SIGSEGV/SIGABRT inside the native Metal or CPU + * execution provider), only this worker dies — the main process survives and + * can respawn the worker automatically. + * + * Protocol (parent ↔ worker): + * parent → worker: + * { type: 'load', modelName, cacheDir } + * { type: 'transcribe', audioBase64 } // PCM 16-bit 16kHz as base64 + * { type: 'shutdown' } + * + * worker → parent: + * { type: 'load-result', success, error? } + * { type: 'transcribe-result', success, text?, error? } + * { type: 'status', message } + * { type: 'ready' } + */ + +let whisperPipeline = null; + +function pcm16ToFloat32(pcm16Buffer) { + if (!pcm16Buffer || pcm16Buffer.length === 0) { + return new Float32Array(0); + } + const alignedLength = + pcm16Buffer.length % 2 === 0 ? pcm16Buffer.length : pcm16Buffer.length - 1; + const samples = alignedLength / 2; + const float32 = new Float32Array(samples); + for (let i = 0; i < samples; i++) { + float32[i] = pcm16Buffer.readInt16LE(i * 2) / 32768; + } + return float32; +} + +async function loadModel(modelName, cacheDir) { + if (whisperPipeline) { + send({ type: "load-result", success: true }); + return; + } + + try { + send({ + type: "status", + message: "Loading Whisper model (first time may take a while)...", + }); + const { pipeline, env } = await import("@huggingface/transformers"); + env.cacheDir = cacheDir; + whisperPipeline = await pipeline( + "automatic-speech-recognition", + modelName, + { + dtype: "q8", + device: "cpu", + }, + ); + send({ type: "load-result", success: true }); + } catch (error) { + send({ type: "load-result", success: false, error: error.message }); + } +} + +async function transcribe(audioBase64) { + if (!whisperPipeline) { + send({ + type: "transcribe-result", + success: false, + error: "Whisper pipeline not loaded", + }); + return; + } + + try { + const pcm16Buffer = Buffer.from(audioBase64, "base64"); + + if (pcm16Buffer.length < 2) { + send({ + type: "transcribe-result", + success: false, + error: "Audio buffer too small", + }); + return; + } + + // Cap at ~30 seconds (16kHz, 16-bit mono) + const maxBytes = 16000 * 2 * 30; + const audioData = + pcm16Buffer.length > maxBytes + ? pcm16Buffer.slice(0, maxBytes) + : pcm16Buffer; + + const float32Audio = pcm16ToFloat32(audioData); + if (float32Audio.length === 0) { + send({ + type: "transcribe-result", + success: false, + error: "Empty audio after conversion", + }); + return; + } + + const result = await whisperPipeline(float32Audio, { + sampling_rate: 16000, + language: "en", + task: "transcribe", + }); + + const text = result.text?.trim() || ""; + send({ type: "transcribe-result", success: true, text }); + } catch (error) { + send({ + type: "transcribe-result", + success: false, + error: error.message || String(error), + }); + } +} + +function send(msg) { + try { + if (process.send) { + process.send(msg); + } + } catch (_) { + // Parent may have disconnected + } +} + +process.on("message", (msg) => { + switch (msg.type) { + case "load": + loadModel(msg.modelName, msg.cacheDir).catch((err) => { + send({ type: "load-result", success: false, error: err.message }); + }); + break; + case "transcribe": + transcribe(msg.audioBase64).catch((err) => { + send({ type: "transcribe-result", success: false, error: err.message }); + }); + break; + case "shutdown": + process.exit(0); + break; + } +}); + +// Signal readiness to parent +send({ type: "ready" });