From 80bb6d5e97a682cd2fd0aed017f366e6db2e4781 Mon Sep 17 00:00:00 2001 From: Egor Date: Wed, 3 Dec 2025 14:01:46 -0800 Subject: [PATCH] more accurate auto VRAM approximate, more code renames from backend -> acceleration --- .vscode/settings.json | 7 +-- package.json | 4 +- src/components/App/index.tsx | 6 +- src/components/ImportBackendLink.tsx | 6 +- src/components/screens/Launch/AdvancedTab.tsx | 20 +++--- .../GeneralTab/AccelerationSelector.tsx | 31 +++++----- .../Launch/GeneralTab/GpuDeviceSelector.tsx | 26 +++++--- src/components/screens/Launch/index.tsx | 24 ++++---- src/components/settings/BackendsTab.tsx | 3 - src/hooks/useLaunchLogic.ts | 26 ++++---- src/hooks/useWarnings.ts | 35 ++++++----- src/main/ipc.ts | 6 +- src/main/modules/koboldcpp/launcher/index.ts | 21 ++----- src/main/modules/koboldcpp/model-download.ts | 6 +- src/preload/index.ts | 6 +- src/stores/launchConfig.ts | 20 +++--- src/types/electron.d.ts | 4 +- src/types/index.d.ts | 2 + src/utils/node/vram.ts | 61 ++++++++++++------- yarn.lock | 22 +++---- 20 files changed, 183 insertions(+), 153 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index cbd6fe4..3584bb6 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -10,11 +10,8 @@ "typescriptreact" ], "editor.formatOnSave": true, - "editor.defaultFormatter": "esbenp.prettier-vscode", - "[typescript]": { + "editor.defaultFormatter": "prettier.prettier-vscode", + "[typescriptreact]": { "editor.defaultFormatter": "prettier.prettier-vscode" }, - "[typescriptreact]": { - "editor.defaultFormatter": "esbenp.prettier-vscode" - } } diff --git a/package.json b/package.json index 3b42541..04a42bd 100644 --- a/package.json +++ b/package.json @@ -79,8 +79,8 @@ "execa": "^9.6.1", "lucide-react": "^0.555.0", "mime-types": "^3.0.2", - "react": "^19.2.0", - "react-dom": "^19.2.0", + "react": "^19.2.1", + "react-dom": "^19.2.1", "react-error-boundary": "^6.0.0", "systeminformation": "^5.27.11", "winston": "^3.18.3", diff --git a/src/components/App/index.tsx b/src/components/App/index.tsx index a3f6f98..a09bee9 100644 --- a/src/components/App/index.tsx +++ b/src/components/App/index.tsx @@ -63,8 +63,10 @@ export const App = () => { useEffect(() => { const cleanup = window.electronAPI.kobold.onServerReady(() => { - setIsServerReady(true); - setActiveInterfaceTab(defaultInterfaceTab); + setTimeout(() => { + setIsServerReady(true); + setActiveInterfaceTab(defaultInterfaceTab); + }, 1000); }); return cleanup; diff --git a/src/components/ImportBackendLink.tsx b/src/components/ImportBackendLink.tsx index cc39f67..f130959 100644 --- a/src/components/ImportBackendLink.tsx +++ b/src/components/ImportBackendLink.tsx @@ -1,5 +1,5 @@ import { useState } from 'react'; -import { Text, Anchor } from '@mantine/core'; +import { Text, Anchor, Box } from '@mantine/core'; interface ImportBackendLinkProps { disabled?: boolean; @@ -40,7 +40,7 @@ export const ImportBackendLink = ({ }; return ( - <> + {importError && ( {importError} @@ -58,6 +58,6 @@ export const ImportBackendLink = ({ {importing ? 'Importing...' : 'Select a local file'} - + ); }; diff --git a/src/components/screens/Launch/AdvancedTab.tsx b/src/components/screens/Launch/AdvancedTab.tsx index 895eacd..856bc3e 100644 --- a/src/components/screens/Launch/AdvancedTab.tsx +++ b/src/components/screens/Launch/AdvancedTab.tsx @@ -27,7 +27,7 @@ export const AdvancedTab = () => { quantmatmul, usemmap, debugmode, - backend, + acceleration, moecpu, moeexperts, setAdditionalArguments, @@ -58,7 +58,7 @@ export const AdvancedTab = () => { setAdditionalArguments(updatedArgs); }; - const isGpuBackend = backend === 'cuda' || backend === 'rocm'; + const isGpuAcceleration = acceleration === 'cuda' || acceleration === 'rocm'; useEffect(() => { const detectAccelerationSupport = async () => { @@ -118,15 +118,15 @@ export const AdvancedTab = () => { /> { /> { const { - backend, + acceleration, gpuLayers, autoGpuLayers, model, contextSize, gpuDeviceSelection, flashattention, - setBackend, + setAcceleration, setGpuLayers, setAutoGpuLayers, } = useLaunchConfigStore(); @@ -57,9 +57,9 @@ export const AccelerationSelector = () => { }, []); useEffect(() => { - if (availableAccelerations.length > 0 && backend) { + if (availableAccelerations.length > 0 && acceleration) { const isAccelerationAvailable = availableAccelerations.some( - (a) => a.value === backend && !a.disabled + (a) => a.value === acceleration && !a.disabled ); if (!isAccelerationAvailable) { @@ -67,15 +67,15 @@ export const AccelerationSelector = () => { (a) => !a.disabled ); if (fallbackAcceleration) { - setBackend(fallbackAcceleration.value); + setAcceleration(fallbackAcceleration.value as Acceleration); } } } - }, [availableAccelerations, backend, setBackend]); + }, [availableAccelerations, acceleration, setAcceleration]); useEffect(() => { const calculateLayers = async () => { - const isCpuOnly = backend === 'cpu' && !isMac; + const isCpuOnly = acceleration === 'cpu' && !isMac; if ( !autoGpuLayers || !model || @@ -118,7 +118,8 @@ export const AccelerationSelector = () => { model, contextSize, availableVramGB, - flashattention + flashattention, + acceleration ); setGpuLayers(result.recommendedLayers); @@ -137,7 +138,7 @@ export const AccelerationSelector = () => { autoGpuLayers, model, contextSize, - backend, + acceleration, gpuDeviceSelection, flashattention, isLoadingAccelerations, @@ -163,14 +164,14 @@ export const AccelerationSelector = () => { } value={ availableAccelerations.some( - (a) => a.value === backend && !a.disabled + (a) => a.value === acceleration && !a.disabled ) - ? backend + ? acceleration : null } onChange={(value) => { if (value) { - setBackend(value); + setAcceleration(value as Acceleration); } }} data={availableAccelerations.map((a) => ({ @@ -223,7 +224,7 @@ export const AccelerationSelector = () => { step={1} size="sm" w={80} - disabled={autoGpuLayers || (backend === 'cpu' && !isMac)} + disabled={autoGpuLayers || (acceleration === 'cpu' && !isMac)} /> { setAutoGpuLayers(event.currentTarget.checked) } size="sm" - disabled={backend === 'cpu' && !isMac} + disabled={acceleration === 'cpu' && !isMac} /> diff --git a/src/components/screens/Launch/GeneralTab/GpuDeviceSelector.tsx b/src/components/screens/Launch/GeneralTab/GpuDeviceSelector.tsx index 6118f0a..046ce11 100644 --- a/src/components/screens/Launch/GeneralTab/GpuDeviceSelector.tsx +++ b/src/components/screens/Launch/GeneralTab/GpuDeviceSelector.tsx @@ -4,8 +4,8 @@ import { useLaunchConfigStore } from '@/stores/launchConfig'; import { Select } from '@/components/Select'; import type { AccelerationOption } from '@/types'; -const GPU_BACKENDS = ['cuda', 'rocm', 'vulkan', 'clblast']; -const TENSOR_SPLIT_BACKENDS = ['cuda', 'rocm', 'vulkan']; +const GPU_ACCELERATIONS = ['cuda', 'rocm', 'vulkan', 'clblast']; +const TENSOR_SPLIT_ACCELERATIONS = ['cuda', 'rocm', 'vulkan']; interface GpuDeviceSelectorProps { availableAccelerations: AccelerationOption[]; @@ -15,7 +15,7 @@ export const GpuDeviceSelector = ({ availableAccelerations, }: GpuDeviceSelectorProps) => { const { - backend, + acceleration, gpuDeviceSelection, tensorSplit, setGpuDeviceSelection, @@ -23,13 +23,17 @@ export const GpuDeviceSelector = ({ } = useLaunchConfigStore(); const selectedAcceleration = availableAccelerations.find( - (a) => a.value === backend + (a) => a.value === acceleration ); - const isGpu = GPU_BACKENDS.includes(backend); + const isGpuAcceleration = GPU_ACCELERATIONS.includes(acceleration); const getDiscreteDeviceCount = () => { if (!selectedAcceleration?.devices) return 0; - if (backend === 'clblast' || backend === 'vulkan' || backend === 'rocm') { + if ( + acceleration === 'clblast' || + acceleration === 'vulkan' || + acceleration === 'rocm' + ) { return selectedAcceleration.devices.filter( (device) => typeof device === 'string' || !device.isIntegrated ).length; @@ -39,24 +43,26 @@ export const GpuDeviceSelector = ({ const hasMultipleDevices = getDiscreteDeviceCount() > 1; const showTensorSplit = - TENSOR_SPLIT_BACKENDS.includes(backend) && + TENSOR_SPLIT_ACCELERATIONS.includes(acceleration) && hasMultipleDevices && gpuDeviceSelection === 'all'; - if (!isGpu || !hasMultipleDevices) { + if (!isGpuAcceleration || !hasMultipleDevices) { return null; } const deviceOptions = (() => { if (!selectedAcceleration?.devices) return []; - if (backend === 'clblast') { + if (acceleration === 'clblast') { return selectedAcceleration.devices .map((device, index) => { if (typeof device === 'object' && device.isIntegrated) { return null; } + const deviceName = typeof device === 'string' ? device : device.name; + return { value: index.toString(), label: `GPU ${index}: ${deviceName}`, @@ -67,7 +73,7 @@ export const GpuDeviceSelector = ({ ); } - if (backend === 'vulkan' || backend === 'rocm') { + if (acceleration === 'vulkan' || acceleration === 'rocm') { const discreteDeviceOptions = selectedAcceleration.devices .map((device, index) => { if (typeof device === 'object' && device.isIntegrated) { diff --git a/src/components/screens/Launch/index.tsx b/src/components/screens/Launch/index.tsx index 5d67ffb..e7fadd9 100644 --- a/src/components/screens/Launch/index.tsx +++ b/src/components/screens/Launch/index.tsx @@ -11,7 +11,7 @@ import { ImageGenerationTab } from '@/components/screens/Launch/ImageGenerationT import { WarningDisplay } from '@/components/WarningDisplay'; import { ConfigFileManager } from '@/components/screens/Launch/ConfigFileManager'; import { DEFAULT_MODEL_URL } from '@/constants'; -import type { ConfigFile } from '@/types'; +import type { Acceleration, ConfigFile } from '@/types'; interface LaunchScreenProps { onLaunch: () => void; @@ -47,7 +47,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => { quantmatmul, usemmap, debugmode, - backend, + acceleration, gpuDeviceSelection, gpuPlatform, tensorSplit, @@ -66,7 +66,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => { parseAndApplyConfigFile, loadConfigFromFile, setModel, - setBackend, + setAcceleration, } = useLaunchConfigStore(); const { isLaunching, handleLaunch } = useLaunchLogic({ @@ -78,7 +78,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => { const { warnings: combinedWarnings } = useWarnings({ model, sdmodel, - backend, + acceleration, configLoaded, }); @@ -86,10 +86,10 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => { const accelerations = await window.electronAPI.kobold.getAvailableAccelerations(); - if (!backend && accelerations && accelerations.length > 0) { - setBackend(accelerations[0].value); + if (!acceleration && accelerations && accelerations.length > 0) { + setAcceleration(accelerations[0].value as Acceleration); } - }, [backend, setBackend]); + }, [acceleration, setAcceleration]); const setInitialDefaults = useCallback( (currentModel: string, currentSdModel: string) => { @@ -177,9 +177,9 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => { debugmode, moecpu, moeexperts, - usecuda: backend === 'cuda' || backend === 'rocm', - usevulkan: backend === 'vulkan', - useclblast: backend === 'clblast', + usecuda: acceleration === 'cuda' || acceleration === 'rocm', + usevulkan: acceleration === 'vulkan', + useclblast: acceleration === 'clblast', gpuDeviceSelection, tensorSplit, sdmodel, @@ -295,7 +295,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => { flashattention, noavx2, failsafe, - backend, + acceleration, lowvram, gpuDeviceSelection, gpuPlatform, @@ -333,7 +333,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => { flashattention, noavx2, failsafe, - backend, + acceleration, lowvram, gpuDeviceSelection, gpuPlatform, diff --git a/src/components/settings/BackendsTab.tsx b/src/components/settings/BackendsTab.tsx index 9fd6ae6..50f7b07 100644 --- a/src/components/settings/BackendsTab.tsx +++ b/src/components/settings/BackendsTab.tsx @@ -7,7 +7,6 @@ import { Loader, Center, Anchor, - Divider, } from '@mantine/core'; import { ExternalLink } from 'lucide-react'; import { DownloadCard } from '@/components/DownloadCard'; @@ -327,8 +326,6 @@ export const BackendsTab = () => { )} - - { const args: string[] = []; - const isGpuBackend = launchArgs.backend && launchArgs.backend !== 'cpu'; + const isGpuAcceleration = + launchArgs.acceleration && launchArgs.acceleration !== 'cpu'; - if (isGpuBackend) { + if (isGpuAcceleration) { if (launchArgs.autoGpuLayers && launchArgs.gpuLayers > 0) { args.push('--gpulayers', launchArgs.gpuLayers.toString()); } else if (!launchArgs.autoGpuLayers && launchArgs.gpuLayers > 0) { @@ -213,8 +214,8 @@ const buildBackendArgs = (launchArgs: LaunchArgs, platform: string) => { return args; } - if (!launchArgs.backend || launchArgs.backend === 'cpu') { - if (launchArgs.backend === 'cpu') { + if (!launchArgs.acceleration || launchArgs.acceleration === 'cpu') { + if (launchArgs.acceleration === 'cpu') { args.push('--usecpu'); } @@ -222,23 +223,26 @@ const buildBackendArgs = (launchArgs: LaunchArgs, platform: string) => { } const isTensorSplitSupported = - launchArgs.backend === 'cuda' || - launchArgs.backend === 'rocm' || - launchArgs.backend === 'vulkan'; + launchArgs.acceleration === 'cuda' || + launchArgs.acceleration === 'rocm' || + launchArgs.acceleration === 'vulkan'; - if (launchArgs.backend === 'cuda' || launchArgs.backend === 'rocm') { + if ( + launchArgs.acceleration === 'cuda' || + launchArgs.acceleration === 'rocm' + ) { args.push(...buildCudaArgs(launchArgs)); if (launchArgs.gpuDeviceSelection === 'all' && isTensorSplitSupported) { addTensorSplitArgs(args, launchArgs); } - } else if (launchArgs.backend === 'vulkan') { + } else if (launchArgs.acceleration === 'vulkan') { args.push(...buildVulkanArgs()); if (launchArgs.gpuDeviceSelection === 'all' && isTensorSplitSupported) { addTensorSplitArgs(args, launchArgs); } - } else if (launchArgs.backend === 'clblast') { + } else if (launchArgs.acceleration === 'clblast') { args.push(...buildClblastArgs(launchArgs)); } diff --git a/src/hooks/useWarnings.ts b/src/hooks/useWarnings.ts index 0127e6a..cc9e6f0 100644 --- a/src/hooks/useWarnings.ts +++ b/src/hooks/useWarnings.ts @@ -10,7 +10,7 @@ export interface Warning { interface UseWarningsProps { model: string; sdmodel: string; - backend?: string; + acceleration?: string; configLoaded?: boolean; } @@ -92,7 +92,7 @@ const checkGpuWarnings = async ( } warnings.push({ - type: 'warning', + type: 'info', message, }); } @@ -100,11 +100,13 @@ const checkGpuWarnings = async ( return warnings; }; -const checkVramWarnings = async (backend: string): Promise => { +const checkVramWarnings = async (acceleration: string): Promise => { const warnings: Warning[] = []; - const isGpuBackend = ['cuda', 'rocm', 'vulkan', 'clblast'].includes(backend); + const isGpuAcceleration = ['cuda', 'rocm', 'vulkan', 'clblast'].includes( + acceleration + ); - if (isGpuBackend) { + if (isGpuAcceleration) { const gpuMemoryInfo = await window.electronAPI.kobold.detectGPUMemory(); if (gpuMemoryInfo) { @@ -133,12 +135,12 @@ const checkVramWarnings = async (backend: string): Promise => { }; const checkCpuWarnings = ( - backend: string, + acceleration: string, availableAccelerations: AccelerationOption[] ) => { const warnings: Warning[] = []; - if (backend !== 'cpu') { + if (acceleration !== 'cpu') { return warnings; } @@ -157,7 +159,7 @@ const checkCpuWarnings = ( }; const checkBackendWarnings = async (params?: { - backend: string; + acceleration: string; cpuCapabilities: CPUCapabilities | null; availableAccelerations: AccelerationOption[]; }) => { @@ -181,13 +183,16 @@ const checkBackendWarnings = async (params?: { warnings.push(...gpuWarnings); if (params) { - const { backend, cpuCapabilities, availableAccelerations } = params; + const { acceleration, cpuCapabilities, availableAccelerations } = params; - const vramWarnings = await checkVramWarnings(backend); + const vramWarnings = await checkVramWarnings(acceleration); warnings.push(...vramWarnings); if (cpuCapabilities) { - const cpuWarnings = checkCpuWarnings(backend, availableAccelerations); + const cpuWarnings = checkCpuWarnings( + acceleration, + availableAccelerations + ); warnings.push(...cpuWarnings); } } @@ -198,7 +203,7 @@ const checkBackendWarnings = async (params?: { export const useWarnings = ({ model, sdmodel, - backend, + acceleration, configLoaded = false, }: UseWarningsProps) => { const [backendWarnings, setBackendWarnings] = useState([]); @@ -209,7 +214,7 @@ export const useWarnings = ({ ); const updateBackendWarnings = useCallback(async () => { - if (!backend) { + if (!acceleration) { setBackendWarnings([]); return; } @@ -220,13 +225,13 @@ export const useWarnings = ({ ]); const result = await checkBackendWarnings({ - backend, + acceleration, cpuCapabilities: cpuCapabilitiesResult, availableAccelerations, }); setBackendWarnings(result); - }, [backend]); + }, [acceleration]); useEffect(() => { // eslint-disable-next-line react-hooks/set-state-in-effect diff --git a/src/main/ipc.ts b/src/main/ipc.ts index d7022f6..f394d5e 100644 --- a/src/main/ipc.ts +++ b/src/main/ipc.ts @@ -1,7 +1,7 @@ import { ipcMain, app } from 'electron'; import { join } from 'path'; import { platform } from 'process'; -import type { Screen } from '@/types'; +import type { Screen, Acceleration } from '@/types'; import { stopKoboldCpp, launchKoboldCppWithCustomFrontends, @@ -182,13 +182,15 @@ export function setupIPCHandlers() { modelPath: string, contextSize: number, availableVramGB: number, - flashAttention: boolean + flashAttention: boolean, + acceleration: Acceleration ) => calculateOptimalGpuLayers({ modelPath, contextSize, availableVramGB, flashAttention, + acceleration, }) ); diff --git a/src/main/modules/koboldcpp/launcher/index.ts b/src/main/modules/koboldcpp/launcher/index.ts index 1a09f14..d0b5ec1 100644 --- a/src/main/modules/koboldcpp/launcher/index.ts +++ b/src/main/modules/koboldcpp/launcher/index.ts @@ -234,15 +234,17 @@ export async function launchKoboldCpp( const handleServerReady = () => { const isKoboldFrontend = frontendPreference === 'koboldcpp' || + frontendPreference === 'llamacpp' || (!isTextMode && imageGenerationFrontendPreference === 'builtin'); if (isKoboldFrontend) { sendToRenderer('server-ready'); } + readyResolve?.({ success: true, pid: child.pid }); }; - child.stdout?.on('data', (data) => { + const handleOutput = (data: Buffer) => { const output = data.toString(); const filtered = debugmode ? output : filterSpam(output); if (filtered.trim()) { @@ -254,21 +256,10 @@ export async function launchKoboldCpp( hasProcessStartedSuccessfully = true; handleServerReady(); } - }); + }; - child.stderr?.on('data', (data) => { - const output = data.toString(); - const filtered = debugmode ? output : filterSpam(output); - if (filtered.trim()) { - sendKoboldOutput(filtered, true); - } - - if (!isReady && output.includes(SERVER_READY_SIGNALS.KOBOLDCPP)) { - isReady = true; - hasProcessStartedSuccessfully = true; - handleServerReady(); - } - }); + child.stdout?.on('data', handleOutput); + child.stderr?.on('data', handleOutput); child.on('exit', (code, signal) => { const isCrash = signal !== null || (code !== null && code !== 0); diff --git a/src/main/modules/koboldcpp/model-download.ts b/src/main/modules/koboldcpp/model-download.ts index 69ec247..f377788 100644 --- a/src/main/modules/koboldcpp/model-download.ts +++ b/src/main/modules/koboldcpp/model-download.ts @@ -270,7 +270,7 @@ export async function resolveModelPath( const localPath = getModelLocalPath(urlOrPath, paramType); if (await pathExists(localPath)) { - sendKoboldOutput(`Using cached model at: ${localPath}\n`); + sendKoboldOutput(`Using cached model at: ${localPath}`); onProgress?.({ type: 'complete', localPath, @@ -278,14 +278,14 @@ export async function resolveModelPath( return localPath; } - sendKoboldOutput(`Downloading model from ${urlOrPath} to ${localPath}...\n`); + sendKoboldOutput(`Downloading model from ${urlOrPath} to ${localPath}...`); const progressCallback = onProgress || ((p: DownloadProgress) => p); try { await downloadFile(urlOrPath, localPath, progressCallback); - sendKoboldOutput(`Model downloaded successfully to: ${localPath}\n\n`); + sendKoboldOutput(`Model downloaded successfully to: ${localPath}\n`); progressCallback({ type: 'complete', localPath, diff --git a/src/preload/index.ts b/src/preload/index.ts index c330ee0..2a30f5b 100644 --- a/src/preload/index.ts +++ b/src/preload/index.ts @@ -63,14 +63,16 @@ const koboldAPI: KoboldAPI = { modelPath, contextSize, availableVramGB, - flashAttention + flashAttention, + acceleration ) => ipcRenderer.invoke( 'kobold:calculateOptimalLayers', modelPath, contextSize, availableVramGB, - flashAttention + flashAttention, + acceleration ), stopKoboldCpp: () => ipcRenderer.invoke('kobold:stopKoboldCpp'), onDownloadProgress: (callback) => { diff --git a/src/stores/launchConfig.ts b/src/stores/launchConfig.ts index 7974028..3c92dcb 100644 --- a/src/stores/launchConfig.ts +++ b/src/stores/launchConfig.ts @@ -1,5 +1,5 @@ import { create } from 'zustand'; -import type { ConfigFile, SdConvDirectMode } from '@/types'; +import type { Acceleration, ConfigFile, SdConvDirectMode } from '@/types'; import { IMAGE_MODEL_PRESETS } from '@/constants/imageModelPresets'; import { DEFAULT_AUTO_GPU_LAYERS, DEFAULT_CONTEXT_SIZE } from '@/constants'; @@ -25,7 +25,7 @@ interface LaunchConfigState { quantmatmul: boolean; usemmap: boolean; debugmode: boolean; - backend: string; + acceleration: Acceleration; gpuDeviceSelection: string; tensorSplit: string; gpuPlatform: number; @@ -65,7 +65,7 @@ interface LaunchConfigState { setUsemmap: (usemmap: boolean) => void; setDebugmode: (debugmode: boolean) => void; setPreLaunchCommands: (commands: string[]) => void; - setBackend: (backend: string) => void; + setAcceleration: (acceleration: Acceleration) => void; setGpuDeviceSelection: (selection: string) => void; setTensorSplit: (split: string) => void; setGpuPlatform: (platform: number) => void; @@ -125,7 +125,7 @@ export const useLaunchConfigStore = create((set, get) => ({ quantmatmul: true, usemmap: true, debugmode: false, - backend: '', + acceleration: '' as Acceleration, gpuDeviceSelection: '0', tensorSplit: '', gpuPlatform: 0, @@ -170,9 +170,9 @@ export const useLaunchConfigStore = create((set, get) => ({ setUsemmap: (usemmap) => set({ usemmap }), setDebugmode: (debugmode) => set({ debugmode }), setPreLaunchCommands: (commands) => set({ preLaunchCommands: commands }), - setBackend: (backend) => + setAcceleration: (acceleration) => set({ - backend, + acceleration, gpuDeviceSelection: '0', tensorSplit: '', }), @@ -331,7 +331,7 @@ export const useLaunchConfigStore = create((set, get) => ({ if (configData.usecuda === true) { const gpuInfo = await window.electronAPI.kobold.detectGPU(); - updates.backend = gpuInfo.hasNVIDIA ? 'cuda' : 'rocm'; + updates.acceleration = gpuInfo.hasNVIDIA ? 'cuda' : 'rocm'; if ( Array.isArray(configData.usecuda) && @@ -343,17 +343,17 @@ export const useLaunchConfigStore = create((set, get) => ({ updates.quantmatmul = mmqMode === 'mmq'; } } else if (configData.usevulkan === true) { - updates.backend = 'vulkan'; + updates.acceleration = 'vulkan'; } else if ( Array.isArray(configData.useclblast) && configData.useclblast.length === 2 ) { - updates.backend = 'clblast'; + updates.acceleration = 'clblast'; const [deviceIndex, platformIndex] = configData.useclblast; updates.gpuDeviceSelection = deviceIndex.toString(); updates.gpuPlatform = platformIndex; } else { - updates.backend = 'cpu'; + updates.acceleration = 'cpu'; } if (typeof configData.gpuDeviceSelection === 'string') { diff --git a/src/types/electron.d.ts b/src/types/electron.d.ts index fde9c99..75cb6e1 100644 --- a/src/types/electron.d.ts +++ b/src/types/electron.d.ts @@ -6,6 +6,7 @@ import type { SystemMemoryInfo, } from '@/types/hardware'; import type { + Acceleration, AccelerationOption, AccelerationSupport, Screen, @@ -170,7 +171,8 @@ export interface KoboldAPI { modelPath: string, contextSize: number, availableVramGB: number, - flashAttention: boolean + flashAttention: boolean, + acceleration: Acceleration ) => Promise; stopKoboldCpp: () => void; onDownloadProgress: (callback: (progress: number) => void) => () => void; diff --git a/src/types/index.d.ts b/src/types/index.d.ts index 0b5dedd..133e76b 100644 --- a/src/types/index.d.ts +++ b/src/types/index.d.ts @@ -103,6 +103,8 @@ export interface AccelerationSupport { cuda: boolean; } +export type Acceleration = keyof AccelerationSupport | 'cpu'; + export interface ModelAnalysis { general: { architecture: string; diff --git a/src/utils/node/vram.ts b/src/utils/node/vram.ts index 34866f2..804cbf2 100644 --- a/src/utils/node/vram.ts +++ b/src/utils/node/vram.ts @@ -1,30 +1,48 @@ import { gguf } from '@huggingface/gguf'; import { stat } from 'fs/promises'; +import type { Acceleration } from '@/types'; interface VramCalculationParams { modelPath: string; contextSize: number; availableVramGB: number; flashAttention?: boolean; + acceleration: Acceleration; +} + +function getAccelerationOverhead(acceleration: Acceleration) { + switch (acceleration) { + case 'cuda': + return { multiplier: 1.05, computeBufferGB: 0.2, headroomGB: 0.1 }; + case 'vulkan': + return { multiplier: 1.05, computeBufferGB: 0.2, headroomGB: 0.1 }; + case 'rocm': + return { multiplier: 1.15, computeBufferGB: 0.4, headroomGB: 0.2 }; + case 'clblast': + return { multiplier: 1.2, computeBufferGB: 0.5, headroomGB: 0.3 }; + // eslint-disable-next-line no-comments/disallowComments + // assuming metal on macOS which we refer to as "cpu" acceleration + case 'cpu': + return { multiplier: 1.05, computeBufferGB: 0.2, headroomGB: 0.1 }; + default: + return { multiplier: 1.1, computeBufferGB: 0.3, headroomGB: 0.15 }; + } } function estimateContextVram( contextSize: number, layers: number, - embeddingLength: number, + kvDim: number, flashAttention: boolean ) { const bytesPerElement = 2; - let kvCacheSizeBytes = - 2 * contextSize * layers * embeddingLength * bytesPerElement; + let kvCacheSizeBytes = 2 * contextSize * layers * kvDim * bytesPerElement; if (flashAttention) { kvCacheSizeBytes *= 0.5; } - const kvCacheSizeGB = kvCacheSizeBytes / 1024 ** 3; - - return kvCacheSizeGB; + return kvCacheSizeBytes / 1024 ** 3; } export async function calculateOptimalGpuLayers({ @@ -32,6 +50,7 @@ export async function calculateOptimalGpuLayers({ contextSize, availableVramGB, flashAttention = false, + acceleration, }: VramCalculationParams) { const isUrl = modelPath.startsWith('http://') || modelPath.startsWith('https://'); @@ -75,25 +94,26 @@ export async function calculateOptimalGpuLayers({ const headDim = embeddingLength / headCount; const kvDim = headCountKv * headDim; - const modelSizeGB = fileSize / 1024 ** 3; - const vramPerLayerGB = modelSizeGB / totalLayers; + const { multiplier, computeBufferGB, headroomGB } = + getAccelerationOverhead(acceleration); - const headroomGB = 0.1; - const availableForModel = availableVramGB - headroomGB; + const modelSizeGB = fileSize / 1024 ** 3; + const effectiveModelSizeGB = modelSizeGB * multiplier; + const vramPerLayerGB = effectiveModelSizeGB / totalLayers; + + const availableForModel = availableVramGB - computeBufferGB - headroomGB; let recommendedLayers = 0; - let modelVramGB = 0; - let contextVramGB = 0; for (let layers = 1; layers <= totalLayers; layers++) { - modelVramGB = layers * vramPerLayerGB; - contextVramGB = estimateContextVram( + const modelVram = layers * vramPerLayerGB; + const contextVram = estimateContextVram( contextSize, layers, kvDim, flashAttention ); - const totalVram = modelVramGB + contextVramGB; + const totalVram = modelVram + contextVram; if (totalVram <= availableForModel) { recommendedLayers = layers; @@ -102,21 +122,20 @@ export async function calculateOptimalGpuLayers({ } } - const finalContextVram = estimateContextVram( + const modelVramGB = recommendedLayers * vramPerLayerGB; + const contextVramGB = estimateContextVram( contextSize, recommendedLayers, kvDim, flashAttention ); - const estimatedVramUsageGB = - recommendedLayers * vramPerLayerGB + finalContextVram; return { recommendedLayers, totalLayers, - estimatedVramUsageGB, - modelVramGB: recommendedLayers * vramPerLayerGB, - contextVramGB: finalContextVram, + estimatedVramUsageGB: modelVramGB + contextVramGB + computeBufferGB, + modelVramGB, + contextVramGB, headroomGB, }; } diff --git a/yarn.lock b/yarn.lock index ec8b905..08e4b40 100644 --- a/yarn.lock +++ b/yarn.lock @@ -3747,8 +3747,8 @@ __metadata: lucide-react: "npm:^0.555.0" mime-types: "npm:^3.0.2" prettier: "npm:^3.7.4" - react: "npm:^19.2.0" - react-dom: "npm:^19.2.0" + react: "npm:^19.2.1" + react-dom: "npm:^19.2.1" react-error-boundary: "npm:^6.0.0" rollup-plugin-visualizer: "npm:^6.0.5" systeminformation: "npm:^5.27.11" @@ -5593,14 +5593,14 @@ __metadata: languageName: node linkType: hard -"react-dom@npm:^19.2.0": - version: 19.2.0 - resolution: "react-dom@npm:19.2.0" +"react-dom@npm:^19.2.1": + version: 19.2.1 + resolution: "react-dom@npm:19.2.1" dependencies: scheduler: "npm:^0.27.0" peerDependencies: - react: ^19.2.0 - checksum: 10c0/fa2cae05248d01288e91523b590ce4e7635b1e13f1344e225f850d722a8da037bf0782f63b1c1d46353334e0c696909b82e582f8cad607948fde6f7646cc18d9 + react: ^19.2.1 + checksum: 10c0/e56b6b3d72314df580ca800b70a69a21c6372703c8f45d9b5451ca6519faefb2496d76ffa9c5adb94136d2bbf2fd303d0dfc208a2cd77ede3132877471af9470 languageName: node linkType: hard @@ -5703,10 +5703,10 @@ __metadata: languageName: node linkType: hard -"react@npm:^19.2.0": - version: 19.2.0 - resolution: "react@npm:19.2.0" - checksum: 10c0/1b6d64eacb9324725bfe1e7860cb7a6b8a34bc89a482920765ebff5c10578eb487e6b46b2f0df263bd27a25edbdae2c45e5ea5d81ae61404301c1a7192c38330 +"react@npm:^19.2.1": + version: 19.2.1 + resolution: "react@npm:19.2.1" + checksum: 10c0/2b5eaf407abb3db84090434c20d6c5a8e447ab7abcd8fe9eaf1ddc299babcf31284ee9db7ea5671d21c85ac5298bd632fa1a7da1ed78d5b368a537f5e1cd5d62 languageName: node linkType: hard