From 80bb6d5e97a682cd2fd0aed017f366e6db2e4781 Mon Sep 17 00:00:00 2001
From: Egor <egor@philippov.ca>
Date: Wed, 3 Dec 2025 14:01:46 -0800
Subject: [PATCH] more accurate auto VRAM approximation, more code renames from
 backend -> acceleration

---
 .vscode/settings.json                         |  7 +--
 package.json                                  |  4 +-
 src/components/App/index.tsx                  |  6 +-
 src/components/ImportBackendLink.tsx          |  6 +-
 src/components/screens/Launch/AdvancedTab.tsx | 20 +++---
 .../GeneralTab/AccelerationSelector.tsx       | 31 +++++-----
 .../Launch/GeneralTab/GpuDeviceSelector.tsx   | 26 +++++---
 src/components/screens/Launch/index.tsx       | 24 ++++----
 src/components/settings/BackendsTab.tsx       |  3 -
 src/hooks/useLaunchLogic.ts                   | 26 ++++----
 src/hooks/useWarnings.ts                      | 35 ++++++-----
 src/main/ipc.ts                               |  6 +-
 src/main/modules/koboldcpp/launcher/index.ts  | 21 ++-----
 src/main/modules/koboldcpp/model-download.ts  |  6 +-
 src/preload/index.ts                          |  6 +-
 src/stores/launchConfig.ts                    | 20 +++---
 src/types/electron.d.ts                       |  4 +-
 src/types/index.d.ts                          |  2 +
 src/utils/node/vram.ts                        | 61 ++++++++++++-------
 yarn.lock                                     | 22 +++----
 20 files changed, 183 insertions(+), 153 deletions(-)
diff --git a/.vscode/settings.json b/.vscode/settings.json
index cbd6fe4..3584bb6 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -10,11 +10,8 @@
     "typescriptreact"
   ],
   "editor.formatOnSave": true,
-  "editor.defaultFormatter": "esbenp.prettier-vscode",
-  "[typescript]": {
+  "editor.defaultFormatter": "prettier.prettier-vscode",
+  "[typescriptreact]": {
     "editor.defaultFormatter": "prettier.prettier-vscode"
   },
-  "[typescriptreact]": {
-    "editor.defaultFormatter": "esbenp.prettier-vscode"
-  }
 }
diff --git a/package.json b/package.json
index 3b42541..04a42bd 100644
--- a/package.json
+++ b/package.json
@@ -79,8 +79,8 @@
     "execa": "^9.6.1",
     "lucide-react": "^0.555.0",
     "mime-types": "^3.0.2",
-    "react": "^19.2.0",
-    "react-dom": "^19.2.0",
+    "react": "^19.2.1",
+    "react-dom": "^19.2.1",
     "react-error-boundary": "^6.0.0",
     "systeminformation": "^5.27.11",
     "winston": "^3.18.3",
diff --git a/src/components/App/index.tsx b/src/components/App/index.tsx
index a3f6f98..a09bee9 100644
--- a/src/components/App/index.tsx
+++ b/src/components/App/index.tsx
@@ -63,8 +63,10 @@ export const App = () => {
 
   useEffect(() => {
     const cleanup = window.electronAPI.kobold.onServerReady(() => {
-      setIsServerReady(true);
-      setActiveInterfaceTab(defaultInterfaceTab);
+      setTimeout(() => {
+        setIsServerReady(true);
+        setActiveInterfaceTab(defaultInterfaceTab);
+      }, 1000);
     });
 
     return cleanup;
diff --git a/src/components/ImportBackendLink.tsx b/src/components/ImportBackendLink.tsx
index cc39f67..f130959 100644
--- a/src/components/ImportBackendLink.tsx
+++ b/src/components/ImportBackendLink.tsx
@@ -1,5 +1,5 @@
 import { useState } from 'react';
-import { Text, Anchor } from '@mantine/core';
+import { Text, Anchor, Box } from '@mantine/core';
 
 interface ImportBackendLinkProps {
   disabled?: boolean;
@@ -40,7 +40,7 @@ export const ImportBackendLink = ({
   };
 
   return (
-    <>
+    <Box mt="xs">
       {importError && (
         <Text size="sm" c="red" ta="center" mb="xs">
           {importError}
@@ -58,6 +58,6 @@ export const ImportBackendLink = ({
           {importing ? 'Importing...' : 'Select a local file'}
         </Anchor>
       </Text>
-    </>
+    </Box>
   );
 };
diff --git a/src/components/screens/Launch/AdvancedTab.tsx b/src/components/screens/Launch/AdvancedTab.tsx
index 895eacd..856bc3e 100644
--- a/src/components/screens/Launch/AdvancedTab.tsx
+++ b/src/components/screens/Launch/AdvancedTab.tsx
@@ -27,7 +27,7 @@ export const AdvancedTab = () => {
     quantmatmul,
     usemmap,
     debugmode,
-    backend,
+    acceleration,
     moecpu,
     moeexperts,
     setAdditionalArguments,
@@ -58,7 +58,7 @@ export const AdvancedTab = () => {
     setAdditionalArguments(updatedArgs);
   };
 
-  const isGpuBackend = backend === 'cuda' || backend === 'rocm';
+  const isGpuAcceleration = acceleration === 'cuda' || acceleration === 'rocm';
 
   useEffect(() => {
     const detectAccelerationSupport = async () => {
@@ -118,15 +118,15 @@ export const AdvancedTab = () => {
           />
 
           <CheckboxWithTooltip
-            checked={quantmatmul && isGpuBackend}
+            checked={quantmatmul && isGpuAcceleration}
             onChange={setQuantmatmul}
             label="QuantMatMul"
             tooltip={
-              !isGpuBackend
-                ? 'QuantMatMul is only available for CUDA and ROCm backends.'
+              !isGpuAcceleration
+                ? 'QuantMatMul is only available for CUDA and ROCm accelerations.'
                 : 'Enable MMQ mode to use finetuned kernels instead of default CuBLAS/HipBLAS for prompt processing.'
             }
-            disabled={!isGpuBackend}
+            disabled={!isGpuAcceleration}
           />
 
           <CheckboxWithTooltip
@@ -149,15 +149,15 @@ export const AdvancedTab = () => {
           />
 
           <CheckboxWithTooltip
-            checked={lowvram && isGpuBackend}
+            checked={lowvram && isGpuAcceleration}
             onChange={setLowvram}
             label="Low VRAM"
             tooltip={
-              !isGpuBackend
-                ? 'Low VRAM mode is only available for CUDA and ROCm backends.'
+              !isGpuAcceleration
+                ? 'Low VRAM mode is only available for CUDA and ROCm accelerations.'
                 : 'Avoid offloading KV Cache or scratch buffers to VRAM. Allows more layers to fit, but may result in a speed loss.'
             }
-            disabled={!isGpuBackend}
+            disabled={!isGpuAcceleration}
           />
 
           <CheckboxWithTooltip
diff --git a/src/components/screens/Launch/GeneralTab/AccelerationSelector.tsx b/src/components/screens/Launch/GeneralTab/AccelerationSelector.tsx
index 0d33256..50dbd12 100644
--- a/src/components/screens/Launch/GeneralTab/AccelerationSelector.tsx
+++ b/src/components/screens/Launch/GeneralTab/AccelerationSelector.tsx
@@ -4,19 +4,19 @@ import { InfoTooltip } from '@/components/InfoTooltip';
 import { AccelerationSelectItem } from '@/components/screens/Launch/GeneralTab/AccelerationSelectItem';
 import { GpuDeviceSelector } from '@/components/screens/Launch/GeneralTab/GpuDeviceSelector';
 import { useLaunchConfigStore } from '@/stores/launchConfig';
-import type { AccelerationOption } from '@/types';
+import type { Acceleration, AccelerationOption } from '@/types';
 import { Select } from '@/components/Select';
 
 export const AccelerationSelector = () => {
   const {
-    backend,
+    acceleration,
     gpuLayers,
     autoGpuLayers,
     model,
     contextSize,
     gpuDeviceSelection,
     flashattention,
-    setBackend,
+    setAcceleration,
     setGpuLayers,
     setAutoGpuLayers,
   } = useLaunchConfigStore();
@@ -57,9 +57,9 @@ export const AccelerationSelector = () => {
   }, []);
 
   useEffect(() => {
-    if (availableAccelerations.length > 0 && backend) {
+    if (availableAccelerations.length > 0 && acceleration) {
       const isAccelerationAvailable = availableAccelerations.some(
-        (a) => a.value === backend && !a.disabled
+        (a) => a.value === acceleration && !a.disabled
       );
 
       if (!isAccelerationAvailable) {
@@ -67,15 +67,15 @@ export const AccelerationSelector = () => {
           (a) => !a.disabled
         );
         if (fallbackAcceleration) {
-          setBackend(fallbackAcceleration.value);
+          setAcceleration(fallbackAcceleration.value as Acceleration);
         }
       }
     }
-  }, [availableAccelerations, backend, setBackend]);
+  }, [availableAccelerations, acceleration, setAcceleration]);
 
   useEffect(() => {
     const calculateLayers = async () => {
-      const isCpuOnly = backend === 'cpu' && !isMac;
+      const isCpuOnly = acceleration === 'cpu' && !isMac;
       if (
         !autoGpuLayers ||
         !model ||
@@ -118,7 +118,8 @@ export const AccelerationSelector = () => {
           model,
           contextSize,
           availableVramGB,
-          flashattention
+          flashattention,
+          acceleration
         );
 
         setGpuLayers(result.recommendedLayers);
@@ -137,7 +138,7 @@ export const AccelerationSelector = () => {
     autoGpuLayers,
     model,
     contextSize,
-    backend,
+    acceleration,
     gpuDeviceSelection,
     flashattention,
     isLoadingAccelerations,
@@ -163,14 +164,14 @@ export const AccelerationSelector = () => {
             }
             value={
               availableAccelerations.some(
-                (a) => a.value === backend && !a.disabled
+                (a) => a.value === acceleration && !a.disabled
               )
-                ? backend
+                ? acceleration
                 : null
             }
             onChange={(value) => {
               if (value) {
-                setBackend(value);
+                setAcceleration(value as Acceleration);
               }
             }}
             data={availableAccelerations.map((a) => ({
@@ -223,7 +224,7 @@ export const AccelerationSelector = () => {
               step={1}
               size="sm"
               w={80}
-              disabled={autoGpuLayers || (backend === 'cpu' && !isMac)}
+              disabled={autoGpuLayers || (acceleration === 'cpu' && !isMac)}
             />
             <Group gap="xs" align="center">
               <Checkbox
@@ -233,7 +234,7 @@ export const AccelerationSelector = () => {
                   setAutoGpuLayers(event.currentTarget.checked)
                 }
                 size="sm"
-                disabled={backend === 'cpu' && !isMac}
+                disabled={acceleration === 'cpu' && !isMac}
               />
               <InfoTooltip label="Automatically calculate optimal GPU layers based on available VRAM. The calculation accounts for model size, context size and flash attention." />
             </Group>
diff --git a/src/components/screens/Launch/GeneralTab/GpuDeviceSelector.tsx b/src/components/screens/Launch/GeneralTab/GpuDeviceSelector.tsx
index 6118f0a..046ce11 100644
--- a/src/components/screens/Launch/GeneralTab/GpuDeviceSelector.tsx
+++ b/src/components/screens/Launch/GeneralTab/GpuDeviceSelector.tsx
@@ -4,8 +4,8 @@ import { useLaunchConfigStore } from '@/stores/launchConfig';
 import { Select } from '@/components/Select';
 import type { AccelerationOption } from '@/types';
 
-const GPU_BACKENDS = ['cuda', 'rocm', 'vulkan', 'clblast'];
-const TENSOR_SPLIT_BACKENDS = ['cuda', 'rocm', 'vulkan'];
+const GPU_ACCELERATIONS = ['cuda', 'rocm', 'vulkan', 'clblast'];
+const TENSOR_SPLIT_ACCELERATIONS = ['cuda', 'rocm', 'vulkan'];
 
 interface GpuDeviceSelectorProps {
   availableAccelerations: AccelerationOption[];
@@ -15,7 +15,7 @@ export const GpuDeviceSelector = ({
   availableAccelerations,
 }: GpuDeviceSelectorProps) => {
   const {
-    backend,
+    acceleration,
     gpuDeviceSelection,
     tensorSplit,
     setGpuDeviceSelection,
@@ -23,13 +23,17 @@ export const GpuDeviceSelector = ({
   } = useLaunchConfigStore();
 
   const selectedAcceleration = availableAccelerations.find(
-    (a) => a.value === backend
+    (a) => a.value === acceleration
   );
-  const isGpu = GPU_BACKENDS.includes(backend);
+  const isGpuAcceleration = GPU_ACCELERATIONS.includes(acceleration);
 
   const getDiscreteDeviceCount = () => {
     if (!selectedAcceleration?.devices) return 0;
-    if (backend === 'clblast' || backend === 'vulkan' || backend === 'rocm') {
+    if (
+      acceleration === 'clblast' ||
+      acceleration === 'vulkan' ||
+      acceleration === 'rocm'
+    ) {
       return selectedAcceleration.devices.filter(
         (device) => typeof device === 'string' || !device.isIntegrated
       ).length;
@@ -39,24 +43,26 @@ export const GpuDeviceSelector = ({
 
   const hasMultipleDevices = getDiscreteDeviceCount() > 1;
   const showTensorSplit =
-    TENSOR_SPLIT_BACKENDS.includes(backend) &&
+    TENSOR_SPLIT_ACCELERATIONS.includes(acceleration) &&
     hasMultipleDevices &&
     gpuDeviceSelection === 'all';
 
-  if (!isGpu || !hasMultipleDevices) {
+  if (!isGpuAcceleration || !hasMultipleDevices) {
     return null;
   }
 
   const deviceOptions = (() => {
     if (!selectedAcceleration?.devices) return [];
 
-    if (backend === 'clblast') {
+    if (acceleration === 'clblast') {
       return selectedAcceleration.devices
         .map((device, index) => {
           if (typeof device === 'object' && device.isIntegrated) {
             return null;
           }
+
           const deviceName = typeof device === 'string' ? device : device.name;
+
           return {
             value: index.toString(),
             label: `GPU ${index}: ${deviceName}`,
@@ -67,7 +73,7 @@ export const GpuDeviceSelector = ({
         );
     }
 
-    if (backend === 'vulkan' || backend === 'rocm') {
+    if (acceleration === 'vulkan' || acceleration === 'rocm') {
       const discreteDeviceOptions = selectedAcceleration.devices
         .map((device, index) => {
           if (typeof device === 'object' && device.isIntegrated) {
diff --git a/src/components/screens/Launch/index.tsx b/src/components/screens/Launch/index.tsx
index 5d67ffb..e7fadd9 100644
--- a/src/components/screens/Launch/index.tsx
+++ b/src/components/screens/Launch/index.tsx
@@ -11,7 +11,7 @@ import { ImageGenerationTab } from '@/components/screens/Launch/ImageGenerationT
 import { WarningDisplay } from '@/components/WarningDisplay';
 import { ConfigFileManager } from '@/components/screens/Launch/ConfigFileManager';
 import { DEFAULT_MODEL_URL } from '@/constants';
-import type { ConfigFile } from '@/types';
+import type { Acceleration, ConfigFile } from '@/types';
 
 interface LaunchScreenProps {
   onLaunch: () => void;
@@ -47,7 +47,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
     quantmatmul,
     usemmap,
     debugmode,
-    backend,
+    acceleration,
     gpuDeviceSelection,
     gpuPlatform,
     tensorSplit,
@@ -66,7 +66,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
     parseAndApplyConfigFile,
     loadConfigFromFile,
     setModel,
-    setBackend,
+    setAcceleration,
   } = useLaunchConfigStore();
 
   const { isLaunching, handleLaunch } = useLaunchLogic({
@@ -78,7 +78,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
   const { warnings: combinedWarnings } = useWarnings({
     model,
     sdmodel,
-    backend,
+    acceleration,
     configLoaded,
   });
 
@@ -86,10 +86,10 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
     const accelerations =
       await window.electronAPI.kobold.getAvailableAccelerations();
 
-    if (!backend && accelerations && accelerations.length > 0) {
-      setBackend(accelerations[0].value);
+    if (!acceleration && accelerations && accelerations.length > 0) {
+      setAcceleration(accelerations[0].value as Acceleration);
     }
-  }, [backend, setBackend]);
+  }, [acceleration, setAcceleration]);
 
   const setInitialDefaults = useCallback(
     (currentModel: string, currentSdModel: string) => {
@@ -177,9 +177,9 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
     debugmode,
     moecpu,
     moeexperts,
-    usecuda: backend === 'cuda' || backend === 'rocm',
-    usevulkan: backend === 'vulkan',
-    useclblast: backend === 'clblast',
+    usecuda: acceleration === 'cuda' || acceleration === 'rocm',
+    usevulkan: acceleration === 'vulkan',
+    useclblast: acceleration === 'clblast',
     gpuDeviceSelection,
     tensorSplit,
     sdmodel,
@@ -295,7 +295,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
       flashattention,
       noavx2,
       failsafe,
-      backend,
+      acceleration,
       lowvram,
       gpuDeviceSelection,
       gpuPlatform,
@@ -333,7 +333,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
     flashattention,
     noavx2,
     failsafe,
-    backend,
+    acceleration,
     lowvram,
     gpuDeviceSelection,
     gpuPlatform,
diff --git a/src/components/settings/BackendsTab.tsx b/src/components/settings/BackendsTab.tsx
index 9fd6ae6..50f7b07 100644
--- a/src/components/settings/BackendsTab.tsx
+++ b/src/components/settings/BackendsTab.tsx
@@ -7,7 +7,6 @@ import {
   Loader,
   Center,
   Anchor,
-  Divider,
 } from '@mantine/core';
 import { ExternalLink } from 'lucide-react';
 import { DownloadCard } from '@/components/DownloadCard';
@@ -327,8 +326,6 @@ export const BackendsTab = () => {
         </Card>
       )}
 
-      <Divider my="md" />
-
       <ImportBackendLink
         disabled={isDisabled}
         onSuccess={loadInstalledBackends}
diff --git a/src/hooks/useLaunchLogic.ts b/src/hooks/useLaunchLogic.ts
index f066b6e..3573030 100644
--- a/src/hooks/useLaunchLogic.ts
+++ b/src/hooks/useLaunchLogic.ts
@@ -22,7 +22,7 @@ interface LaunchArgs {
   flashattention: boolean;
   noavx2: boolean;
   failsafe: boolean;
-  backend: string;
+  acceleration: string;
   lowvram: boolean;
   gpuDeviceSelection: string;
   gpuPlatform: number;
@@ -97,9 +97,10 @@ const buildModelArgs = (
 const buildConfigArgs = (isImageMode: boolean, launchArgs: LaunchArgs) => {
   const args: string[] = [];
 
-  const isGpuBackend = launchArgs.backend && launchArgs.backend !== 'cpu';
+  const isGpuAcceleration =
+    launchArgs.acceleration && launchArgs.acceleration !== 'cpu';
 
-  if (isGpuBackend) {
+  if (isGpuAcceleration) {
     if (launchArgs.autoGpuLayers && launchArgs.gpuLayers > 0) {
       args.push('--gpulayers', launchArgs.gpuLayers.toString());
     } else if (!launchArgs.autoGpuLayers && launchArgs.gpuLayers > 0) {
@@ -213,8 +214,8 @@ const buildBackendArgs = (launchArgs: LaunchArgs, platform: string) => {
     return args;
   }
 
-  if (!launchArgs.backend || launchArgs.backend === 'cpu') {
-    if (launchArgs.backend === 'cpu') {
+  if (!launchArgs.acceleration || launchArgs.acceleration === 'cpu') {
+    if (launchArgs.acceleration === 'cpu') {
       args.push('--usecpu');
     }
 
@@ -222,23 +223,26 @@ const buildBackendArgs = (launchArgs: LaunchArgs, platform: string) => {
   }
 
   const isTensorSplitSupported =
-    launchArgs.backend === 'cuda' ||
-    launchArgs.backend === 'rocm' ||
-    launchArgs.backend === 'vulkan';
+    launchArgs.acceleration === 'cuda' ||
+    launchArgs.acceleration === 'rocm' ||
+    launchArgs.acceleration === 'vulkan';
 
-  if (launchArgs.backend === 'cuda' || launchArgs.backend === 'rocm') {
+  if (
+    launchArgs.acceleration === 'cuda' ||
+    launchArgs.acceleration === 'rocm'
+  ) {
     args.push(...buildCudaArgs(launchArgs));
 
     if (launchArgs.gpuDeviceSelection === 'all' && isTensorSplitSupported) {
       addTensorSplitArgs(args, launchArgs);
     }
-  } else if (launchArgs.backend === 'vulkan') {
+  } else if (launchArgs.acceleration === 'vulkan') {
     args.push(...buildVulkanArgs());
 
     if (launchArgs.gpuDeviceSelection === 'all' && isTensorSplitSupported) {
       addTensorSplitArgs(args, launchArgs);
     }
-  } else if (launchArgs.backend === 'clblast') {
+  } else if (launchArgs.acceleration === 'clblast') {
     args.push(...buildClblastArgs(launchArgs));
   }
 
diff --git a/src/hooks/useWarnings.ts b/src/hooks/useWarnings.ts
index 0127e6a..cc9e6f0 100644
--- a/src/hooks/useWarnings.ts
+++ b/src/hooks/useWarnings.ts
@@ -10,7 +10,7 @@ export interface Warning {
 interface UseWarningsProps {
   model: string;
   sdmodel: string;
-  backend?: string;
+  acceleration?: string;
   configLoaded?: boolean;
 }
 
@@ -92,7 +92,7 @@ const checkGpuWarnings = async (
     }
 
     warnings.push({
-      type: 'warning',
+      type: 'info',
       message,
     });
   }
@@ -100,11 +100,13 @@ const checkGpuWarnings = async (
   return warnings;
 };
 
-const checkVramWarnings = async (backend: string): Promise<Warning[]> => {
+const checkVramWarnings = async (acceleration: string): Promise<Warning[]> => {
   const warnings: Warning[] = [];
-  const isGpuBackend = ['cuda', 'rocm', 'vulkan', 'clblast'].includes(backend);
+  const isGpuAcceleration = ['cuda', 'rocm', 'vulkan', 'clblast'].includes(
+    acceleration
+  );
 
-  if (isGpuBackend) {
+  if (isGpuAcceleration) {
     const gpuMemoryInfo = await window.electronAPI.kobold.detectGPUMemory();
 
     if (gpuMemoryInfo) {
@@ -133,12 +135,12 @@ const checkVramWarnings = async (backend: string): Promise<Warning[]> => {
 };
 
 const checkCpuWarnings = (
-  backend: string,
+  acceleration: string,
   availableAccelerations: AccelerationOption[]
 ) => {
   const warnings: Warning[] = [];
 
-  if (backend !== 'cpu') {
+  if (acceleration !== 'cpu') {
     return warnings;
   }
 
@@ -157,7 +159,7 @@ const checkCpuWarnings = (
 };
 
 const checkBackendWarnings = async (params?: {
-  backend: string;
+  acceleration: string;
   cpuCapabilities: CPUCapabilities | null;
   availableAccelerations: AccelerationOption[];
 }) => {
@@ -181,13 +183,16 @@ const checkBackendWarnings = async (params?: {
   warnings.push(...gpuWarnings);
 
   if (params) {
-    const { backend, cpuCapabilities, availableAccelerations } = params;
+    const { acceleration, cpuCapabilities, availableAccelerations } = params;
 
-    const vramWarnings = await checkVramWarnings(backend);
+    const vramWarnings = await checkVramWarnings(acceleration);
     warnings.push(...vramWarnings);
 
     if (cpuCapabilities) {
-      const cpuWarnings = checkCpuWarnings(backend, availableAccelerations);
+      const cpuWarnings = checkCpuWarnings(
+        acceleration,
+        availableAccelerations
+      );
       warnings.push(...cpuWarnings);
     }
   }
@@ -198,7 +203,7 @@ const checkBackendWarnings = async (params?: {
 export const useWarnings = ({
   model,
   sdmodel,
-  backend,
+  acceleration,
   configLoaded = false,
 }: UseWarningsProps) => {
   const [backendWarnings, setBackendWarnings] = useState<Warning[]>([]);
@@ -209,7 +214,7 @@ export const useWarnings = ({
   );
 
   const updateBackendWarnings = useCallback(async () => {
-    if (!backend) {
+    if (!acceleration) {
       setBackendWarnings([]);
       return;
     }
@@ -220,13 +225,13 @@ export const useWarnings = ({
     ]);
 
     const result = await checkBackendWarnings({
-      backend,
+      acceleration,
       cpuCapabilities: cpuCapabilitiesResult,
       availableAccelerations,
     });
 
     setBackendWarnings(result);
-  }, [backend]);
+  }, [acceleration]);
 
   useEffect(() => {
     // eslint-disable-next-line react-hooks/set-state-in-effect
diff --git a/src/main/ipc.ts b/src/main/ipc.ts
index d7022f6..f394d5e 100644
--- a/src/main/ipc.ts
+++ b/src/main/ipc.ts
@@ -1,7 +1,7 @@
 import { ipcMain, app } from 'electron';
 import { join } from 'path';
 import { platform } from 'process';
-import type { Screen } from '@/types';
+import type { Screen, Acceleration } from '@/types';
 import {
   stopKoboldCpp,
   launchKoboldCppWithCustomFrontends,
@@ -182,13 +182,15 @@ export function setupIPCHandlers() {
       modelPath: string,
       contextSize: number,
       availableVramGB: number,
-      flashAttention: boolean
+      flashAttention: boolean,
+      acceleration: Acceleration
     ) =>
       calculateOptimalGpuLayers({
         modelPath,
         contextSize,
         availableVramGB,
         flashAttention,
+        acceleration,
       })
   );
 
diff --git a/src/main/modules/koboldcpp/launcher/index.ts b/src/main/modules/koboldcpp/launcher/index.ts
index 1a09f14..d0b5ec1 100644
--- a/src/main/modules/koboldcpp/launcher/index.ts
+++ b/src/main/modules/koboldcpp/launcher/index.ts
@@ -234,15 +234,17 @@ export async function launchKoboldCpp(
     const handleServerReady = () => {
       const isKoboldFrontend =
         frontendPreference === 'koboldcpp' ||
+        frontendPreference === 'llamacpp' ||
         (!isTextMode && imageGenerationFrontendPreference === 'builtin');
 
       if (isKoboldFrontend) {
         sendToRenderer('server-ready');
       }
+
       readyResolve?.({ success: true, pid: child.pid });
     };
 
-    child.stdout?.on('data', (data) => {
+    const handleOutput = (data: Buffer) => {
       const output = data.toString();
       const filtered = debugmode ? output : filterSpam(output);
       if (filtered.trim()) {
@@ -254,21 +256,10 @@ export async function launchKoboldCpp(
         hasProcessStartedSuccessfully = true;
         handleServerReady();
       }
-    });
+    };
 
-    child.stderr?.on('data', (data) => {
-      const output = data.toString();
-      const filtered = debugmode ? output : filterSpam(output);
-      if (filtered.trim()) {
-        sendKoboldOutput(filtered, true);
-      }
-
-      if (!isReady && output.includes(SERVER_READY_SIGNALS.KOBOLDCPP)) {
-        isReady = true;
-        hasProcessStartedSuccessfully = true;
-        handleServerReady();
-      }
-    });
+    child.stdout?.on('data', handleOutput);
+    child.stderr?.on('data', handleOutput);
 
     child.on('exit', (code, signal) => {
       const isCrash = signal !== null || (code !== null && code !== 0);
diff --git a/src/main/modules/koboldcpp/model-download.ts b/src/main/modules/koboldcpp/model-download.ts
index 69ec247..f377788 100644
--- a/src/main/modules/koboldcpp/model-download.ts
+++ b/src/main/modules/koboldcpp/model-download.ts
@@ -270,7 +270,7 @@ export async function resolveModelPath(
   const localPath = getModelLocalPath(urlOrPath, paramType);
 
   if (await pathExists(localPath)) {
-    sendKoboldOutput(`Using cached model at: ${localPath}\n`);
+    sendKoboldOutput(`Using cached model at: ${localPath}`);
     onProgress?.({
       type: 'complete',
       localPath,
@@ -278,14 +278,14 @@ export async function resolveModelPath(
     return localPath;
   }
 
-  sendKoboldOutput(`Downloading model from ${urlOrPath} to ${localPath}...\n`);
+  sendKoboldOutput(`Downloading model from ${urlOrPath} to ${localPath}...`);
 
   const progressCallback = onProgress || ((p: DownloadProgress) => p);
 
   try {
     await downloadFile(urlOrPath, localPath, progressCallback);
 
-    sendKoboldOutput(`Model downloaded successfully to: ${localPath}\n\n`);
+    sendKoboldOutput(`Model downloaded successfully to: ${localPath}\n`);
     progressCallback({
       type: 'complete',
       localPath,
diff --git a/src/preload/index.ts b/src/preload/index.ts
index c330ee0..2a30f5b 100644
--- a/src/preload/index.ts
+++ b/src/preload/index.ts
@@ -63,14 +63,16 @@ const koboldAPI: KoboldAPI = {
     modelPath,
     contextSize,
     availableVramGB,
-    flashAttention
+    flashAttention,
+    acceleration
   ) =>
     ipcRenderer.invoke(
       'kobold:calculateOptimalLayers',
       modelPath,
       contextSize,
       availableVramGB,
-      flashAttention
+      flashAttention,
+      acceleration
     ),
   stopKoboldCpp: () => ipcRenderer.invoke('kobold:stopKoboldCpp'),
   onDownloadProgress: (callback) => {
diff --git a/src/stores/launchConfig.ts b/src/stores/launchConfig.ts
index 7974028..3c92dcb 100644
--- a/src/stores/launchConfig.ts
+++ b/src/stores/launchConfig.ts
@@ -1,5 +1,5 @@
 import { create } from 'zustand';
-import type { ConfigFile, SdConvDirectMode } from '@/types';
+import type { Acceleration, ConfigFile, SdConvDirectMode } from '@/types';
 import { IMAGE_MODEL_PRESETS } from '@/constants/imageModelPresets';
 import { DEFAULT_AUTO_GPU_LAYERS, DEFAULT_CONTEXT_SIZE } from '@/constants';
 
@@ -25,7 +25,7 @@ interface LaunchConfigState {
   quantmatmul: boolean;
   usemmap: boolean;
   debugmode: boolean;
-  backend: string;
+  acceleration: Acceleration;
   gpuDeviceSelection: string;
   tensorSplit: string;
   gpuPlatform: number;
@@ -65,7 +65,7 @@ interface LaunchConfigState {
   setUsemmap: (usemmap: boolean) => void;
   setDebugmode: (debugmode: boolean) => void;
   setPreLaunchCommands: (commands: string[]) => void;
-  setBackend: (backend: string) => void;
+  setAcceleration: (acceleration: Acceleration) => void;
   setGpuDeviceSelection: (selection: string) => void;
   setTensorSplit: (split: string) => void;
   setGpuPlatform: (platform: number) => void;
@@ -125,7 +125,7 @@ export const useLaunchConfigStore = create<LaunchConfigState>((set, get) => ({
   quantmatmul: true,
   usemmap: true,
   debugmode: false,
-  backend: '',
+  acceleration: '' as Acceleration,
   gpuDeviceSelection: '0',
   tensorSplit: '',
   gpuPlatform: 0,
@@ -170,9 +170,9 @@ export const useLaunchConfigStore = create<LaunchConfigState>((set, get) => ({
   setUsemmap: (usemmap) => set({ usemmap }),
   setDebugmode: (debugmode) => set({ debugmode }),
   setPreLaunchCommands: (commands) => set({ preLaunchCommands: commands }),
-  setBackend: (backend) =>
+  setAcceleration: (acceleration) =>
     set({
-      backend,
+      acceleration,
       gpuDeviceSelection: '0',
       tensorSplit: '',
     }),
@@ -331,7 +331,7 @@ export const useLaunchConfigStore = create<LaunchConfigState>((set, get) => ({
 
       if (configData.usecuda === true) {
         const gpuInfo = await window.electronAPI.kobold.detectGPU();
-        updates.backend = gpuInfo.hasNVIDIA ? 'cuda' : 'rocm';
+        updates.acceleration = gpuInfo.hasNVIDIA ? 'cuda' : 'rocm';
 
         if (
           Array.isArray(configData.usecuda) &&
@@ -343,17 +343,17 @@ export const useLaunchConfigStore = create<LaunchConfigState>((set, get) => ({
           updates.quantmatmul = mmqMode === 'mmq';
         }
       } else if (configData.usevulkan === true) {
-        updates.backend = 'vulkan';
+        updates.acceleration = 'vulkan';
       } else if (
         Array.isArray(configData.useclblast) &&
         configData.useclblast.length === 2
       ) {
-        updates.backend = 'clblast';
+        updates.acceleration = 'clblast';
         const [deviceIndex, platformIndex] = configData.useclblast;
         updates.gpuDeviceSelection = deviceIndex.toString();
         updates.gpuPlatform = platformIndex;
       } else {
-        updates.backend = 'cpu';
+        updates.acceleration = 'cpu';
       }
 
       if (typeof configData.gpuDeviceSelection === 'string') {
diff --git a/src/types/electron.d.ts b/src/types/electron.d.ts
index fde9c99..75cb6e1 100644
--- a/src/types/electron.d.ts
+++ b/src/types/electron.d.ts
@@ -6,6 +6,7 @@ import type {
   SystemMemoryInfo,
 } from '@/types/hardware';
 import type {
+  Acceleration,
   AccelerationOption,
   AccelerationSupport,
   Screen,
@@ -170,7 +171,8 @@ export interface KoboldAPI {
     modelPath: string,
     contextSize: number,
     availableVramGB: number,
-    flashAttention: boolean
+    flashAttention: boolean,
+    acceleration: Acceleration
   ) => Promise<OptimalLayersResult>;
   stopKoboldCpp: () => void;
   onDownloadProgress: (callback: (progress: number) => void) => () => void;
diff --git a/src/types/index.d.ts b/src/types/index.d.ts
index 0b5dedd..133e76b 100644
--- a/src/types/index.d.ts
+++ b/src/types/index.d.ts
@@ -103,6 +103,8 @@ export interface AccelerationSupport {
   cuda: boolean;
 }
 
+export type Acceleration = keyof AccelerationSupport | 'cpu';
+
 export interface ModelAnalysis {
   general: {
     architecture: string;
diff --git a/src/utils/node/vram.ts b/src/utils/node/vram.ts
index 34866f2..804cbf2 100644
--- a/src/utils/node/vram.ts
+++ b/src/utils/node/vram.ts
@@ -1,30 +1,48 @@
 import { gguf } from '@huggingface/gguf';
 import { stat } from 'fs/promises';
+import type { Acceleration } from '@/types';
 
 interface VramCalculationParams {
   modelPath: string;
   contextSize: number;
   availableVramGB: number;
   flashAttention?: boolean;
+  acceleration: Acceleration;
+}
+
+function getAccelerationOverhead(acceleration: Acceleration) {
+  switch (acceleration) {
+    case 'cuda':
+      return { multiplier: 1.05, computeBufferGB: 0.2, headroomGB: 0.1 };
+    case 'vulkan':
+      return { multiplier: 1.05, computeBufferGB: 0.2, headroomGB: 0.1 };
+    case 'rocm':
+      return { multiplier: 1.15, computeBufferGB: 0.4, headroomGB: 0.2 };
+    case 'clblast':
+      return { multiplier: 1.2, computeBufferGB: 0.5, headroomGB: 0.3 };
+    // eslint-disable-next-line no-comments/disallowComments
+    // assuming metal on macOS which we refer to as "cpu" acceleration
+    case 'cpu':
+      return { multiplier: 1.05, computeBufferGB: 0.2, headroomGB: 0.1 };
+    default:
+      return { multiplier: 1.1, computeBufferGB: 0.3, headroomGB: 0.15 };
+  }
 }
 
 function estimateContextVram(
   contextSize: number,
   layers: number,
-  embeddingLength: number,
+  kvDim: number,
   flashAttention: boolean
 ) {
   const bytesPerElement = 2;
-  let kvCacheSizeBytes =
-    2 * contextSize * layers * embeddingLength * bytesPerElement;
+  let kvCacheSizeBytes = 2 * contextSize * layers * kvDim * bytesPerElement;
 
   if (flashAttention) {
     kvCacheSizeBytes *= 0.5;
   }
 
-  const kvCacheSizeGB = kvCacheSizeBytes / 1024 ** 3;
-
-  return kvCacheSizeGB;
+  return kvCacheSizeBytes / 1024 ** 3;
 }
 
 export async function calculateOptimalGpuLayers({
@@ -32,6 +50,7 @@ export async function calculateOptimalGpuLayers({
   contextSize,
   availableVramGB,
   flashAttention = false,
+  acceleration,
 }: VramCalculationParams) {
   const isUrl =
     modelPath.startsWith('http://') || modelPath.startsWith('https://');
@@ -75,25 +94,26 @@ export async function calculateOptimalGpuLayers({
   const headDim = embeddingLength / headCount;
   const kvDim = headCountKv * headDim;
 
-  const modelSizeGB = fileSize / 1024 ** 3;
-  const vramPerLayerGB = modelSizeGB / totalLayers;
+  const { multiplier, computeBufferGB, headroomGB } =
+    getAccelerationOverhead(acceleration);
 
-  const headroomGB = 0.1;
-  const availableForModel = availableVramGB - headroomGB;
+  const modelSizeGB = fileSize / 1024 ** 3;
+  const effectiveModelSizeGB = modelSizeGB * multiplier;
+  const vramPerLayerGB = effectiveModelSizeGB / totalLayers;
+
+  const availableForModel = availableVramGB - computeBufferGB - headroomGB;
 
   let recommendedLayers = 0;
-  let modelVramGB = 0;
-  let contextVramGB = 0;
 
   for (let layers = 1; layers <= totalLayers; layers++) {
-    modelVramGB = layers * vramPerLayerGB;
-    contextVramGB = estimateContextVram(
+    const modelVram = layers * vramPerLayerGB;
+    const contextVram = estimateContextVram(
       contextSize,
       layers,
       kvDim,
       flashAttention
     );
-    const totalVram = modelVramGB + contextVramGB;
+    const totalVram = modelVram + contextVram;
 
     if (totalVram <= availableForModel) {
       recommendedLayers = layers;
@@ -102,21 +122,20 @@ export async function calculateOptimalGpuLayers({
     }
   }
 
-  const finalContextVram = estimateContextVram(
+  const modelVramGB = recommendedLayers * vramPerLayerGB;
+  const contextVramGB = estimateContextVram(
     contextSize,
     recommendedLayers,
     kvDim,
     flashAttention
   );
-  const estimatedVramUsageGB =
-    recommendedLayers * vramPerLayerGB + finalContextVram;
 
   return {
     recommendedLayers,
     totalLayers,
-    estimatedVramUsageGB,
-    modelVramGB: recommendedLayers * vramPerLayerGB,
-    contextVramGB: finalContextVram,
+    estimatedVramUsageGB: modelVramGB + contextVramGB + computeBufferGB,
+    modelVramGB,
+    contextVramGB,
     headroomGB,
   };
 }
diff --git a/yarn.lock b/yarn.lock
index ec8b905..08e4b40 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -3747,8 +3747,8 @@ __metadata:
     lucide-react: "npm:^0.555.0"
     mime-types: "npm:^3.0.2"
     prettier: "npm:^3.7.4"
-    react: "npm:^19.2.0"
-    react-dom: "npm:^19.2.0"
+    react: "npm:^19.2.1"
+    react-dom: "npm:^19.2.1"
     react-error-boundary: "npm:^6.0.0"
     rollup-plugin-visualizer: "npm:^6.0.5"
     systeminformation: "npm:^5.27.11"
@@ -5593,14 +5593,14 @@ __metadata:
   languageName: node
   linkType: hard
 
-"react-dom@npm:^19.2.0":
-  version: 19.2.0
-  resolution: "react-dom@npm:19.2.0"
+"react-dom@npm:^19.2.1":
+  version: 19.2.1
+  resolution: "react-dom@npm:19.2.1"
   dependencies:
     scheduler: "npm:^0.27.0"
   peerDependencies:
-    react: ^19.2.0
-  checksum: 10c0/fa2cae05248d01288e91523b590ce4e7635b1e13f1344e225f850d722a8da037bf0782f63b1c1d46353334e0c696909b82e582f8cad607948fde6f7646cc18d9
+    react: ^19.2.1
+  checksum: 10c0/e56b6b3d72314df580ca800b70a69a21c6372703c8f45d9b5451ca6519faefb2496d76ffa9c5adb94136d2bbf2fd303d0dfc208a2cd77ede3132877471af9470
   languageName: node
   linkType: hard
 
@@ -5703,10 +5703,10 @@ __metadata:
   languageName: node
   linkType: hard
 
-"react@npm:^19.2.0":
-  version: 19.2.0
-  resolution: "react@npm:19.2.0"
-  checksum: 10c0/1b6d64eacb9324725bfe1e7860cb7a6b8a34bc89a482920765ebff5c10578eb487e6b46b2f0df263bd27a25edbdae2c45e5ea5d81ae61404301c1a7192c38330
+"react@npm:^19.2.1":
+  version: 19.2.1
+  resolution: "react@npm:19.2.1"
+  checksum: 10c0/2b5eaf407abb3db84090434c20d6c5a8e447ab7abcd8fe9eaf1ddc299babcf31284ee9db7ea5671d21c85ac5298bd632fa1a7da1ed78d5b368a537f5e1cd5d62
   languageName: node
   linkType: hard