diff --git a/.vscode/settings.json b/.vscode/settings.json
index cbd6fe4..3584bb6 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -10,11 +10,8 @@
"typescriptreact"
],
"editor.formatOnSave": true,
- "editor.defaultFormatter": "esbenp.prettier-vscode",
- "[typescript]": {
+ "editor.defaultFormatter": "prettier.prettier-vscode",
+ "[typescriptreact]": {
"editor.defaultFormatter": "prettier.prettier-vscode"
},
- "[typescriptreact]": {
- "editor.defaultFormatter": "esbenp.prettier-vscode"
- }
}
diff --git a/package.json b/package.json
index 3b42541..04a42bd 100644
--- a/package.json
+++ b/package.json
@@ -79,8 +79,8 @@
"execa": "^9.6.1",
"lucide-react": "^0.555.0",
"mime-types": "^3.0.2",
- "react": "^19.2.0",
- "react-dom": "^19.2.0",
+ "react": "^19.2.1",
+ "react-dom": "^19.2.1",
"react-error-boundary": "^6.0.0",
"systeminformation": "^5.27.11",
"winston": "^3.18.3",
diff --git a/src/components/App/index.tsx b/src/components/App/index.tsx
index a3f6f98..a09bee9 100644
--- a/src/components/App/index.tsx
+++ b/src/components/App/index.tsx
@@ -63,8 +63,10 @@ export const App = () => {
useEffect(() => {
const cleanup = window.electronAPI.kobold.onServerReady(() => {
- setIsServerReady(true);
- setActiveInterfaceTab(defaultInterfaceTab);
+ setTimeout(() => {
+ setIsServerReady(true);
+ setActiveInterfaceTab(defaultInterfaceTab);
+ }, 1000);
});
return cleanup;
diff --git a/src/components/ImportBackendLink.tsx b/src/components/ImportBackendLink.tsx
index cc39f67..f130959 100644
--- a/src/components/ImportBackendLink.tsx
+++ b/src/components/ImportBackendLink.tsx
@@ -1,5 +1,5 @@
import { useState } from 'react';
-import { Text, Anchor } from '@mantine/core';
+import { Text, Anchor, Box } from '@mantine/core';
interface ImportBackendLinkProps {
disabled?: boolean;
@@ -40,7 +40,7 @@ export const ImportBackendLink = ({
};
return (
- <>
+
{importError && (
{importError}
@@ -58,6 +58,6 @@ export const ImportBackendLink = ({
{importing ? 'Importing...' : 'Select a local file'}
- >
+
);
};
diff --git a/src/components/screens/Launch/AdvancedTab.tsx b/src/components/screens/Launch/AdvancedTab.tsx
index 895eacd..856bc3e 100644
--- a/src/components/screens/Launch/AdvancedTab.tsx
+++ b/src/components/screens/Launch/AdvancedTab.tsx
@@ -27,7 +27,7 @@ export const AdvancedTab = () => {
quantmatmul,
usemmap,
debugmode,
- backend,
+ acceleration,
moecpu,
moeexperts,
setAdditionalArguments,
@@ -58,7 +58,7 @@ export const AdvancedTab = () => {
setAdditionalArguments(updatedArgs);
};
- const isGpuBackend = backend === 'cuda' || backend === 'rocm';
+ const isGpuAcceleration = acceleration === 'cuda' || acceleration === 'rocm';
useEffect(() => {
const detectAccelerationSupport = async () => {
@@ -118,15 +118,15 @@ export const AdvancedTab = () => {
/>
{
/>
{
const {
- backend,
+ acceleration,
gpuLayers,
autoGpuLayers,
model,
contextSize,
gpuDeviceSelection,
flashattention,
- setBackend,
+ setAcceleration,
setGpuLayers,
setAutoGpuLayers,
} = useLaunchConfigStore();
@@ -57,9 +57,9 @@ export const AccelerationSelector = () => {
}, []);
useEffect(() => {
- if (availableAccelerations.length > 0 && backend) {
+ if (availableAccelerations.length > 0 && acceleration) {
const isAccelerationAvailable = availableAccelerations.some(
- (a) => a.value === backend && !a.disabled
+ (a) => a.value === acceleration && !a.disabled
);
if (!isAccelerationAvailable) {
@@ -67,15 +67,15 @@ export const AccelerationSelector = () => {
(a) => !a.disabled
);
if (fallbackAcceleration) {
- setBackend(fallbackAcceleration.value);
+ setAcceleration(fallbackAcceleration.value as Acceleration);
}
}
}
- }, [availableAccelerations, backend, setBackend]);
+ }, [availableAccelerations, acceleration, setAcceleration]);
useEffect(() => {
const calculateLayers = async () => {
- const isCpuOnly = backend === 'cpu' && !isMac;
+ const isCpuOnly = acceleration === 'cpu' && !isMac;
if (
!autoGpuLayers ||
!model ||
@@ -118,7 +118,8 @@ export const AccelerationSelector = () => {
model,
contextSize,
availableVramGB,
- flashattention
+ flashattention,
+ acceleration
);
setGpuLayers(result.recommendedLayers);
@@ -137,7 +138,7 @@ export const AccelerationSelector = () => {
autoGpuLayers,
model,
contextSize,
- backend,
+ acceleration,
gpuDeviceSelection,
flashattention,
isLoadingAccelerations,
@@ -163,14 +164,14 @@ export const AccelerationSelector = () => {
}
value={
availableAccelerations.some(
- (a) => a.value === backend && !a.disabled
+ (a) => a.value === acceleration && !a.disabled
)
- ? backend
+ ? acceleration
: null
}
onChange={(value) => {
if (value) {
- setBackend(value);
+ setAcceleration(value as Acceleration);
}
}}
data={availableAccelerations.map((a) => ({
@@ -223,7 +224,7 @@ export const AccelerationSelector = () => {
step={1}
size="sm"
w={80}
- disabled={autoGpuLayers || (backend === 'cpu' && !isMac)}
+ disabled={autoGpuLayers || (acceleration === 'cpu' && !isMac)}
/>
{
setAutoGpuLayers(event.currentTarget.checked)
}
size="sm"
- disabled={backend === 'cpu' && !isMac}
+ disabled={acceleration === 'cpu' && !isMac}
/>
diff --git a/src/components/screens/Launch/GeneralTab/GpuDeviceSelector.tsx b/src/components/screens/Launch/GeneralTab/GpuDeviceSelector.tsx
index 6118f0a..046ce11 100644
--- a/src/components/screens/Launch/GeneralTab/GpuDeviceSelector.tsx
+++ b/src/components/screens/Launch/GeneralTab/GpuDeviceSelector.tsx
@@ -4,8 +4,8 @@ import { useLaunchConfigStore } from '@/stores/launchConfig';
import { Select } from '@/components/Select';
import type { AccelerationOption } from '@/types';
-const GPU_BACKENDS = ['cuda', 'rocm', 'vulkan', 'clblast'];
-const TENSOR_SPLIT_BACKENDS = ['cuda', 'rocm', 'vulkan'];
+const GPU_ACCELERATIONS = ['cuda', 'rocm', 'vulkan', 'clblast'];
+const TENSOR_SPLIT_ACCELERATIONS = ['cuda', 'rocm', 'vulkan'];
interface GpuDeviceSelectorProps {
availableAccelerations: AccelerationOption[];
@@ -15,7 +15,7 @@ export const GpuDeviceSelector = ({
availableAccelerations,
}: GpuDeviceSelectorProps) => {
const {
- backend,
+ acceleration,
gpuDeviceSelection,
tensorSplit,
setGpuDeviceSelection,
@@ -23,13 +23,17 @@ export const GpuDeviceSelector = ({
} = useLaunchConfigStore();
const selectedAcceleration = availableAccelerations.find(
- (a) => a.value === backend
+ (a) => a.value === acceleration
);
- const isGpu = GPU_BACKENDS.includes(backend);
+ const isGpuAcceleration = GPU_ACCELERATIONS.includes(acceleration);
const getDiscreteDeviceCount = () => {
if (!selectedAcceleration?.devices) return 0;
- if (backend === 'clblast' || backend === 'vulkan' || backend === 'rocm') {
+ if (
+ acceleration === 'clblast' ||
+ acceleration === 'vulkan' ||
+ acceleration === 'rocm'
+ ) {
return selectedAcceleration.devices.filter(
(device) => typeof device === 'string' || !device.isIntegrated
).length;
@@ -39,24 +43,26 @@ export const GpuDeviceSelector = ({
const hasMultipleDevices = getDiscreteDeviceCount() > 1;
const showTensorSplit =
- TENSOR_SPLIT_BACKENDS.includes(backend) &&
+ TENSOR_SPLIT_ACCELERATIONS.includes(acceleration) &&
hasMultipleDevices &&
gpuDeviceSelection === 'all';
- if (!isGpu || !hasMultipleDevices) {
+ if (!isGpuAcceleration || !hasMultipleDevices) {
return null;
}
const deviceOptions = (() => {
if (!selectedAcceleration?.devices) return [];
- if (backend === 'clblast') {
+ if (acceleration === 'clblast') {
return selectedAcceleration.devices
.map((device, index) => {
if (typeof device === 'object' && device.isIntegrated) {
return null;
}
+
const deviceName = typeof device === 'string' ? device : device.name;
+
return {
value: index.toString(),
label: `GPU ${index}: ${deviceName}`,
@@ -67,7 +73,7 @@ export const GpuDeviceSelector = ({
);
}
- if (backend === 'vulkan' || backend === 'rocm') {
+ if (acceleration === 'vulkan' || acceleration === 'rocm') {
const discreteDeviceOptions = selectedAcceleration.devices
.map((device, index) => {
if (typeof device === 'object' && device.isIntegrated) {
diff --git a/src/components/screens/Launch/index.tsx b/src/components/screens/Launch/index.tsx
index 5d67ffb..e7fadd9 100644
--- a/src/components/screens/Launch/index.tsx
+++ b/src/components/screens/Launch/index.tsx
@@ -11,7 +11,7 @@ import { ImageGenerationTab } from '@/components/screens/Launch/ImageGenerationT
import { WarningDisplay } from '@/components/WarningDisplay';
import { ConfigFileManager } from '@/components/screens/Launch/ConfigFileManager';
import { DEFAULT_MODEL_URL } from '@/constants';
-import type { ConfigFile } from '@/types';
+import type { Acceleration, ConfigFile } from '@/types';
interface LaunchScreenProps {
onLaunch: () => void;
@@ -47,7 +47,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
quantmatmul,
usemmap,
debugmode,
- backend,
+ acceleration,
gpuDeviceSelection,
gpuPlatform,
tensorSplit,
@@ -66,7 +66,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
parseAndApplyConfigFile,
loadConfigFromFile,
setModel,
- setBackend,
+ setAcceleration,
} = useLaunchConfigStore();
const { isLaunching, handleLaunch } = useLaunchLogic({
@@ -78,7 +78,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
const { warnings: combinedWarnings } = useWarnings({
model,
sdmodel,
- backend,
+ acceleration,
configLoaded,
});
@@ -86,10 +86,10 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
const accelerations =
await window.electronAPI.kobold.getAvailableAccelerations();
- if (!backend && accelerations && accelerations.length > 0) {
- setBackend(accelerations[0].value);
+ if (!acceleration && accelerations && accelerations.length > 0) {
+ setAcceleration(accelerations[0].value as Acceleration);
}
- }, [backend, setBackend]);
+ }, [acceleration, setAcceleration]);
const setInitialDefaults = useCallback(
(currentModel: string, currentSdModel: string) => {
@@ -177,9 +177,9 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
debugmode,
moecpu,
moeexperts,
- usecuda: backend === 'cuda' || backend === 'rocm',
- usevulkan: backend === 'vulkan',
- useclblast: backend === 'clblast',
+ usecuda: acceleration === 'cuda' || acceleration === 'rocm',
+ usevulkan: acceleration === 'vulkan',
+ useclblast: acceleration === 'clblast',
gpuDeviceSelection,
tensorSplit,
sdmodel,
@@ -295,7 +295,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
flashattention,
noavx2,
failsafe,
- backend,
+ acceleration,
lowvram,
gpuDeviceSelection,
gpuPlatform,
@@ -333,7 +333,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
flashattention,
noavx2,
failsafe,
- backend,
+ acceleration,
lowvram,
gpuDeviceSelection,
gpuPlatform,
diff --git a/src/components/settings/BackendsTab.tsx b/src/components/settings/BackendsTab.tsx
index 9fd6ae6..50f7b07 100644
--- a/src/components/settings/BackendsTab.tsx
+++ b/src/components/settings/BackendsTab.tsx
@@ -7,7 +7,6 @@ import {
Loader,
Center,
Anchor,
- Divider,
} from '@mantine/core';
import { ExternalLink } from 'lucide-react';
import { DownloadCard } from '@/components/DownloadCard';
@@ -327,8 +326,6 @@ export const BackendsTab = () => {
)}
-
-
{
const args: string[] = [];
- const isGpuBackend = launchArgs.backend && launchArgs.backend !== 'cpu';
+ const isGpuAcceleration =
+ launchArgs.acceleration && launchArgs.acceleration !== 'cpu';
- if (isGpuBackend) {
+ if (isGpuAcceleration) {
if (launchArgs.autoGpuLayers && launchArgs.gpuLayers > 0) {
args.push('--gpulayers', launchArgs.gpuLayers.toString());
} else if (!launchArgs.autoGpuLayers && launchArgs.gpuLayers > 0) {
@@ -213,8 +214,8 @@ const buildBackendArgs = (launchArgs: LaunchArgs, platform: string) => {
return args;
}
- if (!launchArgs.backend || launchArgs.backend === 'cpu') {
- if (launchArgs.backend === 'cpu') {
+ if (!launchArgs.acceleration || launchArgs.acceleration === 'cpu') {
+ if (launchArgs.acceleration === 'cpu') {
args.push('--usecpu');
}
@@ -222,23 +223,26 @@ const buildBackendArgs = (launchArgs: LaunchArgs, platform: string) => {
}
const isTensorSplitSupported =
- launchArgs.backend === 'cuda' ||
- launchArgs.backend === 'rocm' ||
- launchArgs.backend === 'vulkan';
+ launchArgs.acceleration === 'cuda' ||
+ launchArgs.acceleration === 'rocm' ||
+ launchArgs.acceleration === 'vulkan';
- if (launchArgs.backend === 'cuda' || launchArgs.backend === 'rocm') {
+ if (
+ launchArgs.acceleration === 'cuda' ||
+ launchArgs.acceleration === 'rocm'
+ ) {
args.push(...buildCudaArgs(launchArgs));
if (launchArgs.gpuDeviceSelection === 'all' && isTensorSplitSupported) {
addTensorSplitArgs(args, launchArgs);
}
- } else if (launchArgs.backend === 'vulkan') {
+ } else if (launchArgs.acceleration === 'vulkan') {
args.push(...buildVulkanArgs());
if (launchArgs.gpuDeviceSelection === 'all' && isTensorSplitSupported) {
addTensorSplitArgs(args, launchArgs);
}
- } else if (launchArgs.backend === 'clblast') {
+ } else if (launchArgs.acceleration === 'clblast') {
args.push(...buildClblastArgs(launchArgs));
}
diff --git a/src/hooks/useWarnings.ts b/src/hooks/useWarnings.ts
index 0127e6a..cc9e6f0 100644
--- a/src/hooks/useWarnings.ts
+++ b/src/hooks/useWarnings.ts
@@ -10,7 +10,7 @@ export interface Warning {
interface UseWarningsProps {
model: string;
sdmodel: string;
- backend?: string;
+ acceleration?: string;
configLoaded?: boolean;
}
@@ -92,7 +92,7 @@ const checkGpuWarnings = async (
}
warnings.push({
- type: 'warning',
+ type: 'info',
message,
});
}
@@ -100,11 +100,13 @@ const checkGpuWarnings = async (
return warnings;
};
-const checkVramWarnings = async (backend: string): Promise => {
+const checkVramWarnings = async (acceleration: string): Promise => {
const warnings: Warning[] = [];
- const isGpuBackend = ['cuda', 'rocm', 'vulkan', 'clblast'].includes(backend);
+ const isGpuAcceleration = ['cuda', 'rocm', 'vulkan', 'clblast'].includes(
+ acceleration
+ );
- if (isGpuBackend) {
+ if (isGpuAcceleration) {
const gpuMemoryInfo = await window.electronAPI.kobold.detectGPUMemory();
if (gpuMemoryInfo) {
@@ -133,12 +135,12 @@ const checkVramWarnings = async (backend: string): Promise => {
};
const checkCpuWarnings = (
- backend: string,
+ acceleration: string,
availableAccelerations: AccelerationOption[]
) => {
const warnings: Warning[] = [];
- if (backend !== 'cpu') {
+ if (acceleration !== 'cpu') {
return warnings;
}
@@ -157,7 +159,7 @@ const checkCpuWarnings = (
};
const checkBackendWarnings = async (params?: {
- backend: string;
+ acceleration: string;
cpuCapabilities: CPUCapabilities | null;
availableAccelerations: AccelerationOption[];
}) => {
@@ -181,13 +183,16 @@ const checkBackendWarnings = async (params?: {
warnings.push(...gpuWarnings);
if (params) {
- const { backend, cpuCapabilities, availableAccelerations } = params;
+ const { acceleration, cpuCapabilities, availableAccelerations } = params;
- const vramWarnings = await checkVramWarnings(backend);
+ const vramWarnings = await checkVramWarnings(acceleration);
warnings.push(...vramWarnings);
if (cpuCapabilities) {
- const cpuWarnings = checkCpuWarnings(backend, availableAccelerations);
+ const cpuWarnings = checkCpuWarnings(
+ acceleration,
+ availableAccelerations
+ );
warnings.push(...cpuWarnings);
}
}
@@ -198,7 +203,7 @@ const checkBackendWarnings = async (params?: {
export const useWarnings = ({
model,
sdmodel,
- backend,
+ acceleration,
configLoaded = false,
}: UseWarningsProps) => {
const [backendWarnings, setBackendWarnings] = useState([]);
@@ -209,7 +214,7 @@ export const useWarnings = ({
);
const updateBackendWarnings = useCallback(async () => {
- if (!backend) {
+ if (!acceleration) {
setBackendWarnings([]);
return;
}
@@ -220,13 +225,13 @@ export const useWarnings = ({
]);
const result = await checkBackendWarnings({
- backend,
+ acceleration,
cpuCapabilities: cpuCapabilitiesResult,
availableAccelerations,
});
setBackendWarnings(result);
- }, [backend]);
+ }, [acceleration]);
useEffect(() => {
// eslint-disable-next-line react-hooks/set-state-in-effect
diff --git a/src/main/ipc.ts b/src/main/ipc.ts
index d7022f6..f394d5e 100644
--- a/src/main/ipc.ts
+++ b/src/main/ipc.ts
@@ -1,7 +1,7 @@
import { ipcMain, app } from 'electron';
import { join } from 'path';
import { platform } from 'process';
-import type { Screen } from '@/types';
+import type { Screen, Acceleration } from '@/types';
import {
stopKoboldCpp,
launchKoboldCppWithCustomFrontends,
@@ -182,13 +182,15 @@ export function setupIPCHandlers() {
modelPath: string,
contextSize: number,
availableVramGB: number,
- flashAttention: boolean
+ flashAttention: boolean,
+ acceleration: Acceleration
) =>
calculateOptimalGpuLayers({
modelPath,
contextSize,
availableVramGB,
flashAttention,
+ acceleration,
})
);
diff --git a/src/main/modules/koboldcpp/launcher/index.ts b/src/main/modules/koboldcpp/launcher/index.ts
index 1a09f14..d0b5ec1 100644
--- a/src/main/modules/koboldcpp/launcher/index.ts
+++ b/src/main/modules/koboldcpp/launcher/index.ts
@@ -234,15 +234,17 @@ export async function launchKoboldCpp(
const handleServerReady = () => {
const isKoboldFrontend =
frontendPreference === 'koboldcpp' ||
+ frontendPreference === 'llamacpp' ||
(!isTextMode && imageGenerationFrontendPreference === 'builtin');
if (isKoboldFrontend) {
sendToRenderer('server-ready');
}
+
readyResolve?.({ success: true, pid: child.pid });
};
- child.stdout?.on('data', (data) => {
+ const handleOutput = (data: Buffer) => {
const output = data.toString();
const filtered = debugmode ? output : filterSpam(output);
if (filtered.trim()) {
@@ -254,21 +256,10 @@ export async function launchKoboldCpp(
hasProcessStartedSuccessfully = true;
handleServerReady();
}
- });
+ };
- child.stderr?.on('data', (data) => {
- const output = data.toString();
- const filtered = debugmode ? output : filterSpam(output);
- if (filtered.trim()) {
- sendKoboldOutput(filtered, true);
- }
-
- if (!isReady && output.includes(SERVER_READY_SIGNALS.KOBOLDCPP)) {
- isReady = true;
- hasProcessStartedSuccessfully = true;
- handleServerReady();
- }
- });
+ child.stdout?.on('data', handleOutput);
+ child.stderr?.on('data', handleOutput);
child.on('exit', (code, signal) => {
const isCrash = signal !== null || (code !== null && code !== 0);
diff --git a/src/main/modules/koboldcpp/model-download.ts b/src/main/modules/koboldcpp/model-download.ts
index 69ec247..f377788 100644
--- a/src/main/modules/koboldcpp/model-download.ts
+++ b/src/main/modules/koboldcpp/model-download.ts
@@ -270,7 +270,7 @@ export async function resolveModelPath(
const localPath = getModelLocalPath(urlOrPath, paramType);
if (await pathExists(localPath)) {
- sendKoboldOutput(`Using cached model at: ${localPath}\n`);
+ sendKoboldOutput(`Using cached model at: ${localPath}`);
onProgress?.({
type: 'complete',
localPath,
@@ -278,14 +278,14 @@ export async function resolveModelPath(
return localPath;
}
- sendKoboldOutput(`Downloading model from ${urlOrPath} to ${localPath}...\n`);
+ sendKoboldOutput(`Downloading model from ${urlOrPath} to ${localPath}...`);
const progressCallback = onProgress || ((p: DownloadProgress) => p);
try {
await downloadFile(urlOrPath, localPath, progressCallback);
- sendKoboldOutput(`Model downloaded successfully to: ${localPath}\n\n`);
+ sendKoboldOutput(`Model downloaded successfully to: ${localPath}\n`);
progressCallback({
type: 'complete',
localPath,
diff --git a/src/preload/index.ts b/src/preload/index.ts
index c330ee0..2a30f5b 100644
--- a/src/preload/index.ts
+++ b/src/preload/index.ts
@@ -63,14 +63,16 @@ const koboldAPI: KoboldAPI = {
modelPath,
contextSize,
availableVramGB,
- flashAttention
+ flashAttention,
+ acceleration
) =>
ipcRenderer.invoke(
'kobold:calculateOptimalLayers',
modelPath,
contextSize,
availableVramGB,
- flashAttention
+ flashAttention,
+ acceleration
),
stopKoboldCpp: () => ipcRenderer.invoke('kobold:stopKoboldCpp'),
onDownloadProgress: (callback) => {
diff --git a/src/stores/launchConfig.ts b/src/stores/launchConfig.ts
index 7974028..3c92dcb 100644
--- a/src/stores/launchConfig.ts
+++ b/src/stores/launchConfig.ts
@@ -1,5 +1,5 @@
import { create } from 'zustand';
-import type { ConfigFile, SdConvDirectMode } from '@/types';
+import type { Acceleration, ConfigFile, SdConvDirectMode } from '@/types';
import { IMAGE_MODEL_PRESETS } from '@/constants/imageModelPresets';
import { DEFAULT_AUTO_GPU_LAYERS, DEFAULT_CONTEXT_SIZE } from '@/constants';
@@ -25,7 +25,7 @@ interface LaunchConfigState {
quantmatmul: boolean;
usemmap: boolean;
debugmode: boolean;
- backend: string;
+ acceleration: Acceleration;
gpuDeviceSelection: string;
tensorSplit: string;
gpuPlatform: number;
@@ -65,7 +65,7 @@ interface LaunchConfigState {
setUsemmap: (usemmap: boolean) => void;
setDebugmode: (debugmode: boolean) => void;
setPreLaunchCommands: (commands: string[]) => void;
- setBackend: (backend: string) => void;
+ setAcceleration: (acceleration: Acceleration) => void;
setGpuDeviceSelection: (selection: string) => void;
setTensorSplit: (split: string) => void;
setGpuPlatform: (platform: number) => void;
@@ -125,7 +125,7 @@ export const useLaunchConfigStore = create((set, get) => ({
quantmatmul: true,
usemmap: true,
debugmode: false,
- backend: '',
+ acceleration: '' as Acceleration,
gpuDeviceSelection: '0',
tensorSplit: '',
gpuPlatform: 0,
@@ -170,9 +170,9 @@ export const useLaunchConfigStore = create((set, get) => ({
setUsemmap: (usemmap) => set({ usemmap }),
setDebugmode: (debugmode) => set({ debugmode }),
setPreLaunchCommands: (commands) => set({ preLaunchCommands: commands }),
- setBackend: (backend) =>
+ setAcceleration: (acceleration) =>
set({
- backend,
+ acceleration,
gpuDeviceSelection: '0',
tensorSplit: '',
}),
@@ -331,7 +331,7 @@ export const useLaunchConfigStore = create((set, get) => ({
if (configData.usecuda === true) {
const gpuInfo = await window.electronAPI.kobold.detectGPU();
- updates.backend = gpuInfo.hasNVIDIA ? 'cuda' : 'rocm';
+ updates.acceleration = gpuInfo.hasNVIDIA ? 'cuda' : 'rocm';
if (
Array.isArray(configData.usecuda) &&
@@ -343,17 +343,17 @@ export const useLaunchConfigStore = create((set, get) => ({
updates.quantmatmul = mmqMode === 'mmq';
}
} else if (configData.usevulkan === true) {
- updates.backend = 'vulkan';
+ updates.acceleration = 'vulkan';
} else if (
Array.isArray(configData.useclblast) &&
configData.useclblast.length === 2
) {
- updates.backend = 'clblast';
+ updates.acceleration = 'clblast';
const [deviceIndex, platformIndex] = configData.useclblast;
updates.gpuDeviceSelection = deviceIndex.toString();
updates.gpuPlatform = platformIndex;
} else {
- updates.backend = 'cpu';
+ updates.acceleration = 'cpu';
}
if (typeof configData.gpuDeviceSelection === 'string') {
diff --git a/src/types/electron.d.ts b/src/types/electron.d.ts
index fde9c99..75cb6e1 100644
--- a/src/types/electron.d.ts
+++ b/src/types/electron.d.ts
@@ -6,6 +6,7 @@ import type {
SystemMemoryInfo,
} from '@/types/hardware';
import type {
+ Acceleration,
AccelerationOption,
AccelerationSupport,
Screen,
@@ -170,7 +171,8 @@ export interface KoboldAPI {
modelPath: string,
contextSize: number,
availableVramGB: number,
- flashAttention: boolean
+ flashAttention: boolean,
+ acceleration: Acceleration
) => Promise;
stopKoboldCpp: () => void;
onDownloadProgress: (callback: (progress: number) => void) => () => void;
diff --git a/src/types/index.d.ts b/src/types/index.d.ts
index 0b5dedd..133e76b 100644
--- a/src/types/index.d.ts
+++ b/src/types/index.d.ts
@@ -103,6 +103,8 @@ export interface AccelerationSupport {
cuda: boolean;
}
+export type Acceleration = keyof AccelerationSupport | 'cpu';
+
export interface ModelAnalysis {
general: {
architecture: string;
diff --git a/src/utils/node/vram.ts b/src/utils/node/vram.ts
index 34866f2..804cbf2 100644
--- a/src/utils/node/vram.ts
+++ b/src/utils/node/vram.ts
@@ -1,30 +1,48 @@
import { gguf } from '@huggingface/gguf';
import { stat } from 'fs/promises';
+import type { Acceleration } from '@/types';
interface VramCalculationParams {
modelPath: string;
contextSize: number;
availableVramGB: number;
flashAttention?: boolean;
+ acceleration: Acceleration;
+}
+
+function getAccelerationOverhead(acceleration: Acceleration) {
+ switch (acceleration) {
+ case 'cuda':
+ return { multiplier: 1.05, computeBufferGB: 0.2, headroomGB: 0.1 };
+ case 'vulkan':
+ return { multiplier: 1.05, computeBufferGB: 0.2, headroomGB: 0.1 };
+ case 'rocm':
+ return { multiplier: 1.15, computeBufferGB: 0.4, headroomGB: 0.2 };
+ case 'clblast':
+ return { multiplier: 1.2, computeBufferGB: 0.5, headroomGB: 0.3 };
+ // eslint-disable-next-line no-comments/disallowComments
+ // assuming metal on macOS which we refer to as "cpu" acceleration
+ case 'cpu':
+ return { multiplier: 1.05, computeBufferGB: 0.2, headroomGB: 0.1 };
+ default:
+ return { multiplier: 1.1, computeBufferGB: 0.3, headroomGB: 0.15 };
+ }
}
function estimateContextVram(
contextSize: number,
layers: number,
- embeddingLength: number,
+ kvDim: number,
flashAttention: boolean
) {
const bytesPerElement = 2;
- let kvCacheSizeBytes =
- 2 * contextSize * layers * embeddingLength * bytesPerElement;
+ let kvCacheSizeBytes = 2 * contextSize * layers * kvDim * bytesPerElement;
if (flashAttention) {
kvCacheSizeBytes *= 0.5;
}
- const kvCacheSizeGB = kvCacheSizeBytes / 1024 ** 3;
-
- return kvCacheSizeGB;
+ return kvCacheSizeBytes / 1024 ** 3;
}
export async function calculateOptimalGpuLayers({
@@ -32,6 +50,7 @@ export async function calculateOptimalGpuLayers({
contextSize,
availableVramGB,
flashAttention = false,
+ acceleration,
}: VramCalculationParams) {
const isUrl =
modelPath.startsWith('http://') || modelPath.startsWith('https://');
@@ -75,25 +94,26 @@ export async function calculateOptimalGpuLayers({
const headDim = embeddingLength / headCount;
const kvDim = headCountKv * headDim;
- const modelSizeGB = fileSize / 1024 ** 3;
- const vramPerLayerGB = modelSizeGB / totalLayers;
+ const { multiplier, computeBufferGB, headroomGB } =
+ getAccelerationOverhead(acceleration);
- const headroomGB = 0.1;
- const availableForModel = availableVramGB - headroomGB;
+ const modelSizeGB = fileSize / 1024 ** 3;
+ const effectiveModelSizeGB = modelSizeGB * multiplier;
+ const vramPerLayerGB = effectiveModelSizeGB / totalLayers;
+
+ const availableForModel = availableVramGB - computeBufferGB - headroomGB;
let recommendedLayers = 0;
- let modelVramGB = 0;
- let contextVramGB = 0;
for (let layers = 1; layers <= totalLayers; layers++) {
- modelVramGB = layers * vramPerLayerGB;
- contextVramGB = estimateContextVram(
+ const modelVram = layers * vramPerLayerGB;
+ const contextVram = estimateContextVram(
contextSize,
layers,
kvDim,
flashAttention
);
- const totalVram = modelVramGB + contextVramGB;
+ const totalVram = modelVram + contextVram;
if (totalVram <= availableForModel) {
recommendedLayers = layers;
@@ -102,21 +122,20 @@ export async function calculateOptimalGpuLayers({
}
}
- const finalContextVram = estimateContextVram(
+ const modelVramGB = recommendedLayers * vramPerLayerGB;
+ const contextVramGB = estimateContextVram(
contextSize,
recommendedLayers,
kvDim,
flashAttention
);
- const estimatedVramUsageGB =
- recommendedLayers * vramPerLayerGB + finalContextVram;
return {
recommendedLayers,
totalLayers,
- estimatedVramUsageGB,
- modelVramGB: recommendedLayers * vramPerLayerGB,
- contextVramGB: finalContextVram,
+ estimatedVramUsageGB: modelVramGB + contextVramGB + computeBufferGB,
+ modelVramGB,
+ contextVramGB,
headroomGB,
};
}
diff --git a/yarn.lock b/yarn.lock
index ec8b905..08e4b40 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -3747,8 +3747,8 @@ __metadata:
lucide-react: "npm:^0.555.0"
mime-types: "npm:^3.0.2"
prettier: "npm:^3.7.4"
- react: "npm:^19.2.0"
- react-dom: "npm:^19.2.0"
+ react: "npm:^19.2.1"
+ react-dom: "npm:^19.2.1"
react-error-boundary: "npm:^6.0.0"
rollup-plugin-visualizer: "npm:^6.0.5"
systeminformation: "npm:^5.27.11"
@@ -5593,14 +5593,14 @@ __metadata:
languageName: node
linkType: hard
-"react-dom@npm:^19.2.0":
- version: 19.2.0
- resolution: "react-dom@npm:19.2.0"
+"react-dom@npm:^19.2.1":
+ version: 19.2.1
+ resolution: "react-dom@npm:19.2.1"
dependencies:
scheduler: "npm:^0.27.0"
peerDependencies:
- react: ^19.2.0
- checksum: 10c0/fa2cae05248d01288e91523b590ce4e7635b1e13f1344e225f850d722a8da037bf0782f63b1c1d46353334e0c696909b82e582f8cad607948fde6f7646cc18d9
+ react: ^19.2.1
+ checksum: 10c0/e56b6b3d72314df580ca800b70a69a21c6372703c8f45d9b5451ca6519faefb2496d76ffa9c5adb94136d2bbf2fd303d0dfc208a2cd77ede3132877471af9470
languageName: node
linkType: hard
@@ -5703,10 +5703,10 @@ __metadata:
languageName: node
linkType: hard
-"react@npm:^19.2.0":
- version: 19.2.0
- resolution: "react@npm:19.2.0"
- checksum: 10c0/1b6d64eacb9324725bfe1e7860cb7a6b8a34bc89a482920765ebff5c10578eb487e6b46b2f0df263bd27a25edbdae2c45e5ea5d81ae61404301c1a7192c38330
+"react@npm:^19.2.1":
+ version: 19.2.1
+ resolution: "react@npm:19.2.1"
+ checksum: 10c0/2b5eaf407abb3db84090434c20d6c5a8e447ab7abcd8fe9eaf1ddc299babcf31284ee9db7ea5671d21c85ac5298bd632fa1a7da1ed78d5b368a537f5e1cd5d62
languageName: node
linkType: hard