more accurate auto VRAM approximate, more code renames from backend -> acceleration

This commit is contained in:
Egor 2025-12-03 14:01:46 -08:00
parent 0576b46b29
commit 80bb6d5e97
20 changed files with 183 additions and 153 deletions

View file

@ -10,11 +10,8 @@
"typescriptreact" "typescriptreact"
], ],
"editor.formatOnSave": true, "editor.formatOnSave": true,
"editor.defaultFormatter": "esbenp.prettier-vscode", "editor.defaultFormatter": "prettier.prettier-vscode",
"[typescript]": { "[typescriptreact]": {
"editor.defaultFormatter": "prettier.prettier-vscode" "editor.defaultFormatter": "prettier.prettier-vscode"
}, },
"[typescriptreact]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
}
} }

View file

@ -79,8 +79,8 @@
"execa": "^9.6.1", "execa": "^9.6.1",
"lucide-react": "^0.555.0", "lucide-react": "^0.555.0",
"mime-types": "^3.0.2", "mime-types": "^3.0.2",
"react": "^19.2.0", "react": "^19.2.1",
"react-dom": "^19.2.0", "react-dom": "^19.2.1",
"react-error-boundary": "^6.0.0", "react-error-boundary": "^6.0.0",
"systeminformation": "^5.27.11", "systeminformation": "^5.27.11",
"winston": "^3.18.3", "winston": "^3.18.3",

View file

@ -63,8 +63,10 @@ export const App = () => {
useEffect(() => { useEffect(() => {
const cleanup = window.electronAPI.kobold.onServerReady(() => { const cleanup = window.electronAPI.kobold.onServerReady(() => {
setIsServerReady(true); setTimeout(() => {
setActiveInterfaceTab(defaultInterfaceTab); setIsServerReady(true);
setActiveInterfaceTab(defaultInterfaceTab);
}, 1000);
}); });
return cleanup; return cleanup;

View file

@ -1,5 +1,5 @@
import { useState } from 'react'; import { useState } from 'react';
import { Text, Anchor } from '@mantine/core'; import { Text, Anchor, Box } from '@mantine/core';
interface ImportBackendLinkProps { interface ImportBackendLinkProps {
disabled?: boolean; disabled?: boolean;
@ -40,7 +40,7 @@ export const ImportBackendLink = ({
}; };
return ( return (
<> <Box mt="xs">
{importError && ( {importError && (
<Text size="sm" c="red" ta="center" mb="xs"> <Text size="sm" c="red" ta="center" mb="xs">
{importError} {importError}
@ -58,6 +58,6 @@ export const ImportBackendLink = ({
{importing ? 'Importing...' : 'Select a local file'} {importing ? 'Importing...' : 'Select a local file'}
</Anchor> </Anchor>
</Text> </Text>
</> </Box>
); );
}; };

View file

@ -27,7 +27,7 @@ export const AdvancedTab = () => {
quantmatmul, quantmatmul,
usemmap, usemmap,
debugmode, debugmode,
backend, acceleration,
moecpu, moecpu,
moeexperts, moeexperts,
setAdditionalArguments, setAdditionalArguments,
@ -58,7 +58,7 @@ export const AdvancedTab = () => {
setAdditionalArguments(updatedArgs); setAdditionalArguments(updatedArgs);
}; };
const isGpuBackend = backend === 'cuda' || backend === 'rocm'; const isGpuAcceleration = acceleration === 'cuda' || acceleration === 'rocm';
useEffect(() => { useEffect(() => {
const detectAccelerationSupport = async () => { const detectAccelerationSupport = async () => {
@ -118,15 +118,15 @@ export const AdvancedTab = () => {
/> />
<CheckboxWithTooltip <CheckboxWithTooltip
checked={quantmatmul && isGpuBackend} checked={quantmatmul && isGpuAcceleration}
onChange={setQuantmatmul} onChange={setQuantmatmul}
label="QuantMatMul" label="QuantMatMul"
tooltip={ tooltip={
!isGpuBackend !isGpuAcceleration
? 'QuantMatMul is only available for CUDA and ROCm backends.' ? 'QuantMatMul is only available for CUDA and ROCm accelerations.'
: 'Enable MMQ mode to use finetuned kernels instead of default CuBLAS/HipBLAS for prompt processing.' : 'Enable MMQ mode to use finetuned kernels instead of default CuBLAS/HipBLAS for prompt processing.'
} }
disabled={!isGpuBackend} disabled={!isGpuAcceleration}
/> />
<CheckboxWithTooltip <CheckboxWithTooltip
@ -149,15 +149,15 @@ export const AdvancedTab = () => {
/> />
<CheckboxWithTooltip <CheckboxWithTooltip
checked={lowvram && isGpuBackend} checked={lowvram && isGpuAcceleration}
onChange={setLowvram} onChange={setLowvram}
label="Low VRAM" label="Low VRAM"
tooltip={ tooltip={
!isGpuBackend !isGpuAcceleration
? 'Low VRAM mode is only available for CUDA and ROCm backends.' ? 'Low VRAM mode is only available for CUDA and ROCm accelerations.'
: 'Avoid offloading KV Cache or scratch buffers to VRAM. Allows more layers to fit, but may result in a speed loss.' : 'Avoid offloading KV Cache or scratch buffers to VRAM. Allows more layers to fit, but may result in a speed loss.'
} }
disabled={!isGpuBackend} disabled={!isGpuAcceleration}
/> />
<CheckboxWithTooltip <CheckboxWithTooltip

View file

@ -4,19 +4,19 @@ import { InfoTooltip } from '@/components/InfoTooltip';
import { AccelerationSelectItem } from '@/components/screens/Launch/GeneralTab/AccelerationSelectItem'; import { AccelerationSelectItem } from '@/components/screens/Launch/GeneralTab/AccelerationSelectItem';
import { GpuDeviceSelector } from '@/components/screens/Launch/GeneralTab/GpuDeviceSelector'; import { GpuDeviceSelector } from '@/components/screens/Launch/GeneralTab/GpuDeviceSelector';
import { useLaunchConfigStore } from '@/stores/launchConfig'; import { useLaunchConfigStore } from '@/stores/launchConfig';
import type { AccelerationOption } from '@/types'; import type { Acceleration, AccelerationOption } from '@/types';
import { Select } from '@/components/Select'; import { Select } from '@/components/Select';
export const AccelerationSelector = () => { export const AccelerationSelector = () => {
const { const {
backend, acceleration,
gpuLayers, gpuLayers,
autoGpuLayers, autoGpuLayers,
model, model,
contextSize, contextSize,
gpuDeviceSelection, gpuDeviceSelection,
flashattention, flashattention,
setBackend, setAcceleration,
setGpuLayers, setGpuLayers,
setAutoGpuLayers, setAutoGpuLayers,
} = useLaunchConfigStore(); } = useLaunchConfigStore();
@ -57,9 +57,9 @@ export const AccelerationSelector = () => {
}, []); }, []);
useEffect(() => { useEffect(() => {
if (availableAccelerations.length > 0 && backend) { if (availableAccelerations.length > 0 && acceleration) {
const isAccelerationAvailable = availableAccelerations.some( const isAccelerationAvailable = availableAccelerations.some(
(a) => a.value === backend && !a.disabled (a) => a.value === acceleration && !a.disabled
); );
if (!isAccelerationAvailable) { if (!isAccelerationAvailable) {
@ -67,15 +67,15 @@ export const AccelerationSelector = () => {
(a) => !a.disabled (a) => !a.disabled
); );
if (fallbackAcceleration) { if (fallbackAcceleration) {
setBackend(fallbackAcceleration.value); setAcceleration(fallbackAcceleration.value as Acceleration);
} }
} }
} }
}, [availableAccelerations, backend, setBackend]); }, [availableAccelerations, acceleration, setAcceleration]);
useEffect(() => { useEffect(() => {
const calculateLayers = async () => { const calculateLayers = async () => {
const isCpuOnly = backend === 'cpu' && !isMac; const isCpuOnly = acceleration === 'cpu' && !isMac;
if ( if (
!autoGpuLayers || !autoGpuLayers ||
!model || !model ||
@ -118,7 +118,8 @@ export const AccelerationSelector = () => {
model, model,
contextSize, contextSize,
availableVramGB, availableVramGB,
flashattention flashattention,
acceleration
); );
setGpuLayers(result.recommendedLayers); setGpuLayers(result.recommendedLayers);
@ -137,7 +138,7 @@ export const AccelerationSelector = () => {
autoGpuLayers, autoGpuLayers,
model, model,
contextSize, contextSize,
backend, acceleration,
gpuDeviceSelection, gpuDeviceSelection,
flashattention, flashattention,
isLoadingAccelerations, isLoadingAccelerations,
@ -163,14 +164,14 @@ export const AccelerationSelector = () => {
} }
value={ value={
availableAccelerations.some( availableAccelerations.some(
(a) => a.value === backend && !a.disabled (a) => a.value === acceleration && !a.disabled
) )
? backend ? acceleration
: null : null
} }
onChange={(value) => { onChange={(value) => {
if (value) { if (value) {
setBackend(value); setAcceleration(value as Acceleration);
} }
}} }}
data={availableAccelerations.map((a) => ({ data={availableAccelerations.map((a) => ({
@ -223,7 +224,7 @@ export const AccelerationSelector = () => {
step={1} step={1}
size="sm" size="sm"
w={80} w={80}
disabled={autoGpuLayers || (backend === 'cpu' && !isMac)} disabled={autoGpuLayers || (acceleration === 'cpu' && !isMac)}
/> />
<Group gap="xs" align="center"> <Group gap="xs" align="center">
<Checkbox <Checkbox
@ -233,7 +234,7 @@ export const AccelerationSelector = () => {
setAutoGpuLayers(event.currentTarget.checked) setAutoGpuLayers(event.currentTarget.checked)
} }
size="sm" size="sm"
disabled={backend === 'cpu' && !isMac} disabled={acceleration === 'cpu' && !isMac}
/> />
<InfoTooltip label="Automatically calculate optimal GPU layers based on available VRAM. The calculation accounts for model size, context size and flash attention." /> <InfoTooltip label="Automatically calculate optimal GPU layers based on available VRAM. The calculation accounts for model size, context size and flash attention." />
</Group> </Group>

View file

@ -4,8 +4,8 @@ import { useLaunchConfigStore } from '@/stores/launchConfig';
import { Select } from '@/components/Select'; import { Select } from '@/components/Select';
import type { AccelerationOption } from '@/types'; import type { AccelerationOption } from '@/types';
const GPU_BACKENDS = ['cuda', 'rocm', 'vulkan', 'clblast']; const GPU_ACCELERATIONS = ['cuda', 'rocm', 'vulkan', 'clblast'];
const TENSOR_SPLIT_BACKENDS = ['cuda', 'rocm', 'vulkan']; const TENSOR_SPLIT_ACCELERATIONS = ['cuda', 'rocm', 'vulkan'];
interface GpuDeviceSelectorProps { interface GpuDeviceSelectorProps {
availableAccelerations: AccelerationOption[]; availableAccelerations: AccelerationOption[];
@ -15,7 +15,7 @@ export const GpuDeviceSelector = ({
availableAccelerations, availableAccelerations,
}: GpuDeviceSelectorProps) => { }: GpuDeviceSelectorProps) => {
const { const {
backend, acceleration,
gpuDeviceSelection, gpuDeviceSelection,
tensorSplit, tensorSplit,
setGpuDeviceSelection, setGpuDeviceSelection,
@ -23,13 +23,17 @@ export const GpuDeviceSelector = ({
} = useLaunchConfigStore(); } = useLaunchConfigStore();
const selectedAcceleration = availableAccelerations.find( const selectedAcceleration = availableAccelerations.find(
(a) => a.value === backend (a) => a.value === acceleration
); );
const isGpu = GPU_BACKENDS.includes(backend); const isGpuAcceleration = GPU_ACCELERATIONS.includes(acceleration);
const getDiscreteDeviceCount = () => { const getDiscreteDeviceCount = () => {
if (!selectedAcceleration?.devices) return 0; if (!selectedAcceleration?.devices) return 0;
if (backend === 'clblast' || backend === 'vulkan' || backend === 'rocm') { if (
acceleration === 'clblast' ||
acceleration === 'vulkan' ||
acceleration === 'rocm'
) {
return selectedAcceleration.devices.filter( return selectedAcceleration.devices.filter(
(device) => typeof device === 'string' || !device.isIntegrated (device) => typeof device === 'string' || !device.isIntegrated
).length; ).length;
@ -39,24 +43,26 @@ export const GpuDeviceSelector = ({
const hasMultipleDevices = getDiscreteDeviceCount() > 1; const hasMultipleDevices = getDiscreteDeviceCount() > 1;
const showTensorSplit = const showTensorSplit =
TENSOR_SPLIT_BACKENDS.includes(backend) && TENSOR_SPLIT_ACCELERATIONS.includes(acceleration) &&
hasMultipleDevices && hasMultipleDevices &&
gpuDeviceSelection === 'all'; gpuDeviceSelection === 'all';
if (!isGpu || !hasMultipleDevices) { if (!isGpuAcceleration || !hasMultipleDevices) {
return null; return null;
} }
const deviceOptions = (() => { const deviceOptions = (() => {
if (!selectedAcceleration?.devices) return []; if (!selectedAcceleration?.devices) return [];
if (backend === 'clblast') { if (acceleration === 'clblast') {
return selectedAcceleration.devices return selectedAcceleration.devices
.map((device, index) => { .map((device, index) => {
if (typeof device === 'object' && device.isIntegrated) { if (typeof device === 'object' && device.isIntegrated) {
return null; return null;
} }
const deviceName = typeof device === 'string' ? device : device.name; const deviceName = typeof device === 'string' ? device : device.name;
return { return {
value: index.toString(), value: index.toString(),
label: `GPU ${index}: ${deviceName}`, label: `GPU ${index}: ${deviceName}`,
@ -67,7 +73,7 @@ export const GpuDeviceSelector = ({
); );
} }
if (backend === 'vulkan' || backend === 'rocm') { if (acceleration === 'vulkan' || acceleration === 'rocm') {
const discreteDeviceOptions = selectedAcceleration.devices const discreteDeviceOptions = selectedAcceleration.devices
.map((device, index) => { .map((device, index) => {
if (typeof device === 'object' && device.isIntegrated) { if (typeof device === 'object' && device.isIntegrated) {

View file

@ -11,7 +11,7 @@ import { ImageGenerationTab } from '@/components/screens/Launch/ImageGenerationT
import { WarningDisplay } from '@/components/WarningDisplay'; import { WarningDisplay } from '@/components/WarningDisplay';
import { ConfigFileManager } from '@/components/screens/Launch/ConfigFileManager'; import { ConfigFileManager } from '@/components/screens/Launch/ConfigFileManager';
import { DEFAULT_MODEL_URL } from '@/constants'; import { DEFAULT_MODEL_URL } from '@/constants';
import type { ConfigFile } from '@/types'; import type { Acceleration, ConfigFile } from '@/types';
interface LaunchScreenProps { interface LaunchScreenProps {
onLaunch: () => void; onLaunch: () => void;
@ -47,7 +47,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
quantmatmul, quantmatmul,
usemmap, usemmap,
debugmode, debugmode,
backend, acceleration,
gpuDeviceSelection, gpuDeviceSelection,
gpuPlatform, gpuPlatform,
tensorSplit, tensorSplit,
@ -66,7 +66,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
parseAndApplyConfigFile, parseAndApplyConfigFile,
loadConfigFromFile, loadConfigFromFile,
setModel, setModel,
setBackend, setAcceleration,
} = useLaunchConfigStore(); } = useLaunchConfigStore();
const { isLaunching, handleLaunch } = useLaunchLogic({ const { isLaunching, handleLaunch } = useLaunchLogic({
@ -78,7 +78,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
const { warnings: combinedWarnings } = useWarnings({ const { warnings: combinedWarnings } = useWarnings({
model, model,
sdmodel, sdmodel,
backend, acceleration,
configLoaded, configLoaded,
}); });
@ -86,10 +86,10 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
const accelerations = const accelerations =
await window.electronAPI.kobold.getAvailableAccelerations(); await window.electronAPI.kobold.getAvailableAccelerations();
if (!backend && accelerations && accelerations.length > 0) { if (!acceleration && accelerations && accelerations.length > 0) {
setBackend(accelerations[0].value); setAcceleration(accelerations[0].value as Acceleration);
} }
}, [backend, setBackend]); }, [acceleration, setAcceleration]);
const setInitialDefaults = useCallback( const setInitialDefaults = useCallback(
(currentModel: string, currentSdModel: string) => { (currentModel: string, currentSdModel: string) => {
@ -177,9 +177,9 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
debugmode, debugmode,
moecpu, moecpu,
moeexperts, moeexperts,
usecuda: backend === 'cuda' || backend === 'rocm', usecuda: acceleration === 'cuda' || acceleration === 'rocm',
usevulkan: backend === 'vulkan', usevulkan: acceleration === 'vulkan',
useclblast: backend === 'clblast', useclblast: acceleration === 'clblast',
gpuDeviceSelection, gpuDeviceSelection,
tensorSplit, tensorSplit,
sdmodel, sdmodel,
@ -295,7 +295,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
flashattention, flashattention,
noavx2, noavx2,
failsafe, failsafe,
backend, acceleration,
lowvram, lowvram,
gpuDeviceSelection, gpuDeviceSelection,
gpuPlatform, gpuPlatform,
@ -333,7 +333,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
flashattention, flashattention,
noavx2, noavx2,
failsafe, failsafe,
backend, acceleration,
lowvram, lowvram,
gpuDeviceSelection, gpuDeviceSelection,
gpuPlatform, gpuPlatform,

View file

@ -7,7 +7,6 @@ import {
Loader, Loader,
Center, Center,
Anchor, Anchor,
Divider,
} from '@mantine/core'; } from '@mantine/core';
import { ExternalLink } from 'lucide-react'; import { ExternalLink } from 'lucide-react';
import { DownloadCard } from '@/components/DownloadCard'; import { DownloadCard } from '@/components/DownloadCard';
@ -327,8 +326,6 @@ export const BackendsTab = () => {
</Card> </Card>
)} )}
<Divider my="md" />
<ImportBackendLink <ImportBackendLink
disabled={isDisabled} disabled={isDisabled}
onSuccess={loadInstalledBackends} onSuccess={loadInstalledBackends}

View file

@ -22,7 +22,7 @@ interface LaunchArgs {
flashattention: boolean; flashattention: boolean;
noavx2: boolean; noavx2: boolean;
failsafe: boolean; failsafe: boolean;
backend: string; acceleration: string;
lowvram: boolean; lowvram: boolean;
gpuDeviceSelection: string; gpuDeviceSelection: string;
gpuPlatform: number; gpuPlatform: number;
@ -97,9 +97,10 @@ const buildModelArgs = (
const buildConfigArgs = (isImageMode: boolean, launchArgs: LaunchArgs) => { const buildConfigArgs = (isImageMode: boolean, launchArgs: LaunchArgs) => {
const args: string[] = []; const args: string[] = [];
const isGpuBackend = launchArgs.backend && launchArgs.backend !== 'cpu'; const isGpuAcceleration =
launchArgs.acceleration && launchArgs.acceleration !== 'cpu';
if (isGpuBackend) { if (isGpuAcceleration) {
if (launchArgs.autoGpuLayers && launchArgs.gpuLayers > 0) { if (launchArgs.autoGpuLayers && launchArgs.gpuLayers > 0) {
args.push('--gpulayers', launchArgs.gpuLayers.toString()); args.push('--gpulayers', launchArgs.gpuLayers.toString());
} else if (!launchArgs.autoGpuLayers && launchArgs.gpuLayers > 0) { } else if (!launchArgs.autoGpuLayers && launchArgs.gpuLayers > 0) {
@ -213,8 +214,8 @@ const buildBackendArgs = (launchArgs: LaunchArgs, platform: string) => {
return args; return args;
} }
if (!launchArgs.backend || launchArgs.backend === 'cpu') { if (!launchArgs.acceleration || launchArgs.acceleration === 'cpu') {
if (launchArgs.backend === 'cpu') { if (launchArgs.acceleration === 'cpu') {
args.push('--usecpu'); args.push('--usecpu');
} }
@ -222,23 +223,26 @@ const buildBackendArgs = (launchArgs: LaunchArgs, platform: string) => {
} }
const isTensorSplitSupported = const isTensorSplitSupported =
launchArgs.backend === 'cuda' || launchArgs.acceleration === 'cuda' ||
launchArgs.backend === 'rocm' || launchArgs.acceleration === 'rocm' ||
launchArgs.backend === 'vulkan'; launchArgs.acceleration === 'vulkan';
if (launchArgs.backend === 'cuda' || launchArgs.backend === 'rocm') { if (
launchArgs.acceleration === 'cuda' ||
launchArgs.acceleration === 'rocm'
) {
args.push(...buildCudaArgs(launchArgs)); args.push(...buildCudaArgs(launchArgs));
if (launchArgs.gpuDeviceSelection === 'all' && isTensorSplitSupported) { if (launchArgs.gpuDeviceSelection === 'all' && isTensorSplitSupported) {
addTensorSplitArgs(args, launchArgs); addTensorSplitArgs(args, launchArgs);
} }
} else if (launchArgs.backend === 'vulkan') { } else if (launchArgs.acceleration === 'vulkan') {
args.push(...buildVulkanArgs()); args.push(...buildVulkanArgs());
if (launchArgs.gpuDeviceSelection === 'all' && isTensorSplitSupported) { if (launchArgs.gpuDeviceSelection === 'all' && isTensorSplitSupported) {
addTensorSplitArgs(args, launchArgs); addTensorSplitArgs(args, launchArgs);
} }
} else if (launchArgs.backend === 'clblast') { } else if (launchArgs.acceleration === 'clblast') {
args.push(...buildClblastArgs(launchArgs)); args.push(...buildClblastArgs(launchArgs));
} }

View file

@ -10,7 +10,7 @@ export interface Warning {
interface UseWarningsProps { interface UseWarningsProps {
model: string; model: string;
sdmodel: string; sdmodel: string;
backend?: string; acceleration?: string;
configLoaded?: boolean; configLoaded?: boolean;
} }
@ -92,7 +92,7 @@ const checkGpuWarnings = async (
} }
warnings.push({ warnings.push({
type: 'warning', type: 'info',
message, message,
}); });
} }
@ -100,11 +100,13 @@ const checkGpuWarnings = async (
return warnings; return warnings;
}; };
const checkVramWarnings = async (backend: string): Promise<Warning[]> => { const checkVramWarnings = async (acceleration: string): Promise<Warning[]> => {
const warnings: Warning[] = []; const warnings: Warning[] = [];
const isGpuBackend = ['cuda', 'rocm', 'vulkan', 'clblast'].includes(backend); const isGpuAcceleration = ['cuda', 'rocm', 'vulkan', 'clblast'].includes(
acceleration
);
if (isGpuBackend) { if (isGpuAcceleration) {
const gpuMemoryInfo = await window.electronAPI.kobold.detectGPUMemory(); const gpuMemoryInfo = await window.electronAPI.kobold.detectGPUMemory();
if (gpuMemoryInfo) { if (gpuMemoryInfo) {
@ -133,12 +135,12 @@ const checkVramWarnings = async (backend: string): Promise<Warning[]> => {
}; };
const checkCpuWarnings = ( const checkCpuWarnings = (
backend: string, acceleration: string,
availableAccelerations: AccelerationOption[] availableAccelerations: AccelerationOption[]
) => { ) => {
const warnings: Warning[] = []; const warnings: Warning[] = [];
if (backend !== 'cpu') { if (acceleration !== 'cpu') {
return warnings; return warnings;
} }
@ -157,7 +159,7 @@ const checkCpuWarnings = (
}; };
const checkBackendWarnings = async (params?: { const checkBackendWarnings = async (params?: {
backend: string; acceleration: string;
cpuCapabilities: CPUCapabilities | null; cpuCapabilities: CPUCapabilities | null;
availableAccelerations: AccelerationOption[]; availableAccelerations: AccelerationOption[];
}) => { }) => {
@ -181,13 +183,16 @@ const checkBackendWarnings = async (params?: {
warnings.push(...gpuWarnings); warnings.push(...gpuWarnings);
if (params) { if (params) {
const { backend, cpuCapabilities, availableAccelerations } = params; const { acceleration, cpuCapabilities, availableAccelerations } = params;
const vramWarnings = await checkVramWarnings(backend); const vramWarnings = await checkVramWarnings(acceleration);
warnings.push(...vramWarnings); warnings.push(...vramWarnings);
if (cpuCapabilities) { if (cpuCapabilities) {
const cpuWarnings = checkCpuWarnings(backend, availableAccelerations); const cpuWarnings = checkCpuWarnings(
acceleration,
availableAccelerations
);
warnings.push(...cpuWarnings); warnings.push(...cpuWarnings);
} }
} }
@ -198,7 +203,7 @@ const checkBackendWarnings = async (params?: {
export const useWarnings = ({ export const useWarnings = ({
model, model,
sdmodel, sdmodel,
backend, acceleration,
configLoaded = false, configLoaded = false,
}: UseWarningsProps) => { }: UseWarningsProps) => {
const [backendWarnings, setBackendWarnings] = useState<Warning[]>([]); const [backendWarnings, setBackendWarnings] = useState<Warning[]>([]);
@ -209,7 +214,7 @@ export const useWarnings = ({
); );
const updateBackendWarnings = useCallback(async () => { const updateBackendWarnings = useCallback(async () => {
if (!backend) { if (!acceleration) {
setBackendWarnings([]); setBackendWarnings([]);
return; return;
} }
@ -220,13 +225,13 @@ export const useWarnings = ({
]); ]);
const result = await checkBackendWarnings({ const result = await checkBackendWarnings({
backend, acceleration,
cpuCapabilities: cpuCapabilitiesResult, cpuCapabilities: cpuCapabilitiesResult,
availableAccelerations, availableAccelerations,
}); });
setBackendWarnings(result); setBackendWarnings(result);
}, [backend]); }, [acceleration]);
useEffect(() => { useEffect(() => {
// eslint-disable-next-line react-hooks/set-state-in-effect // eslint-disable-next-line react-hooks/set-state-in-effect

View file

@ -1,7 +1,7 @@
import { ipcMain, app } from 'electron'; import { ipcMain, app } from 'electron';
import { join } from 'path'; import { join } from 'path';
import { platform } from 'process'; import { platform } from 'process';
import type { Screen } from '@/types'; import type { Screen, Acceleration } from '@/types';
import { import {
stopKoboldCpp, stopKoboldCpp,
launchKoboldCppWithCustomFrontends, launchKoboldCppWithCustomFrontends,
@ -182,13 +182,15 @@ export function setupIPCHandlers() {
modelPath: string, modelPath: string,
contextSize: number, contextSize: number,
availableVramGB: number, availableVramGB: number,
flashAttention: boolean flashAttention: boolean,
acceleration: Acceleration
) => ) =>
calculateOptimalGpuLayers({ calculateOptimalGpuLayers({
modelPath, modelPath,
contextSize, contextSize,
availableVramGB, availableVramGB,
flashAttention, flashAttention,
acceleration,
}) })
); );

View file

@ -234,15 +234,17 @@ export async function launchKoboldCpp(
const handleServerReady = () => { const handleServerReady = () => {
const isKoboldFrontend = const isKoboldFrontend =
frontendPreference === 'koboldcpp' || frontendPreference === 'koboldcpp' ||
frontendPreference === 'llamacpp' ||
(!isTextMode && imageGenerationFrontendPreference === 'builtin'); (!isTextMode && imageGenerationFrontendPreference === 'builtin');
if (isKoboldFrontend) { if (isKoboldFrontend) {
sendToRenderer('server-ready'); sendToRenderer('server-ready');
} }
readyResolve?.({ success: true, pid: child.pid }); readyResolve?.({ success: true, pid: child.pid });
}; };
child.stdout?.on('data', (data) => { const handleOutput = (data: Buffer) => {
const output = data.toString(); const output = data.toString();
const filtered = debugmode ? output : filterSpam(output); const filtered = debugmode ? output : filterSpam(output);
if (filtered.trim()) { if (filtered.trim()) {
@ -254,21 +256,10 @@ export async function launchKoboldCpp(
hasProcessStartedSuccessfully = true; hasProcessStartedSuccessfully = true;
handleServerReady(); handleServerReady();
} }
}); };
child.stderr?.on('data', (data) => { child.stdout?.on('data', handleOutput);
const output = data.toString(); child.stderr?.on('data', handleOutput);
const filtered = debugmode ? output : filterSpam(output);
if (filtered.trim()) {
sendKoboldOutput(filtered, true);
}
if (!isReady && output.includes(SERVER_READY_SIGNALS.KOBOLDCPP)) {
isReady = true;
hasProcessStartedSuccessfully = true;
handleServerReady();
}
});
child.on('exit', (code, signal) => { child.on('exit', (code, signal) => {
const isCrash = signal !== null || (code !== null && code !== 0); const isCrash = signal !== null || (code !== null && code !== 0);

View file

@ -270,7 +270,7 @@ export async function resolveModelPath(
const localPath = getModelLocalPath(urlOrPath, paramType); const localPath = getModelLocalPath(urlOrPath, paramType);
if (await pathExists(localPath)) { if (await pathExists(localPath)) {
sendKoboldOutput(`Using cached model at: ${localPath}\n`); sendKoboldOutput(`Using cached model at: ${localPath}`);
onProgress?.({ onProgress?.({
type: 'complete', type: 'complete',
localPath, localPath,
@ -278,14 +278,14 @@ export async function resolveModelPath(
return localPath; return localPath;
} }
sendKoboldOutput(`Downloading model from ${urlOrPath} to ${localPath}...\n`); sendKoboldOutput(`Downloading model from ${urlOrPath} to ${localPath}...`);
const progressCallback = onProgress || ((p: DownloadProgress) => p); const progressCallback = onProgress || ((p: DownloadProgress) => p);
try { try {
await downloadFile(urlOrPath, localPath, progressCallback); await downloadFile(urlOrPath, localPath, progressCallback);
sendKoboldOutput(`Model downloaded successfully to: ${localPath}\n\n`); sendKoboldOutput(`Model downloaded successfully to: ${localPath}\n`);
progressCallback({ progressCallback({
type: 'complete', type: 'complete',
localPath, localPath,

View file

@ -63,14 +63,16 @@ const koboldAPI: KoboldAPI = {
modelPath, modelPath,
contextSize, contextSize,
availableVramGB, availableVramGB,
flashAttention flashAttention,
acceleration
) => ) =>
ipcRenderer.invoke( ipcRenderer.invoke(
'kobold:calculateOptimalLayers', 'kobold:calculateOptimalLayers',
modelPath, modelPath,
contextSize, contextSize,
availableVramGB, availableVramGB,
flashAttention flashAttention,
acceleration
), ),
stopKoboldCpp: () => ipcRenderer.invoke('kobold:stopKoboldCpp'), stopKoboldCpp: () => ipcRenderer.invoke('kobold:stopKoboldCpp'),
onDownloadProgress: (callback) => { onDownloadProgress: (callback) => {

View file

@ -1,5 +1,5 @@
import { create } from 'zustand'; import { create } from 'zustand';
import type { ConfigFile, SdConvDirectMode } from '@/types'; import type { Acceleration, ConfigFile, SdConvDirectMode } from '@/types';
import { IMAGE_MODEL_PRESETS } from '@/constants/imageModelPresets'; import { IMAGE_MODEL_PRESETS } from '@/constants/imageModelPresets';
import { DEFAULT_AUTO_GPU_LAYERS, DEFAULT_CONTEXT_SIZE } from '@/constants'; import { DEFAULT_AUTO_GPU_LAYERS, DEFAULT_CONTEXT_SIZE } from '@/constants';
@ -25,7 +25,7 @@ interface LaunchConfigState {
quantmatmul: boolean; quantmatmul: boolean;
usemmap: boolean; usemmap: boolean;
debugmode: boolean; debugmode: boolean;
backend: string; acceleration: Acceleration;
gpuDeviceSelection: string; gpuDeviceSelection: string;
tensorSplit: string; tensorSplit: string;
gpuPlatform: number; gpuPlatform: number;
@ -65,7 +65,7 @@ interface LaunchConfigState {
setUsemmap: (usemmap: boolean) => void; setUsemmap: (usemmap: boolean) => void;
setDebugmode: (debugmode: boolean) => void; setDebugmode: (debugmode: boolean) => void;
setPreLaunchCommands: (commands: string[]) => void; setPreLaunchCommands: (commands: string[]) => void;
setBackend: (backend: string) => void; setAcceleration: (acceleration: Acceleration) => void;
setGpuDeviceSelection: (selection: string) => void; setGpuDeviceSelection: (selection: string) => void;
setTensorSplit: (split: string) => void; setTensorSplit: (split: string) => void;
setGpuPlatform: (platform: number) => void; setGpuPlatform: (platform: number) => void;
@ -125,7 +125,7 @@ export const useLaunchConfigStore = create<LaunchConfigState>((set, get) => ({
quantmatmul: true, quantmatmul: true,
usemmap: true, usemmap: true,
debugmode: false, debugmode: false,
backend: '', acceleration: '' as Acceleration,
gpuDeviceSelection: '0', gpuDeviceSelection: '0',
tensorSplit: '', tensorSplit: '',
gpuPlatform: 0, gpuPlatform: 0,
@ -170,9 +170,9 @@ export const useLaunchConfigStore = create<LaunchConfigState>((set, get) => ({
setUsemmap: (usemmap) => set({ usemmap }), setUsemmap: (usemmap) => set({ usemmap }),
setDebugmode: (debugmode) => set({ debugmode }), setDebugmode: (debugmode) => set({ debugmode }),
setPreLaunchCommands: (commands) => set({ preLaunchCommands: commands }), setPreLaunchCommands: (commands) => set({ preLaunchCommands: commands }),
setBackend: (backend) => setAcceleration: (acceleration) =>
set({ set({
backend, acceleration,
gpuDeviceSelection: '0', gpuDeviceSelection: '0',
tensorSplit: '', tensorSplit: '',
}), }),
@ -331,7 +331,7 @@ export const useLaunchConfigStore = create<LaunchConfigState>((set, get) => ({
if (configData.usecuda === true) { if (configData.usecuda === true) {
const gpuInfo = await window.electronAPI.kobold.detectGPU(); const gpuInfo = await window.electronAPI.kobold.detectGPU();
updates.backend = gpuInfo.hasNVIDIA ? 'cuda' : 'rocm'; updates.acceleration = gpuInfo.hasNVIDIA ? 'cuda' : 'rocm';
if ( if (
Array.isArray(configData.usecuda) && Array.isArray(configData.usecuda) &&
@ -343,17 +343,17 @@ export const useLaunchConfigStore = create<LaunchConfigState>((set, get) => ({
updates.quantmatmul = mmqMode === 'mmq'; updates.quantmatmul = mmqMode === 'mmq';
} }
} else if (configData.usevulkan === true) { } else if (configData.usevulkan === true) {
updates.backend = 'vulkan'; updates.acceleration = 'vulkan';
} else if ( } else if (
Array.isArray(configData.useclblast) && Array.isArray(configData.useclblast) &&
configData.useclblast.length === 2 configData.useclblast.length === 2
) { ) {
updates.backend = 'clblast'; updates.acceleration = 'clblast';
const [deviceIndex, platformIndex] = configData.useclblast; const [deviceIndex, platformIndex] = configData.useclblast;
updates.gpuDeviceSelection = deviceIndex.toString(); updates.gpuDeviceSelection = deviceIndex.toString();
updates.gpuPlatform = platformIndex; updates.gpuPlatform = platformIndex;
} else { } else {
updates.backend = 'cpu'; updates.acceleration = 'cpu';
} }
if (typeof configData.gpuDeviceSelection === 'string') { if (typeof configData.gpuDeviceSelection === 'string') {

View file

@ -6,6 +6,7 @@ import type {
SystemMemoryInfo, SystemMemoryInfo,
} from '@/types/hardware'; } from '@/types/hardware';
import type { import type {
Acceleration,
AccelerationOption, AccelerationOption,
AccelerationSupport, AccelerationSupport,
Screen, Screen,
@ -170,7 +171,8 @@ export interface KoboldAPI {
modelPath: string, modelPath: string,
contextSize: number, contextSize: number,
availableVramGB: number, availableVramGB: number,
flashAttention: boolean flashAttention: boolean,
acceleration: Acceleration
) => Promise<OptimalLayersResult>; ) => Promise<OptimalLayersResult>;
stopKoboldCpp: () => void; stopKoboldCpp: () => void;
onDownloadProgress: (callback: (progress: number) => void) => () => void; onDownloadProgress: (callback: (progress: number) => void) => () => void;

View file

@ -103,6 +103,8 @@ export interface AccelerationSupport {
cuda: boolean; cuda: boolean;
} }
export type Acceleration = keyof AccelerationSupport | 'cpu';
export interface ModelAnalysis { export interface ModelAnalysis {
general: { general: {
architecture: string; architecture: string;

View file

@ -1,30 +1,48 @@
import { gguf } from '@huggingface/gguf'; import { gguf } from '@huggingface/gguf';
import { stat } from 'fs/promises'; import { stat } from 'fs/promises';
import type { Acceleration } from '@/types';
interface VramCalculationParams { interface VramCalculationParams {
modelPath: string; modelPath: string;
contextSize: number; contextSize: number;
availableVramGB: number; availableVramGB: number;
flashAttention?: boolean; flashAttention?: boolean;
acceleration: Acceleration;
}
function getAccelerationOverhead(acceleration: Acceleration) {
switch (acceleration) {
case 'cuda':
return { multiplier: 1.05, computeBufferGB: 0.2, headroomGB: 0.1 };
case 'vulkan':
return { multiplier: 1.05, computeBufferGB: 0.2, headroomGB: 0.1 };
case 'rocm':
return { multiplier: 1.15, computeBufferGB: 0.4, headroomGB: 0.2 };
case 'clblast':
return { multiplier: 1.2, computeBufferGB: 0.5, headroomGB: 0.3 };
// eslint-disable-next-line no-comments/disallowComments
// assuming metal on macOS which we refer to as "cpu" acceleration
case 'cpu':
return { multiplier: 1.05, computeBufferGB: 0.2, headroomGB: 0.1 };
default:
return { multiplier: 1.1, computeBufferGB: 0.3, headroomGB: 0.15 };
}
} }
function estimateContextVram( function estimateContextVram(
contextSize: number, contextSize: number,
layers: number, layers: number,
embeddingLength: number, kvDim: number,
flashAttention: boolean flashAttention: boolean
) { ) {
const bytesPerElement = 2; const bytesPerElement = 2;
let kvCacheSizeBytes = let kvCacheSizeBytes = 2 * contextSize * layers * kvDim * bytesPerElement;
2 * contextSize * layers * embeddingLength * bytesPerElement;
if (flashAttention) { if (flashAttention) {
kvCacheSizeBytes *= 0.5; kvCacheSizeBytes *= 0.5;
} }
const kvCacheSizeGB = kvCacheSizeBytes / 1024 ** 3; return kvCacheSizeBytes / 1024 ** 3;
return kvCacheSizeGB;
} }
export async function calculateOptimalGpuLayers({ export async function calculateOptimalGpuLayers({
@ -32,6 +50,7 @@ export async function calculateOptimalGpuLayers({
contextSize, contextSize,
availableVramGB, availableVramGB,
flashAttention = false, flashAttention = false,
acceleration,
}: VramCalculationParams) { }: VramCalculationParams) {
const isUrl = const isUrl =
modelPath.startsWith('http://') || modelPath.startsWith('https://'); modelPath.startsWith('http://') || modelPath.startsWith('https://');
@ -75,25 +94,26 @@ export async function calculateOptimalGpuLayers({
const headDim = embeddingLength / headCount; const headDim = embeddingLength / headCount;
const kvDim = headCountKv * headDim; const kvDim = headCountKv * headDim;
const modelSizeGB = fileSize / 1024 ** 3; const { multiplier, computeBufferGB, headroomGB } =
const vramPerLayerGB = modelSizeGB / totalLayers; getAccelerationOverhead(acceleration);
const headroomGB = 0.1; const modelSizeGB = fileSize / 1024 ** 3;
const availableForModel = availableVramGB - headroomGB; const effectiveModelSizeGB = modelSizeGB * multiplier;
const vramPerLayerGB = effectiveModelSizeGB / totalLayers;
const availableForModel = availableVramGB - computeBufferGB - headroomGB;
let recommendedLayers = 0; let recommendedLayers = 0;
let modelVramGB = 0;
let contextVramGB = 0;
for (let layers = 1; layers <= totalLayers; layers++) { for (let layers = 1; layers <= totalLayers; layers++) {
modelVramGB = layers * vramPerLayerGB; const modelVram = layers * vramPerLayerGB;
contextVramGB = estimateContextVram( const contextVram = estimateContextVram(
contextSize, contextSize,
layers, layers,
kvDim, kvDim,
flashAttention flashAttention
); );
const totalVram = modelVramGB + contextVramGB; const totalVram = modelVram + contextVram;
if (totalVram <= availableForModel) { if (totalVram <= availableForModel) {
recommendedLayers = layers; recommendedLayers = layers;
@ -102,21 +122,20 @@ export async function calculateOptimalGpuLayers({
} }
} }
const finalContextVram = estimateContextVram( const modelVramGB = recommendedLayers * vramPerLayerGB;
const contextVramGB = estimateContextVram(
contextSize, contextSize,
recommendedLayers, recommendedLayers,
kvDim, kvDim,
flashAttention flashAttention
); );
const estimatedVramUsageGB =
recommendedLayers * vramPerLayerGB + finalContextVram;
return { return {
recommendedLayers, recommendedLayers,
totalLayers, totalLayers,
estimatedVramUsageGB, estimatedVramUsageGB: modelVramGB + contextVramGB + computeBufferGB,
modelVramGB: recommendedLayers * vramPerLayerGB, modelVramGB,
contextVramGB: finalContextVram, contextVramGB,
headroomGB, headroomGB,
}; };
} }

View file

@ -3747,8 +3747,8 @@ __metadata:
lucide-react: "npm:^0.555.0" lucide-react: "npm:^0.555.0"
mime-types: "npm:^3.0.2" mime-types: "npm:^3.0.2"
prettier: "npm:^3.7.4" prettier: "npm:^3.7.4"
react: "npm:^19.2.0" react: "npm:^19.2.1"
react-dom: "npm:^19.2.0" react-dom: "npm:^19.2.1"
react-error-boundary: "npm:^6.0.0" react-error-boundary: "npm:^6.0.0"
rollup-plugin-visualizer: "npm:^6.0.5" rollup-plugin-visualizer: "npm:^6.0.5"
systeminformation: "npm:^5.27.11" systeminformation: "npm:^5.27.11"
@ -5593,14 +5593,14 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"react-dom@npm:^19.2.0": "react-dom@npm:^19.2.1":
version: 19.2.0 version: 19.2.1
resolution: "react-dom@npm:19.2.0" resolution: "react-dom@npm:19.2.1"
dependencies: dependencies:
scheduler: "npm:^0.27.0" scheduler: "npm:^0.27.0"
peerDependencies: peerDependencies:
react: ^19.2.0 react: ^19.2.1
checksum: 10c0/fa2cae05248d01288e91523b590ce4e7635b1e13f1344e225f850d722a8da037bf0782f63b1c1d46353334e0c696909b82e582f8cad607948fde6f7646cc18d9 checksum: 10c0/e56b6b3d72314df580ca800b70a69a21c6372703c8f45d9b5451ca6519faefb2496d76ffa9c5adb94136d2bbf2fd303d0dfc208a2cd77ede3132877471af9470
languageName: node languageName: node
linkType: hard linkType: hard
@ -5703,10 +5703,10 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"react@npm:^19.2.0": "react@npm:^19.2.1":
version: 19.2.0 version: 19.2.1
resolution: "react@npm:19.2.0" resolution: "react@npm:19.2.1"
checksum: 10c0/1b6d64eacb9324725bfe1e7860cb7a6b8a34bc89a482920765ebff5c10578eb487e6b46b2f0df263bd27a25edbdae2c45e5ea5d81ae61404301c1a7192c38330 checksum: 10c0/2b5eaf407abb3db84090434c20d6c5a8e447ab7abcd8fe9eaf1ddc299babcf31284ee9db7ea5671d21c85ac5298bd632fa1a7da1ed78d5b368a537f5e1cd5d62
languageName: node languageName: node
linkType: hard linkType: hard