mirror of
https://github.com/lone-cloud/gerbil
synced 2026-06-03 09:33:10 -07:00
more accurate auto VRAM approximate, more code renames from backend -> acceleration
This commit is contained in:
parent
0576b46b29
commit
80bb6d5e97
20 changed files with 183 additions and 153 deletions
7
.vscode/settings.json
vendored
7
.vscode/settings.json
vendored
|
|
@ -10,11 +10,8 @@
|
|||
"typescriptreact"
|
||||
],
|
||||
"editor.formatOnSave": true,
|
||||
"editor.defaultFormatter": "esbenp.prettier-vscode",
|
||||
"[typescript]": {
|
||||
"editor.defaultFormatter": "prettier.prettier-vscode",
|
||||
"[typescriptreact]": {
|
||||
"editor.defaultFormatter": "prettier.prettier-vscode"
|
||||
},
|
||||
"[typescriptreact]": {
|
||||
"editor.defaultFormatter": "esbenp.prettier-vscode"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -79,8 +79,8 @@
|
|||
"execa": "^9.6.1",
|
||||
"lucide-react": "^0.555.0",
|
||||
"mime-types": "^3.0.2",
|
||||
"react": "^19.2.0",
|
||||
"react-dom": "^19.2.0",
|
||||
"react": "^19.2.1",
|
||||
"react-dom": "^19.2.1",
|
||||
"react-error-boundary": "^6.0.0",
|
||||
"systeminformation": "^5.27.11",
|
||||
"winston": "^3.18.3",
|
||||
|
|
|
|||
|
|
@ -63,8 +63,10 @@ export const App = () => {
|
|||
|
||||
useEffect(() => {
|
||||
const cleanup = window.electronAPI.kobold.onServerReady(() => {
|
||||
setIsServerReady(true);
|
||||
setActiveInterfaceTab(defaultInterfaceTab);
|
||||
setTimeout(() => {
|
||||
setIsServerReady(true);
|
||||
setActiveInterfaceTab(defaultInterfaceTab);
|
||||
}, 1000);
|
||||
});
|
||||
|
||||
return cleanup;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import { useState } from 'react';
|
||||
import { Text, Anchor } from '@mantine/core';
|
||||
import { Text, Anchor, Box } from '@mantine/core';
|
||||
|
||||
interface ImportBackendLinkProps {
|
||||
disabled?: boolean;
|
||||
|
|
@ -40,7 +40,7 @@ export const ImportBackendLink = ({
|
|||
};
|
||||
|
||||
return (
|
||||
<>
|
||||
<Box mt="xs">
|
||||
{importError && (
|
||||
<Text size="sm" c="red" ta="center" mb="xs">
|
||||
{importError}
|
||||
|
|
@ -58,6 +58,6 @@ export const ImportBackendLink = ({
|
|||
{importing ? 'Importing...' : 'Select a local file'}
|
||||
</Anchor>
|
||||
</Text>
|
||||
</>
|
||||
</Box>
|
||||
);
|
||||
};
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ export const AdvancedTab = () => {
|
|||
quantmatmul,
|
||||
usemmap,
|
||||
debugmode,
|
||||
backend,
|
||||
acceleration,
|
||||
moecpu,
|
||||
moeexperts,
|
||||
setAdditionalArguments,
|
||||
|
|
@ -58,7 +58,7 @@ export const AdvancedTab = () => {
|
|||
setAdditionalArguments(updatedArgs);
|
||||
};
|
||||
|
||||
const isGpuBackend = backend === 'cuda' || backend === 'rocm';
|
||||
const isGpuAcceleration = acceleration === 'cuda' || acceleration === 'rocm';
|
||||
|
||||
useEffect(() => {
|
||||
const detectAccelerationSupport = async () => {
|
||||
|
|
@ -118,15 +118,15 @@ export const AdvancedTab = () => {
|
|||
/>
|
||||
|
||||
<CheckboxWithTooltip
|
||||
checked={quantmatmul && isGpuBackend}
|
||||
checked={quantmatmul && isGpuAcceleration}
|
||||
onChange={setQuantmatmul}
|
||||
label="QuantMatMul"
|
||||
tooltip={
|
||||
!isGpuBackend
|
||||
? 'QuantMatMul is only available for CUDA and ROCm backends.'
|
||||
!isGpuAcceleration
|
||||
? 'QuantMatMul is only available for CUDA and ROCm accelerations.'
|
||||
: 'Enable MMQ mode to use finetuned kernels instead of default CuBLAS/HipBLAS for prompt processing.'
|
||||
}
|
||||
disabled={!isGpuBackend}
|
||||
disabled={!isGpuAcceleration}
|
||||
/>
|
||||
|
||||
<CheckboxWithTooltip
|
||||
|
|
@ -149,15 +149,15 @@ export const AdvancedTab = () => {
|
|||
/>
|
||||
|
||||
<CheckboxWithTooltip
|
||||
checked={lowvram && isGpuBackend}
|
||||
checked={lowvram && isGpuAcceleration}
|
||||
onChange={setLowvram}
|
||||
label="Low VRAM"
|
||||
tooltip={
|
||||
!isGpuBackend
|
||||
? 'Low VRAM mode is only available for CUDA and ROCm backends.'
|
||||
!isGpuAcceleration
|
||||
? 'Low VRAM mode is only available for CUDA and ROCm accelerations.'
|
||||
: 'Avoid offloading KV Cache or scratch buffers to VRAM. Allows more layers to fit, but may result in a speed loss.'
|
||||
}
|
||||
disabled={!isGpuBackend}
|
||||
disabled={!isGpuAcceleration}
|
||||
/>
|
||||
|
||||
<CheckboxWithTooltip
|
||||
|
|
|
|||
|
|
@ -4,19 +4,19 @@ import { InfoTooltip } from '@/components/InfoTooltip';
|
|||
import { AccelerationSelectItem } from '@/components/screens/Launch/GeneralTab/AccelerationSelectItem';
|
||||
import { GpuDeviceSelector } from '@/components/screens/Launch/GeneralTab/GpuDeviceSelector';
|
||||
import { useLaunchConfigStore } from '@/stores/launchConfig';
|
||||
import type { AccelerationOption } from '@/types';
|
||||
import type { Acceleration, AccelerationOption } from '@/types';
|
||||
import { Select } from '@/components/Select';
|
||||
|
||||
export const AccelerationSelector = () => {
|
||||
const {
|
||||
backend,
|
||||
acceleration,
|
||||
gpuLayers,
|
||||
autoGpuLayers,
|
||||
model,
|
||||
contextSize,
|
||||
gpuDeviceSelection,
|
||||
flashattention,
|
||||
setBackend,
|
||||
setAcceleration,
|
||||
setGpuLayers,
|
||||
setAutoGpuLayers,
|
||||
} = useLaunchConfigStore();
|
||||
|
|
@ -57,9 +57,9 @@ export const AccelerationSelector = () => {
|
|||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
if (availableAccelerations.length > 0 && backend) {
|
||||
if (availableAccelerations.length > 0 && acceleration) {
|
||||
const isAccelerationAvailable = availableAccelerations.some(
|
||||
(a) => a.value === backend && !a.disabled
|
||||
(a) => a.value === acceleration && !a.disabled
|
||||
);
|
||||
|
||||
if (!isAccelerationAvailable) {
|
||||
|
|
@ -67,15 +67,15 @@ export const AccelerationSelector = () => {
|
|||
(a) => !a.disabled
|
||||
);
|
||||
if (fallbackAcceleration) {
|
||||
setBackend(fallbackAcceleration.value);
|
||||
setAcceleration(fallbackAcceleration.value as Acceleration);
|
||||
}
|
||||
}
|
||||
}
|
||||
}, [availableAccelerations, backend, setBackend]);
|
||||
}, [availableAccelerations, acceleration, setAcceleration]);
|
||||
|
||||
useEffect(() => {
|
||||
const calculateLayers = async () => {
|
||||
const isCpuOnly = backend === 'cpu' && !isMac;
|
||||
const isCpuOnly = acceleration === 'cpu' && !isMac;
|
||||
if (
|
||||
!autoGpuLayers ||
|
||||
!model ||
|
||||
|
|
@ -118,7 +118,8 @@ export const AccelerationSelector = () => {
|
|||
model,
|
||||
contextSize,
|
||||
availableVramGB,
|
||||
flashattention
|
||||
flashattention,
|
||||
acceleration
|
||||
);
|
||||
|
||||
setGpuLayers(result.recommendedLayers);
|
||||
|
|
@ -137,7 +138,7 @@ export const AccelerationSelector = () => {
|
|||
autoGpuLayers,
|
||||
model,
|
||||
contextSize,
|
||||
backend,
|
||||
acceleration,
|
||||
gpuDeviceSelection,
|
||||
flashattention,
|
||||
isLoadingAccelerations,
|
||||
|
|
@ -163,14 +164,14 @@ export const AccelerationSelector = () => {
|
|||
}
|
||||
value={
|
||||
availableAccelerations.some(
|
||||
(a) => a.value === backend && !a.disabled
|
||||
(a) => a.value === acceleration && !a.disabled
|
||||
)
|
||||
? backend
|
||||
? acceleration
|
||||
: null
|
||||
}
|
||||
onChange={(value) => {
|
||||
if (value) {
|
||||
setBackend(value);
|
||||
setAcceleration(value as Acceleration);
|
||||
}
|
||||
}}
|
||||
data={availableAccelerations.map((a) => ({
|
||||
|
|
@ -223,7 +224,7 @@ export const AccelerationSelector = () => {
|
|||
step={1}
|
||||
size="sm"
|
||||
w={80}
|
||||
disabled={autoGpuLayers || (backend === 'cpu' && !isMac)}
|
||||
disabled={autoGpuLayers || (acceleration === 'cpu' && !isMac)}
|
||||
/>
|
||||
<Group gap="xs" align="center">
|
||||
<Checkbox
|
||||
|
|
@ -233,7 +234,7 @@ export const AccelerationSelector = () => {
|
|||
setAutoGpuLayers(event.currentTarget.checked)
|
||||
}
|
||||
size="sm"
|
||||
disabled={backend === 'cpu' && !isMac}
|
||||
disabled={acceleration === 'cpu' && !isMac}
|
||||
/>
|
||||
<InfoTooltip label="Automatically calculate optimal GPU layers based on available VRAM. The calculation accounts for model size, context size and flash attention." />
|
||||
</Group>
|
||||
|
|
|
|||
|
|
@ -4,8 +4,8 @@ import { useLaunchConfigStore } from '@/stores/launchConfig';
|
|||
import { Select } from '@/components/Select';
|
||||
import type { AccelerationOption } from '@/types';
|
||||
|
||||
const GPU_BACKENDS = ['cuda', 'rocm', 'vulkan', 'clblast'];
|
||||
const TENSOR_SPLIT_BACKENDS = ['cuda', 'rocm', 'vulkan'];
|
||||
const GPU_ACCELERATIONS = ['cuda', 'rocm', 'vulkan', 'clblast'];
|
||||
const TENSOR_SPLIT_ACCELERATIONS = ['cuda', 'rocm', 'vulkan'];
|
||||
|
||||
interface GpuDeviceSelectorProps {
|
||||
availableAccelerations: AccelerationOption[];
|
||||
|
|
@ -15,7 +15,7 @@ export const GpuDeviceSelector = ({
|
|||
availableAccelerations,
|
||||
}: GpuDeviceSelectorProps) => {
|
||||
const {
|
||||
backend,
|
||||
acceleration,
|
||||
gpuDeviceSelection,
|
||||
tensorSplit,
|
||||
setGpuDeviceSelection,
|
||||
|
|
@ -23,13 +23,17 @@ export const GpuDeviceSelector = ({
|
|||
} = useLaunchConfigStore();
|
||||
|
||||
const selectedAcceleration = availableAccelerations.find(
|
||||
(a) => a.value === backend
|
||||
(a) => a.value === acceleration
|
||||
);
|
||||
const isGpu = GPU_BACKENDS.includes(backend);
|
||||
const isGpuAcceleration = GPU_ACCELERATIONS.includes(acceleration);
|
||||
|
||||
const getDiscreteDeviceCount = () => {
|
||||
if (!selectedAcceleration?.devices) return 0;
|
||||
if (backend === 'clblast' || backend === 'vulkan' || backend === 'rocm') {
|
||||
if (
|
||||
acceleration === 'clblast' ||
|
||||
acceleration === 'vulkan' ||
|
||||
acceleration === 'rocm'
|
||||
) {
|
||||
return selectedAcceleration.devices.filter(
|
||||
(device) => typeof device === 'string' || !device.isIntegrated
|
||||
).length;
|
||||
|
|
@ -39,24 +43,26 @@ export const GpuDeviceSelector = ({
|
|||
|
||||
const hasMultipleDevices = getDiscreteDeviceCount() > 1;
|
||||
const showTensorSplit =
|
||||
TENSOR_SPLIT_BACKENDS.includes(backend) &&
|
||||
TENSOR_SPLIT_ACCELERATIONS.includes(acceleration) &&
|
||||
hasMultipleDevices &&
|
||||
gpuDeviceSelection === 'all';
|
||||
|
||||
if (!isGpu || !hasMultipleDevices) {
|
||||
if (!isGpuAcceleration || !hasMultipleDevices) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const deviceOptions = (() => {
|
||||
if (!selectedAcceleration?.devices) return [];
|
||||
|
||||
if (backend === 'clblast') {
|
||||
if (acceleration === 'clblast') {
|
||||
return selectedAcceleration.devices
|
||||
.map((device, index) => {
|
||||
if (typeof device === 'object' && device.isIntegrated) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const deviceName = typeof device === 'string' ? device : device.name;
|
||||
|
||||
return {
|
||||
value: index.toString(),
|
||||
label: `GPU ${index}: ${deviceName}`,
|
||||
|
|
@ -67,7 +73,7 @@ export const GpuDeviceSelector = ({
|
|||
);
|
||||
}
|
||||
|
||||
if (backend === 'vulkan' || backend === 'rocm') {
|
||||
if (acceleration === 'vulkan' || acceleration === 'rocm') {
|
||||
const discreteDeviceOptions = selectedAcceleration.devices
|
||||
.map((device, index) => {
|
||||
if (typeof device === 'object' && device.isIntegrated) {
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import { ImageGenerationTab } from '@/components/screens/Launch/ImageGenerationT
|
|||
import { WarningDisplay } from '@/components/WarningDisplay';
|
||||
import { ConfigFileManager } from '@/components/screens/Launch/ConfigFileManager';
|
||||
import { DEFAULT_MODEL_URL } from '@/constants';
|
||||
import type { ConfigFile } from '@/types';
|
||||
import type { Acceleration, ConfigFile } from '@/types';
|
||||
|
||||
interface LaunchScreenProps {
|
||||
onLaunch: () => void;
|
||||
|
|
@ -47,7 +47,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
|
|||
quantmatmul,
|
||||
usemmap,
|
||||
debugmode,
|
||||
backend,
|
||||
acceleration,
|
||||
gpuDeviceSelection,
|
||||
gpuPlatform,
|
||||
tensorSplit,
|
||||
|
|
@ -66,7 +66,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
|
|||
parseAndApplyConfigFile,
|
||||
loadConfigFromFile,
|
||||
setModel,
|
||||
setBackend,
|
||||
setAcceleration,
|
||||
} = useLaunchConfigStore();
|
||||
|
||||
const { isLaunching, handleLaunch } = useLaunchLogic({
|
||||
|
|
@ -78,7 +78,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
|
|||
const { warnings: combinedWarnings } = useWarnings({
|
||||
model,
|
||||
sdmodel,
|
||||
backend,
|
||||
acceleration,
|
||||
configLoaded,
|
||||
});
|
||||
|
||||
|
|
@ -86,10 +86,10 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
|
|||
const accelerations =
|
||||
await window.electronAPI.kobold.getAvailableAccelerations();
|
||||
|
||||
if (!backend && accelerations && accelerations.length > 0) {
|
||||
setBackend(accelerations[0].value);
|
||||
if (!acceleration && accelerations && accelerations.length > 0) {
|
||||
setAcceleration(accelerations[0].value as Acceleration);
|
||||
}
|
||||
}, [backend, setBackend]);
|
||||
}, [acceleration, setAcceleration]);
|
||||
|
||||
const setInitialDefaults = useCallback(
|
||||
(currentModel: string, currentSdModel: string) => {
|
||||
|
|
@ -177,9 +177,9 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
|
|||
debugmode,
|
||||
moecpu,
|
||||
moeexperts,
|
||||
usecuda: backend === 'cuda' || backend === 'rocm',
|
||||
usevulkan: backend === 'vulkan',
|
||||
useclblast: backend === 'clblast',
|
||||
usecuda: acceleration === 'cuda' || acceleration === 'rocm',
|
||||
usevulkan: acceleration === 'vulkan',
|
||||
useclblast: acceleration === 'clblast',
|
||||
gpuDeviceSelection,
|
||||
tensorSplit,
|
||||
sdmodel,
|
||||
|
|
@ -295,7 +295,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
|
|||
flashattention,
|
||||
noavx2,
|
||||
failsafe,
|
||||
backend,
|
||||
acceleration,
|
||||
lowvram,
|
||||
gpuDeviceSelection,
|
||||
gpuPlatform,
|
||||
|
|
@ -333,7 +333,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
|
|||
flashattention,
|
||||
noavx2,
|
||||
failsafe,
|
||||
backend,
|
||||
acceleration,
|
||||
lowvram,
|
||||
gpuDeviceSelection,
|
||||
gpuPlatform,
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@ import {
|
|||
Loader,
|
||||
Center,
|
||||
Anchor,
|
||||
Divider,
|
||||
} from '@mantine/core';
|
||||
import { ExternalLink } from 'lucide-react';
|
||||
import { DownloadCard } from '@/components/DownloadCard';
|
||||
|
|
@ -327,8 +326,6 @@ export const BackendsTab = () => {
|
|||
</Card>
|
||||
)}
|
||||
|
||||
<Divider my="md" />
|
||||
|
||||
<ImportBackendLink
|
||||
disabled={isDisabled}
|
||||
onSuccess={loadInstalledBackends}
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ interface LaunchArgs {
|
|||
flashattention: boolean;
|
||||
noavx2: boolean;
|
||||
failsafe: boolean;
|
||||
backend: string;
|
||||
acceleration: string;
|
||||
lowvram: boolean;
|
||||
gpuDeviceSelection: string;
|
||||
gpuPlatform: number;
|
||||
|
|
@ -97,9 +97,10 @@ const buildModelArgs = (
|
|||
const buildConfigArgs = (isImageMode: boolean, launchArgs: LaunchArgs) => {
|
||||
const args: string[] = [];
|
||||
|
||||
const isGpuBackend = launchArgs.backend && launchArgs.backend !== 'cpu';
|
||||
const isGpuAcceleration =
|
||||
launchArgs.acceleration && launchArgs.acceleration !== 'cpu';
|
||||
|
||||
if (isGpuBackend) {
|
||||
if (isGpuAcceleration) {
|
||||
if (launchArgs.autoGpuLayers && launchArgs.gpuLayers > 0) {
|
||||
args.push('--gpulayers', launchArgs.gpuLayers.toString());
|
||||
} else if (!launchArgs.autoGpuLayers && launchArgs.gpuLayers > 0) {
|
||||
|
|
@ -213,8 +214,8 @@ const buildBackendArgs = (launchArgs: LaunchArgs, platform: string) => {
|
|||
return args;
|
||||
}
|
||||
|
||||
if (!launchArgs.backend || launchArgs.backend === 'cpu') {
|
||||
if (launchArgs.backend === 'cpu') {
|
||||
if (!launchArgs.acceleration || launchArgs.acceleration === 'cpu') {
|
||||
if (launchArgs.acceleration === 'cpu') {
|
||||
args.push('--usecpu');
|
||||
}
|
||||
|
||||
|
|
@ -222,23 +223,26 @@ const buildBackendArgs = (launchArgs: LaunchArgs, platform: string) => {
|
|||
}
|
||||
|
||||
const isTensorSplitSupported =
|
||||
launchArgs.backend === 'cuda' ||
|
||||
launchArgs.backend === 'rocm' ||
|
||||
launchArgs.backend === 'vulkan';
|
||||
launchArgs.acceleration === 'cuda' ||
|
||||
launchArgs.acceleration === 'rocm' ||
|
||||
launchArgs.acceleration === 'vulkan';
|
||||
|
||||
if (launchArgs.backend === 'cuda' || launchArgs.backend === 'rocm') {
|
||||
if (
|
||||
launchArgs.acceleration === 'cuda' ||
|
||||
launchArgs.acceleration === 'rocm'
|
||||
) {
|
||||
args.push(...buildCudaArgs(launchArgs));
|
||||
|
||||
if (launchArgs.gpuDeviceSelection === 'all' && isTensorSplitSupported) {
|
||||
addTensorSplitArgs(args, launchArgs);
|
||||
}
|
||||
} else if (launchArgs.backend === 'vulkan') {
|
||||
} else if (launchArgs.acceleration === 'vulkan') {
|
||||
args.push(...buildVulkanArgs());
|
||||
|
||||
if (launchArgs.gpuDeviceSelection === 'all' && isTensorSplitSupported) {
|
||||
addTensorSplitArgs(args, launchArgs);
|
||||
}
|
||||
} else if (launchArgs.backend === 'clblast') {
|
||||
} else if (launchArgs.acceleration === 'clblast') {
|
||||
args.push(...buildClblastArgs(launchArgs));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ export interface Warning {
|
|||
interface UseWarningsProps {
|
||||
model: string;
|
||||
sdmodel: string;
|
||||
backend?: string;
|
||||
acceleration?: string;
|
||||
configLoaded?: boolean;
|
||||
}
|
||||
|
||||
|
|
@ -92,7 +92,7 @@ const checkGpuWarnings = async (
|
|||
}
|
||||
|
||||
warnings.push({
|
||||
type: 'warning',
|
||||
type: 'info',
|
||||
message,
|
||||
});
|
||||
}
|
||||
|
|
@ -100,11 +100,13 @@ const checkGpuWarnings = async (
|
|||
return warnings;
|
||||
};
|
||||
|
||||
const checkVramWarnings = async (backend: string): Promise<Warning[]> => {
|
||||
const checkVramWarnings = async (acceleration: string): Promise<Warning[]> => {
|
||||
const warnings: Warning[] = [];
|
||||
const isGpuBackend = ['cuda', 'rocm', 'vulkan', 'clblast'].includes(backend);
|
||||
const isGpuAcceleration = ['cuda', 'rocm', 'vulkan', 'clblast'].includes(
|
||||
acceleration
|
||||
);
|
||||
|
||||
if (isGpuBackend) {
|
||||
if (isGpuAcceleration) {
|
||||
const gpuMemoryInfo = await window.electronAPI.kobold.detectGPUMemory();
|
||||
|
||||
if (gpuMemoryInfo) {
|
||||
|
|
@ -133,12 +135,12 @@ const checkVramWarnings = async (backend: string): Promise<Warning[]> => {
|
|||
};
|
||||
|
||||
const checkCpuWarnings = (
|
||||
backend: string,
|
||||
acceleration: string,
|
||||
availableAccelerations: AccelerationOption[]
|
||||
) => {
|
||||
const warnings: Warning[] = [];
|
||||
|
||||
if (backend !== 'cpu') {
|
||||
if (acceleration !== 'cpu') {
|
||||
return warnings;
|
||||
}
|
||||
|
||||
|
|
@ -157,7 +159,7 @@ const checkCpuWarnings = (
|
|||
};
|
||||
|
||||
const checkBackendWarnings = async (params?: {
|
||||
backend: string;
|
||||
acceleration: string;
|
||||
cpuCapabilities: CPUCapabilities | null;
|
||||
availableAccelerations: AccelerationOption[];
|
||||
}) => {
|
||||
|
|
@ -181,13 +183,16 @@ const checkBackendWarnings = async (params?: {
|
|||
warnings.push(...gpuWarnings);
|
||||
|
||||
if (params) {
|
||||
const { backend, cpuCapabilities, availableAccelerations } = params;
|
||||
const { acceleration, cpuCapabilities, availableAccelerations } = params;
|
||||
|
||||
const vramWarnings = await checkVramWarnings(backend);
|
||||
const vramWarnings = await checkVramWarnings(acceleration);
|
||||
warnings.push(...vramWarnings);
|
||||
|
||||
if (cpuCapabilities) {
|
||||
const cpuWarnings = checkCpuWarnings(backend, availableAccelerations);
|
||||
const cpuWarnings = checkCpuWarnings(
|
||||
acceleration,
|
||||
availableAccelerations
|
||||
);
|
||||
warnings.push(...cpuWarnings);
|
||||
}
|
||||
}
|
||||
|
|
@ -198,7 +203,7 @@ const checkBackendWarnings = async (params?: {
|
|||
export const useWarnings = ({
|
||||
model,
|
||||
sdmodel,
|
||||
backend,
|
||||
acceleration,
|
||||
configLoaded = false,
|
||||
}: UseWarningsProps) => {
|
||||
const [backendWarnings, setBackendWarnings] = useState<Warning[]>([]);
|
||||
|
|
@ -209,7 +214,7 @@ export const useWarnings = ({
|
|||
);
|
||||
|
||||
const updateBackendWarnings = useCallback(async () => {
|
||||
if (!backend) {
|
||||
if (!acceleration) {
|
||||
setBackendWarnings([]);
|
||||
return;
|
||||
}
|
||||
|
|
@ -220,13 +225,13 @@ export const useWarnings = ({
|
|||
]);
|
||||
|
||||
const result = await checkBackendWarnings({
|
||||
backend,
|
||||
acceleration,
|
||||
cpuCapabilities: cpuCapabilitiesResult,
|
||||
availableAccelerations,
|
||||
});
|
||||
|
||||
setBackendWarnings(result);
|
||||
}, [backend]);
|
||||
}, [acceleration]);
|
||||
|
||||
useEffect(() => {
|
||||
// eslint-disable-next-line react-hooks/set-state-in-effect
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import { ipcMain, app } from 'electron';
|
||||
import { join } from 'path';
|
||||
import { platform } from 'process';
|
||||
import type { Screen } from '@/types';
|
||||
import type { Screen, Acceleration } from '@/types';
|
||||
import {
|
||||
stopKoboldCpp,
|
||||
launchKoboldCppWithCustomFrontends,
|
||||
|
|
@ -182,13 +182,15 @@ export function setupIPCHandlers() {
|
|||
modelPath: string,
|
||||
contextSize: number,
|
||||
availableVramGB: number,
|
||||
flashAttention: boolean
|
||||
flashAttention: boolean,
|
||||
acceleration: Acceleration
|
||||
) =>
|
||||
calculateOptimalGpuLayers({
|
||||
modelPath,
|
||||
contextSize,
|
||||
availableVramGB,
|
||||
flashAttention,
|
||||
acceleration,
|
||||
})
|
||||
);
|
||||
|
||||
|
|
|
|||
|
|
@ -234,15 +234,17 @@ export async function launchKoboldCpp(
|
|||
const handleServerReady = () => {
|
||||
const isKoboldFrontend =
|
||||
frontendPreference === 'koboldcpp' ||
|
||||
frontendPreference === 'llamacpp' ||
|
||||
(!isTextMode && imageGenerationFrontendPreference === 'builtin');
|
||||
|
||||
if (isKoboldFrontend) {
|
||||
sendToRenderer('server-ready');
|
||||
}
|
||||
|
||||
readyResolve?.({ success: true, pid: child.pid });
|
||||
};
|
||||
|
||||
child.stdout?.on('data', (data) => {
|
||||
const handleOutput = (data: Buffer) => {
|
||||
const output = data.toString();
|
||||
const filtered = debugmode ? output : filterSpam(output);
|
||||
if (filtered.trim()) {
|
||||
|
|
@ -254,21 +256,10 @@ export async function launchKoboldCpp(
|
|||
hasProcessStartedSuccessfully = true;
|
||||
handleServerReady();
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
child.stderr?.on('data', (data) => {
|
||||
const output = data.toString();
|
||||
const filtered = debugmode ? output : filterSpam(output);
|
||||
if (filtered.trim()) {
|
||||
sendKoboldOutput(filtered, true);
|
||||
}
|
||||
|
||||
if (!isReady && output.includes(SERVER_READY_SIGNALS.KOBOLDCPP)) {
|
||||
isReady = true;
|
||||
hasProcessStartedSuccessfully = true;
|
||||
handleServerReady();
|
||||
}
|
||||
});
|
||||
child.stdout?.on('data', handleOutput);
|
||||
child.stderr?.on('data', handleOutput);
|
||||
|
||||
child.on('exit', (code, signal) => {
|
||||
const isCrash = signal !== null || (code !== null && code !== 0);
|
||||
|
|
|
|||
|
|
@ -270,7 +270,7 @@ export async function resolveModelPath(
|
|||
const localPath = getModelLocalPath(urlOrPath, paramType);
|
||||
|
||||
if (await pathExists(localPath)) {
|
||||
sendKoboldOutput(`Using cached model at: ${localPath}\n`);
|
||||
sendKoboldOutput(`Using cached model at: ${localPath}`);
|
||||
onProgress?.({
|
||||
type: 'complete',
|
||||
localPath,
|
||||
|
|
@ -278,14 +278,14 @@ export async function resolveModelPath(
|
|||
return localPath;
|
||||
}
|
||||
|
||||
sendKoboldOutput(`Downloading model from ${urlOrPath} to ${localPath}...\n`);
|
||||
sendKoboldOutput(`Downloading model from ${urlOrPath} to ${localPath}...`);
|
||||
|
||||
const progressCallback = onProgress || ((p: DownloadProgress) => p);
|
||||
|
||||
try {
|
||||
await downloadFile(urlOrPath, localPath, progressCallback);
|
||||
|
||||
sendKoboldOutput(`Model downloaded successfully to: ${localPath}\n\n`);
|
||||
sendKoboldOutput(`Model downloaded successfully to: ${localPath}\n`);
|
||||
progressCallback({
|
||||
type: 'complete',
|
||||
localPath,
|
||||
|
|
|
|||
|
|
@ -63,14 +63,16 @@ const koboldAPI: KoboldAPI = {
|
|||
modelPath,
|
||||
contextSize,
|
||||
availableVramGB,
|
||||
flashAttention
|
||||
flashAttention,
|
||||
acceleration
|
||||
) =>
|
||||
ipcRenderer.invoke(
|
||||
'kobold:calculateOptimalLayers',
|
||||
modelPath,
|
||||
contextSize,
|
||||
availableVramGB,
|
||||
flashAttention
|
||||
flashAttention,
|
||||
acceleration
|
||||
),
|
||||
stopKoboldCpp: () => ipcRenderer.invoke('kobold:stopKoboldCpp'),
|
||||
onDownloadProgress: (callback) => {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import { create } from 'zustand';
|
||||
import type { ConfigFile, SdConvDirectMode } from '@/types';
|
||||
import type { Acceleration, ConfigFile, SdConvDirectMode } from '@/types';
|
||||
import { IMAGE_MODEL_PRESETS } from '@/constants/imageModelPresets';
|
||||
import { DEFAULT_AUTO_GPU_LAYERS, DEFAULT_CONTEXT_SIZE } from '@/constants';
|
||||
|
||||
|
|
@ -25,7 +25,7 @@ interface LaunchConfigState {
|
|||
quantmatmul: boolean;
|
||||
usemmap: boolean;
|
||||
debugmode: boolean;
|
||||
backend: string;
|
||||
acceleration: Acceleration;
|
||||
gpuDeviceSelection: string;
|
||||
tensorSplit: string;
|
||||
gpuPlatform: number;
|
||||
|
|
@ -65,7 +65,7 @@ interface LaunchConfigState {
|
|||
setUsemmap: (usemmap: boolean) => void;
|
||||
setDebugmode: (debugmode: boolean) => void;
|
||||
setPreLaunchCommands: (commands: string[]) => void;
|
||||
setBackend: (backend: string) => void;
|
||||
setAcceleration: (acceleration: Acceleration) => void;
|
||||
setGpuDeviceSelection: (selection: string) => void;
|
||||
setTensorSplit: (split: string) => void;
|
||||
setGpuPlatform: (platform: number) => void;
|
||||
|
|
@ -125,7 +125,7 @@ export const useLaunchConfigStore = create<LaunchConfigState>((set, get) => ({
|
|||
quantmatmul: true,
|
||||
usemmap: true,
|
||||
debugmode: false,
|
||||
backend: '',
|
||||
acceleration: '' as Acceleration,
|
||||
gpuDeviceSelection: '0',
|
||||
tensorSplit: '',
|
||||
gpuPlatform: 0,
|
||||
|
|
@ -170,9 +170,9 @@ export const useLaunchConfigStore = create<LaunchConfigState>((set, get) => ({
|
|||
setUsemmap: (usemmap) => set({ usemmap }),
|
||||
setDebugmode: (debugmode) => set({ debugmode }),
|
||||
setPreLaunchCommands: (commands) => set({ preLaunchCommands: commands }),
|
||||
setBackend: (backend) =>
|
||||
setAcceleration: (acceleration) =>
|
||||
set({
|
||||
backend,
|
||||
acceleration,
|
||||
gpuDeviceSelection: '0',
|
||||
tensorSplit: '',
|
||||
}),
|
||||
|
|
@ -331,7 +331,7 @@ export const useLaunchConfigStore = create<LaunchConfigState>((set, get) => ({
|
|||
|
||||
if (configData.usecuda === true) {
|
||||
const gpuInfo = await window.electronAPI.kobold.detectGPU();
|
||||
updates.backend = gpuInfo.hasNVIDIA ? 'cuda' : 'rocm';
|
||||
updates.acceleration = gpuInfo.hasNVIDIA ? 'cuda' : 'rocm';
|
||||
|
||||
if (
|
||||
Array.isArray(configData.usecuda) &&
|
||||
|
|
@ -343,17 +343,17 @@ export const useLaunchConfigStore = create<LaunchConfigState>((set, get) => ({
|
|||
updates.quantmatmul = mmqMode === 'mmq';
|
||||
}
|
||||
} else if (configData.usevulkan === true) {
|
||||
updates.backend = 'vulkan';
|
||||
updates.acceleration = 'vulkan';
|
||||
} else if (
|
||||
Array.isArray(configData.useclblast) &&
|
||||
configData.useclblast.length === 2
|
||||
) {
|
||||
updates.backend = 'clblast';
|
||||
updates.acceleration = 'clblast';
|
||||
const [deviceIndex, platformIndex] = configData.useclblast;
|
||||
updates.gpuDeviceSelection = deviceIndex.toString();
|
||||
updates.gpuPlatform = platformIndex;
|
||||
} else {
|
||||
updates.backend = 'cpu';
|
||||
updates.acceleration = 'cpu';
|
||||
}
|
||||
|
||||
if (typeof configData.gpuDeviceSelection === 'string') {
|
||||
|
|
|
|||
4
src/types/electron.d.ts
vendored
4
src/types/electron.d.ts
vendored
|
|
@ -6,6 +6,7 @@ import type {
|
|||
SystemMemoryInfo,
|
||||
} from '@/types/hardware';
|
||||
import type {
|
||||
Acceleration,
|
||||
AccelerationOption,
|
||||
AccelerationSupport,
|
||||
Screen,
|
||||
|
|
@ -170,7 +171,8 @@ export interface KoboldAPI {
|
|||
modelPath: string,
|
||||
contextSize: number,
|
||||
availableVramGB: number,
|
||||
flashAttention: boolean
|
||||
flashAttention: boolean,
|
||||
acceleration: Acceleration
|
||||
) => Promise<OptimalLayersResult>;
|
||||
stopKoboldCpp: () => void;
|
||||
onDownloadProgress: (callback: (progress: number) => void) => () => void;
|
||||
|
|
|
|||
2
src/types/index.d.ts
vendored
2
src/types/index.d.ts
vendored
|
|
@ -103,6 +103,8 @@ export interface AccelerationSupport {
|
|||
cuda: boolean;
|
||||
}
|
||||
|
||||
export type Acceleration = keyof AccelerationSupport | 'cpu';
|
||||
|
||||
export interface ModelAnalysis {
|
||||
general: {
|
||||
architecture: string;
|
||||
|
|
|
|||
|
|
@ -1,30 +1,48 @@
|
|||
import { gguf } from '@huggingface/gguf';
|
||||
import { stat } from 'fs/promises';
|
||||
import type { Acceleration } from '@/types';
|
||||
|
||||
interface VramCalculationParams {
|
||||
modelPath: string;
|
||||
contextSize: number;
|
||||
availableVramGB: number;
|
||||
flashAttention?: boolean;
|
||||
acceleration: Acceleration;
|
||||
}
|
||||
|
||||
function getAccelerationOverhead(acceleration: Acceleration) {
|
||||
switch (acceleration) {
|
||||
case 'cuda':
|
||||
return { multiplier: 1.05, computeBufferGB: 0.2, headroomGB: 0.1 };
|
||||
case 'vulkan':
|
||||
return { multiplier: 1.05, computeBufferGB: 0.2, headroomGB: 0.1 };
|
||||
case 'rocm':
|
||||
return { multiplier: 1.15, computeBufferGB: 0.4, headroomGB: 0.2 };
|
||||
case 'clblast':
|
||||
return { multiplier: 1.2, computeBufferGB: 0.5, headroomGB: 0.3 };
|
||||
// eslint-disable-next-line no-comments/disallowComments
|
||||
// assuming metal on macOS which we refer to as "cpu" acceleration
|
||||
case 'cpu':
|
||||
return { multiplier: 1.05, computeBufferGB: 0.2, headroomGB: 0.1 };
|
||||
default:
|
||||
return { multiplier: 1.1, computeBufferGB: 0.3, headroomGB: 0.15 };
|
||||
}
|
||||
}
|
||||
|
||||
function estimateContextVram(
|
||||
contextSize: number,
|
||||
layers: number,
|
||||
embeddingLength: number,
|
||||
kvDim: number,
|
||||
flashAttention: boolean
|
||||
) {
|
||||
const bytesPerElement = 2;
|
||||
let kvCacheSizeBytes =
|
||||
2 * contextSize * layers * embeddingLength * bytesPerElement;
|
||||
let kvCacheSizeBytes = 2 * contextSize * layers * kvDim * bytesPerElement;
|
||||
|
||||
if (flashAttention) {
|
||||
kvCacheSizeBytes *= 0.5;
|
||||
}
|
||||
|
||||
const kvCacheSizeGB = kvCacheSizeBytes / 1024 ** 3;
|
||||
|
||||
return kvCacheSizeGB;
|
||||
return kvCacheSizeBytes / 1024 ** 3;
|
||||
}
|
||||
|
||||
export async function calculateOptimalGpuLayers({
|
||||
|
|
@ -32,6 +50,7 @@ export async function calculateOptimalGpuLayers({
|
|||
contextSize,
|
||||
availableVramGB,
|
||||
flashAttention = false,
|
||||
acceleration,
|
||||
}: VramCalculationParams) {
|
||||
const isUrl =
|
||||
modelPath.startsWith('http://') || modelPath.startsWith('https://');
|
||||
|
|
@ -75,25 +94,26 @@ export async function calculateOptimalGpuLayers({
|
|||
const headDim = embeddingLength / headCount;
|
||||
const kvDim = headCountKv * headDim;
|
||||
|
||||
const modelSizeGB = fileSize / 1024 ** 3;
|
||||
const vramPerLayerGB = modelSizeGB / totalLayers;
|
||||
const { multiplier, computeBufferGB, headroomGB } =
|
||||
getAccelerationOverhead(acceleration);
|
||||
|
||||
const headroomGB = 0.1;
|
||||
const availableForModel = availableVramGB - headroomGB;
|
||||
const modelSizeGB = fileSize / 1024 ** 3;
|
||||
const effectiveModelSizeGB = modelSizeGB * multiplier;
|
||||
const vramPerLayerGB = effectiveModelSizeGB / totalLayers;
|
||||
|
||||
const availableForModel = availableVramGB - computeBufferGB - headroomGB;
|
||||
|
||||
let recommendedLayers = 0;
|
||||
let modelVramGB = 0;
|
||||
let contextVramGB = 0;
|
||||
|
||||
for (let layers = 1; layers <= totalLayers; layers++) {
|
||||
modelVramGB = layers * vramPerLayerGB;
|
||||
contextVramGB = estimateContextVram(
|
||||
const modelVram = layers * vramPerLayerGB;
|
||||
const contextVram = estimateContextVram(
|
||||
contextSize,
|
||||
layers,
|
||||
kvDim,
|
||||
flashAttention
|
||||
);
|
||||
const totalVram = modelVramGB + contextVramGB;
|
||||
const totalVram = modelVram + contextVram;
|
||||
|
||||
if (totalVram <= availableForModel) {
|
||||
recommendedLayers = layers;
|
||||
|
|
@ -102,21 +122,20 @@ export async function calculateOptimalGpuLayers({
|
|||
}
|
||||
}
|
||||
|
||||
const finalContextVram = estimateContextVram(
|
||||
const modelVramGB = recommendedLayers * vramPerLayerGB;
|
||||
const contextVramGB = estimateContextVram(
|
||||
contextSize,
|
||||
recommendedLayers,
|
||||
kvDim,
|
||||
flashAttention
|
||||
);
|
||||
const estimatedVramUsageGB =
|
||||
recommendedLayers * vramPerLayerGB + finalContextVram;
|
||||
|
||||
return {
|
||||
recommendedLayers,
|
||||
totalLayers,
|
||||
estimatedVramUsageGB,
|
||||
modelVramGB: recommendedLayers * vramPerLayerGB,
|
||||
contextVramGB: finalContextVram,
|
||||
estimatedVramUsageGB: modelVramGB + contextVramGB + computeBufferGB,
|
||||
modelVramGB,
|
||||
contextVramGB,
|
||||
headroomGB,
|
||||
};
|
||||
}
|
||||
|
|
|
|||
22
yarn.lock
22
yarn.lock
|
|
@ -3747,8 +3747,8 @@ __metadata:
|
|||
lucide-react: "npm:^0.555.0"
|
||||
mime-types: "npm:^3.0.2"
|
||||
prettier: "npm:^3.7.4"
|
||||
react: "npm:^19.2.0"
|
||||
react-dom: "npm:^19.2.0"
|
||||
react: "npm:^19.2.1"
|
||||
react-dom: "npm:^19.2.1"
|
||||
react-error-boundary: "npm:^6.0.0"
|
||||
rollup-plugin-visualizer: "npm:^6.0.5"
|
||||
systeminformation: "npm:^5.27.11"
|
||||
|
|
@ -5593,14 +5593,14 @@ __metadata:
|
|||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"react-dom@npm:^19.2.0":
|
||||
version: 19.2.0
|
||||
resolution: "react-dom@npm:19.2.0"
|
||||
"react-dom@npm:^19.2.1":
|
||||
version: 19.2.1
|
||||
resolution: "react-dom@npm:19.2.1"
|
||||
dependencies:
|
||||
scheduler: "npm:^0.27.0"
|
||||
peerDependencies:
|
||||
react: ^19.2.0
|
||||
checksum: 10c0/fa2cae05248d01288e91523b590ce4e7635b1e13f1344e225f850d722a8da037bf0782f63b1c1d46353334e0c696909b82e582f8cad607948fde6f7646cc18d9
|
||||
react: ^19.2.1
|
||||
checksum: 10c0/e56b6b3d72314df580ca800b70a69a21c6372703c8f45d9b5451ca6519faefb2496d76ffa9c5adb94136d2bbf2fd303d0dfc208a2cd77ede3132877471af9470
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
|
|
@ -5703,10 +5703,10 @@ __metadata:
|
|||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"react@npm:^19.2.0":
|
||||
version: 19.2.0
|
||||
resolution: "react@npm:19.2.0"
|
||||
checksum: 10c0/1b6d64eacb9324725bfe1e7860cb7a6b8a34bc89a482920765ebff5c10578eb487e6b46b2f0df263bd27a25edbdae2c45e5ea5d81ae61404301c1a7192c38330
|
||||
"react@npm:^19.2.1":
|
||||
version: 19.2.1
|
||||
resolution: "react@npm:19.2.1"
|
||||
checksum: 10c0/2b5eaf407abb3db84090434c20d6c5a8e447ab7abcd8fe9eaf1ddc299babcf31284ee9db7ea5671d21c85ac5298bd632fa1a7da1ed78d5b368a537f5e1cd5d62
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue