mirror of
https://github.com/lone-cloud/gerbil
synced 2026-06-03 19:54:44 -07:00
223 lines
7.5 KiB
TypeScript
223 lines
7.5 KiB
TypeScript
import { Stack, Group, Text, TextInput, NumberInput } from '@mantine/core';
|
|
import { useState, useEffect } from 'react';
|
|
import { InfoTooltip } from '@/components/InfoTooltip';
|
|
import { CheckboxWithTooltip } from '@/components/CheckboxWithTooltip';
|
|
import { useLaunchConfig } from '@/hooks/useLaunchConfig';
|
|
import { Logger } from '@/utils/logger';
|
|
|
|
/**
 * "Advanced" tab of the launch configuration UI.
 *
 * Renders three sections, all backed by values and change handlers obtained
 * from `useLaunchConfig()`:
 *  - Performance Options: a grid of checkboxes (context shift, flash
 *    attention, MMAP, QuantMatMul, Low VRAM, AVX2 / failsafe modes).
 *  - Mixture of Experts (MoE) Settings: two numeric inputs.
 *  - Additional arguments: a free-form text input.
 *
 * On mount the component asks `window.electronAPI.kobold.detectBackendSupport()`
 * which compatibility modes are supported; until that resolves, the
 * "Disable AVX2" and "Failsafe" checkboxes are disabled.
 */
export const AdvancedTab = () => {
  const {
    additionalArguments,
    noshift,
    flashattention,
    noavx2,
    failsafe,
    lowvram,
    quantmatmul,
    usemmap,
    backend,
    moecpu,
    moeexperts,
    handleAdditionalArgumentsChange,
    handleNoshiftChange,
    handleFlashattentionChange,
    handleNoavx2Change,
    handleFailsafeChange,
    handleLowvramChange,
    handleQuantmatmulChange,
    handleUsemmapChange,
    handleMoecpuChange,
    handleMoeexpertsChange,
  } = useLaunchConfig();

  // `null` means detection has not completed yet; once it completes this is
  // always an object (all-false when detection yielded nothing).
  const [backendSupport, setBackendSupport] = useState<{
    noavx2: boolean;
    failsafe: boolean;
  } | null>(null);

  // Derived rather than stored: loading is exactly "no detection result yet",
  // so a separate piece of state would only duplicate `backendSupport`.
  const isLoading = backendSupport === null;

  // QuantMatMul and Low VRAM are only offered for these two backends.
  const isGpuBackend = backend === 'cuda' || backend === 'rocm';

  useEffect(() => {
    // Set by the cleanup function so a late IPC response is not applied
    // after the component has unmounted.
    let cancelled = false;

    const detectBackendSupport = async () => {
      const support = await Logger.safeExecute(
        () => window.electronAPI.kobold.detectBackendSupport(),
        'Failed to detect backend support:'
      );

      if (cancelled) {
        return;
      }

      // A falsy result (presumably what `safeExecute` yields on failure —
      // confirm against Logger) is treated as "nothing supported".
      setBackendSupport(
        support
          ? { noavx2: support.noavx2, failsafe: support.failsafe }
          : { noavx2: false, failsafe: false }
      );
    };

    void detectBackendSupport();

    return () => {
      cancelled = true;
    };
  }, []);

  return (
    <Stack gap="md">
      <div>
        <Group gap="xs" align="center" mb="md">
          <Text size="sm" fw={600}>
            Performance Options
          </Text>
        </Group>
        <Stack gap="md">
          <Group gap="lg" align="flex-start" wrap="nowrap">
            {/* The stored flag is the negative ("noshift"); the checkbox shows the positive. */}
            <CheckboxWithTooltip
              checked={!noshift}
              onChange={(checked) => handleNoshiftChange(!checked)}
              label="Context Shift"
              tooltip="Use Context Shifting to reduce reprocessing."
            />

            {/*
              NOTE(review): this checkbox is bound to the same `noshift` flag as
              "Context Shift" above (just not inverted), so the two always mirror
              each other, and the tooltip talks about GPU layer shifting. Confirm
              whether a separate option was intended here.
            */}
            <CheckboxWithTooltip
              checked={noshift}
              onChange={handleNoshiftChange}
              label="No Shift"
              tooltip="Don't use GPU layer shifting for incomplete offloads, which may reduce model performance."
            />
          </Group>

          <Group gap="lg" align="flex-start" wrap="nowrap">
            <CheckboxWithTooltip
              checked={flashattention}
              onChange={handleFlashattentionChange}
              label="Flash Attention"
              tooltip="Enable flash attention to reduce memory usage. May produce incorrect answers for some prompts, but improves performance."
            />

            <CheckboxWithTooltip
              checked={usemmap}
              onChange={handleUsemmapChange}
              label="MMAP"
              tooltip="Use MMAP to load models when enabled."
            />
          </Group>

          <Group gap="lg" align="flex-start" wrap="nowrap">
            {/* Shown unchecked and disabled on non-GPU backends, regardless of the stored value. */}
            <CheckboxWithTooltip
              checked={quantmatmul && isGpuBackend}
              onChange={handleQuantmatmulChange}
              label="QuantMatMul"
              tooltip={
                !isGpuBackend
                  ? 'QuantMatMul is only available for CUDA and ROCm backends.'
                  : 'Enable MMQ mode to use finetuned kernels instead of default CuBLAS/HipBLAS for prompt processing.'
              }
              disabled={!isGpuBackend}
            />

            <CheckboxWithTooltip
              checked={lowvram && isGpuBackend}
              onChange={handleLowvramChange}
              label="Low VRAM"
              tooltip={
                !isGpuBackend
                  ? 'Low VRAM mode is only available for CUDA and ROCm backends.'
                  : 'Avoid offloading KV Cache or scratch buffers to VRAM. Allows more layers to fit, but may result in a speed loss.'
              }
              disabled={!isGpuBackend}
            />
          </Group>

          <Group gap="lg" align="flex-start" wrap="nowrap">
            {/* While detection is pending the regular tooltip is shown; the
                "not supported" text only appears once detection has finished. */}
            <CheckboxWithTooltip
              checked={noavx2}
              onChange={handleNoavx2Change}
              label="Disable AVX2"
              tooltip={
                !backendSupport?.noavx2 && !isLoading
                  ? 'This binary does not support the no-AVX2 mode.'
                  : 'Do not use AVX2 instructions, a slower compatibility mode for older devices.'
              }
              disabled={isLoading || !backendSupport?.noavx2}
            />

            <CheckboxWithTooltip
              checked={failsafe}
              onChange={handleFailsafeChange}
              label="Failsafe"
              tooltip={
                !backendSupport?.failsafe && !isLoading
                  ? 'This binary does not support failsafe mode.'
                  : 'Use failsafe mode, extremely slow CPU only compatibility mode that should work on all devices. Can be combined with useclblast if your device supports OpenCL.'
              }
              disabled={isLoading || !backendSupport?.failsafe}
            />
          </Group>
        </Stack>
      </div>

      <div>
        <Group gap="xs" align="center" mb="md">
          <Text size="sm" fw={600}>
            Mixture of Experts (MoE) Settings
          </Text>
        </Group>
        <Stack gap="md">
          <Group gap="lg" align="flex-start" wrap="nowrap">
            <div style={{ flex: 1, minWidth: 200 }}>
              <Group gap="xs" align="center" mb="xs">
                <Text size="sm" fw={500}>
                  MoE Experts
                </Text>
                <InfoTooltip label="How many experts to use for MoE models. Set to -1 to follow GGUF metadata (default), or specify a specific number of experts." />
              </Group>
              {/* `Number(value) || -1`: empty, non-numeric and 0 input all fall back to -1. */}
              <NumberInput
                value={moeexperts}
                onChange={(value) =>
                  handleMoeexpertsChange(Number(value) || -1)
                }
                min={-1}
                max={128}
                step={1}
                size="sm"
                placeholder="-1"
              />
            </div>

            <div style={{ flex: 1, minWidth: 200 }}>
              <Group gap="xs" align="center" mb="xs">
                <Text size="sm" fw={500}>
                  MoE CPU Layers
                </Text>
                <InfoTooltip label="Keep the Mixture of Experts (MoE) weights of the first N layers in the CPU. Set to 0 to disable (default), or specify the number of layers to keep on CPU." />
              </Group>
              {/* Empty or non-numeric input falls back to 0. */}
              <NumberInput
                value={moecpu}
                onChange={(value) => handleMoecpuChange(Number(value) || 0)}
                min={0}
                max={999}
                step={1}
                size="sm"
                placeholder="0"
              />
            </div>
          </Group>
        </Stack>
      </div>

      <div>
        <Group gap="xs" align="center" mb="xs">
          <Text size="sm" fw={500}>
            Additional arguments
          </Text>
          <InfoTooltip label="Additional command line arguments to pass to the KoboldCPP binary. Leave this empty if you don't know what they are." />
        </Group>
        <TextInput
          placeholder="Additional command line arguments"
          value={additionalArguments}
          onChange={(event) =>
            handleAdditionalArgumentsChange(event.currentTarget.value)
          }
        />
      </div>
    </Stack>
  );
};
|