gerbil/src/components/screens/Launch/AdvancedTab.tsx

import { Stack, Group, Text, TextInput, NumberInput } from '@mantine/core';
import { useState, useEffect } from 'react';
import { InfoTooltip } from '@/components/InfoTooltip';
import { CheckboxWithTooltip } from '@/components/CheckboxWithTooltip';
import { useLaunchConfig } from '@/hooks/useLaunchConfig';
import { Logger } from '@/utils/logger';

/**
 * Converts the value emitted by a Mantine `NumberInput` into a number.
 *
 * `NumberInput` reports `number | string`; the string form appears while the
 * field is empty or holds text that is not (yet) a valid number. Only those
 * cases fall back, so a legitimate `0` is preserved (a plain `||` default
 * would discard it).
 *
 * @param value - Raw value passed to the input's `onChange` callback.
 * @param fallback - Number to use when `value` is empty or not finite.
 * @returns The parsed finite number, or `fallback`.
 */
const parseNumberInput = (value: number | string, fallback: number): number => {
  if (typeof value === 'number') {
    return Number.isFinite(value) ? value : fallback;
  }

  const trimmed = value.trim();
  if (trimmed === '') {
    // Number('') is 0, which would masquerade as real user input.
    return fallback;
  }

  const parsed = Number(trimmed);
  return Number.isFinite(parsed) ? parsed : fallback;
};

/**
 * "Advanced" tab of the launch screen.
 *
 * Renders three sections bound to the launch configuration exposed by
 * `useLaunchConfig`: performance checkboxes, Mixture of Experts (MoE) numeric
 * settings, and a free-form field for additional command line arguments.
 *
 * On mount it asks the main process (via `window.electronAPI.kobold`) which
 * compatibility modes are supported; the "Disable AVX2" and "Failsafe"
 * checkboxes stay disabled until that answer arrives and whenever the
 * corresponding mode is reported as unsupported.
 */
export const AdvancedTab = () => {
  const {
    additionalArguments,
    noshift,
    flashattention,
    noavx2,
    failsafe,
    lowvram,
    quantmatmul,
    usemmap,
    backend,
    moecpu,
    moeexperts,
    handleAdditionalArgumentsChange,
    handleNoshiftChange,
    handleFlashattentionChange,
    handleNoavx2Change,
    handleFailsafeChange,
    handleLowvramChange,
    handleQuantmatmulChange,
    handleUsemmapChange,
    handleMoecpuChange,
    handleMoeexpertsChange,
  } = useLaunchConfig();

  // `null` until the support query below has completed.
  const [backendSupport, setBackendSupport] = useState<{
    noavx2: boolean;
    failsafe: boolean;
  } | null>(null);
  const [isLoading, setIsLoading] = useState(true);

  // QuantMatMul and Low VRAM are only offered for these two backends.
  const isGpuBackend = backend === 'cuda' || backend === 'rocm';

  useEffect(() => {
    const detectBackendSupport = async () => {
      // NOTE(review): presumably `safeExecute` logs the failure and resolves
      // to a falsy value instead of throwing; confirm against Logger.
      const support = await Logger.safeExecute(
        () => window.electronAPI.kobold.detectBackendSupport(),
        'Failed to detect backend support:'
      );

      // With no result, treat both compatibility modes as unsupported.
      setBackendSupport(
        support
          ? { noavx2: support.noavx2, failsafe: support.failsafe }
          : { noavx2: false, failsafe: false }
      );

      setIsLoading(false);
    };

    void detectBackendSupport();
  }, []);

  return (
    <Stack gap="md">
      <div>
        <Group gap="xs" align="center" mb="md">
          <Text size="sm" fw={600}>
            Performance Options
          </Text>
        </Group>
        <Stack gap="md">
          {/*
            NOTE(review): both checkboxes in this row are bound to `noshift`
            (the first shows its inverse), so toggling one flips the other.
            The "No Shift" tooltip appears to describe a different option;
            confirm whether it should be wired to another setting.
          */}
          <Group gap="lg" align="flex-start" wrap="nowrap">
            <CheckboxWithTooltip
              checked={!noshift}
              onChange={(checked) => handleNoshiftChange(!checked)}
              label="Context Shift"
              tooltip="Use Context Shifting to reduce reprocessing."
            />

            <CheckboxWithTooltip
              checked={noshift}
              onChange={handleNoshiftChange}
              label="No Shift"
              tooltip="Don't use GPU layer shifting for incomplete offloads, which may reduce model performance."
            />
          </Group>

          <Group gap="lg" align="flex-start" wrap="nowrap">
            <CheckboxWithTooltip
              checked={flashattention}
              onChange={handleFlashattentionChange}
              label="Flash Attention"
              tooltip="Enable flash attention to reduce memory usage. May produce incorrect answers for some prompts, but improves performance."
            />

            <CheckboxWithTooltip
              checked={usemmap}
              onChange={handleUsemmapChange}
              label="MMAP"
              tooltip="Use MMAP to load models when enabled."
            />
          </Group>

          <Group gap="lg" align="flex-start" wrap="nowrap">
            {/* Shown unchecked and disabled when the backend is not CUDA/ROCm. */}
            <CheckboxWithTooltip
              checked={quantmatmul && isGpuBackend}
              onChange={handleQuantmatmulChange}
              label="QuantMatMul"
              tooltip={
                !isGpuBackend
                  ? 'QuantMatMul is only available for CUDA and ROCm backends.'
                  : 'Enable MMQ mode to use finetuned kernels instead of default CuBLAS/HipBLAS for prompt processing.'
              }
              disabled={!isGpuBackend}
            />

            <CheckboxWithTooltip
              checked={lowvram && isGpuBackend}
              onChange={handleLowvramChange}
              label="Low VRAM"
              tooltip={
                !isGpuBackend
                  ? 'Low VRAM mode is only available for CUDA and ROCm backends.'
                  : 'Avoid offloading KV Cache or scratch buffers to VRAM. Allows more layers to fit, but may result in a speed loss.'
              }
              disabled={!isGpuBackend}
            />
          </Group>

          <Group gap="lg" align="flex-start" wrap="nowrap">
            {/* The "not supported" tooltip is only shown once detection has finished. */}
            <CheckboxWithTooltip
              checked={noavx2}
              onChange={handleNoavx2Change}
              label="Disable AVX2"
              tooltip={
                !backendSupport?.noavx2 && !isLoading
                  ? 'This binary does not support the no-AVX2 mode.'
                  : 'Do not use AVX2 instructions, a slower compatibility mode for older devices.'
              }
              disabled={isLoading || !backendSupport?.noavx2}
            />

            <CheckboxWithTooltip
              checked={failsafe}
              onChange={handleFailsafeChange}
              label="Failsafe"
              tooltip={
                !backendSupport?.failsafe && !isLoading
                  ? 'This binary does not support failsafe mode.'
                  : 'Use failsafe mode, extremely slow CPU only compatibility mode that should work on all devices. Can be combined with useclblast if your device supports OpenCL.'
              }
              disabled={isLoading || !backendSupport?.failsafe}
            />
          </Group>
        </Stack>
      </div>

      <div>
        <Group gap="xs" align="center" mb="md">
          <Text size="sm" fw={600}>
            Mixture of Experts (MoE) Settings
          </Text>
        </Group>
        <Stack gap="md">
          <Group gap="lg" align="flex-start" wrap="nowrap">
            <div style={{ flex: 1, minWidth: 200 }}>
              <Group gap="xs" align="center" mb="xs">
                <Text size="sm" fw={500}>
                  MoE Experts
                </Text>
                <InfoTooltip label="How many experts to use for MoE models. Set to -1 to follow GGUF metadata (default), or specify a specific number of experts." />
              </Group>
              {/*
                Only empty/invalid input falls back to -1. Coercing 0 to -1
                would trap the stepper: incrementing from -1 yields 0, which
                would immediately snap back to -1.
              */}
              <NumberInput
                value={moeexperts}
                onChange={(value) =>
                  handleMoeexpertsChange(parseNumberInput(value, -1))
                }
                min={-1}
                max={128}
                step={1}
                size="sm"
                placeholder="-1"
              />
            </div>

            <div style={{ flex: 1, minWidth: 200 }}>
              <Group gap="xs" align="center" mb="xs">
                <Text size="sm" fw={500}>
                  MoE CPU Layers
                </Text>
                <InfoTooltip label="Keep the Mixture of Experts (MoE) weights of the first N layers in the CPU. Set to 0 to disable (default), or specify the number of layers to keep on CPU." />
              </Group>
              <NumberInput
                value={moecpu}
                onChange={(value) =>
                  handleMoecpuChange(parseNumberInput(value, 0))
                }
                min={0}
                max={999}
                step={1}
                size="sm"
                placeholder="0"
              />
            </div>
          </Group>
        </Stack>
      </div>

      <div>
        <Group gap="xs" align="center" mb="xs">
          <Text size="sm" fw={500}>
            Additional arguments
          </Text>
          <InfoTooltip label="Additional command line arguments to pass to the KoboldCPP binary. Leave this empty if you don't know what they are." />
        </Group>
        <TextInput
          placeholder="Additional command line arguments"
          value={additionalArguments}
          onChange={(event) =>
            handleAdditionalArgumentsChange(event.currentTarget.value)
          }
        />
      </div>
    </Stack>
  );
};