update sdui, new performance tab, start patching llama.cpp UI, lock openwebui to 0.6.41, add a new "clear data" button for openwebui select, new smartcache + pipelineparallel UI performance options

2026-06-03 19:54:44 -07:00 · 2025-12-22 02:22:18 -08:00 · 2025-12-22 02:22:18 -08:00 · 01668ea12a
commit 01668ea12a
parent 00276b0e99
17 changed files with 377 additions and 170 deletions
--- a/assets/kcpp_sdui.embd
+++ b/assets/kcpp_sdui.embd
--- a/assets/lcpp.gz.embd
+++ b/assets/lcpp.gz.embd
--- a/package.json
+++ b/package.json
@ -1,7 +1,7 @@
 {
  "name": "gerbil",
  "productName": "Gerbil",
-  "version": "1.16.4",
+  "version": "1.17.0",
  "description": "Run Large Language Models locally",
  "main": "out/main/index.js",
  "homepage": "./",
@ -41,7 +41,7 @@
  "dependencies": {
    "@codemirror/search": "^6.5.11",
    "@codemirror/theme-one-dark": "^6.1.3",
-    "@codemirror/view": "^6.39.4",
+    "@codemirror/view": "^6.39.5",
    "@fontsource/inter": "^5.2.8",
    "@huggingface/gguf": "^0.3.2",
    "@mantine/core": "^8.3.10",
@ -49,7 +49,7 @@
    "@uiw/react-codemirror": "^4.25.4",
    "electron-updater": "^6.6.2",
    "execa": "^9.6.1",
-    "lucide-react": "^0.561.0",
+    "lucide-react": "^0.562.0",
    "mime-types": "^3.0.2",
    "react": "^19.2.3",
    "react-dom": "^19.2.3",
@ -115,6 +115,10 @@
      {
        "from": "assets/kcpp_sdui.embd",
        "to": "assets/kcpp_sdui.embd"
+      },
+      {
+        "from": "assets/lcpp.gz.embd",
+        "to": "assets/lcpp.gz.embd"
      }
    ],
    "extraResources": [
--- a/src/components/screens/Launch/AdvancedTab.tsx
+++ b/src/components/screens/Launch/AdvancedTab.tsx
@ -3,7 +3,6 @@ import {
  Group,
  Text,
  TextInput,
-  NumberInput,
  Button,
  SimpleGrid,
  ActionIcon,
@ -19,29 +18,14 @@ export const AdvancedTab = () => {
  const {
    additionalArguments,
    preLaunchCommands,
-    noshift,
-    flashattention,
    noavx2,
    failsafe,
-    lowvram,
-    quantmatmul,
-    usemmap,
    debugmode,
-    acceleration,
-    moecpu,
-    moeexperts,
    setAdditionalArguments,
    setPreLaunchCommands,
-    setNoshift,
-    setFlashattention,
    setNoavx2,
    setFailsafe,
-    setLowvram,
-    setQuantmatmul,
-    setUsemmap,
    setDebugmode,
-    setMoecpu,
-    setMoeexperts,
  } = useLaunchConfigStore();
  const [commandLineModalOpen, setCommandLineModalOpen] = useState(false);
  const [backendSupport, setBackendSupport] = useState<{
@ -58,8 +42,6 @@ export const AdvancedTab = () => {
    setAdditionalArguments(updatedArgs);
  };

-  const isGpuAcceleration = acceleration === 'cuda' || acceleration === 'rocm';
-
  useEffect(() => {
    const detectAccelerationSupport = async () => {
      const support =
@ -84,20 +66,6 @@ export const AdvancedTab = () => {
    <Stack gap="md">
      <div>
        <SimpleGrid cols={3} spacing="lg" verticalSpacing="md">
-          <CheckboxWithTooltip
-            checked={!noshift}
-            onChange={(checked) => setNoshift(!checked)}
-            label="Context Shift"
-            tooltip="Use Context Shifting to reduce reprocessing."
-          />
-
-          <CheckboxWithTooltip
-            checked={noshift}
-            onChange={setNoshift}
-            label="No Shift"
-            tooltip="Don't use GPU layer shifting for incomplete offloads, which may reduce model performance."
-          />
-
          <CheckboxWithTooltip
            checked={noavx2}
            onChange={setNoavx2}
@ -110,25 +78,6 @@ export const AdvancedTab = () => {
            disabled={isLoading || !backendSupport?.noavx2}
          />

-          <CheckboxWithTooltip
-            checked={usemmap}
-            onChange={setUsemmap}
-            label="MMAP"
-            tooltip="Use MMAP to load models when enabled."
-          />
-
-          <CheckboxWithTooltip
-            checked={quantmatmul && isGpuAcceleration}
-            onChange={setQuantmatmul}
-            label="QuantMatMul"
-            tooltip={
-              !isGpuAcceleration
-                ? 'QuantMatMul is only available for CUDA and ROCm accelerations.'
-                : 'Enable MMQ mode to use finetuned kernels instead of default CuBLAS/HipBLAS for prompt processing.'
-            }
-            disabled={!isGpuAcceleration}
-          />
-
          <CheckboxWithTooltip
            checked={failsafe}
            onChange={setFailsafe}
@ -141,25 +90,6 @@ export const AdvancedTab = () => {
            disabled={isLoading || !backendSupport?.failsafe}
          />

-          <CheckboxWithTooltip
-            checked={flashattention}
-            onChange={setFlashattention}
-            label="Flash Attention"
-            tooltip="Enable flash attention to reduce memory usage. May produce incorrect answers for some prompts, but improves performance."
-          />
-
-          <CheckboxWithTooltip
-            checked={lowvram && isGpuAcceleration}
-            onChange={setLowvram}
-            label="Low VRAM"
-            tooltip={
-              !isGpuAcceleration
-                ? 'Low VRAM mode is only available for CUDA and ROCm accelerations.'
-                : 'Avoid offloading KV Cache or scratch buffers to VRAM. Allows more layers to fit, but may result in a speed loss.'
-            }
-            disabled={!isGpuAcceleration}
-          />
-
          <CheckboxWithTooltip
            checked={debugmode}
            onChange={setDebugmode}
@ -169,46 +99,6 @@ export const AdvancedTab = () => {
        </SimpleGrid>
      </div>

-      <div>
-        <Stack gap="md">
-          <Group gap="lg" align="flex-start" wrap="nowrap">
-            <div style={{ flex: 1, minWidth: 200 }}>
-              <Group gap="xs" align="center" mb="xs">
-                <Text size="sm" fw={500}>
-                  MoE Experts
-                </Text>
-                <InfoTooltip label="How many experts to use for MoE models. Set to -1 to follow GGUF metadata (default), or specify a specific number of experts." />
-              </Group>
-              <NumberInput
-                value={moeexperts}
-                onChange={(value) => setMoeexperts(Number(value))}
-                min={-1}
-                max={128}
-                step={1}
-                size="sm"
-              />
-            </div>
-
-            <div style={{ flex: 1, minWidth: 200 }}>
-              <Group gap="xs" align="center" mb="xs">
-                <Text size="sm" fw={500}>
-                  MoE CPU Layers
-                </Text>
-                <InfoTooltip label="Keep the Mixture of Experts (MoE) weights of the first N layers in the CPU. Set to 0 to disable (default), or specify the number of layers to keep on CPU." />
-              </Group>
-              <NumberInput
-                value={moecpu}
-                onChange={(value) => setMoecpu(Number(value) || 0)}
-                min={0}
-                max={999}
-                step={1}
-                size="sm"
-              />
-            </div>
-          </Group>
-        </Stack>
-      </div>
-
      <div>
        <Group mb="xs" justify="space-between">
          <Group>
--- a/src/components/screens/Launch/CommandLineArgumentsModal.tsx
+++ b/src/components/screens/Launch/CommandLineArgumentsModal.tsx
@ -67,6 +67,8 @@ const UI_COVERED_ARGS = new Set([
  '--tensor_split',
  '--debugmode',
  '--lowvram',
+  '--smartcache',
+  '--pipelineparallel',
 ] as const) as ReadonlySet<string>;

 const IGNORED_ARGS = new Set([
@ -88,6 +90,7 @@ const IGNORED_ARGS = new Set([
  '--no-mmap',
  '--sdnotile',
  '--testmemory',
+  '--forceversion',
 ] as const) as ReadonlySet<string>;

 const COMMAND_LINE_ARGUMENTS = [
@ -377,7 +380,21 @@ const COMMAND_LINE_ARGUMENTS = [
    description:
      'How many tokens to generate by default, if not specified. Must be smaller than context size. Usually, your frontend GUI will override this.',
    type: 'int',
-    default: 768,
+    default: 896,
+    category: 'Performance',
+  },
+  {
+    flag: '--smartcache',
+    description:
+      'Enables intelligent context switching by saving KV cache snapshots to RAM. Requires fast forwarding.',
+    type: 'boolean',
+    category: 'Performance',
+  },
+  {
+    flag: '--pipelineparallel',
+    description:
+      'Enable Pipeline Parallelism for faster multigpu speeds but using more memory, only active for multigpu.',
+    type: 'boolean',
    category: 'Performance',
  },
  {
@ -433,15 +450,6 @@ const COMMAND_LINE_ARGUMENTS = [
    type: 'boolean',
    category: 'Advanced',
  },
-  {
-    flag: '--forceversion',
-    description:
-      'If the model file format detection fails (e.g. rogue modified model) you can set this to override the detected format (enter desired version, e.g. 401 for GPTNeoX-Type2).',
-    metavar: '[version]',
-    type: 'int',
-    default: 0,
-    category: 'Advanced',
-  },
  {
    flag: '--smartcontext',
    description:
--- a/src/components/screens/Launch/PerformanceTab.tsx
+++ b/src/components/screens/Launch/PerformanceTab.tsx
@ -0,0 +1,153 @@
+import { Stack, Group, Text, NumberInput, SimpleGrid } from '@mantine/core';
+import { InfoTooltip } from '@/components/InfoTooltip';
+import { CheckboxWithTooltip } from '@/components/CheckboxWithTooltip';
+import { useLaunchConfigStore } from '@/stores/launchConfig';
+
+/**
+ * Launch-screen tab that groups the performance-related launch options:
+ * context shifting, smart cache, flash attention, low VRAM, QuantMatMul,
+ * pipeline parallelism, MMAP and the MoE settings.
+ *
+ * Every value is read from, and written back to, the launch config store.
+ */
+export const PerformanceTab = () => {
+  const {
+    noshift,
+    flashattention,
+    lowvram,
+    quantmatmul,
+    usemmap,
+    acceleration,
+    moecpu,
+    moeexperts,
+    smartcache,
+    pipelineparallel,
+    setNoshift,
+    setFlashattention,
+    setLowvram,
+    setQuantmatmul,
+    setUsemmap,
+    setMoecpu,
+    setMoeexperts,
+    setSmartcache,
+    setPipelineparallel,
+  } = useLaunchConfigStore();
+
+  // Low VRAM, QuantMatMul and Pipeline Parallel are only selectable when the
+  // chosen acceleration is CUDA or ROCm; otherwise they render unchecked and
+  // disabled, with a tooltip explaining why.
+  const isGpuAcceleration = acceleration === 'cuda' || acceleration === 'rocm';
+
+  return (
+    <Stack gap="md">
+      <div>
+        <SimpleGrid cols={3} spacing="lg" verticalSpacing="md">
+          {/* "Context Shift" shows the inverse of the stored `noshift` flag. */}
+          <CheckboxWithTooltip
+            checked={!noshift}
+            onChange={(checked) => setNoshift(!checked)}
+            label="Context Shift"
+            tooltip="Use Context Shifting to reduce reprocessing and improve performance with long contexts."
+          />
+
+          {/* "No Shift" is bound directly to the same `noshift` flag. */}
+          <CheckboxWithTooltip
+            checked={noshift}
+            onChange={setNoshift}
+            label="No Shift"
+            tooltip="Disable context shifting. May reduce performance but can help with compatibility issues."
+          />
+
+          <CheckboxWithTooltip
+            checked={smartcache}
+            onChange={setSmartcache}
+            label="Smart Cache"
+            tooltip="Enables intelligent context switching by saving KV cache snapshots to RAM. Requires fast forwarding."
+          />
+        </SimpleGrid>
+      </div>
+
+      <div>
+        <SimpleGrid cols={3} spacing="lg" verticalSpacing="md">
+          <CheckboxWithTooltip
+            checked={flashattention}
+            onChange={setFlashattention}
+            label="Flash Attention"
+            tooltip="Enable flash attention to reduce memory usage and improve performance. May produce incorrect answers for some prompts."
+          />
+
+          <CheckboxWithTooltip
+            checked={lowvram && isGpuAcceleration}
+            onChange={setLowvram}
+            label="Low VRAM"
+            tooltip={
+              !isGpuAcceleration
+                ? 'Low VRAM mode is only available for CUDA and ROCm accelerations.'
+                : 'Avoid offloading KV Cache or scratch buffers to VRAM. Allows more layers to fit, but may result in a speed loss.'
+            }
+            disabled={!isGpuAcceleration}
+          />
+
+          <CheckboxWithTooltip
+            checked={quantmatmul && isGpuAcceleration}
+            onChange={setQuantmatmul}
+            label="QuantMatMul"
+            tooltip={
+              !isGpuAcceleration
+                ? 'QuantMatMul is only available for CUDA and ROCm accelerations.'
+                : 'Enable MMQ mode to use finetuned kernels instead of default CuBLAS/HipBLAS for prompt processing.'
+            }
+            disabled={!isGpuAcceleration}
+          />
+
+          {/* The disabled-state tooltip names the actual gating condition
+              (acceleration type), matching the Low VRAM and QuantMatMul
+              controls above. */}
+          <CheckboxWithTooltip
+            checked={pipelineparallel && isGpuAcceleration}
+            onChange={setPipelineparallel}
+            label="Pipeline Parallel"
+            tooltip={
+              !isGpuAcceleration
+                ? 'Pipeline Parallelism is only available for CUDA and ROCm accelerations.'
+                : 'Enable Pipeline Parallelism for faster multi-GPU speeds but using more memory. Only active for multi-GPU setups.'
+            }
+            disabled={!isGpuAcceleration}
+          />
+
+          <CheckboxWithTooltip
+            checked={usemmap}
+            onChange={setUsemmap}
+            label="MMAP"
+            tooltip="Use memory-mapped file I/O for faster model loading. Recommended for most systems."
+          />
+        </SimpleGrid>
+      </div>
+
+      <div>
+        <Stack gap="md">
+          <Group gap="lg" align="flex-start" wrap="nowrap">
+            <div style={{ flex: 1, minWidth: 200 }}>
+              <Group gap="xs" align="center" mb="xs">
+                <Text size="sm" fw={500}>
+                  MoE Experts
+                </Text>
+                <InfoTooltip label="How many experts to use for MoE models. Set to -1 to follow GGUF metadata (default), or specify a specific number of experts." />
+              </Group>
+              <NumberInput
+                value={moeexperts}
+                onChange={(value) => setMoeexperts(Number(value))}
+                min={-1}
+                max={128}
+                step={1}
+                size="sm"
+              />
+            </div>
+
+            <div style={{ flex: 1, minWidth: 200 }}>
+              <Group gap="xs" align="center" mb="xs">
+                <Text size="sm" fw={500}>
+                  MoE CPU Layers
+                </Text>
+                <InfoTooltip label="Keep the Mixture of Experts (MoE) weights of the first N layers in the CPU. Set to 0 to disable (default), or specify the number of layers to keep on CPU." />
+              </Group>
+              {/* A non-numeric or empty value falls back to 0. */}
+              <NumberInput
+                value={moecpu}
+                onChange={(value) => setMoecpu(Number(value) || 0)}
+                min={0}
+                max={999}
+                step={1}
+                size="sm"
+              />
+            </div>
+          </Group>
+        </Stack>
+      </div>
+    </Stack>
+  );
+};
--- a/src/components/screens/Launch/index.tsx
+++ b/src/components/screens/Launch/index.tsx
@ -6,6 +6,7 @@ import { useLaunchLogic } from '@/hooks/useLaunchLogic';
 import { useWarnings } from '@/hooks/useWarnings';
 import { GeneralTab } from '@/components/screens/Launch/GeneralTab/index';
 import { AdvancedTab } from '@/components/screens/Launch/AdvancedTab';
+import { PerformanceTab } from '@/components/screens/Launch/PerformanceTab';
 import { NetworkTab } from '@/components/screens/Launch/NetworkTab';
 import { ImageGenerationTab } from '@/components/screens/Launch/ImageGenerationTab';
 import { WarningDisplay } from '@/components/WarningDisplay';
@ -63,6 +64,8 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
    sdclipgpu,
    moecpu,
    moeexperts,
+    smartcache,
+    pipelineparallel,
    parseAndApplyConfigFile,
    loadConfigFromFile,
    setModel,
@ -177,6 +180,8 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
    debugmode,
    moecpu,
    moeexperts,
+    smartcache,
+    pipelineparallel,
    usecuda: acceleration === 'cuda' || acceleration === 'rocm',
    usevulkan: acceleration === 'vulkan',
    useclblast: acceleration === 'clblast',
@ -316,6 +321,8 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
      sdclipgpu,
      moecpu,
      moeexperts,
+      smartcache,
+      pipelineparallel,
    });
  }, [
    handleLaunch,
@ -354,6 +361,8 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
    sdclipgpu,
    moecpu,
    moeexperts,
+    smartcache,
+    pipelineparallel,
  ]);

  return (
@ -397,6 +406,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
              <Tabs.List>
                <Tabs.Tab value="general">General</Tabs.Tab>
                <Tabs.Tab value="image">Image Generation</Tabs.Tab>
+                <Tabs.Tab value="performance">Performance</Tabs.Tab>
                <Tabs.Tab value="network">Network</Tabs.Tab>
                <Tabs.Tab value="advanced">Advanced</Tabs.Tab>
              </Tabs.List>
@ -405,16 +415,20 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
                <GeneralTab configLoaded={configLoaded} />
              </Tabs.Panel>

-              <Tabs.Panel value="advanced">
-                <AdvancedTab />
+              <Tabs.Panel value="image">
+                <ImageGenerationTab />
+              </Tabs.Panel>
+
+              <Tabs.Panel value="performance">
+                <PerformanceTab />
              </Tabs.Panel>

              <Tabs.Panel value="network">
                <NetworkTab />
              </Tabs.Panel>

-              <Tabs.Panel value="image">
-                <ImageGenerationTab />
+              <Tabs.Panel value="advanced">
+                <AdvancedTab />
              </Tabs.Panel>
            </Tabs>

--- a/src/components/settings/FrontendInterfaceSelector.tsx
+++ b/src/components/settings/FrontendInterfaceSelector.tsx
@ -1,5 +1,5 @@
 import { useState, useEffect, useCallback, useMemo } from 'react';
-import { Text, Box, Anchor, rem } from '@mantine/core';
+import { Text, Box, Anchor, rem, Button, Group, Stack } from '@mantine/core';
 import { Monitor, Image } from 'lucide-react';
 import { usePreferencesStore } from '@/stores/preferences';
 import type {
@ -8,6 +8,7 @@ import type {
 } from '@/types';
 import { FRONTENDS } from '@/constants';
 import { Select } from '@/components/Select';
+import { Modal } from '@/components/Modal';

 interface FrontendRequirement {
  id: string;
@ -40,6 +41,8 @@ export const FrontendInterfaceSelector = ({
    Map<string, boolean>
  >(new Map());

+  const [showClearDataModal, setShowClearDataModal] = useState(false);
+
  const frontendConfigs: FrontendConfig[] = useMemo(
    () => [
      {
@ -133,6 +136,11 @@ export const FrontendInterfaceSelector = ({
    );
  };

+  /**
+   * Asks the main process to delete the Open WebUI data directory, then
+   * closes the confirmation modal.
+   *
+   * The IPC call resolves to `{ success, error? }`; a `success: false` result
+   * or a rejected call is reported instead of being silently dropped. The
+   * modal is closed in `finally` so it never stays open after the attempt.
+   */
+  const handleClearOpenWebUIData = async () => {
+    try {
+      const result = await window.electronAPI.dependencies.clearOpenWebUIData();
+      if (!result.success) {
+        console.error('Failed to clear Open WebUI data:', result.error);
+      }
+    } catch (error) {
+      console.error('Failed to clear Open WebUI data:', error);
+    } finally {
+      setShowClearDataModal(false);
+    }
+  };
+
  const renderDisabledFrontendWarnings = () => {
    const disabledFrontends = frontendConfigs.filter(
      (config) => !isFrontendAvailable(config.value)
@ -256,21 +264,72 @@ export const FrontendInterfaceSelector = ({
          </Text>
        )}

-        <Select
-          value={frontendPreference}
-          onChange={handleFrontendPreferenceChange}
-          disabled={isOnInterfaceScreen}
-          data={frontendConfigs.map((config) => ({
-            value: config.value,
-            label: config.label,
-            disabled: !isFrontendAvailable(config.value),
-          }))}
-          leftSection={<Monitor style={{ width: rem(16), height: rem(16) }} />}
-        />
+        <Group gap="xs" align="flex-end">
+          <Select
+            value={frontendPreference}
+            onChange={handleFrontendPreferenceChange}
+            disabled={isOnInterfaceScreen}
+            data={frontendConfigs.map((config) => ({
+              value: config.value,
+              label: config.label,
+              disabled: !isFrontendAvailable(config.value),
+            }))}
+            leftSection={
+              <Monitor style={{ width: rem(16), height: rem(16) }} />
+            }
+            style={{ flex: 1 }}
+          />
+
+          {frontendPreference === 'openwebui' && (
+            <Button
+              variant="light"
+              color="orange"
+              onClick={() => setShowClearDataModal(true)}
+              disabled={isOnInterfaceScreen}
+            >
+              Clear Data
+            </Button>
+          )}
+        </Group>

        {renderDisabledFrontendWarnings()}
      </div>

+      <Modal
+        opened={showClearDataModal}
+        onClose={() => setShowClearDataModal(false)}
+        title="Clear Open WebUI Data?"
+      >
+        <Stack gap="md">
+          <Text size="sm" c="dimmed">
+            This will permanently delete all Open WebUI data including:
+          </Text>
+          <Box component="ul" pl="md">
+            <Text component="li" size="sm" c="dimmed">
+              Chat history
+            </Text>
+            <Text component="li" size="sm" c="dimmed">
+              User settings
+            </Text>
+            <Text component="li" size="sm" c="dimmed">
+              Database
+            </Text>
+          </Box>
+
+          <Group justify="flex-end" gap="sm">
+            <Button
+              variant="subtle"
+              onClick={() => setShowClearDataModal(false)}
+            >
+              Cancel
+            </Button>
+            <Button color="red" onClick={handleClearOpenWebUIData}>
+              Clear Data
+            </Button>
+          </Group>
+        </Stack>
+      </Modal>
+
      <div>
        <Text fw={500} mb="xs">
          Image Generation Frontend
--- a/src/hooks/useLaunchLogic.ts
+++ b/src/hooks/useLaunchLogic.ts
@ -43,6 +43,8 @@ interface LaunchArgs {
  sdclipgpu: boolean;
  moecpu: number;
  moeexperts: number;
+  smartcache: boolean;
+  pipelineparallel: boolean;
 }

 const buildModelArgs = (
@ -149,6 +151,14 @@ const buildConfigArgs = (isImageMode: boolean, launchArgs: LaunchArgs) => {
    args.push('--moecpu', launchArgs.moecpu.toString());
  }

+  if (launchArgs.smartcache) {
+    args.push('--smartcache');
+  }
+
+  if (launchArgs.pipelineparallel) {
+    args.push('--pipelineparallel');
+  }
+
  return args;
 };

--- a/src/main/ipc.ts
+++ b/src/main/ipc.ts
@ -290,6 +290,18 @@ export function setupIPCHandlers() {

  ipcMain.handle('dependencies:isUvAvailable', () => isUvAvailable());

+  ipcMain.handle('dependencies:clearOpenWebUIData', async () => {
+    const { rm } = await import('fs/promises');
+    const openWebUIDataDir = join(getInstallDir(), 'openwebui-data');
+    try {
+      await rm(openWebUIDataDir, { recursive: true, force: true });
+      return { success: true };
+    } catch (error) {
+      logError('Failed to clear Open WebUI data:', error as Error);
+      return { success: false, error: (error as Error).message };
+    }
+  });
+
  ipcMain.on('monitoring:start', () => startMonitoring(mainWindow));

  ipcMain.on('monitoring:stop', () => stopMonitoring());
--- a/src/main/modules/koboldcpp/launcher/index.ts
+++ b/src/main/modules/koboldcpp/launcher/index.ts
@ -16,7 +16,12 @@ import {
 } from '@/main/modules/config';
 import { startFrontend as startSillyTavernFrontend } from '@/main/modules/sillytavern';
 import { startFrontend as startOpenWebUIFrontend } from '@/main/modules/openwebui';
-import { patchKliteEmbd, patchKcppSduiEmbd, filterSpam } from './patches';
+import {
+  patchKliteEmbd,
+  patchKcppSduiEmbd,
+  patchLcppGzEmbd,
+  filterSpam,
+} from './patches';
 import { startProxy, stopProxy } from '../proxy';
 import { startTunnel, stopTunnel } from '../tunnel';
 import { resolveModelPath, abortActiveDownloads } from '../model-download';
@ -191,7 +196,11 @@ export async function launchKoboldCpp(
      if (isTextMode) {
        await patchKliteEmbd(binaryDir);
      }
-    } else if (isImageMode && imageGenerationFrontendPreference === 'builtin') {
+    } else if (frontendPreference === 'llamacpp') {
+      await patchLcppGzEmbd(binaryDir);
+    }
+
+    if (isImageMode && imageGenerationFrontendPreference === 'builtin') {
      await patchKcppSduiEmbd(binaryDir);
    }

--- a/src/main/modules/koboldcpp/launcher/patches.ts
+++ b/src/main/modules/koboldcpp/launcher/patches.ts
@ -112,6 +112,23 @@ export const patchKcppSduiEmbd = (unpackedDir: string) =>
    }
  }, 'Failed to patch kcpp_sdui.embd');

+/**
+ * Overwrites the `lcpp.gz.embd` file inside the unpacked binary directory
+ * with the copy resolved by `getAssetPath`.
+ *
+ * Two candidate locations are checked in order; only the first one that
+ * exists is replaced. Nothing is copied when neither exists.
+ *
+ * @param unpackedDir Directory the candidate paths are resolved against.
+ */
+export const patchLcppGzEmbd = (unpackedDir: string) =>
+  tryExecute(async () => {
+    const candidates = [
+      join(unpackedDir, '_internal', 'embd_res', 'lcpp.gz.embd'),
+      join(unpackedDir, 'lcpp.gz.embd'),
+    ];
+    const replacement = getAssetPath('lcpp.gz.embd');
+
+    for (const candidate of candidates) {
+      if (!(await pathExists(candidate))) {
+        continue;
+      }
+      // Patch the first match only, then stop.
+      await copyFile(replacement, candidate);
+      return;
+    }
+  }, 'Failed to patch lcpp.gz.embd');
+
 export function filterSpam(output: string) {
  const spamPatterns = [
    /^print_info:/,
--- a/src/main/modules/openwebui.ts
+++ b/src/main/modules/openwebui.ts
@ -15,7 +15,13 @@ import { PROXY } from '@/constants/proxy';

 let openWebUIProcess: ChildProcess | null = null;

-const OPENWEBUI_BASE_ARGS = ['--python', '3.11', 'open-webui@latest', 'serve'];
+const OPENWEBUI_VERSION = '0.6.41';
+const OPENWEBUI_BASE_ARGS = [
+  '--python',
+  '3.11',
+  `open-webui@${OPENWEBUI_VERSION}`,
+  'serve',
+];

 on('SIGINT', () => {
  void stopFrontend();
--- a/src/preload/index.ts
+++ b/src/preload/index.ts
@ -208,6 +208,8 @@ const logsAPI: LogsAPI = {
 const dependenciesAPI: DependenciesAPI = {
  isUvAvailable: () => ipcRenderer.invoke('dependencies:isUvAvailable'),
  isNpxAvailable: () => ipcRenderer.invoke('dependencies:isNpxAvailable'),
+  clearOpenWebUIData: () =>
+    ipcRenderer.invoke('dependencies:clearOpenWebUIData'),
 };

 const monitoringAPI: MonitoringAPI = {
--- a/src/stores/launchConfig.ts
+++ b/src/stores/launchConfig.ts
@ -41,6 +41,8 @@ interface LaunchConfigState {
  sdclipgpu: boolean;
  moecpu: number;
  moeexperts: number;
+  smartcache: boolean;
+  pipelineparallel: boolean;
  isImageGenerationMode: boolean;
  isTextMode: boolean;

@ -81,6 +83,8 @@ interface LaunchConfigState {
  setSdclipgpu: (enabled: boolean) => void;
  setMoecpu: (moecpu: number) => void;
  setMoeexperts: (moeexperts: number) => void;
+  setSmartcache: (smartcache: boolean) => void;
+  setPipelineparallel: (pipelineparallel: boolean) => void;

  parseAndApplyConfigFile: (configPath: string) => Promise<void>;
  loadConfigFromFile: (
@ -141,6 +145,8 @@ export const useLaunchConfigStore = create<LaunchConfigState>((set, get) => ({
  sdclipgpu: false,
  moecpu: 0,
  moeexperts: -1,
+  smartcache: false,
+  pipelineparallel: false,

  isImageGenerationMode: false,
  isTextMode: false,
@ -195,6 +201,8 @@ export const useLaunchConfigStore = create<LaunchConfigState>((set, get) => ({
  setSdclipgpu: (enabled) => set({ sdclipgpu: enabled }),
  setMoecpu: (moeCpu) => set({ moecpu: moeCpu }),
  setMoeexperts: (moeExperts) => set({ moeexperts: moeExperts }),
+  setSmartcache: (smartcache) => set({ smartcache }),
+  setPipelineparallel: (pipelineparallel) => set({ pipelineparallel }),

  // eslint-disable-next-line sonarjs/cognitive-complexity
  parseAndApplyConfigFile: async (configPath: string) => {
@ -426,6 +434,18 @@ export const useLaunchConfigStore = create<LaunchConfigState>((set, get) => ({
        updates.moeexperts = -1;
      }

+      if (typeof configData.smartcache === 'boolean') {
+        updates.smartcache = configData.smartcache;
+      } else {
+        updates.smartcache = false;
+      }
+
+      if (typeof configData.pipelineparallel === 'boolean') {
+        updates.pipelineparallel = configData.pipelineparallel;
+      } else {
+        updates.pipelineparallel = false;
+      }
+
      set(updates);
    }
  },
--- a/src/types/electron.d.ts
+++ b/src/types/electron.d.ts
@ -121,6 +121,8 @@ export interface KoboldConfig {
  preLaunchCommands?: string[];
  moecpu?: number;
  moeexperts?: number;
+  smartcache?: boolean;
+  pipelineparallel?: boolean;
  autoGpuLayers?: boolean;
  model?: string;
  backend?: string;
@ -252,6 +254,7 @@ export interface LogsAPI {
 export interface DependenciesAPI {
  isNpxAvailable: () => Promise<boolean>;
  isUvAvailable: () => Promise<boolean>;
+  clearOpenWebUIData: () => Promise<{ success: boolean; error?: string }>;
 }

 export interface MonitoringAPI {
--- a/yarn.lock
+++ b/yarn.lock
@ -315,15 +315,15 @@ __metadata:
  languageName: node
  linkType: hard

-"@codemirror/view@npm:^6.0.0, @codemirror/view@npm:^6.17.0, @codemirror/view@npm:^6.23.0, @codemirror/view@npm:^6.27.0, @codemirror/view@npm:^6.35.0, @codemirror/view@npm:^6.39.4":
-  version: 6.39.4
-  resolution: "@codemirror/view@npm:6.39.4"
+"@codemirror/view@npm:^6.0.0, @codemirror/view@npm:^6.17.0, @codemirror/view@npm:^6.23.0, @codemirror/view@npm:^6.27.0, @codemirror/view@npm:^6.35.0, @codemirror/view@npm:^6.39.5":
+  version: 6.39.5
+  resolution: "@codemirror/view@npm:6.39.5"
  dependencies:
    "@codemirror/state": "npm:^6.5.0"
    crelt: "npm:^1.0.6"
    style-mod: "npm:^4.1.0"
    w3c-keyname: "npm:^2.2.4"
-  checksum: 10c0/aa17cfbfb0714243cccc9e970f4f382c3e747435f2fc63d40125370379227436fb529473dde856a276b8b7df98f83c55f4cc5cd0c24a56335f24233103b7dfd0
+  checksum: 10c0/54fc600b54c336283e8c73b6a0a99156880682121d73c2d4b74fc5a93f956e33b54ba16cd7debbf5fc28a48b18aeb643217e5200e3af0588909ca90cb275e957
  languageName: node
  linkType: hard

@ -4124,7 +4124,7 @@ __metadata:
  dependencies:
    "@codemirror/search": "npm:^6.5.11"
    "@codemirror/theme-one-dark": "npm:^6.1.3"
-    "@codemirror/view": "npm:^6.39.4"
+    "@codemirror/view": "npm:^6.39.5"
    "@eslint/js": "npm:^9.39.2"
    "@fontsource/inter": "npm:^5.2.8"
    "@huggingface/gguf": "npm:^0.3.2"
@ -4154,7 +4154,7 @@ __metadata:
    execa: "npm:^9.6.1"
    globals: "npm:^16.5.0"
    jiti: "npm:^2.6.1"
-    lucide-react: "npm:^0.561.0"
+    lucide-react: "npm:^0.562.0"
    mime-types: "npm:^3.0.2"
    prettier: "npm:^3.7.4"
    react: "npm:^19.2.3"
@ -5387,12 +5387,12 @@ __metadata:
  languageName: node
  linkType: hard

-"lucide-react@npm:^0.561.0":
-  version: 0.561.0
-  resolution: "lucide-react@npm:0.561.0"
+"lucide-react@npm:^0.562.0":
+  version: 0.562.0
+  resolution: "lucide-react@npm:0.562.0"
  peerDependencies:
    react: ^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0
-  checksum: 10c0/d23c96a543bdb1e11cec1f05a631f86f961f2ee17a10da42e7c297bb06c5f1587a1b66927f58082c67d8edc0749b0a5e44e543b461deab9db19d22fb9ca93cd7
+  checksum: 10c0/c713a2490916e42c678b9df0d7309d8cdb715af1d7139c7eb49c45547901b276a7cf722548012a668dc0e337eeadf5141d5af8c8fcda6721fc58fb2d288efcba
  languageName: node
  linkType: hard