update sdui, new performance tab, start patching llama.cpp UI, lock openwebui to 0.6.41, add a new "clear data" button for openwebui select, new smartcache + pipelineparallel UI performance options

This commit is contained in:
Egor 2025-12-22 02:22:18 -08:00
parent 00276b0e99
commit 01668ea12a
17 changed files with 377 additions and 170 deletions

File diff suppressed because one or more lines are too long

BIN
assets/lcpp.gz.embd Normal file

Binary file not shown.

View file

@ -1,7 +1,7 @@
{ {
"name": "gerbil", "name": "gerbil",
"productName": "Gerbil", "productName": "Gerbil",
"version": "1.16.4", "version": "1.17.0",
"description": "Run Large Language Models locally", "description": "Run Large Language Models locally",
"main": "out/main/index.js", "main": "out/main/index.js",
"homepage": "./", "homepage": "./",
@ -41,7 +41,7 @@
"dependencies": { "dependencies": {
"@codemirror/search": "^6.5.11", "@codemirror/search": "^6.5.11",
"@codemirror/theme-one-dark": "^6.1.3", "@codemirror/theme-one-dark": "^6.1.3",
"@codemirror/view": "^6.39.4", "@codemirror/view": "^6.39.5",
"@fontsource/inter": "^5.2.8", "@fontsource/inter": "^5.2.8",
"@huggingface/gguf": "^0.3.2", "@huggingface/gguf": "^0.3.2",
"@mantine/core": "^8.3.10", "@mantine/core": "^8.3.10",
@ -49,7 +49,7 @@
"@uiw/react-codemirror": "^4.25.4", "@uiw/react-codemirror": "^4.25.4",
"electron-updater": "^6.6.2", "electron-updater": "^6.6.2",
"execa": "^9.6.1", "execa": "^9.6.1",
"lucide-react": "^0.561.0", "lucide-react": "^0.562.0",
"mime-types": "^3.0.2", "mime-types": "^3.0.2",
"react": "^19.2.3", "react": "^19.2.3",
"react-dom": "^19.2.3", "react-dom": "^19.2.3",
@ -115,6 +115,10 @@
{ {
"from": "assets/kcpp_sdui.embd", "from": "assets/kcpp_sdui.embd",
"to": "assets/kcpp_sdui.embd" "to": "assets/kcpp_sdui.embd"
},
{
"from": "assets/lcpp.gz.embd",
"to": "assets/lcpp.gz.embd"
} }
], ],
"extraResources": [ "extraResources": [

View file

@ -3,7 +3,6 @@ import {
Group, Group,
Text, Text,
TextInput, TextInput,
NumberInput,
Button, Button,
SimpleGrid, SimpleGrid,
ActionIcon, ActionIcon,
@ -19,29 +18,14 @@ export const AdvancedTab = () => {
const { const {
additionalArguments, additionalArguments,
preLaunchCommands, preLaunchCommands,
noshift,
flashattention,
noavx2, noavx2,
failsafe, failsafe,
lowvram,
quantmatmul,
usemmap,
debugmode, debugmode,
acceleration,
moecpu,
moeexperts,
setAdditionalArguments, setAdditionalArguments,
setPreLaunchCommands, setPreLaunchCommands,
setNoshift,
setFlashattention,
setNoavx2, setNoavx2,
setFailsafe, setFailsafe,
setLowvram,
setQuantmatmul,
setUsemmap,
setDebugmode, setDebugmode,
setMoecpu,
setMoeexperts,
} = useLaunchConfigStore(); } = useLaunchConfigStore();
const [commandLineModalOpen, setCommandLineModalOpen] = useState(false); const [commandLineModalOpen, setCommandLineModalOpen] = useState(false);
const [backendSupport, setBackendSupport] = useState<{ const [backendSupport, setBackendSupport] = useState<{
@ -58,8 +42,6 @@ export const AdvancedTab = () => {
setAdditionalArguments(updatedArgs); setAdditionalArguments(updatedArgs);
}; };
const isGpuAcceleration = acceleration === 'cuda' || acceleration === 'rocm';
useEffect(() => { useEffect(() => {
const detectAccelerationSupport = async () => { const detectAccelerationSupport = async () => {
const support = const support =
@ -84,20 +66,6 @@ export const AdvancedTab = () => {
<Stack gap="md"> <Stack gap="md">
<div> <div>
<SimpleGrid cols={3} spacing="lg" verticalSpacing="md"> <SimpleGrid cols={3} spacing="lg" verticalSpacing="md">
<CheckboxWithTooltip
checked={!noshift}
onChange={(checked) => setNoshift(!checked)}
label="Context Shift"
tooltip="Use Context Shifting to reduce reprocessing."
/>
<CheckboxWithTooltip
checked={noshift}
onChange={setNoshift}
label="No Shift"
tooltip="Don't use GPU layer shifting for incomplete offloads, which may reduce model performance."
/>
<CheckboxWithTooltip <CheckboxWithTooltip
checked={noavx2} checked={noavx2}
onChange={setNoavx2} onChange={setNoavx2}
@ -110,25 +78,6 @@ export const AdvancedTab = () => {
disabled={isLoading || !backendSupport?.noavx2} disabled={isLoading || !backendSupport?.noavx2}
/> />
<CheckboxWithTooltip
checked={usemmap}
onChange={setUsemmap}
label="MMAP"
tooltip="Use MMAP to load models when enabled."
/>
<CheckboxWithTooltip
checked={quantmatmul && isGpuAcceleration}
onChange={setQuantmatmul}
label="QuantMatMul"
tooltip={
!isGpuAcceleration
? 'QuantMatMul is only available for CUDA and ROCm accelerations.'
: 'Enable MMQ mode to use finetuned kernels instead of default CuBLAS/HipBLAS for prompt processing.'
}
disabled={!isGpuAcceleration}
/>
<CheckboxWithTooltip <CheckboxWithTooltip
checked={failsafe} checked={failsafe}
onChange={setFailsafe} onChange={setFailsafe}
@ -141,25 +90,6 @@ export const AdvancedTab = () => {
disabled={isLoading || !backendSupport?.failsafe} disabled={isLoading || !backendSupport?.failsafe}
/> />
<CheckboxWithTooltip
checked={flashattention}
onChange={setFlashattention}
label="Flash Attention"
tooltip="Enable flash attention to reduce memory usage. May produce incorrect answers for some prompts, but improves performance."
/>
<CheckboxWithTooltip
checked={lowvram && isGpuAcceleration}
onChange={setLowvram}
label="Low VRAM"
tooltip={
!isGpuAcceleration
? 'Low VRAM mode is only available for CUDA and ROCm accelerations.'
: 'Avoid offloading KV Cache or scratch buffers to VRAM. Allows more layers to fit, but may result in a speed loss.'
}
disabled={!isGpuAcceleration}
/>
<CheckboxWithTooltip <CheckboxWithTooltip
checked={debugmode} checked={debugmode}
onChange={setDebugmode} onChange={setDebugmode}
@ -169,46 +99,6 @@ export const AdvancedTab = () => {
</SimpleGrid> </SimpleGrid>
</div> </div>
<div>
<Stack gap="md">
<Group gap="lg" align="flex-start" wrap="nowrap">
<div style={{ flex: 1, minWidth: 200 }}>
<Group gap="xs" align="center" mb="xs">
<Text size="sm" fw={500}>
MoE Experts
</Text>
<InfoTooltip label="How many experts to use for MoE models. Set to -1 to follow GGUF metadata (default), or specify a specific number of experts." />
</Group>
<NumberInput
value={moeexperts}
onChange={(value) => setMoeexperts(Number(value))}
min={-1}
max={128}
step={1}
size="sm"
/>
</div>
<div style={{ flex: 1, minWidth: 200 }}>
<Group gap="xs" align="center" mb="xs">
<Text size="sm" fw={500}>
MoE CPU Layers
</Text>
<InfoTooltip label="Keep the Mixture of Experts (MoE) weights of the first N layers in the CPU. Set to 0 to disable (default), or specify the number of layers to keep on CPU." />
</Group>
<NumberInput
value={moecpu}
onChange={(value) => setMoecpu(Number(value) || 0)}
min={0}
max={999}
step={1}
size="sm"
/>
</div>
</Group>
</Stack>
</div>
<div> <div>
<Group mb="xs" justify="space-between"> <Group mb="xs" justify="space-between">
<Group> <Group>

View file

@ -67,6 +67,8 @@ const UI_COVERED_ARGS = new Set([
'--tensor_split', '--tensor_split',
'--debugmode', '--debugmode',
'--lowvram', '--lowvram',
'--smartcache',
'--pipelineparallel',
] as const) as ReadonlySet<string>; ] as const) as ReadonlySet<string>;
const IGNORED_ARGS = new Set([ const IGNORED_ARGS = new Set([
@ -88,6 +90,7 @@ const IGNORED_ARGS = new Set([
'--no-mmap', '--no-mmap',
'--sdnotile', '--sdnotile',
'--testmemory', '--testmemory',
'--forceversion',
] as const) as ReadonlySet<string>; ] as const) as ReadonlySet<string>;
const COMMAND_LINE_ARGUMENTS = [ const COMMAND_LINE_ARGUMENTS = [
@ -377,7 +380,21 @@ const COMMAND_LINE_ARGUMENTS = [
description: description:
'How many tokens to generate by default, if not specified. Must be smaller than context size. Usually, your frontend GUI will override this.', 'How many tokens to generate by default, if not specified. Must be smaller than context size. Usually, your frontend GUI will override this.',
type: 'int', type: 'int',
default: 768, default: 896,
category: 'Performance',
},
{
flag: '--smartcache',
description:
'Enables intelligent context switching by saving KV cache snapshots to RAM. Requires fast forwarding.',
type: 'boolean',
category: 'Performance',
},
{
flag: '--pipelineparallel',
description:
'Enable Pipeline Parallelism for faster multigpu speeds but using more memory, only active for multigpu.',
type: 'boolean',
category: 'Performance', category: 'Performance',
}, },
{ {
@ -433,15 +450,6 @@ const COMMAND_LINE_ARGUMENTS = [
type: 'boolean', type: 'boolean',
category: 'Advanced', category: 'Advanced',
}, },
{
flag: '--forceversion',
description:
'If the model file format detection fails (e.g. rogue modified model) you can set this to override the detected format (enter desired version, e.g. 401 for GPTNeoX-Type2).',
metavar: '[version]',
type: 'int',
default: 0,
category: 'Advanced',
},
{ {
flag: '--smartcontext', flag: '--smartcontext',
description: description:

View file

@ -0,0 +1,153 @@
import { Stack, Group, Text, NumberInput, SimpleGrid } from '@mantine/core';
import { InfoTooltip } from '@/components/InfoTooltip';
import { CheckboxWithTooltip } from '@/components/CheckboxWithTooltip';
import { useLaunchConfigStore } from '@/stores/launchConfig';
/** Value stored for "MoE Experts" when the field holds no usable number (-1 = follow GGUF metadata). */
const DEFAULT_MOE_EXPERTS = -1;

/**
 * Normalises the raw value emitted by the "MoE Experts" NumberInput.
 *
 * The input's onChange may hand back a string instead of a number (for
 * example while the field is empty or only contains a partial entry such as
 * "-"). Passing that straight through `Number()` would store 0 or NaN, so
 * anything that is not a finite number falls back to the default.
 *
 * @param value Raw value from the NumberInput change callback.
 * @returns A finite number, or DEFAULT_MOE_EXPERTS when `value` is unusable.
 */
const toMoeExperts = (value: number | string) => {
  if (typeof value === 'string' && value.trim() === '') {
    return DEFAULT_MOE_EXPERTS;
  }

  const parsed = Number(value);
  return Number.isFinite(parsed) ? parsed : DEFAULT_MOE_EXPERTS;
};

/**
 * "Performance" tab of the launch screen.
 *
 * Renders the context-handling toggles, the memory/GPU related toggles and
 * the Mixture-of-Experts numeric inputs, all bound to the launch config store.
 * Low VRAM, QuantMatMul and Pipeline Parallel are disabled (and shown
 * unchecked) unless the selected acceleration is CUDA or ROCm.
 */
export const PerformanceTab = () => {
  const {
    noshift,
    flashattention,
    lowvram,
    quantmatmul,
    usemmap,
    acceleration,
    moecpu,
    moeexperts,
    smartcache,
    pipelineparallel,
    setNoshift,
    setFlashattention,
    setLowvram,
    setQuantmatmul,
    setUsemmap,
    setMoecpu,
    setMoeexperts,
    setSmartcache,
    setPipelineparallel,
  } = useLaunchConfigStore();

  // Gate shared by every option below that is limited to CUDA/ROCm.
  const isGpuAcceleration = acceleration === 'cuda' || acceleration === 'rocm';

  return (
    <Stack gap="md">
      {/* Context handling */}
      <div>
        <SimpleGrid cols={3} spacing="lg" verticalSpacing="md">
          {/* "Context Shift" is the inverse view of the same `noshift` flag. */}
          <CheckboxWithTooltip
            checked={!noshift}
            onChange={(checked) => setNoshift(!checked)}
            label="Context Shift"
            tooltip="Use Context Shifting to reduce reprocessing and improve performance with long contexts."
          />

          <CheckboxWithTooltip
            checked={noshift}
            onChange={setNoshift}
            label="No Shift"
            tooltip="Disable context shifting. May reduce performance but can help with compatibility issues."
          />

          <CheckboxWithTooltip
            checked={smartcache}
            onChange={setSmartcache}
            label="Smart Cache"
            tooltip="Enables intelligent context switching by saving KV cache snapshots to RAM. Requires fast forwarding."
          />
        </SimpleGrid>
      </div>

      {/* Memory and GPU options */}
      <div>
        <SimpleGrid cols={3} spacing="lg" verticalSpacing="md">
          <CheckboxWithTooltip
            checked={flashattention}
            onChange={setFlashattention}
            label="Flash Attention"
            tooltip="Enable flash attention to reduce memory usage and improve performance. May produce incorrect answers for some prompts."
          />

          <CheckboxWithTooltip
            checked={lowvram && isGpuAcceleration}
            onChange={setLowvram}
            label="Low VRAM"
            tooltip={
              !isGpuAcceleration
                ? 'Low VRAM mode is only available for CUDA and ROCm accelerations.'
                : 'Avoid offloading KV Cache or scratch buffers to VRAM. Allows more layers to fit, but may result in a speed loss.'
            }
            disabled={!isGpuAcceleration}
          />

          <CheckboxWithTooltip
            checked={quantmatmul && isGpuAcceleration}
            onChange={setQuantmatmul}
            label="QuantMatMul"
            tooltip={
              !isGpuAcceleration
                ? 'QuantMatMul is only available for CUDA and ROCm accelerations.'
                : 'Enable MMQ mode to use finetuned kernels instead of default CuBLAS/HipBLAS for prompt processing.'
            }
            disabled={!isGpuAcceleration}
          />

          {/* The disabled-state tooltip names the condition that actually disables the control. */}
          <CheckboxWithTooltip
            checked={pipelineparallel && isGpuAcceleration}
            onChange={setPipelineparallel}
            label="Pipeline Parallel"
            tooltip={
              !isGpuAcceleration
                ? 'Pipeline Parallelism is only available for CUDA and ROCm accelerations.'
                : 'Enable Pipeline Parallelism for faster multi-GPU speeds but using more memory. Only active for multi-GPU setups.'
            }
            disabled={!isGpuAcceleration}
          />

          <CheckboxWithTooltip
            checked={usemmap}
            onChange={setUsemmap}
            label="MMAP"
            tooltip="Use memory-mapped file I/O for faster model loading. Recommended for most systems."
          />
        </SimpleGrid>
      </div>

      {/* Mixture-of-Experts tuning */}
      <div>
        <Stack gap="md">
          <Group gap="lg" align="flex-start" wrap="nowrap">
            <div style={{ flex: 1, minWidth: 200 }}>
              <Group gap="xs" align="center" mb="xs">
                <Text size="sm" fw={500}>
                  MoE Experts
                </Text>
                <InfoTooltip label="How many experts to use for MoE models. Set to -1 to follow GGUF metadata (default), or specify a specific number of experts." />
              </Group>
              <NumberInput
                value={moeexperts}
                // Never store NaN: empty or partial input falls back to the default.
                onChange={(value) => setMoeexperts(toMoeExperts(value))}
                min={-1}
                max={128}
                step={1}
                size="sm"
              />
            </div>

            <div style={{ flex: 1, minWidth: 200 }}>
              <Group gap="xs" align="center" mb="xs">
                <Text size="sm" fw={500}>
                  MoE CPU Layers
                </Text>
                <InfoTooltip label="Keep the Mixture of Experts (MoE) weights of the first N layers in the CPU. Set to 0 to disable (default), or specify the number of layers to keep on CPU." />
              </Group>
              <NumberInput
                value={moecpu}
                // Empty or non-numeric input collapses to 0 (disabled).
                onChange={(value) => setMoecpu(Number(value) || 0)}
                min={0}
                max={999}
                step={1}
                size="sm"
              />
            </div>
          </Group>
        </Stack>
      </div>
    </Stack>
  );
};

View file

@ -6,6 +6,7 @@ import { useLaunchLogic } from '@/hooks/useLaunchLogic';
import { useWarnings } from '@/hooks/useWarnings'; import { useWarnings } from '@/hooks/useWarnings';
import { GeneralTab } from '@/components/screens/Launch/GeneralTab/index'; import { GeneralTab } from '@/components/screens/Launch/GeneralTab/index';
import { AdvancedTab } from '@/components/screens/Launch/AdvancedTab'; import { AdvancedTab } from '@/components/screens/Launch/AdvancedTab';
import { PerformanceTab } from '@/components/screens/Launch/PerformanceTab';
import { NetworkTab } from '@/components/screens/Launch/NetworkTab'; import { NetworkTab } from '@/components/screens/Launch/NetworkTab';
import { ImageGenerationTab } from '@/components/screens/Launch/ImageGenerationTab'; import { ImageGenerationTab } from '@/components/screens/Launch/ImageGenerationTab';
import { WarningDisplay } from '@/components/WarningDisplay'; import { WarningDisplay } from '@/components/WarningDisplay';
@ -63,6 +64,8 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
sdclipgpu, sdclipgpu,
moecpu, moecpu,
moeexperts, moeexperts,
smartcache,
pipelineparallel,
parseAndApplyConfigFile, parseAndApplyConfigFile,
loadConfigFromFile, loadConfigFromFile,
setModel, setModel,
@ -177,6 +180,8 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
debugmode, debugmode,
moecpu, moecpu,
moeexperts, moeexperts,
smartcache,
pipelineparallel,
usecuda: acceleration === 'cuda' || acceleration === 'rocm', usecuda: acceleration === 'cuda' || acceleration === 'rocm',
usevulkan: acceleration === 'vulkan', usevulkan: acceleration === 'vulkan',
useclblast: acceleration === 'clblast', useclblast: acceleration === 'clblast',
@ -316,6 +321,8 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
sdclipgpu, sdclipgpu,
moecpu, moecpu,
moeexperts, moeexperts,
smartcache,
pipelineparallel,
}); });
}, [ }, [
handleLaunch, handleLaunch,
@ -354,6 +361,8 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
sdclipgpu, sdclipgpu,
moecpu, moecpu,
moeexperts, moeexperts,
smartcache,
pipelineparallel,
]); ]);
return ( return (
@ -397,6 +406,7 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
<Tabs.List> <Tabs.List>
<Tabs.Tab value="general">General</Tabs.Tab> <Tabs.Tab value="general">General</Tabs.Tab>
<Tabs.Tab value="image">Image Generation</Tabs.Tab> <Tabs.Tab value="image">Image Generation</Tabs.Tab>
<Tabs.Tab value="performance">Performance</Tabs.Tab>
<Tabs.Tab value="network">Network</Tabs.Tab> <Tabs.Tab value="network">Network</Tabs.Tab>
<Tabs.Tab value="advanced">Advanced</Tabs.Tab> <Tabs.Tab value="advanced">Advanced</Tabs.Tab>
</Tabs.List> </Tabs.List>
@ -405,16 +415,20 @@ export const LaunchScreen = ({ onLaunch }: LaunchScreenProps) => {
<GeneralTab configLoaded={configLoaded} /> <GeneralTab configLoaded={configLoaded} />
</Tabs.Panel> </Tabs.Panel>
<Tabs.Panel value="advanced"> <Tabs.Panel value="image">
<AdvancedTab /> <ImageGenerationTab />
</Tabs.Panel>
<Tabs.Panel value="performance">
<PerformanceTab />
</Tabs.Panel> </Tabs.Panel>
<Tabs.Panel value="network"> <Tabs.Panel value="network">
<NetworkTab /> <NetworkTab />
</Tabs.Panel> </Tabs.Panel>
<Tabs.Panel value="image"> <Tabs.Panel value="advanced">
<ImageGenerationTab /> <AdvancedTab />
</Tabs.Panel> </Tabs.Panel>
</Tabs> </Tabs>

View file

@ -1,5 +1,5 @@
import { useState, useEffect, useCallback, useMemo } from 'react'; import { useState, useEffect, useCallback, useMemo } from 'react';
import { Text, Box, Anchor, rem } from '@mantine/core'; import { Text, Box, Anchor, rem, Button, Group, Stack } from '@mantine/core';
import { Monitor, Image } from 'lucide-react'; import { Monitor, Image } from 'lucide-react';
import { usePreferencesStore } from '@/stores/preferences'; import { usePreferencesStore } from '@/stores/preferences';
import type { import type {
@ -8,6 +8,7 @@ import type {
} from '@/types'; } from '@/types';
import { FRONTENDS } from '@/constants'; import { FRONTENDS } from '@/constants';
import { Select } from '@/components/Select'; import { Select } from '@/components/Select';
import { Modal } from '@/components/Modal';
interface FrontendRequirement { interface FrontendRequirement {
id: string; id: string;
@ -40,6 +41,8 @@ export const FrontendInterfaceSelector = ({
Map<string, boolean> Map<string, boolean>
>(new Map()); >(new Map());
const [showClearDataModal, setShowClearDataModal] = useState(false);
const frontendConfigs: FrontendConfig[] = useMemo( const frontendConfigs: FrontendConfig[] = useMemo(
() => [ () => [
{ {
@ -133,6 +136,11 @@ export const FrontendInterfaceSelector = ({
); );
}; };
/**
 * Asks the main process to delete the Open WebUI data, then closes the
 * confirmation modal.
 *
 * The IPC call resolves with `{ success, error? }`. A failed result, or a
 * rejected call, is reported to the console instead of being dropped. The
 * modal is closed in every case so the dialog can never get stuck open.
 */
const handleClearOpenWebUIData = async () => {
  try {
    const result = await window.electronAPI.dependencies.clearOpenWebUIData();
    if (!result.success) {
      console.error('Failed to clear Open WebUI data:', result.error);
    }
  } catch (error) {
    // The invoke itself failed (e.g. no handler registered).
    console.error('Failed to clear Open WebUI data:', error);
  } finally {
    setShowClearDataModal(false);
  }
};
const renderDisabledFrontendWarnings = () => { const renderDisabledFrontendWarnings = () => {
const disabledFrontends = frontendConfigs.filter( const disabledFrontends = frontendConfigs.filter(
(config) => !isFrontendAvailable(config.value) (config) => !isFrontendAvailable(config.value)
@ -256,6 +264,7 @@ export const FrontendInterfaceSelector = ({
</Text> </Text>
)} )}
<Group gap="xs" align="flex-end">
<Select <Select
value={frontendPreference} value={frontendPreference}
onChange={handleFrontendPreferenceChange} onChange={handleFrontendPreferenceChange}
@ -265,12 +274,62 @@ export const FrontendInterfaceSelector = ({
label: config.label, label: config.label,
disabled: !isFrontendAvailable(config.value), disabled: !isFrontendAvailable(config.value),
}))} }))}
leftSection={<Monitor style={{ width: rem(16), height: rem(16) }} />} leftSection={
<Monitor style={{ width: rem(16), height: rem(16) }} />
}
style={{ flex: 1 }}
/> />
{frontendPreference === 'openwebui' && (
<Button
variant="light"
color="orange"
onClick={() => setShowClearDataModal(true)}
disabled={isOnInterfaceScreen}
>
Clear Data
</Button>
)}
</Group>
{renderDisabledFrontendWarnings()} {renderDisabledFrontendWarnings()}
</div> </div>
<Modal
opened={showClearDataModal}
onClose={() => setShowClearDataModal(false)}
title="Clear Open WebUI Data?"
>
<Stack gap="md">
<Text size="sm" c="dimmed">
This will permanently delete all Open WebUI data including:
</Text>
<Box component="ul" pl="md">
<Text component="li" size="sm" c="dimmed">
Chat history
</Text>
<Text component="li" size="sm" c="dimmed">
User settings
</Text>
<Text component="li" size="sm" c="dimmed">
Database
</Text>
</Box>
<Group justify="flex-end" gap="sm">
<Button
variant="subtle"
onClick={() => setShowClearDataModal(false)}
>
Cancel
</Button>
<Button color="red" onClick={handleClearOpenWebUIData}>
Clear Data
</Button>
</Group>
</Stack>
</Modal>
<div> <div>
<Text fw={500} mb="xs"> <Text fw={500} mb="xs">
Image Generation Frontend Image Generation Frontend

View file

@ -43,6 +43,8 @@ interface LaunchArgs {
sdclipgpu: boolean; sdclipgpu: boolean;
moecpu: number; moecpu: number;
moeexperts: number; moeexperts: number;
smartcache: boolean;
pipelineparallel: boolean;
} }
const buildModelArgs = ( const buildModelArgs = (
@ -149,6 +151,14 @@ const buildConfigArgs = (isImageMode: boolean, launchArgs: LaunchArgs) => {
args.push('--moecpu', launchArgs.moecpu.toString()); args.push('--moecpu', launchArgs.moecpu.toString());
} }
if (launchArgs.smartcache) {
args.push('--smartcache');
}
if (launchArgs.pipelineparallel) {
args.push('--pipelineparallel');
}
return args; return args;
}; };

View file

@ -290,6 +290,18 @@ export function setupIPCHandlers() {
ipcMain.handle('dependencies:isUvAvailable', () => isUvAvailable()); ipcMain.handle('dependencies:isUvAvailable', () => isUvAvailable());
// Recursively removes the `openwebui-data` directory under the install dir.
// Resolves with `{ success: true }`, or `{ success: false, error }` (after
// logging) when the removal throws. `force: true` is passed to `rm`.
ipcMain.handle('dependencies:clearOpenWebUIData', async () => {
  const fsPromises = await import('fs/promises');
  const dataDir = join(getInstallDir(), 'openwebui-data');

  try {
    await fsPromises.rm(dataDir, { recursive: true, force: true });
  } catch (error) {
    const failure = error as Error;
    logError('Failed to clear Open WebUI data:', failure);
    return { success: false, error: failure.message };
  }

  return { success: true };
});
ipcMain.on('monitoring:start', () => startMonitoring(mainWindow)); ipcMain.on('monitoring:start', () => startMonitoring(mainWindow));
ipcMain.on('monitoring:stop', () => stopMonitoring()); ipcMain.on('monitoring:stop', () => stopMonitoring());

View file

@ -16,7 +16,12 @@ import {
} from '@/main/modules/config'; } from '@/main/modules/config';
import { startFrontend as startSillyTavernFrontend } from '@/main/modules/sillytavern'; import { startFrontend as startSillyTavernFrontend } from '@/main/modules/sillytavern';
import { startFrontend as startOpenWebUIFrontend } from '@/main/modules/openwebui'; import { startFrontend as startOpenWebUIFrontend } from '@/main/modules/openwebui';
import { patchKliteEmbd, patchKcppSduiEmbd, filterSpam } from './patches'; import {
patchKliteEmbd,
patchKcppSduiEmbd,
patchLcppGzEmbd,
filterSpam,
} from './patches';
import { startProxy, stopProxy } from '../proxy'; import { startProxy, stopProxy } from '../proxy';
import { startTunnel, stopTunnel } from '../tunnel'; import { startTunnel, stopTunnel } from '../tunnel';
import { resolveModelPath, abortActiveDownloads } from '../model-download'; import { resolveModelPath, abortActiveDownloads } from '../model-download';
@ -191,7 +196,11 @@ export async function launchKoboldCpp(
if (isTextMode) { if (isTextMode) {
await patchKliteEmbd(binaryDir); await patchKliteEmbd(binaryDir);
} }
} else if (isImageMode && imageGenerationFrontendPreference === 'builtin') { } else if (frontendPreference === 'llamacpp') {
await patchLcppGzEmbd(binaryDir);
}
if (isImageMode && imageGenerationFrontendPreference === 'builtin') {
await patchKcppSduiEmbd(binaryDir); await patchKcppSduiEmbd(binaryDir);
} }

View file

@ -112,6 +112,23 @@ export const patchKcppSduiEmbd = (unpackedDir: string) =>
} }
}, 'Failed to patch kcpp_sdui.embd'); }, 'Failed to patch kcpp_sdui.embd');
/**
 * Overwrites the `lcpp.gz.embd` file inside an unpacked binary directory with
 * the copy returned by `getAssetPath('lcpp.gz.embd')`.
 *
 * Two candidate locations are probed in order; only the first one that exists
 * is overwritten. When neither exists nothing is copied. The work runs inside
 * `tryExecute` with the message 'Failed to patch lcpp.gz.embd'.
 *
 * @param unpackedDir Directory that contains the unpacked binary.
 */
export const patchLcppGzEmbd = (unpackedDir: string) =>
  tryExecute(async () => {
    const candidateTargets = [
      join(unpackedDir, '_internal', 'embd_res', 'lcpp.gz.embd'),
      join(unpackedDir, 'lcpp.gz.embd'),
    ];
    const replacementPath = getAssetPath('lcpp.gz.embd');

    for (const candidate of candidateTargets) {
      const isPresent = await pathExists(candidate);
      if (!isPresent) {
        continue;
      }

      // Patch the first match only, then stop probing.
      await copyFile(replacementPath, candidate);
      return;
    }
  }, 'Failed to patch lcpp.gz.embd');
export function filterSpam(output: string) { export function filterSpam(output: string) {
const spamPatterns = [ const spamPatterns = [
/^print_info:/, /^print_info:/,

View file

@ -15,7 +15,13 @@ import { PROXY } from '@/constants/proxy';
let openWebUIProcess: ChildProcess | null = null; let openWebUIProcess: ChildProcess | null = null;
const OPENWEBUI_BASE_ARGS = ['--python', '3.11', 'open-webui@latest', 'serve']; const OPENWEBUI_VERSION = '0.6.41';
const OPENWEBUI_BASE_ARGS = [
'--python',
'3.11',
`open-webui@${OPENWEBUI_VERSION}`,
'serve',
];
on('SIGINT', () => { on('SIGINT', () => {
void stopFrontend(); void stopFrontend();

View file

@ -208,6 +208,8 @@ const logsAPI: LogsAPI = {
const dependenciesAPI: DependenciesAPI = { const dependenciesAPI: DependenciesAPI = {
isUvAvailable: () => ipcRenderer.invoke('dependencies:isUvAvailable'), isUvAvailable: () => ipcRenderer.invoke('dependencies:isUvAvailable'),
isNpxAvailable: () => ipcRenderer.invoke('dependencies:isNpxAvailable'), isNpxAvailable: () => ipcRenderer.invoke('dependencies:isNpxAvailable'),
clearOpenWebUIData: () =>
ipcRenderer.invoke('dependencies:clearOpenWebUIData'),
}; };
const monitoringAPI: MonitoringAPI = { const monitoringAPI: MonitoringAPI = {

View file

@ -41,6 +41,8 @@ interface LaunchConfigState {
sdclipgpu: boolean; sdclipgpu: boolean;
moecpu: number; moecpu: number;
moeexperts: number; moeexperts: number;
smartcache: boolean;
pipelineparallel: boolean;
isImageGenerationMode: boolean; isImageGenerationMode: boolean;
isTextMode: boolean; isTextMode: boolean;
@ -81,6 +83,8 @@ interface LaunchConfigState {
setSdclipgpu: (enabled: boolean) => void; setSdclipgpu: (enabled: boolean) => void;
setMoecpu: (moecpu: number) => void; setMoecpu: (moecpu: number) => void;
setMoeexperts: (moeexperts: number) => void; setMoeexperts: (moeexperts: number) => void;
setSmartcache: (smartcache: boolean) => void;
setPipelineparallel: (pipelineparallel: boolean) => void;
parseAndApplyConfigFile: (configPath: string) => Promise<void>; parseAndApplyConfigFile: (configPath: string) => Promise<void>;
loadConfigFromFile: ( loadConfigFromFile: (
@ -141,6 +145,8 @@ export const useLaunchConfigStore = create<LaunchConfigState>((set, get) => ({
sdclipgpu: false, sdclipgpu: false,
moecpu: 0, moecpu: 0,
moeexperts: -1, moeexperts: -1,
smartcache: false,
pipelineparallel: false,
isImageGenerationMode: false, isImageGenerationMode: false,
isTextMode: false, isTextMode: false,
@ -195,6 +201,8 @@ export const useLaunchConfigStore = create<LaunchConfigState>((set, get) => ({
setSdclipgpu: (enabled) => set({ sdclipgpu: enabled }), setSdclipgpu: (enabled) => set({ sdclipgpu: enabled }),
setMoecpu: (moeCpu) => set({ moecpu: moeCpu }), setMoecpu: (moeCpu) => set({ moecpu: moeCpu }),
setMoeexperts: (moeExperts) => set({ moeexperts: moeExperts }), setMoeexperts: (moeExperts) => set({ moeexperts: moeExperts }),
setSmartcache: (smartcache) => set({ smartcache }),
setPipelineparallel: (pipelineparallel) => set({ pipelineparallel }),
// eslint-disable-next-line sonarjs/cognitive-complexity // eslint-disable-next-line sonarjs/cognitive-complexity
parseAndApplyConfigFile: async (configPath: string) => { parseAndApplyConfigFile: async (configPath: string) => {
@ -426,6 +434,18 @@ export const useLaunchConfigStore = create<LaunchConfigState>((set, get) => ({
updates.moeexperts = -1; updates.moeexperts = -1;
} }
if (typeof configData.smartcache === 'boolean') {
updates.smartcache = configData.smartcache;
} else {
updates.smartcache = false;
}
if (typeof configData.pipelineparallel === 'boolean') {
updates.pipelineparallel = configData.pipelineparallel;
} else {
updates.pipelineparallel = false;
}
set(updates); set(updates);
} }
}, },

View file

@ -121,6 +121,8 @@ export interface KoboldConfig {
preLaunchCommands?: string[]; preLaunchCommands?: string[];
moecpu?: number; moecpu?: number;
moeexperts?: number; moeexperts?: number;
smartcache?: boolean;
pipelineparallel?: boolean;
autoGpuLayers?: boolean; autoGpuLayers?: boolean;
model?: string; model?: string;
backend?: string; backend?: string;
@ -252,6 +254,7 @@ export interface LogsAPI {
export interface DependenciesAPI { export interface DependenciesAPI {
isNpxAvailable: () => Promise<boolean>; isNpxAvailable: () => Promise<boolean>;
isUvAvailable: () => Promise<boolean>; isUvAvailable: () => Promise<boolean>;
clearOpenWebUIData: () => Promise<{ success: boolean; error?: string }>;
} }
export interface MonitoringAPI { export interface MonitoringAPI {

View file

@ -315,15 +315,15 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"@codemirror/view@npm:^6.0.0, @codemirror/view@npm:^6.17.0, @codemirror/view@npm:^6.23.0, @codemirror/view@npm:^6.27.0, @codemirror/view@npm:^6.35.0, @codemirror/view@npm:^6.39.4": "@codemirror/view@npm:^6.0.0, @codemirror/view@npm:^6.17.0, @codemirror/view@npm:^6.23.0, @codemirror/view@npm:^6.27.0, @codemirror/view@npm:^6.35.0, @codemirror/view@npm:^6.39.5":
version: 6.39.4 version: 6.39.5
resolution: "@codemirror/view@npm:6.39.4" resolution: "@codemirror/view@npm:6.39.5"
dependencies: dependencies:
"@codemirror/state": "npm:^6.5.0" "@codemirror/state": "npm:^6.5.0"
crelt: "npm:^1.0.6" crelt: "npm:^1.0.6"
style-mod: "npm:^4.1.0" style-mod: "npm:^4.1.0"
w3c-keyname: "npm:^2.2.4" w3c-keyname: "npm:^2.2.4"
checksum: 10c0/aa17cfbfb0714243cccc9e970f4f382c3e747435f2fc63d40125370379227436fb529473dde856a276b8b7df98f83c55f4cc5cd0c24a56335f24233103b7dfd0 checksum: 10c0/54fc600b54c336283e8c73b6a0a99156880682121d73c2d4b74fc5a93f956e33b54ba16cd7debbf5fc28a48b18aeb643217e5200e3af0588909ca90cb275e957
languageName: node languageName: node
linkType: hard linkType: hard
@ -4124,7 +4124,7 @@ __metadata:
dependencies: dependencies:
"@codemirror/search": "npm:^6.5.11" "@codemirror/search": "npm:^6.5.11"
"@codemirror/theme-one-dark": "npm:^6.1.3" "@codemirror/theme-one-dark": "npm:^6.1.3"
"@codemirror/view": "npm:^6.39.4" "@codemirror/view": "npm:^6.39.5"
"@eslint/js": "npm:^9.39.2" "@eslint/js": "npm:^9.39.2"
"@fontsource/inter": "npm:^5.2.8" "@fontsource/inter": "npm:^5.2.8"
"@huggingface/gguf": "npm:^0.3.2" "@huggingface/gguf": "npm:^0.3.2"
@ -4154,7 +4154,7 @@ __metadata:
execa: "npm:^9.6.1" execa: "npm:^9.6.1"
globals: "npm:^16.5.0" globals: "npm:^16.5.0"
jiti: "npm:^2.6.1" jiti: "npm:^2.6.1"
lucide-react: "npm:^0.561.0" lucide-react: "npm:^0.562.0"
mime-types: "npm:^3.0.2" mime-types: "npm:^3.0.2"
prettier: "npm:^3.7.4" prettier: "npm:^3.7.4"
react: "npm:^19.2.3" react: "npm:^19.2.3"
@ -5387,12 +5387,12 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"lucide-react@npm:^0.561.0": "lucide-react@npm:^0.562.0":
version: 0.561.0 version: 0.562.0
resolution: "lucide-react@npm:0.561.0" resolution: "lucide-react@npm:0.562.0"
peerDependencies: peerDependencies:
react: ^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0 react: ^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0
checksum: 10c0/d23c96a543bdb1e11cec1f05a631f86f961f2ee17a10da42e7c297bb06c5f1587a1b66927f58082c67d8edc0749b0a5e44e543b461deab9db19d22fb9ca93cd7 checksum: 10c0/c713a2490916e42c678b9df0d7309d8cdb715af1d7139c7eb49c45547901b276a7cf722548012a668dc0e337eeadf5141d5af8c8fcda6721fc58fb2d288efcba
languageName: node languageName: node
linkType: hard linkType: hard