Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 23 additions & 2 deletions frontend/src/components/SettingsPanel.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ interface SettingsPanelProps {
// VACE settings
vaceEnabled?: boolean;
onVaceEnabledChange?: (enabled: boolean) => void;
vaceUseInputVideo?: boolean;
onVaceUseInputVideoChange?: (enabled: boolean) => void;
vaceContextScale?: number;
onVaceContextScaleChange?: (scale: number) => void;
}
Expand Down Expand Up @@ -124,6 +126,8 @@ export function SettingsPanel({
spoutAvailable = false,
vaceEnabled = true,
onVaceEnabledChange,
vaceUseInputVideo = true,
onVaceUseInputVideoChange,
vaceContextScale = 1.0,
onVaceContextScaleChange,
}: SettingsPanelProps) {
Expand Down Expand Up @@ -347,7 +351,7 @@ export function SettingsPanel({
<div className="flex items-center justify-between gap-2">
<LabelWithTooltip
label="VACE"
tooltip="Enable VACE (Video All-In-One Creation and Editing) support for reference image conditioning and structural guidance. When enabled, incoming video in V2V mode is routed to VACE for conditioning. When disabled, V2V uses faster regular encoding. Requires pipeline reload to take effect."
tooltip="Enable VACE (Video All-In-One Creation and Editing) support for reference image conditioning and structural guidance. When enabled, you can use reference images for R2V generation. In Video input mode, a separate toggle controls whether the input video is used for VACE conditioning or for latent initialization. Requires pipeline reload to take effect."
className="text-sm font-medium"
/>
<Toggle
Expand All @@ -363,7 +367,24 @@ export function SettingsPanel({
</div>

{vaceEnabled && (
<div className="rounded-lg border bg-card p-3">
<div className="rounded-lg border bg-card p-3 space-y-3">
<div className="flex items-center justify-between gap-2">
<LabelWithTooltip
label="Use Input Video"
tooltip="When enabled in Video input mode, the input video is used for VACE conditioning. When disabled, the input video is used for latent initialization instead, allowing you to use reference images while in Video input mode."
className="text-xs text-muted-foreground"
/>
<Toggle
pressed={vaceUseInputVideo}
onPressedChange={onVaceUseInputVideoChange || (() => {})}
variant="outline"
size="sm"
className="h-7"
disabled={isStreaming || isLoading || inputMode !== "video"}
>
{vaceUseInputVideo ? "ON" : "OFF"}
</Toggle>
</div>
<div className="flex items-center gap-2">
<LabelWithTooltip
label="Scale:"
Expand Down
1 change: 1 addition & 0 deletions frontend/src/hooks/useWebRTC.ts
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ export function useWebRTC(options?: UseWebRTCOptions) {
spout_sender?: { enabled: boolean; name: string };
spout_receiver?: { enabled: boolean; name: string };
vace_ref_images?: string[];
vace_use_input_video?: boolean;
vace_context_scale?: number;
}) => {
if (
Expand Down
18 changes: 18 additions & 0 deletions frontend/src/pages/StreamPage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,16 @@ export function StreamPage() {
// Note: This setting requires pipeline reload, so we don't send parameter update here
};

// Handler for the "Use Input Video" toggle: stores the new value in settings
// and, only while a stream is active, pushes it to the backend as a
// parameter update.
const handleVaceUseInputVideoChange = (enabled: boolean) => {
  updateSettings({ vaceUseInputVideo: enabled });

  // No live stream: nothing to notify.
  if (!isStreaming) {
    return;
  }

  sendParameterUpdate({ vace_use_input_video: enabled });
};

// Handler for reference-image changes: stores the new list in settings.
// It does not send a parameter update itself.
const handleRefImagesChange = (images: string[]) => {
  const settingsPatch = { refImages: images };
  updateSettings(settingsPatch);
};
Expand Down Expand Up @@ -790,6 +800,7 @@ export function StreamPage() {
spout_sender?: { enabled: boolean; name: string };
spout_receiver?: { enabled: boolean; name: string };
vace_ref_images?: string[];
vace_use_input_video?: boolean;
vace_context_scale?: number;
} = {
// Signal the intended input mode to the backend so it doesn't
Expand Down Expand Up @@ -826,6 +837,11 @@ export function StreamPage() {
initialParameters.vace_ref_images = vaceParams.vace_ref_images;
initialParameters.vace_context_scale = vaceParams.vace_context_scale;
}
// In video mode, always send vace_use_input_video explicitly,
// falling back to false when the setting has never been set.
if (currentMode === "video") {
initialParameters.vace_use_input_video =
settings.vaceUseInputVideo ?? false;
}

// Video mode parameters - applies to all pipelines in video mode
if (currentMode === "video") {
Expand Down Expand Up @@ -1115,6 +1131,8 @@ export function StreamPage() {
settings.inputMode !== "video")
}
onVaceEnabledChange={handleVaceEnabledChange}
vaceUseInputVideo={settings.vaceUseInputVideo ?? false}
onVaceUseInputVideoChange={handleVaceUseInputVideoChange}
vaceContextScale={settings.vaceContextScale ?? 1.0}
onVaceContextScaleChange={handleVaceContextScaleChange}
/>
Expand Down
1 change: 1 addition & 0 deletions frontend/src/types/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ export interface SettingsState {
};
// VACE-specific settings
vaceEnabled?: boolean;
vaceUseInputVideo?: boolean;
refImages?: string[];
vaceContextScale?: number;
}
Expand Down
10 changes: 6 additions & 4 deletions src/scope/server/frame_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -717,14 +717,16 @@ def process_chunk(self):
call_params["lora_scales"] = lora_scales

# Route video input based on VACE status
# We do not support combining normal V2V (denoising from noisy video latents) and VACE V2V editing
# We do not support combining latent initialization and VACE conditioning
if video_input is not None:
vace_enabled = getattr(pipeline, "vace_enabled", False)
if vace_enabled:
# VACE V2V editing mode: route to vace_input_frames
vace_use_input_video = self.parameters.get("vace_use_input_video", True)

if vace_enabled and vace_use_input_video:
# VACE conditioning: route to vace_input_frames
call_params["vace_input_frames"] = video_input
else:
# Normal V2V mode: route to video
# Latent initialization: route to video
call_params["video"] = video_input

output = pipeline(**call_params)
Expand Down
8 changes: 6 additions & 2 deletions src/scope/server/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,10 @@ class Parameters(BaseModel):
default=None,
description="List of reference image file paths for VACE conditioning. Images should be located in the assets directory (at the same level as the models directory).",
)
vace_use_input_video: bool | None = Field(
default=None,
description="When enabled in Video input mode, the input video is used for VACE conditioning. When disabled, the input video is used for latent initialization instead, allowing reference images to be used while in Video input mode.",
)
vace_context_scale: float = Field(
default=1.0,
description="Scaling factor for VACE hint injection. Higher values make reference images more influential.",
Expand Down Expand Up @@ -304,7 +308,7 @@ class StreamDiffusionV2LoadParams(LoRAEnabledLoadParams):
)
vace_enabled: bool = Field(
default=True,
description="Enable VACE (Video All-In-One Creation and Editing) support for reference image conditioning and structural guidance. When enabled, incoming video in V2V mode is routed to VACE for conditioning. When disabled, V2V uses faster regular encoding.",
description="Enable VACE (Video All-In-One Creation and Editing) support for reference image conditioning and structural guidance. When enabled, input video in Video input mode can be used for VACE conditioning. When disabled, video uses faster regular encoding for latent initialization.",
)


Expand Down Expand Up @@ -343,7 +347,7 @@ class LongLiveLoadParams(LoRAEnabledLoadParams):
)
vace_enabled: bool = Field(
default=True,
description="Enable VACE (Video All-In-One Creation and Editing) support for reference image conditioning and structural guidance. When enabled, incoming video in V2V mode is routed to VACE for conditioning. When disabled, V2V uses faster regular encoding.",
description="Enable VACE (Video All-In-One Creation and Editing) support for reference image conditioning and structural guidance. When enabled, input video in Video input mode can be used for VACE conditioning. When disabled, video uses faster regular encoding for latent initialization.",
)


Expand Down
Loading