daydreamlive · yondonfu · Jan 5, 2026 · Jan 3, 2026 · Jan 5, 2026
diff --git a/frontend/src/components/SettingsPanel.tsx b/frontend/src/components/SettingsPanel.tsx
@@ -85,6 +85,8 @@ interface SettingsPanelProps {
   // VACE settings
   vaceEnabled?: boolean;
   onVaceEnabledChange?: (enabled: boolean) => void;
+  vaceUseInputVideo?: boolean;
+  onVaceUseInputVideoChange?: (enabled: boolean) => void;
   vaceContextScale?: number;
   onVaceContextScaleChange?: (scale: number) => void;
 }
@@ -124,6 +126,8 @@ export function SettingsPanel({
   spoutAvailable = false,
   vaceEnabled = true,
   onVaceEnabledChange,
+  vaceUseInputVideo = true,
+  onVaceUseInputVideoChange,
   vaceContextScale = 1.0,
   onVaceContextScaleChange,
 }: SettingsPanelProps) {
@@ -347,7 +351,7 @@ export function SettingsPanel({
             <div className="flex items-center justify-between gap-2">
               <LabelWithTooltip
                 label="VACE"
-                tooltip="Enable VACE (Video All-In-One Creation and Editing) support for reference image conditioning and structural guidance. When enabled, incoming video in V2V mode is routed to VACE for conditioning. When disabled, V2V uses faster regular encoding. Requires pipeline reload to take effect."
+                tooltip="Enable VACE (Video All-In-One Creation and Editing) support for reference image conditioning and structural guidance. When enabled, you can use reference images for R2V generation. In Video input mode, a separate toggle controls whether the input video is used for VACE conditioning or for latent initialization. Requires pipeline reload to take effect."
                 className="text-sm font-medium"
               />
               <Toggle
@@ -363,7 +367,24 @@ export function SettingsPanel({
             </div>
 
             {vaceEnabled && (
-              <div className="rounded-lg border bg-card p-3">
+              <div className="rounded-lg border bg-card p-3 space-y-3">
+                <div className="flex items-center justify-between gap-2">
+                  <LabelWithTooltip
+                    label="Use Input Video"
+                    tooltip="When enabled in Video input mode, the input video is used for VACE conditioning. When disabled, the input video is used for latent initialization instead, allowing you to use reference images while in Video input mode."
+                    className="text-xs text-muted-foreground"
+                  />
+                  <Toggle
+                    pressed={vaceUseInputVideo}
+                    onPressedChange={onVaceUseInputVideoChange || (() => {})}
+                    variant="outline"
+                    size="sm"
+                    className="h-7"
+                    disabled={isStreaming || isLoading || inputMode !== "video"}
+                  >
+                    {vaceUseInputVideo ? "ON" : "OFF"}
+                  </Toggle>
+                </div>
                 <div className="flex items-center gap-2">
                   <LabelWithTooltip
                     label="Scale:"

diff --git a/frontend/src/hooks/useWebRTC.ts b/frontend/src/hooks/useWebRTC.ts
@@ -326,6 +326,7 @@ export function useWebRTC(options?: UseWebRTCOptions) {
       spout_sender?: { enabled: boolean; name: string };
       spout_receiver?: { enabled: boolean; name: string };
       vace_ref_images?: string[];
+      vace_use_input_video?: boolean;
       vace_context_scale?: number;
     }) => {
       if (

diff --git a/frontend/src/pages/StreamPage.tsx b/frontend/src/pages/StreamPage.tsx
@@ -487,6 +487,16 @@ export function StreamPage() {
     // Note: This setting requires pipeline reload, so we don't send parameter update here
   };
 
+  // Persists the "use input video" choice and, during an active stream,
+  // forwards it to the backend right away.
+  const handleVaceUseInputVideoChange = (enabled: boolean) => {
+    updateSettings({ vaceUseInputVideo: enabled });
+    if (!isStreaming) {
+      return;
+    }
+    sendParameterUpdate({ vace_use_input_video: enabled });
+  };
+
   const handleRefImagesChange = (images: string[]) => {
     updateSettings({ refImages: images });
   };
@@ -790,6 +800,7 @@ export function StreamPage() {
         spout_sender?: { enabled: boolean; name: string };
         spout_receiver?: { enabled: boolean; name: string };
         vace_ref_images?: string[];
+        vace_use_input_video?: boolean;
         vace_context_scale?: number;
       } = {
         // Signal the intended input mode to the backend so it doesn't
@@ -826,6 +837,11 @@ export function StreamPage() {
         initialParameters.vace_ref_images = vaceParams.vace_ref_images;
         initialParameters.vace_context_scale = vaceParams.vace_context_scale;
       }
+      // Add vace_use_input_video parameter
+      if (currentMode === "video") {
+        initialParameters.vace_use_input_video =
+          settings.vaceUseInputVideo ?? false;
+      }
 
       // Video mode parameters - applies to all pipelines in video mode
       if (currentMode === "video") {
@@ -1115,6 +1131,8 @@ export function StreamPage() {
                 settings.inputMode !== "video")
             }
             onVaceEnabledChange={handleVaceEnabledChange}
+            vaceUseInputVideo={settings.vaceUseInputVideo ?? false}
+            onVaceUseInputVideoChange={handleVaceUseInputVideoChange}
             vaceContextScale={settings.vaceContextScale ?? 1.0}
             onVaceContextScaleChange={handleVaceContextScaleChange}
           />

diff --git a/frontend/src/types/index.ts b/frontend/src/types/index.ts
@@ -71,6 +71,7 @@ export interface SettingsState {
   };
   // VACE-specific settings
   vaceEnabled?: boolean;
+  vaceUseInputVideo?: boolean;
   refImages?: string[];
   vaceContextScale?: number;
 }

diff --git a/src/scope/server/frame_processor.py b/src/scope/server/frame_processor.py
@@ -717,14 +717,16 @@ def process_chunk(self):
                 call_params["lora_scales"] = lora_scales
 
             # Route video input based on VACE status
-            # We do not support combining normal V2V (denoising from noisy video latents) and VACE V2V editing
+            # We do not support combining latent initialization and VACE conditioning
             if video_input is not None:
                 vace_enabled = getattr(pipeline, "vace_enabled", False)
-                if vace_enabled:
-                    # VACE V2V editing mode: route to vace_input_frames
+                vace_use_input_video = self.parameters.get("vace_use_input_video", True)
+
+                if vace_enabled and vace_use_input_video:
+                    # VACE conditioning: route to vace_input_frames
                     call_params["vace_input_frames"] = video_input
                 else:
-                    # Normal V2V mode: route to video
+                    # Latent initialization: route to video
                     call_params["video"] = video_input
 
             output = pipeline(**call_params)

diff --git a/src/scope/server/schema.py b/src/scope/server/schema.py
@@ -107,6 +107,10 @@ class Parameters(BaseModel):
         default=None,
         description="List of reference image file paths for VACE conditioning. Images should be located in the assets directory (at the same level as the models directory).",
     )
+    vace_use_input_video: bool | None = Field(
+        default=None,
+        description="When enabled in Video input mode, the input video is used for VACE conditioning. When disabled, the input video is used for latent initialization instead, allowing reference images to be used while in Video input mode.",
+    )
     vace_context_scale: float = Field(
         default=1.0,
         description="Scaling factor for VACE hint injection. Higher values make reference images more influential.",
@@ -304,7 +308,7 @@ class StreamDiffusionV2LoadParams(LoRAEnabledLoadParams):
     )
     vace_enabled: bool = Field(
         default=True,
-        description="Enable VACE (Video All-In-One Creation and Editing) support for reference image conditioning and structural guidance. When enabled, incoming video in V2V mode is routed to VACE for conditioning. When disabled, V2V uses faster regular encoding.",
+        description="Enable VACE (Video All-In-One Creation and Editing) support for reference image conditioning and structural guidance. When enabled, input video in Video input mode can be used for VACE conditioning. When disabled, video uses faster regular encoding for latent initialization.",
     )
 
 
@@ -343,7 +347,7 @@ class LongLiveLoadParams(LoRAEnabledLoadParams):
     )
     vace_enabled: bool = Field(
         default=True,
-        description="Enable VACE (Video All-In-One Creation and Editing) support for reference image conditioning and structural guidance. When enabled, incoming video in V2V mode is routed to VACE for conditioning. When disabled, V2V uses faster regular encoding.",
+        description="Enable VACE (Video All-In-One Creation and Editing) support for reference image conditioning and structural guidance. When enabled, input video in Video input mode can be used for VACE conditioning. When disabled, video uses faster regular encoding for latent initialization.",
     )