signsur4739379373 committed on
Commit
35e3bb5
·
1 Parent(s): 1a4ceb2

Add quality presets

Browse files
Files changed (2) hide show
  1. app.py +115 -1
  2. requirements.txt +1 -0
app.py CHANGED
@@ -89,6 +89,7 @@ NODE_FIRST_FRAME = "797"
89
  NODE_LIKENESS_GUIDE = "806"
90
  NODE_LIKENESS_ANCHOR = "827"
91
  NODE_LATENT_ANCHOR = "731"
 
92
 
93
  DEFAULT_NEGATIVE = (
94
  "captions, music, transition, VR, bad quality, subtitles, text, watermark, "
@@ -459,6 +460,7 @@ def _set_slider(workflow: dict[str, Any], node_id: str, value: int | float) -> N
459
  def _inject_params(
460
  workflow: dict[str, Any],
461
  *,
 
462
  image_name: str,
463
  prompt: str,
464
  negative_prompt: str,
@@ -486,19 +488,102 @@ def _inject_params(
486
  anchor = workflow.get(NODE_LIKENESS_ANCHOR, {}).get("inputs", {})
487
  latent_anchor = workflow.get(NODE_LATENT_ANCHOR, {}).get("inputs", {})
488
 
489
- if mode == "anchor only":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
490
  guide["strength"] = 0.0
491
  anchor["strength"] = 0.0
492
  latent_anchor["strength"] = latent_anchor_strength
493
  else:
494
  guide["strength"] = likeness_strength
 
495
  anchor["strength"] = likeness_anchor_strength
496
  latent_anchor["strength"] = latent_anchor_strength
497
  guide["face_detect"] = "manual" if mode == "manual bbox" else "auto"
498
  guide["face_bbox_within_reference"] = face_bbox.strip()
499
  guide["reference_mask_mode"] = "bbox_softfade"
 
 
 
 
 
 
 
 
 
500
  anchor["reference_source"] = "auto"
 
 
 
 
 
 
 
 
501
  anchor["override_face_bbox"] = face_bbox.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
502
 
503
  return workflow
504
 
@@ -565,6 +650,7 @@ def get_gpu_duration(
565
  image_path: str,
566
  prompt: str,
567
  negative_prompt: str,
 
568
  seconds: float,
569
  max_width: int,
570
  max_height: int,
@@ -592,6 +678,7 @@ def generate(
592
  image_path: str,
593
  prompt: str,
594
  negative_prompt: str,
 
595
  seconds: float,
596
  max_width: int,
597
  max_height: int,
@@ -624,6 +711,7 @@ def generate(
624
 
625
  workflow = _inject_params(
626
  _workflow_template(),
 
627
  image_name=image_name,
628
  prompt=prompt.strip(),
629
  negative_prompt=negative_prompt.strip() or DEFAULT_NEGATIVE,
@@ -674,6 +762,24 @@ if os.environ.get("SKIP_STARTUP_SETUP") != "1":
674
  _ensure_models()
675
 
676
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
677
  with gr.Blocks(title="LTX image-to-video") as demo:
678
  gr.Markdown("# LTX image-to-video")
679
  with gr.Row():
@@ -681,6 +787,7 @@ with gr.Blocks(title="LTX image-to-video") as demo:
681
  image = gr.Image(label="reference image", type="filepath")
682
  prompt = gr.Textbox(label="prompt", lines=4)
683
  negative = gr.Textbox(label="negative prompt", value=DEFAULT_NEGATIVE, lines=2)
 
684
  seconds = gr.Slider(1.0, 10.0, value=4.0, step=0.5, label="duration")
685
  with gr.Row():
686
  max_width = gr.Slider(512, 1536, value=1120, step=32, label="max width")
@@ -707,6 +814,7 @@ with gr.Blocks(title="LTX image-to-video") as demo:
707
  image,
708
  prompt,
709
  negative,
 
710
  seconds,
711
  max_width,
712
  max_height,
@@ -722,6 +830,12 @@ with gr.Blocks(title="LTX image-to-video") as demo:
722
  outputs=[video, status, used_seed],
723
  )
724
 
 
 
 
 
 
 
725
  demo.queue(default_concurrency_limit=1)
726
 
727
  if __name__ == "__main__":
 
89
  NODE_LIKENESS_GUIDE = "806"
90
  NODE_LIKENESS_ANCHOR = "827"
91
  NODE_LATENT_ANCHOR = "731"
92
+ PRESETS = ["original", "tuned"]
93
 
94
  DEFAULT_NEGATIVE = (
95
  "captions, music, transition, VR, bad quality, subtitles, text, watermark, "
 
460
  def _inject_params(
461
  workflow: dict[str, Any],
462
  *,
463
+ preset: str,
464
  image_name: str,
465
  prompt: str,
466
  negative_prompt: str,
 
488
  anchor = workflow.get(NODE_LIKENESS_ANCHOR, {}).get("inputs", {})
489
  latent_anchor = workflow.get(NODE_LATENT_ANCHOR, {}).get("inputs", {})
490
 
491
+ if preset == "original":
492
+ guide["strength"] = likeness_strength
493
+ guide["placement_mode"] = "silent_reference"
494
+ guide["face_detect"] = "manual"
495
+ guide["reference_mask_mode"] = "bbox_only"
496
+ guide["face_padding"] = 0.15
497
+ guide["crf"] = 24
498
+ guide["blur_radius"] = 0
499
+ guide["interpolation"] = "area"
500
+ guide["crop"] = "center"
501
+ guide["attention_strength"] = 1
502
+ guide["emit_latent"] = "passthrough"
503
+ guide["debug"] = False
504
+
505
+ anchor["strength"] = likeness_anchor_strength
506
+ anchor["reference_source"] = "auto"
507
+ anchor["similarity_threshold"] = 0.5
508
+ anchor["decay_with_distance"] = 0
509
+ anchor["bypass"] = False
510
+ anchor["debug"] = False
511
+ anchor["advanced_mode"] = False
512
+ anchor["depth_curve"] = "middle"
513
+ anchor["block_index_filter"] = ""
514
+ anchor["similarity_sharpness"] = 8
515
+ anchor["override_face_bbox"] = ""
516
+ anchor["skip_when_sigma_above"] = 0
517
+ anchor["pull_mode"] = "directional"
518
+ anchor["late_block_falloff"] = 0.4
519
+
520
+ latent_anchor["strength"] = latent_anchor_strength
521
+ latent_anchor["cache_at_step"] = 5
522
+ latent_anchor["similarity_threshold"] = 0.5
523
+ latent_anchor["decay_with_distance"] = 0.15
524
+ latent_anchor["energy_threshold"] = 0.3
525
+ latent_anchor["bypass"] = False
526
+ latent_anchor["debug"] = False
527
+ latent_anchor["advanced_mode"] = True
528
+ latent_anchor["cache_mode"] = "schedule"
529
+ latent_anchor["forwards_per_step"] = 2
530
+ latent_anchor["cache_warmup"] = 50
531
+ latent_anchor["anchor_frame"] = 0
532
+ latent_anchor["depth_curve"] = "flat"
533
+ latent_anchor["block_index_filter"] = ""
534
+
535
+ if mode == "manual bbox" and face_bbox.strip():
536
+ guide["face_bbox_within_reference"] = face_bbox.strip()
537
+ anchor["frame_0_bbox"] = face_bbox.strip()
538
+
539
+ elif mode == "anchor only":
540
  guide["strength"] = 0.0
541
  anchor["strength"] = 0.0
542
  latent_anchor["strength"] = latent_anchor_strength
543
  else:
544
  guide["strength"] = likeness_strength
545
+ guide["placement_mode"] = "silent_reference"
546
  anchor["strength"] = likeness_anchor_strength
547
  latent_anchor["strength"] = latent_anchor_strength
548
  guide["face_detect"] = "manual" if mode == "manual bbox" else "auto"
549
  guide["face_bbox_within_reference"] = face_bbox.strip()
550
  guide["reference_mask_mode"] = "bbox_softfade"
551
+ guide["face_padding"] = 0.15
552
+ guide["crf"] = 24
553
+ guide["blur_radius"] = 0
554
+ guide["interpolation"] = "area"
555
+ guide["crop"] = "center"
556
+ guide["attention_strength"] = 1
557
+ guide["emit_latent"] = "passthrough"
558
+ guide["debug"] = False
559
+
560
  anchor["reference_source"] = "auto"
561
+ anchor["similarity_threshold"] = 0.45
562
+ anchor["decay_with_distance"] = 0
563
+ anchor["bypass"] = False
564
+ anchor["debug"] = False
565
+ anchor["advanced_mode"] = True
566
+ anchor["depth_curve"] = "flat"
567
+ anchor["block_index_filter"] = ""
568
+ anchor["similarity_sharpness"] = 6
569
  anchor["override_face_bbox"] = face_bbox.strip()
570
+ anchor["skip_when_sigma_above"] = 0
571
+ anchor["pull_mode"] = "directional"
572
+ anchor["late_block_falloff"] = 0.4
573
+
574
+ latent_anchor["cache_at_step"] = 5
575
+ latent_anchor["similarity_threshold"] = 0.5
576
+ latent_anchor["decay_with_distance"] = 0.15
577
+ latent_anchor["energy_threshold"] = 0.3
578
+ latent_anchor["bypass"] = False
579
+ latent_anchor["debug"] = False
580
+ latent_anchor["advanced_mode"] = True
581
+ latent_anchor["cache_mode"] = "schedule"
582
+ latent_anchor["forwards_per_step"] = 2
583
+ latent_anchor["cache_warmup"] = 50
584
+ latent_anchor["anchor_frame"] = 0
585
+ latent_anchor["depth_curve"] = "flat"
586
+ latent_anchor["block_index_filter"] = ""
587
 
588
  return workflow
589
 
 
650
  image_path: str,
651
  prompt: str,
652
  negative_prompt: str,
653
+ preset: str,
654
  seconds: float,
655
  max_width: int,
656
  max_height: int,
 
678
  image_path: str,
679
  prompt: str,
680
  negative_prompt: str,
681
+ preset: str,
682
  seconds: float,
683
  max_width: int,
684
  max_height: int,
 
711
 
712
  workflow = _inject_params(
713
  _workflow_template(),
714
+ preset=preset,
715
  image_name=image_name,
716
  prompt=prompt.strip(),
717
  negative_prompt=negative_prompt.strip() or DEFAULT_NEGATIVE,
 
762
  _ensure_models()
763
 
764
 
765
def apply_preset(preset: str):
    """Build the Gradio updates that reset the tuning controls for a preset.

    Five updates are returned in a fixed order; the ``preset.change`` handler
    maps them onto ``mode``, ``likeness_strength``,
    ``likeness_anchor_strength``, ``latent_anchor_strength`` and
    ``first_frame_strength``.

    Any value other than ``"original"`` yields the second ("tuned") set of
    numbers.
    """
    # Only the last three controls differ between the two presets.
    if preset == "original":
        anchor_value, latent_value, first_frame_value = 0.5, 0.11, 0.77
    else:
        anchor_value, latent_value, first_frame_value = 0.15, 0.08, 0.82

    return (
        gr.update(value="auto face"),
        gr.update(value=0.9),
        gr.update(value=anchor_value),
        gr.update(value=latent_value),
        gr.update(value=first_frame_value),
    )
781
+
782
+
783
  with gr.Blocks(title="LTX image-to-video") as demo:
784
  gr.Markdown("# LTX image-to-video")
785
  with gr.Row():
 
787
  image = gr.Image(label="reference image", type="filepath")
788
  prompt = gr.Textbox(label="prompt", lines=4)
789
  negative = gr.Textbox(label="negative prompt", value=DEFAULT_NEGATIVE, lines=2)
790
+ preset = gr.Dropdown(PRESETS, value="tuned", label="preset")
791
  seconds = gr.Slider(1.0, 10.0, value=4.0, step=0.5, label="duration")
792
  with gr.Row():
793
  max_width = gr.Slider(512, 1536, value=1120, step=32, label="max width")
 
814
  image,
815
  prompt,
816
  negative,
817
+ preset,
818
  seconds,
819
  max_width,
820
  max_height,
 
830
  outputs=[video, status, used_seed],
831
  )
832
 
833
+ preset.change(
834
+ fn=apply_preset,
835
+ inputs=[preset],
836
+ outputs=[mode, likeness_strength, likeness_anchor_strength, latent_anchor_strength, first_frame_strength],
837
+ )
838
+
839
  demo.queue(default_concurrency_limit=1)
840
 
841
  if __name__ == "__main__":
requirements.txt CHANGED
@@ -14,6 +14,7 @@ scipy
14
  numpy
15
  pillow
16
  opencv-python-headless
 
17
  av
18
  kornia<0.8.0
19
  psutil
 
14
  numpy
15
  pillow
16
  opencv-python-headless
17
+ mediapipe
18
  av
19
  kornia<0.8.0
20
  psutil