Spaces:
Running on Zero
Running on Zero
Commit ·
35e3bb5
1
Parent(s): 1a4ceb2
Add quality presets
Browse files
- app.py +115 -1
- requirements.txt +1 -0
app.py
CHANGED
|
@@ -89,6 +89,7 @@ NODE_FIRST_FRAME = "797"
|
|
| 89 |
NODE_LIKENESS_GUIDE = "806"
|
| 90 |
NODE_LIKENESS_ANCHOR = "827"
|
| 91 |
NODE_LATENT_ANCHOR = "731"
|
|
|
|
| 92 |
|
| 93 |
DEFAULT_NEGATIVE = (
|
| 94 |
"captions, music, transition, VR, bad quality, subtitles, text, watermark, "
|
|
@@ -459,6 +460,7 @@ def _set_slider(workflow: dict[str, Any], node_id: str, value: int | float) -> N
|
|
| 459 |
def _inject_params(
|
| 460 |
workflow: dict[str, Any],
|
| 461 |
*,
|
|
|
|
| 462 |
image_name: str,
|
| 463 |
prompt: str,
|
| 464 |
negative_prompt: str,
|
|
@@ -486,19 +488,102 @@ def _inject_params(
|
|
| 486 |
anchor = workflow.get(NODE_LIKENESS_ANCHOR, {}).get("inputs", {})
|
| 487 |
latent_anchor = workflow.get(NODE_LATENT_ANCHOR, {}).get("inputs", {})
|
| 488 |
|
| 489 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 490 |
guide["strength"] = 0.0
|
| 491 |
anchor["strength"] = 0.0
|
| 492 |
latent_anchor["strength"] = latent_anchor_strength
|
| 493 |
else:
|
| 494 |
guide["strength"] = likeness_strength
|
|
|
|
| 495 |
anchor["strength"] = likeness_anchor_strength
|
| 496 |
latent_anchor["strength"] = latent_anchor_strength
|
| 497 |
guide["face_detect"] = "manual" if mode == "manual bbox" else "auto"
|
| 498 |
guide["face_bbox_within_reference"] = face_bbox.strip()
|
| 499 |
guide["reference_mask_mode"] = "bbox_softfade"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 500 |
anchor["reference_source"] = "auto"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 501 |
anchor["override_face_bbox"] = face_bbox.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 502 |
|
| 503 |
return workflow
|
| 504 |
|
|
@@ -565,6 +650,7 @@ def get_gpu_duration(
|
|
| 565 |
image_path: str,
|
| 566 |
prompt: str,
|
| 567 |
negative_prompt: str,
|
|
|
|
| 568 |
seconds: float,
|
| 569 |
max_width: int,
|
| 570 |
max_height: int,
|
|
@@ -592,6 +678,7 @@ def generate(
|
|
| 592 |
image_path: str,
|
| 593 |
prompt: str,
|
| 594 |
negative_prompt: str,
|
|
|
|
| 595 |
seconds: float,
|
| 596 |
max_width: int,
|
| 597 |
max_height: int,
|
|
@@ -624,6 +711,7 @@ def generate(
|
|
| 624 |
|
| 625 |
workflow = _inject_params(
|
| 626 |
_workflow_template(),
|
|
|
|
| 627 |
image_name=image_name,
|
| 628 |
prompt=prompt.strip(),
|
| 629 |
negative_prompt=negative_prompt.strip() or DEFAULT_NEGATIVE,
|
|
@@ -674,6 +762,24 @@ if os.environ.get("SKIP_STARTUP_SETUP") != "1":
|
|
| 674 |
_ensure_models()
|
| 675 |
|
| 676 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 677 |
with gr.Blocks(title="LTX image-to-video") as demo:
|
| 678 |
gr.Markdown("# LTX image-to-video")
|
| 679 |
with gr.Row():
|
|
@@ -681,6 +787,7 @@ with gr.Blocks(title="LTX image-to-video") as demo:
|
|
| 681 |
image = gr.Image(label="reference image", type="filepath")
|
| 682 |
prompt = gr.Textbox(label="prompt", lines=4)
|
| 683 |
negative = gr.Textbox(label="negative prompt", value=DEFAULT_NEGATIVE, lines=2)
|
|
|
|
| 684 |
seconds = gr.Slider(1.0, 10.0, value=4.0, step=0.5, label="duration")
|
| 685 |
with gr.Row():
|
| 686 |
max_width = gr.Slider(512, 1536, value=1120, step=32, label="max width")
|
|
@@ -707,6 +814,7 @@ with gr.Blocks(title="LTX image-to-video") as demo:
|
|
| 707 |
image,
|
| 708 |
prompt,
|
| 709 |
negative,
|
|
|
|
| 710 |
seconds,
|
| 711 |
max_width,
|
| 712 |
max_height,
|
|
@@ -722,6 +830,12 @@ with gr.Blocks(title="LTX image-to-video") as demo:
|
|
| 722 |
outputs=[video, status, used_seed],
|
| 723 |
)
|
| 724 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 725 |
demo.queue(default_concurrency_limit=1)
|
| 726 |
|
| 727 |
if __name__ == "__main__":
|
|
|
|
| 89 |
NODE_LIKENESS_GUIDE = "806"
|
| 90 |
NODE_LIKENESS_ANCHOR = "827"
|
| 91 |
NODE_LATENT_ANCHOR = "731"
|
| 92 |
+
# Preset names offered by the "preset" dropdown in the UI; "tuned" is the
# dropdown's initial value.
PRESETS = ["original", "tuned"]
|
| 93 |
|
| 94 |
DEFAULT_NEGATIVE = (
|
| 95 |
"captions, music, transition, VR, bad quality, subtitles, text, watermark, "
|
|
|
|
| 460 |
def _inject_params(
|
| 461 |
workflow: dict[str, Any],
|
| 462 |
*,
|
| 463 |
+
preset: str,
|
| 464 |
image_name: str,
|
| 465 |
prompt: str,
|
| 466 |
negative_prompt: str,
|
|
|
|
| 488 |
anchor = workflow.get(NODE_LIKENESS_ANCHOR, {}).get("inputs", {})
|
| 489 |
latent_anchor = workflow.get(NODE_LATENT_ANCHOR, {}).get("inputs", {})
|
| 490 |
|
| 491 |
+
if preset == "original":
|
| 492 |
+
guide["strength"] = likeness_strength
|
| 493 |
+
guide["placement_mode"] = "silent_reference"
|
| 494 |
+
guide["face_detect"] = "manual"
|
| 495 |
+
guide["reference_mask_mode"] = "bbox_only"
|
| 496 |
+
guide["face_padding"] = 0.15
|
| 497 |
+
guide["crf"] = 24
|
| 498 |
+
guide["blur_radius"] = 0
|
| 499 |
+
guide["interpolation"] = "area"
|
| 500 |
+
guide["crop"] = "center"
|
| 501 |
+
guide["attention_strength"] = 1
|
| 502 |
+
guide["emit_latent"] = "passthrough"
|
| 503 |
+
guide["debug"] = False
|
| 504 |
+
|
| 505 |
+
anchor["strength"] = likeness_anchor_strength
|
| 506 |
+
anchor["reference_source"] = "auto"
|
| 507 |
+
anchor["similarity_threshold"] = 0.5
|
| 508 |
+
anchor["decay_with_distance"] = 0
|
| 509 |
+
anchor["bypass"] = False
|
| 510 |
+
anchor["debug"] = False
|
| 511 |
+
anchor["advanced_mode"] = False
|
| 512 |
+
anchor["depth_curve"] = "middle"
|
| 513 |
+
anchor["block_index_filter"] = ""
|
| 514 |
+
anchor["similarity_sharpness"] = 8
|
| 515 |
+
anchor["override_face_bbox"] = ""
|
| 516 |
+
anchor["skip_when_sigma_above"] = 0
|
| 517 |
+
anchor["pull_mode"] = "directional"
|
| 518 |
+
anchor["late_block_falloff"] = 0.4
|
| 519 |
+
|
| 520 |
+
latent_anchor["strength"] = latent_anchor_strength
|
| 521 |
+
latent_anchor["cache_at_step"] = 5
|
| 522 |
+
latent_anchor["similarity_threshold"] = 0.5
|
| 523 |
+
latent_anchor["decay_with_distance"] = 0.15
|
| 524 |
+
latent_anchor["energy_threshold"] = 0.3
|
| 525 |
+
latent_anchor["bypass"] = False
|
| 526 |
+
latent_anchor["debug"] = False
|
| 527 |
+
latent_anchor["advanced_mode"] = True
|
| 528 |
+
latent_anchor["cache_mode"] = "schedule"
|
| 529 |
+
latent_anchor["forwards_per_step"] = 2
|
| 530 |
+
latent_anchor["cache_warmup"] = 50
|
| 531 |
+
latent_anchor["anchor_frame"] = 0
|
| 532 |
+
latent_anchor["depth_curve"] = "flat"
|
| 533 |
+
latent_anchor["block_index_filter"] = ""
|
| 534 |
+
|
| 535 |
+
if mode == "manual bbox" and face_bbox.strip():
|
| 536 |
+
guide["face_bbox_within_reference"] = face_bbox.strip()
|
| 537 |
+
anchor["frame_0_bbox"] = face_bbox.strip()
|
| 538 |
+
|
| 539 |
+
elif mode == "anchor only":
|
| 540 |
guide["strength"] = 0.0
|
| 541 |
anchor["strength"] = 0.0
|
| 542 |
latent_anchor["strength"] = latent_anchor_strength
|
| 543 |
else:
|
| 544 |
guide["strength"] = likeness_strength
|
| 545 |
+
guide["placement_mode"] = "silent_reference"
|
| 546 |
anchor["strength"] = likeness_anchor_strength
|
| 547 |
latent_anchor["strength"] = latent_anchor_strength
|
| 548 |
guide["face_detect"] = "manual" if mode == "manual bbox" else "auto"
|
| 549 |
guide["face_bbox_within_reference"] = face_bbox.strip()
|
| 550 |
guide["reference_mask_mode"] = "bbox_softfade"
|
| 551 |
+
guide["face_padding"] = 0.15
|
| 552 |
+
guide["crf"] = 24
|
| 553 |
+
guide["blur_radius"] = 0
|
| 554 |
+
guide["interpolation"] = "area"
|
| 555 |
+
guide["crop"] = "center"
|
| 556 |
+
guide["attention_strength"] = 1
|
| 557 |
+
guide["emit_latent"] = "passthrough"
|
| 558 |
+
guide["debug"] = False
|
| 559 |
+
|
| 560 |
anchor["reference_source"] = "auto"
|
| 561 |
+
anchor["similarity_threshold"] = 0.45
|
| 562 |
+
anchor["decay_with_distance"] = 0
|
| 563 |
+
anchor["bypass"] = False
|
| 564 |
+
anchor["debug"] = False
|
| 565 |
+
anchor["advanced_mode"] = True
|
| 566 |
+
anchor["depth_curve"] = "flat"
|
| 567 |
+
anchor["block_index_filter"] = ""
|
| 568 |
+
anchor["similarity_sharpness"] = 6
|
| 569 |
anchor["override_face_bbox"] = face_bbox.strip()
|
| 570 |
+
anchor["skip_when_sigma_above"] = 0
|
| 571 |
+
anchor["pull_mode"] = "directional"
|
| 572 |
+
anchor["late_block_falloff"] = 0.4
|
| 573 |
+
|
| 574 |
+
latent_anchor["cache_at_step"] = 5
|
| 575 |
+
latent_anchor["similarity_threshold"] = 0.5
|
| 576 |
+
latent_anchor["decay_with_distance"] = 0.15
|
| 577 |
+
latent_anchor["energy_threshold"] = 0.3
|
| 578 |
+
latent_anchor["bypass"] = False
|
| 579 |
+
latent_anchor["debug"] = False
|
| 580 |
+
latent_anchor["advanced_mode"] = True
|
| 581 |
+
latent_anchor["cache_mode"] = "schedule"
|
| 582 |
+
latent_anchor["forwards_per_step"] = 2
|
| 583 |
+
latent_anchor["cache_warmup"] = 50
|
| 584 |
+
latent_anchor["anchor_frame"] = 0
|
| 585 |
+
latent_anchor["depth_curve"] = "flat"
|
| 586 |
+
latent_anchor["block_index_filter"] = ""
|
| 587 |
|
| 588 |
return workflow
|
| 589 |
|
|
|
|
| 650 |
image_path: str,
|
| 651 |
prompt: str,
|
| 652 |
negative_prompt: str,
|
| 653 |
+
preset: str,
|
| 654 |
seconds: float,
|
| 655 |
max_width: int,
|
| 656 |
max_height: int,
|
|
|
|
| 678 |
image_path: str,
|
| 679 |
prompt: str,
|
| 680 |
negative_prompt: str,
|
| 681 |
+
preset: str,
|
| 682 |
seconds: float,
|
| 683 |
max_width: int,
|
| 684 |
max_height: int,
|
|
|
|
| 711 |
|
| 712 |
workflow = _inject_params(
|
| 713 |
_workflow_template(),
|
| 714 |
+
preset=preset,
|
| 715 |
image_name=image_name,
|
| 716 |
prompt=prompt.strip(),
|
| 717 |
negative_prompt=negative_prompt.strip() or DEFAULT_NEGATIVE,
|
|
|
|
| 762 |
_ensure_models()
|
| 763 |
|
| 764 |
|
| 765 |
+
def apply_preset(preset: str):
    """Return the Gradio updates that selecting *preset* applies to the UI.

    The result is a 5-tuple of ``gr.update(value=...)`` objects. Its order
    follows the ``outputs`` list of the ``preset.change`` listener: mode,
    likeness strength, likeness anchor strength, latent anchor strength,
    first-frame strength.

    ``"original"`` gets its own set of values; every other preset name
    (in practice ``"tuned"``) gets the second set.
    """
    if preset == "original":
        control_values = ("auto face", 0.9, 0.5, 0.11, 0.77)
    else:
        control_values = ("auto face", 0.9, 0.15, 0.08, 0.82)

    # One update per control, in the same order as the listener's outputs.
    return tuple(gr.update(value=setting) for setting in control_values)
|
| 781 |
+
|
| 782 |
+
|
| 783 |
with gr.Blocks(title="LTX image-to-video") as demo:
|
| 784 |
gr.Markdown("# LTX image-to-video")
|
| 785 |
with gr.Row():
|
|
|
|
| 787 |
image = gr.Image(label="reference image", type="filepath")
|
| 788 |
prompt = gr.Textbox(label="prompt", lines=4)
|
| 789 |
negative = gr.Textbox(label="negative prompt", value=DEFAULT_NEGATIVE, lines=2)
|
| 790 |
+
preset = gr.Dropdown(PRESETS, value="tuned", label="preset")
|
| 791 |
seconds = gr.Slider(1.0, 10.0, value=4.0, step=0.5, label="duration")
|
| 792 |
with gr.Row():
|
| 793 |
max_width = gr.Slider(512, 1536, value=1120, step=32, label="max width")
|
|
|
|
| 814 |
image,
|
| 815 |
prompt,
|
| 816 |
negative,
|
| 817 |
+
preset,
|
| 818 |
seconds,
|
| 819 |
max_width,
|
| 820 |
max_height,
|
|
|
|
| 830 |
outputs=[video, status, used_seed],
|
| 831 |
)
|
| 832 |
|
| 833 |
+
preset.change(
|
| 834 |
+
fn=apply_preset,
|
| 835 |
+
inputs=[preset],
|
| 836 |
+
outputs=[mode, likeness_strength, likeness_anchor_strength, latent_anchor_strength, first_frame_strength],
|
| 837 |
+
)
|
| 838 |
+
|
| 839 |
demo.queue(default_concurrency_limit=1)
|
| 840 |
|
| 841 |
if __name__ == "__main__":
|
requirements.txt
CHANGED
|
@@ -14,6 +14,7 @@ scipy
|
|
| 14 |
numpy
|
| 15 |
pillow
|
| 16 |
opencv-python-headless
|
|
|
|
| 17 |
av
|
| 18 |
kornia<0.8.0
|
| 19 |
psutil
|
|
|
|
| 14 |
numpy
|
| 15 |
pillow
|
| 16 |
opencv-python-headless
|
| 17 |
+
mediapipe
|
| 18 |
av
|
| 19 |
kornia<0.8.0
|
| 20 |
psutil
|