Merging the main (now matching upstream main) to "backup" branch #42

Closed · wants to merge 8 commits
20 changes: 20 additions & 0 deletions .dockerignore
@@ -0,0 +1,20 @@
# The .dockerignore file excludes files from the container build process.
#
# https://docs.docker.com/engine/reference/builder/#dockerignore-file

# Replicate
/ckpts/*

# Exclude Git files
**/.git
**/.github
**/.gitignore

# Exclude Python cache files
__pycache__
.mypy_cache
.pytest_cache
.ruff_cache

# Exclude Python virtual environment
/venv
16 changes: 16 additions & 0 deletions .gitignore
@@ -0,0 +1,16 @@
# Byte-compiled / optimized / DLL files
__pycache__/

# .DS_Store
.DS_Store

# Replicate
.cog
/ckpts/*
*.pt
*.mp4
output.*.mp4
output.*.png
output.*.webp
output.*.jpg
output.*.jpeg
2 changes: 2 additions & 0 deletions README.md
@@ -15,6 +15,8 @@
<a href="https://github.com/Tencent/HunyuanVideo/blob/main/assets/hunyuanvideo.pdf"><img src="https://img.shields.io/static/v1?label=Tech Report&message=Arxiv:HunyuanVideo&color=red&logo=arxiv"></a> &ensp;
<a href="https://huggingface.co/tencent/HunyuanVideo"><img src="https://img.shields.io/static/v1?label=HunyuanVideo&message=HuggingFace&color=yellow"></a> &ensp; &ensp;
<a href="https://huggingface.co/tencent/HunyuanVideo-PromptRewrite"><img src="https://img.shields.io/static/v1?label=HunyuanVideo-PromptRewrite&message=HuggingFace&color=yellow"></a> &ensp; &ensp;

[![Replicate](https://replicate.com/zsxkib/hunyuan-video/badge)](https://replicate.com/zsxkib/hunyuan-video)
</div>

-----
38 changes: 38 additions & 0 deletions cog.yaml
@@ -0,0 +1,38 @@
# Configuration for Cog ⚙️
# Reference: https://cog.run/yaml

build:
  # set to true if your model requires a GPU
  gpu: true
  cuda: "12.1"

  # python version in the form '3.11' or '3.11.4'
  python_version: "3.10.9"

  # a list of packages in the format <package-name>==<version>
  python_packages:
    - "torch==2.1.1"
    - "torchvision==0.16.1"
    - "opencv-python==4.9.0.80"
    - "diffusers==0.30.2"
    - "transformers==4.46.3" # was "transformers==4.39.3" before
    - "tokenizers==0.20.3" # was "tokenizers==0.15.2" before
    - "accelerate==1.1.1"
    - "pandas==2.0.3"
    - "numpy==1.24.4"
    - "einops==0.7.0"
    - "tqdm==4.66.2"
    - "loguru==0.7.2"
    - "imageio==2.34.0"
    - "imageio-ffmpeg==0.5.1"
    - "safetensors==0.4.3"
    # - "git+https://github.com/Dao-AILab/[email protected]"

  # commands run after the environment is set up
  run:
    # - python -m pip install git+https://github.com/Dao-AILab/[email protected]
    - FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE pip install flash-attn --no-build-isolation
    - curl -o /usr/local/bin/pget -L "https://github.com/replicate/pget/releases/download/v0.8.2/pget_linux_x86_64" && chmod +x /usr/local/bin/pget

# predict.py defines how predictions are run on your model
predict: "predict.py:Predictor"
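
With this configuration, Cog builds the image behind the Replicate deployment linked from the README badge. As a quick sanity check, the published model can be called from Python with the replicate client; this is a minimal sketch, assuming the zsxkib/hunyuan-video slug from the badge above, a REPLICATE_API_TOKEN in the environment, and the input names defined in predict.py below.

# Minimal sketch: call the deployed model via the Replicate Python client.
# Assumes `pip install replicate` and REPLICATE_API_TOKEN set in the env;
# the slug comes from the README badge, the inputs from predict.py below.
import replicate

output = replicate.run(
    "zsxkib/hunyuan-video",
    input={
        "prompt": "A cat walks on the grass, realistic style.",
        "width": 854,
        "height": 480,
        "video_length": 129,
        "infer_steps": 50,
        "embedded_guidance_scale": 6.0,
    },
)
print(output)  # URL of the generated video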
210 changes: 210 additions & 0 deletions predict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
# Prediction interface for Cog ⚙️
# https://cog.run/python


from argparse import Namespace
import shutil
import subprocess
import time
from cog import BasePredictor, Input, Path
import os
import torch
import imageio
import torchvision
from einops import rearrange

from hyvideo.inference import HunyuanVideoSampler

MODEL_CACHE = "ckpts"
BASE_URL = f"https://weights.replicate.delivery/default/hunyuan-video/{MODEL_CACHE}/"


def download_weights(url: str, dest: str) -> None:
    start = time.time()
    print("[!] Initiating download from URL: ", url)
    print("[~] Destination path: ", dest)
    if ".tar" in dest:
        dest = os.path.dirname(dest)
    # pget -vf downloads the file; the extra "x" flag extracts tar archives in place
    command = ["pget", "-vf" + ("x" if ".tar" in url else ""), url, dest]
    try:
        print(f"[~] Running command: {' '.join(command)}")
        subprocess.check_call(command, close_fds=False)
    except subprocess.CalledProcessError as e:
        print(
            f"[ERROR] Failed to download weights. Command '{' '.join(e.cmd)}' returned non-zero exit status {e.returncode}."
        )
        raise
    print("[+] Download completed in: ", time.time() - start, "seconds")


def save_videos_grid(videos: torch.Tensor, path: str, rescale=False, n_rows=1, fps=24):
    """Save a (b, c, t, h, w) video tensor to an H.264 mp4 via ffmpeg."""
    videos = rearrange(videos, "b c t h w -> t b c h w")
    outputs = []
    for x in videos:
        x = torchvision.utils.make_grid(x, nrow=n_rows)
        x = x.permute(1, 2, 0)  # Convert to HWC
        if rescale:
            x = (x + 1.0) / 2.0  # -1,1 -> 0,1
        x = torch.clamp(x, 0, 1)
        x = (x * 255).byte().cpu().numpy()
        outputs.append(x)

    # Create frames directory
    frames_dir = os.path.join(os.path.dirname(path), "frames_temp")
    os.makedirs(frames_dir, exist_ok=True)

    # Save frames as images
    for i, frame in enumerate(outputs):
        frame_path = os.path.join(frames_dir, f"frame_{i:05d}.png")
        imageio.imwrite(frame_path, frame)

    # Build the ffmpeg command
    frame_pattern = os.path.join(frames_dir, "frame_%05d.png")
    ffmpeg_cmd = f'ffmpeg -y -framerate {fps} -i "{frame_pattern}" -c:v libx264 -pix_fmt yuv420p "{path}"'

    # Run the ffmpeg command
    os.system(ffmpeg_cmd)

    # Clean up frames directory
    shutil.rmtree(frames_dir)
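

# Reviewer sketch (not part of the PR): imageio-ffmpeg is already a
# dependency, so the PNG round-trip plus os.system above could be replaced
# by streaming frames straight into the mp4 writer. A minimal alternative,
# assuming the same HxWx3 uint8 frames produced by the loop above:
def write_video_streaming(frames, path: str, fps: int = 24):
    """Write HxWx3 uint8 frames directly to an H.264 mp4."""
    with imageio.get_writer(
        path, fps=fps, codec="libx264", pixelformat="yuv420p"
    ) as writer:
        for frame in frames:
            writer.append_data(frame)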


class Predictor(BasePredictor):
    def setup(self):
        """Load the model into memory to make running multiple predictions efficient"""
        os.makedirs(MODEL_CACHE, exist_ok=True)
        model_files = [
            "hunyuan-video-t2v-720p.tar",
            "text_encoder.tar",
            "text_encoder_2.tar",
        ]
        for model_file in model_files:
            url = BASE_URL + model_file
            filename = url.split("/")[-1]
            dest_path = os.path.join(MODEL_CACHE, filename)
            if not os.path.exists(dest_path.replace(".tar", "")):
                download_weights(url, dest_path)

        args_dict = {
            "model": "HYVideo-T/2-cfgdistill",
            "latent_channels": 16,
            "precision": "bf16",
            "rope_theta": 256,
            "vae": "884-16c-hy",
            "vae_precision": "fp16",
            "vae_tiling": True,
            "text_encoder": "llm",
            "text_encoder_precision": "fp16",
            "text_states_dim": 4096,
            "text_len": 256,
            "tokenizer": "llm",
            "prompt_template": "dit-llm-encode",
            "prompt_template_video": "dit-llm-encode-video",
            "hidden_state_skip_layer": 2,
            "apply_final_norm": False,
            "text_encoder_2": "clipL",
            "text_encoder_precision_2": "fp16",
            "text_states_dim_2": 768,
            "tokenizer_2": "clipL",
            "text_len_2": 77,
            "denoise_type": "flow",
            "flow_solver": "euler",
            "flow_shift": 7.0,
            "flow_reverse": True,
            "use_linear_quadratic_schedule": False,
            "linear_schedule_end": 25,
            "use_cpu_offload": True,
            "batch_size": 1,
            "disable_autocast": False,
            "cfg_scale": 1.0,
            "embedded_cfg_scale": 6.0,
            "reproduce": False,
            "model_base": "ckpts",
            "dit_weight": "ckpts/hunyuan-video-t2v-720p/transformers/mp_rank_00_model_states.pt",
            "model_resolution": "540p",
            "load_key": "module",
            "save_path": "./results",
            "save_path_suffix": "",
            "name_suffix": "",
            "num_videos": 1,
            "video_size": [480, 854],
            "seed_type": "auto",
            "video_length": 129,
            "infer_steps": 50,
            "prompt": "A cat walks on the grass, realistic style.",
            "seed": 65025,
            "neg_prompt": None,
        }
        args = Namespace(**args_dict)
        # Initialize the video sampler
        self.hunyuan_video_sampler = HunyuanVideoSampler.from_pretrained(
            Path(MODEL_CACHE),
            args=args,
        )

    def predict(
        self,
        prompt: str = Input(
            description="Text prompt to generate video.",
            default="A cat walks on the grass, realistic style.",
        ),
        width: int = Input(
            description="Width of the video in pixels.", default=854, ge=1
        ),
        height: int = Input(
            description="Height of the video in pixels.", default=480, ge=1
        ),
        video_length: int = Input(
            description="Length of the video in frames.", default=129, ge=1
        ),
        infer_steps: int = Input(
            description="Number of inference steps.", default=50, ge=1
        ),
        flow_shift: float = Input(description="Flow-shift parameter.", default=7.0),
        embedded_guidance_scale: float = Input(
            description="Embedded guidance scale for generation.",
            default=6.0,
            ge=1.0,
            le=6.0,
        ),
        seed: int = Input(description="Random seed for reproducibility.", default=None),
    ) -> Path:
        """Run a single prediction on the model."""
        if seed is None:
            seed = int.from_bytes(os.urandom(2), "big")
        print(f"Using seed: {seed}")

        # Update video_size in the sampler's args to match the requested dimensions
        self.hunyuan_video_sampler.args.video_size = [height, width]

        # Create save path and clear any existing files
        save_path = "./results"
        if os.path.exists(save_path):
            shutil.rmtree(save_path)
        os.makedirs(save_path)

        # Generate video using HunyuanVideoSampler
        outputs = self.hunyuan_video_sampler.predict(
            prompt=prompt,
            height=height,
            width=width,
            video_length=video_length,
            seed=seed,
            negative_prompt=None,
            infer_steps=infer_steps,
            guidance_scale=1.0,
            num_videos_per_prompt=1,
            flow_shift=flow_shift,
            batch_size=1,
            embedded_guidance_scale=embedded_guidance_scale,
        )

        samples = outputs["samples"]

        # Save the generated video
        sample = samples[0].unsqueeze(0)
        output_path = f"{save_path}/video.mp4"
        save_videos_grid(sample, output_path, fps=24)

        return Path(output_path)
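
For completeness, here is a quick local smoke test, a sketch and not part of the PR: it drives the Predictor class directly, bypassing Cog's HTTP server. Cog's Input() defaults only apply under the Cog runtime, so every argument is passed explicitly; it assumes a CUDA GPU and network access to the weights mirror.

# Hypothetical local smoke test (not in the PR): exercise the Predictor
# directly. Input() defaults require the Cog runtime, so all arguments
# are passed explicitly here.
from predict import Predictor

predictor = Predictor()
predictor.setup()  # downloads the weight tarballs on first run, then loads the sampler
video_path = predictor.predict(
    prompt="A cat walks on the grass, realistic style.",
    width=854,
    height=480,
    video_length=129,
    infer_steps=50,
    flow_shift=7.0,
    embedded_guidance_scale=6.0,
    seed=42,
)
print(video_path)  # ./results/video.mp4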