Commit 2b4729da authored by kijai

initial commit

Parent 8e536752
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
#/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
# custom ignores
.DS_Store
_.*
# models and outputs
models/
outputs/
(diff collapsed)
# WORK IN PROGRESS
# MimicMotion wrapper for ComfyUI
Requires SVD 1.1: download https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt-1-1/tree/main into `ComfyUI/models/diffusers`. The whole folder structure is needed, but each subfolder only needs one of the `.safetensors` variants plus its `config.json` (a rough expected layout is sketched below).
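A rough expected layout (file names follow the standard diffusers layout of that repository and are illustrative; only one precision variant per subfolder is required):

```
ComfyUI/models/diffusers/stable-video-diffusion-img2vid-xt-1-1/
├── model_index.json
├── feature_extractor/
│   └── preprocessor_config.json
├── image_encoder/
│   ├── config.json
│   └── model.fp16.safetensors
├── scheduler/
│   └── scheduler_config.json
├── unet/
│   ├── config.json
│   └── diffusion_pytorch_model.fp16.safetensors
└── vae/
    ├── config.json
    └── diffusion_pytorch_model.fp16.safetensors
```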
from .nodes import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS
__all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"]
# base svd model path
base_model_path: models/SVD/stable-video-diffusion-img2vid-xt-1-1
# checkpoint path
ckpt_path: models/MimicMotion.pth
test_case:
- ref_video_path: assets/example_data/videos/pose1.mp4
ref_image_path: assets/example_data/images/demo1.jpg
num_frames: 16
resolution: 576
frames_overlap: 6
num_inference_steps: 25
noise_aug_strength: 0
guidance_scale: 2.0
sample_stride: 2
fps: 15
seed: 42
# w/h aspect ratio
ASPECT_RATIO = 9 / 16
name: mimicmotion
channels:
- pytorch
- nvidia
dependencies:
- python=3.11
- pytorch=2.0.1
- torchvision=0.15.2
- pytorch-cuda=11.7
- pip
- pip:
- diffusers==0.27.0
- transformers==4.32.1
- decord==0.6.0
- einops
- omegaconf
import os
import argparse
import logging
import math
from omegaconf import OmegaConf
from datetime import datetime
from pathlib import Path
import numpy as np
import torch.jit
from torchvision.datasets.folder import pil_loader
from torchvision.transforms.functional import pil_to_tensor, resize, center_crop
from torchvision.transforms.functional import to_pil_image
from constants import ASPECT_RATIO
from mimicmotion.pipelines.pipeline_mimicmotion import MimicMotionPipeline
from mimicmotion.utils.loader import create_pipeline
from mimicmotion.utils.utils import save_to_mp4
from mimicmotion.dwpose.preprocess import get_video_pose, get_image_pose
logging.basicConfig(level=logging.INFO, format="%(asctime)s: [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def preprocess(video_path, image_path, resolution=576, sample_stride=2):
"""preprocess ref image pose and video pose
Args:
video_path (str): input video pose path
image_path (str): reference image path
resolution (int, optional): Defaults to 576.
sample_stride (int, optional): Defaults to 2.
"""
image_pixels = pil_loader(image_path)
image_pixels = pil_to_tensor(image_pixels) # (c, h, w)
h, w = image_pixels.shape[-2:]
############################ compute target h/w according to original aspect ratio ###############################
if h > w:
w_target, h_target = resolution, int(resolution / ASPECT_RATIO // 64) * 64
else:
w_target, h_target = int(resolution / ASPECT_RATIO // 64) * 64, resolution
h_w_ratio = float(h) / float(w)
if h_w_ratio < h_target / w_target:
h_resize, w_resize = h_target, math.ceil(h_target / h_w_ratio)
else:
h_resize, w_resize = math.ceil(w_target * h_w_ratio), w_target
image_pixels = resize(image_pixels, [h_resize, w_resize], antialias=None)
image_pixels = center_crop(image_pixels, [h_target, w_target])
image_pixels = image_pixels.permute((1, 2, 0)).numpy()
##################################### get image&video pose value #################################################
image_pose = get_image_pose(image_pixels)
video_pose = get_video_pose(video_path, image_pixels, sample_stride=sample_stride)
pose_pixels = np.concatenate([np.expand_dims(image_pose, 0), video_pose])
image_pixels = np.transpose(np.expand_dims(image_pixels, 0), (0, 3, 1, 2))
return torch.from_numpy(pose_pixels.copy()) / 127.5 - 1, torch.from_numpy(image_pixels) / 127.5 - 1
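# Illustrative example (comment only, not executed): with the demo assets referenced
# in the inference config, e.g.
#   preprocess("assets/example_data/videos/pose1.mp4",
#              "assets/example_data/images/demo1.jpg",
#              resolution=576, sample_stride=2)
# returns pose_pixels of shape (num_video_frames + 1, 3, H, W) and image_pixels of
# shape (1, 3, H, W), both normalized to [-1, 1].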
def run_pipeline(pipeline: MimicMotionPipeline, image_pixels, pose_pixels, device, task_config):
image_pixels = [to_pil_image(img.to(torch.uint8)) for img in (image_pixels + 1.0) * 127.5]
pose_pixels = pose_pixels.unsqueeze(0).to(device)
generator = torch.Generator(device=device)
generator.manual_seed(task_config.seed)
frames = pipeline(
image_pixels, image_pose=pose_pixels, num_frames=pose_pixels.size(1),
tile_size=task_config.num_frames, tile_overlap=task_config.frames_overlap,
height=pose_pixels.shape[-2], width=pose_pixels.shape[-1], fps=7,
noise_aug_strength=task_config.noise_aug_strength, num_inference_steps=task_config.num_inference_steps,
generator=generator, min_guidance_scale=task_config.guidance_scale,
max_guidance_scale=task_config.guidance_scale, decode_chunk_size=8, output_type="pt", device=device
).frames.cpu()
video_frames = (frames * 255.0).to(torch.uint8)
for vid_idx in range(video_frames.shape[0]):
# drop the first frame, which corresponds to the reference image pose
_video_frames = video_frames[vid_idx, 1:]
return _video_frames
@torch.no_grad()
def main(args):
if not args.no_use_float16:
torch.set_default_dtype(torch.float16)
infer_config = OmegaConf.load(args.inference_config)
pipeline = create_pipeline(infer_config, device)
for task in infer_config.test_case:
############################################## Pre-process data ##############################################
pose_pixels, image_pixels = preprocess(
task.ref_video_path, task.ref_image_path,
resolution=task.resolution, sample_stride=task.sample_stride
)
########################################### Run MimicMotion pipeline ###########################################
_video_frames = run_pipeline(
pipeline,
image_pixels, pose_pixels,
device, task
)
################################### save results to output folder. ###########################################
save_to_mp4(
_video_frames,
f"{args.output_dir}/{os.path.basename(task.ref_video_path).split('.')[0]}" \
f"_{datetime.now().strftime('%Y%m%d%H%M%S')}.mp4",
fps=task.fps,
)
def set_logger(log_file=None, log_level=logging.INFO):
log_handler = logging.FileHandler(log_file, "w")
log_handler.setFormatter(
logging.Formatter("[%(asctime)s][%(name)s][%(levelname)s]: %(message)s")
)
log_handler.setLevel(log_level)
logger.addHandler(log_handler)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--log_file", type=str, default=None)
parser.add_argument("--inference_config", type=str, default="configs/test.yaml") #ToDo
parser.add_argument("--output_dir", type=str, default="outputs/", help="path to output")
parser.add_argument("--no_use_float16",
action="store_true",
help="Whether use float16 to speed up inference",
)
args = parser.parse_args()
Path(args.output_dir).mkdir(parents=True, exist_ok=True)
set_logger(args.log_file \
if args.log_file is not None else f"{args.output_dir}/{datetime.now().strftime('%Y%m%d%H%M%S')}.log")
main(args)
logger.info(f"--- Finished ---")
(diff collapsed)
from pathlib import Path
import einops
import numpy as np
import torch
import torch.nn as nn
import torch.nn.init as init
class PoseNet(nn.Module):
"""a tiny conv network for introducing pose sequence as the condition
"""
def __init__(self, noise_latent_channels=320, *args, **kwargs):
super().__init__(*args, **kwargs)
# multiple convolution layers
self.conv_layers = nn.Sequential(
nn.Conv2d(in_channels=3, out_channels=3, kernel_size=3, padding=1),
nn.SiLU(),
nn.Conv2d(in_channels=3, out_channels=16, kernel_size=4, stride=2, padding=1),
nn.SiLU(),
nn.Conv2d(in_channels=16, out_channels=16, kernel_size=3, padding=1),
nn.SiLU(),
nn.Conv2d(in_channels=16, out_channels=32, kernel_size=4, stride=2, padding=1),
nn.SiLU(),
nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, padding=1),
nn.SiLU(),
nn.Conv2d(in_channels=32, out_channels=64, kernel_size=4, stride=2, padding=1),
nn.SiLU(),
nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
nn.SiLU(),
nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
nn.SiLU()
)
# Final projection layer
self.final_proj = nn.Conv2d(in_channels=128, out_channels=noise_latent_channels, kernel_size=1)
# Initialize layers
self._initialize_weights()
self.scale = nn.Parameter(torch.ones(1) * 2)
def _initialize_weights(self):
"""Initialize weights with He. initialization and zero out the biases
"""
for m in self.conv_layers:
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.in_channels
init.normal_(m.weight, mean=0.0, std=np.sqrt(2. / n))
if m.bias is not None:
init.zeros_(m.bias)
init.zeros_(self.final_proj.weight)
if self.final_proj.bias is not None:
init.zeros_(self.final_proj.bias)
def forward(self, x):
if x.ndim == 5:
x = einops.rearrange(x, "b f c h w -> (b f) c h w")
x = self.conv_layers(x)
x = self.final_proj(x)
return x * self.scale
@classmethod
def from_pretrained(cls, pretrained_model_path):
"""load pretrained pose-net weights
"""
if not Path(pretrained_model_path).exists():
    raise FileNotFoundError(f"There is no model file at {pretrained_model_path}")
state_dict = torch.load(pretrained_model_path, map_location="cpu")
model = PoseNet(noise_latent_channels=320)
model.load_state_dict(state_dict, strict=True)
print(f"Loaded PoseNet's pretrained weights from {pretrained_model_path}.")
return model
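
# Illustrative smoke test (not part of the original module; shapes are assumptions
# chosen for demonstration): feed a dummy pose sequence through PoseNet and check
# that the output channel count matches the requested noise_latent_channels.
if __name__ == "__main__":
    pose_net = PoseNet(noise_latent_channels=320)
    dummy_pose = torch.randn(1, 4, 3, 1024, 576)  # (b, f, c, h, w), values in [-1, 1]
    with torch.no_grad():
        out = pose_net(dummy_pose)
    # three stride-2 convolutions downsample by 8x: expect (4, 320, 128, 72)
    print(out.shape)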
(diff collapsed)
(diff collapsed)
import logging
import torch
import torch.utils.checkpoint
from diffusers.models import AutoencoderKLTemporalDecoder
from diffusers.schedulers import EulerDiscreteScheduler
from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
from ..modules.unet import UNetSpatioTemporalConditionModel
from ..modules.pose_net import PoseNet
from ..pipelines.pipeline_mimicmotion import MimicMotionPipeline
logger = logging.getLogger(__name__)
class MimicMotionModel(torch.nn.Module):
def __init__(self, base_model_path):
"""construnct base model components and load pretrained svd model except pose-net
Args:
base_model_path (str): pretrained svd model path
"""
super().__init__()
self.unet = UNetSpatioTemporalConditionModel.from_config(
UNetSpatioTemporalConditionModel.load_config(base_model_path, subfolder="unet"))
self.vae = AutoencoderKLTemporalDecoder.from_pretrained(
base_model_path, subfolder="vae").half()
self.image_encoder = CLIPVisionModelWithProjection.from_pretrained(
base_model_path, subfolder="image_encoder")
self.noise_scheduler = EulerDiscreteScheduler.from_pretrained(
base_model_path, subfolder="scheduler")
self.feature_extractor = CLIPImageProcessor.from_pretrained(
base_model_path, subfolder="feature_extractor")
# pose_net
self.pose_net = PoseNet(noise_latent_channels=self.unet.config.block_out_channels[0])
def create_pipeline(infer_config, device):
"""create mimicmotion pipeline and load pretrained weight
Args:
infer_config (OmegaConf): loaded inference config (base_model_path, ckpt_path, ...)
device (str or torch.device): "cpu" or "cuda:{device_id}"
"""
mimicmotion_models = MimicMotionModel(infer_config.base_model_path).to(device=device).eval()
mimicmotion_models.load_state_dict(torch.load(infer_config.ckpt_path, map_location=device), strict=False)
pipeline = MimicMotionPipeline(
vae=mimicmotion_models.vae,
image_encoder=mimicmotion_models.image_encoder,
unet=mimicmotion_models.unet,
scheduler=mimicmotion_models.noise_scheduler,
feature_extractor=mimicmotion_models.feature_extractor,
pose_net=mimicmotion_models.pose_net
)
return pipeline
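
# Hedged usage sketch (illustrative, not part of this commit): building the pipeline
# from a minimal config. The paths mirror the ones in the example YAML config and are
# assumptions about where the weights actually live on disk.
if __name__ == "__main__":
    from omegaconf import OmegaConf
    cfg = OmegaConf.create({
        "base_model_path": "models/SVD/stable-video-diffusion-img2vid-xt-1-1",
        "ckpt_path": "models/MimicMotion.pth",
    })
    pipe = create_pipeline(cfg, torch.device("cuda" if torch.cuda.is_available() else "cpu"))
    print(type(pipe).__name__)  # MimicMotionPipeline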
import logging
from pathlib import Path
from torchvision.io import write_video
logger = logging.getLogger(__name__)
def save_to_mp4(frames, save_path, fps=7):
frames = frames.permute((0, 2, 3, 1)) # (f, c, h, w) to (f, h, w, c)
Path(save_path).parent.mkdir(parents=True, exist_ok=True)
write_video(save_path, frames, fps=fps)
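
# Illustrative usage (dummy data; output path is an assumption): save_to_mp4 expects
# a uint8 tensor shaped (frames, channels, height, width) and writes it to an .mp4
# file via torchvision.io.write_video.
if __name__ == "__main__":
    import torch
    dummy = torch.randint(0, 256, (15, 3, 256, 256), dtype=torch.uint8)
    save_to_mp4(dummy, "outputs/dummy_clip.mp4", fps=15)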
import os
from omegaconf import OmegaConf
import torch
import torch.nn.functional as F
import sys
script_directory = os.path.dirname(os.path.abspath(__file__))
sys.path.append(script_directory)
from einops import repeat
import folder_paths
import comfy.model_management as mm
import comfy.utils
from contextlib import nullcontext
try:
    from accelerate import init_empty_weights
    is_accelerate_available = True
except ImportError:
    is_accelerate_available = False
from mimicmotion.pipelines.pipeline_mimicmotion import MimicMotionPipeline
from diffusers.models import AutoencoderKLTemporalDecoder
from diffusers.schedulers import EulerDiscreteScheduler
from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
from mimicmotion.modules.unet import UNetSpatioTemporalConditionModel
from mimicmotion.modules.pose_net import PoseNet
class MimicMotionModel(torch.nn.Module):
def __init__(self, base_model_path):
"""construnct base model components and load pretrained svd model except pose-net
Args:
base_model_path (str): pretrained svd model path
"""
super().__init__()
self.unet = UNetSpatioTemporalConditionModel.from_config(
UNetSpatioTemporalConditionModel.load_config(base_model_path, subfolder="unet", variant="fp16"))
self.vae = AutoencoderKLTemporalDecoder.from_pretrained(
base_model_path, subfolder="vae", variant="fp16")
self.image_encoder = CLIPVisionModelWithProjection.from_pretrained(
base_model_path, subfolder="image_encoder", variant="fp16")
self.noise_scheduler = EulerDiscreteScheduler.from_pretrained(
base_model_path, subfolder="scheduler")
self.feature_extractor = CLIPImageProcessor.from_pretrained(
base_model_path, subfolder="feature_extractor")
# pose_net
self.pose_net = PoseNet(noise_latent_channels=self.unet.config.block_out_channels[0])
class DownloadAndLoadMimicMotionModel:
@classmethod
def INPUT_TYPES(s):
return {"required": {
"model": (
[ 'MimicMotion-fp16.safetensors',
],
),
"precision": (
[
'fp32',
'fp16',
'bf16',
], {
"default": 'fp16'
}),
},
}
RETURN_TYPES = ("MIMICPIPE",)
RETURN_NAMES = ("mimic_pipeline",)
FUNCTION = "loadmodel"
CATEGORY = "MimicMotionWrapper"
def loadmodel(self, precision, model):
device = mm.get_torch_device()
mm.soft_empty_cache()
dtype = {"bf16": torch.bfloat16, "fp16": torch.float16, "fp32": torch.float32}[precision]
download_path = os.path.join(folder_paths.models_dir, "mimicmotion")
model_path = os.path.join(download_path, model)
if not os.path.exists(model_path):
print(f"Downloading model to: {model_path}")
from huggingface_hub import snapshot_download
snapshot_download(repo_id="Kijai/MimicMotion_pruned",
allow_patterns=[f"*{model}*"],
local_dir=download_path,
local_dir_use_symlinks=False)
ckpt_base_name = os.path.basename(model_path)
print(f"Loading model from: {model_path}")
svd_path = os.path.join(folder_paths.models_dir, "diffusers", "stable-video-diffusion-img2vid-xt-1-1")
if not os.path.exists(svd_path):
raise ValueError(f"Please download stable-video-diffusion-img2vid-xt-1-1 to {svd_path}")
mimicmotion_models = MimicMotionModel(svd_path).to(device=device).eval()
mimicmotion_models.load_state_dict(comfy.utils.load_torch_file(model_path), strict=False)
pipeline = MimicMotionPipeline(
vae=mimicmotion_models.vae,
image_encoder=mimicmotion_models.image_encoder,
unet=mimicmotion_models.unet,
scheduler=mimicmotion_models.noise_scheduler,
feature_extractor=mimicmotion_models.feature_extractor,
pose_net=mimicmotion_models.pose_net,
)
pipeline.unet.to(dtype)
pipeline.pose_net.to(dtype)
pipeline.vae.to(dtype)
pipeline.image_encoder.to(dtype)
mimic_model = {
'pipeline': pipeline,
'dtype': dtype
}
return (mimic_model,)
class MimicMotionSampler:
@classmethod
def INPUT_TYPES(s):
return {"required": {
"mimic_pipeline": ("MIMICPIPE",),
"ref_image": ("IMAGE",),
"pose_images": ("IMAGE",),
"steps": ("INT", {"default": 25, "min": 1, "max": 200, "step": 1}),
"cfg_min": ("FLOAT", {"default": 2.0, "min": 0.0, "max": 20.0, "step": 0.01}),
"cfg_max": ("FLOAT", {"default": 2.0, "min": 0.0, "max": 20.0, "step": 0.01}),
"seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
"fps": ("INT", {"default": 15, "min": 2, "max": 100, "step": 1}),
"noise_aug_strength": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 10.0, "step": 0.01}),
"keep_model_loaded": ("BOOLEAN", {"default": True}),
},
}
RETURN_TYPES = ("IMAGE",)
RETURN_NAMES = ("images",)
FUNCTION = "process"
CATEGORY = "MimicMotionWrapper"
def process(self, mimic_pipeline, ref_image, pose_images, cfg_min, cfg_max, steps, seed, noise_aug_strength, fps, keep_model_loaded):
device = mm.get_torch_device()
offload_device = mm.unet_offload_device()
mm.unload_all_models()
mm.soft_empty_cache()
dtype = mimic_pipeline['dtype']
pipeline = mimic_pipeline['pipeline']
B, H, W, C = pose_images.shape
ref_image = ref_image.permute(0, 3, 1, 2).to(device).to(dtype)
pose_images = pose_images.permute(0, 3, 1, 2).to(device).to(dtype)
ref_image = ref_image * 2 - 1
pose_images = pose_images * 2 - 1
generator = torch.Generator(device=device)
generator.manual_seed(seed)
frames = pipeline(
ref_image,
image_pose=pose_images,
num_frames=B,
tile_size=16,
tile_overlap=6,
height=H,
width=W,
fps=fps,
noise_aug_strength=noise_aug_strength,
num_inference_steps=steps,
generator=generator,
min_guidance_scale=cfg_min,
max_guidance_scale=cfg_max,
decode_chunk_size=8,
output_type="pt",
device=device
).frames
frames = frames.squeeze(0).permute(0, 2, 3, 1).cpu().float()
print(frames.shape)
return frames,
NODE_CLASS_MAPPINGS = {
"DownloadAndLoadMimicMotionModel": DownloadAndLoadMimicMotionModel,
"MimicMotionSampler": MimicMotionSampler,
}
NODE_DISPLAY_NAME_MAPPINGS = {
"DownloadAndLoadMimicMotionModel": "DownloadAndLoadMimicMotionModel",
"MimicMotionSampler": "MimicMotionSampler",
}
diffusers>=0.27.0
transformers>=4.32.1