提交 246e53a6 authored 作者: kijai's avatar kijai

support 1.1 and use torchscript for dwpose

上级 793ee7a9
......@@ -14,7 +14,7 @@
"1": 86
},
"flags": {},
"order": 7,
"order": 6,
"mode": 0,
"inputs": [
{
......@@ -56,40 +56,6 @@
"Node name for S&R": "GetImageSizeAndCount"
}
},
{
"id": 2,
"type": "DownloadAndLoadMimicMotionModel",
"pos": [
764,
229
],
"size": {
"0": 315,
"1": 106
},
"flags": {},
"order": 0,
"mode": 0,
"outputs": [
{
"name": "mimic_pipeline",
"type": "MIMICPIPE",
"links": [
146,
150
],
"shape": 3
}
],
"properties": {
"Node name for S&R": "DownloadAndLoadMimicMotionModel"
},
"widgets_values": [
"MimicMotion-fp16.safetensors",
"fp16",
false
]
},
{
"id": 42,
"type": "MimicMotionGetPoses",
......@@ -102,7 +68,7 @@
"1": 126
},
"flags": {},
"order": 6,
"order": 5,
"mode": 0,
"inputs": [
{
......@@ -158,7 +124,7 @@
"1": 410.70074462890625
},
"flags": {},
"order": 1,
"order": 0,
"mode": 0,
"outputs": [
{
......@@ -197,7 +163,7 @@
"1": 242
},
"flags": {},
"order": 5,
"order": 4,
"mode": 0,
"inputs": [
{
......@@ -276,7 +242,7 @@
"1": 242
},
"flags": {},
"order": 4,
"order": 3,
"mode": 0,
"inputs": [
{
......@@ -359,10 +325,10 @@
],
"size": [
2861.660400390625,
1566.960177951389
1586.960177951389
],
"flags": {},
"order": 12,
"order": 11,
"mode": 0,
"inputs": [
{
......@@ -379,6 +345,11 @@
"name": "meta_batch",
"type": "VHS_BatchManager",
"link": null
},
{
"name": "vae",
"type": "VAE",
"link": null
}
],
"outputs": [
......@@ -406,10 +377,11 @@
"hidden": false,
"paused": false,
"params": {
"filename": "MimicMotion_00001.mp4",
"filename": "MimicMotion_00002.mp4",
"subfolder": "",
"type": "temp",
"format": "video/h264-mp4"
"format": "video/h264-mp4",
"frame_rate": 12
}
}
}
......@@ -426,7 +398,7 @@
"1": 190
},
"flags": {},
"order": 11,
"order": 10,
"mode": 0,
"inputs": [
{
......@@ -476,12 +448,12 @@
1466,
396
],
"size": [
255.46680297851572,
78
],
"size": {
"0": 255.466796875,
"1": 78
},
"flags": {},
"order": 10,
"order": 9,
"mode": 0,
"inputs": [
{
......@@ -514,64 +486,6 @@
4
]
},
{
"id": 57,
"type": "MimicMotionSampler",
"pos": [
1101,
419
],
"size": {
"0": 315,
"1": 314
},
"flags": {},
"order": 9,
"mode": 0,
"inputs": [
{
"name": "mimic_pipeline",
"type": "MIMICPIPE",
"link": 146
},
{
"name": "ref_image",
"type": "IMAGE",
"link": 147
},
{
"name": "pose_images",
"type": "IMAGE",
"link": 148
}
],
"outputs": [
{
"name": "samples",
"type": "LATENT",
"links": [
149
],
"shape": 3,
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "MimicMotionSampler"
},
"widgets_values": [
25,
2,
2,
42,
"fixed",
15,
0,
16,
6,
false
]
},
{
"id": 37,
"type": "VHS_VideoCombine",
......@@ -581,10 +495,10 @@
],
"size": [
440,
978.6666666666666
998.6666666666666
],
"flags": {},
"order": 8,
"order": 7,
"mode": 0,
"inputs": [
{
......@@ -601,6 +515,11 @@
"name": "meta_batch",
"type": "VHS_BatchManager",
"link": null
},
{
"name": "vae",
"type": "VAE",
"link": null
}
],
"outputs": [
......@@ -625,37 +544,15 @@
"hidden": false,
"paused": false,
"params": {
"filename": "MimicPose_00001.webp",
"filename": "MimicPose_00006.webp",
"subfolder": "",
"type": "temp",
"format": "image/webp"
"format": "image/webp",
"frame_rate": 8
}
}
}
},
{
"id": 51,
"type": "Note",
"pos": [
770,
85
],
"size": [
310.11510095517497,
95.53232006987258
],
"flags": {},
"order": 2,
"mode": 0,
"properties": {
"text": ""
},
"widgets_values": [
"Downloads MimicMotion model and fp16 version of SVD XT 1.1\n\nlcm version is experimental and most likely doesn't work well"
],
"color": "#432",
"bgcolor": "#653"
},
{
"id": 5,
"type": "VHS_LoadVideo",
......@@ -668,13 +565,18 @@
658.5777723524305
],
"flags": {},
"order": 3,
"order": 1,
"mode": 0,
"inputs": [
{
"name": "meta_batch",
"type": "VHS_BatchManager",
"link": null
},
{
"name": "vae",
"type": "VAE",
"link": null
}
],
"outputs": [
......@@ -733,6 +635,106 @@
}
}
}
},
{
"id": 2,
"type": "DownloadAndLoadMimicMotionModel",
"pos": [
658,
230
],
"size": [
404.81472289843646,
89.03937164746077
],
"flags": {},
"order": 2,
"mode": 0,
"outputs": [
{
"name": "mimic_pipeline",
"type": "MIMICPIPE",
"links": [
146,
150
],
"shape": 3
}
],
"properties": {
"Node name for S&R": "DownloadAndLoadMimicMotionModel"
},
"widgets_values": [
"MimicMotionMergedUnet_1-0-fp16.safetensors",
"fp16"
]
},
{
"id": 57,
"type": "MimicMotionSampler",
"pos": [
1101,
419
],
"size": {
"0": 315,
"1": 430
},
"flags": {},
"order": 8,
"mode": 0,
"inputs": [
{
"name": "mimic_pipeline",
"type": "MIMICPIPE",
"link": 146
},
{
"name": "ref_image",
"type": "IMAGE",
"link": 147
},
{
"name": "pose_images",
"type": "IMAGE",
"link": 148
},
{
"name": "optional_scheduler",
"type": "DIFFUSERS_SCHEDULER",
"link": null
}
],
"outputs": [
{
"name": "samples",
"type": "LATENT",
"links": [
149
],
"shape": 3,
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "MimicMotionSampler"
},
"widgets_values": [
20,
2,
2,
42,
"fixed",
15,
0,
16,
6,
false,
1,
0,
1,
1
]
}
],
"links": [
......@@ -885,10 +887,10 @@
"config": {},
"extra": {
"ds": {
"scale": 0.5644739300537777,
"scale": 0.5131581182307069,
"offset": {
"0": 763.3873291015625,
"1": 37.92726135253906
"0": 572.9068982237912,
"1": 109.246958798682
}
}
},
......
......@@ -20,7 +20,7 @@ class DWposeDetector:
device: (str) 'cpu' or 'cuda:{device_id}'
"""
def __init__(self, model_det, model_pose, device='cpu'):
self.pose_estimation = Wholebody(model_det=model_det, model_pose=model_pose, device=device)
self.pose_estimation = Wholebody(model_det=model_det, model_pose=model_pose)
def __call__(self, oriImg):
oriImg = oriImg.copy()
......
import cv2
import numpy as np
import torch
def nms(boxes, scores, nms_thr):
    """Greedy single-class non-maximum suppression using NumPy.

    Args:
        boxes: 2-D array whose first four columns are read as
            ``x1, y1, x2, y2`` corner coordinates.
        scores: 1-D array with one score per row of ``boxes``.
        nms_thr: overlap threshold; a candidate survives a round only if
            its overlap ratio with the selected box is ``<= nms_thr``.

    Returns:
        A list of row indices into ``boxes``, ordered from the highest
        score to the lowest among the boxes that were kept.
    """
    left, top = boxes[:, 0], boxes[:, 1]
    right, bottom = boxes[:, 2], boxes[:, 3]

    # The "+ 1" treats coordinates as inclusive pixel indices.
    box_areas = (right - left + 1) * (bottom - top + 1)

    # Candidate indices, best score first.
    remaining = scores.argsort()[::-1]

    keep = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        keep.append(best)

        # Intersection rectangle between the selected box and every other
        # candidate; negative extents are clamped to zero (no overlap).
        inter_w = np.maximum(
            0.0,
            np.minimum(right[best], right[rest])
            - np.maximum(left[best], left[rest])
            + 1,
        )
        inter_h = np.maximum(
            0.0,
            np.minimum(bottom[best], bottom[rest])
            - np.maximum(top[best], top[rest])
            + 1,
        )
        inter = inter_w * inter_h
        overlap = inter / (box_areas[best] + box_areas[rest] - inter)

        # Drop every candidate that overlaps the selected box too much.
        remaining = rest[np.where(overlap <= nms_thr)[0]]

    return keep
def multiclass_nms(boxes, scores, nms_thr, score_thr):
    """Run NMS independently for every class column of ``scores``.

    Args:
        boxes: 2-D array of candidate boxes, one row per candidate.
        scores: 2-D array of shape ``(num_boxes, num_classes)``.
        nms_thr: overlap threshold forwarded to ``nms``.
        score_thr: candidates whose class score is not strictly greater
            than this value are ignored for that class.

    Returns:
        ``None`` when no class produced a detection; otherwise a 2-D array
        whose rows are the kept box columns followed by the score and the
        class index, stacked class by class.
    """
    per_class = []
    for cls_ind in range(scores.shape[1]):
        class_scores = scores[:, cls_ind]
        above_thr = class_scores > score_thr
        if not above_thr.any():
            continue

        cand_scores = class_scores[above_thr]
        cand_boxes = boxes[above_thr]
        kept = nms(cand_boxes, cand_scores, nms_thr)
        if not kept:
            continue

        # One column holding the class index for every kept box.
        labels = np.ones((len(kept), 1)) * cls_ind
        per_class.append(
            np.concatenate(
                [cand_boxes[kept], cand_scores[kept, None], labels], 1
            )
        )

    if not per_class:
        return None
    return np.concatenate(per_class, 0)
def demo_postprocess(outputs, img_size, p6=False):
    """Decode grid-relative predictions into input-image pixel units.

    ``outputs`` is modified in place and also returned.

    Args:
        outputs: array whose last axis starts with four box values; the
            second-to-last axis must enumerate the grid cells of all
            stride levels concatenated in order.
        img_size: ``(height, width)`` of the network input.
        p6: when true an extra stride-64 level is included.

    Returns:
        The same ``outputs`` array, with ``[..., :2]`` offset by the cell
        position and scaled by the stride, and ``[..., 2:4]`` replaced by
        ``exp(value) * stride``.
    """
    strides = [8, 16, 32, 64] if p6 else [8, 16, 32]

    grid_parts = []
    stride_parts = []
    for stride in strides:
        rows = img_size[0] // stride
        cols = img_size[1] // stride
        xs, ys = np.meshgrid(np.arange(cols), np.arange(rows))
        # Flatten the (rows, cols) lattice to a list of (x, y) cells.
        cells = np.stack((xs, ys), 2).reshape(1, -1, 2)
        grid_parts.append(cells)
        stride_parts.append(np.full((*cells.shape[:2], 1), stride))

    all_cells = np.concatenate(grid_parts, 1)
    all_strides = np.concatenate(stride_parts, 1)

    outputs[..., :2] = (outputs[..., :2] + all_cells) * all_strides
    outputs[..., 2:4] = np.exp(outputs[..., 2:4]) * all_strides
    return outputs
def preprocess(img, input_size, swap=(2, 0, 1)):
    """Resize an image to fit ``input_size`` and pad the remainder.

    The aspect ratio is kept; the resized image is placed in the top-left
    corner of a canvas filled with the value 114.

    Args:
        img: image array; a 3-D input gets a three-channel canvas,
            anything else gets a canvas of exactly ``input_size``.
        input_size: ``(height, width)`` of the canvas.
        swap: axis order applied to the canvas before it is returned.

    Returns:
        A tuple ``(padded_img, r)`` where ``padded_img`` is a contiguous
        ``float32`` array in ``swap`` axis order and ``r`` is the scale
        factor that was applied to the original image.
    """
    if len(img.shape) == 3:
        canvas_shape = (input_size[0], input_size[1], 3)
    else:
        canvas_shape = input_size
    canvas = np.ones(canvas_shape, dtype=np.uint8) * 114

    src_h, src_w = img.shape[0], img.shape[1]
    # Largest scale at which both sides still fit inside the canvas.
    r = min(input_size[0] / src_h, input_size[1] / src_w)
    new_h = int(src_h * r)
    new_w = int(src_w * r)

    resized = cv2.resize(
        img, (new_w, new_h), interpolation=cv2.INTER_LINEAR
    ).astype(np.uint8)
    canvas[:new_h, :new_w] = resized

    canvas = canvas.transpose(swap)
    canvas = np.ascontiguousarray(canvas, dtype=np.float32)
    return canvas, r
def inference_detector(model, oriImg, detect_classes=(0,)):
    """Detect objects in ``oriImg`` and return boxes for selected classes.

    Args:
        model: a torch module; its first parameter decides the device and
            dtype the input tensor is converted to.
        oriImg: image array handed to ``preprocess``.
        detect_classes: class indices to keep. Any array-like accepted by
            ``np.isin`` works (list, tuple, ...).

    Returns:
        ``None`` when ``multiclass_nms`` finds no detection at all;
        otherwise an array with one ``x1, y1, x2, y2`` row per kept box,
        expressed in the coordinates of ``oriImg``. The array may have
        zero rows when detections exist but none passes the final filter.
    """
    input_shape = (640, 640)
    img, ratio = preprocess(oriImg, input_shape)

    # Look the reference parameter up once instead of re-iterating
    # model.parameters() for device and dtype separately.
    ref_param = next(model.parameters())
    batch = torch.from_numpy(img[None, :, :, :]).to(
        ref_param.device, ref_param.dtype
    )

    # Pure inference: no autograd graph is needed for the forward pass.
    with torch.no_grad():
        output = model(batch).float().cpu().detach().numpy()

    predictions = demo_postprocess(output[0], input_shape)

    boxes = predictions[:, :4]
    # Column 4 is multiplied into every remaining column to form the
    # per-class score (presumably objectness x class probability).
    scores = predictions[:, 4:5] * predictions[:, 5:]

    # Convert (center_x, center_y, width, height) to corner format.
    half_w = boxes[:, 2] / 2.
    half_h = boxes[:, 3] / 2.
    boxes_xyxy = np.ones_like(boxes)
    boxes_xyxy[:, 0] = boxes[:, 0] - half_w
    boxes_xyxy[:, 1] = boxes[:, 1] - half_h
    boxes_xyxy[:, 2] = boxes[:, 0] + half_w
    boxes_xyxy[:, 3] = boxes[:, 1] + half_h
    # Undo the resize done in preprocess to get original-image coordinates.
    boxes_xyxy /= ratio

    dets = multiclass_nms(boxes_xyxy, scores, nms_thr=0.45, score_thr=0.1)
    if dets is None:
        return None

    final_boxes, final_scores, final_cls_inds = (
        dets[:, :4],
        dets[:, 4],
        dets[:, 5],
    )
    # Keep boxes that score above 0.3 and belong to a requested class.
    wanted = (final_scores > 0.3) & np.isin(final_cls_inds, detect_classes)
    return final_boxes[wanted]
\ No newline at end of file
差异被折叠。
import numpy as np
import onnxruntime as ort
from .onnxdet import inference_detector
from .onnxpose import inference_pose
import comfy.model_management as mm
#import onnxruntime as ort
# from .onnxdet import inference_detector
# from .onnxpose import inference_pose
from .jit_det import inference_detector as inference_jit_yolox
from .jit_pose import inference_pose as inference_jit_pose
class Wholebody:
"""detect human pose by dwpose
"""
def __init__(self, model_det, model_pose, device="cpu"):
providers = ['CPUExecutionProvider'] if device == 'cpu' else ['CUDAExecutionProvider']
provider_options = None if device == 'cpu' else [{'device_id': 0}]
self.session_det = ort.InferenceSession(
path_or_bytes=model_det, providers=providers, provider_options=provider_options
)
self.session_pose = ort.InferenceSession(
path_or_bytes=model_pose, providers=providers, provider_options=provider_options
)
def __init__(self, model_det, model_pose):
#providers = ['CPUExecutionProvider'] if device == 'cpu' else ['CUDAExecutionProvider']
#provider_options = None if device == 'cpu' else [{'device_id': 0}]
# self.session_det = ort.InferenceSession(
# path_or_bytes=model_det, providers=providers, provider_options=provider_options
# )
# self.session_pose = ort.InferenceSession(
# path_or_bytes=model_pose, providers=providers, provider_options=provider_options
# )
self.det = model_det
self.pose = model_pose
def __call__(self, oriImg):
"""call to process dwpose-detect
......@@ -26,8 +33,9 @@ class Wholebody:
oriImg (np.ndarray): detected image
"""
det_result = inference_detector(self.session_det, oriImg)
keypoints, scores = inference_pose(self.session_pose, det_result, oriImg)
det_result = inference_jit_yolox(self.det, oriImg, detect_classes=[0])
keypoints, scores = inference_jit_pose(self.pose, det_result, oriImg)
keypoints_info = np.concatenate(
(keypoints, scores[..., None]), axis=-1)
......
......@@ -635,10 +635,10 @@ class MimicMotionPipeline(DiffusionPipeline):
# Check if the current timestep is within the start and end step range
if start_step_index <= i <= end_step_index:
# Apply pose_latents as currently done
print(f"Applying pose on step {i}")
#print(f"Applying pose on step {i}")
pose_latents_to_use = pose_latents[:, idx].flatten(0, 1)
else:
print(f"Not applying pose on step {i}")
#print(f"Not applying pose on step {i}")
# Apply an alternative if pose_latents should not be used outside this range
# This could be zeros, or any other placeholder logic you define.
pose_latents_to_use = torch.zeros_like(pose_latents[:, idx].flatten(0, 1))
......
差异被折叠。
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论