ComfyUI-MimicMotionWrapper
Commit 246e53a6, authored Jul 08, 2024 by kijai
Parent: 793ee7a9

support 1.1 and use torchscript for dwpose

Showing 7 changed files with 731 additions and 214 deletions.
Changed files:

  examples/mimic_motion_example_02.json            +140  -138
  mimicmotion/dwpose/dwpose_detector.py              +1    -1
  mimicmotion/dwpose/jit_det.py                    +126    -0  (new file)
  mimicmotion/dwpose/jit_pose.py                   +364    -0  (new file)
  mimicmotion/dwpose/wholebody.py                   +23   -15
  mimicmotion/pipelines/pipeline_mimicmotion.py      +2    -2
  nodes.py                                          +75   -58
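The core of the change: the DWPose person detector (YOLOX) and pose estimator are now loaded as TorchScript modules instead of ONNX Runtime sessions, and the model loader gains the MimicMotion 1.1 merged UNet. A minimal sketch of what the switch means in practice (model file names are taken from the diff below; the models/DWPose location follows the wrapper's convention):

import torch

# TorchScript checkpoints replace the previous .onnx files
det = torch.jit.load("models/DWPose/yolox_l.torchscript.pt")
pose = torch.jit.load("models/DWPose/dw-ll_ucoco_384_bs5.torchscript.pt")

# Unlike an onnxruntime.InferenceSession, a TorchScript module moves between
# devices like any nn.Module, which is what enables the offloading added below.
device = "cuda" if torch.cuda.is_available() else "cpu"
det, pose = det.to(device), pose.to(device)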
examples/mimic_motion_example_02.json

@@ -14,7 +14,7 @@
     "1": 86
   },
   "flags": {},
-  "order": 7,
+  "order": 6,
   "mode": 0,
   "inputs": [
     {
@@ -56,40 +56,6 @@
       "Node name for S&R": "GetImageSizeAndCount"
     }
   },
-  {
-    "id": 2,
-    "type": "DownloadAndLoadMimicMotionModel",
-    "pos": [764, 229],
-    "size": {"0": 315, "1": 106},
-    "flags": {},
-    "order": 0,
-    "mode": 0,
-    "outputs": [
-      {"name": "mimic_pipeline", "type": "MIMICPIPE", "links": [146, 150], "shape": 3}
-    ],
-    "properties": {"Node name for S&R": "DownloadAndLoadMimicMotionModel"},
-    "widgets_values": ["MimicMotion-fp16.safetensors", "fp16", false]
-  },
   {
     "id": 42,
     "type": "MimicMotionGetPoses",
@@ -102,7 +68,7 @@
     "1": 126
   },
   "flags": {},
-  "order": 6,
+  "order": 5,
   "mode": 0,
   "inputs": [
     {
@@ -158,7 +124,7 @@
     "1": 410.70074462890625
   },
   "flags": {},
-  "order": 1,
+  "order": 0,
   "mode": 0,
   "outputs": [
     {
@@ -197,7 +163,7 @@
     "1": 242
   },
   "flags": {},
-  "order": 5,
+  "order": 4,
   "mode": 0,
   "inputs": [
     {
@@ -276,7 +242,7 @@
     "1": 242
   },
   "flags": {},
-  "order": 4,
+  "order": 3,
   "mode": 0,
   "inputs": [
     {
@@ -359,10 +325,10 @@
   ],
   "size": [
     2861.660400390625,
-    1566.960177951389
+    1586.960177951389
   ],
   "flags": {},
-  "order": 12,
+  "order": 11,
   "mode": 0,
   "inputs": [
     {
@@ -379,6 +345,11 @@
       "name": "meta_batch",
       "type": "VHS_BatchManager",
       "link": null
     },
+    {
+      "name": "vae",
+      "type": "VAE",
+      "link": null
+    }
   ],
   "outputs": [
@@ -406,10 +377,11 @@
       "hidden": false,
       "paused": false,
       "params": {
-        "filename": "MimicMotion_00001.mp4",
+        "filename": "MimicMotion_00002.mp4",
         "subfolder": "",
         "type": "temp",
-        "format": "video/h264-mp4"
+        "format": "video/h264-mp4",
+        "frame_rate": 12
       }
     }
   }
@@ -426,7 +398,7 @@
     "1": 190
   },
   "flags": {},
-  "order": 11,
+  "order": 10,
   "mode": 0,
   "inputs": [
     {
@@ -476,12 +448,12 @@
     1466,
     396
   ],
-  "size": [255.46680297851572, 78],
+  "size": {"0": 255.466796875, "1": 78},
   "flags": {},
-  "order": 10,
+  "order": 9,
   "mode": 0,
   "inputs": [
     {
@@ -514,64 +486,6 @@
     4
   ]
 },
-{
-  "id": 57,
-  "type": "MimicMotionSampler",
-  "pos": [1101, 419],
-  "size": {"0": 315, "1": 314},
-  "flags": {},
-  "order": 9,
-  "mode": 0,
-  "inputs": [
-    {"name": "mimic_pipeline", "type": "MIMICPIPE", "link": 146},
-    {"name": "ref_image", "type": "IMAGE", "link": 147},
-    {"name": "pose_images", "type": "IMAGE", "link": 148}
-  ],
-  "outputs": [
-    {"name": "samples", "type": "LATENT", "links": [149], "shape": 3, "slot_index": 0}
-  ],
-  "properties": {"Node name for S&R": "MimicMotionSampler"},
-  "widgets_values": [25, 2, 2, 42, "fixed", 15, 0, 16, 6, false]
-},
 {
   "id": 37,
   "type": "VHS_VideoCombine",
@@ -581,10 +495,10 @@
   ],
   "size": [
     440,
-    978.6666666666666
+    998.6666666666666
   ],
   "flags": {},
-  "order": 8,
+  "order": 7,
   "mode": 0,
   "inputs": [
     {
@@ -601,6 +515,11 @@
       "name": "meta_batch",
       "type": "VHS_BatchManager",
       "link": null
     },
+    {
+      "name": "vae",
+      "type": "VAE",
+      "link": null
+    }
   ],
   "outputs": [
@@ -625,37 +544,15 @@
       "hidden": false,
       "paused": false,
       "params": {
-        "filename": "MimicPose_00001.webp",
+        "filename": "MimicPose_00006.webp",
         "subfolder": "",
         "type": "temp",
-        "format": "image/webp"
+        "format": "image/webp",
+        "frame_rate": 8
       }
     }
   }
 },
-{
-  "id": 51,
-  "type": "Note",
-  "pos": [770, 85],
-  "size": [310.11510095517497, 95.53232006987258],
-  "flags": {},
-  "order": 2,
-  "mode": 0,
-  "properties": {"text": ""},
-  "widgets_values": [
-    "Downloads MimicMotion model and fp16 version of SVD XT 1.1\n\nlcm version is experimental and most likely doesn't work well"
-  ],
-  "color": "#432",
-  "bgcolor": "#653"
-},
 {
   "id": 5,
   "type": "VHS_LoadVideo",
@@ -668,13 +565,18 @@
     658.5777723524305
   ],
   "flags": {},
-  "order": 3,
+  "order": 1,
   "mode": 0,
   "inputs": [
     {
       "name": "meta_batch",
       "type": "VHS_BatchManager",
       "link": null
     },
+    {
+      "name": "vae",
+      "type": "VAE",
+      "link": null
+    }
   ],
   "outputs": [
@@ -733,6 +635,106 @@
       }
     }
   },
+  {
+    "id": 2,
+    "type": "DownloadAndLoadMimicMotionModel",
+    "pos": [658, 230],
+    "size": [404.81472289843646, 89.03937164746077],
+    "flags": {},
+    "order": 2,
+    "mode": 0,
+    "outputs": [
+      {"name": "mimic_pipeline", "type": "MIMICPIPE", "links": [146, 150], "shape": 3}
+    ],
+    "properties": {"Node name for S&R": "DownloadAndLoadMimicMotionModel"},
+    "widgets_values": ["MimicMotionMergedUnet_1-0-fp16.safetensors", "fp16"]
+  },
+  {
+    "id": 57,
+    "type": "MimicMotionSampler",
+    "pos": [1101, 419],
+    "size": {"0": 315, "1": 430},
+    "flags": {},
+    "order": 8,
+    "mode": 0,
+    "inputs": [
+      {"name": "mimic_pipeline", "type": "MIMICPIPE", "link": 146},
+      {"name": "ref_image", "type": "IMAGE", "link": 147},
+      {"name": "pose_images", "type": "IMAGE", "link": 148},
+      {"name": "optional_scheduler", "type": "DIFFUSERS_SCHEDULER", "link": null}
+    ],
+    "outputs": [
+      {"name": "samples", "type": "LATENT", "links": [149], "shape": 3, "slot_index": 0}
+    ],
+    "properties": {"Node name for S&R": "MimicMotionSampler"},
+    "widgets_values": [20, 2, 2, 42, "fixed", 15, 0, 16, 6, false, 1, 0, 1, 1]
+  }
 ],
 "links": [
@@ -885,10 +887,10 @@
   "config": {},
   "extra": {
     "ds": {
-      "scale": 0.5644739300537777,
+      "scale": 0.5131581182307069,
       "offset": {
-        "0": 763.3873291015625,
-        "1": 37.92726135253906
+        "0": 572.9068982237912,
+        "1": 109.246958798682
       }
     }
   },
mimicmotion/dwpose/dwpose_detector.py

@@ -20,7 +20,7 @@ class DWposeDetector:
         device: (str) 'cpu' or 'cuda:{device_id}'
     """
-    def __init__(self, model_det, model_pose, device='cpu'):
-        self.pose_estimation = Wholebody(model_det=model_det, model_pose=model_pose, device=device)
+    def __init__(self, model_det, model_pose):
+        self.pose_estimation = Wholebody(model_det=model_det, model_pose=model_pose)

     def __call__(self, oriImg):
         oriImg = oriImg.copy()
mimicmotion/dwpose/jit_det.py  (new file, mode 100644)

import cv2
import numpy as np
import torch


def nms(boxes, scores, nms_thr):
    """Single class NMS implemented in Numpy."""
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)

        inds = np.where(ovr <= nms_thr)[0]
        order = order[inds + 1]

    return keep


def multiclass_nms(boxes, scores, nms_thr, score_thr):
    """Multiclass NMS implemented in Numpy. Class-aware version."""
    final_dets = []
    num_classes = scores.shape[1]
    for cls_ind in range(num_classes):
        cls_scores = scores[:, cls_ind]
        valid_score_mask = cls_scores > score_thr
        if valid_score_mask.sum() == 0:
            continue
        else:
            valid_scores = cls_scores[valid_score_mask]
            valid_boxes = boxes[valid_score_mask]
            keep = nms(valid_boxes, valid_scores, nms_thr)
            if len(keep) > 0:
                cls_inds = np.ones((len(keep), 1)) * cls_ind
                dets = np.concatenate(
                    [valid_boxes[keep], valid_scores[keep, None], cls_inds], 1)
                final_dets.append(dets)
    if len(final_dets) == 0:
        return None
    return np.concatenate(final_dets, 0)
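Not part of the commit, but a quick illustration of how nms behaves on made-up boxes: two heavily overlapping detections collapse to the higher-scoring one, while a distant box survives.

import numpy as np

boxes = np.array([[0., 0., 10., 10.],
                  [1., 1., 11., 11.],     # IoU ~0.7 with the first box
                  [50., 50., 60., 60.]])  # disjoint
scores = np.array([0.9, 0.8, 0.7])

# indices kept at IoU threshold 0.5: box 1 is suppressed by box 0
keep = nms(boxes, scores, nms_thr=0.5)
print([int(i) for i in keep])  # [0, 2]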
def demo_postprocess(outputs, img_size, p6=False):
    # Decode raw YOLOX outputs: add the per-cell grid offsets and scale the
    # box centers/sizes by the stride of each feature level.
    grids = []
    expanded_strides = []
    strides = [8, 16, 32] if not p6 else [8, 16, 32, 64]

    hsizes = [img_size[0] // stride for stride in strides]
    wsizes = [img_size[1] // stride for stride in strides]

    for hsize, wsize, stride in zip(hsizes, wsizes, strides):
        xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize))
        grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
        grids.append(grid)
        shape = grid.shape[:2]
        expanded_strides.append(np.full((*shape, 1), stride))

    grids = np.concatenate(grids, 1)
    expanded_strides = np.concatenate(expanded_strides, 1)
    outputs[..., :2] = (outputs[..., :2] + grids) * expanded_strides
    outputs[..., 2:4] = np.exp(outputs[..., 2:4]) * expanded_strides

    return outputs


def preprocess(img, input_size, swap=(2, 0, 1)):
    # Letterbox the image into input_size on a gray (114) canvas, keeping the
    # aspect ratio, then convert HWC -> CHW float32. Returns the padded image
    # and the resize ratio needed to map boxes back to the original image.
    if len(img.shape) == 3:
        padded_img = np.ones((input_size[0], input_size[1], 3), dtype=np.uint8) * 114
    else:
        padded_img = np.ones(input_size, dtype=np.uint8) * 114

    r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
    resized_img = cv2.resize(
        img,
        (int(img.shape[1] * r), int(img.shape[0] * r)),
        interpolation=cv2.INTER_LINEAR,
    ).astype(np.uint8)
    padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img

    padded_img = padded_img.transpose(swap)
    padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
    return padded_img, r


def inference_detector(model, oriImg, detect_classes=[0]):
    # Run the TorchScript YOLOX model on a single image and return boxes
    # (xyxy, original-image coordinates) for the requested classes.
    input_shape = (640, 640)
    img, ratio = preprocess(oriImg, input_shape)

    device, dtype = next(model.parameters()).device, next(model.parameters()).dtype
    input = img[None, :, :, :]
    input = torch.from_numpy(input).to(device, dtype)

    output = model(input).float().cpu().detach().numpy()
    predictions = demo_postprocess(output[0], input_shape)

    boxes = predictions[:, :4]
    scores = predictions[:, 4:5] * predictions[:, 5:]

    boxes_xyxy = np.ones_like(boxes)
    boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2.
    boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2.
    boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.
    boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.
    boxes_xyxy /= ratio
    dets = multiclass_nms(boxes_xyxy, scores, nms_thr=0.45, score_thr=0.1)
    if dets is None:
        return None
    final_boxes, final_scores, final_cls_inds = dets[:, :4], dets[:, 4], dets[:, 5]
    isscore = final_scores > 0.3
    iscat = np.isin(final_cls_inds, detect_classes)
    isbbox = [i and j for (i, j) in zip(isscore, iscat)]
    final_boxes = final_boxes[isbbox]
    return final_boxes
\ No newline at end of file
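A hedged usage sketch (model file name from this commit; the frame path and import path are illustrative): inference_detector wraps the letterbox preprocessing, grid decoding, and NMS, and returns person boxes in original-image xyxy coordinates.

import cv2
import torch

from mimicmotion.dwpose.jit_det import inference_detector  # import path assumed

det_model = torch.jit.load("models/DWPose/yolox_l.torchscript.pt")
frame = cv2.imread("frame.png")  # HWC uint8

# detect_classes=[0] keeps only the COCO "person" class, as wholebody.py does
person_boxes = inference_detector(det_model, frame, detect_classes=[0])
if person_boxes is not None:
    print(person_boxes.shape)  # (num_people, 4) as (x1, y1, x2, y2)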
mimicmotion/dwpose/jit_pose.py  (new file, mode 100644)

from typing import List, Tuple

import cv2
import numpy as np
import torch


def preprocess(
        img: np.ndarray, out_bbox, input_size: Tuple[int, int] = (192, 256)
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Do preprocessing for DWPose model inference.

    Args:
        img (np.ndarray): Input image.
        input_size (tuple): Input image size in shape (w, h).

    Returns:
        tuple:
        - resized_img (np.ndarray): Preprocessed image.
        - center (np.ndarray): Center of image.
        - scale (np.ndarray): Scale of image.
    """
    # get shape of image
    img_shape = img.shape[:2]
    out_img, out_center, out_scale = [], [], []
    if len(out_bbox) == 0:
        out_bbox = [[0, 0, img_shape[1], img_shape[0]]]
    for i in range(len(out_bbox)):
        x0 = out_bbox[i][0]
        y0 = out_bbox[i][1]
        x1 = out_bbox[i][2]
        y1 = out_bbox[i][3]
        bbox = np.array([x0, y0, x1, y1])

        # get center and scale
        center, scale = bbox_xyxy2cs(bbox, padding=1.25)

        # do affine transformation
        resized_img, scale = top_down_affine(input_size, scale, center, img)

        # normalize image
        mean = np.array([123.675, 116.28, 103.53])
        std = np.array([58.395, 57.12, 57.375])
        resized_img = (resized_img - mean) / std

        out_img.append(resized_img)
        out_center.append(center)
        out_scale.append(scale)

    return out_img, out_center, out_scale


def inference(model, img, bs=5):
    """Inference DWPose model implemented in TorchScript.

    Args:
        model : TorchScript Model.
        img : Input images.

    Returns:
        outputs : Output of DWPose model.
    """
    all_out = []
    # build input
    orig_img_count = len(img)
    # Pad zeros to fit batch size
    for _ in range(bs - (orig_img_count % bs)):
        img.append(np.zeros_like(img[0]))
    input = np.stack(img, axis=0).transpose(0, 3, 1, 2)
    device, dtype = next(model.parameters()).device, next(model.parameters()).dtype
    input = torch.from_numpy(input).to(device, dtype)

    out1, out2 = [], []
    for i in range(input.shape[0] // bs):
        curr_batch_output = model(input[i * bs:(i + 1) * bs])
        out1.append(curr_batch_output[0].float())
        out2.append(curr_batch_output[1].float())
    out1, out2 = torch.cat(out1, dim=0)[:orig_img_count], torch.cat(out2, dim=0)[:orig_img_count]
    out1, out2 = out1.float().cpu().detach().numpy(), out2.float().cpu().detach().numpy()
    all_outputs = out1, out2

    for batch_idx in range(len(all_outputs[0])):
        outputs = [all_outputs[i][batch_idx:batch_idx + 1, ...]
                   for i in range(len(all_outputs))]
        all_out.append(outputs)
    return all_out


def postprocess(outputs: List[np.ndarray],
                model_input_size: Tuple[int, int],
                center: Tuple[int, int],
                scale: Tuple[int, int],
                simcc_split_ratio: float = 2.0
                ) -> Tuple[np.ndarray, np.ndarray]:
    """Postprocess for DWPose model output.

    Args:
        outputs (np.ndarray): Output of RTMPose model.
        model_input_size (tuple): RTMPose model input image size.
        center (tuple): Center of bbox in shape (x, y).
        scale (tuple): Scale of bbox in shape (w, h).
        simcc_split_ratio (float): Split ratio of simcc.

    Returns:
        tuple:
        - keypoints (np.ndarray): Rescaled keypoints.
        - scores (np.ndarray): Model predict scores.
    """
    all_key = []
    all_score = []
    for i in range(len(outputs)):
        # use simcc to decode
        simcc_x, simcc_y = outputs[i]
        keypoints, scores = decode(simcc_x, simcc_y, simcc_split_ratio)

        # rescale keypoints
        keypoints = keypoints / model_input_size * scale[i] + center[i] - scale[i] / 2
        all_key.append(keypoints[0])
        all_score.append(scores[0])

    return np.array(all_key), np.array(all_score)


def bbox_xyxy2cs(bbox: np.ndarray,
                 padding: float = 1.) -> Tuple[np.ndarray, np.ndarray]:
    """Transform the bbox format from (x,y,w,h) into (center, scale)

    Args:
        bbox (ndarray): Bounding box(es) in shape (4,) or (n, 4), formatted
            as (left, top, right, bottom)
        padding (float): BBox padding factor that will be multiplied to scale.
            Default: 1.0

    Returns:
        tuple: A tuple containing center and scale.
        - np.ndarray[float32]: Center (x, y) of the bbox in shape (2,) or
            (n, 2)
        - np.ndarray[float32]: Scale (w, h) of the bbox in shape (2,) or
            (n, 2)
    """
    # convert single bbox from (4, ) to (1, 4)
    dim = bbox.ndim
    if dim == 1:
        bbox = bbox[None, :]

    # get bbox center and scale
    x1, y1, x2, y2 = np.hsplit(bbox, [1, 2, 3])
    center = np.hstack([x1 + x2, y1 + y2]) * 0.5
    scale = np.hstack([x2 - x1, y2 - y1]) * padding

    if dim == 1:
        center = center[0]
        scale = scale[0]

    return center, scale
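For intuition, a worked example of bbox_xyxy2cs with the 1.25 padding that preprocess above uses (values made up):

import numpy as np

bbox = np.array([100., 200., 300., 600.])       # (x1, y1, x2, y2)
center, scale = bbox_xyxy2cs(bbox, padding=1.25)
print(center)  # [200. 400.]  box midpoint
print(scale)   # [250. 500.]  (w, h) = (200, 400) scaled by 1.25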
def _fix_aspect_ratio(bbox_scale: np.ndarray, aspect_ratio: float) -> np.ndarray:
    """Extend the scale to match the given aspect ratio.

    Args:
        scale (np.ndarray): The image scale (w, h) in shape (2, )
        aspect_ratio (float): The ratio of ``w/h``

    Returns:
        np.ndarray: The reshaped image scale in (2, )
    """
    w, h = np.hsplit(bbox_scale, [1])
    bbox_scale = np.where(w > h * aspect_ratio,
                          np.hstack([w, w / aspect_ratio]),
                          np.hstack([h * aspect_ratio, h]))
    return bbox_scale


def _rotate_point(pt: np.ndarray, angle_rad: float) -> np.ndarray:
    """Rotate a point by an angle.

    Args:
        pt (np.ndarray): 2D point coordinates (x, y) in shape (2, )
        angle_rad (float): rotation angle in radian

    Returns:
        np.ndarray: Rotated point in shape (2, )
    """
    sn, cs = np.sin(angle_rad), np.cos(angle_rad)
    rot_mat = np.array([[cs, -sn], [sn, cs]])
    return rot_mat @ pt


def _get_3rd_point(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    """To calculate the affine matrix, three pairs of points are required. This
    function is used to get the 3rd point, given 2D points a & b.

    The 3rd point is defined by rotating vector `a - b` by 90 degrees
    anticlockwise, using b as the rotation center.

    Args:
        a (np.ndarray): The 1st point (x,y) in shape (2, )
        b (np.ndarray): The 2nd point (x,y) in shape (2, )

    Returns:
        np.ndarray: The 3rd point.
    """
    direction = a - b
    c = b + np.r_[-direction[1], direction[0]]
    return c


def get_warp_matrix(center: np.ndarray,
                    scale: np.ndarray,
                    rot: float,
                    output_size: Tuple[int, int],
                    shift: Tuple[float, float] = (0., 0.),
                    inv: bool = False) -> np.ndarray:
    """Calculate the affine transformation matrix that can warp the bbox area
    in the input image to the output size.

    Args:
        center (np.ndarray[2, ]): Center of the bounding box (x, y).
        scale (np.ndarray[2, ]): Scale of the bounding box
            wrt [width, height].
        rot (float): Rotation angle (degree).
        output_size (np.ndarray[2, ] | list(2,)): Size of the
            destination heatmaps.
        shift (0-100%): Shift translation ratio wrt the width/height.
            Default (0., 0.).
        inv (bool): Option to inverse the affine transform direction.
            (inv=False: src->dst or inv=True: dst->src)

    Returns:
        np.ndarray: A 2x3 transformation matrix
    """
    shift = np.array(shift)
    src_w = scale[0]
    dst_w = output_size[0]
    dst_h = output_size[1]

    # compute transformation matrix
    rot_rad = np.deg2rad(rot)
    src_dir = _rotate_point(np.array([0., src_w * -0.5]), rot_rad)
    dst_dir = np.array([0., dst_w * -0.5])

    # get four corners of the src rectangle in the original image
    src = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + scale * shift
    src[1, :] = center + src_dir + scale * shift
    src[2, :] = _get_3rd_point(src[0, :], src[1, :])

    # get four corners of the dst rectangle in the input image
    dst = np.zeros((3, 2), dtype=np.float32)
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
    dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :])

    if inv:
        warp_mat = cv2.getAffineTransform(np.float32(dst), np.float32(src))
    else:
        warp_mat = cv2.getAffineTransform(np.float32(src), np.float32(dst))

    return warp_mat


def top_down_affine(input_size: dict, bbox_scale: dict, bbox_center: dict,
                    img: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Get the bbox image as the model input by affine transform.

    Args:
        input_size (dict): The input size of the model.
        bbox_scale (dict): The bbox scale of the img.
        bbox_center (dict): The bbox center of the img.
        img (np.ndarray): The original image.

    Returns:
        tuple: A tuple containing center and scale.
        - np.ndarray[float32]: img after affine transform.
        - np.ndarray[float32]: bbox scale after affine transform.
    """
    w, h = input_size
    warp_size = (int(w), int(h))

    # reshape bbox to fixed aspect ratio
    bbox_scale = _fix_aspect_ratio(bbox_scale, aspect_ratio=w / h)

    # get the affine matrix
    center = bbox_center
    scale = bbox_scale
    rot = 0
    warp_mat = get_warp_matrix(center, scale, rot, output_size=(w, h))

    # do affine transform
    img = cv2.warpAffine(img, warp_mat, warp_size, flags=cv2.INTER_LINEAR)

    return img, bbox_scale


def get_simcc_maximum(simcc_x: np.ndarray,
                      simcc_y: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Get maximum response location and value from simcc representations.

    Note:
        instance number: N
        num_keypoints: K
        heatmap height: H
        heatmap width: W

    Args:
        simcc_x (np.ndarray): x-axis SimCC in shape (K, Wx) or (N, K, Wx)
        simcc_y (np.ndarray): y-axis SimCC in shape (K, Wy) or (N, K, Wy)

    Returns:
        tuple:
        - locs (np.ndarray): locations of maximum heatmap responses in shape
            (K, 2) or (N, K, 2)
        - vals (np.ndarray): values of maximum heatmap responses in shape
            (K,) or (N, K)
    """
    N, K, Wx = simcc_x.shape
    simcc_x = simcc_x.reshape(N * K, -1)
    simcc_y = simcc_y.reshape(N * K, -1)

    # get maximum value locations
    x_locs = np.argmax(simcc_x, axis=1)
    y_locs = np.argmax(simcc_y, axis=1)
    locs = np.stack((x_locs, y_locs), axis=-1).astype(np.float32)
    max_val_x = np.amax(simcc_x, axis=1)
    max_val_y = np.amax(simcc_y, axis=1)

    # get maximum value across x and y axis
    mask = max_val_x > max_val_y
    max_val_x[mask] = max_val_y[mask]
    vals = max_val_x
    locs[vals <= 0.] = -1

    # reshape
    locs = locs.reshape(N, K, 2)
    vals = vals.reshape(N, K)

    return locs, vals
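A toy check of get_simcc_maximum (illustrative values, not from the commit): the location is the argmax of each 1-D SimCC vector, and the confidence is the smaller of the two peak values.

import numpy as np

# one instance, two keypoints, 8-bin SimCC vectors
simcc_x = np.zeros((1, 2, 8), dtype=np.float32)
simcc_y = np.zeros((1, 2, 8), dtype=np.float32)
simcc_x[0, 0, 3] = 0.9; simcc_y[0, 0, 5] = 0.8  # keypoint 0 peaks at (3, 5)
simcc_x[0, 1, 6] = 0.4; simcc_y[0, 1, 1] = 0.6  # keypoint 1 peaks at (6, 1)

locs, vals = get_simcc_maximum(simcc_x, simcc_y)
print(locs[0])  # [[3. 5.] [6. 1.]]
print(vals[0])  # [0.8 0.4]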
def decode(simcc_x: np.ndarray, simcc_y: np.ndarray,
           simcc_split_ratio) -> Tuple[np.ndarray, np.ndarray]:
    """Modulate simcc distribution with Gaussian.

    Args:
        simcc_x (np.ndarray[K, Wx]): model predicted simcc in x.
        simcc_y (np.ndarray[K, Wy]): model predicted simcc in y.
        simcc_split_ratio (int): The split ratio of simcc.

    Returns:
        tuple: A tuple containing center and scale.
        - np.ndarray[float32]: keypoints in shape (K, 2) or (n, K, 2)
        - np.ndarray[float32]: scores in shape (K,) or (n, K)
    """
    keypoints, scores = get_simcc_maximum(simcc_x, simcc_y)
    keypoints /= simcc_split_ratio

    return keypoints, scores


def inference_pose(model, out_bbox, oriImg, model_input_size=(288, 384)):
    resized_img, center, scale = preprocess(oriImg, out_bbox, model_input_size)
    #outputs = inference(session, resized_img, dtype)
    outputs = inference(model, resized_img)
    keypoints, scores = postprocess(outputs, model_input_size, center, scale)

    return keypoints, scores
\ No newline at end of file
mimicmotion/dwpose/wholebody.py

 import numpy as np
-import onnxruntime as ort
-
-from .onnxdet import inference_detector
-from .onnxpose import inference_pose
 import comfy.model_management as mm

+#import onnxruntime as ort
+# from .onnxdet import inference_detector
+# from .onnxpose import inference_pose
+
+from .jit_det import inference_detector as inference_jit_yolox
+from .jit_pose import inference_pose as inference_jit_pose
+
 class Wholebody:
     """detect human pose by dwpose
     """
-    def __init__(self, model_det, model_pose, device="cpu"):
-        providers = ['CPUExecutionProvider'] if device == 'cpu' else ['CUDAExecutionProvider']
-        provider_options = None if device == 'cpu' else [{'device_id': 0}]
-        self.session_det = ort.InferenceSession(
-            path_or_bytes=model_det, providers=providers, provider_options=provider_options
-        )
-        self.session_pose = ort.InferenceSession(
-            path_or_bytes=model_pose, providers=providers, provider_options=provider_options
-        )
+    def __init__(self, model_det, model_pose):
+        #providers = ['CPUExecutionProvider'] if device == 'cpu' else ['CUDAExecutionProvider']
+        #provider_options = None if device == 'cpu' else [{'device_id': 0}]
+        # self.session_det = ort.InferenceSession(
+        #     path_or_bytes=model_det, providers=providers, provider_options=provider_options
+        # )
+        # self.session_pose = ort.InferenceSession(
+        #     path_or_bytes=model_pose, providers=providers, provider_options=provider_options
+        # )
+        self.det = model_det
+        self.pose = model_pose

     def __call__(self, oriImg):
         """call to process dwpose-detect

@@ -26,8 +33,9 @@ class Wholebody:
             oriImg (np.ndarray): detected image
         """
-        det_result = inference_detector(self.session_det, oriImg)
-        keypoints, scores = inference_pose(self.session_pose, det_result, oriImg)
+        det_result = inference_jit_yolox(self.det, oriImg, detect_classes=[0])
+        keypoints, scores = inference_jit_pose(self.pose, det_result, oriImg)

         keypoints_info = np.concatenate(
             (keypoints, scores[..., None]), axis=-1)
mimicmotion/pipelines/pipeline_mimicmotion.py

@@ -635,10 +635,10 @@ class MimicMotionPipeline(DiffusionPipeline):
                 # Check if the current timestep is within the start and end step range
                 if start_step_index <= i <= end_step_index:
                     # Apply pose_latents as currently done
-                    print(f"Applying pose on step {i}")
+                    # print(f"Applying pose on step {i}")
                     pose_latents_to_use = pose_latents[:, idx].flatten(0, 1)
                 else:
-                    print(f"Not applying pose on step {i}")
+                    # print(f"Not applying pose on step {i}")
                     # Apply an alternative if pose_latents should not be used outside this range
                     # This could be zeros, or any other placeholder logic you define.
                     pose_latents_to_use = torch.zeros_like(pose_latents[:, idx].flatten(0, 1))
nodes.py

@@ -19,6 +19,10 @@
 from .mimicmotion.modules.pose_net import PoseNet
 from .lcm_scheduler import AnimateLCMSVDStochasticIterativeScheduler

+from accelerate import init_empty_weights
+from accelerate.utils import set_module_tensor_to_device
+
 def loglinear_interp(t_steps, num_steps):
     """
     Performs log-linear interpolation of a given array of decreasing numbers.

@@ -59,7 +63,8 @@ class DownloadAndLoadMimicMotionModel:
     def INPUT_TYPES(s):
         return {
             "required": {
                 "model": (
                     [
-                        'MimicMotion-fp16.safetensors',
+                        'MimicMotionMergedUnet_1-0-fp16.safetensors',
+                        'MimicMotionMergedUnet_1-1-fp16.safetensors',
                     ],
                 ),
                 "precision": (

@@ -70,8 +75,6 @@ class DownloadAndLoadMimicMotionModel:
                     ],
                     {"default": 'fp16'}
                 ),
-                "lcm": ("BOOLEAN", {"default": False}),
-
             },
         }

@@ -80,12 +83,12 @@ class DownloadAndLoadMimicMotionModel:
     FUNCTION = "loadmodel"
     CATEGORY = "MimicMotionWrapper"

-    def loadmodel(self, precision, model, lcm):
+    def loadmodel(self, precision, model):
         device = mm.get_torch_device()
         mm.soft_empty_cache()
         dtype = {"bf16": torch.bfloat16, "fp16": torch.float16, "fp32": torch.float32}[precision]
-        pbar = comfy.utils.ProgressBar(3)
+        pbar = comfy.utils.ProgressBar(5)

         download_path = os.path.join(folder_paths.models_dir, "mimicmotion")
         model_path = os.path.join(download_path, model)

@@ -102,57 +105,53 @@ class DownloadAndLoadMimicMotionModel:
         pbar.update(1)

         svd_path = os.path.join(folder_paths.models_dir, "diffusers", "stable-video-diffusion-img2vid-xt-1-1")
-        svd_lcm_path = os.path.join(folder_paths.models_dir, "diffusers", "stable-video-diffusion-img2vid-xt-1-1-lcm", "unet_lcm")
-        if lcm and not os.path.exists(svd_lcm_path):
-            print(f"Downloading AnimateLCM SVD model to: {model_path}")
+        if not os.path.exists(svd_path):
+            print(f"Downloading SVD model to: {model_path}")
             from huggingface_hub import snapshot_download
-            snapshot_download(repo_id="Kijai/AnimateLCM-SVD-Comfy",
-                              allow_patterns=[f"*.json", "*diffusion_pytorch_model.fp16.safetensors*"],
+            snapshot_download(repo_id="vdo/stable-video-diffusion-img2vid-xt-1-1",
+                              allow_patterns=[f"*.json", "*fp16*"],
+                              ignore_patterns=["*unet*"],
                               local_dir=svd_path, local_dir_use_symlinks=False)
-        else:
-            if not os.path.exists(svd_path):
-                print(f"Downloading SVD model to: {model_path}")
-                from huggingface_hub import snapshot_download
-                snapshot_download(repo_id="vdo/stable-video-diffusion-img2vid-xt-1-1",
-                                  allow_patterns=[f"*.json", "*fp16*"],
-                                  local_dir=svd_path, local_dir_use_symlinks=False)
         pbar.update(1)

-        mimicmotion_models = MimicMotionModel(svd_path, lcm=lcm).to(device=device).eval()
-        mimic_motion_sd = comfy.utils.load_torch_file(model_path)
-        mimicmotion_models.load_state_dict(mimic_motion_sd, strict=False)
+        unet_config = UNetSpatioTemporalConditionModel.load_config(svd_path, subfolder="unet", variant="fp16")
+        print("Loading UNET")
+        with (init_empty_weights()):
+            self.unet = UNetSpatioTemporalConditionModel.from_config(unet_config)
+        sd = comfy.utils.load_torch_file(os.path.join(model_path))
+        for key in sd:
+            set_module_tensor_to_device(self.unet, key, dtype=dtype, device=device, value=sd[key])
+        del sd
+        pbar.update(1)

-        if lcm:
-            lcm_noise_scheduler = AnimateLCMSVDStochasticIterativeScheduler(
-                num_train_timesteps=40,
-                sigma_min=0.002,
-                sigma_max=700.0,
-                sigma_data=1.0,
-                s_noise=1.0,
-                rho=7,
-                clip_denoised=False,
-            )
-            scheduler = lcm_noise_scheduler
-        else:
-            scheduler = mimicmotion_models.noise_scheduler
+        print("Loading VAE")
+        self.vae = AutoencoderKLTemporalDecoder.from_pretrained(svd_path, subfolder="vae", variant="fp16", low_cpu_mem_usage=True).to(dtype).to(device).eval()
+        print("Loading IMAGE_ENCODER")
+        self.image_encoder = CLIPVisionModelWithProjection.from_pretrained(svd_path, subfolder="image_encoder", variant="fp16", low_cpu_mem_usage=True).to(dtype).to(device).eval()
+        pbar.update(1)
+        self.noise_scheduler = EulerDiscreteScheduler.from_pretrained(svd_path, subfolder="scheduler")
+        self.feature_extractor = CLIPImageProcessor.from_pretrained(svd_path, subfolder="feature_extractor")
+        print("Loading POSE_NET")
+        self.pose_net = PoseNet(noise_latent_channels=self.unet.config.block_out_channels[0]).to(dtype).to(device).eval()
+        pose_net_sd = comfy.utils.load_torch_file(os.path.join(script_directory, 'models', 'mimic_motion_pose_net.safetensors'))
+        self.unet.load_state_dict(pose_net_sd, strict=False)
+        self.pose_net.load_state_dict(pose_net_sd, strict=False)
+        del pose_net_sd

         pipeline = MimicMotionPipeline(
-            vae=mimicmotion_models.vae,
-            image_encoder=mimicmotion_models.image_encoder,
-            unet=mimicmotion_models.unet,
-            scheduler=scheduler,
-            feature_extractor=mimicmotion_models.feature_extractor,
-            pose_net=mimicmotion_models.pose_net,
+            vae=self.vae,
+            image_encoder=self.image_encoder,
+            unet=self.unet,
+            scheduler=self.noise_scheduler,
+            feature_extractor=self.feature_extractor,
+            pose_net=self.pose_net,
         )
+        pipeline.unet.to(dtype)
+        pipeline.pose_net.to(dtype)
+        pipeline.vae.to(dtype)
+        pipeline.image_encoder.to(dtype)
         mimic_model = {
             'pipeline': pipeline,
             'dtype': dtype
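The rewritten loadmodel uses Accelerate's meta-device pattern: the UNet skeleton is built under init_empty_weights (no real storage allocated), then each checkpoint tensor is materialized directly at the target device and dtype with set_module_tensor_to_device. A minimal self-contained sketch of the same pattern on a toy module (not the actual UNet):

import torch
from accelerate import init_empty_weights
from accelerate.utils import set_module_tensor_to_device

# build the module without allocating weight storage (tensors live on "meta")
with init_empty_weights():
    model = torch.nn.Linear(4, 4)

# stand-in for comfy.utils.load_torch_file(model_path)
sd = {"weight": torch.randn(4, 4), "bias": torch.randn(4)}

# materialize each tensor straight onto the target device/dtype, skipping
# the usual full-precision CPU copy of the whole model
for key in sd:
    set_module_tensor_to_device(model, key, device="cpu",
                                dtype=torch.float16, value=sd[key])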
@@ -266,7 +265,7 @@ class MimicMotionSampler:
         original_scheduler = pipeline.scheduler
         if optional_scheduler is not None:
-            print("Using optional scheduler: ", optional_scheduler)
+            print("Using optional scheduler: ", optional_scheduler['noise_scheduler'])
             pipeline.scheduler = optional_scheduler['noise_scheduler']
             sigmas = optional_scheduler['sigmas']

@@ -375,13 +374,17 @@ class MimicMotionGetPoses:
     def process(self, ref_image, pose_images, include_body, include_hand, include_face):
         device = mm.get_torch_device()
+        offload_device = mm.unet_offload_device()
         from .mimicmotion.dwpose.util import draw_pose
         from .mimicmotion.dwpose.dwpose_detector import DWposeDetector

         assert ref_image.shape[1:3] == pose_images.shape[1:3], "ref_image and pose_images must have the same resolution"

-        yolo_model = "yolox_l.onnx"
-        dw_pose_model = "dw-ll_ucoco_384.onnx"
+        #yolo_model = "yolox_l.onnx"
+        #dw_pose_model = "dw-ll_ucoco_384.onnx"
+
+        dw_pose_model = "dw-ll_ucoco_384_bs5.torchscript.pt"
+        yolo_model = "yolox_l.torchscript.pt"

         model_base_path = os.path.join(script_directory, "models", "DWPose")
         model_det = os.path.join(model_base_path, yolo_model)

@@ -390,7 +393,7 @@ class MimicMotionGetPoses:
         if not os.path.exists(model_det):
             print(f"Downloading yolo model to: {model_base_path}")
             from huggingface_hub import snapshot_download
-            snapshot_download(repo_id="yzd-v/DWPose",
+            snapshot_download(repo_id="hr16/yolox-onnx",
                               allow_patterns=[f"*{yolo_model}*"],
                               local_dir=model_base_path,
                               local_dir_use_symlinks=False)

@@ -398,23 +401,34 @@ class MimicMotionGetPoses:
         if not os.path.exists(model_pose):
             print(f"Downloading dwpose model to: {model_base_path}")
             from huggingface_hub import snapshot_download
-            snapshot_download(repo_id="yzd-v/DWPose",
+            snapshot_download(repo_id="hr16/DWPose-TorchScript-BatchSize5",
                               allow_patterns=[f"*{dw_pose_model}*"],
                               local_dir=model_base_path,
                               local_dir_use_symlinks=False)

-        dwprocessor = DWposeDetector(
-            model_det=os.path.join(model_base_path, "yolox_l.onnx"),
-            model_pose=os.path.join(model_base_path, "dw-ll_ucoco_384.onnx"),
-            device=device)
+        model_det = os.path.join(model_base_path, yolo_model)
+        model_pose = os.path.join(model_base_path, dw_pose_model)
+
+        if not hasattr(self, "det") or not hasattr(self, "pose"):
+            self.det = torch.jit.load(model_det)
+            self.pose = torch.jit.load(model_pose)
+
+            self.dwprocessor = DWposeDetector(
+                model_det=self.det,
+                model_pose=self.pose)

         ref_image = ref_image.squeeze(0).cpu().numpy() * 255
+
+        self.det = self.det.to(device)
+        self.pose = self.pose.to(device)

         # select ref-keypoint from reference pose for pose rescale
-        ref_pose = dwprocessor(ref_image)
-        ref_keypoint_id = [0, 1, 2, 5, 8, 11, 14, 15, 16, 17]
+        ref_pose = self.dwprocessor(ref_image)
+        #ref_keypoint_id = [0, 1, 2, 5, 8, 11, 14, 15, 16, 17]
+        ref_keypoint_id = [0, 1, 2, 5, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
         ref_keypoint_id = [i for i in ref_keypoint_id \
-            if ref_pose['bodies']['score'].shape[0] > 0 and ref_pose['bodies']['score'][0][i] > 0.3]
+            #if ref_pose['bodies']['score'].shape[0] > 0 and ref_pose['bodies']['score'][0][i] > 0.3]
+            if len(ref_pose['bodies']['subset']) > 0 and ref_pose['bodies']['subset'][0][i] >= .0]
         ref_body = ref_pose['bodies']['candidate'][ref_keypoint_id]

         height, width, _ = ref_image.shape

@@ -424,9 +438,12 @@ class MimicMotionGetPoses:
         pbar = comfy.utils.ProgressBar(len(pose_images_np))
         detected_poses_np_list = []
         for img_np in pose_images_np:
-            detected_poses_np_list.append(dwprocessor(img_np))
+            detected_poses_np_list.append(self.dwprocessor(img_np))
             pbar.update(1)

+        self.det = self.det.to(offload_device)
+        self.pose = self.pose.to(offload_device)
+
         detected_bodies = np.stack([p['bodies']['candidate'] for p in detected_poses_np_list
                                     if p['bodies']['candidate'].shape[0] == 18])[:, ref_keypoint_id]
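Taken together, MimicMotionGetPoses now loads the TorchScript models once, caches them on the node instance via hasattr, and shuttles them between the compute and offload devices around the detection loop. The gist, reduced to a sketch (class name is illustrative; paths are the wrapper's defaults):

import torch

class PoseModelCache:
    """Sketch of the hasattr-based caching and offloading in MimicMotionGetPoses."""

    det_path = "models/DWPose/yolox_l.torchscript.pt"
    pose_path = "models/DWPose/dw-ll_ucoco_384_bs5.torchscript.pt"

    def acquire(self, device):
        # load once; the attributes persist across node executions
        if not hasattr(self, "det") or not hasattr(self, "pose"):
            self.det = torch.jit.load(self.det_path)
            self.pose = torch.jit.load(self.pose_path)
        # move to the compute device for the detection loop
        self.det = self.det.to(device)
        self.pose = self.pose.to(device)
        return self.det, self.pose

    def release(self, offload_device):
        # after the loop, park the models on the offload device to free VRAM
        self.det = self.det.to(offload_device)
        self.pose = self.pose.to(offload_device)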