You're looking at a specific version of this model. Jump to the model overview.
Input
Run this model in Node.js with one line of code:
npm install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
import Replicate from "replicate";
const replicate = new Replicate({
auth: process.env.REPLICATE_API_TOKEN,
});
Run zsxkib/v-express using Replicate's API. Check out the model's schema for an overview of inputs and outputs.
const output = await replicate.run(
"zsxkib/v-express:f3400fd305d761778fea70c22b8951356187d4096ab5fc1a553d1333095cf7c6",
{
input: {
audio_path: "https://replicate.delivery/pbxt/L3WYDN9aRYHe4jqstFC1gNYslCrtmd3oAjGoCSMEDSAg8jsn/aud.mp3",
reference_image: "https://replicate.delivery/pbxt/L3WYDjrlsOiUQMAMjtIcopmNH79WM3EwxYUSMKdGYm6Gklz5/out_1.webp",
retarget_strategy: "fix_face",
num_inference_steps: 25,
audio_attention_weight: 3,
reference_attention_weight: 0.95
}
}
);
console.log(output);
To learn more, take a look at the guide on getting started with Node.js.
pip install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
import replicate
Run zsxkib/v-express using Replicate's API. Check out the model's schema for an overview of inputs and outputs.
output = replicate.run(
"zsxkib/v-express:f3400fd305d761778fea70c22b8951356187d4096ab5fc1a553d1333095cf7c6",
input={
"audio_path": "https://replicate.delivery/pbxt/L3WYDN9aRYHe4jqstFC1gNYslCrtmd3oAjGoCSMEDSAg8jsn/aud.mp3",
"reference_image": "https://replicate.delivery/pbxt/L3WYDjrlsOiUQMAMjtIcopmNH79WM3EwxYUSMKdGYm6Gklz5/out_1.webp",
"retarget_strategy": "fix_face",
"num_inference_steps": 25,
"audio_attention_weight": 3,
"reference_attention_weight": 0.95
}
)
print(output)
To learn more, take a look at the guide on getting started with Python.
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
Run zsxkib/v-express using Replicate's API. Check out the model's schema for an overview of inputs and outputs.
curl -s -X POST \
-H "Authorization: Bearer $REPLICATE_API_TOKEN" \
-H "Content-Type: application/json" \
-H "Prefer: wait" \
-d $'{
"version": "f3400fd305d761778fea70c22b8951356187d4096ab5fc1a553d1333095cf7c6",
"input": {
"audio_path": "https://replicate.delivery/pbxt/L3WYDN9aRYHe4jqstFC1gNYslCrtmd3oAjGoCSMEDSAg8jsn/aud.mp3",
"reference_image": "https://replicate.delivery/pbxt/L3WYDjrlsOiUQMAMjtIcopmNH79WM3EwxYUSMKdGYm6Gklz5/out_1.webp",
"retarget_strategy": "fix_face",
"num_inference_steps": 25,
"audio_attention_weight": 3,
"reference_attention_weight": 0.95
}
}' \
https://api.replicate.com/v1/predictions
To learn more, take a look at Replicate's HTTP API reference docs.
Add a payment method to run this model.
By signing in, you agree to our
terms of service and privacy policy
Output
{
"completed_at": "2024-06-07T16:43:07.414221Z",
"created_at": "2024-06-07T16:35:49.429000Z",
"data_removed": false,
"error": null,
"id": "qvktt6bg6nrgp0cfye780czr88",
"input": {
"audio_path": "https://replicate.delivery/pbxt/L3WYDN9aRYHe4jqstFC1gNYslCrtmd3oAjGoCSMEDSAg8jsn/aud.mp3",
"reference_image": "https://replicate.delivery/pbxt/L3WYDjrlsOiUQMAMjtIcopmNH79WM3EwxYUSMKdGYm6Gklz5/out_1.webp",
"retarget_strategy": "fix_face",
"num_inference_steps": 25,
"audio_attention_weight": 3,
"reference_attention_weight": 0.95
},
"logs": "/root/.pyenv/versions/3.11.9/lib/python3.11/site-packages/insightface/utils/transform.py:68: FutureWarning: `rcond` parameter will change to the default of machine precision times ``max(M, N)`` where M and N are the input matrix dimensions.\nTo use the future default and silence this warning we advise to pass `rcond=None`, to keep using the old, explicitly pass `rcond=-1`.\nP = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4\n/src/predict.py:224: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at ../torch/csrc/utils/tensor_new.cpp:245.)\nkps_sequence = torch.tensor([reference_kps] * video_length)\n/src/pipelines/v_express_pipeline.py:516: FutureWarning: Accessing config attribute `in_channels` directly via 'UNet3DConditionModel' object attribute is deprecated. Please access 'in_channels' over 'UNet3DConditionModel's config object instead, e.g. 
'unet.config.in_channels'.\nnum_channels_latents = self.denoising_unet.in_channels\n 0%| | 0/25 [00:00<?, ?it/s]\n 4%|β | 1/25 [00:08<03:28, 8.67s/it]\n 8%|β | 2/25 [00:17<03:18, 8.62s/it]\n 12%|ββ | 3/25 [00:25<03:09, 8.62s/it]\n 16%|ββ | 4/25 [00:34<03:01, 8.62s/it]\n 20%|ββ | 5/25 [00:43<02:52, 8.63s/it]\n 24%|βββ | 6/25 [00:51<02:44, 8.65s/it]\n 28%|βββ | 7/25 [01:00<02:36, 8.67s/it]\n 32%|ββββ | 8/25 [01:09<02:27, 8.68s/it]\n 36%|ββββ | 9/25 [01:17<02:19, 8.69s/it]\n 40%|ββββ | 10/25 [01:26<02:10, 8.70s/it]\n 44%|βββββ | 11/25 [01:35<02:01, 8.71s/it]\n 48%|βββββ | 12/25 [01:44<01:53, 8.72s/it]\n 52%|ββββββ | 13/25 [01:52<01:44, 8.73s/it]\n 56%|ββββββ | 14/25 [02:01<01:36, 8.73s/it]\n 60%|ββββββ | 15/25 [02:10<01:27, 8.74s/it]\n 64%|βββββββ | 16/25 [02:19<01:18, 8.75s/it]\n 68%|βββββββ | 17/25 [02:27<01:10, 8.76s/it]\n 72%|ββββββββ | 18/25 [02:36<01:01, 8.77s/it]\n 76%|ββββββββ | 19/25 [02:45<00:52, 8.77s/it]\n 80%|ββββββββ | 20/25 [02:54<00:43, 8.77s/it]\n 84%|βββββββββ | 21/25 [03:03<00:35, 8.77s/it]\n 88%|βββββββββ | 22/25 [03:11<00:26, 8.78s/it]\n 92%|ββββββββββ| 23/25 [03:20<00:17, 8.78s/it]\n 96%|ββββββββββ| 24/25 [03:29<00:08, 8.78s/it]\n100%|ββββββββββ| 25/25 [03:38<00:00, 8.78s/it]\n100%|ββββββββββ| 25/25 [03:38<00:00, 8.73s/it]\n 0%| | 0/120 [00:00<?, ?it/s]\n 4%|β | 5/120 [00:00<00:02, 43.24it/s]\n 8%|β | 10/120 [00:00<00:04, 23.74it/s]\n 11%|β | 13/120 [00:00<00:04, 21.45it/s]\n 13%|ββ | 16/120 [00:00<00:05, 20.21it/s]\n 16%|ββ | 19/120 [00:00<00:05, 19.46it/s]\n 18%|ββ | 22/120 [00:01<00:05, 18.99it/s]\n 20%|ββ | 24/120 [00:01<00:05, 18.76it/s]\n 22%|βββ | 26/120 [00:01<00:05, 18.57it/s]\n 23%|βββ | 28/120 [00:01<00:04, 18.43it/s]\n 25%|βββ | 30/120 [00:01<00:04, 18.31it/s]\n 27%|βββ | 32/120 [00:01<00:04, 18.23it/s]\n 28%|βββ | 34/120 [00:01<00:04, 18.17it/s]\n 30%|βββ | 36/120 [00:01<00:04, 18.14it/s]\n 32%|ββββ | 38/120 [00:01<00:04, 18.11it/s]\n 33%|ββββ | 40/120 [00:02<00:04, 18.09it/s]\n 35%|ββββ | 42/120 [00:02<00:04, 18.07it/s]\n 37%|ββββ | 
44/120 [00:02<00:04, 18.06it/s]\n 38%|ββββ | 46/120 [00:02<00:04, 18.05it/s]\n 40%|ββββ | 48/120 [00:02<00:03, 18.05it/s]\n 42%|βββββ | 50/120 [00:02<00:03, 18.04it/s]\n 43%|βββββ | 52/120 [00:02<00:03, 18.05it/s]\n 45%|βββββ | 54/120 [00:02<00:03, 18.04it/s]\n 47%|βββββ | 56/120 [00:02<00:03, 18.04it/s]\n 48%|βββββ | 58/120 [00:03<00:03, 18.04it/s]\n 50%|βββββ | 60/120 [00:03<00:03, 18.05it/s]\n 52%|ββββββ | 62/120 [00:03<00:03, 18.05it/s]\n 53%|ββββββ | 64/120 [00:03<00:03, 18.04it/s]\n 55%|ββββββ | 66/120 [00:03<00:02, 18.05it/s]\n 57%|ββββββ | 68/120 [00:03<00:02, 18.04it/s]\n 58%|ββββββ | 70/120 [00:03<00:02, 18.05it/s]\n 60%|ββββββ | 72/120 [00:03<00:02, 18.05it/s]\n 62%|βββββββ | 74/120 [00:03<00:02, 18.05it/s]\n 63%|βββββββ | 76/120 [00:04<00:02, 18.03it/s]\n 65%|βββββββ | 78/120 [00:04<00:02, 18.03it/s]\n 67%|βββββββ | 80/120 [00:04<00:02, 18.04it/s]\n 68%|βββββββ | 82/120 [00:04<00:02, 18.05it/s]\n 70%|βββββββ | 84/120 [00:04<00:01, 18.05it/s]\n 72%|ββββββββ | 86/120 [00:04<00:01, 18.05it/s]\n 73%|ββββββββ | 88/120 [00:04<00:01, 18.04it/s]\n 75%|ββββββββ | 90/120 [00:04<00:01, 18.03it/s]\n 77%|ββββββββ | 92/120 [00:04<00:01, 18.03it/s]\n 78%|ββββββββ | 94/120 [00:05<00:01, 18.04it/s]\n 80%|ββββββββ | 96/120 [00:05<00:01, 18.04it/s]\n 82%|βββββββββ | 98/120 [00:05<00:01, 18.04it/s]\n 83%|βββββββββ | 100/120 [00:05<00:01, 18.03it/s]\n 85%|βββββββββ | 102/120 [00:05<00:00, 18.04it/s]\n 87%|βββββββββ | 104/120 [00:05<00:00, 18.04it/s]\n 88%|βββββββββ | 106/120 [00:05<00:00, 18.05it/s]\n 90%|βββββββββ | 108/120 [00:05<00:00, 18.04it/s]\n 92%|ββββββββββ| 110/120 [00:05<00:00, 18.04it/s]\n 93%|ββββββββββ| 112/120 [00:06<00:00, 18.04it/s]\n 95%|ββββββββββ| 114/120 [00:06<00:00, 18.04it/s]\n 97%|ββββββββββ| 116/120 [00:06<00:00, 18.04it/s]\n 98%|ββββββββββ| 118/120 [00:06<00:00, 18.05it/s]\n100%|ββββββββββ| 120/120 [00:06<00:00, 18.04it/s]\n100%|ββββββββββ| 120/120 [00:06<00:00, 18.49it/s]",
"metrics": {
"predict_time": 267.307416,
"total_time": 437.985221
},
"output": "https://replicate.delivery/pbxt/CCAYLqwVsXJfNq5WnL7tl9alHibYEHzqlqMhThfbwrxaqO8SA/output_video.mp4",
"started_at": "2024-06-07T16:38:40.106805Z",
"status": "succeeded",
"urls": {
"get": "https://api.replicate.com/v1/predictions/qvktt6bg6nrgp0cfye780czr88",
"cancel": "https://api.replicate.com/v1/predictions/qvktt6bg6nrgp0cfye780czr88/cancel"
},
"version": "f3400fd305d761778fea70c22b8951356187d4096ab5fc1a553d1333095cf7c6"
}
/root/.pyenv/versions/3.11.9/lib/python3.11/site-packages/insightface/utils/transform.py:68: FutureWarning: `rcond` parameter will change to the default of machine precision times ``max(M, N)`` where M and N are the input matrix dimensions.
To use the future default and silence this warning we advise to pass `rcond=None`, to keep using the old, explicitly pass `rcond=-1`.
P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4
/src/predict.py:224: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at ../torch/csrc/utils/tensor_new.cpp:245.)
kps_sequence = torch.tensor([reference_kps] * video_length)
/src/pipelines/v_express_pipeline.py:516: FutureWarning: Accessing config attribute `in_channels` directly via 'UNet3DConditionModel' object attribute is deprecated. Please access 'in_channels' over 'UNet3DConditionModel's config object instead, e.g. 'unet.config.in_channels'.
num_channels_latents = self.denoising_unet.in_channels
0%| | 0/25 [00:00<?, ?it/s]
4%|β | 1/25 [00:08<03:28, 8.67s/it]
8%|β | 2/25 [00:17<03:18, 8.62s/it]
12%|ββ | 3/25 [00:25<03:09, 8.62s/it]
16%|ββ | 4/25 [00:34<03:01, 8.62s/it]
20%|ββ | 5/25 [00:43<02:52, 8.63s/it]
24%|βββ | 6/25 [00:51<02:44, 8.65s/it]
28%|βββ | 7/25 [01:00<02:36, 8.67s/it]
32%|ββββ | 8/25 [01:09<02:27, 8.68s/it]
36%|ββββ | 9/25 [01:17<02:19, 8.69s/it]
40%|ββββ | 10/25 [01:26<02:10, 8.70s/it]
44%|βββββ | 11/25 [01:35<02:01, 8.71s/it]
48%|βββββ | 12/25 [01:44<01:53, 8.72s/it]
52%|ββββββ | 13/25 [01:52<01:44, 8.73s/it]
56%|ββββββ | 14/25 [02:01<01:36, 8.73s/it]
60%|ββββββ | 15/25 [02:10<01:27, 8.74s/it]
64%|βββββββ | 16/25 [02:19<01:18, 8.75s/it]
68%|βββββββ | 17/25 [02:27<01:10, 8.76s/it]
72%|ββββββββ | 18/25 [02:36<01:01, 8.77s/it]
76%|ββββββββ | 19/25 [02:45<00:52, 8.77s/it]
80%|ββββββββ | 20/25 [02:54<00:43, 8.77s/it]
84%|βββββββββ | 21/25 [03:03<00:35, 8.77s/it]
88%|βββββββββ | 22/25 [03:11<00:26, 8.78s/it]
92%|ββββββββββ| 23/25 [03:20<00:17, 8.78s/it]
96%|ββββββββββ| 24/25 [03:29<00:08, 8.78s/it]
100%|ββββββββββ| 25/25 [03:38<00:00, 8.78s/it]
100%|ββββββββββ| 25/25 [03:38<00:00, 8.73s/it]
0%| | 0/120 [00:00<?, ?it/s]
4%|β | 5/120 [00:00<00:02, 43.24it/s]
8%|β | 10/120 [00:00<00:04, 23.74it/s]
11%|β | 13/120 [00:00<00:04, 21.45it/s]
13%|ββ | 16/120 [00:00<00:05, 20.21it/s]
16%|ββ | 19/120 [00:00<00:05, 19.46it/s]
18%|ββ | 22/120 [00:01<00:05, 18.99it/s]
20%|ββ | 24/120 [00:01<00:05, 18.76it/s]
22%|βββ | 26/120 [00:01<00:05, 18.57it/s]
23%|βββ | 28/120 [00:01<00:04, 18.43it/s]
25%|βββ | 30/120 [00:01<00:04, 18.31it/s]
27%|βββ | 32/120 [00:01<00:04, 18.23it/s]
28%|βββ | 34/120 [00:01<00:04, 18.17it/s]
30%|βββ | 36/120 [00:01<00:04, 18.14it/s]
32%|ββββ | 38/120 [00:01<00:04, 18.11it/s]
33%|ββββ | 40/120 [00:02<00:04, 18.09it/s]
35%|ββββ | 42/120 [00:02<00:04, 18.07it/s]
37%|ββββ | 44/120 [00:02<00:04, 18.06it/s]
38%|ββββ | 46/120 [00:02<00:04, 18.05it/s]
40%|ββββ | 48/120 [00:02<00:03, 18.05it/s]
42%|βββββ | 50/120 [00:02<00:03, 18.04it/s]
43%|βββββ | 52/120 [00:02<00:03, 18.05it/s]
45%|βββββ | 54/120 [00:02<00:03, 18.04it/s]
47%|βββββ | 56/120 [00:02<00:03, 18.04it/s]
48%|βββββ | 58/120 [00:03<00:03, 18.04it/s]
50%|βββββ | 60/120 [00:03<00:03, 18.05it/s]
52%|ββββββ | 62/120 [00:03<00:03, 18.05it/s]
53%|ββββββ | 64/120 [00:03<00:03, 18.04it/s]
55%|ββββββ | 66/120 [00:03<00:02, 18.05it/s]
57%|ββββββ | 68/120 [00:03<00:02, 18.04it/s]
58%|ββββββ | 70/120 [00:03<00:02, 18.05it/s]
60%|ββββββ | 72/120 [00:03<00:02, 18.05it/s]
62%|βββββββ | 74/120 [00:03<00:02, 18.05it/s]
63%|βββββββ | 76/120 [00:04<00:02, 18.03it/s]
65%|βββββββ | 78/120 [00:04<00:02, 18.03it/s]
67%|βββββββ | 80/120 [00:04<00:02, 18.04it/s]
68%|βββββββ | 82/120 [00:04<00:02, 18.05it/s]
70%|βββββββ | 84/120 [00:04<00:01, 18.05it/s]
72%|ββββββββ | 86/120 [00:04<00:01, 18.05it/s]
73%|ββββββββ | 88/120 [00:04<00:01, 18.04it/s]
75%|ββββββββ | 90/120 [00:04<00:01, 18.03it/s]
77%|ββββββββ | 92/120 [00:04<00:01, 18.03it/s]
78%|ββββββββ | 94/120 [00:05<00:01, 18.04it/s]
80%|ββββββββ | 96/120 [00:05<00:01, 18.04it/s]
82%|βββββββββ | 98/120 [00:05<00:01, 18.04it/s]
83%|βββββββββ | 100/120 [00:05<00:01, 18.03it/s]
85%|βββββββββ | 102/120 [00:05<00:00, 18.04it/s]
87%|βββββββββ | 104/120 [00:05<00:00, 18.04it/s]
88%|βββββββββ | 106/120 [00:05<00:00, 18.05it/s]
90%|βββββββββ | 108/120 [00:05<00:00, 18.04it/s]
92%|ββββββββββ| 110/120 [00:05<00:00, 18.04it/s]
93%|ββββββββββ| 112/120 [00:06<00:00, 18.04it/s]
95%|ββββββββββ| 114/120 [00:06<00:00, 18.04it/s]
97%|ββββββββββ| 116/120 [00:06<00:00, 18.04it/s]
98%|ββββββββββ| 118/120 [00:06<00:00, 18.05it/s]
100%|ββββββββββ| 120/120 [00:06<00:00, 18.04it/s]
100%|ββββββββββ| 120/120 [00:06<00:00, 18.49it/s]