typefile
{
"diffusion_inference_steps": 1000,
"key_shift_mode": 0,
"pitch_shift_control": "Auto Shift",
"source_audio": "https://replicate.delivery/pbxt/K5coMzCs7mnhljhRVhdhN29I3RlHPkneVxrbPtyArzxvAVtI/adele.wav",
"target_singer": "Taylor Swift"
}
npm install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=r8_7mM**********************************
This is your API token. Keep it to yourself.
import Replicate from "replicate";
import fs from "node:fs";
// Create the API client, authenticating with the token stored in the
// REPLICATE_API_TOKEN environment variable.
const replicate = new Replicate({ auth: process.env.REPLICATE_API_TOKEN });
Run lucataco/singing_voice_conversion using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
// Run the pinned model version and wait for the result. The model inputs are
// passed under the `input` key of the options object.
const output = await replicate.run(
  "lucataco/singing_voice_conversion:f29872ee3557e0186735048f1d6de98a52518ae5c49e19453b3fdaad710bdc2b",
  {
    input: {
      diffusion_inference_steps: 1000,
      key_shift_mode: 0,
      pitch_shift_control: "Auto Shift",
      source_audio: "https://replicate.delivery/pbxt/K5coMzCs7mnhljhRVhdhN29I3RlHPkneVxrbPtyArzxvAVtI/adele.wav",
      target_singer: "Taylor Swift",
    },
  },
);

// To access the file URL:
console.log(output.url()); //=> "http://example.com"

// To write the file to disk:
// The callback-style fs.writeFile() throws when no callback is supplied, so
// use the promise-based API and await it so the write completes (and any error
// surfaces) before the script continues. This model produces a WAV file, hence
// the .wav extension.
await fs.promises.writeFile("output.wav", output);
To learn more, take a look at the guide on getting started with Node.js.
pip install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=r8_7mM**********************************
This is your API token. Keep it to yourself.
import replicate
Run lucataco/singing_voice_conversion using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# Run the pinned model version; the model inputs are passed via `input`.
output = replicate.run(
    "lucataco/singing_voice_conversion:f29872ee3557e0186735048f1d6de98a52518ae5c49e19453b3fdaad710bdc2b",
    input={
        "diffusion_inference_steps": 1000,
        "key_shift_mode": 0,
        "pitch_shift_control": "Auto Shift",
        "source_audio": "https://replicate.delivery/pbxt/K5coMzCs7mnhljhRVhdhN29I3RlHPkneVxrbPtyArzxvAVtI/adele.wav",
        "target_singer": "Taylor Swift",
    },
)

# To access the file URL:
# NOTE: in the Python client `url` is a plain attribute of the file output,
# not a method, so it must not be called.
print(output.url)
#=> "http://example.com"

# To write the file to disk:
# This model produces a WAV file, hence the .wav extension.
with open("output.wav", "wb") as file:
    file.write(output.read())
To learn more, take a look at the guide on getting started with Python.
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=r8_7mM**********************************
This is your API token. Keep it to yourself.
Run lucataco/singing_voice_conversion using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# Create a prediction by POSTing a JSON body (model version + input) to the
# predictions endpoint. Authenticates with the bearer token taken from the
# REPLICATE_API_TOKEN environment variable, which must be exported beforehand.
# NOTE(review): the "Prefer: wait" header presumably makes the request block
# until the prediction finishes — confirm against the HTTP API reference.
curl -s -X POST \
-H "Authorization: Bearer $REPLICATE_API_TOKEN" \
-H "Content-Type: application/json" \
-H "Prefer: wait" \
-d $'{
"version": "lucataco/singing_voice_conversion:f29872ee3557e0186735048f1d6de98a52518ae5c49e19453b3fdaad710bdc2b",
"input": {
"diffusion_inference_steps": 1000,
"key_shift_mode": 0,
"pitch_shift_control": "Auto Shift",
"source_audio": "https://replicate.delivery/pbxt/K5coMzCs7mnhljhRVhdhN29I3RlHPkneVxrbPtyArzxvAVtI/adele.wav",
"target_singer": "Taylor Swift"
}
}' \
https://api.replicate.com/v1/predictions
To learn more, take a look at Replicate’s HTTP API reference docs.
{
"id": "h37sr5dbojsspt56c34pvjanoe",
"model": "lucataco/singing_voice_conversion",
"version": "f29872ee3557e0186735048f1d6de98a52518ae5c49e19453b3fdaad710bdc2b",
"input": {
"diffusion_inference_steps": 1000,
"key_shift_mode": 0,
"pitch_shift_control": "Auto Shift",
"source_audio": "https://replicate.delivery/pbxt/K5coMzCs7mnhljhRVhdhN29I3RlHPkneVxrbPtyArzxvAVtI/adele.wav",
"target_singer": "Taylor Swift"
},
"logs": "/tmp/input_audio\nvocalist_l1_TaylorSwift\nautoshift\ngetopt: unrecognized option '--diffusion_inference_steps'\nExprimental Configuration File: ckpts/svc/vocalist_l1_contentvec+whisper/args.json\nThe following values were not passed to `accelerate launch` and had defaults used instead:\n`--num_processes` was set to a value of `1`\n`--num_machines` was set to a value of `1`\n`--mixed_precision` was set to a value of `'no'`\n`--dynamo_backend` was set to a value of `'no'`\nTo avoid this warning pass in values for each of the problematic parameters or run `accelerate config`.\nMonotonic align not found. Please make sure you have compiled it.\nThere are 1 source audios:\n**********\nConversion for source...\nPrepare for meta eval data: 0.0s\n 0%| | 0/1 [00:00<?, ?it/s]\n 0%| | 0/1 [00:00<?, ?it/s]\u001b[A\n100%|██████████| 1/1 [00:01<00:00, 1.98s/it]\u001b[A\n100%|██████████| 1/1 [00:01<00:00, 1.98s/it]\nPrepare for acoustic features: 2.0s\nPrepare for content features: 0.0s\n2023-12-21 22:37:31 | INFO | inference | ========================================================\n2023-12-21 22:37:31 | INFO | inference | ||\t\tNew inference process started.\t\t||\n2023-12-21 22:37:31 | INFO | inference | ========================================================\n2023-12-21 22:37:31 | INFO | inference |\n2023-12-21 22:37:31 | DEBUG | inference | Using DEBUG logging level.\n2023-12-21 22:37:31 | DEBUG | inference | Acoustic dir: ckpts/svc/vocalist_l1_contentvec+whisper\n2023-12-21 22:37:31 | DEBUG | inference | Vocoder dir: pretrained/bigvgan\n2023-12-21 22:37:31 | DEBUG | inference | Setting random seed done in 0.83ms\n2023-12-21 22:37:31 | DEBUG | inference | Random seed: 10086\n2023-12-21 22:37:31 | INFO | inference | Building dataset...\n2023-12-21 22:37:31 | INFO | inference | Building dataset done in 4.60ms\n2023-12-21 22:37:31 | INFO | inference | Building model...\n2023-12-21 22:37:31 | INFO | inference | Building model done in 276.183ms\n2023-12-21 22:37:31 | 
INFO | inference | Initializing accelerate...\n2023-12-21 22:37:32 | INFO | inference | Initializing accelerate done in 1057.268ms\n2023-12-21 22:37:32 | INFO | inference | Loading checkpoint...\n2023-12-21 22:37:32 | INFO | accelerate.accelerator | Loading states from ckpts/svc/vocalist_l1_contentvec+whisper/checkpoint/epoch-6852_step-0678447_loss-1.946773\n2023-12-21 22:37:32 | INFO | accelerate.checkpointing | All model weights loaded successfully\n2023-12-21 22:37:32 | INFO | accelerate.checkpointing | All optimizer states loaded successfully\n2023-12-21 22:37:32 | INFO | accelerate.checkpointing | All scheduler states loaded successfully\n2023-12-21 22:37:32 | INFO | accelerate.checkpointing | All dataloader sampler states loaded successfully\n2023-12-21 22:37:32 | INFO | accelerate.checkpointing | All random states loaded successfully\n2023-12-21 22:37:32 | INFO | accelerate.accelerator | Loading in 0 custom states\n2023-12-21 22:37:32 | INFO | inference | Loading checkpoint done in 106.015ms\n2023-12-21 22:37:32 | INFO | inference | Using PNDM scheduler.\nModel Init: 1.5s\nAuto transposing: source f0 median = 372.9, target f0 median = 286.9, factor = 0.77\n 0%| | 0/1009 [00:00<?, ?it/s]\u001b[A\n 0%| | 1/1009 [00:02<39:02, 2.32s/it]\u001b[A\n 2%|▏ | 20/1009 [00:02<01:26, 11.39it/s]\u001b[A\n 4%|▍ | 39/1009 [00:02<00:38, 25.02it/s]\u001b[A\n 6%|▌ | 58/1009 [00:02<00:23, 41.23it/s]\u001b[A\n 8%|▊ | 77/1009 [00:02<00:15, 59.36it/s]\u001b[A\n 10%|▉ | 96/1009 [00:02<00:11, 78.69it/s]\u001b[A\n 11%|█▏ | 115/1009 [00:02<00:09, 98.15it/s]\u001b[A\n 13%|█▎ | 134/1009 [00:03<00:07, 116.43it/s]\u001b[A\n 15%|█▌ | 153/1009 [00:03<00:06, 132.68it/s]\u001b[A\n 17%|█▋ | 172/1009 [00:03<00:05, 146.29it/s]\u001b[A\n 19%|█▉ | 191/1009 [00:03<00:05, 157.09it/s]\u001b[A\n 21%|██ | 210/1009 [00:03<00:04, 164.64it/s]\u001b[A\n 23%|██▎ | 229/1009 [00:03<00:04, 170.24it/s]\u001b[A\n 25%|██▍ | 248/1009 [00:03<00:04, 174.54it/s]\u001b[A\n 26%|██▋ | 267/1009 [00:03<00:04, 
176.83it/s]\u001b[A\n 28%|██▊ | 286/1009 [00:03<00:04, 178.76it/s]\u001b[A\n 30%|███ | 305/1009 [00:03<00:03, 180.21it/s]\u001b[A\n 32%|███▏ | 324/1009 [00:04<00:03, 179.83it/s]\u001b[A\n 34%|███▍ | 343/1009 [00:04<00:03, 179.98it/s]\u001b[A\n 36%|███▌ | 362/1009 [00:04<00:03, 181.63it/s]\u001b[A\n 38%|███▊ | 381/1009 [00:04<00:03, 181.04it/s]\u001b[A\n 40%|███▉ | 400/1009 [00:04<00:03, 182.13it/s]\u001b[A\n 42%|████▏ | 419/1009 [00:04<00:03, 182.37it/s]\u001b[A\n 43%|████▎ | 438/1009 [00:04<00:03, 183.85it/s]\u001b[A\n 45%|████▌ | 457/1009 [00:04<00:02, 184.98it/s]\u001b[A\n 47%|████▋ | 476/1009 [00:04<00:02, 185.91it/s]\u001b[A\n 49%|████▉ | 495/1009 [00:04<00:02, 186.25it/s]\u001b[A\n 51%|█████ | 514/1009 [00:05<00:02, 187.04it/s]\u001b[A\n 53%|█████▎ | 533/1009 [00:05<00:02, 187.69it/s]\u001b[A\n 55%|█████▍ | 552/1009 [00:05<00:02, 188.32it/s]\u001b[A\n 57%|█████▋ | 571/1009 [00:05<00:02, 188.13it/s]\u001b[A\n 58%|█████▊ | 590/1009 [00:05<00:02, 188.37it/s]\u001b[A\n 60%|██████ | 609/1009 [00:05<00:02, 188.66it/s]\u001b[A\n 62%|██████▏ | 628/1009 [00:05<00:02, 188.88it/s]\u001b[A\n 64%|██████▍ | 647/1009 [00:05<00:01, 188.97it/s]\u001b[A\n 66%|██████▌ | 666/1009 [00:05<00:01, 188.77it/s]\u001b[A\n 68%|██████▊ | 685/1009 [00:05<00:01, 188.38it/s]\u001b[A\n 70%|██████▉ | 704/1009 [00:06<00:01, 188.63it/s]\u001b[A\n 72%|███████▏ | 723/1009 [00:06<00:01, 188.84it/s]\u001b[A\n 74%|███████▎ | 742/1009 [00:06<00:01, 189.15it/s]\u001b[A\n 75%|███████▌ | 761/1009 [00:06<00:01, 188.98it/s]\u001b[A\n 77%|███████▋ | 780/1009 [00:06<00:01, 189.16it/s]\u001b[A\n 79%|███████▉ | 799/1009 [00:06<00:01, 186.62it/s]\u001b[A\n 81%|████████ | 819/1009 [00:06<00:01, 188.31it/s]\u001b[A\n 83%|████████▎ | 838/1009 [00:06<00:00, 185.22it/s]\u001b[A\n 85%|████████▍ | 857/1009 [00:06<00:00, 186.46it/s]\u001b[A\n 87%|████████▋ | 877/1009 [00:07<00:00, 188.15it/s]\u001b[A\n 89%|████████▉ | 897/1009 [00:07<00:00, 188.85it/s]\u001b[A\n 91%|█████████ | 917/1009 [00:07<00:00, 
189.80it/s]\u001b[A\n 93%|█████████▎| 937/1009 [00:07<00:00, 190.56it/s]\u001b[A\n 95%|█████████▍| 957/1009 [00:07<00:00, 190.87it/s]\u001b[A\n 97%|█████████▋| 977/1009 [00:07<00:00, 191.29it/s]\u001b[A\n 99%|█████████▉| 997/1009 [00:07<00:00, 190.46it/s]\u001b[A\n100%|██████████| 1009/1009 [00:07<00:00, 130.99it/s]\nSynthesis audios using bigvgan vocoder...\nLoading Vocoder from Weights file: /src/Amphion/pretrained/bigvgan/400000.pt\nFor predicted mels, #sample = 1...\nModel inference: 14.1s\n100%|██████████| 1/1 [00:17<00:00, 17.56s/it]\n100%|██████████| 1/1 [00:17<00:00, 17.56s/it]\n/src/Amphion/result/source/source_vocalist_l1_TaylorSwift.wav",
"output": "https://replicate.delivery/pbxt/FoHobqVw0mrPOluLgRQGEW01GwDo5tvSefKNDyc79hY8AnESA/source_vocalist_l1_TaylorSwift.wav",
"data_removed": false,
"error": null,
"source": "web",
"status": "succeeded",
"created_at": "2023-12-21T22:35:12.670504Z",
"started_at": "2023-12-21T22:37:17.68907Z",
"completed_at": "2023-12-21T22:37:49.201677Z",
"urls": {
"cancel": "https://api.replicate.com/v1/predictions/h37sr5dbojsspt56c34pvjanoe/cancel",
"get": "https://api.replicate.com/v1/predictions/h37sr5dbojsspt56c34pvjanoe"
},
"metrics": {
"predict_time": 31.512607,
"total_time": 156.531173
}
}