Readme
This model doesn't have a readme.
Run this model in Node.js with one line of code:
npm install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
import Replicate from "replicate";
const replicate = new Replicate({
auth: process.env.REPLICATE_API_TOKEN,
});
Run datong-new/rvc using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
const output = await replicate.run(
"datong-new/rvc:5da9f66869beacc8f2484215e25c88053acfe24044d64d4b26bbc40f7b5428dc",
{
input: {
f0_up_key: 8,
operation: "train_infer",
accompaniment: true,
audio_for_infer: "https://replicate.delivery/pbxt/KSuE9SkdHEsXPUlZmWj4kMwRNZwJ0CR0EFTzjJAwVqKY8brY/1.wav",
audio_for_train: "https://replicate.delivery/pbxt/KSuE9M9iVBPPXjGWfkGpiwD9iZOlHSwAwVmX0vHaA2hJ41Ca/wobunanguo.flac"
}
}
);
console.log(output);
To learn more, take a look at the guide on getting started with Node.js.
pip install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
import replicate
Run datong-new/rvc using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
output = replicate.run(
"datong-new/rvc:5da9f66869beacc8f2484215e25c88053acfe24044d64d4b26bbc40f7b5428dc",
input={
"f0_up_key": 8,
"operation": "train_infer",
"accompaniment": True,
"audio_for_infer": "https://replicate.delivery/pbxt/KSuE9SkdHEsXPUlZmWj4kMwRNZwJ0CR0EFTzjJAwVqKY8brY/1.wav",
"audio_for_train": "https://replicate.delivery/pbxt/KSuE9M9iVBPPXjGWfkGpiwD9iZOlHSwAwVmX0vHaA2hJ41Ca/wobunanguo.flac"
}
)
print(output)
To learn more, take a look at the guide on getting started with Python.
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
Run datong-new/rvc using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
curl -s -X POST \
-H "Authorization: Bearer $REPLICATE_API_TOKEN" \
-H "Content-Type: application/json" \
-H "Prefer: wait" \
-d $'{
"version": "datong-new/rvc:5da9f66869beacc8f2484215e25c88053acfe24044d64d4b26bbc40f7b5428dc",
"input": {
"f0_up_key": 8,
"operation": "train_infer",
"accompaniment": true,
"audio_for_infer": "https://replicate.delivery/pbxt/KSuE9SkdHEsXPUlZmWj4kMwRNZwJ0CR0EFTzjJAwVqKY8brY/1.wav",
"audio_for_train": "https://replicate.delivery/pbxt/KSuE9M9iVBPPXjGWfkGpiwD9iZOlHSwAwVmX0vHaA2hJ41Ca/wobunanguo.flac"
}
}' \
https://api.replicate.com/v1/predictions
To learn more, take a look at Replicate’s HTTP API reference docs.
Add a payment method to run this model.
By signing in, you agree to our
terms of service and privacy policy
ckpt_path
default.pth
cloned_audio
This is a modal window.
Beginning of dialog window. Escape will cancel and close the window.
End of dialog window.
{
"completed_at": "2024-02-25T12:51:58.322834Z",
"created_at": "2024-02-25T12:44:50.386033Z",
"data_removed": false,
"error": null,
"id": "kwohmxzbqtrk2ssyxusm2wf4sy",
"input": {
"f0_up_key": 8,
"operation": "train_infer",
"accompaniment": true,
"audio_for_infer": "https://replicate.delivery/pbxt/KSuE9SkdHEsXPUlZmWj4kMwRNZwJ0CR0EFTzjJAwVqKY8brY/1.wav",
"audio_for_train": "https://replicate.delivery/pbxt/KSuE9M9iVBPPXjGWfkGpiwD9iZOlHSwAwVmX0vHaA2hJ41Ca/wobunanguo.flac"
},
"logs": "0%| | 0/116 [00:00<?, ?it/s]\n 1%| | 1/116 [00:01<02:31, 1.32s/it]\n 3%|▎ | 3/116 [00:01<00:46, 2.44it/s]\n 4%|▍ | 5/116 [00:01<00:26, 4.19it/s]\n 6%|▌ | 7/116 [00:01<00:17, 6.09it/s]\n 8%|▊ | 9/116 [00:01<00:13, 7.95it/s]\n 9%|▉ | 11/116 [00:02<00:10, 9.62it/s]\n 11%|█ | 13/116 [00:02<00:09, 11.13it/s]\n 13%|█▎ | 15/116 [00:02<00:08, 12.23it/s]\n 15%|█▍ | 17/116 [00:02<00:07, 13.11it/s]\n 16%|█▋ | 19/116 [00:02<00:06, 13.90it/s]\n 18%|█▊ | 21/116 [00:02<00:06, 14.54it/s]\n 20%|█▉ | 23/116 [00:02<00:06, 14.98it/s]\n 22%|██▏ | 25/116 [00:02<00:05, 15.21it/s]\n 23%|██▎ | 27/116 [00:03<00:05, 15.31it/s]\n 25%|██▌ | 29/116 [00:03<00:05, 15.47it/s]\n 27%|██▋ | 31/116 [00:03<00:05, 15.77it/s]\n 28%|██▊ | 33/116 [00:03<00:05, 15.90it/s]\n 30%|███ | 35/116 [00:03<00:05, 16.05it/s]\n 32%|███▏ | 37/116 [00:03<00:04, 16.16it/s]\n 34%|███▎ | 39/116 [00:03<00:04, 16.15it/s]\n 35%|███▌ | 41/116 [00:03<00:04, 15.93it/s]\n 37%|███▋ | 43/116 [00:04<00:04, 15.79it/s]\n 39%|███▉ | 45/116 [00:04<00:04, 15.85it/s]\n 41%|████ | 47/116 [00:04<00:04, 15.82it/s]\n 42%|████▏ | 49/116 [00:04<00:04, 15.92it/s]\n 44%|████▍ | 51/116 [00:04<00:04, 15.89it/s]\n 46%|████▌ | 53/116 [00:04<00:03, 15.96it/s]\n 47%|████▋ | 55/116 [00:04<00:03, 15.96it/s]\n 49%|████▉ | 57/116 [00:04<00:03, 15.92it/s]\n 51%|█████ | 59/116 [00:05<00:03, 15.87it/s]\n 53%|█████▎ | 61/116 [00:05<00:03, 15.94it/s]\n 54%|█████▍ | 63/116 [00:05<00:03, 16.00it/s]\n 56%|█████▌ | 65/116 [00:05<00:03, 16.06it/s]\n 58%|█████▊ | 67/116 [00:05<00:03, 16.01it/s]\n 59%|█████▉ | 69/116 [00:05<00:02, 16.05it/s]\n 61%|██████ | 71/116 [00:05<00:02, 16.06it/s]\n 63%|██████▎ | 73/116 [00:05<00:02, 16.09it/s]\n 65%|██████▍ | 75/116 [00:06<00:02, 15.72it/s]\n 66%|██████▋ | 77/116 [00:06<00:02, 15.75it/s]\n 68%|██████▊ | 79/116 [00:06<00:02, 15.66it/s]\n 70%|██████▉ | 81/116 [00:06<00:02, 15.75it/s]\n 72%|███████▏ | 83/116 [00:06<00:02, 15.84it/s]\n 73%|███████▎ | 85/116 [00:06<00:01, 15.84it/s]\n 75%|███████▌ | 87/116 [00:06<00:01, 
15.81it/s]\n 77%|███████▋ | 89/116 [00:06<00:01, 15.84it/s]\n 78%|███████▊ | 91/116 [00:07<00:01, 15.84it/s]\n 80%|████████ | 93/116 [00:07<00:01, 15.91it/s]\n 82%|████████▏ | 95/116 [00:07<00:01, 15.86it/s]\n 84%|████████▎ | 97/116 [00:07<00:01, 15.89it/s]\n 85%|████████▌ | 99/116 [00:07<00:01, 15.87it/s]\n 87%|████████▋ | 101/116 [00:07<00:00, 15.92it/s]\n 89%|████████▉ | 103/116 [00:07<00:00, 15.79it/s]\n 91%|█████████ | 105/116 [00:07<00:00, 15.36it/s]\n 92%|█████████▏| 107/116 [00:08<00:00, 15.20it/s]\n 94%|█████████▍| 109/116 [00:08<00:00, 14.89it/s]\n 96%|█████████▌| 111/116 [00:08<00:00, 14.91it/s]\n 97%|█████████▋| 113/116 [00:08<00:00, 14.87it/s]\n 99%|█████████▉| 115/116 [00:08<00:00, 14.91it/s]\n100%|██████████| 116/116 [00:08<00:00, 13.33it/s]\n2024-02-25 12:49:19 | INFO | fairseq.tasks.hubert_pretraining | current directory is /src\n2024-02-25 12:49:19 | INFO | fairseq.tasks.hubert_pretraining | HubertPretrainingTask Config {'_name': 'hubert_pretraining', 'data': 'metadata', 'fine_tuning': False, 'labels': ['km'], 'label_dir': 'label', 'label_rate': 50.0, 'sample_rate': 16000, 'normalize': False, 'enable_padding': False, 'max_keep_size': None, 'max_sample_size': 250000, 'min_sample_size': 32000, 'single_target': False, 'random_crop': True, 'pad_audio': False}\n2024-02-25 12:49:19 | INFO | fairseq.models.hubert.hubert | HubertModel Config: {'_name': 'hubert', 'label_rate': 50.0, 'extractor_mode': default, 'encoder_layers': 12, 'encoder_embed_dim': 768, 'encoder_ffn_embed_dim': 3072, 'encoder_attention_heads': 12, 'activation_fn': gelu, 'layer_type': transformer, 'dropout': 0.1, 'attention_dropout': 0.1, 'activation_dropout': 0.0, 'encoder_layerdrop': 0.05, 'dropout_input': 0.1, 'dropout_features': 0.1, 'final_dim': 256, 'untie_final_proj': True, 'layer_norm_first': False, 'conv_feature_layers': '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2', 'conv_bias': False, 'logit_temp': 0.1, 'target_glu': False, 'feature_grad_mult': 0.1, 'mask_length': 10, 
'mask_prob': 0.8, 'mask_selection': static, 'mask_other': 0.0, 'no_mask_overlap': False, 'mask_min_space': 1, 'mask_channel_length': 10, 'mask_channel_prob': 0.0, 'mask_channel_selection': static, 'mask_channel_other': 0.0, 'no_mask_channel_overlap': False, 'mask_channel_min_space': 1, 'conv_pos': 128, 'conv_pos_groups': 16, 'latent_temp': [2.0, 0.5, 0.999995], 'skip_masked': False, 'skip_nomask': False, 'checkpoint_activations': False, 'required_seq_len_multiple': 2, 'depthwise_conv_kernel_size': 31, 'attn_type': '', 'pos_enc_type': 'abs', 'fp16': False}\n/root/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.\nwarnings.warn(\"torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.\")\n/root/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.\nwarnings.warn(\"torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.\")\n/root/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch/functional.py:660: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error.\nNote: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:874.)\nreturn _VF.stft(input, n_fft, hop_length, win_length, window, # type: ignore[attr-defined]\n/root/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch/functional.py:660: UserWarning: stft with return_complex=False is deprecated. 
In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error.\nNote: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:874.)\nreturn _VF.stft(input, n_fft, hop_length, win_length, window, # type: ignore[attr-defined]\n/root/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch/functional.py:660: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error.\nNote: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:874.)\nreturn _VF.stft(input, n_fft, hop_length, win_length, window, # type: ignore[attr-defined]\n/root/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch/functional.py:660: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error.\nNote: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:874.)\nreturn _VF.stft(input, n_fft, hop_length, win_length, window, # type: ignore[attr-defined]\n/root/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch/functional.py:660: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error.\nNote: you can still call torch.view_as_real on the complex output to recover the old return format. 
(Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:874.)\nreturn _VF.stft(input, n_fft, hop_length, win_length, window, # type: ignore[attr-defined]\n/root/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch/autograd/__init__.py:266: UserWarning: Grad strides do not match bucket view strides. This may indicate grad was not created according to the gradient layout contract, or that the param's strides changed since DDP was constructed. This is not an error, but may impair performance.\ngrad.sizes() = [64, 1, 4], strides() = [4, 1, 1]\nbucket_view.sizes() = [64, 1, 4], strides() = [4, 4, 1] (Triggered internally at ../torch/csrc/distributed/c10d/reducer.cpp:322.)\nVariable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n/root/.pyenv/versions/3.8.10/lib/python3.8/multiprocessing/resource_tracker.py:216: UserWarning: resource_tracker: There appear to be 20 leaked semaphore objects to clean up at shutdown\nwarnings.warn('resource_tracker: There appear to be %d '\n 0%| | 0/65 [00:00<?, ?it/s]\n 2%|▏ | 1/65 [00:00<00:06, 9.82it/s]\n 5%|▍ | 3/65 [00:00<00:05, 11.97it/s]\n 8%|▊ | 5/65 [00:00<00:04, 13.09it/s]\n 11%|█ | 7/65 [00:00<00:04, 13.78it/s]\n 14%|█▍ | 9/65 [00:00<00:03, 14.08it/s]\n 17%|█▋ | 11/65 [00:00<00:03, 14.28it/s]\n 20%|██ | 13/65 [00:00<00:03, 14.47it/s]\n 23%|██▎ | 15/65 [00:01<00:03, 14.56it/s]\n 26%|██▌ | 17/65 [00:01<00:03, 14.66it/s]\n 29%|██▉ | 19/65 [00:01<00:03, 14.64it/s]\n 32%|███▏ | 21/65 [00:01<00:02, 14.76it/s]\n 35%|███▌ | 23/65 [00:01<00:02, 14.84it/s]\n 38%|███▊ | 25/65 [00:01<00:02, 14.95it/s]\n 42%|████▏ | 27/65 [00:01<00:02, 15.00it/s]\n 45%|████▍ | 29/65 [00:02<00:02, 15.02it/s]\n 48%|████▊ | 31/65 [00:02<00:02, 14.98it/s]\n 51%|█████ | 33/65 [00:02<00:02, 14.79it/s]\n 54%|█████▍ | 35/65 [00:02<00:02, 14.89it/s]\n 57%|█████▋ | 37/65 [00:02<00:01, 14.84it/s]\n 60%|██████ | 39/65 [00:02<00:01, 14.79it/s]\n 63%|██████▎ | 41/65 [00:02<00:01, 14.79it/s]\n 66%|██████▌ | 43/65 
[00:02<00:01, 14.84it/s]\n 69%|██████▉ | 45/65 [00:03<00:01, 14.84it/s]\n 72%|███████▏ | 47/65 [00:03<00:01, 14.78it/s]\n 75%|███████▌ | 49/65 [00:03<00:01, 14.78it/s]\n 78%|███████▊ | 51/65 [00:03<00:00, 14.82it/s]\n 82%|████████▏ | 53/65 [00:03<00:00, 14.93it/s]\n 85%|████████▍ | 55/65 [00:03<00:00, 14.89it/s]\n 88%|████████▊ | 57/65 [00:03<00:00, 14.88it/s]\n 91%|█████████ | 59/65 [00:04<00:00, 14.90it/s]\n 94%|█████████▍| 61/65 [00:04<00:00, 14.90it/s]\n 97%|█████████▋| 63/65 [00:04<00:00, 14.98it/s]\n100%|██████████| 65/65 [00:04<00:00, 14.97it/s]\n100%|██████████| 65/65 [00:04<00:00, 14.67it/s]",
"metrics": {
"predict_time": 262.949361,
"total_time": 427.936801
},
"output": {
"ckpt_path": "https://replicate.delivery/pbxt/V3fTVi4SctXuQSxtwkw57z1fJdHeSzJeUXJl1DvlmG02e0RTC/default.pth",
"cloned_audio": "https://replicate.delivery/pbxt/dIl0M30PMzaTLpGgF2v4BpZAqhBOMESzTcVTylAFpmW7pjmE/audio_cloned.wav"
},
"started_at": "2024-02-25T12:47:35.373473Z",
"status": "succeeded",
"urls": {
"get": "https://api.replicate.com/v1/predictions/kwohmxzbqtrk2ssyxusm2wf4sy",
"cancel": "https://api.replicate.com/v1/predictions/kwohmxzbqtrk2ssyxusm2wf4sy/cancel"
},
"version": "07402961aee9e589b0d5fd05368158da3f8df772849c95c2a08c2e3d031fd19a"
}
0%| | 0/116 [00:00<?, ?it/s]
1%| | 1/116 [00:01<02:31, 1.32s/it]
3%|▎ | 3/116 [00:01<00:46, 2.44it/s]
4%|▍ | 5/116 [00:01<00:26, 4.19it/s]
6%|▌ | 7/116 [00:01<00:17, 6.09it/s]
8%|▊ | 9/116 [00:01<00:13, 7.95it/s]
9%|▉ | 11/116 [00:02<00:10, 9.62it/s]
11%|█ | 13/116 [00:02<00:09, 11.13it/s]
13%|█▎ | 15/116 [00:02<00:08, 12.23it/s]
15%|█▍ | 17/116 [00:02<00:07, 13.11it/s]
16%|█▋ | 19/116 [00:02<00:06, 13.90it/s]
18%|█▊ | 21/116 [00:02<00:06, 14.54it/s]
20%|█▉ | 23/116 [00:02<00:06, 14.98it/s]
22%|██▏ | 25/116 [00:02<00:05, 15.21it/s]
23%|██▎ | 27/116 [00:03<00:05, 15.31it/s]
25%|██▌ | 29/116 [00:03<00:05, 15.47it/s]
27%|██▋ | 31/116 [00:03<00:05, 15.77it/s]
28%|██▊ | 33/116 [00:03<00:05, 15.90it/s]
30%|███ | 35/116 [00:03<00:05, 16.05it/s]
32%|███▏ | 37/116 [00:03<00:04, 16.16it/s]
34%|███▎ | 39/116 [00:03<00:04, 16.15it/s]
35%|███▌ | 41/116 [00:03<00:04, 15.93it/s]
37%|███▋ | 43/116 [00:04<00:04, 15.79it/s]
39%|███▉ | 45/116 [00:04<00:04, 15.85it/s]
41%|████ | 47/116 [00:04<00:04, 15.82it/s]
42%|████▏ | 49/116 [00:04<00:04, 15.92it/s]
44%|████▍ | 51/116 [00:04<00:04, 15.89it/s]
46%|████▌ | 53/116 [00:04<00:03, 15.96it/s]
47%|████▋ | 55/116 [00:04<00:03, 15.96it/s]
49%|████▉ | 57/116 [00:04<00:03, 15.92it/s]
51%|█████ | 59/116 [00:05<00:03, 15.87it/s]
53%|█████▎ | 61/116 [00:05<00:03, 15.94it/s]
54%|█████▍ | 63/116 [00:05<00:03, 16.00it/s]
56%|█████▌ | 65/116 [00:05<00:03, 16.06it/s]
58%|█████▊ | 67/116 [00:05<00:03, 16.01it/s]
59%|█████▉ | 69/116 [00:05<00:02, 16.05it/s]
61%|██████ | 71/116 [00:05<00:02, 16.06it/s]
63%|██████▎ | 73/116 [00:05<00:02, 16.09it/s]
65%|██████▍ | 75/116 [00:06<00:02, 15.72it/s]
66%|██████▋ | 77/116 [00:06<00:02, 15.75it/s]
68%|██████▊ | 79/116 [00:06<00:02, 15.66it/s]
70%|██████▉ | 81/116 [00:06<00:02, 15.75it/s]
72%|███████▏ | 83/116 [00:06<00:02, 15.84it/s]
73%|███████▎ | 85/116 [00:06<00:01, 15.84it/s]
75%|███████▌ | 87/116 [00:06<00:01, 15.81it/s]
77%|███████▋ | 89/116 [00:06<00:01, 15.84it/s]
78%|███████▊ | 91/116 [00:07<00:01, 15.84it/s]
80%|████████ | 93/116 [00:07<00:01, 15.91it/s]
82%|████████▏ | 95/116 [00:07<00:01, 15.86it/s]
84%|████████▎ | 97/116 [00:07<00:01, 15.89it/s]
85%|████████▌ | 99/116 [00:07<00:01, 15.87it/s]
87%|████████▋ | 101/116 [00:07<00:00, 15.92it/s]
89%|████████▉ | 103/116 [00:07<00:00, 15.79it/s]
91%|█████████ | 105/116 [00:07<00:00, 15.36it/s]
92%|█████████▏| 107/116 [00:08<00:00, 15.20it/s]
94%|█████████▍| 109/116 [00:08<00:00, 14.89it/s]
96%|█████████▌| 111/116 [00:08<00:00, 14.91it/s]
97%|█████████▋| 113/116 [00:08<00:00, 14.87it/s]
99%|█████████▉| 115/116 [00:08<00:00, 14.91it/s]
100%|██████████| 116/116 [00:08<00:00, 13.33it/s]
2024-02-25 12:49:19 | INFO | fairseq.tasks.hubert_pretraining | current directory is /src
2024-02-25 12:49:19 | INFO | fairseq.tasks.hubert_pretraining | HubertPretrainingTask Config {'_name': 'hubert_pretraining', 'data': 'metadata', 'fine_tuning': False, 'labels': ['km'], 'label_dir': 'label', 'label_rate': 50.0, 'sample_rate': 16000, 'normalize': False, 'enable_padding': False, 'max_keep_size': None, 'max_sample_size': 250000, 'min_sample_size': 32000, 'single_target': False, 'random_crop': True, 'pad_audio': False}
2024-02-25 12:49:19 | INFO | fairseq.models.hubert.hubert | HubertModel Config: {'_name': 'hubert', 'label_rate': 50.0, 'extractor_mode': default, 'encoder_layers': 12, 'encoder_embed_dim': 768, 'encoder_ffn_embed_dim': 3072, 'encoder_attention_heads': 12, 'activation_fn': gelu, 'layer_type': transformer, 'dropout': 0.1, 'attention_dropout': 0.1, 'activation_dropout': 0.0, 'encoder_layerdrop': 0.05, 'dropout_input': 0.1, 'dropout_features': 0.1, 'final_dim': 256, 'untie_final_proj': True, 'layer_norm_first': False, 'conv_feature_layers': '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2', 'conv_bias': False, 'logit_temp': 0.1, 'target_glu': False, 'feature_grad_mult': 0.1, 'mask_length': 10, 'mask_prob': 0.8, 'mask_selection': static, 'mask_other': 0.0, 'no_mask_overlap': False, 'mask_min_space': 1, 'mask_channel_length': 10, 'mask_channel_prob': 0.0, 'mask_channel_selection': static, 'mask_channel_other': 0.0, 'no_mask_channel_overlap': False, 'mask_channel_min_space': 1, 'conv_pos': 128, 'conv_pos_groups': 16, 'latent_temp': [2.0, 0.5, 0.999995], 'skip_masked': False, 'skip_nomask': False, 'checkpoint_activations': False, 'required_seq_len_multiple': 2, 'depthwise_conv_kernel_size': 31, 'attn_type': '', 'pos_enc_type': 'abs', 'fp16': False}
/root/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.
warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.")
/root/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.
warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.")
/root/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch/functional.py:660: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error.
Note: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:874.)
return _VF.stft(input, n_fft, hop_length, win_length, window, # type: ignore[attr-defined]
/root/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch/functional.py:660: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error.
Note: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:874.)
return _VF.stft(input, n_fft, hop_length, win_length, window, # type: ignore[attr-defined]
/root/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch/functional.py:660: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error.
Note: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:874.)
return _VF.stft(input, n_fft, hop_length, win_length, window, # type: ignore[attr-defined]
/root/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch/functional.py:660: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error.
Note: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:874.)
return _VF.stft(input, n_fft, hop_length, win_length, window, # type: ignore[attr-defined]
/root/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch/functional.py:660: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error.
Note: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:874.)
return _VF.stft(input, n_fft, hop_length, win_length, window, # type: ignore[attr-defined]
/root/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch/autograd/__init__.py:266: UserWarning: Grad strides do not match bucket view strides. This may indicate grad was not created according to the gradient layout contract, or that the param's strides changed since DDP was constructed. This is not an error, but may impair performance.
grad.sizes() = [64, 1, 4], strides() = [4, 1, 1]
bucket_view.sizes() = [64, 1, 4], strides() = [4, 4, 1] (Triggered internally at ../torch/csrc/distributed/c10d/reducer.cpp:322.)
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
/root/.pyenv/versions/3.8.10/lib/python3.8/multiprocessing/resource_tracker.py:216: UserWarning: resource_tracker: There appear to be 20 leaked semaphore objects to clean up at shutdown
warnings.warn('resource_tracker: There appear to be %d '
0%| | 0/65 [00:00<?, ?it/s]
2%|▏ | 1/65 [00:00<00:06, 9.82it/s]
5%|▍ | 3/65 [00:00<00:05, 11.97it/s]
8%|▊ | 5/65 [00:00<00:04, 13.09it/s]
11%|█ | 7/65 [00:00<00:04, 13.78it/s]
14%|█▍ | 9/65 [00:00<00:03, 14.08it/s]
17%|█▋ | 11/65 [00:00<00:03, 14.28it/s]
20%|██ | 13/65 [00:00<00:03, 14.47it/s]
23%|██▎ | 15/65 [00:01<00:03, 14.56it/s]
26%|██▌ | 17/65 [00:01<00:03, 14.66it/s]
29%|██▉ | 19/65 [00:01<00:03, 14.64it/s]
32%|███▏ | 21/65 [00:01<00:02, 14.76it/s]
35%|███▌ | 23/65 [00:01<00:02, 14.84it/s]
38%|███▊ | 25/65 [00:01<00:02, 14.95it/s]
42%|████▏ | 27/65 [00:01<00:02, 15.00it/s]
45%|████▍ | 29/65 [00:02<00:02, 15.02it/s]
48%|████▊ | 31/65 [00:02<00:02, 14.98it/s]
51%|█████ | 33/65 [00:02<00:02, 14.79it/s]
54%|█████▍ | 35/65 [00:02<00:02, 14.89it/s]
57%|█████▋ | 37/65 [00:02<00:01, 14.84it/s]
60%|██████ | 39/65 [00:02<00:01, 14.79it/s]
63%|██████▎ | 41/65 [00:02<00:01, 14.79it/s]
66%|██████▌ | 43/65 [00:02<00:01, 14.84it/s]
69%|██████▉ | 45/65 [00:03<00:01, 14.84it/s]
72%|███████▏ | 47/65 [00:03<00:01, 14.78it/s]
75%|███████▌ | 49/65 [00:03<00:01, 14.78it/s]
78%|███████▊ | 51/65 [00:03<00:00, 14.82it/s]
82%|████████▏ | 53/65 [00:03<00:00, 14.93it/s]
85%|████████▍ | 55/65 [00:03<00:00, 14.89it/s]
88%|████████▊ | 57/65 [00:03<00:00, 14.88it/s]
91%|█████████ | 59/65 [00:04<00:00, 14.90it/s]
94%|█████████▍| 61/65 [00:04<00:00, 14.90it/s]
97%|█████████▋| 63/65 [00:04<00:00, 14.98it/s]
100%|██████████| 65/65 [00:04<00:00, 14.97it/s]
100%|██████████| 65/65 [00:04<00:00, 14.67it/s]
This output was created using a different version of the model, datong-new/rvc:07402961.
This model runs on Nvidia T4 GPU hardware. We don't yet have enough runs of this model to provide performance information.
This model doesn't have a readme.
This model is cold. You'll get a fast response if the model is warm and already running, and a slower response if the model is cold and starting up.
Choose a file from your machine
Hint: you can also drag files onto the input
Choose a file from your machine
Hint: you can also drag files onto the input
Choose a file from your machine
Hint: you can also drag files onto the input
0%| | 0/116 [00:00<?, ?it/s]
1%| | 1/116 [00:01<02:31, 1.32s/it]
3%|▎ | 3/116 [00:01<00:46, 2.44it/s]
4%|▍ | 5/116 [00:01<00:26, 4.19it/s]
6%|▌ | 7/116 [00:01<00:17, 6.09it/s]
8%|▊ | 9/116 [00:01<00:13, 7.95it/s]
9%|▉ | 11/116 [00:02<00:10, 9.62it/s]
11%|█ | 13/116 [00:02<00:09, 11.13it/s]
13%|█▎ | 15/116 [00:02<00:08, 12.23it/s]
15%|█▍ | 17/116 [00:02<00:07, 13.11it/s]
16%|█▋ | 19/116 [00:02<00:06, 13.90it/s]
18%|█▊ | 21/116 [00:02<00:06, 14.54it/s]
20%|█▉ | 23/116 [00:02<00:06, 14.98it/s]
22%|██▏ | 25/116 [00:02<00:05, 15.21it/s]
23%|██▎ | 27/116 [00:03<00:05, 15.31it/s]
25%|██▌ | 29/116 [00:03<00:05, 15.47it/s]
27%|██▋ | 31/116 [00:03<00:05, 15.77it/s]
28%|██▊ | 33/116 [00:03<00:05, 15.90it/s]
30%|███ | 35/116 [00:03<00:05, 16.05it/s]
32%|███▏ | 37/116 [00:03<00:04, 16.16it/s]
34%|███▎ | 39/116 [00:03<00:04, 16.15it/s]
35%|███▌ | 41/116 [00:03<00:04, 15.93it/s]
37%|███▋ | 43/116 [00:04<00:04, 15.79it/s]
39%|███▉ | 45/116 [00:04<00:04, 15.85it/s]
41%|████ | 47/116 [00:04<00:04, 15.82it/s]
42%|████▏ | 49/116 [00:04<00:04, 15.92it/s]
44%|████▍ | 51/116 [00:04<00:04, 15.89it/s]
46%|████▌ | 53/116 [00:04<00:03, 15.96it/s]
47%|████▋ | 55/116 [00:04<00:03, 15.96it/s]
49%|████▉ | 57/116 [00:04<00:03, 15.92it/s]
51%|█████ | 59/116 [00:05<00:03, 15.87it/s]
53%|█████▎ | 61/116 [00:05<00:03, 15.94it/s]
54%|█████▍ | 63/116 [00:05<00:03, 16.00it/s]
56%|█████▌ | 65/116 [00:05<00:03, 16.06it/s]
58%|█████▊ | 67/116 [00:05<00:03, 16.01it/s]
59%|█████▉ | 69/116 [00:05<00:02, 16.05it/s]
61%|██████ | 71/116 [00:05<00:02, 16.06it/s]
63%|██████▎ | 73/116 [00:05<00:02, 16.09it/s]
65%|██████▍ | 75/116 [00:06<00:02, 15.72it/s]
66%|██████▋ | 77/116 [00:06<00:02, 15.75it/s]
68%|██████▊ | 79/116 [00:06<00:02, 15.66it/s]
70%|██████▉ | 81/116 [00:06<00:02, 15.75it/s]
72%|███████▏ | 83/116 [00:06<00:02, 15.84it/s]
73%|███████▎ | 85/116 [00:06<00:01, 15.84it/s]
75%|███████▌ | 87/116 [00:06<00:01, 15.81it/s]
77%|███████▋ | 89/116 [00:06<00:01, 15.84it/s]
78%|███████▊ | 91/116 [00:07<00:01, 15.84it/s]
80%|████████ | 93/116 [00:07<00:01, 15.91it/s]
82%|████████▏ | 95/116 [00:07<00:01, 15.86it/s]
84%|████████▎ | 97/116 [00:07<00:01, 15.89it/s]
85%|████████▌ | 99/116 [00:07<00:01, 15.87it/s]
87%|████████▋ | 101/116 [00:07<00:00, 15.92it/s]
89%|████████▉ | 103/116 [00:07<00:00, 15.79it/s]
91%|█████████ | 105/116 [00:07<00:00, 15.36it/s]
92%|█████████▏| 107/116 [00:08<00:00, 15.20it/s]
94%|█████████▍| 109/116 [00:08<00:00, 14.89it/s]
96%|█████████▌| 111/116 [00:08<00:00, 14.91it/s]
97%|█████████▋| 113/116 [00:08<00:00, 14.87it/s]
99%|█████████▉| 115/116 [00:08<00:00, 14.91it/s]
100%|██████████| 116/116 [00:08<00:00, 13.33it/s]
2024-02-25 12:49:19 | INFO | fairseq.tasks.hubert_pretraining | current directory is /src
2024-02-25 12:49:19 | INFO | fairseq.tasks.hubert_pretraining | HubertPretrainingTask Config {'_name': 'hubert_pretraining', 'data': 'metadata', 'fine_tuning': False, 'labels': ['km'], 'label_dir': 'label', 'label_rate': 50.0, 'sample_rate': 16000, 'normalize': False, 'enable_padding': False, 'max_keep_size': None, 'max_sample_size': 250000, 'min_sample_size': 32000, 'single_target': False, 'random_crop': True, 'pad_audio': False}
2024-02-25 12:49:19 | INFO | fairseq.models.hubert.hubert | HubertModel Config: {'_name': 'hubert', 'label_rate': 50.0, 'extractor_mode': default, 'encoder_layers': 12, 'encoder_embed_dim': 768, 'encoder_ffn_embed_dim': 3072, 'encoder_attention_heads': 12, 'activation_fn': gelu, 'layer_type': transformer, 'dropout': 0.1, 'attention_dropout': 0.1, 'activation_dropout': 0.0, 'encoder_layerdrop': 0.05, 'dropout_input': 0.1, 'dropout_features': 0.1, 'final_dim': 256, 'untie_final_proj': True, 'layer_norm_first': False, 'conv_feature_layers': '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2', 'conv_bias': False, 'logit_temp': 0.1, 'target_glu': False, 'feature_grad_mult': 0.1, 'mask_length': 10, 'mask_prob': 0.8, 'mask_selection': static, 'mask_other': 0.0, 'no_mask_overlap': False, 'mask_min_space': 1, 'mask_channel_length': 10, 'mask_channel_prob': 0.0, 'mask_channel_selection': static, 'mask_channel_other': 0.0, 'no_mask_channel_overlap': False, 'mask_channel_min_space': 1, 'conv_pos': 128, 'conv_pos_groups': 16, 'latent_temp': [2.0, 0.5, 0.999995], 'skip_masked': False, 'skip_nomask': False, 'checkpoint_activations': False, 'required_seq_len_multiple': 2, 'depthwise_conv_kernel_size': 31, 'attn_type': '', 'pos_enc_type': 'abs', 'fp16': False}
/root/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.
warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.")
/root/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.
warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.")
/root/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch/functional.py:660: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error.
Note: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:874.)
return _VF.stft(input, n_fft, hop_length, win_length, window, # type: ignore[attr-defined]
/root/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch/functional.py:660: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error.
Note: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:874.)
return _VF.stft(input, n_fft, hop_length, win_length, window, # type: ignore[attr-defined]
/root/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch/functional.py:660: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error.
Note: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:874.)
return _VF.stft(input, n_fft, hop_length, win_length, window, # type: ignore[attr-defined]
/root/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch/functional.py:660: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error.
Note: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:874.)
return _VF.stft(input, n_fft, hop_length, win_length, window, # type: ignore[attr-defined]
/root/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch/functional.py:660: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error.
Note: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:874.)
return _VF.stft(input, n_fft, hop_length, win_length, window, # type: ignore[attr-defined]
/root/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch/autograd/__init__.py:266: UserWarning: Grad strides do not match bucket view strides. This may indicate grad was not created according to the gradient layout contract, or that the param's strides changed since DDP was constructed. This is not an error, but may impair performance.
grad.sizes() = [64, 1, 4], strides() = [4, 1, 1]
bucket_view.sizes() = [64, 1, 4], strides() = [4, 4, 1] (Triggered internally at ../torch/csrc/distributed/c10d/reducer.cpp:322.)
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
/root/.pyenv/versions/3.8.10/lib/python3.8/multiprocessing/resource_tracker.py:216: UserWarning: resource_tracker: There appear to be 20 leaked semaphore objects to clean up at shutdown
warnings.warn('resource_tracker: There appear to be %d '
0%| | 0/65 [00:00<?, ?it/s]
2%|▏ | 1/65 [00:00<00:06, 9.82it/s]
5%|▍ | 3/65 [00:00<00:05, 11.97it/s]
8%|▊ | 5/65 [00:00<00:04, 13.09it/s]
11%|█ | 7/65 [00:00<00:04, 13.78it/s]
14%|█▍ | 9/65 [00:00<00:03, 14.08it/s]
17%|█▋ | 11/65 [00:00<00:03, 14.28it/s]
20%|██ | 13/65 [00:00<00:03, 14.47it/s]
23%|██▎ | 15/65 [00:01<00:03, 14.56it/s]
26%|██▌ | 17/65 [00:01<00:03, 14.66it/s]
29%|██▉ | 19/65 [00:01<00:03, 14.64it/s]
32%|███▏ | 21/65 [00:01<00:02, 14.76it/s]
35%|███▌ | 23/65 [00:01<00:02, 14.84it/s]
38%|███▊ | 25/65 [00:01<00:02, 14.95it/s]
42%|████▏ | 27/65 [00:01<00:02, 15.00it/s]
45%|████▍ | 29/65 [00:02<00:02, 15.02it/s]
48%|████▊ | 31/65 [00:02<00:02, 14.98it/s]
51%|█████ | 33/65 [00:02<00:02, 14.79it/s]
54%|█████▍ | 35/65 [00:02<00:02, 14.89it/s]
57%|█████▋ | 37/65 [00:02<00:01, 14.84it/s]
60%|██████ | 39/65 [00:02<00:01, 14.79it/s]
63%|██████▎ | 41/65 [00:02<00:01, 14.79it/s]
66%|██████▌ | 43/65 [00:02<00:01, 14.84it/s]
69%|██████▉ | 45/65 [00:03<00:01, 14.84it/s]
72%|███████▏ | 47/65 [00:03<00:01, 14.78it/s]
75%|███████▌ | 49/65 [00:03<00:01, 14.78it/s]
78%|███████▊ | 51/65 [00:03<00:00, 14.82it/s]
82%|████████▏ | 53/65 [00:03<00:00, 14.93it/s]
85%|████████▍ | 55/65 [00:03<00:00, 14.89it/s]
88%|████████▊ | 57/65 [00:03<00:00, 14.88it/s]
91%|█████████ | 59/65 [00:04<00:00, 14.90it/s]
94%|█████████▍| 61/65 [00:04<00:00, 14.90it/s]
97%|█████████▋| 63/65 [00:04<00:00, 14.98it/s]
100%|██████████| 65/65 [00:04<00:00, 14.97it/s]
100%|██████████| 65/65 [00:04<00:00, 14.67it/s]