Readme
Fine-tuned Stable Audio
Music generation with a fine-tuned Stable Audio model
Run this model in Node.js with one line of code:
npm install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
import Replicate from "replicate";
const replicate = new Replicate({
auth: process.env.REPLICATE_API_TOKEN,
});
Run ardianfe/stable-audio-2 using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
const output = await replicate.run(
"ardianfe/stable-audio-2:6761efad154cb36efc2b10bb5432b561e3ce2557130557b34f9fd4874a894c84",
{
input: {
seed: -1,
steps: 100,
prompt: "acoustic guitar",
song_id: 10001,
cfg_scale: 6,
sigma_max: 500,
sigma_min: 0.03,
batch_size: 1,
sampler_type: "dpmpp-3m-sde",
output_format: "mp3",
seconds_start: 0,
seconds_total: 60,
negative_prompt: "",
init_noise_level: 1
}
}
);
console.log(output);
To learn more, take a look at the guide on getting started with Node.js.
pip install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
import replicate
Run ardianfe/stable-audio-2 using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
output = replicate.run(
"ardianfe/stable-audio-2:6761efad154cb36efc2b10bb5432b561e3ce2557130557b34f9fd4874a894c84",
input={
"seed": -1,
"steps": 100,
"prompt": "acoustic guitar",
"song_id": 10001,
"cfg_scale": 6,
"sigma_max": 500,
"sigma_min": 0.03,
"batch_size": 1,
"sampler_type": "dpmpp-3m-sde",
"output_format": "mp3",
"seconds_start": 0,
"seconds_total": 60,
"negative_prompt": "",
"init_noise_level": 1
}
)
print(output)
To learn more, take a look at the guide on getting started with Python.
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
Run ardianfe/stable-audio-2 using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
curl -s -X POST \
-H "Authorization: Bearer $REPLICATE_API_TOKEN" \
-H "Content-Type: application/json" \
-H "Prefer: wait" \
-d $'{
"version": "ardianfe/stable-audio-2:6761efad154cb36efc2b10bb5432b561e3ce2557130557b34f9fd4874a894c84",
"input": {
"seed": -1,
"steps": 100,
"prompt": "acoustic guitar",
"song_id": 10001,
"cfg_scale": 6,
"sigma_max": 500,
"sigma_min": 0.03,
"batch_size": 1,
"sampler_type": "dpmpp-3m-sde",
"output_format": "mp3",
"seconds_start": 0,
"seconds_total": 60,
"negative_prompt": "",
"init_noise_level": 1
}
}' \
https://api.replicate.com/v1/predictions
To learn more, take a look at Replicate’s HTTP API reference docs.
Add a payment method to run this model.
By signing in, you agree to our terms of service and privacy policy.
{
"completed_at": "2024-10-16T00:07:47.521407Z",
"created_at": "2024-10-16T00:06:43.544000Z",
"data_removed": false,
"error": null,
"id": "5z7whv7rb1rgg0cjjajtny239m",
"input": {
"seed": -1,
"steps": 100,
"prompt": "acoustic guitar",
"song_id": 10001,
"cfg_scale": 6,
"sigma_max": 500,
"sigma_min": 0.03,
"batch_size": 1,
"sampler_type": "dpmpp-3m-sde",
"output_format": "mp3",
"seconds_start": 0,
"seconds_total": 60,
"negative_prompt": "",
"init_noise_level": 1
},
"logs": "Prompt: acoustic guitar\n2228991867\n/src/stable_audio_tools/models/conditioners.py:314: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.\nwith torch.cuda.amp.autocast(dtype=torch.float16) and torch.set_grad_enabled(self.enable_grad):\n/src/stable_audio_tools/inference/sampling.py:177: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.\nwith torch.cuda.amp.autocast():\n 0%| | 0/100 [00:00<?, ?it/s]/root/.pyenv/versions/3.10.14/lib/python3.10/contextlib.py:103: FutureWarning: `torch.backends.cuda.sdp_kernel()` is deprecated. In the future, this context manager will be removed. Please see `torch.nn.attention.sdpa_kernel()` for the new context manager, with updated signature.\nself.gen = func(*args, **kwds)\n 1%| | 1/100 [00:00<00:18, 5.39it/s]\n 2%|▏ | 2/100 [00:00<00:13, 7.33it/s]\n 3%|▎ | 3/100 [00:00<00:11, 8.28it/s]\n 4%|▍ | 4/100 [00:00<00:10, 8.84it/s]\n 5%|▌ | 5/100 [00:00<00:10, 9.17it/s]\n 6%|▌ | 6/100 [00:00<00:10, 9.39it/s]\n 7%|▋ | 7/100 [00:00<00:09, 9.54it/s]\n 8%|▊ | 8/100 [00:00<00:09, 9.63it/s]\n 9%|▉ | 9/100 [00:00<00:09, 9.70it/s]\n 10%|█ | 10/100 [00:01<00:09, 9.75it/s]\n 11%|█ | 11/100 [00:01<00:09, 9.78it/s]\n 12%|█▏ | 12/100 [00:01<00:08, 9.82it/s]\n 13%|█▎ | 13/100 [00:01<00:08, 9.83it/s]\n 14%|█▍ | 14/100 [00:01<00:08, 9.85it/s]\n 15%|█▌ | 15/100 [00:01<00:08, 9.84it/s]\n 16%|█▌ | 16/100 [00:01<00:08, 9.84it/s]\n 17%|█▋ | 17/100 [00:01<00:08, 9.85it/s]\n 18%|█▊ | 18/100 [00:01<00:08, 9.84it/s]\n 19%|█▉ | 19/100 [00:02<00:08, 9.81it/s]\n 20%|██ | 20/100 [00:02<00:08, 9.83it/s]\n 21%|██ | 21/100 [00:02<00:08, 9.82it/s]\n 22%|██▏ | 22/100 [00:02<00:07, 9.83it/s]\n 23%|██▎ | 23/100 [00:02<00:07, 9.86it/s]\n 24%|██▍ | 24/100 [00:02<00:07, 9.87it/s]\n 25%|██▌ | 25/100 [00:02<00:07, 9.87it/s]\n 26%|██▌ | 26/100 [00:02<00:07, 9.86it/s]\n 27%|██▋ | 27/100 [00:02<00:07, 9.88it/s]\n 28%|██▊ | 28/100 
[00:02<00:07, 9.89it/s]\n 29%|██▉ | 29/100 [00:03<00:07, 9.88it/s]\n 30%|███ | 30/100 [00:03<00:07, 9.88it/s]\n 31%|███ | 31/100 [00:03<00:06, 9.88it/s]\n 32%|███▏ | 32/100 [00:03<00:06, 9.86it/s]\n 33%|███▎ | 33/100 [00:03<00:06, 9.85it/s]\n 34%|███▍ | 34/100 [00:03<00:06, 9.84it/s]\n 35%|███▌ | 35/100 [00:03<00:06, 9.86it/s]\n 36%|███▌ | 36/100 [00:03<00:06, 9.87it/s]\n 37%|███▋ | 37/100 [00:03<00:06, 9.88it/s]\n 38%|███▊ | 38/100 [00:03<00:06, 9.88it/s]\n 39%|███▉ | 39/100 [00:04<00:06, 9.87it/s]\n 40%|████ | 40/100 [00:04<00:06, 9.86it/s]\n 41%|████ | 41/100 [00:04<00:05, 9.83it/s]\n 42%|████▏ | 42/100 [00:04<00:05, 9.82it/s]\n 43%|████▎ | 43/100 [00:04<00:05, 9.80it/s]\n 44%|████▍ | 44/100 [00:04<00:05, 9.78it/s]\n 45%|████▌ | 45/100 [00:04<00:05, 9.81it/s]\n 46%|████▌ | 46/100 [00:04<00:05, 9.82it/s]\n 47%|████▋ | 47/100 [00:04<00:05, 9.87it/s]\n 48%|████▊ | 48/100 [00:04<00:05, 9.89it/s]\n 49%|████▉ | 49/100 [00:05<00:05, 9.90it/s]\n 50%|█████ | 50/100 [00:05<00:05, 9.91it/s]\n 51%|█████ | 51/100 [00:05<00:04, 9.91it/s]\n 52%|█████▏ | 52/100 [00:05<00:04, 9.91it/s]\n 53%|█████▎ | 53/100 [00:05<00:04, 9.91it/s]\n 54%|█████▍ | 54/100 [00:05<00:04, 9.93it/s]\n 55%|█████▌ | 55/100 [00:05<00:04, 9.93it/s]\n 56%|█████▌ | 56/100 [00:05<00:04, 9.94it/s]\n 57%|█████▋ | 57/100 [00:05<00:04, 9.94it/s]\n 58%|█████▊ | 58/100 [00:05<00:04, 9.94it/s]\n 59%|█████▉ | 59/100 [00:06<00:04, 9.94it/s]\n 60%|██████ | 60/100 [00:06<00:04, 9.94it/s]\n 61%|██████ | 61/100 [00:06<00:03, 9.93it/s]\n 62%|██████▏ | 62/100 [00:06<00:03, 9.94it/s]\n 63%|██████▎ | 63/100 [00:06<00:03, 9.95it/s]\n 64%|██████▍ | 64/100 [00:06<00:03, 9.94it/s]\n 65%|██████▌ | 65/100 [00:06<00:03, 9.92it/s]\n 66%|██████▌ | 66/100 [00:06<00:03, 9.93it/s]\n 67%|██████▋ | 67/100 [00:06<00:03, 9.93it/s]\n 68%|██████▊ | 68/100 [00:06<00:03, 9.93it/s]\n 69%|██████▉ | 69/100 [00:07<00:03, 9.95it/s]\n 70%|███████ | 70/100 [00:07<00:03, 9.95it/s]\n 71%|███████ | 71/100 [00:07<00:02, 9.95it/s]\n 72%|███████▏ | 72/100 
[00:07<00:02, 9.94it/s]\n 73%|███████▎ | 73/100 [00:07<00:02, 9.93it/s]\n 75%|███████▌ | 75/100 [00:07<00:02, 9.97it/s]\n 77%|███████▋ | 77/100 [00:07<00:02, 9.99it/s]\n 79%|███████▉ | 79/100 [00:08<00:02, 10.00it/s]\n 80%|████████ | 80/100 [00:08<00:01, 10.00it/s]\n 81%|████████ | 81/100 [00:08<00:01, 9.98it/s]\n 83%|████████▎ | 83/100 [00:08<00:01, 9.98it/s]\n 84%|████████▍ | 84/100 [00:08<00:01, 9.96it/s]\n 86%|████████▌ | 86/100 [00:08<00:01, 9.97it/s]\n 88%|████████▊ | 88/100 [00:08<00:01, 9.97it/s]\n 89%|████████▉ | 89/100 [00:09<00:01, 9.94it/s]\n 90%|█████████ | 90/100 [00:09<00:01, 9.93it/s]\n 91%|█████████ | 91/100 [00:09<00:00, 9.94it/s]\n 92%|█████████▏| 92/100 [00:09<00:00, 9.93it/s]\n 93%|█████████▎| 93/100 [00:09<00:00, 9.94it/s]\n 95%|█████████▌| 95/100 [00:09<00:00, 9.94it/s]\n 96%|█████████▌| 96/100 [00:09<00:00, 9.94it/s]\n 98%|█████████▊| 98/100 [00:09<00:00, 9.99it/s]/root/.pyenv/versions/3.10.14/lib/python3.10/site-packages/torchsde/_brownian/brownian_interval.py:599: UserWarning: Should have ta>=t0 but got ta=0.029999999329447746 and t0=0.03.\nwarnings.warn(f\"Should have ta>=t0 but got ta={ta} and t0={self._start}.\")\n100%|██████████| 100/100 [00:10<00:00, 10.10it/s]\n100%|██████████| 100/100 [00:10<00:00, 9.83it/s]\nffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers\nbuilt with gcc 11 (Ubuntu 11.2.0-19ubuntu1)\nconfiguration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus 
--enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --enable-libzmq --enable-libzvbi --enable-lv2 --enable-omx --enable-openal --enable-opencl --enable-opengl --enable-sdl2 --enable-pocketsphinx --enable-librsvg --enable-libmfx --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-libx264 --enable-shared\nlibavutil 56. 70.100 / 56. 70.100\nlibavcodec 58.134.100 / 58.134.100\nlibavformat 58. 76.100 / 58. 76.100\nlibavdevice 58. 13.100 / 58. 13.100\nlibavfilter 7.110.100 / 7.110.100\nlibswscale 5. 9.100 / 5. 9.100\nlibswresample 3. 9.100 / 3. 9.100\nlibpostproc 55. 9.100 / 55. 9.100\nGuessed Channel Layout for Input Stream #0.0 : stereo\nInput #0, wav, from 'output.wav':\nMetadata:\nencoder : Lavf58.76.100\nDuration: 00:01:00.98, bitrate: 1411 kb/s\nStream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, stereo, s16, 1411 kb/s\nStream mapping:\nStream #0:0 -> #0:0 (pcm_s16le (native) -> mp3 (libmp3lame))\nPress [q] to stop, [?] for help\nOutput #0, mp3, to 'output.mp3':\nMetadata:\nTSSE : Lavf58.76.100\nStream #0:0: Audio: mp3, 44100 Hz, stereo, s16p\nMetadata:\nencoder : Lavc58.134.100 libmp3lame\nsize= 0kB time=00:00:00.00 bitrate=N/A speed=N/A\nsize= 512kB time=00:00:33.72 bitrate= 124.4kbits/s speed=67.4x\nsize= 954kB time=00:01:00.97 bitrate= 128.1kbits/s speed=76.4x\nvideo:0kB audio:953kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.025913%\nSuccess: {'message': 'Song state with id 10001 updated successfully'}",
"metrics": {
"predict_time": 15.780399997,
"total_time": 63.977407
},
"output": {
"output": "https://storage.googleapis.com/lagoe_prod_generated_songs/10001/10001.mp3"
},
"started_at": "2024-10-16T00:07:31.741007Z",
"status": "succeeded",
"urls": {
"get": "https://api.replicate.com/v1/predictions/5z7whv7rb1rgg0cjjajtny239m",
"cancel": "https://api.replicate.com/v1/predictions/5z7whv7rb1rgg0cjjajtny239m/cancel"
},
"version": "4a98a2079e2e0cd380582ba8b159e943664135b721db6bbf1eefdffbdebba2d1"
}
Prompt: acoustic guitar
2228991867
/src/stable_audio_tools/models/conditioners.py:314: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.
with torch.cuda.amp.autocast(dtype=torch.float16) and torch.set_grad_enabled(self.enable_grad):
/src/stable_audio_tools/inference/sampling.py:177: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.
with torch.cuda.amp.autocast():
0%| | 0/100 [00:00<?, ?it/s]/root/.pyenv/versions/3.10.14/lib/python3.10/contextlib.py:103: FutureWarning: `torch.backends.cuda.sdp_kernel()` is deprecated. In the future, this context manager will be removed. Please see `torch.nn.attention.sdpa_kernel()` for the new context manager, with updated signature.
self.gen = func(*args, **kwds)
1%| | 1/100 [00:00<00:18, 5.39it/s]
2%|▏ | 2/100 [00:00<00:13, 7.33it/s]
3%|▎ | 3/100 [00:00<00:11, 8.28it/s]
4%|▍ | 4/100 [00:00<00:10, 8.84it/s]
5%|▌ | 5/100 [00:00<00:10, 9.17it/s]
6%|▌ | 6/100 [00:00<00:10, 9.39it/s]
7%|▋ | 7/100 [00:00<00:09, 9.54it/s]
8%|▊ | 8/100 [00:00<00:09, 9.63it/s]
9%|▉ | 9/100 [00:00<00:09, 9.70it/s]
10%|█ | 10/100 [00:01<00:09, 9.75it/s]
11%|█ | 11/100 [00:01<00:09, 9.78it/s]
12%|█▏ | 12/100 [00:01<00:08, 9.82it/s]
13%|█▎ | 13/100 [00:01<00:08, 9.83it/s]
14%|█▍ | 14/100 [00:01<00:08, 9.85it/s]
15%|█▌ | 15/100 [00:01<00:08, 9.84it/s]
16%|█▌ | 16/100 [00:01<00:08, 9.84it/s]
17%|█▋ | 17/100 [00:01<00:08, 9.85it/s]
18%|█▊ | 18/100 [00:01<00:08, 9.84it/s]
19%|█▉ | 19/100 [00:02<00:08, 9.81it/s]
20%|██ | 20/100 [00:02<00:08, 9.83it/s]
21%|██ | 21/100 [00:02<00:08, 9.82it/s]
22%|██▏ | 22/100 [00:02<00:07, 9.83it/s]
23%|██▎ | 23/100 [00:02<00:07, 9.86it/s]
24%|██▍ | 24/100 [00:02<00:07, 9.87it/s]
25%|██▌ | 25/100 [00:02<00:07, 9.87it/s]
26%|██▌ | 26/100 [00:02<00:07, 9.86it/s]
27%|██▋ | 27/100 [00:02<00:07, 9.88it/s]
28%|██▊ | 28/100 [00:02<00:07, 9.89it/s]
29%|██▉ | 29/100 [00:03<00:07, 9.88it/s]
30%|███ | 30/100 [00:03<00:07, 9.88it/s]
31%|███ | 31/100 [00:03<00:06, 9.88it/s]
32%|███▏ | 32/100 [00:03<00:06, 9.86it/s]
33%|███▎ | 33/100 [00:03<00:06, 9.85it/s]
34%|███▍ | 34/100 [00:03<00:06, 9.84it/s]
35%|███▌ | 35/100 [00:03<00:06, 9.86it/s]
36%|███▌ | 36/100 [00:03<00:06, 9.87it/s]
37%|███▋ | 37/100 [00:03<00:06, 9.88it/s]
38%|███▊ | 38/100 [00:03<00:06, 9.88it/s]
39%|███▉ | 39/100 [00:04<00:06, 9.87it/s]
40%|████ | 40/100 [00:04<00:06, 9.86it/s]
41%|████ | 41/100 [00:04<00:05, 9.83it/s]
42%|████▏ | 42/100 [00:04<00:05, 9.82it/s]
43%|████▎ | 43/100 [00:04<00:05, 9.80it/s]
44%|████▍ | 44/100 [00:04<00:05, 9.78it/s]
45%|████▌ | 45/100 [00:04<00:05, 9.81it/s]
46%|████▌ | 46/100 [00:04<00:05, 9.82it/s]
47%|████▋ | 47/100 [00:04<00:05, 9.87it/s]
48%|████▊ | 48/100 [00:04<00:05, 9.89it/s]
49%|████▉ | 49/100 [00:05<00:05, 9.90it/s]
50%|█████ | 50/100 [00:05<00:05, 9.91it/s]
51%|█████ | 51/100 [00:05<00:04, 9.91it/s]
52%|█████▏ | 52/100 [00:05<00:04, 9.91it/s]
53%|█████▎ | 53/100 [00:05<00:04, 9.91it/s]
54%|█████▍ | 54/100 [00:05<00:04, 9.93it/s]
55%|█████▌ | 55/100 [00:05<00:04, 9.93it/s]
56%|█████▌ | 56/100 [00:05<00:04, 9.94it/s]
57%|█████▋ | 57/100 [00:05<00:04, 9.94it/s]
58%|█████▊ | 58/100 [00:05<00:04, 9.94it/s]
59%|█████▉ | 59/100 [00:06<00:04, 9.94it/s]
60%|██████ | 60/100 [00:06<00:04, 9.94it/s]
61%|██████ | 61/100 [00:06<00:03, 9.93it/s]
62%|██████▏ | 62/100 [00:06<00:03, 9.94it/s]
63%|██████▎ | 63/100 [00:06<00:03, 9.95it/s]
64%|██████▍ | 64/100 [00:06<00:03, 9.94it/s]
65%|██████▌ | 65/100 [00:06<00:03, 9.92it/s]
66%|██████▌ | 66/100 [00:06<00:03, 9.93it/s]
67%|██████▋ | 67/100 [00:06<00:03, 9.93it/s]
68%|██████▊ | 68/100 [00:06<00:03, 9.93it/s]
69%|██████▉ | 69/100 [00:07<00:03, 9.95it/s]
70%|███████ | 70/100 [00:07<00:03, 9.95it/s]
71%|███████ | 71/100 [00:07<00:02, 9.95it/s]
72%|███████▏ | 72/100 [00:07<00:02, 9.94it/s]
73%|███████▎ | 73/100 [00:07<00:02, 9.93it/s]
75%|███████▌ | 75/100 [00:07<00:02, 9.97it/s]
77%|███████▋ | 77/100 [00:07<00:02, 9.99it/s]
79%|███████▉ | 79/100 [00:08<00:02, 10.00it/s]
80%|████████ | 80/100 [00:08<00:01, 10.00it/s]
81%|████████ | 81/100 [00:08<00:01, 9.98it/s]
83%|████████▎ | 83/100 [00:08<00:01, 9.98it/s]
84%|████████▍ | 84/100 [00:08<00:01, 9.96it/s]
86%|████████▌ | 86/100 [00:08<00:01, 9.97it/s]
88%|████████▊ | 88/100 [00:08<00:01, 9.97it/s]
89%|████████▉ | 89/100 [00:09<00:01, 9.94it/s]
90%|█████████ | 90/100 [00:09<00:01, 9.93it/s]
91%|█████████ | 91/100 [00:09<00:00, 9.94it/s]
92%|█████████▏| 92/100 [00:09<00:00, 9.93it/s]
93%|█████████▎| 93/100 [00:09<00:00, 9.94it/s]
95%|█████████▌| 95/100 [00:09<00:00, 9.94it/s]
96%|█████████▌| 96/100 [00:09<00:00, 9.94it/s]
98%|█████████▊| 98/100 [00:09<00:00, 9.99it/s]/root/.pyenv/versions/3.10.14/lib/python3.10/site-packages/torchsde/_brownian/brownian_interval.py:599: UserWarning: Should have ta>=t0 but got ta=0.029999999329447746 and t0=0.03.
warnings.warn(f"Should have ta>=t0 but got ta={ta} and t0={self._start}.")
100%|██████████| 100/100 [00:10<00:00, 10.10it/s]
100%|██████████| 100/100 [00:10<00:00, 9.83it/s]
ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --enable-libzmq --enable-libzvbi --enable-lv2 --enable-omx --enable-openal --enable-opencl --enable-opengl --enable-sdl2 --enable-pocketsphinx --enable-librsvg --enable-libmfx --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-libx264 --enable-shared
libavutil 56. 70.100 / 56. 70.100
libavcodec 58.134.100 / 58.134.100
libavformat 58. 76.100 / 58. 76.100
libavdevice 58. 13.100 / 58. 13.100
libavfilter 7.110.100 / 7.110.100
libswscale 5. 9.100 / 5. 9.100
libswresample 3. 9.100 / 3. 9.100
libpostproc 55. 9.100 / 55. 9.100
Guessed Channel Layout for Input Stream #0.0 : stereo
Input #0, wav, from 'output.wav':
Metadata:
encoder : Lavf58.76.100
Duration: 00:01:00.98, bitrate: 1411 kb/s
Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, stereo, s16, 1411 kb/s
Stream mapping:
Stream #0:0 -> #0:0 (pcm_s16le (native) -> mp3 (libmp3lame))
Press [q] to stop, [?] for help
Output #0, mp3, to 'output.mp3':
Metadata:
TSSE : Lavf58.76.100
Stream #0:0: Audio: mp3, 44100 Hz, stereo, s16p
Metadata:
encoder : Lavc58.134.100 libmp3lame
size= 0kB time=00:00:00.00 bitrate=N/A speed=N/A
size= 512kB time=00:00:33.72 bitrate= 124.4kbits/s speed=67.4x
size= 954kB time=00:01:00.97 bitrate= 128.1kbits/s speed=76.4x
video:0kB audio:953kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.025913%
Success: {'message': 'Song state with id 10001 updated successfully'}
This output was created using a different version of the model, ardianfe/stable-audio-2:4a98a207.
This model runs on Nvidia L40S GPU hardware. We don't yet have enough runs of this model to provide performance information.
Fine-tuned Stable Audio
This model is cold. You'll get a fast response if the model is warm and already running, and a slower response if the model is cold and starting up.
Prompt: acoustic guitar
2228991867
/src/stable_audio_tools/models/conditioners.py:314: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.
with torch.cuda.amp.autocast(dtype=torch.float16) and torch.set_grad_enabled(self.enable_grad):
/src/stable_audio_tools/inference/sampling.py:177: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.
with torch.cuda.amp.autocast():
0%| | 0/100 [00:00<?, ?it/s]/root/.pyenv/versions/3.10.14/lib/python3.10/contextlib.py:103: FutureWarning: `torch.backends.cuda.sdp_kernel()` is deprecated. In the future, this context manager will be removed. Please see `torch.nn.attention.sdpa_kernel()` for the new context manager, with updated signature.
self.gen = func(*args, **kwds)
1%| | 1/100 [00:00<00:18, 5.39it/s]
2%|▏ | 2/100 [00:00<00:13, 7.33it/s]
3%|▎ | 3/100 [00:00<00:11, 8.28it/s]
4%|▍ | 4/100 [00:00<00:10, 8.84it/s]
5%|▌ | 5/100 [00:00<00:10, 9.17it/s]
6%|▌ | 6/100 [00:00<00:10, 9.39it/s]
7%|▋ | 7/100 [00:00<00:09, 9.54it/s]
8%|▊ | 8/100 [00:00<00:09, 9.63it/s]
9%|▉ | 9/100 [00:00<00:09, 9.70it/s]
10%|█ | 10/100 [00:01<00:09, 9.75it/s]
11%|█ | 11/100 [00:01<00:09, 9.78it/s]
12%|█▏ | 12/100 [00:01<00:08, 9.82it/s]
13%|█▎ | 13/100 [00:01<00:08, 9.83it/s]
14%|█▍ | 14/100 [00:01<00:08, 9.85it/s]
15%|█▌ | 15/100 [00:01<00:08, 9.84it/s]
16%|█▌ | 16/100 [00:01<00:08, 9.84it/s]
17%|█▋ | 17/100 [00:01<00:08, 9.85it/s]
18%|█▊ | 18/100 [00:01<00:08, 9.84it/s]
19%|█▉ | 19/100 [00:02<00:08, 9.81it/s]
20%|██ | 20/100 [00:02<00:08, 9.83it/s]
21%|██ | 21/100 [00:02<00:08, 9.82it/s]
22%|██▏ | 22/100 [00:02<00:07, 9.83it/s]
23%|██▎ | 23/100 [00:02<00:07, 9.86it/s]
24%|██▍ | 24/100 [00:02<00:07, 9.87it/s]
25%|██▌ | 25/100 [00:02<00:07, 9.87it/s]
26%|██▌ | 26/100 [00:02<00:07, 9.86it/s]
27%|██▋ | 27/100 [00:02<00:07, 9.88it/s]
28%|██▊ | 28/100 [00:02<00:07, 9.89it/s]
29%|██▉ | 29/100 [00:03<00:07, 9.88it/s]
30%|███ | 30/100 [00:03<00:07, 9.88it/s]
31%|███ | 31/100 [00:03<00:06, 9.88it/s]
32%|███▏ | 32/100 [00:03<00:06, 9.86it/s]
33%|███▎ | 33/100 [00:03<00:06, 9.85it/s]
34%|███▍ | 34/100 [00:03<00:06, 9.84it/s]
35%|███▌ | 35/100 [00:03<00:06, 9.86it/s]
36%|███▌ | 36/100 [00:03<00:06, 9.87it/s]
37%|███▋ | 37/100 [00:03<00:06, 9.88it/s]
38%|███▊ | 38/100 [00:03<00:06, 9.88it/s]
39%|███▉ | 39/100 [00:04<00:06, 9.87it/s]
40%|████ | 40/100 [00:04<00:06, 9.86it/s]
41%|████ | 41/100 [00:04<00:05, 9.83it/s]
42%|████▏ | 42/100 [00:04<00:05, 9.82it/s]
43%|████▎ | 43/100 [00:04<00:05, 9.80it/s]
44%|████▍ | 44/100 [00:04<00:05, 9.78it/s]
45%|████▌ | 45/100 [00:04<00:05, 9.81it/s]
46%|████▌ | 46/100 [00:04<00:05, 9.82it/s]
47%|████▋ | 47/100 [00:04<00:05, 9.87it/s]
48%|████▊ | 48/100 [00:04<00:05, 9.89it/s]
49%|████▉ | 49/100 [00:05<00:05, 9.90it/s]
50%|█████ | 50/100 [00:05<00:05, 9.91it/s]
51%|█████ | 51/100 [00:05<00:04, 9.91it/s]
52%|█████▏ | 52/100 [00:05<00:04, 9.91it/s]
53%|█████▎ | 53/100 [00:05<00:04, 9.91it/s]
54%|█████▍ | 54/100 [00:05<00:04, 9.93it/s]
55%|█████▌ | 55/100 [00:05<00:04, 9.93it/s]
56%|█████▌ | 56/100 [00:05<00:04, 9.94it/s]
57%|█████▋ | 57/100 [00:05<00:04, 9.94it/s]
58%|█████▊ | 58/100 [00:05<00:04, 9.94it/s]
59%|█████▉ | 59/100 [00:06<00:04, 9.94it/s]
60%|██████ | 60/100 [00:06<00:04, 9.94it/s]
61%|██████ | 61/100 [00:06<00:03, 9.93it/s]
62%|██████▏ | 62/100 [00:06<00:03, 9.94it/s]
63%|██████▎ | 63/100 [00:06<00:03, 9.95it/s]
64%|██████▍ | 64/100 [00:06<00:03, 9.94it/s]
65%|██████▌ | 65/100 [00:06<00:03, 9.92it/s]
66%|██████▌ | 66/100 [00:06<00:03, 9.93it/s]
67%|██████▋ | 67/100 [00:06<00:03, 9.93it/s]
68%|██████▊ | 68/100 [00:06<00:03, 9.93it/s]
69%|██████▉ | 69/100 [00:07<00:03, 9.95it/s]
70%|███████ | 70/100 [00:07<00:03, 9.95it/s]
71%|███████ | 71/100 [00:07<00:02, 9.95it/s]
72%|███████▏ | 72/100 [00:07<00:02, 9.94it/s]
73%|███████▎ | 73/100 [00:07<00:02, 9.93it/s]
75%|███████▌ | 75/100 [00:07<00:02, 9.97it/s]
77%|███████▋ | 77/100 [00:07<00:02, 9.99it/s]
79%|███████▉ | 79/100 [00:08<00:02, 10.00it/s]
80%|████████ | 80/100 [00:08<00:01, 10.00it/s]
81%|████████ | 81/100 [00:08<00:01, 9.98it/s]
83%|████████▎ | 83/100 [00:08<00:01, 9.98it/s]
84%|████████▍ | 84/100 [00:08<00:01, 9.96it/s]
86%|████████▌ | 86/100 [00:08<00:01, 9.97it/s]
88%|████████▊ | 88/100 [00:08<00:01, 9.97it/s]
89%|████████▉ | 89/100 [00:09<00:01, 9.94it/s]
90%|█████████ | 90/100 [00:09<00:01, 9.93it/s]
91%|█████████ | 91/100 [00:09<00:00, 9.94it/s]
92%|█████████▏| 92/100 [00:09<00:00, 9.93it/s]
93%|█████████▎| 93/100 [00:09<00:00, 9.94it/s]
95%|█████████▌| 95/100 [00:09<00:00, 9.94it/s]
96%|█████████▌| 96/100 [00:09<00:00, 9.94it/s]
98%|█████████▊| 98/100 [00:09<00:00, 9.99it/s]/root/.pyenv/versions/3.10.14/lib/python3.10/site-packages/torchsde/_brownian/brownian_interval.py:599: UserWarning: Should have ta>=t0 but got ta=0.029999999329447746 and t0=0.03.
warnings.warn(f"Should have ta>=t0 but got ta={ta} and t0={self._start}.")
100%|██████████| 100/100 [00:10<00:00, 10.10it/s]
100%|██████████| 100/100 [00:10<00:00, 9.83it/s]
ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --enable-libzmq --enable-libzvbi --enable-lv2 --enable-omx --enable-openal --enable-opencl --enable-opengl --enable-sdl2 --enable-pocketsphinx --enable-librsvg --enable-libmfx --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-libx264 --enable-shared
libavutil 56. 70.100 / 56. 70.100
libavcodec 58.134.100 / 58.134.100
libavformat 58. 76.100 / 58. 76.100
libavdevice 58. 13.100 / 58. 13.100
libavfilter 7.110.100 / 7.110.100
libswscale 5. 9.100 / 5. 9.100
libswresample 3. 9.100 / 3. 9.100
libpostproc 55. 9.100 / 55. 9.100
Guessed Channel Layout for Input Stream #0.0 : stereo
Input #0, wav, from 'output.wav':
Metadata:
encoder : Lavf58.76.100
Duration: 00:01:00.98, bitrate: 1411 kb/s
Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, stereo, s16, 1411 kb/s
Stream mapping:
Stream #0:0 -> #0:0 (pcm_s16le (native) -> mp3 (libmp3lame))
Press [q] to stop, [?] for help
Output #0, mp3, to 'output.mp3':
Metadata:
TSSE : Lavf58.76.100
Stream #0:0: Audio: mp3, 44100 Hz, stereo, s16p
Metadata:
encoder : Lavc58.134.100 libmp3lame
size= 0kB time=00:00:00.00 bitrate=N/A speed=N/A
size= 512kB time=00:00:33.72 bitrate= 124.4kbits/s speed=67.4x
size= 954kB time=00:01:00.97 bitrate= 128.1kbits/s speed=76.4x
video:0kB audio:953kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.025913%
Success: {'message': 'Song state with id 10001 updated successfully'}