jimothyjohn/stable-audio-3-medium

stable-audio-3-medium

Public

6 runs

License

GitHub

Weights

Run jimothyjohn/stable-audio-3-medium with an API

Use one of our client libraries to get started quickly. Clicking on a library will take you to the Playground tab where you can tweak different inputs, see the results, and copy the corresponding code to use in your own project.

Input schema

The fields you can use to run this model with an API. If you don't give a value for a field, its default value will be used.

Field	Type	Default value	Description
prompt	string		Text description of the audio to generate.
negative_prompt	string		Qualities to avoid in the output. Only affects -base models.
duration	number	30 Min: 1 Max: 380	Length of the generated audio in seconds.
steps	integer	8 Min: 4 Max: 50	Number of diffusion sampling steps. 8 is the post-trained default; lower is faster, higher rarely helps.
cfg_scale	number	1 Min: 0.5 Max: 15	Classifier-free guidance scale. Only affects -base models.
seed	integer	-1	Random seed. -1 selects a new random seed for each run.
init_audio	string		Optional source audio for audio-to-audio editing.
init_noise_level	number	0.9 Min: 0 Max: 1	How much the init audio influences the output. 1.0 = pure generation, lower keeps more of the original.
inpaint_audio	string		Optional source audio for inpainting or continuation. Set inpaint_start_seconds and inpaint_end_seconds to mark the region to regenerate; set start to the file length to extend it.
inpaint_start_seconds	number	0 Min: 0	Start of the inpaint region in seconds (used with inpaint_audio).
inpaint_end_seconds	number	0 Min: 0	End of the inpaint region in seconds (used with inpaint_audio).
output_format	string	wav Options: wav, mp3	Output file format.

{
  "type": "object",
  "title": "Input",
  "required": [
    "prompt"
  ],
  "properties": {
    "seed": {
      "type": "integer",
      "title": "Seed",
      "default": -1,
      "x-order": 5,
      "description": "Random seed. -1 selects a new random seed for each run."
    },
    "steps": {
      "type": "integer",
      "title": "Steps",
      "default": 8,
      "maximum": 50,
      "minimum": 4,
      "x-order": 3,
      "description": "Number of diffusion sampling steps. 8 is the post-trained default; lower is faster, higher rarely helps."
    },
    "prompt": {
      "type": "string",
      "title": "Prompt",
      "x-order": 0,
      "description": "Text description of the audio to generate."
    },
    "duration": {
      "type": "number",
      "title": "Duration",
      "default": 30,
      "maximum": 380,
      "minimum": 1,
      "x-order": 2,
      "description": "Length of the generated audio in seconds."
    },
    "cfg_scale": {
      "type": "number",
      "title": "Cfg Scale",
      "default": 1,
      "maximum": 15,
      "minimum": 0.5,
      "x-order": 4,
      "description": "Classifier-free guidance scale. Only affects -base models."
    },
    "init_audio": {
      "type": "string",
      "title": "Init Audio",
      "format": "uri",
      "x-order": 6,
      "nullable": true,
      "description": "Optional source audio for audio-to-audio editing."
    },
    "inpaint_audio": {
      "type": "string",
      "title": "Inpaint Audio",
      "format": "uri",
      "x-order": 8,
      "nullable": true,
      "description": "Optional source audio for inpainting or continuation. Set inpaint_start_seconds and inpaint_end_seconds to mark the region to regenerate; set start to the file length to extend it."
    },
    "output_format": {
      "enum": [
        "wav",
        "mp3"
      ],
      "type": "string",
      "title": "output_format",
      "description": "Output file format.",
      "default": "wav",
      "x-order": 11
    },
    "negative_prompt": {
      "type": "string",
      "title": "Negative Prompt",
      "default": "",
      "x-order": 1,
      "description": "Qualities to avoid in the output. Only affects -base models."
    },
    "init_noise_level": {
      "type": "number",
      "title": "Init Noise Level",
      "default": 0.9,
      "maximum": 1,
      "minimum": 0,
      "x-order": 7,
      "description": "How much the init audio influences the output. 1.0 = pure generation, lower keeps more of the original."
    },
    "inpaint_end_seconds": {
      "type": "number",
      "title": "Inpaint End Seconds",
      "default": 0,
      "minimum": 0,
      "x-order": 10,
      "description": "End of the inpaint region in seconds (used with inpaint_audio)."
    },
    "inpaint_start_seconds": {
      "type": "number",
      "title": "Inpaint Start Seconds",
      "default": 0,
      "minimum": 0,
      "x-order": 9,
      "description": "Start of the inpaint region in seconds (used with inpaint_audio)."
    }
  }
}

Output schema

The shape of the response you’ll get when you run this model with an API.

Schema

{
  "type": "string",
  "title": "Output",
  "format": "uri"
}