multitrickfox
/
musicgen_custom
- Public
- 2 runs
Run multitrickfox/musicgen_custom with an API
Use one of our client libraries to get started quickly. Clicking on a library will take you to the Playground tab where you can tweak different inputs, see the results, and copy the corresponding code to use in your own project.
Input schema
The fields you can use to run this model with an API. If you don't give a value for a field, its default value will be used.
Field | Type | Default value | Description |
---|---|---|---|
model_version |
string
(enum)
|
stereo-melody-large
Options: stereo-melody-large, stereo-large, melody-large, large, stereo-melody, stereo-medium, melody, medium |
Model to use for generation
|
prompt |
string
|
A description of the music you want to generate.
|
|
input_audio |
string
|
An audio file that will influence the generated music. If `continuation` is `True`, the generated music will be a continuation of the audio file. Otherwise, the generated music will mimic the audio file's melody.
|
|
duration |
integer
|
8
|
Duration of the generated audio in seconds.
|
continuation |
boolean
|
False
|
If `True`, generated music will continue from `input_audio`. Otherwise, generated music will mimic `input_audio`'s melody.
|
continuation_start |
integer
|
0
|
Start time of the audio file to use for continuation.
|
continuation_end |
integer
|
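End time of the audio file to use for continuation. If left unset (None), defaults to the end of the audio clip. Note: -1 is also described as meaning "end of clip", but the input schema sets `minimum: 0`, so prefer omitting the field over passing -1.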
|
|
multi_band_diffusion |
boolean
|
False
|
If `True`, the EnCodec tokens will be decoded with MultiBand Diffusion. Only works with non-stereo models.
|
normalization_strategy |
string
(enum)
|
loudness
Options: loudness, clip, peak, rms |
Strategy for normalizing audio.
|
top_k |
integer
|
250
|
Reduces sampling to the k most likely tokens.
|
top_p |
number
|
0
|
Reduces sampling to tokens with cumulative probability of p. When set to `0` (default), top_k sampling is used.
|
temperature |
number
|
1
|
Controls the 'conservativeness' of the sampling process. Higher temperature means more diversity.
|
classifier_free_guidance |
integer
|
3
|
Increases the influence of inputs on the output. Higher values produce lower-variance outputs that adhere more closely to inputs.
|
output_format |
string
(enum)
|
wav
Options: wav, mp3 |
Output format for generated audio.
|
seed |
integer
|
Seed for random number generator. If None or -1, a random seed will be used.
|
{
"type": "object",
"title": "Input",
"properties": {
"seed": {
"type": "integer",
"title": "Seed",
"x-order": 14,
"description": "Seed for random number generator. If None or -1, a random seed will be used."
},
"top_k": {
"type": "integer",
"title": "Top K",
"default": 250,
"x-order": 9,
"description": "Reduces sampling to the k most likely tokens."
},
"top_p": {
"type": "number",
"title": "Top P",
"default": 0,
"x-order": 10,
"description": "Reduces sampling to tokens with cumulative probability of p. When set to `0` (default), top_k sampling is used."
},
"prompt": {
"type": "string",
"title": "Prompt",
"x-order": 1,
"description": "A description of the music you want to generate."
},
"duration": {
"type": "integer",
"title": "Duration",
"default": 8,
"x-order": 3,
"description": "Duration of the generated audio in seconds."
},
"input_audio": {
"type": "string",
"title": "Input Audio",
"format": "uri",
"x-order": 2,
"description": "An audio file that will influence the generated music. If `continuation` is `True`, the generated music will be a continuation of the audio file. Otherwise, the generated music will mimic the audio file's melody."
},
"temperature": {
"type": "number",
"title": "Temperature",
"default": 1,
"x-order": 11,
"description": "Controls the 'conservativeness' of the sampling process. Higher temperature means more diversity."
},
"continuation": {
"type": "boolean",
"title": "Continuation",
"default": false,
"x-order": 4,
"description": "If `True`, generated music will continue from `input_audio`. Otherwise, generated music will mimic `input_audio`'s melody."
},
"model_version": {
"enum": [
"stereo-melody-large",
"stereo-large",
"melody-large",
"large",
"stereo-melody",
"stereo-medium",
"melody",
"medium"
],
"type": "string",
"title": "model_version",
"description": "Model to use for generation",
"default": "stereo-melody-large",
"x-order": 0
},
"output_format": {
"enum": [
"wav",
"mp3"
],
"type": "string",
"title": "output_format",
"description": "Output format for generated audio.",
"default": "wav",
"x-order": 13
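},
"continuation_end": {
"type": "integer",
"title": "Continuation End",
"minimum": 0,
"x-order": 6,
"description": "End time of the audio file to use for continuation. If left unset (None), defaults to the end of the audio clip. Note: -1 is also described as meaning 'end of clip', but this schema sets `minimum: 0`, so prefer omitting the field over passing -1."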
},
"continuation_start": {
"type": "integer",
"title": "Continuation Start",
"default": 0,
"minimum": 0,
"x-order": 5,
"description": "Start time of the audio file to use for continuation."
},
"multi_band_diffusion": {
"type": "boolean",
"title": "Multi Band Diffusion",
"default": false,
"x-order": 7,
"description": "If `True`, the EnCodec tokens will be decoded with MultiBand Diffusion. Only works with non-stereo models."
},
"normalization_strategy": {
"enum": [
"loudness",
"clip",
"peak",
"rms"
],
"type": "string",
"title": "normalization_strategy",
"description": "Strategy for normalizing audio.",
"default": "loudness",
"x-order": 8
},
"classifier_free_guidance": {
"type": "integer",
"title": "Classifier Free Guidance",
"default": 3,
"x-order": 12,
"description": "Increases the influence of inputs on the output. Higher values produce lower-variance outputs that adhere more closely to inputs."
}
}
}
Output schema
The shape of the response you’ll get when you run this model with an API.
Schema
{
"type": "string",
"title": "Output",
"format": "uri"
}