javanasse/musicgen-remixer-dev
Public
2
runs
Run javanasse/musicgen-remixer-dev with an API
Use one of our client libraries to get started quickly. Clicking on a library will take you to the Playground tab where you can tweak different inputs, see the results, and copy the corresponding code to use in your own project.
Input schema
The fields you can use to run this model with an API. If you don't give a value for a field, its default value will be used.
| Field | Type | Default value | Description |
|---|---|---|---|
| model_version | string (enum) | stereo-chord | Model type. Computations take longer when using `large` or `stereo` models. |
| prompt | string | | A description of the music you want to generate. |
| music_input | string | | An audio file input for the remix. |
| multi_band_diffusion | boolean | False | If `True`, the EnCodec tokens will be decoded with MultiBand Diffusion. Not compatible with `stereo` models. |
| normalization_strategy | string (enum) | loudness | Strategy for normalizing audio. |
| beat_sync_threshold | number | | When beat syncing, if the gap between generated downbeat timing and input audio downbeat timing is larger than `beat_sync_threshold`, the beats are considered not to correspond. If `None` or `-1`, `1.1/(bpm/60)` will be used as the value. 0.75 is a good value to set. |
| large_chord_voca | boolean | True | If `True`, more chords, such as 7th and diminished, are used. If `False`, only 12 major and 12 minor chords are used. |
| chroma_coefficient | number | 1 (Min: 0.5, Max: 2) | Coefficient value multiplied to multi-hot chord chroma. |
| top_k | integer | 250 | Reduces sampling to the k most likely tokens. |
| top_p | number | 0 | Reduces sampling to tokens with cumulative probability of p. When set to `0` (default), top_k sampling is used. |
| temperature | number | 1 | Controls the 'conservativeness' of the sampling process. Higher temperature means more diversity. |
| classifier_free_guidance | integer | 3 | Increases the influence of inputs on the output. Higher values produce lower-variance outputs that adhere more closely to inputs. |
| output_format | string (enum) | wav | Output format for generated audio. |
| return_instrumental | boolean | False | If `True`, the instrumental audio will also be returned. |
| seed | integer | | Seed for random number generator. If `None` or `-1`, a random seed will be used. |
{
"type": "object",
"title": "Input",
"properties": {
"seed": {
"type": "integer",
"title": "Seed",
"x-order": 14,
"description": "Seed for random number generator. If `None` or `-1`, a random seed will be used."
},
"top_k": {
"type": "integer",
"title": "Top K",
"default": 250,
"x-order": 8,
"description": "Reduces sampling to the k most likely tokens."
},
"top_p": {
"type": "number",
"title": "Top P",
"default": 0,
"x-order": 9,
"description": "Reduces sampling to tokens with cumulative probability of p. When set to `0` (default), top_k sampling is used."
},
"prompt": {
"type": "string",
"title": "Prompt",
"x-order": 1,
"description": "A description of the music you want to generate."
},
"music_input": {
"type": "string",
"title": "Music Input",
"format": "uri",
"x-order": 2,
"description": "An audio file input for the remix."
},
"temperature": {
"type": "number",
"title": "Temperature",
"default": 1,
"x-order": 10,
"description": "Controls the 'conservativeness' of the sampling process. Higher temperature means more diversity."
},
"model_version": {
"enum": [
"stereo-chord",
"stereo-chord-large",
"chord",
"chord-large"
],
"type": "string",
"title": "model_version",
"description": "Model type. Computations take longer when using `large` or `stereo` models.",
"default": "stereo-chord",
"x-order": 0
},
"output_format": {
"enum": [
"wav",
"mp3"
],
"type": "string",
"title": "output_format",
"description": "Output format for generated audio.",
"default": "wav",
"x-order": 12
},
"large_chord_voca": {
"type": "boolean",
"title": "Large Chord Voca",
"default": true,
"x-order": 6,
"description": "If `True`, more chords, such as 7th and diminished, are used. If `False`, only 12 major and 12 minor chords are used."
},
"chroma_coefficient": {
"type": "number",
"title": "Chroma Coefficient",
"default": 1,
"maximum": 2,
"minimum": 0.5,
"x-order": 7,
"description": "Coefficient value multiplied to multi-hot chord chroma."
},
"beat_sync_threshold": {
"type": "number",
"title": "Beat Sync Threshold",
"x-order": 5,
"description": "When beat syncing, if the gap between generated downbeat timing and input audio downbeat timing is larger than `beat_sync_threshold`, the beats are considered not to correspond. If `None` or `-1`, `1.1/(bpm/60)` will be used as the value. 0.75 is a good value to set."
},
"return_instrumental": {
"type": "boolean",
"title": "Return Instrumental",
"default": false,
"x-order": 13,
"description": "If `True`, the instrumental audio will also be returned."
},
"multi_band_diffusion": {
"type": "boolean",
"title": "Multi Band Diffusion",
"default": false,
"x-order": 3,
"description": "If `True`, the EnCodec tokens will be decoded with MultiBand Diffusion. Not compatible with `stereo` models."
},
"normalization_strategy": {
"enum": [
"loudness",
"clip",
"peak",
"rms"
],
"type": "string",
"title": "normalization_strategy",
"description": "Strategy for normalizing audio.",
"default": "loudness",
"x-order": 4
},
"classifier_free_guidance": {
"type": "integer",
"title": "Classifier Free Guidance",
"default": 3,
"x-order": 11,
"description": "Increases the influence of inputs on the output. Higher values produce lower-variance outputs that adhere more closely to inputs."
}
}
}
Output schema
The shape of the response you’ll get when you run this model with an API.
Schema
{
"type": "array",
"items": {
"type": "string",
"format": "uri"
},
"title": "Output"
}