zhouzhengjun/lora_train | API reference

zhouzhengjun / lora_train

Public
5 runs

Run zhouzhengjun/lora_train with an API

Use one of our client libraries to get started quickly. Clicking on a library will take you to the Playground tab where you can tweak different inputs, see the results, and copy the corresponding code to use in your own project.

Input schema

The fields you can use to run this model with an API. If you don't give a value for a field, its default value will be used.

Field	Type	Default value	Description
instance_data	string		A ZIP file containing your training images (JPG, PNG, etc. size not restricted). These images contain your 'subject' that you want the trained model to embed in the output domain for later generating customized scenes beyond the training images. For best results, use images without noise or unrelated objects in the background.
seed	integer	1337	A seed for reproducible training
resolution	integer	512	The resolution for input images. All the images in the train/validation dataset will be resized to this resolution.
train_text_encoder	boolean	True	Whether to train the text encoder
train_batch_size	integer	1	Batch size (per device) for the training dataloader.
gradient_accumulation_steps	integer	4	Number of update steps to accumulate before performing a backward/update pass.
gradient_checkpointing	boolean	False	Whether or not to use gradient checkpointing to save memory at the expense of slower backward pass.
scale_lr	boolean	True	Scale the learning rate by the number of GPUs, gradient accumulation steps, and batch size.
lr_scheduler	string (enum)	constant Options: linear, cosine, cosine_with_restarts, polynomial, constant, constant_with_warmup	The scheduler type to use
lr_warmup_steps	integer	0	Number of steps for the warmup in the lr scheduler.
clip_ti_decay	boolean	True	Whether or not to perform Bayesian Learning Rule on norm of the CLIP latent.
color_jitter	boolean	True	Whether or not to use color jitter at augmentation.
continue_inversion	boolean	False	Whether or not to continue inversion.
continue_inversion_lr	number	0.0001	The learning rate for continuing an inversion.
initializer_tokens	string		The tokens to use for the initializer. If not provided, will randomly initialize from gaussian N(0,0.017^2)
learning_rate_text	number	0.00001	The learning rate for the text encoder.
learning_rate_ti	number	0.0005	The learning rate for the TI.
learning_rate_unet	number	0.0001	The learning rate for the unet.
lora_rank	integer	4	Rank of the LoRA. The larger it is, the more likely it is to capture fidelity but the less likely it is to be editable. A larger rank will also make the end result larger.
lora_dropout_p	number	0.1	Dropout for the LoRA layer. Reference LoRA paper for more details.
lora_scale	number	1	Scaling parameter at the end of the LoRA layer.
lr_scheduler_lora	string (enum)	constant Options: linear, cosine, cosine_with_restarts, polynomial, constant, constant_with_warmup	The scheduler type to use
lr_warmup_steps_lora	integer	0	Number of steps for the warmup in the lr scheduler.
max_train_steps_ti	integer	500	The maximum number of training steps for the TI.
max_train_steps_tuning	integer	1000	The maximum number of training steps for the tuning.
placeholder_token_at_data	string		If this value is provided as 'X|Y', it will transform target word X into Y in the caption. You are required to provide the caption as the filename (ignoring the extension), and Y has to contain the placeholder token below. You are also required to set `use_template` to `none` to use this feature.
placeholder_tokens	string	<s1>|<s2>	The placeholder tokens to use for the initializer. If not provided, will use the first tokens of the data.
use_face_segmentation_condition	boolean	False	Whether or not to use the face segmentation condition.
use_template	string (enum)	object Options: object, style, none	The template to use for the inversion.
weight_decay_lora	number	0.001	The weight decay for the LORA loss.
weight_decay_ti	number	0	The weight decay for the TI.

{
  "type": "object",
  "title": "Input",
  "required": [
    "instance_data"
  ],
  "properties": {
    "seed": {
      "type": "integer",
      "title": "Seed",
      "default": 1337,
      "x-order": 1,
      "description": "A seed for reproducible training"
    },
    "scale_lr": {
      "type": "boolean",
      "title": "Scale Lr",
      "default": true,
      "x-order": 7,
      "description": "Scale the learning rate by the number of GPUs, gradient accumulation steps, and batch size."
    },
    "lora_rank": {
      "type": "integer",
      "title": "Lora Rank",
      "default": 4,
      "x-order": 18,
      "description": "Rank of the LoRA. Larger it is, more likely to capture fidelity but less likely to be editable. Larger rank will make the end result larger."
    },
    "lora_scale": {
      "type": "number",
      "title": "Lora Scale",
      "default": 1,
      "x-order": 20,
      "description": "Scaling parameter at the end of the LoRA layer."
    },
    "resolution": {
      "type": "integer",
      "title": "Resolution",
      "default": 512,
      "x-order": 2,
      "description": "The resolution for input images. All the images in the train/validation dataset will be resized to this resolution."
    },
    "color_jitter": {
      "type": "boolean",
      "title": "Color Jitter",
      "default": true,
      "x-order": 11,
      "description": "Whether or not to use color jitter at augmentation."
    },
    "lr_scheduler": {
      "enum": [
        "linear",
        "cosine",
        "cosine_with_restarts",
        "polynomial",
        "constant",
        "constant_with_warmup"
      ],
      "type": "string",
      "title": "lr_scheduler",
      "description": "The scheduler type to use",
      "default": "constant",
      "x-order": 8
    },
    "use_template": {
      "enum": [
        "object",
        "style",
        "none"
      ],
      "type": "string",
      "title": "use_template",
      "description": "The template to use for the inversion.",
      "default": "object",
      "x-order": 28
    },
    "clip_ti_decay": {
      "type": "boolean",
      "title": "Clip Ti Decay",
      "default": true,
      "x-order": 10,
      "description": "Whether or not to perform Bayesian Learning Rule on norm of the CLIP latent."
    },
    "instance_data": {
      "type": "string",
      "title": "Instance Data",
      "format": "uri",
      "x-order": 0,
      "description": "A ZIP file containing your training images (JPG, PNG, etc. size not restricted). These images contain your 'subject' that you want the trained model to embed in the output domain for later generating customized scenes beyond the training images. For best results, use images without noise or unrelated objects in the background."
    },
    "lora_dropout_p": {
      "type": "number",
      "title": "Lora Dropout P",
      "default": 0.1,
      "x-order": 19,
      "description": "Dropout for the LoRA layer. Reference LoRA paper for more details."
    },
    "lr_warmup_steps": {
      "type": "integer",
      "title": "Lr Warmup Steps",
      "default": 0,
      "x-order": 9,
      "description": "Number of steps for the warmup in the lr scheduler."
    },
    "weight_decay_ti": {
      "type": "number",
      "title": "Weight Decay Ti",
      "default": 0,
      "x-order": 30,
      "description": "The weight decay for the TI."
    },
    "learning_rate_ti": {
      "type": "number",
      "title": "Learning Rate Ti",
      "default": 0.0005,
      "x-order": 16,
      "description": "The learning rate for the TI."
    },
    "train_batch_size": {
      "type": "integer",
      "title": "Train Batch Size",
      "default": 1,
      "x-order": 4,
      "description": "Batch size (per device) for the training dataloader."
    },
    "lr_scheduler_lora": {
      "enum": [
        "linear",
        "cosine",
        "cosine_with_restarts",
        "polynomial",
        "constant",
        "constant_with_warmup"
      ],
      "type": "string",
      "title": "lr_scheduler_lora",
      "description": "The scheduler type to use",
      "default": "constant",
      "x-order": 21
    },
    "weight_decay_lora": {
      "type": "number",
      "title": "Weight Decay Lora",
      "default": 0.001,
      "x-order": 29,
      "description": "The weight decay for the LORA loss."
    },
    "continue_inversion": {
      "type": "boolean",
      "title": "Continue Inversion",
      "default": false,
      "x-order": 12,
      "description": "Whether or not to continue inversion."
    },
    "initializer_tokens": {
      "type": "string",
      "title": "Initializer Tokens",
      "x-order": 14,
      "description": "The tokens to use for the initializer. If not provided, will randomly initialize from gaussian N(0,0.017^2)"
    },
    "learning_rate_text": {
      "type": "number",
      "title": "Learning Rate Text",
      "default": 1e-05,
      "x-order": 15,
      "description": "The learning rate for the text encoder."
    },
    "learning_rate_unet": {
      "type": "number",
      "title": "Learning Rate Unet",
      "default": 0.0001,
      "x-order": 17,
      "description": "The learning rate for the unet."
    },
    "max_train_steps_ti": {
      "type": "integer",
      "title": "Max Train Steps Ti",
      "default": 500,
      "x-order": 23,
      "description": "The maximum number of training steps for the TI."
    },
    "placeholder_tokens": {
      "type": "string",
      "title": "Placeholder Tokens",
      "default": "<s1>|<s2>",
      "x-order": 26,
      "description": "The placeholder tokens to use for the initializer. If not provided, will use the first tokens of the data."
    },
    "train_text_encoder": {
      "type": "boolean",
      "title": "Train Text Encoder",
      "default": true,
      "x-order": 3,
      "description": "Whether to train the text encoder"
    },
    "lr_warmup_steps_lora": {
      "type": "integer",
      "title": "Lr Warmup Steps Lora",
      "default": 0,
      "x-order": 22,
      "description": "Number of steps for the warmup in the lr scheduler."
    },
    "continue_inversion_lr": {
      "type": "number",
      "title": "Continue Inversion Lr",
      "default": 0.0001,
      "x-order": 13,
      "description": "The learning rate for continuing an inversion."
    },
    "gradient_checkpointing": {
      "type": "boolean",
      "title": "Gradient Checkpointing",
      "default": false,
      "x-order": 6,
      "description": "Whether or not to use gradient checkpointing to save memory at the expense of slower backward pass."
    },
    "max_train_steps_tuning": {
      "type": "integer",
      "title": "Max Train Steps Tuning",
      "default": 1000,
      "x-order": 24,
      "description": "The maximum number of training steps for the tuning."
    },
    "placeholder_token_at_data": {
      "type": "string",
      "title": "Placeholder Token At Data",
      "x-order": 25,
      "description": "If this value is provided as 'X|Y', it will transform target word X into Y at caption. You are required to provide caption as filename (not regarding extension), and Y has to contain placeholder token below. You are also required to set `None` for `use_template` argument to use this feature."
    },
    "gradient_accumulation_steps": {
      "type": "integer",
      "title": "Gradient Accumulation Steps",
      "default": 4,
      "x-order": 5,
      "description": "Number of updates steps to accumulate before performing a backward/update pass."
    },
    "use_face_segmentation_condition": {
      "type": "boolean",
      "title": "Use Face Segmentation Condition",
      "default": false,
      "x-order": 27,
      "description": "Whether or not to use the face segmentation condition."
    }
  }
}

Output schema

The shape of the response you’ll get when you run this model with an API.

Schema

{
  "type": "string",
  "title": "Output",
  "format": "uri"
}