typefile
{
"batch_size": 4,
"epochs": 16,
"gradient_checkpointing": true,
"hf_token": "[REDACTED]",
"hub_model_id": "lucataco/hunyuan-musubi-rose-6",
"input_videos": "https://replicate.delivery/pbxt/MGxrtkh22jTrOb2yHYgI1D921ahZItEy1C5fIKqwI2y1AdrI/RSNG-6.zip",
"learning_rate": 0.001,
"optimizer": "adamw8bit",
"rank": 32,
"seed": 42,
"timestep_sampling": "sigmoid"
}
npm install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=r8_5VY**********************************
This is your API token. Keep it to yourself.
import Replicate from "replicate";
import fs from "node:fs";
// Create the API client, authenticating with the token read from the
// REPLICATE_API_TOKEN environment variable.
const replicate = new Replicate({ auth: process.env.REPLICATE_API_TOKEN });
Run lucataco/musubi-tuner using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
// Run the pinned lucataco/musubi-tuner version with the training inputs below
// and await its output.
const output = await replicate.run(
  "lucataco/musubi-tuner:52fd0b5e82ee62f98969971bf80814059e408e4114522e7b3859ba2c03077c40",
  {
    input: {
      batch_size: 4,
      epochs: 16,
      gradient_checkpointing: true,
      hf_token: "[REDACTED]",
      hub_model_id: "lucataco/hunyuan-musubi-rose-6",
      input_videos: "https://replicate.delivery/pbxt/MGxrtkh22jTrOb2yHYgI1D921ahZItEy1C5fIKqwI2y1AdrI/RSNG-6.zip",
      learning_rate: 0.001,
      optimizer: "adamw8bit",
      rank: 32,
      seed: 42,
      timestep_sampling: "sigmoid",
    },
  },
);

// To access the file URL:
console.log(output.url()); //=> "http://example.com"

// To write the file to disk:
// The callback-style fs.writeFile() throws when called without a callback, so
// use the promise-based variant and await it; write errors then surface here.
// NOTE(review): "my-image.png" is the placeholder name from the generated
// snippet — confirm the actual output file type for this model.
await fs.promises.writeFile("my-image.png", output);
To learn more, take a look at the guide on getting started with Node.js.
pip install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=r8_5VY**********************************
This is your API token. Keep it to yourself.
import replicate
Run lucataco/musubi-tuner using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# Run the pinned lucataco/musubi-tuner version with the training inputs below.
output = replicate.run(
    "lucataco/musubi-tuner:52fd0b5e82ee62f98969971bf80814059e408e4114522e7b3859ba2c03077c40",
    input={
        "batch_size": 4,
        "epochs": 16,
        "gradient_checkpointing": True,
        "hf_token": "[REDACTED]",
        "hub_model_id": "lucataco/hunyuan-musubi-rose-6",
        "input_videos": "https://replicate.delivery/pbxt/MGxrtkh22jTrOb2yHYgI1D921ahZItEy1C5fIKqwI2y1AdrI/RSNG-6.zip",
        "learning_rate": 0.001,
        "optimizer": "adamw8bit",
        "rank": 32,
        "seed": 42,
        "timestep_sampling": "sigmoid",
    },
)

# To access the file URL:
# NOTE(review): confirm whether `url` is a method or an attribute in the
# installed client version.
print(output.url())
#=> "http://example.com"

# To write the file to disk:
# The body of the `with` statement must be indented; the context manager
# closes the file once the write finishes.
with open("my-image.png", "wb") as file:
    file.write(output.read())
To learn more, take a look at the guide on getting started with Python.
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=r8_5VY**********************************
This is your API token. Keep it to yourself.
Run lucataco/musubi-tuner using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# Create a prediction over HTTP. The "Prefer: wait" header is sent with the
# request, and the bearer token is taken from $REPLICATE_API_TOKEN.
curl --silent --request POST \
  --header "Authorization: Bearer $REPLICATE_API_TOKEN" \
  --header "Content-Type: application/json" \
  --header "Prefer: wait" \
  --data $'{
"version": "lucataco/musubi-tuner:52fd0b5e82ee62f98969971bf80814059e408e4114522e7b3859ba2c03077c40",
"input": {
"batch_size": 4,
"epochs": 16,
"gradient_checkpointing": true,
"hf_token": "[REDACTED]",
"hub_model_id": "lucataco/hunyuan-musubi-rose-6",
"input_videos": "https://replicate.delivery/pbxt/MGxrtkh22jTrOb2yHYgI1D921ahZItEy1C5fIKqwI2y1AdrI/RSNG-6.zip",
"learning_rate": 0.001,
"optimizer": "adamw8bit",
"rank": 32,
"seed": 42,
"timestep_sampling": "sigmoid"
}
}' \
  https://api.replicate.com/v1/predictions
To learn more, take a look at Replicate’s HTTP API reference docs.
{
"id": "8e970hygfnrmc0cm789sz6sr38",
"model": "lucataco/musubi-tuner",
"version": "52fd0b5e82ee62f98969971bf80814059e408e4114522e7b3859ba2c03077c40",
"input": {
"batch_size": 4,
"epochs": 16,
"gradient_checkpointing": true,
"hf_token": "[REDACTED]",
"hub_model_id": "lucataco/hunyuan-musubi-rose-6",
"input_videos": "https://replicate.delivery/pbxt/MGxrtkh22jTrOb2yHYgI1D921ahZItEy1C5fIKqwI2y1AdrI/RSNG-6.zip",
"learning_rate": 0.001,
"optimizer": "adamw8bit",
"rank": 32,
"seed": 42,
"timestep_sampling": "sigmoid"
},
"logs": "Using seed: 42\nCleaning up past runs\nExtracted 12 files from zip to folder: input/videos\nRunning latent pre-caching command\nINFO:__main__:Load dataset config from train.toml\nINFO:dataset.image_video_dataset:glob images in ./input/videos\nINFO:dataset.image_video_dataset:found 6 videos\nINFO:dataset.config_utils:[Dataset 0]\nis_image_dataset: False\nresolution: (960, 544)\nbatch_size: 1\ncaption_extension: \".txt\"\nenable_bucket: True\nbucket_no_upscale: False\ncache_directory: \"./input/cache_directory\"\ndebug_dataset: False\nvideo_directory: \"./input/videos\"\nvideo_jsonl_file: \"None\"\ntarget_frames: [1, 25, 45]\nframe_extraction: head\nframe_stride: 1\nframe_sample: 1\nINFO:hunyuan_model.vae:Loading 3D VAE model (884-16c-hy) from: ckpts/hunyuan-video-t2v-720p/vae/pytorch_model.pt\nINFO:hunyuan_model.vae:VAE to dtype: torch.float16\nLoaded VAE: FrozenDict([('in_channels', 3), ('out_channels', 3), ('down_block_types', ['DownEncoderBlockCausal3D', 'DownEncoderBlockCausal3D', 'DownEncoderBlockCausal3D', 'DownEncoderBlockCausal3D']), ('up_block_types', ['UpDecoderBlockCausal3D', 'UpDecoderBlockCausal3D', 'UpDecoderBlockCausal3D', 'UpDecoderBlockCausal3D']), ('block_out_channels', [128, 256, 512, 512]), ('layers_per_block', 2), ('act_fn', 'silu'), ('latent_channels', 16), ('norm_num_groups', 32), ('sample_size', 256), ('sample_tsize', 64), ('scaling_factor', 0.476986), ('force_upcast', True), ('spatial_compression_ratio', 8), ('time_compression_ratio', 4), ('mid_block_add_attention', True), ('_use_default_values', ['spatial_compression_ratio', 'force_upcast']), ('_class_name', 'AutoencoderKLCausal3D'), ('_diffusers_version', '0.4.2')]), dtype: torch.float16\nINFO:__main__:Set chunk_size to 32 for CausalConv3d in VAE\nEncoding dataset [0]\n0it [00:00, ?it/s]\n1it [00:01, 1.19s/it]\n2it [00:01, 1.70it/s]\n3it [00:01, 2.53it/s]\n4it [00:01, 3.29it/s]\n5it [00:01, 3.95it/s]\n6it [00:02, 4.49it/s]\n7it [00:03, 1.38it/s]\n8it [00:05, 1.04s/it]\n9it [00:07, 
1.26s/it]\n10it [00:08, 1.41s/it]\n11it [00:10, 1.51s/it]\n12it [00:12, 1.58s/it]\n13it [00:15, 2.03s/it]\n14it [00:18, 2.35s/it]\n15it [00:21, 2.57s/it]\n16it [00:24, 2.72s/it]\n17it [00:27, 2.86s/it]\n18it [00:31, 2.93s/it]\n18it [00:31, 1.72s/it]\nRunning text encoder output pre-caching command\nThe cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.\n0it [00:00, ?it/s]\n0it [00:00, ?it/s]\nINFO:__main__:Load dataset config from train.toml\nINFO:dataset.image_video_dataset:glob images in ./input/videos\nINFO:dataset.image_video_dataset:found 6 videos\nINFO:dataset.config_utils:[Dataset 0]\nis_image_dataset: False\nresolution: (960, 544)\nbatch_size: 1\ncaption_extension: \".txt\"\nenable_bucket: True\nbucket_no_upscale: False\ncache_directory: \"./input/cache_directory\"\ndebug_dataset: False\nvideo_directory: \"./input/videos\"\nvideo_jsonl_file: \"None\"\ntarget_frames: [1, 25, 45]\nframe_extraction: head\nframe_stride: 1\nframe_sample: 1\nINFO:__main__:loading text encoder 1: ckpts/text_encoder\nINFO:hunyuan_model.text_encoder:Loading text encoder model (llm) from: ckpts/text_encoder\nLoading checkpoint shards: 0%| | 0/4 [00:00<?, ?it/s]\nLoading checkpoint shards: 50%|█████ | 2/4 [00:00<00:00, 14.95it/s]\nLoading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00, 15.17it/s]\nLoading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00, 15.12it/s]\nINFO:hunyuan_model.text_encoder:Text encoder to dtype: torch.float16\nINFO:hunyuan_model.text_encoder:Loading tokenizer (llm) from: ckpts/text_encoder\nINFO:__main__:Encoding with Text Encoder 1\nEncoding dataset [0]\n0it [00:00, ?it/s]\n1it [00:00, 2.20it/s]\n6it [00:00, 13.51it/s]\n6it [00:00, 10.75it/s]\nINFO:__main__:loading text encoder 2: ckpts/text_encoder_2\nINFO:hunyuan_model.text_encoder:Loading text encoder model (clipL) from: 
ckpts/text_encoder_2\nINFO:hunyuan_model.text_encoder:Text encoder to dtype: torch.float16\nINFO:hunyuan_model.text_encoder:Loading tokenizer (clipL) from: ckpts/text_encoder_2\nINFO:__main__:Encoding with Text Encoder 2\nEncoding dataset [0]\n0it [00:00, ?it/s]\n1it [00:00, 8.03it/s]\n6it [00:00, 38.54it/s]\nRunning training command\nThe following values were not passed to `accelerate launch` and had defaults used instead:\n`--num_processes` was set to a value of `1`\n`--num_machines` was set to a value of `1`\n`--dynamo_backend` was set to a value of `'no'`\nTo avoid this warning pass in values for each of the problematic parameters or run `accelerate config`.\nTrying to import sageattention\nSuccessfully imported sageattention\nINFO:__main__:Load dataset config from train.toml\nINFO:dataset.image_video_dataset:glob images in ./input/videos\nINFO:dataset.image_video_dataset:found 6 videos\nINFO:dataset.config_utils:[Dataset 0]\nis_image_dataset: False\nresolution: (960, 544)\nbatch_size: 1\ncaption_extension: \".txt\"\nenable_bucket: True\nbucket_no_upscale: False\ncache_directory: \"./input/cache_directory\"\ndebug_dataset: False\nvideo_directory: \"./input/videos\"\nvideo_jsonl_file: \"None\"\ntarget_frames: [1, 25, 45]\nframe_extraction: head\nframe_stride: 1\nframe_sample: 1\nINFO:dataset.image_video_dataset:bucket: (960, 544, 1), count: 6\nINFO:dataset.image_video_dataset:bucket: (960, 544, 25), count: 6\nINFO:dataset.image_video_dataset:bucket: (960, 544, 45), count: 6\nINFO:dataset.image_video_dataset:total batches: 18\nINFO:__main__:preparing accelerator\naccelerator device: cuda\nINFO:__main__:DiT precision: torch.bfloat16, weight precision: torch.float8_e4m3fn\nINFO:__main__:Loading DiT model from ckpts/hunyuan-video-t2v-720p/transformers/mp_rank_00_model_states.pt\nUsing torch attention mode, split_attn: False\nimport network module: networks.lora\nINFO:networks.lora:create LoRA network. 
base dim (rank): 32, alpha: 1\nINFO:networks.lora:neuron dropout: p=None, rank dropout: p=None, module dropout: p=None\nINFO:networks.lora:create LoRA for U-Net/DiT: 320 modules.\nINFO:networks.lora:enable LoRA for U-Net: 320 modules\nHYVideoDiffusionTransformer: Gradient checkpointing enabled.\nprepare optimizer, data loader etc.\nINFO:__main__:use 8-bit AdamW optimizer | {}\noverride steps. steps for 16 epochs is / 指定エポックまでのステップ数: 288\nINFO:__main__:casting model to torch.float8_e4m3fn\nrunning training / 学習開始\nnum train items / 学習画像、動画数: 18\nnum batches per epoch / 1epochのバッチ数: 18\nnum epochs / epoch数: 16\nbatch size per device / バッチサイズ: 1\ngradient accumulation steps / 勾配を合計するステップ数 = 1\ntotal optimization steps / 学習ステップ数: 288\nINFO:__main__:calculate hash for DiT model: ckpts/hunyuan-video-t2v-720p/transformers/mp_rank_00_model_states.pt\nsteps: 0%| | 0/288 [00:00<?, ?it/s]INFO:__main__:DiT dtype: torch.float8_e4m3fn, device: cuda:0\nepoch 1/16\nINFO:dataset.image_video_dataset:epoch is incremented. current_epoch: 0, epoch: 1\nINFO:dataset.image_video_dataset:epoch is incremented. 
current_epoch: 0, epoch: 1\nsteps: 0%| | 1/288 [00:11<54:34, 11.41s/it]\nsteps: 0%| | 1/288 [00:11<54:34, 11.41s/it, avr_loss=0.0441]\nsteps: 1%| | 2/288 [00:16<38:46, 8.14s/it, avr_loss=0.0441]\nsteps: 1%| | 2/288 [00:16<38:46, 8.14s/it, avr_loss=0.0749]\nsteps: 1%| | 3/288 [00:21<33:25, 7.04s/it, avr_loss=0.0749]\nsteps: 1%| | 3/288 [00:21<33:25, 7.04s/it, avr_loss=0.0828]\nsteps: 1%|▏ | 4/288 [00:25<30:42, 6.49s/it, avr_loss=0.0828]\nsteps: 1%|▏ | 4/288 [00:25<30:42, 6.49s/it, avr_loss=0.0677]\nsteps: 2%|▏ | 5/288 [00:36<34:25, 7.30s/it, avr_loss=0.0677]\nsteps: 2%|▏ | 5/288 [00:36<34:25, 7.30s/it, avr_loss=0.0588]\nsteps: 2%|▏ | 6/288 [00:41<32:28, 6.91s/it, avr_loss=0.0588]\nsteps: 2%|▏ | 6/288 [00:41<32:28, 6.91s/it, avr_loss=0.0536]\nsteps: 2%|▏ | 7/288 [00:52<34:53, 7.45s/it, avr_loss=0.0536]\nsteps: 2%|▏ | 7/288 [00:52<34:53, 7.45s/it, avr_loss=0.0489]\nsteps: 3%|▎ | 8/288 [00:52<30:49, 6.60s/it, avr_loss=0.0489]\nsteps: 3%|▎ | 8/288 [00:52<30:49, 6.60s/it, avr_loss=0.0482]\nsteps: 3%|▎ | 9/288 [00:57<29:47, 6.41s/it, avr_loss=0.0482]\nsteps: 3%|▎ | 9/288 [00:57<29:47, 6.41s/it, avr_loss=0.0463]\nsteps: 3%|▎ | 10/288 [01:08<31:35, 6.82s/it, avr_loss=0.0463]\nsteps: 3%|▎ | 10/288 [01:08<31:35, 6.82s/it, avr_loss=0.0456]\nsteps: 4%|▍ | 11/288 [01:08<28:54, 6.26s/it, avr_loss=0.0456]\nsteps: 4%|▍ | 11/288 [01:08<28:54, 6.26s/it, avr_loss=0.0461]\nsteps: 4%|▍ | 12/288 [01:09<26:42, 5.81s/it, avr_loss=0.0461]\nsteps: 4%|▍ | 12/288 [01:09<26:42, 5.81s/it, avr_loss=0.0453]\nsteps: 5%|▍ | 13/288 [01:10<24:48, 5.41s/it, avr_loss=0.0453]\nsteps: 5%|▍ | 13/288 [01:10<24:48, 5.41s/it, avr_loss=0.0444]\nsteps: 5%|▍ | 14/288 [01:20<26:22, 5.78s/it, avr_loss=0.0444]\nsteps: 5%|▍ | 14/288 [01:20<26:22, 5.78s/it, avr_loss=0.0427]\nsteps: 5%|▌ | 15/288 [01:21<24:44, 5.44s/it, avr_loss=0.0427]\nsteps: 5%|▌ | 15/288 [01:21<24:44, 5.44s/it, avr_loss=0.0422]\nsteps: 6%|▌ | 16/288 [01:26<24:28, 5.40s/it, avr_loss=0.0422]\nsteps: 6%|▌ | 16/288 [01:26<24:28, 5.40s/it, 
avr_loss=0.0408]\nsteps: 6%|▌ | 17/288 [01:31<24:13, 5.36s/it, avr_loss=0.0408]\nsteps: 6%|▌ | 17/288 [01:31<24:13, 5.36s/it, avr_loss=0.0407]\nsteps: 6%|▋ | 18/288 [01:31<22:58, 5.10s/it, avr_loss=0.0407]\nepoch 2/16\nsteps: 6%|▋ | 18/288 [01:31<22:58, 5.10s/it, avr_loss=0.05] INFO:dataset.image_video_dataset:epoch is incremented. current_epoch: 1, epoch: 2\nINFO:dataset.image_video_dataset:epoch is incremented. current_epoch: 1, epoch: 2\nsteps: 7%|▋ | 19/288 [01:32<21:52, 4.88s/it, avr_loss=0.05]\nsteps: 7%|▋ | 19/288 [01:32<21:52, 4.88s/it, avr_loss=0.0552]\nsteps: 7%|▋ | 20/288 [01:37<21:47, 4.88s/it, avr_loss=0.0552]\nsteps: 7%|▋ | 20/288 [01:37<21:47, 4.88s/it, avr_loss=0.052] \nsteps: 7%|▋ | 21/288 [01:48<22:54, 5.15s/it, avr_loss=0.052]\nsteps: 7%|▋ | 21/288 [01:48<22:54, 5.15s/it, avr_loss=0.0477]\nsteps: 8%|▊ | 22/288 [01:52<22:45, 5.13s/it, avr_loss=0.0477]\nsteps: 8%|▊ | 22/288 [01:52<22:45, 5.13s/it, avr_loss=0.0476]\nsteps: 8%|▊ | 23/288 [01:57<22:36, 5.12s/it, avr_loss=0.0476]\nsteps: 8%|▊ | 23/288 [01:57<22:36, 5.12s/it, avr_loss=0.0474]\nsteps: 8%|▊ | 24/288 [01:58<21:42, 4.93s/it, avr_loss=0.0474]\nsteps: 8%|▊ | 24/288 [01:58<21:42, 4.93s/it, avr_loss=0.0525]\nsteps: 9%|▊ | 25/288 [02:09<22:37, 5.16s/it, avr_loss=0.0525]\nsteps: 9%|▊ | 25/288 [02:09<22:37, 5.16s/it, avr_loss=0.0522]\nsteps: 9%|▉ | 26/288 [02:19<23:26, 5.37s/it, avr_loss=0.0522]\nsteps: 9%|▉ | 26/288 [02:19<23:26, 5.37s/it, avr_loss=0.0524]\nsteps: 9%|▉ | 27/288 [02:20<22:36, 5.20s/it, avr_loss=0.0524]\nsteps: 9%|▉ | 27/288 [02:20<22:36, 5.20s/it, avr_loss=0.0561]\nsteps: 10%|▉ | 28/288 [02:30<23:20, 5.39s/it, avr_loss=0.0561]\nsteps: 10%|▉ | 28/288 [02:30<23:20, 5.39s/it, avr_loss=0.0553]\nsteps: 10%|█ | 29/288 [02:31<22:33, 5.22s/it, avr_loss=0.0553]\nsteps: 10%|█ | 29/288 [02:31<22:33, 5.22s/it, avr_loss=0.055] \nsteps: 10%|█ | 30/288 [02:32<21:48, 5.07s/it, avr_loss=0.055]\nsteps: 10%|█ | 30/288 [02:32<21:48, 5.07s/it, avr_loss=0.055]\nsteps: 11%|█ | 31/288 [02:37<21:42, 
5.07s/it, avr_loss=0.055]\nsteps: 11%|█ | 31/288 [02:37<21:42, 5.07s/it, avr_loss=0.0545]\nsteps: 11%|█ | 32/288 [02:47<22:21, 5.24s/it, avr_loss=0.0545]\nsteps: 11%|█ | 32/288 [02:47<22:21, 5.24s/it, avr_loss=0.0546]\nsteps: 11%|█▏ | 33/288 [02:52<22:12, 5.23s/it, avr_loss=0.0546]\nsteps: 11%|█▏ | 33/288 [02:52<22:12, 5.23s/it, avr_loss=0.0541]\nsteps: 12%|█▏ | 34/288 [02:57<22:04, 5.22s/it, avr_loss=0.0541]\nsteps: 12%|█▏ | 34/288 [02:57<22:04, 5.22s/it, avr_loss=0.0547]\nsteps: 12%|█▏ | 35/288 [03:08<22:39, 5.37s/it, avr_loss=0.0547]\nsteps: 12%|█▏ | 35/288 [03:08<22:39, 5.37s/it, avr_loss=0.0536]\nsteps: 12%|█▎ | 36/288 [03:08<22:00, 5.24s/it, avr_loss=0.0536]\nepoch 3/16\nsteps: 12%|█▎ | 36/288 [03:08<22:00, 5.24s/it, avr_loss=0.044] INFO:dataset.image_video_dataset:epoch is incremented. current_epoch: 2, epoch: 3\nINFO:dataset.image_video_dataset:epoch is incremented. current_epoch: 2, epoch: 3\nsteps: 13%|█▎ | 37/288 [03:13<21:53, 5.23s/it, avr_loss=0.044]\nsteps: 13%|█▎ | 37/288 [03:13<21:53, 5.23s/it, avr_loss=0.0409]\nsteps: 13%|█▎ | 38/288 [03:18<21:45, 5.22s/it, avr_loss=0.0409]\nsteps: 13%|█▎ | 38/288 [03:18<21:45, 5.22s/it, avr_loss=0.0397]\nsteps: 14%|█▎ | 39/288 [03:23<21:38, 5.21s/it, avr_loss=0.0397]\nsteps: 14%|█▎ | 39/288 [03:23<21:38, 5.21s/it, avr_loss=0.0395]\nsteps: 14%|█▍ | 40/288 [03:33<22:06, 5.35s/it, avr_loss=0.0395]\nsteps: 14%|█▍ | 40/288 [03:33<22:06, 5.35s/it, avr_loss=0.0448]\nsteps: 14%|█▍ | 41/288 [03:44<22:32, 5.47s/it, avr_loss=0.0448]\nsteps: 14%|█▍ | 41/288 [03:44<22:32, 5.47s/it, avr_loss=0.045] \nsteps: 15%|█▍ | 42/288 [03:55<22:56, 5.60s/it, avr_loss=0.045]\nsteps: 15%|█▍ | 42/288 [03:55<22:56, 5.60s/it, avr_loss=0.0398]\nsteps: 15%|█▍ | 43/288 [03:59<22:47, 5.58s/it, avr_loss=0.0398]\nsteps: 15%|█▍ | 43/288 [03:59<22:47, 5.58s/it, avr_loss=0.0403]\nsteps: 15%|█▌ | 44/288 [04:10<23:09, 5.69s/it, avr_loss=0.0403]\nsteps: 15%|█▌ | 44/288 [04:10<23:09, 5.69s/it, avr_loss=0.0387]\nsteps: 16%|█▌ | 45/288 [04:11<22:36, 5.58s/it, 
avr_loss=0.0387]\nsteps: 16%|█▌ | 45/288 [04:11<22:36, 5.58s/it, avr_loss=0.0353]\nsteps: 16%|█▌ | 46/288 [04:16<22:26, 5.57s/it, avr_loss=0.0353]\nsteps: 16%|█▌ | 46/288 [04:16<22:26, 5.57s/it, avr_loss=0.0353]\nsteps: 16%|█▋ | 47/288 [04:16<21:56, 5.46s/it, avr_loss=0.0353]\nsteps: 16%|█▋ | 47/288 [04:16<21:56, 5.46s/it, avr_loss=0.0346]\nsteps: 17%|█▋ | 48/288 [04:27<22:16, 5.57s/it, avr_loss=0.0346]\nsteps: 17%|█▋ | 48/288 [04:27<22:16, 5.57s/it, avr_loss=0.0337]\nsteps: 17%|█▋ | 49/288 [04:32<22:07, 5.55s/it, avr_loss=0.0337]\nsteps: 17%|█▋ | 49/288 [04:32<22:07, 5.55s/it, avr_loss=0.0332]\nsteps: 17%|█▋ | 50/288 [04:32<21:38, 5.46s/it, avr_loss=0.0332]\nsteps: 17%|█▋ | 50/288 [04:32<21:38, 5.46s/it, avr_loss=0.034] \nsteps: 18%|█▊ | 51/288 [04:33<21:11, 5.36s/it, avr_loss=0.034]\nsteps: 18%|█▊ | 51/288 [04:33<21:11, 5.36s/it, avr_loss=0.0354]\nsteps: 18%|█▊ | 52/288 [04:34<20:44, 5.27s/it, avr_loss=0.0354]\nsteps: 18%|█▊ | 52/288 [04:34<20:44, 5.27s/it, avr_loss=0.0388]\nsteps: 18%|█▊ | 53/288 [04:44<21:03, 5.38s/it, avr_loss=0.0388]\nsteps: 18%|█▊ | 53/288 [04:44<21:03, 5.38s/it, avr_loss=0.0387]\nsteps: 19%|█▉ | 54/288 [04:49<20:55, 5.37s/it, avr_loss=0.0387]\nepoch 4/16\nsteps: 19%|█▉ | 54/288 [04:49<20:55, 5.37s/it, avr_loss=0.0381]INFO:dataset.image_video_dataset:epoch is incremented. current_epoch: 3, epoch: 4\nINFO:dataset.image_video_dataset:epoch is incremented. 
current_epoch: 3, epoch: 4\nsteps: 19%|█▉ | 55/288 [05:00<21:12, 5.46s/it, avr_loss=0.0381]\nsteps: 19%|█▉ | 55/288 [05:00<21:12, 5.46s/it, avr_loss=0.0357]\nsteps: 19%|█▉ | 56/288 [05:01<20:47, 5.38s/it, avr_loss=0.0357]\nsteps: 19%|█▉ | 56/288 [05:01<20:47, 5.38s/it, avr_loss=0.0358]\nsteps: 20%|█▉ | 57/288 [05:11<21:02, 5.47s/it, avr_loss=0.0358]\nsteps: 20%|█▉ | 57/288 [05:11<21:02, 5.47s/it, avr_loss=0.036] \nsteps: 20%|██ | 58/288 [05:22<21:17, 5.55s/it, avr_loss=0.036]\nsteps: 20%|██ | 58/288 [05:22<21:17, 5.55s/it, avr_loss=0.031]\nsteps: 20%|██ | 59/288 [05:27<21:09, 5.54s/it, avr_loss=0.031]\nsteps: 20%|██ | 59/288 [05:27<21:09, 5.54s/it, avr_loss=0.0311]\nsteps: 21%|██ | 60/288 [05:27<20:45, 5.46s/it, avr_loss=0.0311]\nsteps: 21%|██ | 60/288 [05:27<20:45, 5.46s/it, avr_loss=0.0319]\nsteps: 21%|██ | 61/288 [05:32<20:37, 5.45s/it, avr_loss=0.0319]\nsteps: 21%|██ | 61/288 [05:32<20:37, 5.45s/it, avr_loss=0.0316]\nsteps: 22%|██▏ | 62/288 [05:33<20:14, 5.38s/it, avr_loss=0.0316]\nsteps: 22%|██▏ | 62/288 [05:33<20:14, 5.38s/it, avr_loss=0.033] \nsteps: 22%|██▏ | 63/288 [05:38<20:07, 5.37s/it, avr_loss=0.033]\nsteps: 22%|██▏ | 63/288 [05:38<20:07, 5.37s/it, avr_loss=0.0321]\nsteps: 22%|██▏ | 64/288 [05:38<19:45, 5.29s/it, avr_loss=0.0321]\nsteps: 22%|██▏ | 64/288 [05:38<19:45, 5.29s/it, avr_loss=0.0332]\nsteps: 23%|██▎ | 65/288 [05:49<19:58, 5.37s/it, avr_loss=0.0332]\nsteps: 23%|██▎ | 65/288 [05:49<19:58, 5.37s/it, avr_loss=0.0327]\nsteps: 23%|██▎ | 66/288 [05:54<19:51, 5.37s/it, avr_loss=0.0327]\nsteps: 23%|██▎ | 66/288 [05:54<19:51, 5.37s/it, avr_loss=0.0325]\nsteps: 23%|██▎ | 67/288 [06:04<20:03, 5.45s/it, avr_loss=0.0325]\nsteps: 23%|██▎ | 67/288 [06:04<20:03, 5.45s/it, avr_loss=0.036] \nsteps: 24%|██▎ | 68/288 [06:05<19:42, 5.38s/it, avr_loss=0.036]\nsteps: 24%|██▎ | 68/288 [06:05<19:42, 5.38s/it, avr_loss=0.0364]\nsteps: 24%|██▍ | 69/288 [06:06<19:22, 5.31s/it, avr_loss=0.0364]\nsteps: 24%|██▍ | 69/288 [06:06<19:22, 5.31s/it, avr_loss=0.0453]\nsteps: 
24%|██▍ | 70/288 [06:07<19:02, 5.24s/it, avr_loss=0.0453]\nsteps: 24%|██▍ | 70/288 [06:07<19:02, 5.24s/it, avr_loss=0.0425]\nsteps: 25%|██▍ | 71/288 [06:17<19:13, 5.32s/it, avr_loss=0.0425]\nsteps: 25%|██▍ | 71/288 [06:17<19:13, 5.32s/it, avr_loss=0.0436]\nsteps: 25%|██▌ | 72/288 [06:22<19:07, 5.31s/it, avr_loss=0.0436]\nepoch 5/16\nsteps: 25%|██▌ | 72/288 [06:22<19:07, 5.31s/it, avr_loss=0.0446]INFO:dataset.image_video_dataset:epoch is incremented. current_epoch: 4, epoch: 5\nINFO:dataset.image_video_dataset:epoch is incremented. current_epoch: 4, epoch: 5\nsteps: 25%|██▌ | 73/288 [06:27<19:00, 5.31s/it, avr_loss=0.0446]\nsteps: 25%|██▌ | 73/288 [06:27<19:00, 5.31s/it, avr_loss=0.0434]\nsteps: 26%|██▌ | 74/288 [06:27<18:42, 5.24s/it, avr_loss=0.0434]\nsteps: 26%|██▌ | 74/288 [06:27<18:42, 5.24s/it, avr_loss=0.0498]\nsteps: 26%|██▌ | 75/288 [06:28<18:23, 5.18s/it, avr_loss=0.0498]\nsteps: 26%|██▌ | 75/288 [06:28<18:23, 5.18s/it, avr_loss=0.0501]\nsteps: 26%|██▋ | 76/288 [06:39<18:33, 5.25s/it, avr_loss=0.0501]\nsteps: 26%|██▋ | 76/288 [06:39<18:33, 5.25s/it, avr_loss=0.0508]\nsteps: 27%|██▋ | 77/288 [06:39<18:15, 5.19s/it, avr_loss=0.0508]\nsteps: 27%|██▋ | 77/288 [06:39<18:15, 5.19s/it, avr_loss=0.0522]\nsteps: 27%|██▋ | 78/288 [06:40<17:58, 5.14s/it, avr_loss=0.0522]\nsteps: 27%|██▋ | 78/288 [06:40<17:58, 5.14s/it, avr_loss=0.0533]\nsteps: 27%|██▋ | 79/288 [06:51<18:07, 5.21s/it, avr_loss=0.0533]\nsteps: 27%|██▋ | 79/288 [06:51<18:07, 5.21s/it, avr_loss=0.0539]\nsteps: 28%|██▊ | 80/288 [06:56<18:01, 5.20s/it, avr_loss=0.0539]\nsteps: 28%|██▊ | 80/288 [06:56<18:01, 5.20s/it, avr_loss=0.0525]\nsteps: 28%|██▊ | 81/288 [06:56<17:44, 5.14s/it, avr_loss=0.0525]\nsteps: 28%|██▊ | 81/288 [06:56<17:44, 5.14s/it, avr_loss=0.0533]\nsteps: 28%|██▊ | 82/288 [07:01<17:39, 5.14s/it, avr_loss=0.0533]\nsteps: 28%|██▊ | 82/288 [07:01<17:39, 5.14s/it, avr_loss=0.0527]\nsteps: 29%|██▉ | 83/288 [07:06<17:33, 5.14s/it, avr_loss=0.0527]\nsteps: 29%|██▉ | 83/288 [07:06<17:33, 5.14s/it, 
avr_loss=0.0532]\nsteps: 29%|██▉ | 84/288 [07:07<17:17, 5.08s/it, avr_loss=0.0532]\nsteps: 29%|██▉ | 84/288 [07:07<17:17, 5.08s/it, avr_loss=0.0547]\nsteps: 30%|██▉ | 85/288 [07:11<17:11, 5.08s/it, avr_loss=0.0547]\nsteps: 30%|██▉ | 85/288 [07:11<17:11, 5.08s/it, avr_loss=0.0522]\nsteps: 30%|██▉ | 86/288 [07:22<17:19, 5.15s/it, avr_loss=0.0522]\nsteps: 30%|██▉ | 86/288 [07:22<17:19, 5.15s/it, avr_loss=0.0508]\nsteps: 30%|███ | 87/288 [07:33<17:26, 5.21s/it, avr_loss=0.0508]\nsteps: 30%|███ | 87/288 [07:33<17:26, 5.21s/it, avr_loss=0.0399]\nsteps: 31%|███ | 88/288 [07:43<17:34, 5.27s/it, avr_loss=0.0399]\nsteps: 31%|███ | 88/288 [07:43<17:34, 5.27s/it, avr_loss=0.0386]\nsteps: 31%|███ | 89/288 [07:44<17:18, 5.22s/it, avr_loss=0.0386]\nsteps: 31%|███ | 89/288 [07:44<17:18, 5.22s/it, avr_loss=0.0402]\nsteps: 31%|███▏ | 90/288 [07:49<17:12, 5.21s/it, avr_loss=0.0402]\nepoch 6/16\nsteps: 31%|███▏ | 90/288 [07:49<17:12, 5.21s/it, avr_loss=0.0399]INFO:dataset.image_video_dataset:epoch is incremented. current_epoch: 5, epoch: 6\nINFO:dataset.image_video_dataset:epoch is incremented. 
current_epoch: 5, epoch: 6\nsteps: 32%|███▏ | 91/288 [07:54<17:06, 5.21s/it, avr_loss=0.0399]\nsteps: 32%|███▏ | 91/288 [07:54<17:06, 5.21s/it, avr_loss=0.0403]\nsteps: 32%|███▏ | 92/288 [07:54<16:51, 5.16s/it, avr_loss=0.0403]\nsteps: 32%|███▏ | 92/288 [07:54<16:51, 5.16s/it, avr_loss=0.0342]\nsteps: 32%|███▏ | 93/288 [07:59<16:46, 5.16s/it, avr_loss=0.0342]\nsteps: 32%|███▏ | 93/288 [07:59<16:46, 5.16s/it, avr_loss=0.0347]\nsteps: 33%|███▎ | 94/288 [08:04<16:40, 5.16s/it, avr_loss=0.0347]\nsteps: 33%|███▎ | 94/288 [08:04<16:40, 5.16s/it, avr_loss=0.0337]\nsteps: 33%|███▎ | 95/288 [08:05<16:25, 5.11s/it, avr_loss=0.0337]\nsteps: 33%|███▎ | 95/288 [08:05<16:25, 5.11s/it, avr_loss=0.0335]\nsteps: 33%|███▎ | 96/288 [08:10<16:20, 5.11s/it, avr_loss=0.0335]\nsteps: 33%|███▎ | 96/288 [08:10<16:20, 5.11s/it, avr_loss=0.0327]\nsteps: 34%|███▎ | 97/288 [08:10<16:06, 5.06s/it, avr_loss=0.0327]\nsteps: 34%|███▎ | 97/288 [08:10<16:06, 5.06s/it, avr_loss=0.0338]\nsteps: 34%|███▍ | 98/288 [08:21<16:12, 5.12s/it, avr_loss=0.0338]\nsteps: 34%|███▍ | 98/288 [08:21<16:12, 5.12s/it, avr_loss=0.034] \nsteps: 34%|███▍ | 99/288 [08:22<15:58, 5.07s/it, avr_loss=0.034]\nsteps: 34%|███▍ | 99/288 [08:22<15:58, 5.07s/it, avr_loss=0.0335]\nsteps: 35%|███▍ | 100/288 [08:26<15:53, 5.07s/it, avr_loss=0.0335]\nsteps: 35%|███▍ | 100/288 [08:26<15:53, 5.07s/it, avr_loss=0.0338]\nsteps: 35%|███▌ | 101/288 [08:27<15:39, 5.03s/it, avr_loss=0.0338]\nsteps: 35%|███▌ | 101/288 [08:27<15:39, 5.03s/it, avr_loss=0.035] \nsteps: 35%|███▌ | 102/288 [08:28<15:26, 4.98s/it, avr_loss=0.035]\nsteps: 35%|███▌ | 102/288 [08:28<15:26, 4.98s/it, avr_loss=0.0343]\nsteps: 36%|███▌ | 103/288 [08:38<15:32, 5.04s/it, avr_loss=0.0343]\nsteps: 36%|███▌ | 103/288 [08:38<15:32, 5.04s/it, avr_loss=0.0334]\nsteps: 36%|███▌ | 104/288 [08:49<15:36, 5.09s/it, avr_loss=0.0334]\nsteps: 36%|███▌ | 104/288 [08:49<15:36, 5.09s/it, avr_loss=0.0332]\nsteps: 36%|███▋ | 105/288 [08:54<15:31, 5.09s/it, avr_loss=0.0332]\nsteps: 36%|███▋ | 
105/288 [08:54<15:31, 5.09s/it, avr_loss=0.0335]\nsteps: 37%|███▋ | 106/288 [08:55<15:18, 5.05s/it, avr_loss=0.0335]\nsteps: 37%|███▋ | 106/288 [08:55<15:18, 5.05s/it, avr_loss=0.035] \nsteps: 37%|███▋ | 107/288 [08:59<15:13, 5.05s/it, avr_loss=0.035]\nsteps: 37%|███▋ | 107/288 [08:59<15:13, 5.05s/it, avr_loss=0.0344]\nsteps: 38%|███▊ | 108/288 [09:10<15:17, 5.10s/it, avr_loss=0.0344]\nepoch 7/16\nsteps: 38%|███▊ | 108/288 [09:10<15:17, 5.10s/it, avr_loss=0.0334]INFO:dataset.image_video_dataset:epoch is incremented. current_epoch: 6, epoch: 7\nINFO:dataset.image_video_dataset:epoch is incremented. current_epoch: 6, epoch: 7\nsteps: 38%|███▊ | 109/288 [09:21<15:21, 5.15s/it, avr_loss=0.0334]\nsteps: 38%|███▊ | 109/288 [09:21<15:21, 5.15s/it, avr_loss=0.0346]\nsteps: 38%|███▊ | 110/288 [09:22<15:09, 5.11s/it, avr_loss=0.0346]\nsteps: 38%|███▊ | 110/288 [09:22<15:09, 5.11s/it, avr_loss=0.0346]\nsteps: 39%|███▊ | 111/288 [09:32<15:13, 5.16s/it, avr_loss=0.0346]\nsteps: 39%|███▊ | 111/288 [09:32<15:13, 5.16s/it, avr_loss=0.0348]\nsteps: 39%|███▉ | 112/288 [09:43<15:16, 5.21s/it, avr_loss=0.0348]\nsteps: 39%|███▉ | 112/288 [09:43<15:16, 5.21s/it, avr_loss=0.0347]\nsteps: 39%|███▉ | 113/288 [09:53<15:19, 5.26s/it, avr_loss=0.0347]\nsteps: 39%|███▉ | 113/288 [09:53<15:19, 5.26s/it, avr_loss=0.0334]\nsteps: 40%|███▉ | 114/288 [09:54<15:07, 5.22s/it, avr_loss=0.0334]\nsteps: 40%|███▉ | 114/288 [09:54<15:07, 5.22s/it, avr_loss=0.0331]\nsteps: 40%|███▉ | 115/288 [09:59<15:01, 5.21s/it, avr_loss=0.0331]\nsteps: 40%|███▉ | 115/288 [09:59<15:01, 5.21s/it, avr_loss=0.031] \nsteps: 40%|████ | 116/288 [10:04<14:56, 5.21s/it, avr_loss=0.031]\nsteps: 40%|████ | 116/288 [10:04<14:56, 5.21s/it, avr_loss=0.0336]\nsteps: 41%|████ | 117/288 [10:04<14:44, 5.17s/it, avr_loss=0.0336]\nsteps: 41%|████ | 117/288 [10:04<14:44, 5.17s/it, avr_loss=0.0336]\nsteps: 41%|████ | 118/288 [10:05<14:32, 5.13s/it, avr_loss=0.0336]\nsteps: 41%|████ | 118/288 [10:05<14:32, 5.13s/it, avr_loss=0.033] \nsteps: 
41%|████▏ | 119/288 [10:06<14:21, 5.10s/it, avr_loss=0.033]\nsteps: 41%|████▏ | 119/288 [10:06<14:21, 5.10s/it, avr_loss=0.0322]\nsteps: 42%|████▏ | 120/288 [10:11<14:15, 5.09s/it, avr_loss=0.0322]\nsteps: 42%|████▏ | 120/288 [10:11<14:15, 5.09s/it, avr_loss=0.0314]\nsteps: 42%|████▏ | 121/288 [10:16<14:10, 5.09s/it, avr_loss=0.0314]\nsteps: 42%|████▏ | 121/288 [10:16<14:10, 5.09s/it, avr_loss=0.0319]\nsteps: 42%|████▏ | 122/288 [10:16<13:59, 5.06s/it, avr_loss=0.0319]\nsteps: 42%|████▏ | 122/288 [10:16<13:59, 5.06s/it, avr_loss=0.0338]\nsteps: 43%|████▎ | 123/288 [10:27<14:01, 5.10s/it, avr_loss=0.0338]\nsteps: 43%|████▎ | 123/288 [10:27<14:01, 5.10s/it, avr_loss=0.034] \nsteps: 43%|████▎ | 124/288 [10:37<14:03, 5.14s/it, avr_loss=0.034]\nsteps: 43%|████▎ | 124/288 [10:37<14:03, 5.14s/it, avr_loss=0.0326]\nsteps: 43%|████▎ | 125/288 [10:42<13:58, 5.14s/it, avr_loss=0.0326]\nsteps: 43%|████▎ | 125/288 [10:42<13:58, 5.14s/it, avr_loss=0.0304]\nsteps: 44%|████▍ | 126/288 [10:43<13:47, 5.11s/it, avr_loss=0.0304]\nepoch 8/16\nsteps: 44%|████▍ | 126/288 [10:43<13:47, 5.11s/it, avr_loss=0.0316]INFO:dataset.image_video_dataset:epoch is incremented. current_epoch: 7, epoch: 8\nINFO:dataset.image_video_dataset:epoch is incremented. 
current_epoch: 7, epoch: 8\nsteps: 44%|████▍ | 127/288 [10:44<13:36, 5.07s/it, avr_loss=0.0316]\nsteps: 44%|████▍ | 127/288 [10:44<13:36, 5.07s/it, avr_loss=0.033] \nsteps: 44%|████▍ | 128/288 [10:49<13:31, 5.07s/it, avr_loss=0.033]\nsteps: 44%|████▍ | 128/288 [10:49<13:31, 5.07s/it, avr_loss=0.0326]\nsteps: 45%|████▍ | 129/288 [10:53<13:25, 5.07s/it, avr_loss=0.0326]\nsteps: 45%|████▍ | 129/288 [10:53<13:25, 5.07s/it, avr_loss=0.032] \nsteps: 45%|████▌ | 130/288 [10:54<13:15, 5.03s/it, avr_loss=0.032]\nsteps: 45%|████▌ | 130/288 [10:54<13:15, 5.03s/it, avr_loss=0.0331]\nsteps: 45%|████▌ | 131/288 [11:05<13:17, 5.08s/it, avr_loss=0.0331]\nsteps: 45%|████▌ | 131/288 [11:05<13:17, 5.08s/it, avr_loss=0.0332]\nsteps: 46%|████▌ | 132/288 [11:09<13:11, 5.07s/it, avr_loss=0.0332]\nsteps: 46%|████▌ | 132/288 [11:09<13:11, 5.07s/it, avr_loss=0.033] \nsteps: 46%|████▌ | 133/288 [11:20<13:13, 5.12s/it, avr_loss=0.033]\nsteps: 46%|████▌ | 133/288 [11:20<13:13, 5.12s/it, avr_loss=0.0334]\nsteps: 47%|████▋ | 134/288 [11:21<13:02, 5.08s/it, avr_loss=0.0334]\nsteps: 47%|████▋ | 134/288 [11:21<13:02, 5.08s/it, avr_loss=0.0311]\nsteps: 47%|████▋ | 135/288 [11:31<13:04, 5.13s/it, avr_loss=0.0311]\nsteps: 47%|████▋ | 135/288 [11:31<13:04, 5.13s/it, avr_loss=0.0306]\nsteps: 47%|████▋ | 136/288 [11:32<12:54, 5.09s/it, avr_loss=0.0306]\nsteps: 47%|████▋ | 136/288 [11:32<12:54, 5.09s/it, avr_loss=0.0316]\nsteps: 48%|████▊ | 137/288 [11:33<12:44, 5.06s/it, avr_loss=0.0316]\nsteps: 48%|████▊ | 137/288 [11:33<12:44, 5.06s/it, avr_loss=0.0317]\nsteps: 48%|████▊ | 138/288 [11:38<12:38, 5.06s/it, avr_loss=0.0317]\nsteps: 48%|████▊ | 138/288 [11:38<12:38, 5.06s/it, avr_loss=0.0324]\nsteps: 48%|████▊ | 139/288 [11:43<12:33, 5.06s/it, avr_loss=0.0324]\nsteps: 48%|████▊ | 139/288 [11:43<12:33, 5.06s/it, avr_loss=0.0321]\nsteps: 49%|████▊ | 140/288 [11:43<12:23, 5.03s/it, avr_loss=0.0321]\nsteps: 49%|████▊ | 140/288 [11:43<12:23, 5.03s/it, avr_loss=0.031] \nsteps: 49%|████▉ | 141/288 [11:48<12:18, 
5.03s/it, avr_loss=0.031]\nsteps: 49%|████▉ | 141/288 [11:48<12:18, 5.03s/it, avr_loss=0.0328]\nsteps: 49%|████▉ | 142/288 [11:53<12:13, 5.02s/it, avr_loss=0.0328]\nsteps: 49%|████▉ | 142/288 [11:53<12:13, 5.02s/it, avr_loss=0.0346]\nsteps: 50%|████▉ | 143/288 [12:03<12:14, 5.06s/it, avr_loss=0.0346]\nsteps: 50%|████▉ | 143/288 [12:03<12:14, 5.06s/it, avr_loss=0.035] \nsteps: 50%|█████ | 144/288 [12:14<12:14, 5.10s/it, avr_loss=0.035]\nepoch 9/16\nsteps: 50%|█████ | 144/288 [12:14<12:14, 5.10s/it, avr_loss=0.0339]INFO:dataset.image_video_dataset:epoch is incremented. current_epoch: 8, epoch: 9\nINFO:dataset.image_video_dataset:epoch is incremented. current_epoch: 8, epoch: 9\nsteps: 50%|█████ | 145/288 [12:25<12:14, 5.14s/it, avr_loss=0.0339]\nsteps: 50%|█████ | 145/288 [12:25<12:14, 5.14s/it, avr_loss=0.031] \nsteps: 51%|█████ | 146/288 [12:35<12:14, 5.18s/it, avr_loss=0.031]\nsteps: 51%|█████ | 146/288 [12:35<12:14, 5.18s/it, avr_loss=0.0313]\nsteps: 51%|█████ | 147/288 [12:46<12:14, 5.21s/it, avr_loss=0.0313]\nsteps: 51%|█████ | 147/288 [12:46<12:14, 5.21s/it, avr_loss=0.0306]\nsteps: 51%|█████▏ | 148/288 [12:46<12:05, 5.18s/it, avr_loss=0.0306]\nsteps: 51%|█████▏ | 148/288 [12:46<12:05, 5.18s/it, avr_loss=0.0302]\nsteps: 52%|█████▏ | 149/288 [12:57<12:05, 5.22s/it, avr_loss=0.0302]\nsteps: 52%|█████▏ | 149/288 [12:57<12:05, 5.22s/it, avr_loss=0.0307]\nsteps: 52%|█████▏ | 150/288 [12:58<11:55, 5.19s/it, avr_loss=0.0307]\nsteps: 52%|█████▏ | 150/288 [12:58<11:55, 5.19s/it, avr_loss=0.0306]\nsteps: 52%|█████▏ | 151/288 [13:08<11:55, 5.22s/it, avr_loss=0.0306]\nsteps: 52%|█████▏ | 151/288 [13:08<11:55, 5.22s/it, avr_loss=0.0307]\nsteps: 53%|█████▎ | 152/288 [13:09<11:46, 5.19s/it, avr_loss=0.0307]\nsteps: 53%|█████▎ | 152/288 [13:09<11:46, 5.19s/it, avr_loss=0.0324]\nsteps: 53%|█████▎ | 153/288 [13:20<11:45, 5.23s/it, avr_loss=0.0324]\nsteps: 53%|█████▎ | 153/288 [13:20<11:45, 5.23s/it, avr_loss=0.0337]\nsteps: 53%|█████▎ | 154/288 [13:24<11:40, 5.23s/it, 
avr_loss=0.0337]\nsteps: 53%|█████▎ | 154/288 [13:24<11:40, 5.23s/it, avr_loss=0.0321]\nsteps: 54%|█████▍ | 155/288 [13:25<11:31, 5.20s/it, avr_loss=0.0321]\nsteps: 54%|█████▍ | 155/288 [13:25<11:31, 5.20s/it, avr_loss=0.032] \nsteps: 54%|█████▍ | 156/288 [13:26<11:22, 5.17s/it, avr_loss=0.032]\nsteps: 54%|█████▍ | 156/288 [13:26<11:22, 5.17s/it, avr_loss=0.0316]\nsteps: 55%|█████▍ | 157/288 [13:31<11:16, 5.17s/it, avr_loss=0.0316]\nsteps: 55%|█████▍ | 157/288 [13:31<11:16, 5.17s/it, avr_loss=0.0322]\nsteps: 55%|█████▍ | 158/288 [13:31<11:08, 5.14s/it, avr_loss=0.0322]\nsteps: 55%|█████▍ | 158/288 [13:31<11:08, 5.14s/it, avr_loss=0.0337]\nsteps: 55%|█████▌ | 159/288 [13:42<11:07, 5.17s/it, avr_loss=0.0337]\nsteps: 55%|█████▌ | 159/288 [13:42<11:07, 5.17s/it, avr_loss=0.034] \nsteps: 56%|█████▌ | 160/288 [13:47<11:01, 5.17s/it, avr_loss=0.034]\nsteps: 56%|█████▌ | 160/288 [13:47<11:01, 5.17s/it, avr_loss=0.0464]\nsteps: 56%|█████▌ | 161/288 [13:58<11:01, 5.21s/it, avr_loss=0.0464]\nsteps: 56%|█████▌ | 161/288 [13:58<11:01, 5.21s/it, avr_loss=0.0458]\nsteps: 56%|█████▋ | 162/288 [14:02<10:55, 5.20s/it, avr_loss=0.0458]\nepoch 10/16\nsteps: 56%|█████▋ | 162/288 [14:02<10:55, 5.20s/it, avr_loss=0.0461]INFO:dataset.image_video_dataset:epoch is incremented. current_epoch: 9, epoch: 10\nINFO:dataset.image_video_dataset:epoch is incremented. 
current_epoch: 9, epoch: 10\nsteps: 57%|█████▋ | 163/288 [14:03<10:46, 5.18s/it, avr_loss=0.0461]\nsteps: 57%|█████▋ | 163/288 [14:03<10:46, 5.18s/it, avr_loss=0.0469]\nsteps: 57%|█████▋ | 164/288 [14:04<10:38, 5.15s/it, avr_loss=0.0469]\nsteps: 57%|█████▋ | 164/288 [14:04<10:38, 5.15s/it, avr_loss=0.0467]\nsteps: 57%|█████▋ | 165/288 [14:09<10:32, 5.15s/it, avr_loss=0.0467]\nsteps: 57%|█████▋ | 165/288 [14:09<10:32, 5.15s/it, avr_loss=0.0471]\nsteps: 58%|█████▊ | 166/288 [14:09<10:24, 5.12s/it, avr_loss=0.0471]\nsteps: 58%|█████▊ | 166/288 [14:09<10:24, 5.12s/it, avr_loss=0.0478]\nsteps: 58%|█████▊ | 167/288 [14:10<10:16, 5.09s/it, avr_loss=0.0478]\nsteps: 58%|█████▊ | 167/288 [14:10<10:16, 5.09s/it, avr_loss=0.0494]\nsteps: 58%|█████▊ | 168/288 [14:15<10:10, 5.09s/it, avr_loss=0.0494]\nsteps: 58%|█████▊ | 168/288 [14:15<10:10, 5.09s/it, avr_loss=0.049] \nsteps: 59%|█████▊ | 169/288 [14:16<10:02, 5.07s/it, avr_loss=0.049]\nsteps: 59%|█████▊ | 169/288 [14:16<10:02, 5.07s/it, avr_loss=0.0493]\nsteps: 59%|█████▉ | 170/288 [14:16<09:54, 5.04s/it, avr_loss=0.0493]\nsteps: 59%|█████▉ | 170/288 [14:16<09:54, 5.04s/it, avr_loss=0.0477]\nsteps: 59%|█████▉ | 171/288 [14:21<09:49, 5.04s/it, avr_loss=0.0477]\nsteps: 59%|█████▉ | 171/288 [14:21<09:49, 5.04s/it, avr_loss=0.0466]\nsteps: 60%|█████▉ | 172/288 [14:26<09:44, 5.04s/it, avr_loss=0.0466]\nsteps: 60%|█████▉ | 172/288 [14:26<09:44, 5.04s/it, avr_loss=0.0465]\nsteps: 60%|██████ | 173/288 [14:37<09:42, 5.07s/it, avr_loss=0.0465]\nsteps: 60%|██████ | 173/288 [14:37<09:42, 5.07s/it, avr_loss=0.0452]\nsteps: 60%|██████ | 174/288 [14:47<09:41, 5.10s/it, avr_loss=0.0452]\nsteps: 60%|██████ | 174/288 [14:47<09:41, 5.10s/it, avr_loss=0.0447]\nsteps: 61%|██████ | 175/288 [14:58<09:40, 5.13s/it, avr_loss=0.0447]\nsteps: 61%|██████ | 175/288 [14:58<09:40, 5.13s/it, avr_loss=0.0452]\nsteps: 61%|██████ | 176/288 [15:03<09:34, 5.13s/it, avr_loss=0.0452]\nsteps: 61%|██████ | 176/288 [15:03<09:34, 5.13s/it, avr_loss=0.0521]\nsteps: 
61%|██████▏ | 177/288 [15:13<09:33, 5.16s/it, avr_loss=0.0521]\nsteps: 61%|██████▏ | 177/288 [15:13<09:33, 5.16s/it, avr_loss=0.0498]\nsteps: 62%|██████▏ | 178/288 [15:24<09:31, 5.19s/it, avr_loss=0.0498]\nsteps: 62%|██████▏ | 178/288 [15:24<09:31, 5.19s/it, avr_loss=0.0357]\nsteps: 62%|██████▏ | 179/288 [15:29<09:25, 5.19s/it, avr_loss=0.0357]\nsteps: 62%|██████▏ | 179/288 [15:29<09:25, 5.19s/it, avr_loss=0.0367]\nsteps: 62%|██████▎ | 180/288 [15:39<09:23, 5.22s/it, avr_loss=0.0367]\nepoch 11/16\nsteps: 62%|██████▎ | 180/288 [15:39<09:23, 5.22s/it, avr_loss=0.0366]INFO:dataset.image_video_dataset:epoch is incremented. current_epoch: 10, epoch: 11\nINFO:dataset.image_video_dataset:epoch is incremented. current_epoch: 10, epoch: 11\nsteps: 63%|██████▎ | 181/288 [15:50<09:21, 5.25s/it, avr_loss=0.0366]\nsteps: 63%|██████▎ | 181/288 [15:50<09:21, 5.25s/it, avr_loss=0.0357]\nsteps: 63%|██████▎ | 182/288 [16:00<09:19, 5.28s/it, avr_loss=0.0357]\nsteps: 63%|██████▎ | 182/288 [16:00<09:19, 5.28s/it, avr_loss=0.0356]\nsteps: 64%|██████▎ | 183/288 [16:01<09:11, 5.25s/it, avr_loss=0.0356]\nsteps: 64%|██████▎ | 183/288 [16:01<09:11, 5.25s/it, avr_loss=0.0356]\nsteps: 64%|██████▍ | 184/288 [16:12<09:09, 5.28s/it, avr_loss=0.0356]\nsteps: 64%|██████▍ | 184/288 [16:12<09:09, 5.28s/it, avr_loss=0.034] \nsteps: 64%|██████▍ | 185/288 [16:22<09:07, 5.31s/it, avr_loss=0.034]\nsteps: 64%|██████▍ | 185/288 [16:22<09:07, 5.31s/it, avr_loss=0.0316]\nsteps: 65%|██████▍ | 186/288 [16:33<09:04, 5.34s/it, avr_loss=0.0316]\nsteps: 65%|██████▍ | 186/288 [16:33<09:04, 5.34s/it, avr_loss=0.0331]\nsteps: 65%|██████▍ | 187/288 [16:34<08:56, 5.32s/it, avr_loss=0.0331]\nsteps: 65%|██████▍ | 187/288 [16:34<08:56, 5.32s/it, avr_loss=0.0336]\nsteps: 65%|██████▌ | 188/288 [16:44<08:54, 5.34s/it, avr_loss=0.0336]\nsteps: 65%|██████▌ | 188/288 [16:44<08:54, 5.34s/it, avr_loss=0.0334]\nsteps: 66%|██████▌ | 189/288 [16:49<08:48, 5.34s/it, avr_loss=0.0334]\nsteps: 66%|██████▌ | 189/288 [16:49<08:48, 
5.34s/it, avr_loss=0.034] \nsteps: 66%|██████▌ | 190/288 [16:50<08:40, 5.32s/it, avr_loss=0.034]\nsteps: 66%|██████▌ | 190/288 [16:50<08:40, 5.32s/it, avr_loss=0.0365]\nsteps: 66%|██████▋ | 191/288 [16:50<08:33, 5.29s/it, avr_loss=0.0365]\nsteps: 66%|██████▋ | 191/288 [16:50<08:33, 5.29s/it, avr_loss=0.0374]\nsteps: 67%|██████▋ | 192/288 [16:55<08:27, 5.29s/it, avr_loss=0.0374]\nsteps: 67%|██████▋ | 192/288 [16:55<08:27, 5.29s/it, avr_loss=0.0377]\nsteps: 67%|██████▋ | 193/288 [17:00<08:22, 5.29s/it, avr_loss=0.0377]\nsteps: 67%|██████▋ | 193/288 [17:00<08:22, 5.29s/it, avr_loss=0.0367]\nsteps: 67%|██████▋ | 194/288 [17:05<08:16, 5.29s/it, avr_loss=0.0367]\nsteps: 67%|██████▋ | 194/288 [17:05<08:16, 5.29s/it, avr_loss=0.0275]\nsteps: 68%|██████▊ | 195/288 [17:10<08:11, 5.28s/it, avr_loss=0.0275]\nsteps: 68%|██████▊ | 195/288 [17:10<08:11, 5.28s/it, avr_loss=0.0274]\nsteps: 68%|██████▊ | 196/288 [17:10<08:03, 5.26s/it, avr_loss=0.0274]\nsteps: 68%|██████▊ | 196/288 [17:10<08:03, 5.26s/it, avr_loss=0.0308]\nsteps: 68%|██████▊ | 197/288 [17:11<07:56, 5.24s/it, avr_loss=0.0308]\nsteps: 68%|██████▊ | 197/288 [17:11<07:56, 5.24s/it, avr_loss=0.0324]\nsteps: 69%|██████▉ | 198/288 [17:22<07:53, 5.26s/it, avr_loss=0.0324]\nepoch 12/16\nsteps: 69%|██████▉ | 198/288 [17:22<07:53, 5.26s/it, avr_loss=0.032] INFO:dataset.image_video_dataset:epoch is incremented. current_epoch: 11, epoch: 12\nINFO:dataset.image_video_dataset:epoch is incremented. 
current_epoch: 11, epoch: 12\nsteps: 69%|██████▉ | 199/288 [17:32<07:50, 5.29s/it, avr_loss=0.032]\nsteps: 69%|██████▉ | 199/288 [17:32<07:50, 5.29s/it, avr_loss=0.032]\nsteps: 69%|██████▉ | 200/288 [17:33<07:43, 5.27s/it, avr_loss=0.032]\nsteps: 69%|██████▉ | 200/288 [17:33<07:43, 5.27s/it, avr_loss=0.0339]\nsteps: 70%|██████▉ | 201/288 [17:34<07:36, 5.25s/it, avr_loss=0.0339]\nsteps: 70%|██████▉ | 201/288 [17:34<07:36, 5.25s/it, avr_loss=0.0346]\nsteps: 70%|███████ | 202/288 [17:45<07:33, 5.27s/it, avr_loss=0.0346]\nsteps: 70%|███████ | 202/288 [17:45<07:33, 5.27s/it, avr_loss=0.0345]\nsteps: 70%|███████ | 203/288 [17:45<07:26, 5.25s/it, avr_loss=0.0345]\nsteps: 70%|███████ | 203/288 [17:45<07:26, 5.25s/it, avr_loss=0.0353]\nsteps: 71%|███████ | 204/288 [17:46<07:19, 5.23s/it, avr_loss=0.0353]\nsteps: 71%|███████ | 204/288 [17:46<07:19, 5.23s/it, avr_loss=0.0342]\nsteps: 71%|███████ | 205/288 [17:47<07:12, 5.21s/it, avr_loss=0.0342]\nsteps: 71%|███████ | 205/288 [17:47<07:12, 5.21s/it, avr_loss=0.0353]\nsteps: 72%|███████▏ | 206/288 [17:52<07:06, 5.20s/it, avr_loss=0.0353]\nsteps: 72%|███████▏ | 206/288 [17:52<07:06, 5.20s/it, avr_loss=0.035] \nsteps: 72%|███████▏ | 207/288 [17:56<07:01, 5.20s/it, avr_loss=0.035]\nsteps: 72%|███████▏ | 207/288 [17:56<07:01, 5.20s/it, avr_loss=0.0342]\nsteps: 72%|███████▏ | 208/288 [18:01<06:56, 5.20s/it, avr_loss=0.0342]\nsteps: 72%|███████▏ | 208/288 [18:01<06:56, 5.20s/it, avr_loss=0.032] \nsteps: 73%|███████▎ | 209/288 [18:12<06:52, 5.23s/it, avr_loss=0.032]\nsteps: 73%|███████▎ | 209/288 [18:12<06:52, 5.23s/it, avr_loss=0.0312]\nsteps: 73%|███████▎ | 210/288 [18:12<06:45, 5.20s/it, avr_loss=0.0312]\nsteps: 73%|███████▎ | 210/288 [18:12<06:45, 5.20s/it, avr_loss=0.0319]\nsteps: 73%|███████▎ | 211/288 [18:17<06:40, 5.20s/it, avr_loss=0.0319]\nsteps: 73%|███████▎ | 211/288 [18:17<06:40, 5.20s/it, avr_loss=0.0317]\nsteps: 74%|███████▎ | 212/288 [18:22<06:35, 5.20s/it, avr_loss=0.0317]\nsteps: 74%|███████▎ | 212/288 [18:22<06:35, 
5.20s/it, avr_loss=0.0319]\nsteps: 74%|███████▍ | 213/288 [18:33<06:31, 5.23s/it, avr_loss=0.0319]\nsteps: 74%|███████▍ | 213/288 [18:33<06:31, 5.23s/it, avr_loss=0.0321]\nsteps: 74%|███████▍ | 214/288 [18:33<06:25, 5.21s/it, avr_loss=0.0321]\nsteps: 74%|███████▍ | 214/288 [18:33<06:25, 5.21s/it, avr_loss=0.0297]\nsteps: 75%|███████▍ | 215/288 [18:44<06:21, 5.23s/it, avr_loss=0.0297]\nsteps: 75%|███████▍ | 215/288 [18:44<06:21, 5.23s/it, avr_loss=0.0275]\nsteps: 75%|███████▌ | 216/288 [18:49<06:16, 5.23s/it, avr_loss=0.0275]\nepoch 13/16\nsteps: 75%|███████▌ | 216/288 [18:49<06:16, 5.23s/it, avr_loss=0.0278]INFO:dataset.image_video_dataset:epoch is incremented. current_epoch: 12, epoch: 13\nINFO:dataset.image_video_dataset:epoch is incremented. current_epoch: 12, epoch: 13\nsteps: 75%|███████▌ | 217/288 [18:54<06:11, 5.23s/it, avr_loss=0.0278]\nsteps: 75%|███████▌ | 217/288 [18:54<06:11, 5.23s/it, avr_loss=0.0291]\nsteps: 76%|███████▌ | 218/288 [19:04<06:07, 5.25s/it, avr_loss=0.0291]\nsteps: 76%|███████▌ | 218/288 [19:04<06:07, 5.25s/it, avr_loss=0.0266]\nsteps: 76%|███████▌ | 219/288 [19:05<06:00, 5.23s/it, avr_loss=0.0266]\nsteps: 76%|███████▌ | 219/288 [19:05<06:00, 5.23s/it, avr_loss=0.0269]\nsteps: 76%|███████▋ | 220/288 [19:16<05:57, 5.25s/it, avr_loss=0.0269]\nsteps: 76%|███████▋ | 220/288 [19:16<05:57, 5.25s/it, avr_loss=0.0267]\nsteps: 77%|███████▋ | 221/288 [19:16<05:50, 5.23s/it, avr_loss=0.0267]\nsteps: 77%|███████▋ | 221/288 [19:16<05:50, 5.23s/it, avr_loss=0.0278]\nsteps: 77%|███████▋ | 222/288 [19:21<05:45, 5.23s/it, avr_loss=0.0278]\nsteps: 77%|███████▋ | 222/288 [19:21<05:45, 5.23s/it, avr_loss=0.0273]\nsteps: 77%|███████▋ | 223/288 [19:32<05:41, 5.26s/it, avr_loss=0.0273]\nsteps: 77%|███████▋ | 223/288 [19:32<05:41, 5.26s/it, avr_loss=0.0252]\nsteps: 78%|███████▊ | 224/288 [19:37<05:36, 5.25s/it, avr_loss=0.0252]\nsteps: 78%|███████▊ | 224/288 [19:37<05:36, 5.25s/it, avr_loss=0.025] \nsteps: 78%|███████▊ | 225/288 [19:41<05:30, 5.25s/it, 
avr_loss=0.025]\nsteps: 78%|███████▊ | 225/288 [19:41<05:30, 5.25s/it, avr_loss=0.0252]\nsteps: 78%|███████▊ | 226/288 [19:46<05:25, 5.25s/it, avr_loss=0.0252]\nsteps: 78%|███████▊ | 226/288 [19:46<05:25, 5.25s/it, avr_loss=0.0253]\nsteps: 79%|███████▉ | 227/288 [19:57<05:21, 5.27s/it, avr_loss=0.0253]\nsteps: 79%|███████▉ | 227/288 [19:57<05:21, 5.27s/it, avr_loss=0.0267]\nsteps: 79%|███████▉ | 228/288 [20:02<05:16, 5.27s/it, avr_loss=0.0267]\nsteps: 79%|███████▉ | 228/288 [20:02<05:16, 5.27s/it, avr_loss=0.0308]\nsteps: 80%|███████▉ | 229/288 [20:12<05:12, 5.30s/it, avr_loss=0.0308]\nsteps: 80%|███████▉ | 229/288 [20:12<05:12, 5.30s/it, avr_loss=0.0308]\nsteps: 80%|███████▉ | 230/288 [20:13<05:06, 5.28s/it, avr_loss=0.0308]\nsteps: 80%|███████▉ | 230/288 [20:13<05:06, 5.28s/it, avr_loss=0.0327]\nsteps: 80%|████████ | 231/288 [20:14<04:59, 5.26s/it, avr_loss=0.0327]\nsteps: 80%|████████ | 231/288 [20:14<04:59, 5.26s/it, avr_loss=0.0336]\nsteps: 81%|████████ | 232/288 [20:14<04:53, 5.24s/it, avr_loss=0.0336]\nsteps: 81%|████████ | 232/288 [20:14<04:53, 5.24s/it, avr_loss=0.0343]\nsteps: 81%|████████ | 233/288 [20:25<04:49, 5.26s/it, avr_loss=0.0343]\nsteps: 81%|████████ | 233/288 [20:25<04:49, 5.26s/it, avr_loss=0.034] \nsteps: 81%|████████▏ | 234/288 [20:30<04:43, 5.26s/it, avr_loss=0.034]\nepoch 14/16\nsteps: 81%|████████▏ | 234/288 [20:30<04:43, 5.26s/it, avr_loss=0.0428]INFO:dataset.image_video_dataset:epoch is incremented. current_epoch: 13, epoch: 14\nINFO:dataset.image_video_dataset:epoch is incremented. 
current_epoch: 13, epoch: 14\nsteps: 82%|████████▏ | 235/288 [20:41<04:39, 5.28s/it, avr_loss=0.0428]\nsteps: 82%|████████▏ | 235/288 [20:41<04:39, 5.28s/it, avr_loss=0.0428]\nsteps: 82%|████████▏ | 236/288 [20:46<04:34, 5.28s/it, avr_loss=0.0428]\nsteps: 82%|████████▏ | 236/288 [20:46<04:34, 5.28s/it, avr_loss=0.0436]\nsteps: 82%|████████▏ | 237/288 [20:51<04:29, 5.28s/it, avr_loss=0.0436]\nsteps: 82%|████████▏ | 237/288 [20:51<04:29, 5.28s/it, avr_loss=0.042] \nsteps: 83%|████████▎ | 238/288 [20:55<04:23, 5.28s/it, avr_loss=0.042]\nsteps: 83%|████████▎ | 238/288 [20:55<04:23, 5.28s/it, avr_loss=0.0424]\nsteps: 83%|████████▎ | 239/288 [21:00<04:18, 5.27s/it, avr_loss=0.0424]\nsteps: 83%|████████▎ | 239/288 [21:00<04:18, 5.27s/it, avr_loss=0.0422]\nsteps: 83%|████████▎ | 240/288 [21:01<04:12, 5.26s/it, avr_loss=0.0422]\nsteps: 83%|████████▎ | 240/288 [21:01<04:12, 5.26s/it, avr_loss=0.0423]\nsteps: 84%|████████▎ | 241/288 [21:06<04:06, 5.25s/it, avr_loss=0.0423]\nsteps: 84%|████████▎ | 241/288 [21:06<04:06, 5.25s/it, avr_loss=0.0534]\nsteps: 84%|████████▍ | 242/288 [21:11<04:01, 5.25s/it, avr_loss=0.0534]\nsteps: 84%|████████▍ | 242/288 [21:11<04:01, 5.25s/it, avr_loss=0.0541]\nsteps: 84%|████████▍ | 243/288 [21:21<03:57, 5.27s/it, avr_loss=0.0541]\nsteps: 84%|████████▍ | 243/288 [21:21<03:57, 5.27s/it, avr_loss=0.0542]\nsteps: 85%|████████▍ | 244/288 [21:26<03:51, 5.27s/it, avr_loss=0.0542]\nsteps: 85%|████████▍ | 244/288 [21:26<03:51, 5.27s/it, avr_loss=0.0559]\nsteps: 85%|████████▌ | 245/288 [21:27<03:45, 5.25s/it, avr_loss=0.0559]\nsteps: 85%|████████▌ | 245/288 [21:27<03:45, 5.25s/it, avr_loss=0.0558]\nsteps: 85%|████████▌ | 246/288 [21:27<03:39, 5.24s/it, avr_loss=0.0558]\nsteps: 85%|████████▌ | 246/288 [21:27<03:39, 5.24s/it, avr_loss=0.0528]\nsteps: 86%|████████▌ | 247/288 [21:38<03:35, 5.26s/it, avr_loss=0.0528]\nsteps: 86%|████████▌ | 247/288 [21:38<03:35, 5.26s/it, avr_loss=0.0531]\nsteps: 86%|████████▌ | 248/288 [21:49<03:31, 5.28s/it, 
avr_loss=0.0531]\nsteps: 86%|████████▌ | 248/288 [21:49<03:31, 5.28s/it, avr_loss=0.0519]\nsteps: 86%|████████▋ | 249/288 [21:59<03:26, 5.30s/it, avr_loss=0.0519]\nsteps: 86%|████████▋ | 249/288 [21:59<03:26, 5.30s/it, avr_loss=0.051] \nsteps: 87%|████████▋ | 250/288 [22:10<03:22, 5.32s/it, avr_loss=0.051]\nsteps: 87%|████████▋ | 250/288 [22:10<03:22, 5.32s/it, avr_loss=0.0542]\nsteps: 87%|████████▋ | 251/288 [22:10<03:16, 5.30s/it, avr_loss=0.0542]\nsteps: 87%|████████▋ | 251/288 [22:10<03:16, 5.30s/it, avr_loss=0.056] \nsteps: 88%|████████▊ | 252/288 [22:11<03:10, 5.28s/it, avr_loss=0.056]\nepoch 15/16\nsteps: 88%|████████▊ | 252/288 [22:11<03:10, 5.28s/it, avr_loss=0.049]INFO:dataset.image_video_dataset:epoch is incremented. current_epoch: 14, epoch: 15\nINFO:dataset.image_video_dataset:epoch is incremented. current_epoch: 14, epoch: 15\nsteps: 88%|████████▊ | 253/288 [22:22<03:05, 5.31s/it, avr_loss=0.049]\nsteps: 88%|████████▊ | 253/288 [22:22<03:05, 5.31s/it, avr_loss=0.0478]\nsteps: 88%|████████▊ | 254/288 [22:27<03:00, 5.30s/it, avr_loss=0.0478]\nsteps: 88%|████████▊ | 254/288 [22:27<03:00, 5.30s/it, avr_loss=0.0472]\nsteps: 89%|████████▊ | 255/288 [22:31<02:54, 5.30s/it, avr_loss=0.0472]\nsteps: 89%|████████▊ | 255/288 [22:31<02:54, 5.30s/it, avr_loss=0.0481]\nsteps: 89%|████████▉ | 256/288 [22:32<02:49, 5.28s/it, avr_loss=0.0481]\nsteps: 89%|████████▉ | 256/288 [22:32<02:49, 5.28s/it, avr_loss=0.0494]\nsteps: 89%|████████▉ | 257/288 [22:43<02:44, 5.30s/it, avr_loss=0.0494]\nsteps: 89%|████████▉ | 257/288 [22:43<02:44, 5.30s/it, avr_loss=0.048] \nsteps: 90%|████████▉ | 258/288 [22:53<02:39, 5.32s/it, avr_loss=0.048]\nsteps: 90%|████████▉ | 258/288 [22:53<02:39, 5.32s/it, avr_loss=0.0478]\nsteps: 90%|████████▉ | 259/288 [23:04<02:35, 5.35s/it, avr_loss=0.0478]\nsteps: 90%|████████▉ | 259/288 [23:04<02:35, 5.35s/it, avr_loss=0.0369]\nsteps: 90%|█████████ | 260/288 [23:15<02:30, 5.37s/it, avr_loss=0.0369]\nsteps: 90%|█████████ | 260/288 [23:15<02:30, 
5.37s/it, avr_loss=0.036] \nsteps: 91%|█████████ | 261/288 [23:15<02:24, 5.35s/it, avr_loss=0.036]\nsteps: 91%|█████████ | 261/288 [23:15<02:24, 5.35s/it, avr_loss=0.0368]\nsteps: 91%|█████████ | 262/288 [23:16<02:18, 5.33s/it, avr_loss=0.0368]\nsteps: 91%|█████████ | 262/288 [23:16<02:18, 5.33s/it, avr_loss=0.0365]\nsteps: 91%|█████████▏| 263/288 [23:21<02:13, 5.33s/it, avr_loss=0.0365]\nsteps: 91%|█████████▏| 263/288 [23:21<02:13, 5.33s/it, avr_loss=0.0356]\nsteps: 92%|█████████▏| 264/288 [23:32<02:08, 5.35s/it, avr_loss=0.0356]\nsteps: 92%|█████████▏| 264/288 [23:32<02:08, 5.35s/it, avr_loss=0.0335]\nsteps: 92%|█████████▏| 265/288 [23:36<02:02, 5.35s/it, avr_loss=0.0335]\nsteps: 92%|█████████▏| 265/288 [23:36<02:02, 5.35s/it, avr_loss=0.0333]\nsteps: 92%|█████████▏| 266/288 [23:47<01:58, 5.37s/it, avr_loss=0.0333]\nsteps: 92%|█████████▏| 266/288 [23:47<01:58, 5.37s/it, avr_loss=0.0325]\nsteps: 93%|█████████▎| 267/288 [23:48<01:52, 5.35s/it, avr_loss=0.0325]\nsteps: 93%|█████████▎| 267/288 [23:48<01:52, 5.35s/it, avr_loss=0.0327]\nsteps: 93%|█████████▎| 268/288 [23:48<01:46, 5.33s/it, avr_loss=0.0327]\nsteps: 93%|█████████▎| 268/288 [23:48<01:46, 5.33s/it, avr_loss=0.0309]\nsteps: 93%|█████████▎| 269/288 [23:53<01:41, 5.33s/it, avr_loss=0.0309]\nsteps: 93%|█████████▎| 269/288 [23:53<01:41, 5.33s/it, avr_loss=0.0291]\nsteps: 94%|█████████▍| 270/288 [23:58<01:35, 5.33s/it, avr_loss=0.0291]\nepoch 16/16\nsteps: 94%|█████████▍| 270/288 [23:58<01:35, 5.33s/it, avr_loss=0.0273]INFO:dataset.image_video_dataset:epoch is incremented. current_epoch: 15, epoch: 16\nINFO:dataset.image_video_dataset:epoch is incremented. 
current_epoch: 15, epoch: 16\nsteps: 94%|█████████▍| 271/288 [23:59<01:30, 5.31s/it, avr_loss=0.0273]\nsteps: 94%|█████████▍| 271/288 [23:59<01:30, 5.31s/it, avr_loss=0.0284]\nsteps: 94%|█████████▍| 272/288 [24:00<01:24, 5.29s/it, avr_loss=0.0284]\nsteps: 94%|█████████▍| 272/288 [24:00<01:24, 5.29s/it, avr_loss=0.0294]\nsteps: 95%|█████████▍| 273/288 [24:00<01:19, 5.28s/it, avr_loss=0.0294]\nsteps: 95%|█████████▍| 273/288 [24:00<01:19, 5.28s/it, avr_loss=0.0307]\nsteps: 95%|█████████▌| 274/288 [24:01<01:13, 5.26s/it, avr_loss=0.0307]\nsteps: 95%|█████████▌| 274/288 [24:01<01:13, 5.26s/it, avr_loss=0.0378]\nsteps: 95%|█████████▌| 275/288 [24:06<01:08, 5.26s/it, avr_loss=0.0378]\nsteps: 95%|█████████▌| 275/288 [24:06<01:08, 5.26s/it, avr_loss=0.0397]\nsteps: 96%|█████████▌| 276/288 [24:11<01:03, 5.26s/it, avr_loss=0.0397]\nsteps: 96%|█████████▌| 276/288 [24:11<01:03, 5.26s/it, avr_loss=0.0389]\nsteps: 96%|█████████▌| 277/288 [24:16<00:57, 5.26s/it, avr_loss=0.0389]\nsteps: 96%|█████████▌| 277/288 [24:16<00:57, 5.26s/it, avr_loss=0.039] \nsteps: 97%|█████████▋| 278/288 [24:20<00:52, 5.25s/it, avr_loss=0.039]\nsteps: 97%|█████████▋| 278/288 [24:20<00:52, 5.25s/it, avr_loss=0.0397]\nsteps: 97%|█████████▋| 279/288 [24:21<00:47, 5.24s/it, avr_loss=0.0397]\nsteps: 97%|█████████▋| 279/288 [24:21<00:47, 5.24s/it, avr_loss=0.0394]\nsteps: 97%|█████████▋| 280/288 [24:32<00:42, 5.26s/it, avr_loss=0.0394]\nsteps: 97%|█████████▋| 280/288 [24:32<00:42, 5.26s/it, avr_loss=0.0378]\nsteps: 98%|█████████▊| 281/288 [24:37<00:36, 5.26s/it, avr_loss=0.0378]\nsteps: 98%|█████████▊| 281/288 [24:37<00:36, 5.26s/it, avr_loss=0.0383]\nsteps: 98%|█████████▊| 282/288 [24:47<00:31, 5.28s/it, avr_loss=0.0383]\nsteps: 98%|█████████▊| 282/288 [24:47<00:31, 5.28s/it, avr_loss=0.0385]\nsteps: 98%|█████████▊| 283/288 [24:48<00:26, 5.26s/it, avr_loss=0.0385]\nsteps: 98%|█████████▊| 283/288 [24:48<00:26, 5.26s/it, avr_loss=0.0525]\nsteps: 99%|█████████▊| 284/288 [24:49<00:20, 5.24s/it, 
avr_loss=0.0525]\nsteps: 99%|█████████▊| 284/288 [24:49<00:20, 5.24s/it, avr_loss=0.0532]\nsteps: 99%|█████████▉| 285/288 [24:59<00:15, 5.26s/it, avr_loss=0.0532]\nsteps: 99%|█████████▉| 285/288 [24:59<00:15, 5.26s/it, avr_loss=0.0529]\nsteps: 99%|█████████▉| 286/288 [25:10<00:10, 5.28s/it, avr_loss=0.0529]\nsteps: 99%|█████████▉| 286/288 [25:10<00:10, 5.28s/it, avr_loss=0.0503]\nsteps: 100%|█████████▉| 287/288 [25:20<00:05, 5.30s/it, avr_loss=0.0503]\nsteps: 100%|█████████▉| 287/288 [25:20<00:05, 5.30s/it, avr_loss=0.0504]\nsteps: 100%|██████████| 288/288 [25:25<00:00, 5.30s/it, avr_loss=0.0504]\nsaving checkpoint: output/lora.safetensors\nsteps: 100%|██████████| 288/288 [25:25<00:00, 5.30s/it, avr_loss=0.0507]INFO:__main__:model saved.\nsteps: 100%|██████████| 288/288 [25:26<00:00, 5.30s/it, avr_loss=0.0507]\nThe token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\nToken is valid (permission: write).\nThe token `Discord` has been saved to /root/.cache/huggingface/stored_tokens\nYour token has been saved to /root/.cache/huggingface/token\nLogin successful.\nThe current active token is: `Discord`\nConsider using `hf_transfer` for faster uploads. This solution comes with some limitations. 
See https://huggingface.co/docs/huggingface_hub/hf_transfer for more details.\nStart hashing 1 files.\nFinished hashing 1 files.\nlora.safetensors: 0%| | 0.00/323M [00:00<?, ?B/s]\nlora.safetensors: 3%|▎ | 11.0M/323M [00:00<00:02, 110MB/s]\nlora.safetensors: 7%|▋ | 21.9M/323M [00:00<00:04, 65.4MB/s]\nlora.safetensors: 10%|▉ | 32.0M/323M [00:00<00:05, 55.6MB/s]\nlora.safetensors: 15%|█▍ | 48.0M/323M [00:00<00:04, 58.3MB/s]\nlora.safetensors: 20%|█▉ | 64.0M/323M [00:01<00:04, 64.0MB/s]\nlora.safetensors: 25%|██▍ | 80.0M/323M [00:01<00:04, 57.3MB/s]\nlora.safetensors: 30%|██▉ | 96.0M/323M [00:01<00:03, 61.9MB/s]\nlora.safetensors: 35%|███▍ | 112M/323M [00:01<00:03, 65.4MB/s] \nlora.safetensors: 40%|███▉ | 128M/323M [00:02<00:03, 63.9MB/s]\nlora.safetensors: 45%|████▍ | 144M/323M [00:02<00:02, 63.8MB/s]\nlora.safetensors: 50%|████▉ | 160M/323M [00:02<00:02, 62.7MB/s]\nlora.safetensors: 55%|█████▍ | 176M/323M [00:02<00:02, 66.0MB/s]\nlora.safetensors: 60%|█████▉ | 192M/323M [00:03<00:01, 65.7MB/s]\nlora.safetensors: 64%|██████▍ | 208M/323M [00:03<00:01, 60.3MB/s]\nlora.safetensors: 69%|██████▉ | 224M/323M [00:03<00:02, 43.0MB/s]\nlora.safetensors: 74%|███████▍ | 240M/323M [00:04<00:01, 48.5MB/s]\nlora.safetensors: 79%|███████▉ | 256M/323M [00:04<00:01, 53.9MB/s]\nlora.safetensors: 84%|████████▍ | 272M/323M [00:04<00:00, 59.1MB/s]\nlora.safetensors: 89%|████████▉ | 288M/323M [00:04<00:00, 60.6MB/s]\nlora.safetensors: 94%|█████████▍| 304M/323M [00:05<00:00, 64.6MB/s]\nlora.safetensors: 99%|█████████▉| 320M/323M [00:05<00:00, 65.3MB/s]\nlora.safetensors: 100%|██████████| 323M/323M [00:05<00:00, 58.2MB/s]\nhttps://huggingface.co/lucataco/hunyuan-musubi-rose-6/tree/main/.\n./\n./lora.safetensors",
"output": "https://replicate.delivery/xezq/OaYSnHQ3QeRSWygF3Yeto8XuKRP5ljDbZq0iefTKpdRRfQSgC/trained_model.tar",
"data_removed": false,
"error": null,
"source": "web",
"status": "succeeded",
"created_at": "2025-01-06T05:26:04.669Z",
"started_at": "2025-01-06T05:26:42.943359Z",
"completed_at": "2025-01-06T05:53:56.582286Z",
"urls": {
"cancel": "https://api.replicate.com/v1/predictions/8e970hygfnrmc0cm789sz6sr38/cancel",
"get": "https://api.replicate.com/v1/predictions/8e970hygfnrmc0cm789sz6sr38",
"stream": "https://stream.replicate.com/v1/files/bcwr-ywysy5aih5qroigvs55xuhnaqopg6q7wvhipbwupybg2tlkivyja",
"web": "https://replicate.com/p/8e970hygfnrmc0cm789sz6sr38"
},
"metrics": {
"predict_time": 1633.638927304,
"total_time": 1671.913286
}
}