Failed to load versions. Head to the versions page to see all versions for this model.
You're looking at a specific version of this model. Jump to the model overview.
Input
Run this model in Node.js with one line of code:
npm install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
import { writeFile } from "node:fs/promises";
import Replicate from "replicate";
// Build the API client, authenticating with the token taken from the
// REPLICATE_API_TOKEN environment variable.
const { REPLICATE_API_TOKEN: apiToken } = process.env;
const replicate = new Replicate({ auth: apiToken });
Run lucataco/stable-diffusion-3.5-large-lora-trainer using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
// Run the pinned model version with the training inputs below and wait for
// the result. The input keys follow the model's schema.
const output = await replicate.run(
  "lucataco/stable-diffusion-3.5-large-lora-trainer:6ebda45af5b9c30edee3149cc1624b7f7cae8fab7c692e2c51d82f5fed3198ee",
  {
    input: {
      rank: 16,
      backend: "no",
      optimizer: "AdamW",
      resolution: 768,
      input_images: "https://replicate.delivery/pbxt/LrJveDd3TVKraYSxEWkMl0txKP39KdIBof5EO2IAsuTNIrFU/yarn.zip",
      lr_scheduler: "constant",
      learning_rate: 0.0001,
      instance_prompt: "Frog, yarn art style",
      max_train_steps: 700,
      text_encoder_lr: 0.000005,
      train_batch_size: 1,
      train_text_encoder: false,
      gradient_accumulation_steps: 1,
    },
  }
);

// To access the file URL:
console.log(output.url()); //=> "http://example.com"

// To write the file to disk:
// Uses the promise-based writeFile from "node:fs/promises" so the write is
// awaited and any I/O error rejects instead of being lost.
// NOTE(review): the file name is kept from the original example; confirm the
// actual output format of this trainer before relying on the ".png" extension.
await writeFile("my-image.png", output);
To learn more, take a look at the guide on getting started with Node.js.
pip install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
import replicate
Run lucataco/stable-diffusion-3.5-large-lora-trainer using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# Model to run, pinned to an exact version hash (owner/name:version).
model_version = "lucataco/stable-diffusion-3.5-large-lora-trainer:6ebda45af5b9c30edee3149cc1624b7f7cae8fab7c692e2c51d82f5fed3198ee"

# Training inputs; the keys follow the model's input schema.
training_input = {
    "rank": 16,
    "backend": "no",
    "optimizer": "AdamW",
    "resolution": 768,
    "input_images": "https://replicate.delivery/pbxt/LrJveDd3TVKraYSxEWkMl0txKP39KdIBof5EO2IAsuTNIrFU/yarn.zip",
    "lr_scheduler": "constant",
    "learning_rate": 0.0001,
    "instance_prompt": "Frog, yarn art style",
    "max_train_steps": 700,
    "text_encoder_lr": 0.000005,
    "train_batch_size": 1,
    "train_text_encoder": False,
    "gradient_accumulation_steps": 1,
}

# Run the model through the Replicate client and print whatever it returns.
output = replicate.run(model_version, input=training_input)
print(output)
To learn more, take a look at the guide on getting started with Python.
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
Run lucataco/stable-diffusion-3.5-large-lora-trainer using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# Create a prediction for the pinned model version over the HTTP API.
# The "Prefer: wait" header is sent with the request, and the bearer token is
# read from the REPLICATE_API_TOKEN environment variable.
curl --silent --request POST \
  --header "Authorization: Bearer $REPLICATE_API_TOKEN" \
  --header "Content-Type: application/json" \
  --header "Prefer: wait" \
  --data $'{
"version": "6ebda45af5b9c30edee3149cc1624b7f7cae8fab7c692e2c51d82f5fed3198ee",
"input": {
"rank": 16,
"backend": "no",
"optimizer": "AdamW",
"resolution": 768,
"input_images": "https://replicate.delivery/pbxt/LrJveDd3TVKraYSxEWkMl0txKP39KdIBof5EO2IAsuTNIrFU/yarn.zip",
"lr_scheduler": "constant",
"learning_rate": 0.0001,
"instance_prompt": "Frog, yarn art style",
"max_train_steps": 700,
"text_encoder_lr": 0.000005,
"train_batch_size": 1,
"train_text_encoder": false,
"gradient_accumulation_steps": 1
}
}' \
  https://api.replicate.com/v1/predictions
To learn more, take a look at Replicate’s HTTP API reference docs.
Add a payment method to run this model.
By signing in, you agree to our
terms of service and privacy policy
Output
{
"completed_at": "2024-10-25T23:39:14.239794Z",
"created_at": "2024-10-25T23:31:46.168000Z",
"data_removed": false,
"error": null,
"id": "ng14j2cff1rj40cjrr2vbz667m",
"input": {
"rank": 16,
"backend": "no",
"optimizer": "AdamW",
"resolution": 768,
"input_images": "https://replicate.delivery/pbxt/LrJveDd3TVKraYSxEWkMl0txKP39KdIBof5EO2IAsuTNIrFU/yarn.zip",
"lr_scheduler": "constant",
"learning_rate": 0.0001,
"instance_prompt": "Frog, yarn art style",
"max_train_steps": 700,
"train_batch_size": 1,
"gradient_accumulation_steps": 1
},
"logs": "Using seed: 3595070789\nExtracted 16 files from zip to input_images\nUsing params: ['accelerate', 'launch', '--dynamo_backend', 'no', 'train_dreambooth_lora_sd3.py', '--pretrained_model_name_or_path', 'stable-diffusion-3.5-large', '--instance_data_dir', 'input_images', '--rank', '16', '--output_dir', '/tmp/train/output/sd35_large_train_replicate', '--mixed_precision', 'bf16', '--instance_prompt', 'Frog, yarn art style', '--resolution', '768', '--train_batch_size', '1', '--gradient_accumulation_steps', '1', '--optimizer', 'AdamW', '--learning_rate', '0.0001', '--lr_scheduler', 'constant', '--lr_warmup_steps', '0', '--max_train_steps', '700', '--checkpointing_steps', '701', '--seed', '3595070789', '--logging_dir', '/tmp/logs']\n10/25/2024 23:33:02 - INFO - __main__ - Distributed environment: DistributedType.NO\nNum processes: 1\nProcess index: 0\nLocal process index: 0\nDevice: cuda\nMixed precision type: bf16\nYou set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers\nYou are using a model of type clip_text_model to instantiate a model of type . This is not supported for all configurations of models and can yield errors.\nYou are using a model of type clip_text_model to instantiate a model of type . This is not supported for all configurations of models and can yield errors.\nYou are using a model of type t5 to instantiate a model of type . This is not supported for all configurations of models and can yield errors.\n{'base_image_seq_len', 'base_shift', 'max_shift', 'max_image_seq_len', 'use_dynamic_shifting'} was not found in config. Values will be initialized to default values.\nLoading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]\nLoading checkpoint shards: 50%|█████ | 1/2 [00:03<00:03, 3.67s/it]\nLoading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00, 3.64s/it]\nLoading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00, 3.64s/it]\n{'dual_attention_layers'} was not found in config. 
Values will be initialized to default values.\n10/25/2024 23:33:53 - INFO - __main__ - ***** Running training *****\n10/25/2024 23:33:53 - INFO - __main__ - Num examples = 16\n10/25/2024 23:33:53 - INFO - __main__ - Num batches each epoch = 16\n10/25/2024 23:33:53 - INFO - __main__ - Num Epochs = 44\n10/25/2024 23:33:53 - INFO - __main__ - Instantaneous batch size per device = 1\n10/25/2024 23:33:53 - INFO - __main__ - Total train batch size (w. parallel, distributed & accumulation) = 1\n10/25/2024 23:33:53 - INFO - __main__ - Gradient Accumulation steps = 1\n10/25/2024 23:33:53 - INFO - __main__ - Total optimization steps = 700\nSteps: 0%| | 0/700 [00:00<?, ?it/s]\nSteps: 0%| | 1/700 [00:00<07:26, 1.56it/s]\nSteps: 0%| | 1/700 [00:00<07:26, 1.56it/s, loss=0.132, lr=0.0001]\nSteps: 0%| | 2/700 [00:01<05:50, 1.99it/s, loss=0.132, lr=0.0001]\nSteps: 0%| | 2/700 [00:01<05:50, 1.99it/s, loss=0.189, lr=0.0001]\nSteps: 0%| | 3/700 [00:01<05:27, 2.13it/s, loss=0.189, lr=0.0001]\nSteps: 0%| | 3/700 [00:01<05:27, 2.13it/s, loss=0.0392, lr=0.0001]\nSteps: 1%| | 4/700 [00:01<05:17, 2.20it/s, loss=0.0392, lr=0.0001]\nSteps: 1%| | 4/700 [00:01<05:17, 2.20it/s, loss=0.203, lr=0.0001] \nSteps: 1%| | 5/700 [00:02<05:10, 2.24it/s, loss=0.203, lr=0.0001]\nSteps: 1%| | 5/700 [00:02<05:10, 2.24it/s, loss=0.165, lr=0.0001]\nSteps: 1%| | 6/700 [00:02<05:07, 2.26it/s, loss=0.165, lr=0.0001]\nSteps: 1%| | 6/700 [00:02<05:07, 2.26it/s, loss=0.175, lr=0.0001]\nSteps: 1%| | 7/700 [00:03<05:04, 2.27it/s, loss=0.175, lr=0.0001]\nSteps: 1%| | 7/700 [00:03<05:04, 2.27it/s, loss=0.171, lr=0.0001]\nSteps: 1%| | 8/700 [00:03<05:02, 2.28it/s, loss=0.171, lr=0.0001]\nSteps: 1%| | 8/700 [00:03<05:02, 2.28it/s, loss=0.141, lr=0.0001]\nSteps: 1%|▏ | 9/700 [00:04<05:01, 2.29it/s, loss=0.141, lr=0.0001]\nSteps: 1%|▏ | 9/700 [00:04<05:01, 2.29it/s, loss=0.203, lr=0.0001]\nSteps: 1%|▏ | 10/700 [00:04<05:00, 2.30it/s, loss=0.203, lr=0.0001]\nSteps: 1%|▏ | 10/700 [00:04<05:00, 2.30it/s, loss=0.0762, 
lr=0.0001]\nSteps: 2%|▏ | 11/700 [00:04<04:59, 2.30it/s, loss=0.0762, lr=0.0001]\nSteps: 2%|▏ | 11/700 [00:04<04:59, 2.30it/s, loss=0.0826, lr=0.0001]\nSteps: 2%|▏ | 12/700 [00:05<04:59, 2.30it/s, loss=0.0826, lr=0.0001]\nSteps: 2%|▏ | 12/700 [00:05<04:59, 2.30it/s, loss=0.19, lr=0.0001] \nSteps: 2%|▏ | 13/700 [00:05<04:59, 2.30it/s, loss=0.19, lr=0.0001]\nSteps: 2%|▏ | 13/700 [00:05<04:59, 2.30it/s, loss=0.285, lr=0.0001]\nSteps: 2%|▏ | 14/700 [00:06<04:58, 2.30it/s, loss=0.285, lr=0.0001]\nSteps: 2%|▏ | 14/700 [00:06<04:58, 2.30it/s, loss=0.144, lr=0.0001]\nSteps: 2%|▏ | 15/700 [00:06<04:57, 2.30it/s, loss=0.144, lr=0.0001]\nSteps: 2%|▏ | 15/700 [00:06<04:57, 2.30it/s, loss=0.134, lr=0.0001]\nSteps: 2%|▏ | 16/700 [00:07<04:56, 2.31it/s, loss=0.134, lr=0.0001]\nSteps: 2%|▏ | 16/700 [00:07<04:56, 2.31it/s, loss=0.189, lr=0.0001]\nSteps: 2%|▏ | 17/700 [00:07<04:57, 2.30it/s, loss=0.189, lr=0.0001]\nSteps: 2%|▏ | 17/700 [00:07<04:57, 2.30it/s, loss=0.097, lr=0.0001]\nSteps: 3%|▎ | 18/700 [00:07<04:56, 2.30it/s, loss=0.097, lr=0.0001]\nSteps: 3%|▎ | 18/700 [00:08<04:56, 2.30it/s, loss=0.215, lr=0.0001]\nSteps: 3%|▎ | 19/700 [00:08<04:55, 2.30it/s, loss=0.215, lr=0.0001]\nSteps: 3%|▎ | 19/700 [00:08<04:55, 2.30it/s, loss=0.173, lr=0.0001]\nSteps: 3%|▎ | 20/700 [00:08<04:55, 2.30it/s, loss=0.173, lr=0.0001]\nSteps: 3%|▎ | 20/700 [00:08<04:55, 2.30it/s, loss=0.0768, lr=0.0001]\nSteps: 3%|▎ | 21/700 [00:09<04:54, 2.30it/s, loss=0.0768, lr=0.0001]\nSteps: 3%|▎ | 21/700 [00:09<04:54, 2.30it/s, loss=0.0714, lr=0.0001]\nSteps: 3%|▎ | 22/700 [00:09<04:54, 2.30it/s, loss=0.0714, lr=0.0001]\nSteps: 3%|▎ | 22/700 [00:09<04:54, 2.30it/s, loss=0.148, lr=0.0001] \nSteps: 3%|▎ | 23/700 [00:10<04:54, 2.30it/s, loss=0.148, lr=0.0001]\nSteps: 3%|▎ | 23/700 [00:10<04:54, 2.30it/s, loss=0.297, lr=0.0001]\nSteps: 3%|▎ | 24/700 [00:10<04:53, 2.30it/s, loss=0.297, lr=0.0001]\nSteps: 3%|▎ | 24/700 [00:10<04:53, 2.30it/s, loss=0.0754, lr=0.0001]\nSteps: 4%|▎ | 25/700 [00:11<04:53, 2.30it/s, 
loss=0.0754, lr=0.0001]\nSteps: 4%|▎ | 25/700 [00:11<04:53, 2.30it/s, loss=0.116, lr=0.0001] \nSteps: 4%|▎ | 26/700 [00:11<04:52, 2.30it/s, loss=0.116, lr=0.0001]\nSteps: 4%|▎ | 26/700 [00:11<04:52, 2.30it/s, loss=0.0963, lr=0.0001]\nSteps: 4%|▍ | 27/700 [00:11<04:52, 2.30it/s, loss=0.0963, lr=0.0001]\nSteps: 4%|▍ | 27/700 [00:11<04:52, 2.30it/s, loss=0.0578, lr=0.0001]\nSteps: 4%|▍ | 28/700 [00:12<04:51, 2.30it/s, loss=0.0578, lr=0.0001]\nSteps: 4%|▍ | 28/700 [00:12<04:51, 2.30it/s, loss=0.0973, lr=0.0001]\nSteps: 4%|▍ | 29/700 [00:12<04:51, 2.30it/s, loss=0.0973, lr=0.0001]\nSteps: 4%|▍ | 29/700 [00:12<04:51, 2.30it/s, loss=0.116, lr=0.0001] \nSteps: 4%|▍ | 30/700 [00:13<04:51, 2.30it/s, loss=0.116, lr=0.0001]\nSteps: 4%|▍ | 30/700 [00:13<04:51, 2.30it/s, loss=0.191, lr=0.0001]\nSteps: 4%|▍ | 31/700 [00:13<04:50, 2.30it/s, loss=0.191, lr=0.0001]\nSteps: 4%|▍ | 31/700 [00:13<04:50, 2.30it/s, loss=0.113, lr=0.0001]\nSteps: 5%|▍ | 32/700 [00:14<04:49, 2.30it/s, loss=0.113, lr=0.0001]\nSteps: 5%|▍ | 32/700 [00:14<04:49, 2.30it/s, loss=0.187, lr=0.0001]\nSteps: 5%|▍ | 33/700 [00:14<04:50, 2.29it/s, loss=0.187, lr=0.0001]\nSteps: 5%|▍ | 33/700 [00:14<04:50, 2.29it/s, loss=0.104, lr=0.0001]\nSteps: 5%|▍ | 34/700 [00:14<04:50, 2.30it/s, loss=0.104, lr=0.0001]\nSteps: 5%|▍ | 34/700 [00:14<04:50, 2.30it/s, loss=0.176, lr=0.0001]\nSteps: 5%|▌ | 35/700 [00:15<04:49, 2.30it/s, loss=0.176, lr=0.0001]\nSteps: 5%|▌ | 35/700 [00:15<04:49, 2.30it/s, loss=0.0212, lr=0.0001]\nSteps: 5%|▌ | 36/700 [00:15<04:48, 2.30it/s, loss=0.0212, lr=0.0001]\nSteps: 5%|▌ | 36/700 [00:15<04:48, 2.30it/s, loss=0.0399, lr=0.0001]\nSteps: 5%|▌ | 37/700 [00:16<04:47, 2.30it/s, loss=0.0399, lr=0.0001]\nSteps: 5%|▌ | 37/700 [00:16<04:47, 2.30it/s, loss=0.078, lr=0.0001] \nSteps: 5%|▌ | 38/700 [00:16<04:47, 2.30it/s, loss=0.078, lr=0.0001]\nSteps: 5%|▌ | 38/700 [00:16<04:47, 2.30it/s, loss=0.208, lr=0.0001]\nSteps: 6%|▌ | 39/700 [00:17<04:46, 2.31it/s, loss=0.208, lr=0.0001]\nSteps: 6%|▌ | 39/700 
[00:17<04:46, 2.31it/s, loss=0.212, lr=0.0001]\nSteps: 6%|▌ | 40/700 [00:17<04:46, 2.31it/s, loss=0.212, lr=0.0001]\nSteps: 6%|▌ | 40/700 [00:17<04:46, 2.31it/s, loss=0.119, lr=0.0001]\nSteps: 6%|▌ | 41/700 [00:17<04:45, 2.31it/s, loss=0.119, lr=0.0001]\nSteps: 6%|▌ | 41/700 [00:18<04:45, 2.31it/s, loss=0.186, lr=0.0001]\nSteps: 6%|▌ | 42/700 [00:18<04:45, 2.31it/s, loss=0.186, lr=0.0001]\nSteps: 6%|▌ | 42/700 [00:18<04:45, 2.31it/s, loss=0.0453, lr=0.0001]\nSteps: 6%|▌ | 43/700 [00:18<04:44, 2.31it/s, loss=0.0453, lr=0.0001]\nSteps: 6%|▌ | 43/700 [00:18<04:44, 2.31it/s, loss=0.125, lr=0.0001] \nSteps: 6%|▋ | 44/700 [00:19<04:44, 2.31it/s, loss=0.125, lr=0.0001]\nSteps: 6%|▋ | 44/700 [00:19<04:44, 2.31it/s, loss=0.299, lr=0.0001]\nSteps: 6%|▋ | 45/700 [00:19<04:43, 2.31it/s, loss=0.299, lr=0.0001]\nSteps: 6%|▋ | 45/700 [00:19<04:43, 2.31it/s, loss=0.0874, lr=0.0001]\nSteps: 7%|▋ | 46/700 [00:20<04:43, 2.31it/s, loss=0.0874, lr=0.0001]\nSteps: 7%|▋ | 46/700 [00:20<04:43, 2.31it/s, loss=0.178, lr=0.0001] \nSteps: 7%|▋ | 47/700 [00:20<04:43, 2.31it/s, loss=0.178, lr=0.0001]\nSteps: 7%|▋ | 47/700 [00:20<04:43, 2.31it/s, loss=0.166, lr=0.0001]\nSteps: 7%|▋ | 48/700 [00:21<04:42, 2.31it/s, loss=0.166, lr=0.0001]\nSteps: 7%|▋ | 48/700 [00:21<04:42, 2.31it/s, loss=0.0528, lr=0.0001]\nSteps: 7%|▋ | 49/700 [00:21<04:43, 2.30it/s, loss=0.0528, lr=0.0001]\nSteps: 7%|▋ | 49/700 [00:21<04:43, 2.30it/s, loss=0.159, lr=0.0001] \nSteps: 7%|▋ | 50/700 [00:21<04:42, 2.30it/s, loss=0.159, lr=0.0001]\nSteps: 7%|▋ | 50/700 [00:21<04:42, 2.30it/s, loss=0.103, lr=0.0001]\nSteps: 7%|▋ | 51/700 [00:22<04:41, 2.30it/s, loss=0.103, lr=0.0001]\nSteps: 7%|▋ | 51/700 [00:22<04:41, 2.30it/s, loss=0.034, lr=0.0001]\nSteps: 7%|▋ | 52/700 [00:22<04:41, 2.30it/s, loss=0.034, lr=0.0001]\nSteps: 7%|▋ | 52/700 [00:22<04:41, 2.30it/s, loss=0.0843, lr=0.0001]\nSteps: 8%|▊ | 53/700 [00:23<04:40, 2.31it/s, loss=0.0843, lr=0.0001]\nSteps: 8%|▊ | 53/700 [00:23<04:40, 2.31it/s, loss=0.163, lr=0.0001] \nSteps: 
8%|▊ | 54/700 [00:23<04:40, 2.31it/s, loss=0.163, lr=0.0001]\nSteps: 8%|▊ | 54/700 [00:23<04:40, 2.31it/s, loss=0.202, lr=0.0001]\nSteps: 8%|▊ | 55/700 [00:24<04:40, 2.30it/s, loss=0.202, lr=0.0001]\nSteps: 8%|▊ | 55/700 [00:24<04:40, 2.30it/s, loss=0.178, lr=0.0001]\nSteps: 8%|▊ | 56/700 [00:24<04:39, 2.31it/s, loss=0.178, lr=0.0001]\nSteps: 8%|▊ | 56/700 [00:24<04:39, 2.31it/s, loss=0.215, lr=0.0001]\nSteps: 8%|▊ | 57/700 [00:24<04:38, 2.31it/s, loss=0.215, lr=0.0001]\nSteps: 8%|▊ | 57/700 [00:24<04:38, 2.31it/s, loss=0.0982, lr=0.0001]\nSteps: 8%|▊ | 58/700 [00:25<04:38, 2.31it/s, loss=0.0982, lr=0.0001]\nSteps: 8%|▊ | 58/700 [00:25<04:38, 2.31it/s, loss=0.143, lr=0.0001] \nSteps: 8%|▊ | 59/700 [00:25<04:37, 2.31it/s, loss=0.143, lr=0.0001]\nSteps: 8%|▊ | 59/700 [00:25<04:37, 2.31it/s, loss=0.156, lr=0.0001]\nSteps: 9%|▊ | 60/700 [00:26<04:37, 2.31it/s, loss=0.156, lr=0.0001]\nSteps: 9%|▊ | 60/700 [00:26<04:37, 2.31it/s, loss=0.117, lr=0.0001]\nSteps: 9%|▊ | 61/700 [00:26<04:36, 2.31it/s, loss=0.117, lr=0.0001]\nSteps: 9%|▊ | 61/700 [00:26<04:36, 2.31it/s, loss=0.168, lr=0.0001]\nSteps: 9%|▉ | 62/700 [00:27<04:36, 2.31it/s, loss=0.168, lr=0.0001]\nSteps: 9%|▉ | 62/700 [00:27<04:36, 2.31it/s, loss=0.098, lr=0.0001]\nSteps: 9%|▉ | 63/700 [00:27<04:36, 2.31it/s, loss=0.098, lr=0.0001]\nSteps: 9%|▉ | 63/700 [00:27<04:36, 2.31it/s, loss=0.16, lr=0.0001] \nSteps: 9%|▉ | 64/700 [00:27<04:35, 2.31it/s, loss=0.16, lr=0.0001]\nSteps: 9%|▉ | 64/700 [00:27<04:35, 2.31it/s, loss=0.0913, lr=0.0001]\nSteps: 9%|▉ | 65/700 [00:28<04:36, 2.30it/s, loss=0.0913, lr=0.0001]\nSteps: 9%|▉ | 65/700 [00:28<04:36, 2.30it/s, loss=0.232, lr=0.0001] \nSteps: 9%|▉ | 66/700 [00:28<04:36, 2.29it/s, loss=0.232, lr=0.0001]\nSteps: 9%|▉ | 66/700 [00:28<04:36, 2.29it/s, loss=0.204, lr=0.0001]\nSteps: 10%|▉ | 67/700 [00:29<04:35, 2.30it/s, loss=0.204, lr=0.0001]\nSteps: 10%|▉ | 67/700 [00:29<04:35, 2.30it/s, loss=0.0839, lr=0.0001]\nSteps: 10%|▉ | 68/700 [00:29<04:34, 2.30it/s, loss=0.0839, 
lr=0.0001]\nSteps: 10%|▉ | 68/700 [00:29<04:34, 2.30it/s, loss=0.163, lr=0.0001] \nSteps: 10%|▉ | 69/700 [00:30<04:33, 2.30it/s, loss=0.163, lr=0.0001]\nSteps: 10%|▉ | 69/700 [00:30<04:33, 2.30it/s, loss=0.117, lr=0.0001]\nSteps: 10%|█ | 70/700 [00:30<04:33, 2.31it/s, loss=0.117, lr=0.0001]\nSteps: 10%|█ | 70/700 [00:30<04:33, 2.31it/s, loss=0.116, lr=0.0001]\nSteps: 10%|█ | 71/700 [00:30<04:32, 2.31it/s, loss=0.116, lr=0.0001]\nSteps: 10%|█ | 71/700 [00:31<04:32, 2.31it/s, loss=0.273, lr=0.0001]\nSteps: 10%|█ | 72/700 [00:31<04:32, 2.31it/s, loss=0.273, lr=0.0001]\nSteps: 10%|█ | 72/700 [00:31<04:32, 2.31it/s, loss=0.2, lr=0.0001] \nSteps: 10%|█ | 73/700 [00:31<04:31, 2.31it/s, loss=0.2, lr=0.0001]\nSteps: 10%|█ | 73/700 [00:31<04:31, 2.31it/s, loss=0.189, lr=0.0001]\nSteps: 11%|█ | 74/700 [00:32<04:31, 2.31it/s, loss=0.189, lr=0.0001]\nSteps: 11%|█ | 74/700 [00:32<04:31, 2.31it/s, loss=0.201, lr=0.0001]\nSteps: 11%|█ | 75/700 [00:32<04:30, 2.31it/s, loss=0.201, lr=0.0001]\nSteps: 11%|█ | 75/700 [00:32<04:30, 2.31it/s, loss=0.13, lr=0.0001] \nSteps: 11%|█ | 76/700 [00:33<04:30, 2.31it/s, loss=0.13, lr=0.0001]\nSteps: 11%|█ | 76/700 [00:33<04:30, 2.31it/s, loss=0.128, lr=0.0001]\nSteps: 11%|█ | 77/700 [00:33<04:29, 2.31it/s, loss=0.128, lr=0.0001]\nSteps: 11%|█ | 77/700 [00:33<04:29, 2.31it/s, loss=0.19, lr=0.0001] \nSteps: 11%|█ | 78/700 [00:34<04:29, 2.31it/s, loss=0.19, lr=0.0001]\nSteps: 11%|█ | 78/700 [00:34<04:29, 2.31it/s, loss=0.117, lr=0.0001]\nSteps: 11%|█▏ | 79/700 [00:34<04:28, 2.31it/s, loss=0.117, lr=0.0001]\nSteps: 11%|█▏ | 79/700 [00:34<04:28, 2.31it/s, loss=0.0576, lr=0.0001]\nSteps: 11%|█▏ | 80/700 [00:34<04:28, 2.31it/s, loss=0.0576, lr=0.0001]\nSteps: 11%|█▏ | 80/700 [00:34<04:28, 2.31it/s, loss=0.0391, lr=0.0001]\nSteps: 12%|█▏ | 81/700 [00:35<04:29, 2.30it/s, loss=0.0391, lr=0.0001]\nSteps: 12%|█▏ | 81/700 [00:35<04:29, 2.30it/s, loss=0.157, lr=0.0001] \nSteps: 12%|█▏ | 82/700 [00:35<04:28, 2.30it/s, loss=0.157, lr=0.0001]\nSteps: 12%|█▏ | 
82/700 [00:35<04:28, 2.30it/s, loss=0.0326, lr=0.0001]\nSteps: 12%|█▏ | 83/700 [00:36<04:27, 2.30it/s, loss=0.0326, lr=0.0001]\nSteps: 12%|█▏ | 83/700 [00:36<04:27, 2.30it/s, loss=0.0692, lr=0.0001]\nSteps: 12%|█▏ | 84/700 [00:36<04:27, 2.30it/s, loss=0.0692, lr=0.0001]\nSteps: 12%|█▏ | 84/700 [00:36<04:27, 2.30it/s, loss=0.175, lr=0.0001] \nSteps: 12%|█▏ | 85/700 [00:37<04:26, 2.31it/s, loss=0.175, lr=0.0001]\nSteps: 12%|█▏ | 85/700 [00:37<04:26, 2.31it/s, loss=0.134, lr=0.0001]\nSteps: 12%|█▏ | 86/700 [00:37<04:26, 2.31it/s, loss=0.134, lr=0.0001]\nSteps: 12%|█▏ | 86/700 [00:37<04:26, 2.31it/s, loss=0.137, lr=0.0001]\nSteps: 12%|█▏ | 87/700 [00:37<04:26, 2.30it/s, loss=0.137, lr=0.0001]\nSteps: 12%|█▏ | 87/700 [00:37<04:26, 2.30it/s, loss=0.0814, lr=0.0001]\nSteps: 13%|█▎ | 88/700 [00:38<04:25, 2.30it/s, loss=0.0814, lr=0.0001]\nSteps: 13%|█▎ | 88/700 [00:38<04:25, 2.30it/s, loss=0.29, lr=0.0001] \nSteps: 13%|█▎ | 89/700 [00:38<04:25, 2.31it/s, loss=0.29, lr=0.0001]\nSteps: 13%|█▎ | 89/700 [00:38<04:25, 2.31it/s, loss=0.122, lr=0.0001]\nSteps: 13%|█▎ | 90/700 [00:39<04:24, 2.31it/s, loss=0.122, lr=0.0001]\nSteps: 13%|█▎ | 90/700 [00:39<04:24, 2.31it/s, loss=0.0188, lr=0.0001]\nSteps: 13%|█▎ | 91/700 [00:39<04:24, 2.31it/s, loss=0.0188, lr=0.0001]\nSteps: 13%|█▎ | 91/700 [00:39<04:24, 2.31it/s, loss=0.146, lr=0.0001] \nSteps: 13%|█▎ | 92/700 [00:40<04:23, 2.31it/s, loss=0.146, lr=0.0001]\nSteps: 13%|█▎ | 92/700 [00:40<04:23, 2.31it/s, loss=0.0699, lr=0.0001]\nSteps: 13%|█▎ | 93/700 [00:40<04:22, 2.31it/s, loss=0.0699, lr=0.0001]\nSteps: 13%|█▎ | 93/700 [00:40<04:22, 2.31it/s, loss=0.0927, lr=0.0001]\nSteps: 13%|█▎ | 94/700 [00:40<04:22, 2.31it/s, loss=0.0927, lr=0.0001]\nSteps: 13%|█▎ | 94/700 [00:40<04:22, 2.31it/s, loss=0.147, lr=0.0001] \nSteps: 14%|█▎ | 95/700 [00:41<04:21, 2.31it/s, loss=0.147, lr=0.0001]\nSteps: 14%|█▎ | 95/700 [00:41<04:21, 2.31it/s, loss=0.0597, lr=0.0001]\nSteps: 14%|█▎ | 96/700 [00:41<04:21, 2.31it/s, loss=0.0597, lr=0.0001]\nSteps: 
14%|█▎ | 96/700 [00:41<04:21, 2.31it/s, loss=0.107, lr=0.0001] \nSteps: 14%|█▍ | 97/700 [00:42<04:22, 2.30it/s, loss=0.107, lr=0.0001]\nSteps: 14%|█▍ | 97/700 [00:42<04:22, 2.30it/s, loss=0.103, lr=0.0001]\nSteps: 14%|█▍ | 98/700 [00:42<04:21, 2.30it/s, loss=0.103, lr=0.0001]\nSteps: 14%|█▍ | 98/700 [00:42<04:21, 2.30it/s, loss=0.127, lr=0.0001]\nSteps: 14%|█▍ | 99/700 [00:43<04:21, 2.30it/s, loss=0.127, lr=0.0001]\nSteps: 14%|█▍ | 99/700 [00:43<04:21, 2.30it/s, loss=0.0597, lr=0.0001]\nSteps: 14%|█▍ | 100/700 [00:43<04:21, 2.30it/s, loss=0.0597, lr=0.0001]\nSteps: 14%|█▍ | 100/700 [00:43<04:21, 2.30it/s, loss=0.0843, lr=0.0001]\nSteps: 14%|█▍ | 101/700 [00:44<04:20, 2.30it/s, loss=0.0843, lr=0.0001]\nSteps: 14%|█▍ | 101/700 [00:44<04:20, 2.30it/s, loss=0.0791, lr=0.0001]\nSteps: 15%|█▍ | 102/700 [00:44<04:19, 2.30it/s, loss=0.0791, lr=0.0001]\nSteps: 15%|█▍ | 102/700 [00:44<04:19, 2.30it/s, loss=0.0923, lr=0.0001]\nSteps: 15%|█▍ | 103/700 [00:44<04:19, 2.30it/s, loss=0.0923, lr=0.0001]\nSteps: 15%|█▍ | 103/700 [00:44<04:19, 2.30it/s, loss=0.159, lr=0.0001] \nSteps: 15%|█▍ | 104/700 [00:45<04:18, 2.30it/s, loss=0.159, lr=0.0001]\nSteps: 15%|█▍ | 104/700 [00:45<04:18, 2.30it/s, loss=0.304, lr=0.0001]\nSteps: 15%|█▌ | 105/700 [00:45<04:18, 2.30it/s, loss=0.304, lr=0.0001]\nSteps: 15%|█▌ | 105/700 [00:45<04:18, 2.30it/s, loss=0.0677, lr=0.0001]\nSteps: 15%|█▌ | 106/700 [00:46<04:17, 2.31it/s, loss=0.0677, lr=0.0001]\nSteps: 15%|█▌ | 106/700 [00:46<04:17, 2.31it/s, loss=0.102, lr=0.0001] \nSteps: 15%|█▌ | 107/700 [00:46<04:17, 2.31it/s, loss=0.102, lr=0.0001]\nSteps: 15%|█▌ | 107/700 [00:46<04:17, 2.31it/s, loss=0.129, lr=0.0001]\nSteps: 15%|█▌ | 108/700 [00:47<04:16, 2.31it/s, loss=0.129, lr=0.0001]\nSteps: 15%|█▌ | 108/700 [00:47<04:16, 2.31it/s, loss=0.131, lr=0.0001]\nSteps: 16%|█▌ | 109/700 [00:47<04:16, 2.31it/s, loss=0.131, lr=0.0001]\nSteps: 16%|█▌ | 109/700 [00:47<04:16, 2.31it/s, loss=0.0958, lr=0.0001]\nSteps: 16%|█▌ | 110/700 [00:47<04:15, 2.31it/s, 
loss=0.0958, lr=0.0001]\nSteps: 16%|█▌ | 110/700 [00:47<04:15, 2.31it/s, loss=0.244, lr=0.0001] \nSteps: 16%|█▌ | 111/700 [00:48<04:15, 2.31it/s, loss=0.244, lr=0.0001]\nSteps: 16%|█▌ | 111/700 [00:48<04:15, 2.31it/s, loss=0.278, lr=0.0001]\nSteps: 16%|█▌ | 112/700 [00:48<04:14, 2.31it/s, loss=0.278, lr=0.0001]\nSteps: 16%|█▌ | 112/700 [00:48<04:14, 2.31it/s, loss=0.1, lr=0.0001] \nSteps: 16%|█▌ | 113/700 [00:49<04:15, 2.30it/s, loss=0.1, lr=0.0001]\nSteps: 16%|█▌ | 113/700 [00:49<04:15, 2.30it/s, loss=0.133, lr=0.0001]\nSteps: 16%|█▋ | 114/700 [00:49<04:14, 2.30it/s, loss=0.133, lr=0.0001]\nSteps: 16%|█▋ | 114/700 [00:49<04:14, 2.30it/s, loss=0.253, lr=0.0001]\nSteps: 16%|█▋ | 115/700 [00:50<04:14, 2.30it/s, loss=0.253, lr=0.0001]\nSteps: 16%|█▋ | 115/700 [00:50<04:14, 2.30it/s, loss=0.114, lr=0.0001]\nSteps: 17%|█▋ | 116/700 [00:50<04:13, 2.30it/s, loss=0.114, lr=0.0001]\nSteps: 17%|█▋ | 116/700 [00:50<04:13, 2.30it/s, loss=0.154, lr=0.0001]\nSteps: 17%|█▋ | 117/700 [00:50<04:14, 2.29it/s, loss=0.154, lr=0.0001]\nSteps: 17%|█▋ | 117/700 [00:50<04:14, 2.29it/s, loss=0.202, lr=0.0001]\nSteps: 17%|█▋ | 118/700 [00:51<04:14, 2.29it/s, loss=0.202, lr=0.0001]\nSteps: 17%|█▋ | 118/700 [00:51<04:14, 2.29it/s, loss=0.0992, lr=0.0001]\nSteps: 17%|█▋ | 119/700 [00:51<04:13, 2.29it/s, loss=0.0992, lr=0.0001]\nSteps: 17%|█▋ | 119/700 [00:51<04:13, 2.29it/s, loss=0.166, lr=0.0001] \nSteps: 17%|█▋ | 120/700 [00:52<04:12, 2.30it/s, loss=0.166, lr=0.0001]\nSteps: 17%|█▋ | 120/700 [00:52<04:12, 2.30it/s, loss=0.124, lr=0.0001]\nSteps: 17%|█▋ | 121/700 [00:52<04:11, 2.30it/s, loss=0.124, lr=0.0001]\nSteps: 17%|█▋ | 121/700 [00:52<04:11, 2.30it/s, loss=0.0382, lr=0.0001]\nSteps: 17%|█▋ | 122/700 [00:53<04:11, 2.29it/s, loss=0.0382, lr=0.0001]\nSteps: 17%|█▋ | 122/700 [00:53<04:11, 2.29it/s, loss=0.0882, lr=0.0001]\nSteps: 18%|█▊ | 123/700 [00:53<04:11, 2.30it/s, loss=0.0882, lr=0.0001]\nSteps: 18%|█▊ | 123/700 [00:53<04:11, 2.30it/s, loss=0.0856, lr=0.0001]\nSteps: 18%|█▊ | 124/700 
[00:54<04:10, 2.30it/s, loss=0.0856, lr=0.0001]\nSteps: 18%|█▊ | 124/700 [00:54<04:10, 2.30it/s, loss=0.145, lr=0.0001] \nSteps: 18%|█▊ | 125/700 [00:54<04:10, 2.29it/s, loss=0.145, lr=0.0001]\nSteps: 18%|█▊ | 125/700 [00:54<04:10, 2.29it/s, loss=0.14, lr=0.0001] \nSteps: 18%|█▊ | 126/700 [00:54<04:09, 2.30it/s, loss=0.14, lr=0.0001]\nSteps: 18%|█▊ | 126/700 [00:54<04:09, 2.30it/s, loss=0.194, lr=0.0001]\nSteps: 18%|█▊ | 127/700 [00:55<04:08, 2.31it/s, loss=0.194, lr=0.0001]\nSteps: 18%|█▊ | 127/700 [00:55<04:08, 2.31it/s, loss=0.101, lr=0.0001]\nSteps: 18%|█▊ | 128/700 [00:55<04:07, 2.31it/s, loss=0.101, lr=0.0001]\nSteps: 18%|█▊ | 128/700 [00:55<04:07, 2.31it/s, loss=0.106, lr=0.0001]\nSteps: 18%|█▊ | 129/700 [00:56<04:08, 2.30it/s, loss=0.106, lr=0.0001]\nSteps: 18%|█▊ | 129/700 [00:56<04:08, 2.30it/s, loss=0.138, lr=0.0001]\nSteps: 19%|█▊ | 130/700 [00:56<04:07, 2.30it/s, loss=0.138, lr=0.0001]\nSteps: 19%|█▊ | 130/700 [00:56<04:07, 2.30it/s, loss=0.229, lr=0.0001]\nSteps: 19%|█▊ | 131/700 [00:57<04:07, 2.30it/s, loss=0.229, lr=0.0001]\nSteps: 19%|█▊ | 131/700 [00:57<04:07, 2.30it/s, loss=0.125, lr=0.0001]\nSteps: 19%|█▉ | 132/700 [00:57<04:06, 2.30it/s, loss=0.125, lr=0.0001]\nSteps: 19%|█▉ | 132/700 [00:57<04:06, 2.30it/s, loss=0.251, lr=0.0001]\nSteps: 19%|█▉ | 133/700 [00:57<04:06, 2.30it/s, loss=0.251, lr=0.0001]\nSteps: 19%|█▉ | 133/700 [00:57<04:06, 2.30it/s, loss=0.111, lr=0.0001]\nSteps: 19%|█▉ | 134/700 [00:58<04:05, 2.30it/s, loss=0.111, lr=0.0001]\nSteps: 19%|█▉ | 134/700 [00:58<04:05, 2.30it/s, loss=0.0731, lr=0.0001]\nSteps: 19%|█▉ | 135/700 [00:58<04:05, 2.30it/s, loss=0.0731, lr=0.0001]\nSteps: 19%|█▉ | 135/700 [00:58<04:05, 2.30it/s, loss=0.146, lr=0.0001] \nSteps: 19%|█▉ | 136/700 [00:59<04:05, 2.30it/s, loss=0.146, lr=0.0001]\nSteps: 19%|█▉ | 136/700 [00:59<04:05, 2.30it/s, loss=0.0851, lr=0.0001]\nSteps: 20%|█▉ | 137/700 [00:59<04:04, 2.30it/s, loss=0.0851, lr=0.0001]\nSteps: 20%|█▉ | 137/700 [00:59<04:04, 2.30it/s, loss=0.245, lr=0.0001] 
\nSteps: 20%|█▉ | 138/700 [01:00<04:03, 2.31it/s, loss=0.245, lr=0.0001]\nSteps: 20%|█▉ | 138/700 [01:00<04:03, 2.31it/s, loss=0.113, lr=0.0001]\nSteps: 20%|█▉ | 139/700 [01:00<04:03, 2.30it/s, loss=0.113, lr=0.0001]\nSteps: 20%|█▉ | 139/700 [01:00<04:03, 2.30it/s, loss=0.158, lr=0.0001]\nSteps: 20%|██ | 140/700 [01:00<04:02, 2.31it/s, loss=0.158, lr=0.0001]\nSteps: 20%|██ | 140/700 [01:00<04:02, 2.31it/s, loss=0.0694, lr=0.0001]\nSteps: 20%|██ | 141/700 [01:01<04:02, 2.31it/s, loss=0.0694, lr=0.0001]\nSteps: 20%|██ | 141/700 [01:01<04:02, 2.31it/s, loss=0.0592, lr=0.0001]\nSteps: 20%|██ | 142/700 [01:01<04:02, 2.31it/s, loss=0.0592, lr=0.0001]\nSteps: 20%|██ | 142/700 [01:01<04:02, 2.31it/s, loss=0.0842, lr=0.0001]\nSteps: 20%|██ | 143/700 [01:02<04:01, 2.31it/s, loss=0.0842, lr=0.0001]\nSteps: 20%|██ | 143/700 [01:02<04:01, 2.31it/s, loss=0.286, lr=0.0001] \nSteps: 21%|██ | 144/700 [01:02<04:00, 2.31it/s, loss=0.286, lr=0.0001]\nSteps: 21%|██ | 144/700 [01:02<04:00, 2.31it/s, loss=0.153, lr=0.0001]\nSteps: 21%|██ | 145/700 [01:03<04:01, 2.30it/s, loss=0.153, lr=0.0001]\nSteps: 21%|██ | 145/700 [01:03<04:01, 2.30it/s, loss=0.128, lr=0.0001]\nSteps: 21%|██ | 146/700 [01:03<04:00, 2.30it/s, loss=0.128, lr=0.0001]\nSteps: 21%|██ | 146/700 [01:03<04:00, 2.30it/s, loss=0.135, lr=0.0001]\nSteps: 21%|██ | 147/700 [01:03<03:59, 2.30it/s, loss=0.135, lr=0.0001]\nSteps: 21%|██ | 147/700 [01:04<03:59, 2.30it/s, loss=0.133, lr=0.0001]\nSteps: 21%|██ | 148/700 [01:04<03:59, 2.31it/s, loss=0.133, lr=0.0001]\nSteps: 21%|██ | 148/700 [01:04<03:59, 2.31it/s, loss=0.139, lr=0.0001]\nSteps: 21%|██▏ | 149/700 [01:04<03:58, 2.31it/s, loss=0.139, lr=0.0001]\nSteps: 21%|██▏ | 149/700 [01:04<03:58, 2.31it/s, loss=0.0741, lr=0.0001]\nSteps: 21%|██▏ | 150/700 [01:05<03:58, 2.31it/s, loss=0.0741, lr=0.0001]\nSteps: 21%|██▏ | 150/700 [01:05<03:58, 2.31it/s, loss=0.26, lr=0.0001] \nSteps: 22%|██▏ | 151/700 [01:05<03:57, 2.31it/s, loss=0.26, lr=0.0001]\nSteps: 22%|██▏ | 151/700 [01:05<03:57, 
2.31it/s, loss=0.14, lr=0.0001]\nSteps: 22%|██▏ | 152/700 [01:06<03:57, 2.31it/s, loss=0.14, lr=0.0001]\nSteps: 22%|██▏ | 152/700 [01:06<03:57, 2.31it/s, loss=0.118, lr=0.0001]\nSteps: 22%|██▏ | 153/700 [01:06<03:56, 2.31it/s, loss=0.118, lr=0.0001]\nSteps: 22%|██▏ | 153/700 [01:06<03:56, 2.31it/s, loss=0.119, lr=0.0001]\nSteps: 22%|██▏ | 154/700 [01:07<03:56, 2.31it/s, loss=0.119, lr=0.0001]\nSteps: 22%|██▏ | 154/700 [01:07<03:56, 2.31it/s, loss=0.0301, lr=0.0001]\nSteps: 22%|██▏ | 155/700 [01:07<03:55, 2.31it/s, loss=0.0301, lr=0.0001]\nSteps: 22%|██▏ | 155/700 [01:07<03:55, 2.31it/s, loss=0.147, lr=0.0001] \nSteps: 22%|██▏ | 156/700 [01:07<03:55, 2.31it/s, loss=0.147, lr=0.0001]\nSteps: 22%|██▏ | 156/700 [01:07<03:55, 2.31it/s, loss=0.246, lr=0.0001]\nSteps: 22%|██▏ | 157/700 [01:08<03:55, 2.31it/s, loss=0.246, lr=0.0001]\nSteps: 22%|██▏ | 157/700 [01:08<03:55, 2.31it/s, loss=0.281, lr=0.0001]\nSteps: 23%|██▎ | 158/700 [01:08<03:54, 2.31it/s, loss=0.281, lr=0.0001]\nSteps: 23%|██▎ | 158/700 [01:08<03:54, 2.31it/s, loss=0.114, lr=0.0001]\nSteps: 23%|██▎ | 159/700 [01:09<03:54, 2.31it/s, loss=0.114, lr=0.0001]\nSteps: 23%|██▎ | 159/700 [01:09<03:54, 2.31it/s, loss=0.0437, lr=0.0001]\nSteps: 23%|██▎ | 160/700 [01:09<03:53, 2.31it/s, loss=0.0437, lr=0.0001]\nSteps: 23%|██▎ | 160/700 [01:09<03:53, 2.31it/s, loss=0.0781, lr=0.0001]\nSteps: 23%|██▎ | 161/700 [01:10<03:54, 2.30it/s, loss=0.0781, lr=0.0001]\nSteps: 23%|██▎ | 161/700 [01:10<03:54, 2.30it/s, loss=0.0544, lr=0.0001]\nSteps: 23%|██▎ | 162/700 [01:10<03:53, 2.30it/s, loss=0.0544, lr=0.0001]\nSteps: 23%|██▎ | 162/700 [01:10<03:53, 2.30it/s, loss=0.199, lr=0.0001] \nSteps: 23%|██▎ | 163/700 [01:10<03:53, 2.30it/s, loss=0.199, lr=0.0001]\nSteps: 23%|██▎ | 163/700 [01:10<03:53, 2.30it/s, loss=0.164, lr=0.0001]\nSteps: 23%|██▎ | 164/700 [01:11<03:52, 2.31it/s, loss=0.164, lr=0.0001]\nSteps: 23%|██▎ | 164/700 [01:11<03:52, 2.31it/s, loss=0.0932, lr=0.0001]\nSteps: 24%|██▎ | 165/700 [01:11<03:51, 2.31it/s, 
loss=0.0932, lr=0.0001]\nSteps: 24%|██▎ | 165/700 [01:11<03:51, 2.31it/s, loss=0.116, lr=0.0001] \nSteps: 24%|██▎ | 166/700 [01:12<03:51, 2.31it/s, loss=0.116, lr=0.0001]\nSteps: 24%|██▎ | 166/700 [01:12<03:51, 2.31it/s, loss=0.0942, lr=0.0001]\nSteps: 24%|██▍ | 167/700 [01:12<03:50, 2.31it/s, loss=0.0942, lr=0.0001]\nSteps: 24%|██▍ | 167/700 [01:12<03:50, 2.31it/s, loss=0.105, lr=0.0001] \nSteps: 24%|██▍ | 168/700 [01:13<03:50, 2.31it/s, loss=0.105, lr=0.0001]\nSteps: 24%|██▍ | 168/700 [01:13<03:50, 2.31it/s, loss=0.141, lr=0.0001]\nSteps: 24%|██▍ | 169/700 [01:13<03:50, 2.31it/s, loss=0.141, lr=0.0001]\nSteps: 24%|██▍ | 169/700 [01:13<03:50, 2.31it/s, loss=0.146, lr=0.0001]\nSteps: 24%|██▍ | 170/700 [01:13<03:49, 2.31it/s, loss=0.146, lr=0.0001]\nSteps: 24%|██▍ | 170/700 [01:13<03:49, 2.31it/s, loss=0.0638, lr=0.0001]\nSteps: 24%|██▍ | 171/700 [01:14<03:49, 2.31it/s, loss=0.0638, lr=0.0001]\nSteps: 24%|██▍ | 171/700 [01:14<03:49, 2.31it/s, loss=0.16, lr=0.0001] \nSteps: 25%|██▍ | 172/700 [01:14<03:48, 2.31it/s, loss=0.16, lr=0.0001]\nSteps: 25%|██▍ | 172/700 [01:14<03:48, 2.31it/s, loss=0.215, lr=0.0001]\nSteps: 25%|██▍ | 173/700 [01:15<03:48, 2.31it/s, loss=0.215, lr=0.0001]\nSteps: 25%|██▍ | 173/700 [01:15<03:48, 2.31it/s, loss=0.21, lr=0.0001] \nSteps: 25%|██▍ | 174/700 [01:15<03:47, 2.31it/s, loss=0.21, lr=0.0001]\nSteps: 25%|██▍ | 174/700 [01:15<03:47, 2.31it/s, loss=0.174, lr=0.0001]\nSteps: 25%|██▌ | 175/700 [01:16<03:47, 2.31it/s, loss=0.174, lr=0.0001]\nSteps: 25%|██▌ | 175/700 [01:16<03:47, 2.31it/s, loss=0.117, lr=0.0001]\nSteps: 25%|██▌ | 176/700 [01:16<03:46, 2.31it/s, loss=0.117, lr=0.0001]\nSteps: 25%|██▌ | 176/700 [01:16<03:46, 2.31it/s, loss=0.169, lr=0.0001]\nSteps: 25%|██▌ | 177/700 [01:16<03:47, 2.30it/s, loss=0.169, lr=0.0001]\nSteps: 25%|██▌ | 177/700 [01:17<03:47, 2.30it/s, loss=0.0948, lr=0.0001]\nSteps: 25%|██▌ | 178/700 [01:17<03:46, 2.30it/s, loss=0.0948, lr=0.0001]\nSteps: 25%|██▌ | 178/700 [01:17<03:46, 2.30it/s, loss=0.275, 
lr=0.0001] \nSteps: 26%|██▌ | 179/700 [01:17<03:46, 2.30it/s, loss=0.275, lr=0.0001]\nSteps: 26%|██▌ | 179/700 [01:17<03:46, 2.30it/s, loss=0.109, lr=0.0001]\nSteps: 26%|██▌ | 180/700 [01:18<03:45, 2.31it/s, loss=0.109, lr=0.0001]\nSteps: 26%|██▌ | 180/700 [01:18<03:45, 2.31it/s, loss=0.0641, lr=0.0001]\nSteps: 26%|██▌ | 181/700 [01:18<03:45, 2.30it/s, loss=0.0641, lr=0.0001]\nSteps: 26%|██▌ | 181/700 [01:18<03:45, 2.30it/s, loss=0.245, lr=0.0001] \nSteps: 26%|██▌ | 182/700 [01:19<03:44, 2.31it/s, loss=0.245, lr=0.0001]\nSteps: 26%|██▌ | 182/700 [01:19<03:44, 2.31it/s, loss=0.133, lr=0.0001]\nSteps: 26%|██▌ | 183/700 [01:19<03:44, 2.31it/s, loss=0.133, lr=0.0001]\nSteps: 26%|██▌ | 183/700 [01:19<03:44, 2.31it/s, loss=0.0986, lr=0.0001]\nSteps: 26%|██▋ | 184/700 [01:20<03:43, 2.30it/s, loss=0.0986, lr=0.0001]\nSteps: 26%|██▋ | 184/700 [01:20<03:43, 2.30it/s, loss=0.152, lr=0.0001] \nSteps: 26%|██▋ | 185/700 [01:20<03:43, 2.31it/s, loss=0.152, lr=0.0001]\nSteps: 26%|██▋ | 185/700 [01:20<03:43, 2.31it/s, loss=0.136, lr=0.0001]\nSteps: 27%|██▋ | 186/700 [01:20<03:42, 2.31it/s, loss=0.136, lr=0.0001]\nSteps: 27%|██▋ | 186/700 [01:20<03:42, 2.31it/s, loss=0.172, lr=0.0001]\nSteps: 27%|██▋ | 187/700 [01:21<03:42, 2.31it/s, loss=0.172, lr=0.0001]\nSteps: 27%|██▋ | 187/700 [01:21<03:42, 2.31it/s, loss=0.31, lr=0.0001] \nSteps: 27%|██▋ | 188/700 [01:21<03:42, 2.30it/s, loss=0.31, lr=0.0001]\nSteps: 27%|██▋ | 188/700 [01:21<03:42, 2.30it/s, loss=0.124, lr=0.0001]\nSteps: 27%|██▋ | 189/700 [01:22<03:41, 2.30it/s, loss=0.124, lr=0.0001]\nSteps: 27%|██▋ | 189/700 [01:22<03:41, 2.30it/s, loss=0.049, lr=0.0001]\nSteps: 27%|██▋ | 190/700 [01:22<03:41, 2.30it/s, loss=0.049, lr=0.0001]\nSteps: 27%|██▋ | 190/700 [01:22<03:41, 2.30it/s, loss=0.0852, lr=0.0001]\nSteps: 27%|██▋ | 191/700 [01:23<03:41, 2.30it/s, loss=0.0852, lr=0.0001]\nSteps: 27%|██▋ | 191/700 [01:23<03:41, 2.30it/s, loss=0.0649, lr=0.0001]\nSteps: 27%|██▋ | 192/700 [01:23<03:40, 2.31it/s, loss=0.0649, lr=0.0001]\nSteps: 
27%|██▋ | 192/700 [01:23<03:40, 2.31it/s, loss=0.0476, lr=0.0001]\nSteps: 28%|██▊ | 193/700 [01:23<03:41, 2.29it/s, loss=0.0476, lr=0.0001]\nSteps: 28%|██▊ | 193/700 [01:23<03:41, 2.29it/s, loss=0.0807, lr=0.0001]\nSteps: 28%|██▊ | 194/700 [01:24<03:40, 2.29it/s, loss=0.0807, lr=0.0001]\nSteps: 28%|██▊ | 194/700 [01:24<03:40, 2.29it/s, loss=0.207, lr=0.0001] \nSteps: 28%|██▊ | 195/700 [01:24<03:39, 2.30it/s, loss=0.207, lr=0.0001]\nSteps: 28%|██▊ | 195/700 [01:24<03:39, 2.30it/s, loss=0.153, lr=0.0001]\nSteps: 28%|██▊ | 196/700 [01:25<03:38, 2.30it/s, loss=0.153, lr=0.0001]\nSteps: 28%|██▊ | 196/700 [01:25<03:38, 2.30it/s, loss=0.0468, lr=0.0001]\nSteps: 28%|██▊ | 197/700 [01:25<03:38, 2.31it/s, loss=0.0468, lr=0.0001]\nSteps: 28%|██▊ | 197/700 [01:25<03:38, 2.31it/s, loss=0.194, lr=0.0001] \nSteps: 28%|██▊ | 198/700 [01:26<03:37, 2.31it/s, loss=0.194, lr=0.0001]\nSteps: 28%|██▊ | 198/700 [01:26<03:37, 2.31it/s, loss=0.341, lr=0.0001]\nSteps: 28%|██▊ | 199/700 [01:26<03:37, 2.31it/s, loss=0.341, lr=0.0001]\nSteps: 28%|██▊ | 199/700 [01:26<03:37, 2.31it/s, loss=0.0981, lr=0.0001]\nSteps: 29%|██▊ | 200/700 [01:26<03:36, 2.31it/s, loss=0.0981, lr=0.0001]\nSteps: 29%|██▊ | 200/700 [01:27<03:36, 2.31it/s, loss=0.193, lr=0.0001] \nSteps: 29%|██▊ | 201/700 [01:27<03:36, 2.30it/s, loss=0.193, lr=0.0001]\nSteps: 29%|██▊ | 201/700 [01:27<03:36, 2.30it/s, loss=0.0917, lr=0.0001]\nSteps: 29%|██▉ | 202/700 [01:27<03:35, 2.31it/s, loss=0.0917, lr=0.0001]\nSteps: 29%|██▉ | 202/700 [01:27<03:35, 2.31it/s, loss=0.149, lr=0.0001] \nSteps: 29%|██▉ | 203/700 [01:28<03:35, 2.31it/s, loss=0.149, lr=0.0001]\nSteps: 29%|██▉ | 203/700 [01:28<03:35, 2.31it/s, loss=0.0842, lr=0.0001]\nSteps: 29%|██▉ | 204/700 [01:28<03:34, 2.31it/s, loss=0.0842, lr=0.0001]\nSteps: 29%|██▉ | 204/700 [01:28<03:34, 2.31it/s, loss=0.27, lr=0.0001] \nSteps: 29%|██▉ | 205/700 [01:29<03:34, 2.31it/s, loss=0.27, lr=0.0001]\nSteps: 29%|██▉ | 205/700 [01:29<03:34, 2.31it/s, loss=0.234, lr=0.0001]\nSteps: 29%|██▉ | 
206/700 [01:29<03:34, 2.31it/s, loss=0.234, lr=0.0001]\nSteps: 29%|██▉ | 206/700 [01:29<03:34, 2.31it/s, loss=0.125, lr=0.0001]\nSteps: 30%|██▉ | 207/700 [01:30<03:33, 2.31it/s, loss=0.125, lr=0.0001]\nSteps: 30%|██▉ | 207/700 [01:30<03:33, 2.31it/s, loss=0.0958, lr=0.0001]\nSteps: 30%|██▉ | 208/700 [01:30<03:33, 2.31it/s, loss=0.0958, lr=0.0001]\nSteps: 30%|██▉ | 208/700 [01:30<03:33, 2.31it/s, loss=0.0906, lr=0.0001]\nSteps: 30%|██▉ | 209/700 [01:30<03:33, 2.30it/s, loss=0.0906, lr=0.0001]\nSteps: 30%|██▉ | 209/700 [01:30<03:33, 2.30it/s, loss=0.0941, lr=0.0001]\nSteps: 30%|███ | 210/700 [01:31<03:32, 2.30it/s, loss=0.0941, lr=0.0001]\nSteps: 30%|███ | 210/700 [01:31<03:32, 2.30it/s, loss=0.0909, lr=0.0001]\nSteps: 30%|███ | 211/700 [01:31<03:32, 2.30it/s, loss=0.0909, lr=0.0001]\nSteps: 30%|███ | 211/700 [01:31<03:32, 2.30it/s, loss=0.126, lr=0.0001] \nSteps: 30%|███ | 212/700 [01:32<03:31, 2.30it/s, loss=0.126, lr=0.0001]\nSteps: 30%|███ | 212/700 [01:32<03:31, 2.30it/s, loss=0.148, lr=0.0001]\nSteps: 30%|███ | 213/700 [01:32<03:31, 2.31it/s, loss=0.148, lr=0.0001]\nSteps: 30%|███ | 213/700 [01:32<03:31, 2.31it/s, loss=0.259, lr=0.0001]\nSteps: 31%|███ | 214/700 [01:33<03:30, 2.31it/s, loss=0.259, lr=0.0001]\nSteps: 31%|███ | 214/700 [01:33<03:30, 2.31it/s, loss=0.233, lr=0.0001]\nSteps: 31%|███ | 215/700 [01:33<03:30, 2.31it/s, loss=0.233, lr=0.0001]\nSteps: 31%|███ | 215/700 [01:33<03:30, 2.31it/s, loss=0.0979, lr=0.0001]\nSteps: 31%|███ | 216/700 [01:33<03:29, 2.31it/s, loss=0.0979, lr=0.0001]\nSteps: 31%|███ | 216/700 [01:33<03:29, 2.31it/s, loss=0.167, lr=0.0001] \nSteps: 31%|███ | 217/700 [01:34<03:29, 2.31it/s, loss=0.167, lr=0.0001]\nSteps: 31%|███ | 217/700 [01:34<03:29, 2.31it/s, loss=0.136, lr=0.0001]\nSteps: 31%|███ | 218/700 [01:34<03:28, 2.31it/s, loss=0.136, lr=0.0001]\nSteps: 31%|███ | 218/700 [01:34<03:28, 2.31it/s, loss=0.112, lr=0.0001]\nSteps: 31%|███▏ | 219/700 [01:35<03:28, 2.31it/s, loss=0.112, lr=0.0001]\nSteps: 31%|███▏ | 219/700 
[01:35<03:28, 2.31it/s, loss=0.0973, lr=0.0001]\nSteps: 31%|███▏ | 220/700 [01:35<03:27, 2.31it/s, loss=0.0973, lr=0.0001]\nSteps: 31%|███▏ | 220/700 [01:35<03:27, 2.31it/s, loss=0.113, lr=0.0001] \nSteps: 32%|███▏ | 221/700 [01:36<03:27, 2.31it/s, loss=0.113, lr=0.0001]\nSteps: 32%|███▏ | 221/700 [01:36<03:27, 2.31it/s, loss=0.094, lr=0.0001]\nSteps: 32%|███▏ | 222/700 [01:36<03:26, 2.31it/s, loss=0.094, lr=0.0001]\nSteps: 32%|███▏ | 222/700 [01:36<03:26, 2.31it/s, loss=0.141, lr=0.0001]\nSteps: 32%|███▏ | 223/700 [01:36<03:26, 2.31it/s, loss=0.141, lr=0.0001]\nSteps: 32%|███▏ | 223/700 [01:36<03:26, 2.31it/s, loss=0.148, lr=0.0001]\nSteps: 32%|███▏ | 224/700 [01:37<03:25, 2.31it/s, loss=0.148, lr=0.0001]\nSteps: 32%|███▏ | 224/700 [01:37<03:25, 2.31it/s, loss=0.105, lr=0.0001]\nSteps: 32%|███▏ | 225/700 [01:37<03:26, 2.30it/s, loss=0.105, lr=0.0001]\nSteps: 32%|███▏ | 225/700 [01:37<03:26, 2.30it/s, loss=0.255, lr=0.0001]\nSteps: 32%|███▏ | 226/700 [01:38<03:25, 2.30it/s, loss=0.255, lr=0.0001]\nSteps: 32%|███▏ | 226/700 [01:38<03:25, 2.30it/s, loss=0.189, lr=0.0001]\nSteps: 32%|███▏ | 227/700 [01:38<03:25, 2.30it/s, loss=0.189, lr=0.0001]\nSteps: 32%|███▏ | 227/700 [01:38<03:25, 2.30it/s, loss=0.117, lr=0.0001]\nSteps: 33%|███▎ | 228/700 [01:39<03:24, 2.31it/s, loss=0.117, lr=0.0001]\nSteps: 33%|███▎ | 228/700 [01:39<03:24, 2.31it/s, loss=0.0894, lr=0.0001]\nSteps: 33%|███▎ | 229/700 [01:39<03:24, 2.31it/s, loss=0.0894, lr=0.0001]\nSteps: 33%|███▎ | 229/700 [01:39<03:24, 2.31it/s, loss=0.107, lr=0.0001] \nSteps: 33%|███▎ | 230/700 [01:39<03:23, 2.31it/s, loss=0.107, lr=0.0001]\nSteps: 33%|███▎ | 230/700 [01:40<03:23, 2.31it/s, loss=0.0873, lr=0.0001]\nSteps: 33%|███▎ | 231/700 [01:40<03:23, 2.31it/s, loss=0.0873, lr=0.0001]\nSteps: 33%|███▎ | 231/700 [01:40<03:23, 2.31it/s, loss=0.0671, lr=0.0001]\nSteps: 33%|███▎ | 232/700 [01:40<03:22, 2.31it/s, loss=0.0671, lr=0.0001]\nSteps: 33%|███▎ | 232/700 [01:40<03:22, 2.31it/s, loss=0.094, lr=0.0001] \nSteps: 33%|███▎ 
| 233/700 [01:41<03:22, 2.31it/s, loss=0.094, lr=0.0001]\nSteps: 33%|███▎ | 233/700 [01:41<03:22, 2.31it/s, loss=0.124, lr=0.0001]\nSteps: 33%|███▎ | 234/700 [01:41<03:21, 2.31it/s, loss=0.124, lr=0.0001]\nSteps: 33%|███▎ | 234/700 [01:41<03:21, 2.31it/s, loss=0.0847, lr=0.0001]\nSteps: 34%|███▎ | 235/700 [01:42<03:21, 2.31it/s, loss=0.0847, lr=0.0001]\nSteps: 34%|███▎ | 235/700 [01:42<03:21, 2.31it/s, loss=0.236, lr=0.0001] \nSteps: 34%|███▎ | 236/700 [01:42<03:20, 2.31it/s, loss=0.236, lr=0.0001]\nSteps: 34%|███▎ | 236/700 [01:42<03:20, 2.31it/s, loss=0.0215, lr=0.0001]\nSteps: 34%|███▍ | 237/700 [01:43<03:20, 2.31it/s, loss=0.0215, lr=0.0001]\nSteps: 34%|███▍ | 237/700 [01:43<03:20, 2.31it/s, loss=0.0918, lr=0.0001]\nSteps: 34%|███▍ | 238/700 [01:43<03:19, 2.31it/s, loss=0.0918, lr=0.0001]\nSteps: 34%|███▍ | 238/700 [01:43<03:19, 2.31it/s, loss=0.152, lr=0.0001] \nSteps: 34%|███▍ | 239/700 [01:43<03:19, 2.31it/s, loss=0.152, lr=0.0001]\nSteps: 34%|███▍ | 239/700 [01:43<03:19, 2.31it/s, loss=0.0908, lr=0.0001]\nSteps: 34%|███▍ | 240/700 [01:44<03:18, 2.31it/s, loss=0.0908, lr=0.0001]\nSteps: 34%|███▍ | 240/700 [01:44<03:18, 2.31it/s, loss=0.0664, lr=0.0001]\nSteps: 34%|███▍ | 241/700 [01:44<03:19, 2.30it/s, loss=0.0664, lr=0.0001]\nSteps: 34%|███▍ | 241/700 [01:44<03:19, 2.30it/s, loss=0.0761, lr=0.0001]\nSteps: 35%|███▍ | 242/700 [01:45<03:18, 2.30it/s, loss=0.0761, lr=0.0001]\nSteps: 35%|███▍ | 242/700 [01:45<03:18, 2.30it/s, loss=0.0773, lr=0.0001]\nSteps: 35%|███▍ | 243/700 [01:45<03:18, 2.31it/s, loss=0.0773, lr=0.0001]\nSteps: 35%|███▍ | 243/700 [01:45<03:18, 2.31it/s, loss=0.127, lr=0.0001] \nSteps: 35%|███▍ | 244/700 [01:46<03:17, 2.31it/s, loss=0.127, lr=0.0001]\nSteps: 35%|███▍ | 244/700 [01:46<03:17, 2.31it/s, loss=0.16, lr=0.0001] \nSteps: 35%|███▌ | 245/700 [01:46<03:17, 2.31it/s, loss=0.16, lr=0.0001]\nSteps: 35%|███▌ | 245/700 [01:46<03:17, 2.31it/s, loss=0.0749, lr=0.0001]\nSteps: 35%|███▌ | 246/700 [01:46<03:16, 2.31it/s, loss=0.0749, 
lr=0.0001]\nSteps: 35%|███▌ | 246/700 [01:46<03:16, 2.31it/s, loss=0.143, lr=0.0001] \nSteps: 35%|███▌ | 247/700 [01:47<03:16, 2.31it/s, loss=0.143, lr=0.0001]\nSteps: 35%|███▌ | 247/700 [01:47<03:16, 2.31it/s, loss=0.221, lr=0.0001]\nSteps: 35%|███▌ | 248/700 [01:47<03:15, 2.31it/s, loss=0.221, lr=0.0001]\nSteps: 35%|███▌ | 248/700 [01:47<03:15, 2.31it/s, loss=0.0879, lr=0.0001]\nSteps: 36%|███▌ | 249/700 [01:48<03:15, 2.31it/s, loss=0.0879, lr=0.0001]\nSteps: 36%|███▌ | 249/700 [01:48<03:15, 2.31it/s, loss=0.0838, lr=0.0001]\nSteps: 36%|███▌ | 250/700 [01:48<03:14, 2.31it/s, loss=0.0838, lr=0.0001]\nSteps: 36%|███▌ | 250/700 [01:48<03:14, 2.31it/s, loss=0.166, lr=0.0001] \nSteps: 36%|███▌ | 251/700 [01:49<03:14, 2.31it/s, loss=0.166, lr=0.0001]\nSteps: 36%|███▌ | 251/700 [01:49<03:14, 2.31it/s, loss=0.156, lr=0.0001]\nSteps: 36%|███▌ | 252/700 [01:49<03:13, 2.31it/s, loss=0.156, lr=0.0001]\nSteps: 36%|███▌ | 252/700 [01:49<03:13, 2.31it/s, loss=0.256, lr=0.0001]\nSteps: 36%|███▌ | 253/700 [01:49<03:13, 2.31it/s, loss=0.256, lr=0.0001]\nSteps: 36%|███▌ | 253/700 [01:49<03:13, 2.31it/s, loss=0.044, lr=0.0001]\nSteps: 36%|███▋ | 254/700 [01:50<03:12, 2.31it/s, loss=0.044, lr=0.0001]\nSteps: 36%|███▋ | 254/700 [01:50<03:12, 2.31it/s, loss=0.182, lr=0.0001]\nSteps: 36%|███▋ | 255/700 [01:50<03:12, 2.31it/s, loss=0.182, lr=0.0001]\nSteps: 36%|███▋ | 255/700 [01:50<03:12, 2.31it/s, loss=0.102, lr=0.0001]\nSteps: 37%|███▋ | 256/700 [01:51<03:12, 2.31it/s, loss=0.102, lr=0.0001]\nSteps: 37%|███▋ | 256/700 [01:51<03:12, 2.31it/s, loss=0.151, lr=0.0001]\nSteps: 37%|███▋ | 257/700 [01:51<03:12, 2.30it/s, loss=0.151, lr=0.0001]\nSteps: 37%|███▋ | 257/700 [01:51<03:12, 2.30it/s, loss=0.0976, lr=0.0001]\nSteps: 37%|███▋ | 258/700 [01:52<03:11, 2.30it/s, loss=0.0976, lr=0.0001]\nSteps: 37%|███▋ | 258/700 [01:52<03:11, 2.30it/s, loss=0.193, lr=0.0001] \nSteps: 37%|███▋ | 259/700 [01:52<03:11, 2.31it/s, loss=0.193, lr=0.0001]\nSteps: 37%|███▋ | 259/700 [01:52<03:11, 2.31it/s, 
loss=0.0853, lr=0.0001]\nSteps: 37%|███▋ | 260/700 [01:52<03:10, 2.31it/s, loss=0.0853, lr=0.0001]\nSteps: 37%|███▋ | 260/700 [01:53<03:10, 2.31it/s, loss=0.201, lr=0.0001] \nSteps: 37%|███▋ | 261/700 [01:53<03:10, 2.31it/s, loss=0.201, lr=0.0001]\nSteps: 37%|███▋ | 261/700 [01:53<03:10, 2.31it/s, loss=0.191, lr=0.0001]\nSteps: 37%|███▋ | 262/700 [01:53<03:09, 2.31it/s, loss=0.191, lr=0.0001]\nSteps: 37%|███▋ | 262/700 [01:53<03:09, 2.31it/s, loss=0.0494, lr=0.0001]\nSteps: 38%|███▊ | 263/700 [01:54<03:09, 2.31it/s, loss=0.0494, lr=0.0001]\nSteps: 38%|███▊ | 263/700 [01:54<03:09, 2.31it/s, loss=0.0995, lr=0.0001]\nSteps: 38%|███▊ | 264/700 [01:54<03:08, 2.31it/s, loss=0.0995, lr=0.0001]\nSteps: 38%|███▊ | 264/700 [01:54<03:08, 2.31it/s, loss=0.204, lr=0.0001] \nSteps: 38%|███▊ | 265/700 [01:55<03:08, 2.31it/s, loss=0.204, lr=0.0001]\nSteps: 38%|███▊ | 265/700 [01:55<03:08, 2.31it/s, loss=0.18, lr=0.0001] \nSteps: 38%|███▊ | 266/700 [01:55<03:07, 2.31it/s, loss=0.18, lr=0.0001]\nSteps: 38%|███▊ | 266/700 [01:55<03:07, 2.31it/s, loss=0.107, lr=0.0001]\nSteps: 38%|███▊ | 267/700 [01:56<03:07, 2.31it/s, loss=0.107, lr=0.0001]\nSteps: 38%|███▊ | 267/700 [01:56<03:07, 2.31it/s, loss=0.243, lr=0.0001]\nSteps: 38%|███▊ | 268/700 [01:56<03:06, 2.31it/s, loss=0.243, lr=0.0001]\nSteps: 38%|███▊ | 268/700 [01:56<03:06, 2.31it/s, loss=0.0764, lr=0.0001]\nSteps: 38%|███▊ | 269/700 [01:56<03:06, 2.31it/s, loss=0.0764, lr=0.0001]\nSteps: 38%|███▊ | 269/700 [01:56<03:06, 2.31it/s, loss=0.103, lr=0.0001] \nSteps: 39%|███▊ | 270/700 [01:57<03:06, 2.31it/s, loss=0.103, lr=0.0001]\nSteps: 39%|███▊ | 270/700 [01:57<03:06, 2.31it/s, loss=0.114, lr=0.0001]\nSteps: 39%|███▊ | 271/700 [01:57<03:05, 2.31it/s, loss=0.114, lr=0.0001]\nSteps: 39%|███▊ | 271/700 [01:57<03:05, 2.31it/s, loss=0.206, lr=0.0001]\nSteps: 39%|███▉ | 272/700 [01:58<03:05, 2.31it/s, loss=0.206, lr=0.0001]\nSteps: 39%|███▉ | 272/700 [01:58<03:05, 2.31it/s, loss=0.108, lr=0.0001]\nSteps: 39%|███▉ | 273/700 [01:58<03:05, 
2.30it/s, loss=0.108, lr=0.0001]\nSteps: 39%|███▉ | 273/700 [01:58<03:05, 2.30it/s, loss=0.14, lr=0.0001] \nSteps: 39%|███▉ | 274/700 [01:59<03:04, 2.30it/s, loss=0.14, lr=0.0001]\nSteps: 39%|███▉ | 274/700 [01:59<03:04, 2.30it/s, loss=0.0251, lr=0.0001]\nSteps: 39%|███▉ | 275/700 [01:59<03:04, 2.31it/s, loss=0.0251, lr=0.0001]\nSteps: 39%|███▉ | 275/700 [01:59<03:04, 2.31it/s, loss=0.151, lr=0.0001] \nSteps: 39%|███▉ | 276/700 [01:59<03:03, 2.31it/s, loss=0.151, lr=0.0001]\nSteps: 39%|███▉ | 276/700 [01:59<03:03, 2.31it/s, loss=0.128, lr=0.0001]\nSteps: 40%|███▉ | 277/700 [02:00<03:03, 2.31it/s, loss=0.128, lr=0.0001]\nSteps: 40%|███▉ | 277/700 [02:00<03:03, 2.31it/s, loss=0.097, lr=0.0001]\nSteps: 40%|███▉ | 278/700 [02:00<03:02, 2.31it/s, loss=0.097, lr=0.0001]\nSteps: 40%|███▉ | 278/700 [02:00<03:02, 2.31it/s, loss=0.293, lr=0.0001]\nSteps: 40%|███▉ | 279/700 [02:01<03:02, 2.31it/s, loss=0.293, lr=0.0001]\nSteps: 40%|███▉ | 279/700 [02:01<03:02, 2.31it/s, loss=0.286, lr=0.0001]\nSteps: 40%|████ | 280/700 [02:01<03:01, 2.31it/s, loss=0.286, lr=0.0001]\nSteps: 40%|████ | 280/700 [02:01<03:01, 2.31it/s, loss=0.171, lr=0.0001]\nSteps: 40%|████ | 281/700 [02:02<03:01, 2.31it/s, loss=0.171, lr=0.0001]\nSteps: 40%|████ | 281/700 [02:02<03:01, 2.31it/s, loss=0.2, lr=0.0001] \nSteps: 40%|████ | 282/700 [02:02<03:00, 2.31it/s, loss=0.2, lr=0.0001]\nSteps: 40%|████ | 282/700 [02:02<03:00, 2.31it/s, loss=0.153, lr=0.0001]\nSteps: 40%|████ | 283/700 [02:02<03:00, 2.31it/s, loss=0.153, lr=0.0001]\nSteps: 40%|████ | 283/700 [02:02<03:00, 2.31it/s, loss=0.132, lr=0.0001]\nSteps: 41%|████ | 284/700 [02:03<02:59, 2.31it/s, loss=0.132, lr=0.0001]\nSteps: 41%|████ | 284/700 [02:03<02:59, 2.31it/s, loss=0.115, lr=0.0001]\nSteps: 41%|████ | 285/700 [02:03<02:59, 2.31it/s, loss=0.115, lr=0.0001]\nSteps: 41%|████ | 285/700 [02:03<02:59, 2.31it/s, loss=0.159, lr=0.0001]\nSteps: 41%|████ | 286/700 [02:04<02:59, 2.31it/s, loss=0.159, lr=0.0001]\nSteps: 41%|████ | 286/700 [02:04<02:59, 
2.31it/s, loss=0.0701, lr=0.0001]\nSteps: 41%|████ | 287/700 [02:04<02:58, 2.31it/s, loss=0.0701, lr=0.0001]\nSteps: 41%|████ | 287/700 [02:04<02:58, 2.31it/s, loss=0.134, lr=0.0001] \nSteps: 41%|████ | 288/700 [02:05<02:58, 2.31it/s, loss=0.134, lr=0.0001]\nSteps: 41%|████ | 288/700 [02:05<02:58, 2.31it/s, loss=0.188, lr=0.0001]\nSteps: 41%|████▏ | 289/700 [02:05<02:58, 2.30it/s, loss=0.188, lr=0.0001]\nSteps: 41%|████▏ | 289/700 [02:05<02:58, 2.30it/s, loss=0.0311, lr=0.0001]\nSteps: 41%|████▏ | 290/700 [02:05<02:58, 2.30it/s, loss=0.0311, lr=0.0001]\nSteps: 41%|████▏ | 290/700 [02:05<02:58, 2.30it/s, loss=0.13, lr=0.0001] \nSteps: 42%|████▏ | 291/700 [02:06<02:57, 2.30it/s, loss=0.13, lr=0.0001]\nSteps: 42%|████▏ | 291/700 [02:06<02:57, 2.30it/s, loss=0.286, lr=0.0001]\nSteps: 42%|████▏ | 292/700 [02:06<02:57, 2.30it/s, loss=0.286, lr=0.0001]\nSteps: 42%|████▏ | 292/700 [02:06<02:57, 2.30it/s, loss=0.136, lr=0.0001]\nSteps: 42%|████▏ | 293/700 [02:07<02:56, 2.31it/s, loss=0.136, lr=0.0001]\nSteps: 42%|████▏ | 293/700 [02:07<02:56, 2.31it/s, loss=0.0702, lr=0.0001]\nSteps: 42%|████▏ | 294/700 [02:07<02:55, 2.31it/s, loss=0.0702, lr=0.0001]\nSteps: 42%|████▏ | 294/700 [02:07<02:55, 2.31it/s, loss=0.161, lr=0.0001] \nSteps: 42%|████▏ | 295/700 [02:08<02:55, 2.31it/s, loss=0.161, lr=0.0001]\nSteps: 42%|████▏ | 295/700 [02:08<02:55, 2.31it/s, loss=0.0911, lr=0.0001]\nSteps: 42%|████▏ | 296/700 [02:08<02:54, 2.31it/s, loss=0.0911, lr=0.0001]\nSteps: 42%|████▏ | 296/700 [02:08<02:54, 2.31it/s, loss=0.074, lr=0.0001] \nSteps: 42%|████▏ | 297/700 [02:08<02:54, 2.31it/s, loss=0.074, lr=0.0001]\nSteps: 42%|████▏ | 297/700 [02:09<02:54, 2.31it/s, loss=0.112, lr=0.0001]\nSteps: 43%|████▎ | 298/700 [02:09<02:54, 2.31it/s, loss=0.112, lr=0.0001]\nSteps: 43%|████▎ | 298/700 [02:09<02:54, 2.31it/s, loss=0.0824, lr=0.0001]\nSteps: 43%|████▎ | 299/700 [02:09<02:53, 2.31it/s, loss=0.0824, lr=0.0001]\nSteps: 43%|████▎ | 299/700 [02:09<02:53, 2.31it/s, loss=0.124, lr=0.0001] \nSteps: 
43%|████▎ | 300/700 [02:10<02:53, 2.31it/s, loss=0.124, lr=0.0001]\nSteps: 43%|████▎ | 300/700 [02:10<02:53, 2.31it/s, loss=0.129, lr=0.0001]\nSteps: 43%|████▎ | 301/700 [02:10<02:53, 2.31it/s, loss=0.129, lr=0.0001]\nSteps: 43%|████▎ | 301/700 [02:10<02:53, 2.31it/s, loss=0.148, lr=0.0001]\nSteps: 43%|████▎ | 302/700 [02:11<02:52, 2.31it/s, loss=0.148, lr=0.0001]\nSteps: 43%|████▎ | 302/700 [02:11<02:52, 2.31it/s, loss=0.0999, lr=0.0001]\nSteps: 43%|████▎ | 303/700 [02:11<02:51, 2.31it/s, loss=0.0999, lr=0.0001]\nSteps: 43%|████▎ | 303/700 [02:11<02:51, 2.31it/s, loss=0.0991, lr=0.0001]\nSteps: 43%|████▎ | 304/700 [02:12<02:51, 2.31it/s, loss=0.0991, lr=0.0001]\nSteps: 43%|████▎ | 304/700 [02:12<02:51, 2.31it/s, loss=0.206, lr=0.0001] \nSteps: 44%|████▎ | 305/700 [02:12<02:51, 2.30it/s, loss=0.206, lr=0.0001]\nSteps: 44%|████▎ | 305/700 [02:12<02:51, 2.30it/s, loss=0.0953, lr=0.0001]\nSteps: 44%|████▎ | 306/700 [02:12<02:51, 2.30it/s, loss=0.0953, lr=0.0001]\nSteps: 44%|████▎ | 306/700 [02:12<02:51, 2.30it/s, loss=0.132, lr=0.0001] \nSteps: 44%|████▍ | 307/700 [02:13<02:50, 2.31it/s, loss=0.132, lr=0.0001]\nSteps: 44%|████▍ | 307/700 [02:13<02:50, 2.31it/s, loss=0.0862, lr=0.0001]\nSteps: 44%|████▍ | 308/700 [02:13<02:49, 2.31it/s, loss=0.0862, lr=0.0001]\nSteps: 44%|████▍ | 308/700 [02:13<02:49, 2.31it/s, loss=0.0361, lr=0.0001]\nSteps: 44%|████▍ | 309/700 [02:14<02:49, 2.31it/s, loss=0.0361, lr=0.0001]\nSteps: 44%|████▍ | 309/700 [02:14<02:49, 2.31it/s, loss=0.229, lr=0.0001] \nSteps: 44%|████▍ | 310/700 [02:14<02:49, 2.31it/s, loss=0.229, lr=0.0001]\nSteps: 44%|████▍ | 310/700 [02:14<02:49, 2.31it/s, loss=0.133, lr=0.0001]\nSteps: 44%|████▍ | 311/700 [02:15<02:48, 2.31it/s, loss=0.133, lr=0.0001]\nSteps: 44%|████▍ | 311/700 [02:15<02:48, 2.31it/s, loss=0.163, lr=0.0001]\nSteps: 45%|████▍ | 312/700 [02:15<02:47, 2.31it/s, loss=0.163, lr=0.0001]\nSteps: 45%|████▍ | 312/700 [02:15<02:47, 2.31it/s, loss=0.116, lr=0.0001]\nSteps: 45%|████▍ | 313/700 [02:15<02:47, 
2.31it/s, loss=0.116, lr=0.0001]\nSteps: 45%|████▍ | 313/700 [02:15<02:47, 2.31it/s, loss=0.309, lr=0.0001]\nSteps: 45%|████▍ | 314/700 [02:16<02:47, 2.31it/s, loss=0.309, lr=0.0001]\nSteps: 45%|████▍ | 314/700 [02:16<02:47, 2.31it/s, loss=0.0657, lr=0.0001]\nSteps: 45%|████▌ | 315/700 [02:16<02:46, 2.31it/s, loss=0.0657, lr=0.0001]\nSteps: 45%|████▌ | 315/700 [02:16<02:46, 2.31it/s, loss=0.0988, lr=0.0001]\nSteps: 45%|████▌ | 316/700 [02:17<02:46, 2.31it/s, loss=0.0988, lr=0.0001]\nSteps: 45%|████▌ | 316/700 [02:17<02:46, 2.31it/s, loss=0.103, lr=0.0001] \nSteps: 45%|████▌ | 317/700 [02:17<02:45, 2.31it/s, loss=0.103, lr=0.0001]\nSteps: 45%|████▌ | 317/700 [02:17<02:45, 2.31it/s, loss=0.282, lr=0.0001]\nSteps: 45%|████▌ | 318/700 [02:18<02:45, 2.31it/s, loss=0.282, lr=0.0001]\nSteps: 45%|████▌ | 318/700 [02:18<02:45, 2.31it/s, loss=0.162, lr=0.0001]\nSteps: 46%|████▌ | 319/700 [02:18<02:45, 2.31it/s, loss=0.162, lr=0.0001]\nSteps: 46%|████▌ | 319/700 [02:18<02:45, 2.31it/s, loss=0.11, lr=0.0001] \nSteps: 46%|████▌ | 320/700 [02:18<02:44, 2.31it/s, loss=0.11, lr=0.0001]\nSteps: 46%|████▌ | 320/700 [02:18<02:44, 2.31it/s, loss=0.165, lr=0.0001]\nSteps: 46%|████▌ | 321/700 [02:19<02:44, 2.30it/s, loss=0.165, lr=0.0001]\nSteps: 46%|████▌ | 321/700 [02:19<02:44, 2.30it/s, loss=0.105, lr=0.0001]\nSteps: 46%|████▌ | 322/700 [02:19<02:44, 2.30it/s, loss=0.105, lr=0.0001]\nSteps: 46%|████▌ | 322/700 [02:19<02:44, 2.30it/s, loss=0.246, lr=0.0001]\nSteps: 46%|████▌ | 323/700 [02:20<02:43, 2.30it/s, loss=0.246, lr=0.0001]\nSteps: 46%|████▌ | 323/700 [02:20<02:43, 2.30it/s, loss=0.0769, lr=0.0001]\nSteps: 46%|████▋ | 324/700 [02:20<02:43, 2.31it/s, loss=0.0769, lr=0.0001]\nSteps: 46%|████▋ | 324/700 [02:20<02:43, 2.31it/s, loss=0.101, lr=0.0001] \nSteps: 46%|████▋ | 325/700 [02:21<02:42, 2.31it/s, loss=0.101, lr=0.0001]\nSteps: 46%|████▋ | 325/700 [02:21<02:42, 2.31it/s, loss=0.161, lr=0.0001]\nSteps: 47%|████▋ | 326/700 [02:21<02:42, 2.31it/s, loss=0.161, lr=0.0001]\nSteps: 
47%|████▋ | 326/700 [02:21<02:42, 2.31it/s, loss=0.175, lr=0.0001]\nSteps: 47%|████▋ | 327/700 [02:22<02:41, 2.31it/s, loss=0.175, lr=0.0001]\nSteps: 47%|████▋ | 327/700 [02:22<02:41, 2.31it/s, loss=0.147, lr=0.0001]\nSteps: 47%|████▋ | 328/700 [02:22<02:40, 2.31it/s, loss=0.147, lr=0.0001]\nSteps: 47%|████▋ | 328/700 [02:22<02:40, 2.31it/s, loss=0.258, lr=0.0001]\nSteps: 47%|████▋ | 329/700 [02:22<02:40, 2.31it/s, loss=0.258, lr=0.0001]\nSteps: 47%|████▋ | 329/700 [02:22<02:40, 2.31it/s, loss=0.117, lr=0.0001]\nSteps: 47%|████▋ | 330/700 [02:23<02:40, 2.31it/s, loss=0.117, lr=0.0001]\nSteps: 47%|████▋ | 330/700 [02:23<02:40, 2.31it/s, loss=0.0967, lr=0.0001]\nSteps: 47%|████▋ | 331/700 [02:23<02:39, 2.31it/s, loss=0.0967, lr=0.0001]\nSteps: 47%|████▋ | 331/700 [02:23<02:39, 2.31it/s, loss=0.0688, lr=0.0001]\nSteps: 47%|████▋ | 332/700 [02:24<02:39, 2.31it/s, loss=0.0688, lr=0.0001]\nSteps: 47%|████▋ | 332/700 [02:24<02:39, 2.31it/s, loss=0.102, lr=0.0001] \nSteps: 48%|████▊ | 333/700 [02:24<02:38, 2.31it/s, loss=0.102, lr=0.0001]\nSteps: 48%|████▊ | 333/700 [02:24<02:38, 2.31it/s, loss=0.0854, lr=0.0001]\nSteps: 48%|████▊ | 334/700 [02:25<02:38, 2.31it/s, loss=0.0854, lr=0.0001]\nSteps: 48%|████▊ | 334/700 [02:25<02:38, 2.31it/s, loss=0.0907, lr=0.0001]\nSteps: 48%|████▊ | 335/700 [02:25<02:37, 2.31it/s, loss=0.0907, lr=0.0001]\nSteps: 48%|████▊ | 335/700 [02:25<02:37, 2.31it/s, loss=0.243, lr=0.0001] \nSteps: 48%|████▊ | 336/700 [02:25<02:37, 2.31it/s, loss=0.243, lr=0.0001]\nSteps: 48%|████▊ | 336/700 [02:25<02:37, 2.31it/s, loss=0.182, lr=0.0001]\nSteps: 48%|████▊ | 337/700 [02:26<02:37, 2.30it/s, loss=0.182, lr=0.0001]\nSteps: 48%|████▊ | 337/700 [02:26<02:37, 2.30it/s, loss=0.165, lr=0.0001]\nSteps: 48%|████▊ | 338/700 [02:26<02:37, 2.30it/s, loss=0.165, lr=0.0001]\nSteps: 48%|████▊ | 338/700 [02:26<02:37, 2.30it/s, loss=0.116, lr=0.0001]\nSteps: 48%|████▊ | 339/700 [02:27<02:36, 2.31it/s, loss=0.116, lr=0.0001]\nSteps: 48%|████▊ | 339/700 [02:27<02:36, 
2.31it/s, loss=0.0656, lr=0.0001]\nSteps: 49%|████▊ | 340/700 [02:27<02:36, 2.31it/s, loss=0.0656, lr=0.0001]\nSteps: 49%|████▊ | 340/700 [02:27<02:36, 2.31it/s, loss=0.0485, lr=0.0001]\nSteps: 49%|████▊ | 341/700 [02:28<02:35, 2.31it/s, loss=0.0485, lr=0.0001]\nSteps: 49%|████▊ | 341/700 [02:28<02:35, 2.31it/s, loss=0.0723, lr=0.0001]\nSteps: 49%|████▉ | 342/700 [02:28<02:34, 2.31it/s, loss=0.0723, lr=0.0001]\nSteps: 49%|████▉ | 342/700 [02:28<02:34, 2.31it/s, loss=0.057, lr=0.0001] \nSteps: 49%|████▉ | 343/700 [02:28<02:34, 2.31it/s, loss=0.057, lr=0.0001]\nSteps: 49%|████▉ | 343/700 [02:28<02:34, 2.31it/s, loss=0.159, lr=0.0001]\nSteps: 49%|████▉ | 344/700 [02:29<02:34, 2.31it/s, loss=0.159, lr=0.0001]\nSteps: 49%|████▉ | 344/700 [02:29<02:34, 2.31it/s, loss=0.193, lr=0.0001]\nSteps: 49%|████▉ | 345/700 [02:29<02:33, 2.31it/s, loss=0.193, lr=0.0001]\nSteps: 49%|████▉ | 345/700 [02:29<02:33, 2.31it/s, loss=0.236, lr=0.0001]\nSteps: 49%|████▉ | 346/700 [02:30<02:33, 2.31it/s, loss=0.236, lr=0.0001]\nSteps: 49%|████▉ | 346/700 [02:30<02:33, 2.31it/s, loss=0.108, lr=0.0001]\nSteps: 50%|████▉ | 347/700 [02:30<02:33, 2.31it/s, loss=0.108, lr=0.0001]\nSteps: 50%|████▉ | 347/700 [02:30<02:33, 2.31it/s, loss=0.0848, lr=0.0001]\nSteps: 50%|████▉ | 348/700 [02:31<02:32, 2.31it/s, loss=0.0848, lr=0.0001]\nSteps: 50%|████▉ | 348/700 [02:31<02:32, 2.31it/s, loss=0.135, lr=0.0001] \nSteps: 50%|████▉ | 349/700 [02:31<02:32, 2.31it/s, loss=0.135, lr=0.0001]\nSteps: 50%|████▉ | 349/700 [02:31<02:32, 2.31it/s, loss=0.141, lr=0.0001]\nSteps: 50%|█████ | 350/700 [02:31<02:31, 2.31it/s, loss=0.141, lr=0.0001]\nSteps: 50%|█████ | 350/700 [02:31<02:31, 2.31it/s, loss=0.0529, lr=0.0001]\nSteps: 50%|█████ | 351/700 [02:32<02:31, 2.31it/s, loss=0.0529, lr=0.0001]\nSteps: 50%|█████ | 351/700 [02:32<02:31, 2.31it/s, loss=0.0894, lr=0.0001]\nSteps: 50%|█████ | 352/700 [02:32<02:30, 2.31it/s, loss=0.0894, lr=0.0001]\nSteps: 50%|█████ | 352/700 [02:32<02:30, 2.31it/s, loss=0.343, lr=0.0001] 
\nSteps: 50%|█████ | 353/700 [02:33<02:30, 2.30it/s, loss=0.343, lr=0.0001]\nSteps: 50%|█████ | 353/700 [02:33<02:30, 2.30it/s, loss=0.195, lr=0.0001]\nSteps: 51%|█████ | 354/700 [02:33<02:30, 2.30it/s, loss=0.195, lr=0.0001]\nSteps: 51%|█████ | 354/700 [02:33<02:30, 2.30it/s, loss=0.107, lr=0.0001]\nSteps: 51%|█████ | 355/700 [02:34<02:29, 2.30it/s, loss=0.107, lr=0.0001]\nSteps: 51%|█████ | 355/700 [02:34<02:29, 2.30it/s, loss=0.0284, lr=0.0001]\nSteps: 51%|█████ | 356/700 [02:34<02:29, 2.31it/s, loss=0.0284, lr=0.0001]\nSteps: 51%|█████ | 356/700 [02:34<02:29, 2.31it/s, loss=0.167, lr=0.0001] \nSteps: 51%|█████ | 357/700 [02:35<02:28, 2.31it/s, loss=0.167, lr=0.0001]\nSteps: 51%|█████ | 357/700 [02:35<02:28, 2.31it/s, loss=0.14, lr=0.0001] \nSteps: 51%|█████ | 358/700 [02:35<02:28, 2.31it/s, loss=0.14, lr=0.0001]\nSteps: 51%|█████ | 358/700 [02:35<02:28, 2.31it/s, loss=0.111, lr=0.0001]\nSteps: 51%|█████▏ | 359/700 [02:35<02:27, 2.31it/s, loss=0.111, lr=0.0001]\nSteps: 51%|█████▏ | 359/700 [02:35<02:27, 2.31it/s, loss=0.199, lr=0.0001]\nSteps: 51%|█████▏ | 360/700 [02:36<02:27, 2.31it/s, loss=0.199, lr=0.0001]\nSteps: 51%|█████▏ | 360/700 [02:36<02:27, 2.31it/s, loss=0.2, lr=0.0001] \nSteps: 52%|█████▏ | 361/700 [02:36<02:26, 2.31it/s, loss=0.2, lr=0.0001]\nSteps: 52%|█████▏ | 361/700 [02:36<02:26, 2.31it/s, loss=0.0617, lr=0.0001]\nSteps: 52%|█████▏ | 362/700 [02:37<02:26, 2.31it/s, loss=0.0617, lr=0.0001]\nSteps: 52%|█████▏ | 362/700 [02:37<02:26, 2.31it/s, loss=0.202, lr=0.0001] \nSteps: 52%|█████▏ | 363/700 [02:37<02:25, 2.31it/s, loss=0.202, lr=0.0001]\nSteps: 52%|█████▏ | 363/700 [02:37<02:25, 2.31it/s, loss=0.081, lr=0.0001]\nSteps: 52%|█████▏ | 364/700 [02:38<02:25, 2.31it/s, loss=0.081, lr=0.0001]\nSteps: 52%|█████▏ | 364/700 [02:38<02:25, 2.31it/s, loss=0.158, lr=0.0001]\nSteps: 52%|█████▏ | 365/700 [02:38<02:25, 2.31it/s, loss=0.158, lr=0.0001]\nSteps: 52%|█████▏ | 365/700 [02:38<02:25, 2.31it/s, loss=0.111, lr=0.0001]\nSteps: 52%|█████▏ | 366/700 
[02:38<02:24, 2.31it/s, loss=0.111, lr=0.0001]\nSteps: 52%|█████▏ | 366/700 [02:38<02:24, 2.31it/s, loss=0.166, lr=0.0001]\nSteps: 52%|█████▏ | 367/700 [02:39<02:24, 2.31it/s, loss=0.166, lr=0.0001]\nSteps: 52%|█████▏ | 367/700 [02:39<02:24, 2.31it/s, loss=0.261, lr=0.0001]\nSteps: 53%|█████▎ | 368/700 [02:39<02:23, 2.31it/s, loss=0.261, lr=0.0001]\nSteps: 53%|█████▎ | 368/700 [02:39<02:23, 2.31it/s, loss=0.119, lr=0.0001]\nSteps: 53%|█████▎ | 369/700 [02:40<02:24, 2.30it/s, loss=0.119, lr=0.0001]\nSteps: 53%|█████▎ | 369/700 [02:40<02:24, 2.30it/s, loss=0.0896, lr=0.0001]\nSteps: 53%|█████▎ | 370/700 [02:40<02:23, 2.30it/s, loss=0.0896, lr=0.0001]\nSteps: 53%|█████▎ | 370/700 [02:40<02:23, 2.30it/s, loss=0.101, lr=0.0001] \nSteps: 53%|█████▎ | 371/700 [02:41<02:23, 2.30it/s, loss=0.101, lr=0.0001]\nSteps: 53%|█████▎ | 371/700 [02:41<02:23, 2.30it/s, loss=0.112, lr=0.0001]\nSteps: 53%|█████▎ | 372/700 [02:41<02:22, 2.30it/s, loss=0.112, lr=0.0001]\nSteps: 53%|█████▎ | 372/700 [02:41<02:22, 2.30it/s, loss=0.132, lr=0.0001]\nSteps: 53%|█████▎ | 373/700 [02:41<02:21, 2.30it/s, loss=0.132, lr=0.0001]\nSteps: 53%|█████▎ | 373/700 [02:41<02:21, 2.30it/s, loss=0.15, lr=0.0001] \nSteps: 53%|█████▎ | 374/700 [02:42<02:21, 2.31it/s, loss=0.15, lr=0.0001]\nSteps: 53%|█████▎ | 374/700 [02:42<02:21, 2.31it/s, loss=0.326, lr=0.0001]\nSteps: 54%|█████▎ | 375/700 [02:42<02:20, 2.31it/s, loss=0.326, lr=0.0001]\nSteps: 54%|█████▎ | 375/700 [02:42<02:20, 2.31it/s, loss=0.117, lr=0.0001]\nSteps: 54%|█████▎ | 376/700 [02:43<02:20, 2.30it/s, loss=0.117, lr=0.0001]\nSteps: 54%|█████▎ | 376/700 [02:43<02:20, 2.30it/s, loss=0.128, lr=0.0001]\nSteps: 54%|█████▍ | 377/700 [02:43<02:20, 2.30it/s, loss=0.128, lr=0.0001]\nSteps: 54%|█████▍ | 377/700 [02:43<02:20, 2.30it/s, loss=0.146, lr=0.0001]\nSteps: 54%|█████▍ | 378/700 [02:44<02:19, 2.31it/s, loss=0.146, lr=0.0001]\nSteps: 54%|█████▍ | 378/700 [02:44<02:19, 2.31it/s, loss=0.219, lr=0.0001]\nSteps: 54%|█████▍ | 379/700 [02:44<02:19, 
2.31it/s, loss=0.219, lr=0.0001]\nSteps: 54%|█████▍ | 379/700 [02:44<02:19, 2.31it/s, loss=0.0741, lr=0.0001]\nSteps: 54%|█████▍ | 380/700 [02:44<02:18, 2.31it/s, loss=0.0741, lr=0.0001]\nSteps: 54%|█████▍ | 380/700 [02:45<02:18, 2.31it/s, loss=0.104, lr=0.0001] \nSteps: 54%|█████▍ | 381/700 [02:45<02:18, 2.31it/s, loss=0.104, lr=0.0001]\nSteps: 54%|█████▍ | 381/700 [02:45<02:18, 2.31it/s, loss=0.0772, lr=0.0001]\nSteps: 55%|█████▍ | 382/700 [02:45<02:18, 2.30it/s, loss=0.0772, lr=0.0001]\nSteps: 55%|█████▍ | 382/700 [02:45<02:18, 2.30it/s, loss=0.213, lr=0.0001] \nSteps: 55%|█████▍ | 383/700 [02:46<02:29, 2.11it/s, loss=0.213, lr=0.0001]\nSteps: 55%|█████▍ | 383/700 [02:46<02:29, 2.11it/s, loss=0.197, lr=0.0001]\nSteps: 55%|█████▍ | 384/700 [02:46<02:25, 2.16it/s, loss=0.197, lr=0.0001]\nSteps: 55%|█████▍ | 384/700 [02:46<02:25, 2.16it/s, loss=0.172, lr=0.0001]\nSteps: 55%|█████▌ | 385/700 [02:47<02:23, 2.20it/s, loss=0.172, lr=0.0001]\nSteps: 55%|█████▌ | 385/700 [02:47<02:23, 2.20it/s, loss=0.108, lr=0.0001]\nSteps: 55%|█████▌ | 386/700 [02:47<02:20, 2.23it/s, loss=0.108, lr=0.0001]\nSteps: 55%|█████▌ | 386/700 [02:47<02:20, 2.23it/s, loss=0.0851, lr=0.0001]\nSteps: 55%|█████▌ | 387/700 [02:48<02:18, 2.25it/s, loss=0.0851, lr=0.0001]\nSteps: 55%|█████▌ | 387/700 [02:48<02:18, 2.25it/s, loss=0.037, lr=0.0001] \nSteps: 55%|█████▌ | 388/700 [02:48<02:17, 2.27it/s, loss=0.037, lr=0.0001]\nSteps: 55%|█████▌ | 388/700 [02:48<02:17, 2.27it/s, loss=0.278, lr=0.0001]\nSteps: 56%|█████▌ | 389/700 [02:49<02:16, 2.28it/s, loss=0.278, lr=0.0001]\nSteps: 56%|█████▌ | 389/700 [02:49<02:16, 2.28it/s, loss=0.0438, lr=0.0001]\nSteps: 56%|█████▌ | 390/700 [02:49<02:15, 2.29it/s, loss=0.0438, lr=0.0001]\nSteps: 56%|█████▌ | 390/700 [02:49<02:15, 2.29it/s, loss=0.171, lr=0.0001] \nSteps: 56%|█████▌ | 391/700 [02:49<02:14, 2.29it/s, loss=0.171, lr=0.0001]\nSteps: 56%|█████▌ | 391/700 [02:49<02:14, 2.29it/s, loss=0.0965, lr=0.0001]\nSteps: 56%|█████▌ | 392/700 [02:50<02:14, 2.30it/s, 
loss=0.0965, lr=0.0001]\nSteps: 56%|█████▌ | 392/700 [02:50<02:14, 2.30it/s, loss=0.061, lr=0.0001] \nSteps: 56%|█████▌ | 393/700 [02:50<02:13, 2.30it/s, loss=0.061, lr=0.0001]\nSteps: 56%|█████▌ | 393/700 [02:50<02:13, 2.30it/s, loss=0.0909, lr=0.0001]\nSteps: 56%|█████▋ | 394/700 [02:51<02:12, 2.30it/s, loss=0.0909, lr=0.0001]\nSteps: 56%|█████▋ | 394/700 [02:51<02:12, 2.30it/s, loss=0.0822, lr=0.0001]\nSteps: 56%|█████▋ | 395/700 [02:51<02:12, 2.31it/s, loss=0.0822, lr=0.0001]\nSteps: 56%|█████▋ | 395/700 [02:51<02:12, 2.31it/s, loss=0.0202, lr=0.0001]\nSteps: 57%|█████▋ | 396/700 [02:52<02:11, 2.31it/s, loss=0.0202, lr=0.0001]\nSteps: 57%|█████▋ | 396/700 [02:52<02:11, 2.31it/s, loss=0.084, lr=0.0001] \nSteps: 57%|█████▋ | 397/700 [02:52<02:11, 2.31it/s, loss=0.084, lr=0.0001]\nSteps: 57%|█████▋ | 397/700 [02:52<02:11, 2.31it/s, loss=0.165, lr=0.0001]\nSteps: 57%|█████▋ | 398/700 [02:52<02:10, 2.31it/s, loss=0.165, lr=0.0001]\nSteps: 57%|█████▋ | 398/700 [02:52<02:10, 2.31it/s, loss=0.121, lr=0.0001]\nSteps: 57%|█████▋ | 399/700 [02:53<02:10, 2.31it/s, loss=0.121, lr=0.0001]\nSteps: 57%|█████▋ | 399/700 [02:53<02:10, 2.31it/s, loss=0.17, lr=0.0001] \nSteps: 57%|█████▋ | 400/700 [02:53<02:09, 2.31it/s, loss=0.17, lr=0.0001]\nSteps: 57%|█████▋ | 400/700 [02:53<02:09, 2.31it/s, loss=0.176, lr=0.0001]\nSteps: 57%|█████▋ | 401/700 [02:54<02:10, 2.30it/s, loss=0.176, lr=0.0001]\nSteps: 57%|█████▋ | 401/700 [02:54<02:10, 2.30it/s, loss=0.165, lr=0.0001]\nSteps: 57%|█████▋ | 402/700 [02:54<02:09, 2.30it/s, loss=0.165, lr=0.0001]\nSteps: 57%|█████▋ | 402/700 [02:54<02:09, 2.30it/s, loss=0.0535, lr=0.0001]\nSteps: 58%|█████▊ | 403/700 [02:55<02:08, 2.31it/s, loss=0.0535, lr=0.0001]\nSteps: 58%|█████▊ | 403/700 [02:55<02:08, 2.31it/s, loss=0.15, lr=0.0001] \nSteps: 58%|█████▊ | 404/700 [02:55<02:08, 2.31it/s, loss=0.15, lr=0.0001]\nSteps: 58%|█████▊ | 404/700 [02:55<02:08, 2.31it/s, loss=0.122, lr=0.0001]\nSteps: 58%|█████▊ | 405/700 [02:55<02:07, 2.31it/s, loss=0.122, 
lr=0.0001]\nSteps: 58%|█████▊ | 405/700 [02:55<02:07, 2.31it/s, loss=0.111, lr=0.0001]\nSteps: 58%|█████▊ | 406/700 [02:56<02:07, 2.31it/s, loss=0.111, lr=0.0001]\nSteps: 58%|█████▊ | 406/700 [02:56<02:07, 2.31it/s, loss=0.148, lr=0.0001]\nSteps: 58%|█████▊ | 407/700 [02:56<02:06, 2.31it/s, loss=0.148, lr=0.0001]\nSteps: 58%|█████▊ | 407/700 [02:56<02:06, 2.31it/s, loss=0.135, lr=0.0001]\nSteps: 58%|█████▊ | 408/700 [02:57<02:06, 2.31it/s, loss=0.135, lr=0.0001]\nSteps: 58%|█████▊ | 408/700 [02:57<02:06, 2.31it/s, loss=0.0779, lr=0.0001]\nSteps: 58%|█████▊ | 409/700 [02:57<02:05, 2.31it/s, loss=0.0779, lr=0.0001]\nSteps: 58%|█████▊ | 409/700 [02:57<02:05, 2.31it/s, loss=0.125, lr=0.0001] \nSteps: 59%|█████▊ | 410/700 [02:58<02:05, 2.31it/s, loss=0.125, lr=0.0001]\nSteps: 59%|█████▊ | 410/700 [02:58<02:05, 2.31it/s, loss=0.116, lr=0.0001]\nSteps: 59%|█████▊ | 411/700 [02:58<02:05, 2.31it/s, loss=0.116, lr=0.0001]\nSteps: 59%|█████▊ | 411/700 [02:58<02:05, 2.31it/s, loss=0.187, lr=0.0001]\nSteps: 59%|█████▉ | 412/700 [02:58<02:04, 2.31it/s, loss=0.187, lr=0.0001]\nSteps: 59%|█████▉ | 412/700 [02:59<02:04, 2.31it/s, loss=0.0657, lr=0.0001]\nSteps: 59%|█████▉ | 413/700 [02:59<02:04, 2.31it/s, loss=0.0657, lr=0.0001]\nSteps: 59%|█████▉ | 413/700 [02:59<02:04, 2.31it/s, loss=0.0886, lr=0.0001]\nSteps: 59%|█████▉ | 414/700 [02:59<02:03, 2.31it/s, loss=0.0886, lr=0.0001]\nSteps: 59%|█████▉ | 414/700 [02:59<02:03, 2.31it/s, loss=0.127, lr=0.0001] \nSteps: 59%|█████▉ | 415/700 [03:00<02:03, 2.31it/s, loss=0.127, lr=0.0001]\nSteps: 59%|█████▉ | 415/700 [03:00<02:03, 2.31it/s, loss=0.0474, lr=0.0001]\nSteps: 59%|█████▉ | 416/700 [03:00<02:02, 2.31it/s, loss=0.0474, lr=0.0001]\nSteps: 59%|█████▉ | 416/700 [03:00<02:02, 2.31it/s, loss=0.135, lr=0.0001] \nSteps: 60%|█████▉ | 417/700 [03:01<02:03, 2.30it/s, loss=0.135, lr=0.0001]\nSteps: 60%|█████▉ | 417/700 [03:01<02:03, 2.30it/s, loss=0.127, lr=0.0001]\nSteps: 60%|█████▉ | 418/700 [03:01<02:02, 2.30it/s, loss=0.127, 
lr=0.0001]\nSteps: 60%|█████▉ | 418/700 [03:01<02:02, 2.30it/s, loss=0.136, lr=0.0001]\nSteps: 60%|█████▉ | 419/700 [03:02<02:01, 2.31it/s, loss=0.136, lr=0.0001]\nSteps: 60%|█████▉ | 419/700 [03:02<02:01, 2.31it/s, loss=0.197, lr=0.0001]\nSteps: 60%|██████ | 420/700 [03:02<02:01, 2.31it/s, loss=0.197, lr=0.0001]\nSteps: 60%|██████ | 420/700 [03:02<02:01, 2.31it/s, loss=0.0675, lr=0.0001]\nSteps: 60%|██████ | 421/700 [03:02<02:00, 2.31it/s, loss=0.0675, lr=0.0001]\nSteps: 60%|██████ | 421/700 [03:02<02:00, 2.31it/s, loss=0.0898, lr=0.0001]\nSteps: 60%|██████ | 422/700 [03:03<02:00, 2.31it/s, loss=0.0898, lr=0.0001]\nSteps: 60%|██████ | 422/700 [03:03<02:00, 2.31it/s, loss=0.118, lr=0.0001] \nSteps: 60%|██████ | 423/700 [03:03<01:59, 2.31it/s, loss=0.118, lr=0.0001]\nSteps: 60%|██████ | 423/700 [03:03<01:59, 2.31it/s, loss=0.14, lr=0.0001] \nSteps: 61%|██████ | 424/700 [03:04<01:59, 2.31it/s, loss=0.14, lr=0.0001]\nSteps: 61%|██████ | 424/700 [03:04<01:59, 2.31it/s, loss=0.0937, lr=0.0001]\nSteps: 61%|██████ | 425/700 [03:04<01:59, 2.31it/s, loss=0.0937, lr=0.0001]\nSteps: 61%|██████ | 425/700 [03:04<01:59, 2.31it/s, loss=0.138, lr=0.0001] \nSteps: 61%|██████ | 426/700 [03:05<01:58, 2.31it/s, loss=0.138, lr=0.0001]\nSteps: 61%|██████ | 426/700 [03:05<01:58, 2.31it/s, loss=0.158, lr=0.0001]\nSteps: 61%|██████ | 427/700 [03:05<01:58, 2.31it/s, loss=0.158, lr=0.0001]\nSteps: 61%|██████ | 427/700 [03:05<01:58, 2.31it/s, loss=0.0508, lr=0.0001]\nSteps: 61%|██████ | 428/700 [03:05<01:57, 2.31it/s, loss=0.0508, lr=0.0001]\nSteps: 61%|██████ | 428/700 [03:05<01:57, 2.31it/s, loss=0.0954, lr=0.0001]\nSteps: 61%|██████▏ | 429/700 [03:06<01:57, 2.31it/s, loss=0.0954, lr=0.0001]\nSteps: 61%|██████▏ | 429/700 [03:06<01:57, 2.31it/s, loss=0.315, lr=0.0001] \nSteps: 61%|██████▏ | 430/700 [03:06<01:56, 2.31it/s, loss=0.315, lr=0.0001]\nSteps: 61%|██████▏ | 430/700 [03:06<01:56, 2.31it/s, loss=0.166, lr=0.0001]\nSteps: 62%|██████▏ | 431/700 [03:07<01:56, 2.31it/s, loss=0.166, 
lr=0.0001]\nSteps: 62%|██████▏ | 431/700 [03:07<01:56, 2.31it/s, loss=0.09, lr=0.0001] \nSteps: 62%|██████▏ | 432/700 [03:07<01:55, 2.31it/s, loss=0.09, lr=0.0001]\nSteps: 62%|██████▏ | 432/700 [03:07<01:55, 2.31it/s, loss=0.0611, lr=0.0001]\nSteps: 62%|██████▏ | 433/700 [03:08<01:56, 2.30it/s, loss=0.0611, lr=0.0001]\nSteps: 62%|██████▏ | 433/700 [03:08<01:56, 2.30it/s, loss=0.23, lr=0.0001] \nSteps: 62%|██████▏ | 434/700 [03:08<01:55, 2.30it/s, loss=0.23, lr=0.0001]\nSteps: 62%|██████▏ | 434/700 [03:08<01:55, 2.30it/s, loss=0.221, lr=0.0001]\nSteps: 62%|██████▏ | 435/700 [03:08<01:55, 2.30it/s, loss=0.221, lr=0.0001]\nSteps: 62%|██████▏ | 435/700 [03:08<01:55, 2.30it/s, loss=0.0432, lr=0.0001]\nSteps: 62%|██████▏ | 436/700 [03:09<01:54, 2.31it/s, loss=0.0432, lr=0.0001]\nSteps: 62%|██████▏ | 436/700 [03:09<01:54, 2.31it/s, loss=0.127, lr=0.0001] \nSteps: 62%|██████▏ | 437/700 [03:09<01:53, 2.31it/s, loss=0.127, lr=0.0001]\nSteps: 62%|██████▏ | 437/700 [03:09<01:53, 2.31it/s, loss=0.121, lr=0.0001]\nSteps: 63%|██████▎ | 438/700 [03:10<01:53, 2.31it/s, loss=0.121, lr=0.0001]\nSteps: 63%|██████▎ | 438/700 [03:10<01:53, 2.31it/s, loss=0.104, lr=0.0001]\nSteps: 63%|██████▎ | 439/700 [03:10<01:53, 2.31it/s, loss=0.104, lr=0.0001]\nSteps: 63%|██████▎ | 439/700 [03:10<01:53, 2.31it/s, loss=0.0318, lr=0.0001]\nSteps: 63%|██████▎ | 440/700 [03:11<01:52, 2.31it/s, loss=0.0318, lr=0.0001]\nSteps: 63%|██████▎ | 440/700 [03:11<01:52, 2.31it/s, loss=0.109, lr=0.0001] \nSteps: 63%|██████▎ | 441/700 [03:11<01:52, 2.31it/s, loss=0.109, lr=0.0001]\nSteps: 63%|██████▎ | 441/700 [03:11<01:52, 2.31it/s, loss=0.0869, lr=0.0001]\nSteps: 63%|██████▎ | 442/700 [03:11<01:51, 2.31it/s, loss=0.0869, lr=0.0001]\nSteps: 63%|██████▎ | 442/700 [03:12<01:51, 2.31it/s, loss=0.0479, lr=0.0001]\nSteps: 63%|██████▎ | 443/700 [03:12<01:51, 2.31it/s, loss=0.0479, lr=0.0001]\nSteps: 63%|██████▎ | 443/700 [03:12<01:51, 2.31it/s, loss=0.0615, lr=0.0001]\nSteps: 63%|██████▎ | 444/700 [03:12<01:50, 
2.31it/s, loss=0.0615, lr=0.0001]\nSteps: 63%|██████▎ | 444/700 [03:12<01:50, 2.31it/s, loss=0.0695, lr=0.0001]\nSteps: 64%|██████▎ | 445/700 [03:13<01:50, 2.31it/s, loss=0.0695, lr=0.0001]\nSteps: 64%|██████▎ | 445/700 [03:13<01:50, 2.31it/s, loss=0.109, lr=0.0001] \nSteps: 64%|██████▎ | 446/700 [03:13<01:49, 2.31it/s, loss=0.109, lr=0.0001]\nSteps: 64%|██████▎ | 446/700 [03:13<01:49, 2.31it/s, loss=0.155, lr=0.0001]\nSteps: 64%|██████▍ | 447/700 [03:14<01:49, 2.31it/s, loss=0.155, lr=0.0001]\nSteps: 64%|██████▍ | 447/700 [03:14<01:49, 2.31it/s, loss=0.0106, lr=0.0001]\nSteps: 64%|██████▍ | 448/700 [03:14<01:49, 2.31it/s, loss=0.0106, lr=0.0001]\nSteps: 64%|██████▍ | 448/700 [03:14<01:49, 2.31it/s, loss=0.176, lr=0.0001] \nSteps: 64%|██████▍ | 449/700 [03:15<01:49, 2.30it/s, loss=0.176, lr=0.0001]\nSteps: 64%|██████▍ | 449/700 [03:15<01:49, 2.30it/s, loss=0.193, lr=0.0001]\nSteps: 64%|██████▍ | 450/700 [03:15<01:48, 2.30it/s, loss=0.193, lr=0.0001]\nSteps: 64%|██████▍ | 450/700 [03:15<01:48, 2.30it/s, loss=0.104, lr=0.0001]\nSteps: 64%|██████▍ | 451/700 [03:15<01:47, 2.31it/s, loss=0.104, lr=0.0001]\nSteps: 64%|██████▍ | 451/700 [03:15<01:47, 2.31it/s, loss=0.0734, lr=0.0001]\nSteps: 65%|██████▍ | 452/700 [03:16<01:47, 2.31it/s, loss=0.0734, lr=0.0001]\nSteps: 65%|██████▍ | 452/700 [03:16<01:47, 2.31it/s, loss=0.272, lr=0.0001] \nSteps: 65%|██████▍ | 453/700 [03:16<01:47, 2.31it/s, loss=0.272, lr=0.0001]\nSteps: 65%|██████▍ | 453/700 [03:16<01:47, 2.31it/s, loss=0.0395, lr=0.0001]\nSteps: 65%|██████▍ | 454/700 [03:17<01:46, 2.31it/s, loss=0.0395, lr=0.0001]\nSteps: 65%|██████▍ | 454/700 [03:17<01:46, 2.31it/s, loss=0.118, lr=0.0001] \nSteps: 65%|██████▌ | 455/700 [03:17<01:46, 2.31it/s, loss=0.118, lr=0.0001]\nSteps: 65%|██████▌ | 455/700 [03:17<01:46, 2.31it/s, loss=0.0978, lr=0.0001]\nSteps: 65%|██████▌ | 456/700 [03:18<01:45, 2.31it/s, loss=0.0978, lr=0.0001]\nSteps: 65%|██████▌ | 456/700 [03:18<01:45, 2.31it/s, loss=0.152, lr=0.0001] \nSteps: 65%|██████▌ | 
457/700 [03:18<01:45, 2.31it/s, loss=0.152, lr=0.0001]\nSteps: 65%|██████▌ | 457/700 [03:18<01:45, 2.31it/s, loss=0.095, lr=0.0001]\nSteps: 65%|██████▌ | 458/700 [03:18<01:44, 2.31it/s, loss=0.095, lr=0.0001]\nSteps: 65%|██████▌ | 458/700 [03:18<01:44, 2.31it/s, loss=0.178, lr=0.0001]\nSteps: 66%|██████▌ | 459/700 [03:19<01:44, 2.31it/s, loss=0.178, lr=0.0001]\nSteps: 66%|██████▌ | 459/700 [03:19<01:44, 2.31it/s, loss=0.161, lr=0.0001]\nSteps: 66%|██████▌ | 460/700 [03:19<01:43, 2.31it/s, loss=0.161, lr=0.0001]\nSteps: 66%|██████▌ | 460/700 [03:19<01:43, 2.31it/s, loss=0.135, lr=0.0001]\nSteps: 66%|██████▌ | 461/700 [03:20<01:43, 2.31it/s, loss=0.135, lr=0.0001]\nSteps: 66%|██████▌ | 461/700 [03:20<01:43, 2.31it/s, loss=0.165, lr=0.0001]\nSteps: 66%|██████▌ | 462/700 [03:20<01:43, 2.31it/s, loss=0.165, lr=0.0001]\nSteps: 66%|██████▌ | 462/700 [03:20<01:43, 2.31it/s, loss=0.162, lr=0.0001]\nSteps: 66%|██████▌ | 463/700 [03:21<01:42, 2.31it/s, loss=0.162, lr=0.0001]\nSteps: 66%|██████▌ | 463/700 [03:21<01:42, 2.31it/s, loss=0.177, lr=0.0001]\nSteps: 66%|██████▋ | 464/700 [03:21<01:42, 2.31it/s, loss=0.177, lr=0.0001]\nSteps: 66%|██████▋ | 464/700 [03:21<01:42, 2.31it/s, loss=0.158, lr=0.0001]\nSteps: 66%|██████▋ | 465/700 [03:21<01:42, 2.30it/s, loss=0.158, lr=0.0001]\nSteps: 66%|██████▋ | 465/700 [03:21<01:42, 2.30it/s, loss=0.203, lr=0.0001]\nSteps: 67%|██████▋ | 466/700 [03:22<01:41, 2.30it/s, loss=0.203, lr=0.0001]\nSteps: 67%|██████▋ | 466/700 [03:22<01:41, 2.30it/s, loss=0.0449, lr=0.0001]\nSteps: 67%|██████▋ | 467/700 [03:22<01:41, 2.31it/s, loss=0.0449, lr=0.0001]\nSteps: 67%|██████▋ | 467/700 [03:22<01:41, 2.31it/s, loss=0.259, lr=0.0001] \nSteps: 67%|██████▋ | 468/700 [03:23<01:40, 2.31it/s, loss=0.259, lr=0.0001]\nSteps: 67%|██████▋ | 468/700 [03:23<01:40, 2.31it/s, loss=0.177, lr=0.0001]\nSteps: 67%|██████▋ | 469/700 [03:23<01:40, 2.31it/s, loss=0.177, lr=0.0001]\nSteps: 67%|██████▋ | 469/700 [03:23<01:40, 2.31it/s, loss=0.118, lr=0.0001]\nSteps: 
67%|██████▋ | 470/700 [03:24<01:39, 2.31it/s, loss=0.118, lr=0.0001]\nSteps: 67%|██████▋ | 470/700 [03:24<01:39, 2.31it/s, loss=0.164, lr=0.0001]\nSteps: 67%|██████▋ | 471/700 [03:24<01:39, 2.31it/s, loss=0.164, lr=0.0001]\nSteps: 67%|██████▋ | 471/700 [03:24<01:39, 2.31it/s, loss=0.0637, lr=0.0001]\nSteps: 67%|██████▋ | 472/700 [03:24<01:38, 2.31it/s, loss=0.0637, lr=0.0001]\nSteps: 67%|██████▋ | 472/700 [03:25<01:38, 2.31it/s, loss=0.101, lr=0.0001] \nSteps: 68%|██████▊ | 473/700 [03:25<01:38, 2.31it/s, loss=0.101, lr=0.0001]\nSteps: 68%|██████▊ | 473/700 [03:25<01:38, 2.31it/s, loss=0.197, lr=0.0001]\nSteps: 68%|██████▊ | 474/700 [03:25<01:37, 2.31it/s, loss=0.197, lr=0.0001]\nSteps: 68%|██████▊ | 474/700 [03:25<01:37, 2.31it/s, loss=0.246, lr=0.0001]\nSteps: 68%|██████▊ | 475/700 [03:26<01:37, 2.31it/s, loss=0.246, lr=0.0001]\nSteps: 68%|██████▊ | 475/700 [03:26<01:37, 2.31it/s, loss=0.0803, lr=0.0001]\nSteps: 68%|██████▊ | 476/700 [03:26<01:36, 2.31it/s, loss=0.0803, lr=0.0001]\nSteps: 68%|██████▊ | 476/700 [03:26<01:36, 2.31it/s, loss=0.131, lr=0.0001] \nSteps: 68%|██████▊ | 477/700 [03:27<01:36, 2.31it/s, loss=0.131, lr=0.0001]\nSteps: 68%|██████▊ | 477/700 [03:27<01:36, 2.31it/s, loss=0.0571, lr=0.0001]\nSteps: 68%|██████▊ | 478/700 [03:27<01:36, 2.31it/s, loss=0.0571, lr=0.0001]\nSteps: 68%|██████▊ | 478/700 [03:27<01:36, 2.31it/s, loss=0.126, lr=0.0001] \nSteps: 68%|██████▊ | 479/700 [03:27<01:35, 2.31it/s, loss=0.126, lr=0.0001]\nSteps: 68%|██████▊ | 479/700 [03:28<01:35, 2.31it/s, loss=0.148, lr=0.0001]\nSteps: 69%|██████▊ | 480/700 [03:28<01:35, 2.31it/s, loss=0.148, lr=0.0001]\nSteps: 69%|██████▊ | 480/700 [03:28<01:35, 2.31it/s, loss=0.0757, lr=0.0001]\nSteps: 69%|██████▊ | 481/700 [03:28<01:35, 2.30it/s, loss=0.0757, lr=0.0001]\nSteps: 69%|██████▊ | 481/700 [03:28<01:35, 2.30it/s, loss=0.118, lr=0.0001] \nSteps: 69%|██████▉ | 482/700 [03:29<01:34, 2.30it/s, loss=0.118, lr=0.0001]\nSteps: 69%|██████▉ | 482/700 [03:29<01:34, 2.30it/s, loss=0.233, 
lr=0.0001]\nSteps: 69%|██████▉ | 483/700 [03:29<01:34, 2.30it/s, loss=0.233, lr=0.0001]\nSteps: 69%|██████▉ | 483/700 [03:29<01:34, 2.30it/s, loss=0.146, lr=0.0001]\nSteps: 69%|██████▉ | 484/700 [03:30<01:33, 2.31it/s, loss=0.146, lr=0.0001]\nSteps: 69%|██████▉ | 484/700 [03:30<01:33, 2.31it/s, loss=0.129, lr=0.0001]\nSteps: 69%|██████▉ | 485/700 [03:30<01:33, 2.31it/s, loss=0.129, lr=0.0001]\nSteps: 69%|██████▉ | 485/700 [03:30<01:33, 2.31it/s, loss=0.179, lr=0.0001]\nSteps: 69%|██████▉ | 486/700 [03:31<01:32, 2.31it/s, loss=0.179, lr=0.0001]\nSteps: 69%|██████▉ | 486/700 [03:31<01:32, 2.31it/s, loss=0.0674, lr=0.0001]\nSteps: 70%|██████▉ | 487/700 [03:31<01:32, 2.31it/s, loss=0.0674, lr=0.0001]\nSteps: 70%|██████▉ | 487/700 [03:31<01:32, 2.31it/s, loss=0.187, lr=0.0001] \nSteps: 70%|██████▉ | 488/700 [03:31<01:31, 2.31it/s, loss=0.187, lr=0.0001]\nSteps: 70%|██████▉ | 488/700 [03:31<01:31, 2.31it/s, loss=0.106, lr=0.0001]\nSteps: 70%|██████▉ | 489/700 [03:32<01:31, 2.31it/s, loss=0.106, lr=0.0001]\nSteps: 70%|██████▉ | 489/700 [03:32<01:31, 2.31it/s, loss=0.0499, lr=0.0001]\nSteps: 70%|███████ | 490/700 [03:32<01:30, 2.31it/s, loss=0.0499, lr=0.0001]\nSteps: 70%|███████ | 490/700 [03:32<01:30, 2.31it/s, loss=0.11, lr=0.0001] \nSteps: 70%|███████ | 491/700 [03:33<01:30, 2.31it/s, loss=0.11, lr=0.0001]\nSteps: 70%|███████ | 491/700 [03:33<01:30, 2.31it/s, loss=0.0632, lr=0.0001]\nSteps: 70%|███████ | 492/700 [03:33<01:30, 2.31it/s, loss=0.0632, lr=0.0001]\nSteps: 70%|███████ | 492/700 [03:33<01:30, 2.31it/s, loss=0.0964, lr=0.0001]\nSteps: 70%|███████ | 493/700 [03:34<01:29, 2.31it/s, loss=0.0964, lr=0.0001]\nSteps: 70%|███████ | 493/700 [03:34<01:29, 2.31it/s, loss=0.0333, lr=0.0001]\nSteps: 71%|███████ | 494/700 [03:34<01:29, 2.31it/s, loss=0.0333, lr=0.0001]\nSteps: 71%|███████ | 494/700 [03:34<01:29, 2.31it/s, loss=0.094, lr=0.0001] \nSteps: 71%|███████ | 495/700 [03:34<01:28, 2.31it/s, loss=0.094, lr=0.0001]\nSteps: 71%|███████ | 495/700 [03:34<01:28, 
2.31it/s, loss=0.115, lr=0.0001]\nSteps: 71%|███████ | 496/700 [03:35<01:28, 2.31it/s, loss=0.115, lr=0.0001]\nSteps: 71%|███████ | 496/700 [03:35<01:28, 2.31it/s, loss=0.0327, lr=0.0001]\nSteps: 71%|███████ | 497/700 [03:35<01:28, 2.30it/s, loss=0.0327, lr=0.0001]\nSteps: 71%|███████ | 497/700 [03:35<01:28, 2.30it/s, loss=0.14, lr=0.0001] \nSteps: 71%|███████ | 498/700 [03:36<01:27, 2.30it/s, loss=0.14, lr=0.0001]\nSteps: 71%|███████ | 498/700 [03:36<01:27, 2.30it/s, loss=0.0866, lr=0.0001]\nSteps: 71%|███████▏ | 499/700 [03:36<01:27, 2.31it/s, loss=0.0866, lr=0.0001]\nSteps: 71%|███████▏ | 499/700 [03:36<01:27, 2.31it/s, loss=0.132, lr=0.0001] \nSteps: 71%|███████▏ | 500/700 [03:37<01:26, 2.31it/s, loss=0.132, lr=0.0001]\nSteps: 71%|███████▏ | 500/700 [03:37<01:26, 2.31it/s, loss=0.119, lr=0.0001]\nSteps: 72%|███████▏ | 501/700 [03:37<01:26, 2.31it/s, loss=0.119, lr=0.0001]\nSteps: 72%|███████▏ | 501/700 [03:37<01:26, 2.31it/s, loss=0.129, lr=0.0001]\nSteps: 72%|███████▏ | 502/700 [03:37<01:25, 2.31it/s, loss=0.129, lr=0.0001]\nSteps: 72%|███████▏ | 502/700 [03:37<01:25, 2.31it/s, loss=0.128, lr=0.0001]\nSteps: 72%|███████▏ | 503/700 [03:38<01:25, 2.31it/s, loss=0.128, lr=0.0001]\nSteps: 72%|███████▏ | 503/700 [03:38<01:25, 2.31it/s, loss=0.121, lr=0.0001]\nSteps: 72%|███████▏ | 504/700 [03:38<01:24, 2.31it/s, loss=0.121, lr=0.0001]\nSteps: 72%|███████▏ | 504/700 [03:38<01:24, 2.31it/s, loss=0.134, lr=0.0001]\nSteps: 72%|███████▏ | 505/700 [03:39<01:24, 2.31it/s, loss=0.134, lr=0.0001]\nSteps: 72%|███████▏ | 505/700 [03:39<01:24, 2.31it/s, loss=0.108, lr=0.0001]\nSteps: 72%|███████▏ | 506/700 [03:39<01:24, 2.31it/s, loss=0.108, lr=0.0001]\nSteps: 72%|███████▏ | 506/700 [03:39<01:24, 2.31it/s, loss=0.06, lr=0.0001] \nSteps: 72%|███████▏ | 507/700 [03:40<01:23, 2.31it/s, loss=0.06, lr=0.0001]\nSteps: 72%|███████▏ | 507/700 [03:40<01:23, 2.31it/s, loss=0.144, lr=0.0001]\nSteps: 73%|███████▎ | 508/700 [03:40<01:23, 2.31it/s, loss=0.144, lr=0.0001]\nSteps: 
73%|███████▎ | 508/700 [03:40<01:23, 2.31it/s, loss=0.0841, lr=0.0001]\nSteps: 73%|███████▎ | 509/700 [03:40<01:22, 2.31it/s, loss=0.0841, lr=0.0001]\nSteps: 73%|███████▎ | 509/700 [03:41<01:22, 2.31it/s, loss=0.104, lr=0.0001] \nSteps: 73%|███████▎ | 510/700 [03:41<01:22, 2.31it/s, loss=0.104, lr=0.0001]\nSteps: 73%|███████▎ | 510/700 [03:41<01:22, 2.31it/s, loss=0.0856, lr=0.0001]\nSteps: 73%|███████▎ | 511/700 [03:41<01:21, 2.31it/s, loss=0.0856, lr=0.0001]\nSteps: 73%|███████▎ | 511/700 [03:41<01:21, 2.31it/s, loss=0.16, lr=0.0001] \nSteps: 73%|███████▎ | 512/700 [03:42<01:21, 2.31it/s, loss=0.16, lr=0.0001]\nSteps: 73%|███████▎ | 512/700 [03:42<01:21, 2.31it/s, loss=0.0192, lr=0.0001]\nSteps: 73%|███████▎ | 513/700 [03:42<01:21, 2.30it/s, loss=0.0192, lr=0.0001]\nSteps: 73%|███████▎ | 513/700 [03:42<01:21, 2.30it/s, loss=0.0949, lr=0.0001]\nSteps: 73%|███████▎ | 514/700 [03:43<01:20, 2.30it/s, loss=0.0949, lr=0.0001]\nSteps: 73%|███████▎ | 514/700 [03:43<01:20, 2.30it/s, loss=0.223, lr=0.0001] \nSteps: 74%|███████▎ | 515/700 [03:43<01:20, 2.30it/s, loss=0.223, lr=0.0001]\nSteps: 74%|███████▎ | 515/700 [03:43<01:20, 2.30it/s, loss=0.164, lr=0.0001]\nSteps: 74%|███████▎ | 516/700 [03:44<01:19, 2.31it/s, loss=0.164, lr=0.0001]\nSteps: 74%|███████▎ | 516/700 [03:44<01:19, 2.31it/s, loss=0.0825, lr=0.0001]\nSteps: 74%|███████▍ | 517/700 [03:44<01:19, 2.31it/s, loss=0.0825, lr=0.0001]\nSteps: 74%|███████▍ | 517/700 [03:44<01:19, 2.31it/s, loss=0.133, lr=0.0001] \nSteps: 74%|███████▍ | 518/700 [03:44<01:18, 2.31it/s, loss=0.133, lr=0.0001]\nSteps: 74%|███████▍ | 518/700 [03:44<01:18, 2.31it/s, loss=0.0874, lr=0.0001]\nSteps: 74%|███████▍ | 519/700 [03:45<01:18, 2.31it/s, loss=0.0874, lr=0.0001]\nSteps: 74%|███████▍ | 519/700 [03:45<01:18, 2.31it/s, loss=0.162, lr=0.0001] \nSteps: 74%|███████▍ | 520/700 [03:45<01:18, 2.30it/s, loss=0.162, lr=0.0001]\nSteps: 74%|███████▍ | 520/700 [03:45<01:18, 2.30it/s, loss=0.102, lr=0.0001]\nSteps: 74%|███████▍ | 521/700 
[03:46<01:17, 2.31it/s, loss=0.102, lr=0.0001]\nSteps: 74%|███████▍ | 521/700 [03:46<01:17, 2.31it/s, loss=0.145, lr=0.0001]\nSteps: 75%|███████▍ | 522/700 [03:46<01:17, 2.31it/s, loss=0.145, lr=0.0001]\nSteps: 75%|███████▍ | 522/700 [03:46<01:17, 2.31it/s, loss=0.0441, lr=0.0001]\nSteps: 75%|███████▍ | 523/700 [03:47<01:16, 2.31it/s, loss=0.0441, lr=0.0001]\nSteps: 75%|███████▍ | 523/700 [03:47<01:16, 2.31it/s, loss=0.119, lr=0.0001] \nSteps: 75%|███████▍ | 524/700 [03:47<01:16, 2.31it/s, loss=0.119, lr=0.0001]\nSteps: 75%|███████▍ | 524/700 [03:47<01:16, 2.31it/s, loss=0.0832, lr=0.0001]\nSteps: 75%|███████▌ | 525/700 [03:47<01:16, 2.30it/s, loss=0.0832, lr=0.0001]\nSteps: 75%|███████▌ | 525/700 [03:47<01:16, 2.30it/s, loss=0.136, lr=0.0001] \nSteps: 75%|███████▌ | 526/700 [03:48<01:15, 2.30it/s, loss=0.136, lr=0.0001]\nSteps: 75%|███████▌ | 526/700 [03:48<01:15, 2.30it/s, loss=0.124, lr=0.0001]\nSteps: 75%|███████▌ | 527/700 [03:48<01:15, 2.30it/s, loss=0.124, lr=0.0001]\nSteps: 75%|███████▌ | 527/700 [03:48<01:15, 2.30it/s, loss=0.0421, lr=0.0001]\nSteps: 75%|███████▌ | 528/700 [03:49<01:14, 2.31it/s, loss=0.0421, lr=0.0001]\nSteps: 75%|███████▌ | 528/700 [03:49<01:14, 2.31it/s, loss=0.0114, lr=0.0001]\nSteps: 76%|███████▌ | 529/700 [03:49<01:14, 2.30it/s, loss=0.0114, lr=0.0001]\nSteps: 76%|███████▌ | 529/700 [03:49<01:14, 2.30it/s, loss=0.134, lr=0.0001] \nSteps: 76%|███████▌ | 530/700 [03:50<01:13, 2.30it/s, loss=0.134, lr=0.0001]\nSteps: 76%|███████▌ | 530/700 [03:50<01:13, 2.30it/s, loss=0.0501, lr=0.0001]\nSteps: 76%|███████▌ | 531/700 [03:50<01:13, 2.30it/s, loss=0.0501, lr=0.0001]\nSteps: 76%|███████▌ | 531/700 [03:50<01:13, 2.30it/s, loss=0.0874, lr=0.0001]\nSteps: 76%|███████▌ | 532/700 [03:50<01:12, 2.31it/s, loss=0.0874, lr=0.0001]\nSteps: 76%|███████▌ | 532/700 [03:51<01:12, 2.31it/s, loss=0.0677, lr=0.0001]\nSteps: 76%|███████▌ | 533/700 [03:51<01:12, 2.31it/s, loss=0.0677, lr=0.0001]\nSteps: 76%|███████▌ | 533/700 [03:51<01:12, 2.31it/s, 
loss=0.299, lr=0.0001] \nSteps: 76%|███████▋ | 534/700 [03:51<01:12, 2.30it/s, loss=0.299, lr=0.0001]\nSteps: 76%|███████▋ | 534/700 [03:51<01:12, 2.30it/s, loss=0.12, lr=0.0001] \nSteps: 76%|███████▋ | 535/700 [03:52<01:11, 2.31it/s, loss=0.12, lr=0.0001]\nSteps: 76%|███████▋ | 535/700 [03:52<01:11, 2.31it/s, loss=0.279, lr=0.0001]\nSteps: 77%|███████▋ | 536/700 [03:52<01:11, 2.31it/s, loss=0.279, lr=0.0001]\nSteps: 77%|███████▋ | 536/700 [03:52<01:11, 2.31it/s, loss=0.109, lr=0.0001]\nSteps: 77%|███████▋ | 537/700 [03:53<01:10, 2.31it/s, loss=0.109, lr=0.0001]\nSteps: 77%|███████▋ | 537/700 [03:53<01:10, 2.31it/s, loss=0.0592, lr=0.0001]\nSteps: 77%|███████▋ | 538/700 [03:53<01:10, 2.31it/s, loss=0.0592, lr=0.0001]\nSteps: 77%|███████▋ | 538/700 [03:53<01:10, 2.31it/s, loss=0.101, lr=0.0001] \nSteps: 77%|███████▋ | 539/700 [03:54<01:09, 2.30it/s, loss=0.101, lr=0.0001]\nSteps: 77%|███████▋ | 539/700 [03:54<01:09, 2.30it/s, loss=0.0438, lr=0.0001]\nSteps: 77%|███████▋ | 540/700 [03:54<01:09, 2.30it/s, loss=0.0438, lr=0.0001]\nSteps: 77%|███████▋ | 540/700 [03:54<01:09, 2.30it/s, loss=0.101, lr=0.0001] \nSteps: 77%|███████▋ | 541/700 [03:54<01:09, 2.30it/s, loss=0.101, lr=0.0001]\nSteps: 77%|███████▋ | 541/700 [03:54<01:09, 2.30it/s, loss=0.139, lr=0.0001]\nSteps: 77%|███████▋ | 542/700 [03:55<01:08, 2.30it/s, loss=0.139, lr=0.0001]\nSteps: 77%|███████▋ | 542/700 [03:55<01:08, 2.30it/s, loss=0.198, lr=0.0001]\nSteps: 78%|███████▊ | 543/700 [03:55<01:08, 2.30it/s, loss=0.198, lr=0.0001]\nSteps: 78%|███████▊ | 543/700 [03:55<01:08, 2.30it/s, loss=0.171, lr=0.0001]\nSteps: 78%|███████▊ | 544/700 [03:56<01:07, 2.31it/s, loss=0.171, lr=0.0001]\nSteps: 78%|███████▊ | 544/700 [03:56<01:07, 2.31it/s, loss=0.11, lr=0.0001] \nSteps: 78%|███████▊ | 545/700 [03:56<01:07, 2.30it/s, loss=0.11, lr=0.0001]\nSteps: 78%|███████▊ | 545/700 [03:56<01:07, 2.30it/s, loss=0.117, lr=0.0001]\nSteps: 78%|███████▊ | 546/700 [03:57<01:06, 2.30it/s, loss=0.117, lr=0.0001]\nSteps: 78%|███████▊ 
| 546/700 [03:57<01:06, 2.30it/s, loss=0.0327, lr=0.0001]\nSteps: 78%|███████▊ | 547/700 [03:57<01:06, 2.30it/s, loss=0.0327, lr=0.0001]\nSteps: 78%|███████▊ | 547/700 [03:57<01:06, 2.30it/s, loss=0.0536, lr=0.0001]\nSteps: 78%|███████▊ | 548/700 [03:57<01:05, 2.31it/s, loss=0.0536, lr=0.0001]\nSteps: 78%|███████▊ | 548/700 [03:57<01:05, 2.31it/s, loss=0.1, lr=0.0001] \nSteps: 78%|███████▊ | 549/700 [03:58<01:05, 2.31it/s, loss=0.1, lr=0.0001]\nSteps: 78%|███████▊ | 549/700 [03:58<01:05, 2.31it/s, loss=0.113, lr=0.0001]\nSteps: 79%|███████▊ | 550/700 [03:58<01:04, 2.31it/s, loss=0.113, lr=0.0001]\nSteps: 79%|███████▊ | 550/700 [03:58<01:04, 2.31it/s, loss=0.0923, lr=0.0001]\nSteps: 79%|███████▊ | 551/700 [03:59<01:04, 2.31it/s, loss=0.0923, lr=0.0001]\nSteps: 79%|███████▊ | 551/700 [03:59<01:04, 2.31it/s, loss=0.13, lr=0.0001] \nSteps: 79%|███████▉ | 552/700 [03:59<01:04, 2.31it/s, loss=0.13, lr=0.0001]\nSteps: 79%|███████▉ | 552/700 [03:59<01:04, 2.31it/s, loss=0.0919, lr=0.0001]\nSteps: 79%|███████▉ | 553/700 [04:00<01:03, 2.31it/s, loss=0.0919, lr=0.0001]\nSteps: 79%|███████▉ | 553/700 [04:00<01:03, 2.31it/s, loss=0.125, lr=0.0001] \nSteps: 79%|███████▉ | 554/700 [04:00<01:03, 2.31it/s, loss=0.125, lr=0.0001]\nSteps: 79%|███████▉ | 554/700 [04:00<01:03, 2.31it/s, loss=0.0459, lr=0.0001]\nSteps: 79%|███████▉ | 555/700 [04:00<01:02, 2.31it/s, loss=0.0459, lr=0.0001]\nSteps: 79%|███████▉ | 555/700 [04:00<01:02, 2.31it/s, loss=0.178, lr=0.0001] \nSteps: 79%|███████▉ | 556/700 [04:01<01:02, 2.31it/s, loss=0.178, lr=0.0001]\nSteps: 79%|███████▉ | 556/700 [04:01<01:02, 2.31it/s, loss=0.0118, lr=0.0001]\nSteps: 80%|███████▉ | 557/700 [04:01<01:01, 2.31it/s, loss=0.0118, lr=0.0001]\nSteps: 80%|███████▉ | 557/700 [04:01<01:01, 2.31it/s, loss=0.105, lr=0.0001] \nSteps: 80%|███████▉ | 558/700 [04:02<01:01, 2.31it/s, loss=0.105, lr=0.0001]\nSteps: 80%|███████▉ | 558/700 [04:02<01:01, 2.31it/s, loss=0.141, lr=0.0001]\nSteps: 80%|███████▉ | 559/700 [04:02<01:01, 2.31it/s, 
loss=0.141, lr=0.0001]\nSteps: 80%|███████▉ | 559/700 [04:02<01:01, 2.31it/s, loss=0.135, lr=0.0001]\nSteps: 80%|████████ | 560/700 [04:03<01:00, 2.31it/s, loss=0.135, lr=0.0001]\nSteps: 80%|████████ | 560/700 [04:03<01:00, 2.31it/s, loss=0.118, lr=0.0001]\nSteps: 80%|████████ | 561/700 [04:03<01:00, 2.30it/s, loss=0.118, lr=0.0001]\nSteps: 80%|████████ | 561/700 [04:03<01:00, 2.30it/s, loss=0.162, lr=0.0001]\nSteps: 80%|████████ | 562/700 [04:03<00:59, 2.30it/s, loss=0.162, lr=0.0001]\nSteps: 80%|████████ | 562/700 [04:04<00:59, 2.30it/s, loss=0.0823, lr=0.0001]\nSteps: 80%|████████ | 563/700 [04:04<00:59, 2.30it/s, loss=0.0823, lr=0.0001]\nSteps: 80%|████████ | 563/700 [04:04<00:59, 2.30it/s, loss=0.182, lr=0.0001] \nSteps: 81%|████████ | 564/700 [04:04<00:59, 2.30it/s, loss=0.182, lr=0.0001]\nSteps: 81%|████████ | 564/700 [04:04<00:59, 2.30it/s, loss=0.118, lr=0.0001]\nSteps: 81%|████████ | 565/700 [04:05<00:58, 2.31it/s, loss=0.118, lr=0.0001]\nSteps: 81%|████████ | 565/700 [04:05<00:58, 2.31it/s, loss=0.0902, lr=0.0001]\nSteps: 81%|████████ | 566/700 [04:05<00:58, 2.31it/s, loss=0.0902, lr=0.0001]\nSteps: 81%|████████ | 566/700 [04:05<00:58, 2.31it/s, loss=0.0953, lr=0.0001]\nSteps: 81%|████████ | 567/700 [04:06<00:57, 2.31it/s, loss=0.0953, lr=0.0001]\nSteps: 81%|████████ | 567/700 [04:06<00:57, 2.31it/s, loss=0.126, lr=0.0001] \nSteps: 81%|████████ | 568/700 [04:06<00:57, 2.31it/s, loss=0.126, lr=0.0001]\nSteps: 81%|████████ | 568/700 [04:06<00:57, 2.31it/s, loss=0.0431, lr=0.0001]\nSteps: 81%|████████▏ | 569/700 [04:07<00:56, 2.31it/s, loss=0.0431, lr=0.0001]\nSteps: 81%|████████▏ | 569/700 [04:07<00:56, 2.31it/s, loss=0.0227, lr=0.0001]\nSteps: 81%|████████▏ | 570/700 [04:07<00:56, 2.31it/s, loss=0.0227, lr=0.0001]\nSteps: 81%|████████▏ | 570/700 [04:07<00:56, 2.31it/s, loss=0.192, lr=0.0001] \nSteps: 82%|████████▏ | 571/700 [04:07<00:55, 2.31it/s, loss=0.192, lr=0.0001]\nSteps: 82%|████████▏ | 571/700 [04:07<00:55, 2.31it/s, loss=0.189, lr=0.0001]\nSteps: 
82%|████████▏ | 572/700 [04:08<00:55, 2.31it/s, loss=0.189, lr=0.0001]\nSteps: 82%|████████▏ | 572/700 [04:08<00:55, 2.31it/s, loss=0.116, lr=0.0001]\nSteps: 82%|████████▏ | 573/700 [04:08<00:55, 2.31it/s, loss=0.116, lr=0.0001]\nSteps: 82%|████████▏ | 573/700 [04:08<00:55, 2.31it/s, loss=0.156, lr=0.0001]\nSteps: 82%|████████▏ | 574/700 [04:09<00:54, 2.31it/s, loss=0.156, lr=0.0001]\nSteps: 82%|████████▏ | 574/700 [04:09<00:54, 2.31it/s, loss=0.133, lr=0.0001]\nSteps: 82%|████████▏ | 575/700 [04:09<00:54, 2.31it/s, loss=0.133, lr=0.0001]\nSteps: 82%|████████▏ | 575/700 [04:09<00:54, 2.31it/s, loss=0.0888, lr=0.0001]\nSteps: 82%|████████▏ | 576/700 [04:10<00:53, 2.31it/s, loss=0.0888, lr=0.0001]\nSteps: 82%|████████▏ | 576/700 [04:10<00:53, 2.31it/s, loss=0.128, lr=0.0001] \nSteps: 82%|████████▏ | 577/700 [04:10<00:53, 2.30it/s, loss=0.128, lr=0.0001]\nSteps: 82%|████████▏ | 577/700 [04:10<00:53, 2.30it/s, loss=0.154, lr=0.0001]\nSteps: 83%|████████▎ | 578/700 [04:10<00:53, 2.30it/s, loss=0.154, lr=0.0001]\nSteps: 83%|████████▎ | 578/700 [04:10<00:53, 2.30it/s, loss=0.062, lr=0.0001]\nSteps: 83%|████████▎ | 579/700 [04:11<00:52, 2.30it/s, loss=0.062, lr=0.0001]\nSteps: 83%|████████▎ | 579/700 [04:11<00:52, 2.30it/s, loss=0.11, lr=0.0001] \nSteps: 83%|████████▎ | 580/700 [04:11<00:52, 2.31it/s, loss=0.11, lr=0.0001]\nSteps: 83%|████████▎ | 580/700 [04:11<00:52, 2.31it/s, loss=0.0333, lr=0.0001]\nSteps: 83%|████████▎ | 581/700 [04:12<00:51, 2.31it/s, loss=0.0333, lr=0.0001]\nSteps: 83%|████████▎ | 581/700 [04:12<00:51, 2.31it/s, loss=0.0944, lr=0.0001]\nSteps: 83%|████████▎ | 582/700 [04:12<00:51, 2.31it/s, loss=0.0944, lr=0.0001]\nSteps: 83%|████████▎ | 582/700 [04:12<00:51, 2.31it/s, loss=0.106, lr=0.0001] \nSteps: 83%|████████▎ | 583/700 [04:13<00:50, 2.31it/s, loss=0.106, lr=0.0001]\nSteps: 83%|████████▎ | 583/700 [04:13<00:50, 2.31it/s, loss=0.125, lr=0.0001]\nSteps: 83%|████████▎ | 584/700 [04:13<00:50, 2.31it/s, loss=0.125, lr=0.0001]\nSteps: 83%|████████▎ | 
584/700 [04:13<00:50, 2.31it/s, loss=0.0806, lr=0.0001]\nSteps: 84%|████████▎ | 585/700 [04:13<00:49, 2.31it/s, loss=0.0806, lr=0.0001]\nSteps: 84%|████████▎ | 585/700 [04:13<00:49, 2.31it/s, loss=0.157, lr=0.0001] \nSteps: 84%|████████▎ | 586/700 [04:14<00:49, 2.31it/s, loss=0.157, lr=0.0001]\nSteps: 84%|████████▎ | 586/700 [04:14<00:49, 2.31it/s, loss=0.0135, lr=0.0001]\nSteps: 84%|████████▍ | 587/700 [04:14<00:48, 2.31it/s, loss=0.0135, lr=0.0001]\nSteps: 84%|████████▍ | 587/700 [04:14<00:48, 2.31it/s, loss=0.244, lr=0.0001] \nSteps: 84%|████████▍ | 588/700 [04:15<00:48, 2.31it/s, loss=0.244, lr=0.0001]\nSteps: 84%|████████▍ | 588/700 [04:15<00:48, 2.31it/s, loss=0.148, lr=0.0001]\nSteps: 84%|████████▍ | 589/700 [04:15<00:48, 2.31it/s, loss=0.148, lr=0.0001]\nSteps: 84%|████████▍ | 589/700 [04:15<00:48, 2.31it/s, loss=0.118, lr=0.0001]\nSteps: 84%|████████▍ | 590/700 [04:16<00:47, 2.31it/s, loss=0.118, lr=0.0001]\nSteps: 84%|████████▍ | 590/700 [04:16<00:47, 2.31it/s, loss=0.128, lr=0.0001]\nSteps: 84%|████████▍ | 591/700 [04:16<00:47, 2.31it/s, loss=0.128, lr=0.0001]\nSteps: 84%|████████▍ | 591/700 [04:16<00:47, 2.31it/s, loss=0.148, lr=0.0001]\nSteps: 85%|████████▍ | 592/700 [04:16<00:46, 2.31it/s, loss=0.148, lr=0.0001]\nSteps: 85%|████████▍ | 592/700 [04:17<00:46, 2.31it/s, loss=0.278, lr=0.0001]\nSteps: 85%|████████▍ | 593/700 [04:17<00:46, 2.30it/s, loss=0.278, lr=0.0001]\nSteps: 85%|████████▍ | 593/700 [04:17<00:46, 2.30it/s, loss=0.134, lr=0.0001]\nSteps: 85%|████████▍ | 594/700 [04:17<00:46, 2.30it/s, loss=0.134, lr=0.0001]\nSteps: 85%|████████▍ | 594/700 [04:17<00:46, 2.30it/s, loss=0.0929, lr=0.0001]\nSteps: 85%|████████▌ | 595/700 [04:18<00:45, 2.30it/s, loss=0.0929, lr=0.0001]\nSteps: 85%|████████▌ | 595/700 [04:18<00:45, 2.30it/s, loss=0.102, lr=0.0001] \nSteps: 85%|████████▌ | 596/700 [04:18<00:45, 2.31it/s, loss=0.102, lr=0.0001]\nSteps: 85%|████████▌ | 596/700 [04:18<00:45, 2.31it/s, loss=0.0314, lr=0.0001]\nSteps: 85%|████████▌ | 597/700 
[04:19<00:44, 2.31it/s, loss=0.0314, lr=0.0001]\nSteps: 85%|████████▌ | 597/700 [04:19<00:44, 2.31it/s, loss=0.15, lr=0.0001] \nSteps: 85%|████████▌ | 598/700 [04:19<00:44, 2.31it/s, loss=0.15, lr=0.0001]\nSteps: 85%|████████▌ | 598/700 [04:19<00:44, 2.31it/s, loss=0.104, lr=0.0001]\nSteps: 86%|████████▌ | 599/700 [04:20<00:43, 2.31it/s, loss=0.104, lr=0.0001]\nSteps: 86%|████████▌ | 599/700 [04:20<00:43, 2.31it/s, loss=0.0743, lr=0.0001]\nSteps: 86%|████████▌ | 600/700 [04:20<00:43, 2.31it/s, loss=0.0743, lr=0.0001]\nSteps: 86%|████████▌ | 600/700 [04:20<00:43, 2.31it/s, loss=0.128, lr=0.0001] \nSteps: 86%|████████▌ | 601/700 [04:20<00:42, 2.31it/s, loss=0.128, lr=0.0001]\nSteps: 86%|████████▌ | 601/700 [04:20<00:42, 2.31it/s, loss=0.123, lr=0.0001]\nSteps: 86%|████████▌ | 602/700 [04:21<00:42, 2.31it/s, loss=0.123, lr=0.0001]\nSteps: 86%|████████▌ | 602/700 [04:21<00:42, 2.31it/s, loss=0.111, lr=0.0001]\nSteps: 86%|████████▌ | 603/700 [04:21<00:41, 2.31it/s, loss=0.111, lr=0.0001]\nSteps: 86%|████████▌ | 603/700 [04:21<00:41, 2.31it/s, loss=0.071, lr=0.0001]\nSteps: 86%|████████▋ | 604/700 [04:22<00:41, 2.31it/s, loss=0.071, lr=0.0001]\nSteps: 86%|████████▋ | 604/700 [04:22<00:41, 2.31it/s, loss=0.255, lr=0.0001]\nSteps: 86%|████████▋ | 605/700 [04:22<00:41, 2.31it/s, loss=0.255, lr=0.0001]\nSteps: 86%|████████▋ | 605/700 [04:22<00:41, 2.31it/s, loss=0.069, lr=0.0001]\nSteps: 87%|████████▋ | 606/700 [04:23<00:40, 2.31it/s, loss=0.069, lr=0.0001]\nSteps: 87%|████████▋ | 606/700 [04:23<00:40, 2.31it/s, loss=0.127, lr=0.0001]\nSteps: 87%|████████▋ | 607/700 [04:23<00:40, 2.31it/s, loss=0.127, lr=0.0001]\nSteps: 87%|████████▋ | 607/700 [04:23<00:40, 2.31it/s, loss=0.176, lr=0.0001]\nSteps: 87%|████████▋ | 608/700 [04:23<00:39, 2.31it/s, loss=0.176, lr=0.0001]\nSteps: 87%|████████▋ | 608/700 [04:23<00:39, 2.31it/s, loss=0.131, lr=0.0001]\nSteps: 87%|████████▋ | 609/700 [04:24<00:39, 2.29it/s, loss=0.131, lr=0.0001]\nSteps: 87%|████████▋ | 609/700 [04:24<00:39, 
2.29it/s, loss=0.265, lr=0.0001]\nSteps: 87%|████████▋ | 610/700 [04:24<00:39, 2.30it/s, loss=0.265, lr=0.0001]\nSteps: 87%|████████▋ | 610/700 [04:24<00:39, 2.30it/s, loss=0.19, lr=0.0001] \nSteps: 87%|████████▋ | 611/700 [04:25<00:38, 2.30it/s, loss=0.19, lr=0.0001]\nSteps: 87%|████████▋ | 611/700 [04:25<00:38, 2.30it/s, loss=0.143, lr=0.0001]\nSteps: 87%|████████▋ | 612/700 [04:25<00:38, 2.30it/s, loss=0.143, lr=0.0001]\nSteps: 87%|████████▋ | 612/700 [04:25<00:38, 2.30it/s, loss=0.11, lr=0.0001] \nSteps: 88%|████████▊ | 613/700 [04:26<00:37, 2.31it/s, loss=0.11, lr=0.0001]\nSteps: 88%|████████▊ | 613/700 [04:26<00:37, 2.31it/s, loss=0.327, lr=0.0001]\nSteps: 88%|████████▊ | 614/700 [04:26<00:37, 2.31it/s, loss=0.327, lr=0.0001]\nSteps: 88%|████████▊ | 614/700 [04:26<00:37, 2.31it/s, loss=0.127, lr=0.0001]\nSteps: 88%|████████▊ | 615/700 [04:26<00:36, 2.31it/s, loss=0.127, lr=0.0001]\nSteps: 88%|████████▊ | 615/700 [04:26<00:36, 2.31it/s, loss=0.0661, lr=0.0001]\nSteps: 88%|████████▊ | 616/700 [04:27<00:36, 2.31it/s, loss=0.0661, lr=0.0001]\nSteps: 88%|████████▊ | 616/700 [04:27<00:36, 2.31it/s, loss=0.0279, lr=0.0001]\nSteps: 88%|████████▊ | 617/700 [04:27<00:35, 2.31it/s, loss=0.0279, lr=0.0001]\nSteps: 88%|████████▊ | 617/700 [04:27<00:35, 2.31it/s, loss=0.0887, lr=0.0001]\nSteps: 88%|████████▊ | 618/700 [04:28<00:35, 2.31it/s, loss=0.0887, lr=0.0001]\nSteps: 88%|████████▊ | 618/700 [04:28<00:35, 2.31it/s, loss=0.222, lr=0.0001] \nSteps: 88%|████████▊ | 619/700 [04:28<00:35, 2.31it/s, loss=0.222, lr=0.0001]\nSteps: 88%|████████▊ | 619/700 [04:28<00:35, 2.31it/s, loss=0.253, lr=0.0001]\nSteps: 89%|████████▊ | 620/700 [04:29<00:34, 2.31it/s, loss=0.253, lr=0.0001]\nSteps: 89%|████████▊ | 620/700 [04:29<00:34, 2.31it/s, loss=0.0884, lr=0.0001]\nSteps: 89%|████████▊ | 621/700 [04:29<00:34, 2.30it/s, loss=0.0884, lr=0.0001]\nSteps: 89%|████████▊ | 621/700 [04:29<00:34, 2.30it/s, loss=0.0895, lr=0.0001]\nSteps: 89%|████████▉ | 622/700 [04:29<00:33, 2.31it/s, 
loss=0.0895, lr=0.0001]\nSteps: 89%|████████▉ | 622/700 [04:30<00:33, 2.31it/s, loss=0.113, lr=0.0001] \nSteps: 89%|████████▉ | 623/700 [04:30<00:33, 2.31it/s, loss=0.113, lr=0.0001]\nSteps: 89%|████████▉ | 623/700 [04:30<00:33, 2.31it/s, loss=0.0678, lr=0.0001]\nSteps: 89%|████████▉ | 624/700 [04:30<00:32, 2.31it/s, loss=0.0678, lr=0.0001]\nSteps: 89%|████████▉ | 624/700 [04:30<00:32, 2.31it/s, loss=0.147, lr=0.0001] \nSteps: 89%|████████▉ | 625/700 [04:31<00:32, 2.30it/s, loss=0.147, lr=0.0001]\nSteps: 89%|████████▉ | 625/700 [04:31<00:32, 2.30it/s, loss=0.087, lr=0.0001]\nSteps: 89%|████████▉ | 626/700 [04:31<00:32, 2.30it/s, loss=0.087, lr=0.0001]\nSteps: 89%|████████▉ | 626/700 [04:31<00:32, 2.30it/s, loss=0.0731, lr=0.0001]\nSteps: 90%|████████▉ | 627/700 [04:32<00:31, 2.30it/s, loss=0.0731, lr=0.0001]\nSteps: 90%|████████▉ | 627/700 [04:32<00:31, 2.30it/s, loss=0.137, lr=0.0001] \nSteps: 90%|████████▉ | 628/700 [04:32<00:31, 2.31it/s, loss=0.137, lr=0.0001]\nSteps: 90%|████████▉ | 628/700 [04:32<00:31, 2.31it/s, loss=0.117, lr=0.0001]\nSteps: 90%|████████▉ | 629/700 [04:33<00:30, 2.31it/s, loss=0.117, lr=0.0001]\nSteps: 90%|████████▉ | 629/700 [04:33<00:30, 2.31it/s, loss=0.102, lr=0.0001]\nSteps: 90%|█████████ | 630/700 [04:33<00:30, 2.31it/s, loss=0.102, lr=0.0001]\nSteps: 90%|█████████ | 630/700 [04:33<00:30, 2.31it/s, loss=0.276, lr=0.0001]\nSteps: 90%|█████████ | 631/700 [04:33<00:29, 2.31it/s, loss=0.276, lr=0.0001]\nSteps: 90%|█████████ | 631/700 [04:33<00:29, 2.31it/s, loss=0.12, lr=0.0001] \nSteps: 90%|█████████ | 632/700 [04:34<00:29, 2.31it/s, loss=0.12, lr=0.0001]\nSteps: 90%|█████████ | 632/700 [04:34<00:29, 2.31it/s, loss=0.171, lr=0.0001]\nSteps: 90%|█████████ | 633/700 [04:34<00:28, 2.31it/s, loss=0.171, lr=0.0001]\nSteps: 90%|█████████ | 633/700 [04:34<00:28, 2.31it/s, loss=0.0859, lr=0.0001]\nSteps: 91%|█████████ | 634/700 [04:35<00:28, 2.31it/s, loss=0.0859, lr=0.0001]\nSteps: 91%|█████████ | 634/700 [04:35<00:28, 2.31it/s, loss=0.0891, 
lr=0.0001]\nSteps: 91%|█████████ | 635/700 [04:35<00:28, 2.31it/s, loss=0.0891, lr=0.0001]\nSteps: 91%|█████████ | 635/700 [04:35<00:28, 2.31it/s, loss=0.122, lr=0.0001] \nSteps: 91%|█████████ | 636/700 [04:36<00:27, 2.31it/s, loss=0.122, lr=0.0001]\nSteps: 91%|█████████ | 636/700 [04:36<00:27, 2.31it/s, loss=0.147, lr=0.0001]\nSteps: 91%|█████████ | 637/700 [04:36<00:27, 2.31it/s, loss=0.147, lr=0.0001]\nSteps: 91%|█████████ | 637/700 [04:36<00:27, 2.31it/s, loss=0.103, lr=0.0001]\nSteps: 91%|█████████ | 638/700 [04:36<00:26, 2.31it/s, loss=0.103, lr=0.0001]\nSteps: 91%|█████████ | 638/700 [04:36<00:26, 2.31it/s, loss=0.212, lr=0.0001]\nSteps: 91%|█████████▏| 639/700 [04:37<00:26, 2.31it/s, loss=0.212, lr=0.0001]\nSteps: 91%|█████████▏| 639/700 [04:37<00:26, 2.31it/s, loss=0.125, lr=0.0001]\nSteps: 91%|█████████▏| 640/700 [04:37<00:25, 2.31it/s, loss=0.125, lr=0.0001]\nSteps: 91%|█████████▏| 640/700 [04:37<00:25, 2.31it/s, loss=0.222, lr=0.0001]\nSteps: 92%|█████████▏| 641/700 [04:38<00:25, 2.30it/s, loss=0.222, lr=0.0001]\nSteps: 92%|█████████▏| 641/700 [04:38<00:25, 2.30it/s, loss=0.145, lr=0.0001]\nSteps: 92%|█████████▏| 642/700 [04:38<00:25, 2.30it/s, loss=0.145, lr=0.0001]\nSteps: 92%|█████████▏| 642/700 [04:38<00:25, 2.30it/s, loss=0.0954, lr=0.0001]\nSteps: 92%|█████████▏| 643/700 [04:39<00:24, 2.31it/s, loss=0.0954, lr=0.0001]\nSteps: 92%|█████████▏| 643/700 [04:39<00:24, 2.31it/s, loss=0.288, lr=0.0001] \nSteps: 92%|█████████▏| 644/700 [04:39<00:24, 2.31it/s, loss=0.288, lr=0.0001]\nSteps: 92%|█████████▏| 644/700 [04:39<00:24, 2.31it/s, loss=0.115, lr=0.0001]\nSteps: 92%|█████████▏| 645/700 [04:39<00:23, 2.31it/s, loss=0.115, lr=0.0001]\nSteps: 92%|█████████▏| 645/700 [04:39<00:23, 2.31it/s, loss=0.111, lr=0.0001]\nSteps: 92%|█████████▏| 646/700 [04:40<00:23, 2.31it/s, loss=0.111, lr=0.0001]\nSteps: 92%|█████████▏| 646/700 [04:40<00:23, 2.31it/s, loss=0.111, lr=0.0001]\nSteps: 92%|█████████▏| 647/700 [04:40<00:22, 2.31it/s, loss=0.111, lr=0.0001]\nSteps: 
92%|█████████▏| 647/700 [04:40<00:22, 2.31it/s, loss=0.16, lr=0.0001] \nSteps: 93%|█████████▎| 648/700 [04:41<00:22, 2.31it/s, loss=0.16, lr=0.0001]\nSteps: 93%|█████████▎| 648/700 [04:41<00:22, 2.31it/s, loss=0.08, lr=0.0001]\nSteps: 93%|█████████▎| 649/700 [04:41<00:22, 2.31it/s, loss=0.08, lr=0.0001]\nSteps: 93%|█████████▎| 649/700 [04:41<00:22, 2.31it/s, loss=0.145, lr=0.0001]\nSteps: 93%|█████████▎| 650/700 [04:42<00:21, 2.31it/s, loss=0.145, lr=0.0001]\nSteps: 93%|█████████▎| 650/700 [04:42<00:21, 2.31it/s, loss=0.105, lr=0.0001]\nSteps: 93%|█████████▎| 651/700 [04:42<00:21, 2.31it/s, loss=0.105, lr=0.0001]\nSteps: 93%|█████████▎| 651/700 [04:42<00:21, 2.31it/s, loss=0.142, lr=0.0001]\nSteps: 93%|█████████▎| 652/700 [04:42<00:20, 2.31it/s, loss=0.142, lr=0.0001]\nSteps: 93%|█████████▎| 652/700 [04:43<00:20, 2.31it/s, loss=0.177, lr=0.0001]\nSteps: 93%|█████████▎| 653/700 [04:43<00:20, 2.31it/s, loss=0.177, lr=0.0001]\nSteps: 93%|█████████▎| 653/700 [04:43<00:20, 2.31it/s, loss=0.0607, lr=0.0001]\nSteps: 93%|█████████▎| 654/700 [04:43<00:19, 2.31it/s, loss=0.0607, lr=0.0001]\nSteps: 93%|█████████▎| 654/700 [04:43<00:19, 2.31it/s, loss=0.131, lr=0.0001] \nSteps: 94%|█████████▎| 655/700 [04:44<00:19, 2.31it/s, loss=0.131, lr=0.0001]\nSteps: 94%|█████████▎| 655/700 [04:44<00:19, 2.31it/s, loss=0.0542, lr=0.0001]\nSteps: 94%|█████████▎| 656/700 [04:44<00:19, 2.31it/s, loss=0.0542, lr=0.0001]\nSteps: 94%|█████████▎| 656/700 [04:44<00:19, 2.31it/s, loss=0.113, lr=0.0001] \nSteps: 94%|█████████▍| 657/700 [04:45<00:18, 2.30it/s, loss=0.113, lr=0.0001]\nSteps: 94%|█████████▍| 657/700 [04:45<00:18, 2.30it/s, loss=0.173, lr=0.0001]\nSteps: 94%|█████████▍| 658/700 [04:45<00:18, 2.30it/s, loss=0.173, lr=0.0001]\nSteps: 94%|█████████▍| 658/700 [04:45<00:18, 2.30it/s, loss=0.0329, lr=0.0001]\nSteps: 94%|█████████▍| 659/700 [04:46<00:17, 2.31it/s, loss=0.0329, lr=0.0001]\nSteps: 94%|█████████▍| 659/700 [04:46<00:17, 2.31it/s, loss=0.161, lr=0.0001] \nSteps: 94%|█████████▍| 
660/700 [04:46<00:17, 2.31it/s, loss=0.161, lr=0.0001]\nSteps: 94%|█████████▍| 660/700 [04:46<00:17, 2.31it/s, loss=0.0519, lr=0.0001]\nSteps: 94%|█████████▍| 661/700 [04:46<00:16, 2.31it/s, loss=0.0519, lr=0.0001]\nSteps: 94%|█████████▍| 661/700 [04:46<00:16, 2.31it/s, loss=0.0884, lr=0.0001]\nSteps: 95%|█████████▍| 662/700 [04:47<00:16, 2.31it/s, loss=0.0884, lr=0.0001]\nSteps: 95%|█████████▍| 662/700 [04:47<00:16, 2.31it/s, loss=0.108, lr=0.0001] \nSteps: 95%|█████████▍| 663/700 [04:47<00:16, 2.31it/s, loss=0.108, lr=0.0001]\nSteps: 95%|█████████▍| 663/700 [04:47<00:16, 2.31it/s, loss=0.0557, lr=0.0001]\nSteps: 95%|█████████▍| 664/700 [04:48<00:15, 2.31it/s, loss=0.0557, lr=0.0001]\nSteps: 95%|█████████▍| 664/700 [04:48<00:15, 2.31it/s, loss=0.12, lr=0.0001] \nSteps: 95%|█████████▌| 665/700 [04:48<00:15, 2.31it/s, loss=0.12, lr=0.0001]\nSteps: 95%|█████████▌| 665/700 [04:48<00:15, 2.31it/s, loss=0.0976, lr=0.0001]\nSteps: 95%|█████████▌| 666/700 [04:49<00:14, 2.31it/s, loss=0.0976, lr=0.0001]\nSteps: 95%|█████████▌| 666/700 [04:49<00:14, 2.31it/s, loss=0.175, lr=0.0001] \nSteps: 95%|█████████▌| 667/700 [04:49<00:14, 2.31it/s, loss=0.175, lr=0.0001]\nSteps: 95%|█████████▌| 667/700 [04:49<00:14, 2.31it/s, loss=0.0758, lr=0.0001]\nSteps: 95%|█████████▌| 668/700 [04:49<00:13, 2.31it/s, loss=0.0758, lr=0.0001]\nSteps: 95%|█████████▌| 668/700 [04:49<00:13, 2.31it/s, loss=0.154, lr=0.0001] \nSteps: 96%|█████████▌| 669/700 [04:50<00:13, 2.31it/s, loss=0.154, lr=0.0001]\nSteps: 96%|█████████▌| 669/700 [04:50<00:13, 2.31it/s, loss=0.0661, lr=0.0001]\nSteps: 96%|█████████▌| 670/700 [04:50<00:12, 2.31it/s, loss=0.0661, lr=0.0001]\nSteps: 96%|█████████▌| 670/700 [04:50<00:12, 2.31it/s, loss=0.222, lr=0.0001] \nSteps: 96%|█████████▌| 671/700 [04:51<00:12, 2.31it/s, loss=0.222, lr=0.0001]\nSteps: 96%|█████████▌| 671/700 [04:51<00:12, 2.31it/s, loss=0.125, lr=0.0001]\nSteps: 96%|█████████▌| 672/700 [04:51<00:12, 2.31it/s, loss=0.125, lr=0.0001]\nSteps: 96%|█████████▌| 672/700 
[04:51<00:12, 2.31it/s, loss=0.117, lr=0.0001]\nSteps: 96%|█████████▌| 673/700 [04:52<00:11, 2.30it/s, loss=0.117, lr=0.0001]\nSteps: 96%|█████████▌| 673/700 [04:52<00:11, 2.30it/s, loss=0.163, lr=0.0001]\nSteps: 96%|█████████▋| 674/700 [04:52<00:11, 2.30it/s, loss=0.163, lr=0.0001]\nSteps: 96%|█████████▋| 674/700 [04:52<00:11, 2.30it/s, loss=0.0756, lr=0.0001]\nSteps: 96%|█████████▋| 675/700 [04:52<00:10, 2.31it/s, loss=0.0756, lr=0.0001]\nSteps: 96%|█████████▋| 675/700 [04:52<00:10, 2.31it/s, loss=0.178, lr=0.0001] \nSteps: 97%|█████████▋| 676/700 [04:53<00:10, 2.31it/s, loss=0.178, lr=0.0001]\nSteps: 97%|█████████▋| 676/700 [04:53<00:10, 2.31it/s, loss=0.104, lr=0.0001]\nSteps: 97%|█████████▋| 677/700 [04:53<00:09, 2.31it/s, loss=0.104, lr=0.0001]\nSteps: 97%|█████████▋| 677/700 [04:53<00:09, 2.31it/s, loss=0.139, lr=0.0001]\nSteps: 97%|█████████▋| 678/700 [04:54<00:09, 2.31it/s, loss=0.139, lr=0.0001]\nSteps: 97%|█████████▋| 678/700 [04:54<00:09, 2.31it/s, loss=0.0792, lr=0.0001]\nSteps: 97%|█████████▋| 679/700 [04:54<00:09, 2.31it/s, loss=0.0792, lr=0.0001]\nSteps: 97%|█████████▋| 679/700 [04:54<00:09, 2.31it/s, loss=0.214, lr=0.0001] \nSteps: 97%|█████████▋| 680/700 [04:55<00:08, 2.31it/s, loss=0.214, lr=0.0001]\nSteps: 97%|█████████▋| 680/700 [04:55<00:08, 2.31it/s, loss=0.105, lr=0.0001]\nSteps: 97%|█████████▋| 681/700 [04:55<00:08, 2.31it/s, loss=0.105, lr=0.0001]\nSteps: 97%|█████████▋| 681/700 [04:55<00:08, 2.31it/s, loss=0.233, lr=0.0001]\nSteps: 97%|█████████▋| 682/700 [04:55<00:07, 2.31it/s, loss=0.233, lr=0.0001]\nSteps: 97%|█████████▋| 682/700 [04:56<00:07, 2.31it/s, loss=0.107, lr=0.0001]\nSteps: 98%|█████████▊| 683/700 [04:56<00:07, 2.31it/s, loss=0.107, lr=0.0001]\nSteps: 98%|█████████▊| 683/700 [04:56<00:07, 2.31it/s, loss=0.125, lr=0.0001]\nSteps: 98%|█████████▊| 684/700 [04:56<00:06, 2.31it/s, loss=0.125, lr=0.0001]\nSteps: 98%|█████████▊| 684/700 [04:56<00:06, 2.31it/s, loss=0.176, lr=0.0001]\nSteps: 98%|█████████▊| 685/700 [04:57<00:06, 
2.31it/s, loss=0.176, lr=0.0001]\nSteps: 98%|█████████▊| 685/700 [04:57<00:06, 2.31it/s, loss=0.0955, lr=0.0001]\nSteps: 98%|█████████▊| 686/700 [04:57<00:06, 2.31it/s, loss=0.0955, lr=0.0001]\nSteps: 98%|█████████▊| 686/700 [04:57<00:06, 2.31it/s, loss=0.11, lr=0.0001] \nSteps: 98%|█████████▊| 687/700 [04:58<00:05, 2.31it/s, loss=0.11, lr=0.0001]\nSteps: 98%|█████████▊| 687/700 [04:58<00:05, 2.31it/s, loss=0.139, lr=0.0001]\nSteps: 98%|█████████▊| 688/700 [04:58<00:05, 2.31it/s, loss=0.139, lr=0.0001]\nSteps: 98%|█████████▊| 688/700 [04:58<00:05, 2.31it/s, loss=0.0515, lr=0.0001]\nSteps: 98%|█████████▊| 689/700 [04:59<00:04, 2.30it/s, loss=0.0515, lr=0.0001]\nSteps: 98%|█████████▊| 689/700 [04:59<00:04, 2.30it/s, loss=0.102, lr=0.0001] \nSteps: 99%|█████████▊| 690/700 [04:59<00:04, 2.30it/s, loss=0.102, lr=0.0001]\nSteps: 99%|█████████▊| 690/700 [04:59<00:04, 2.30it/s, loss=0.174, lr=0.0001]\nSteps: 99%|█████████▊| 691/700 [04:59<00:03, 2.31it/s, loss=0.174, lr=0.0001]\nSteps: 99%|█████████▊| 691/700 [04:59<00:03, 2.31it/s, loss=0.161, lr=0.0001]\nSteps: 99%|█████████▉| 692/700 [05:00<00:03, 2.31it/s, loss=0.161, lr=0.0001]\nSteps: 99%|█████████▉| 692/700 [05:00<00:03, 2.31it/s, loss=0.103, lr=0.0001]\nSteps: 99%|█████████▉| 693/700 [05:00<00:03, 2.31it/s, loss=0.103, lr=0.0001]\nSteps: 99%|█████████▉| 693/700 [05:00<00:03, 2.31it/s, loss=0.0503, lr=0.0001]\nSteps: 99%|█████████▉| 694/700 [05:01<00:02, 2.31it/s, loss=0.0503, lr=0.0001]\nSteps: 99%|█████████▉| 694/700 [05:01<00:02, 2.31it/s, loss=0.079, lr=0.0001] \nSteps: 99%|█████████▉| 695/700 [05:01<00:02, 2.31it/s, loss=0.079, lr=0.0001]\nSteps: 99%|█████████▉| 695/700 [05:01<00:02, 2.31it/s, loss=0.0907, lr=0.0001]\nSteps: 99%|█████████▉| 696/700 [05:02<00:01, 2.31it/s, loss=0.0907, lr=0.0001]\nSteps: 99%|█████████▉| 696/700 [05:02<00:01, 2.31it/s, loss=0.108, lr=0.0001] \nSteps: 100%|█████████▉| 697/700 [05:02<00:01, 2.31it/s, loss=0.108, lr=0.0001]\nSteps: 100%|█████████▉| 697/700 [05:02<00:01, 2.31it/s, 
loss=0.165, lr=0.0001]\nSteps: 100%|█████████▉| 698/700 [05:02<00:00, 2.31it/s, loss=0.165, lr=0.0001]\nSteps: 100%|█████████▉| 698/700 [05:02<00:00, 2.31it/s, loss=0.194, lr=0.0001]\nSteps: 100%|█████████▉| 699/700 [05:03<00:00, 2.31it/s, loss=0.194, lr=0.0001]\nSteps: 100%|█████████▉| 699/700 [05:03<00:00, 2.31it/s, loss=0.229, lr=0.0001]\nSteps: 100%|██████████| 700/700 [05:03<00:00, 2.31it/s, loss=0.229, lr=0.0001]\nSteps: 100%|██████████| 700/700 [05:03<00:00, 2.31it/s, loss=0.141, lr=0.0001]Model weights saved in /tmp/train/output/sd35_large_train_replicate/pytorch_lora_weights.safetensors\nLoading pipeline components...: 0%| | 0/9 [00:00<?, ?it/s]\u001b[A{'base_image_seq_len', 'base_shift', 'max_shift', 'max_image_seq_len', 'use_dynamic_shifting'} was not found in config. Values will be initialized to default values.\nLoaded scheduler as FlowMatchEulerDiscreteScheduler from `scheduler` subfolder of stable-diffusion-3.5-large.\nLoaded text_encoder as CLIPTextModelWithProjection from `text_encoder` subfolder of stable-diffusion-3.5-large.\nLoading pipeline components...: 22%|██▏ | 2/9 [00:00<00:01, 5.30it/s]\u001b[A\nLoading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]\u001b[A\u001b[A\nLoading checkpoint shards: 50%|█████ | 1/2 [00:04<00:04, 4.98s/it]\u001b[A\u001b[A\nLoading checkpoint shards: 100%|██████████| 2/2 [00:09<00:00, 4.75s/it]\u001b[A\u001b[A\nLoading checkpoint shards: 100%|██████████| 2/2 [00:09<00:00, 4.79s/it]\nLoaded text_encoder_3 as T5EncoderModel from `text_encoder_3` subfolder of stable-diffusion-3.5-large.\nLoading pipeline components...: 33%|███▎ | 3/9 [00:09<00:24, 4.12s/it]\u001b[A{'dual_attention_layers'} was not found in config. 
Values will be initialized to default values.\nLoaded transformer as SD3Transformer2DModel from `transformer` subfolder of stable-diffusion-3.5-large.\nLoading pipeline components...: 44%|████▍ | 4/9 [00:11<00:16, 3.27s/it]\u001b[ALoaded tokenizer as CLIPTokenizer from `tokenizer` subfolder of stable-diffusion-3.5-large.\nLoaded tokenizer_3 as T5TokenizerFast from `tokenizer_3` subfolder of stable-diffusion-3.5-large.\nLoading pipeline components...: 67%|██████▋ | 6/9 [00:12<00:04, 1.64s/it]\u001b[ALoaded tokenizer_2 as CLIPTokenizer from `tokenizer_2` subfolder of stable-diffusion-3.5-large.\nLoaded text_encoder_2 as CLIPTextModelWithProjection from `text_encoder_2` subfolder of stable-diffusion-3.5-large.\nLoading pipeline components...: 89%|████████▉ | 8/9 [00:13<00:01, 1.30s/it]\u001b[ALoaded vae as AutoencoderKL from `vae` subfolder of stable-diffusion-3.5-large.\nLoading pipeline components...: 100%|██████████| 9/9 [00:13<00:00, 1.53s/it]\nSteps: 100%|██████████| 700/700 [05:18<00:00, 2.20it/s, loss=0.141, lr=0.0001]\n./\n./output/\n./output/sd35_large_train_replicate/\n./output/sd35_large_train_replicate/lora.safetensors",
"metrics": {
"predict_time": 381.84731858,
"total_time": 448.071794
},
"output": "https://replicate.delivery/yhqm/jIEgASHbbbZOKdpHzeXSs69U7I7EuDmLQMvj0RDp1ONR8O1JA/trained_model.tar",
"started_at": "2024-10-25T23:32:52.392475Z",
"status": "succeeded",
"urls": {
"stream": "https://stream.replicate.com/v1/files/qoxq-f7ssj3usyyzpbiefbnvm5arzsinbfancnamm5xjc65jku7zrmela",
"get": "https://api.replicate.com/v1/predictions/ng14j2cff1rj40cjrr2vbz667m",
"cancel": "https://api.replicate.com/v1/predictions/ng14j2cff1rj40cjrr2vbz667m/cancel"
},
"version": "cd6419a53b69fd410a912d945fa481a2a9ecfc4ab93062ed76c53f6e617f89e9"
}
Using seed: 3595070789
Extracted 16 files from zip to input_images
Using params: ['accelerate', 'launch', '--dynamo_backend', 'no', 'train_dreambooth_lora_sd3.py', '--pretrained_model_name_or_path', 'stable-diffusion-3.5-large', '--instance_data_dir', 'input_images', '--rank', '16', '--output_dir', '/tmp/train/output/sd35_large_train_replicate', '--mixed_precision', 'bf16', '--instance_prompt', 'Frog, yarn art style', '--resolution', '768', '--train_batch_size', '1', '--gradient_accumulation_steps', '1', '--optimizer', 'AdamW', '--learning_rate', '0.0001', '--lr_scheduler', 'constant', '--lr_warmup_steps', '0', '--max_train_steps', '700', '--checkpointing_steps', '701', '--seed', '3595070789', '--logging_dir', '/tmp/logs']
10/25/2024 23:33:02 - INFO - __main__ - Distributed environment: DistributedType.NO
Num processes: 1
Process index: 0
Local process index: 0
Device: cuda
Mixed precision type: bf16
You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers
You are using a model of type clip_text_model to instantiate a model of type . This is not supported for all configurations of models and can yield errors.
You are using a model of type clip_text_model to instantiate a model of type . This is not supported for all configurations of models and can yield errors.
You are using a model of type t5 to instantiate a model of type . This is not supported for all configurations of models and can yield errors.
{'base_image_seq_len', 'base_shift', 'max_shift', 'max_image_seq_len', 'use_dynamic_shifting'} was not found in config. Values will be initialized to default values.
Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]
Loading checkpoint shards: 50%|█████ | 1/2 [00:03<00:03, 3.67s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00, 3.64s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00, 3.64s/it]
{'dual_attention_layers'} was not found in config. Values will be initialized to default values.
10/25/2024 23:33:53 - INFO - __main__ - ***** Running training *****
10/25/2024 23:33:53 - INFO - __main__ - Num examples = 16
10/25/2024 23:33:53 - INFO - __main__ - Num batches each epoch = 16
10/25/2024 23:33:53 - INFO - __main__ - Num Epochs = 44
10/25/2024 23:33:53 - INFO - __main__ - Instantaneous batch size per device = 1
10/25/2024 23:33:53 - INFO - __main__ - Total train batch size (w. parallel, distributed & accumulation) = 1
10/25/2024 23:33:53 - INFO - __main__ - Gradient Accumulation steps = 1
10/25/2024 23:33:53 - INFO - __main__ - Total optimization steps = 700
Steps: 0%| | 0/700 [00:00<?, ?it/s]
Steps: 0%| | 1/700 [00:00<07:26, 1.56it/s]
Steps: 0%| | 1/700 [00:00<07:26, 1.56it/s, loss=0.132, lr=0.0001]
Steps: 0%| | 2/700 [00:01<05:50, 1.99it/s, loss=0.132, lr=0.0001]
Steps: 0%| | 2/700 [00:01<05:50, 1.99it/s, loss=0.189, lr=0.0001]
Steps: 0%| | 3/700 [00:01<05:27, 2.13it/s, loss=0.189, lr=0.0001]
Steps: 0%| | 3/700 [00:01<05:27, 2.13it/s, loss=0.0392, lr=0.0001]
Steps: 1%| | 4/700 [00:01<05:17, 2.20it/s, loss=0.0392, lr=0.0001]
Steps: 1%| | 4/700 [00:01<05:17, 2.20it/s, loss=0.203, lr=0.0001]
Steps: 1%| | 5/700 [00:02<05:10, 2.24it/s, loss=0.203, lr=0.0001]
Steps: 1%| | 5/700 [00:02<05:10, 2.24it/s, loss=0.165, lr=0.0001]
Steps: 1%| | 6/700 [00:02<05:07, 2.26it/s, loss=0.165, lr=0.0001]
Steps: 1%| | 6/700 [00:02<05:07, 2.26it/s, loss=0.175, lr=0.0001]
Steps: 1%| | 7/700 [00:03<05:04, 2.27it/s, loss=0.175, lr=0.0001]
Steps: 1%| | 7/700 [00:03<05:04, 2.27it/s, loss=0.171, lr=0.0001]
Steps: 1%| | 8/700 [00:03<05:02, 2.28it/s, loss=0.171, lr=0.0001]
Steps: 1%| | 8/700 [00:03<05:02, 2.28it/s, loss=0.141, lr=0.0001]
Steps: 1%|▏ | 9/700 [00:04<05:01, 2.29it/s, loss=0.141, lr=0.0001]
Steps: 1%|▏ | 9/700 [00:04<05:01, 2.29it/s, loss=0.203, lr=0.0001]
Steps: 1%|▏ | 10/700 [00:04<05:00, 2.30it/s, loss=0.203, lr=0.0001]
Steps: 1%|▏ | 10/700 [00:04<05:00, 2.30it/s, loss=0.0762, lr=0.0001]
Steps: 2%|▏ | 11/700 [00:04<04:59, 2.30it/s, loss=0.0762, lr=0.0001]
Steps: 2%|▏ | 11/700 [00:04<04:59, 2.30it/s, loss=0.0826, lr=0.0001]
Steps: 2%|▏ | 12/700 [00:05<04:59, 2.30it/s, loss=0.0826, lr=0.0001]
Steps: 2%|▏ | 12/700 [00:05<04:59, 2.30it/s, loss=0.19, lr=0.0001]
Steps: 2%|▏ | 13/700 [00:05<04:59, 2.30it/s, loss=0.19, lr=0.0001]
Steps: 2%|▏ | 13/700 [00:05<04:59, 2.30it/s, loss=0.285, lr=0.0001]
Steps: 2%|▏ | 14/700 [00:06<04:58, 2.30it/s, loss=0.285, lr=0.0001]
Steps: 2%|▏ | 14/700 [00:06<04:58, 2.30it/s, loss=0.144, lr=0.0001]
Steps: 2%|▏ | 15/700 [00:06<04:57, 2.30it/s, loss=0.144, lr=0.0001]
Steps: 2%|▏ | 15/700 [00:06<04:57, 2.30it/s, loss=0.134, lr=0.0001]
Steps: 2%|▏ | 16/700 [00:07<04:56, 2.31it/s, loss=0.134, lr=0.0001]
Steps: 2%|▏ | 16/700 [00:07<04:56, 2.31it/s, loss=0.189, lr=0.0001]
Steps: 2%|▏ | 17/700 [00:07<04:57, 2.30it/s, loss=0.189, lr=0.0001]
Steps: 2%|▏ | 17/700 [00:07<04:57, 2.30it/s, loss=0.097, lr=0.0001]
Steps: 3%|▎ | 18/700 [00:07<04:56, 2.30it/s, loss=0.097, lr=0.0001]
Steps: 3%|▎ | 18/700 [00:08<04:56, 2.30it/s, loss=0.215, lr=0.0001]
Steps: 3%|▎ | 19/700 [00:08<04:55, 2.30it/s, loss=0.215, lr=0.0001]
Steps: 3%|▎ | 19/700 [00:08<04:55, 2.30it/s, loss=0.173, lr=0.0001]
Steps: 3%|▎ | 20/700 [00:08<04:55, 2.30it/s, loss=0.173, lr=0.0001]
Steps: 3%|▎ | 20/700 [00:08<04:55, 2.30it/s, loss=0.0768, lr=0.0001]
Steps: 3%|▎ | 21/700 [00:09<04:54, 2.30it/s, loss=0.0768, lr=0.0001]
Steps: 3%|▎ | 21/700 [00:09<04:54, 2.30it/s, loss=0.0714, lr=0.0001]
Steps: 3%|▎ | 22/700 [00:09<04:54, 2.30it/s, loss=0.0714, lr=0.0001]
Steps: 3%|▎ | 22/700 [00:09<04:54, 2.30it/s, loss=0.148, lr=0.0001]
Steps: 3%|▎ | 23/700 [00:10<04:54, 2.30it/s, loss=0.148, lr=0.0001]
Steps: 3%|▎ | 23/700 [00:10<04:54, 2.30it/s, loss=0.297, lr=0.0001]
Steps: 3%|▎ | 24/700 [00:10<04:53, 2.30it/s, loss=0.297, lr=0.0001]
Steps: 3%|▎ | 24/700 [00:10<04:53, 2.30it/s, loss=0.0754, lr=0.0001]
Steps: 4%|▎ | 25/700 [00:11<04:53, 2.30it/s, loss=0.0754, lr=0.0001]
Steps: 4%|▎ | 25/700 [00:11<04:53, 2.30it/s, loss=0.116, lr=0.0001]
Steps: 4%|▎ | 26/700 [00:11<04:52, 2.30it/s, loss=0.116, lr=0.0001]
Steps: 4%|▎ | 26/700 [00:11<04:52, 2.30it/s, loss=0.0963, lr=0.0001]
Steps: 4%|▍ | 27/700 [00:11<04:52, 2.30it/s, loss=0.0963, lr=0.0001]
Steps: 4%|▍ | 27/700 [00:11<04:52, 2.30it/s, loss=0.0578, lr=0.0001]
Steps: 4%|▍ | 28/700 [00:12<04:51, 2.30it/s, loss=0.0578, lr=0.0001]
Steps: 4%|▍ | 28/700 [00:12<04:51, 2.30it/s, loss=0.0973, lr=0.0001]
Steps: 4%|▍ | 29/700 [00:12<04:51, 2.30it/s, loss=0.0973, lr=0.0001]
Steps: 4%|▍ | 29/700 [00:12<04:51, 2.30it/s, loss=0.116, lr=0.0001]
Steps: 4%|▍ | 30/700 [00:13<04:51, 2.30it/s, loss=0.116, lr=0.0001]
Steps: 4%|▍ | 30/700 [00:13<04:51, 2.30it/s, loss=0.191, lr=0.0001]
Steps: 4%|▍ | 31/700 [00:13<04:50, 2.30it/s, loss=0.191, lr=0.0001]
Steps: 4%|▍ | 31/700 [00:13<04:50, 2.30it/s, loss=0.113, lr=0.0001]
Steps: 5%|▍ | 32/700 [00:14<04:49, 2.30it/s, loss=0.113, lr=0.0001]
Steps: 5%|▍ | 32/700 [00:14<04:49, 2.30it/s, loss=0.187, lr=0.0001]
Steps: 5%|▍ | 33/700 [00:14<04:50, 2.29it/s, loss=0.187, lr=0.0001]
Steps: 5%|▍ | 33/700 [00:14<04:50, 2.29it/s, loss=0.104, lr=0.0001]
Steps: 5%|▍ | 34/700 [00:14<04:50, 2.30it/s, loss=0.104, lr=0.0001]
Steps: 5%|▍ | 34/700 [00:14<04:50, 2.30it/s, loss=0.176, lr=0.0001]
Steps: 5%|▌ | 35/700 [00:15<04:49, 2.30it/s, loss=0.176, lr=0.0001]
Steps: 5%|▌ | 35/700 [00:15<04:49, 2.30it/s, loss=0.0212, lr=0.0001]
Steps: 5%|▌ | 36/700 [00:15<04:48, 2.30it/s, loss=0.0212, lr=0.0001]
Steps: 5%|▌ | 36/700 [00:15<04:48, 2.30it/s, loss=0.0399, lr=0.0001]
Steps: 5%|▌ | 37/700 [00:16<04:47, 2.30it/s, loss=0.0399, lr=0.0001]
Steps: 5%|▌ | 37/700 [00:16<04:47, 2.30it/s, loss=0.078, lr=0.0001]
Steps: 5%|▌ | 38/700 [00:16<04:47, 2.30it/s, loss=0.078, lr=0.0001]
Steps: 5%|▌ | 38/700 [00:16<04:47, 2.30it/s, loss=0.208, lr=0.0001]
Steps: 6%|▌ | 39/700 [00:17<04:46, 2.31it/s, loss=0.208, lr=0.0001]
Steps: 6%|▌ | 39/700 [00:17<04:46, 2.31it/s, loss=0.212, lr=0.0001]
Steps: 6%|▌ | 40/700 [00:17<04:46, 2.31it/s, loss=0.212, lr=0.0001]
Steps: 6%|▌ | 40/700 [00:17<04:46, 2.31it/s, loss=0.119, lr=0.0001]
Steps: 6%|▌ | 41/700 [00:17<04:45, 2.31it/s, loss=0.119, lr=0.0001]
Steps: 6%|▌ | 41/700 [00:18<04:45, 2.31it/s, loss=0.186, lr=0.0001]
Steps: 6%|▌ | 42/700 [00:18<04:45, 2.31it/s, loss=0.186, lr=0.0001]
Steps: 6%|▌ | 42/700 [00:18<04:45, 2.31it/s, loss=0.0453, lr=0.0001]
Steps: 6%|▌ | 43/700 [00:18<04:44, 2.31it/s, loss=0.0453, lr=0.0001]
Steps: 6%|▌ | 43/700 [00:18<04:44, 2.31it/s, loss=0.125, lr=0.0001]
Steps: 6%|▋ | 44/700 [00:19<04:44, 2.31it/s, loss=0.125, lr=0.0001]
Steps: 6%|▋ | 44/700 [00:19<04:44, 2.31it/s, loss=0.299, lr=0.0001]
Steps: 6%|▋ | 45/700 [00:19<04:43, 2.31it/s, loss=0.299, lr=0.0001]
Steps: 6%|▋ | 45/700 [00:19<04:43, 2.31it/s, loss=0.0874, lr=0.0001]
Steps: 7%|▋ | 46/700 [00:20<04:43, 2.31it/s, loss=0.0874, lr=0.0001]
Steps: 7%|▋ | 46/700 [00:20<04:43, 2.31it/s, loss=0.178, lr=0.0001]
Steps: 7%|▋ | 47/700 [00:20<04:43, 2.31it/s, loss=0.178, lr=0.0001]
Steps: 7%|▋ | 47/700 [00:20<04:43, 2.31it/s, loss=0.166, lr=0.0001]
Steps: 7%|▋ | 48/700 [00:21<04:42, 2.31it/s, loss=0.166, lr=0.0001]
Steps: 7%|▋ | 48/700 [00:21<04:42, 2.31it/s, loss=0.0528, lr=0.0001]
Steps: 7%|▋ | 49/700 [00:21<04:43, 2.30it/s, loss=0.0528, lr=0.0001]
Steps: 7%|▋ | 49/700 [00:21<04:43, 2.30it/s, loss=0.159, lr=0.0001]
Steps: 7%|▋ | 50/700 [00:21<04:42, 2.30it/s, loss=0.159, lr=0.0001]
Steps: 7%|▋ | 50/700 [00:21<04:42, 2.30it/s, loss=0.103, lr=0.0001]
Steps: 7%|▋ | 51/700 [00:22<04:41, 2.30it/s, loss=0.103, lr=0.0001]
Steps: 7%|▋ | 51/700 [00:22<04:41, 2.30it/s, loss=0.034, lr=0.0001]
Steps: 7%|▋ | 52/700 [00:22<04:41, 2.30it/s, loss=0.034, lr=0.0001]
Steps: 7%|▋ | 52/700 [00:22<04:41, 2.30it/s, loss=0.0843, lr=0.0001]
Steps: 8%|▊ | 53/700 [00:23<04:40, 2.31it/s, loss=0.0843, lr=0.0001]
Steps: 8%|▊ | 53/700 [00:23<04:40, 2.31it/s, loss=0.163, lr=0.0001]
Steps: 8%|▊ | 54/700 [00:23<04:40, 2.31it/s, loss=0.163, lr=0.0001]
Steps: 8%|▊ | 54/700 [00:23<04:40, 2.31it/s, loss=0.202, lr=0.0001]
Steps: 8%|▊ | 55/700 [00:24<04:40, 2.30it/s, loss=0.202, lr=0.0001]
Steps: 8%|▊ | 55/700 [00:24<04:40, 2.30it/s, loss=0.178, lr=0.0001]
Steps: 8%|▊ | 56/700 [00:24<04:39, 2.31it/s, loss=0.178, lr=0.0001]
Steps: 8%|▊ | 56/700 [00:24<04:39, 2.31it/s, loss=0.215, lr=0.0001]
Steps: 8%|▊ | 57/700 [00:24<04:38, 2.31it/s, loss=0.215, lr=0.0001]
Steps: 8%|▊ | 57/700 [00:24<04:38, 2.31it/s, loss=0.0982, lr=0.0001]
Steps: 8%|▊ | 58/700 [00:25<04:38, 2.31it/s, loss=0.0982, lr=0.0001]
Steps: 8%|▊ | 58/700 [00:25<04:38, 2.31it/s, loss=0.143, lr=0.0001]
Steps: 8%|▊ | 59/700 [00:25<04:37, 2.31it/s, loss=0.143, lr=0.0001]
Steps: 8%|▊ | 59/700 [00:25<04:37, 2.31it/s, loss=0.156, lr=0.0001]
Steps: 9%|▊ | 60/700 [00:26<04:37, 2.31it/s, loss=0.156, lr=0.0001]
Steps: 9%|▊ | 60/700 [00:26<04:37, 2.31it/s, loss=0.117, lr=0.0001]
Steps: 9%|▊ | 61/700 [00:26<04:36, 2.31it/s, loss=0.117, lr=0.0001]
Steps: 9%|▊ | 61/700 [00:26<04:36, 2.31it/s, loss=0.168, lr=0.0001]
Steps: 9%|▉ | 62/700 [00:27<04:36, 2.31it/s, loss=0.168, lr=0.0001]
Steps: 9%|▉ | 62/700 [00:27<04:36, 2.31it/s, loss=0.098, lr=0.0001]
Steps: 9%|▉ | 63/700 [00:27<04:36, 2.31it/s, loss=0.098, lr=0.0001]
Steps: 9%|▉ | 63/700 [00:27<04:36, 2.31it/s, loss=0.16, lr=0.0001]
Steps: 9%|▉ | 64/700 [00:27<04:35, 2.31it/s, loss=0.16, lr=0.0001]
Steps: 9%|▉ | 64/700 [00:27<04:35, 2.31it/s, loss=0.0913, lr=0.0001]
Steps: 9%|▉ | 65/700 [00:28<04:36, 2.30it/s, loss=0.0913, lr=0.0001]
Steps: 9%|▉ | 65/700 [00:28<04:36, 2.30it/s, loss=0.232, lr=0.0001]
Steps: 9%|▉ | 66/700 [00:28<04:36, 2.29it/s, loss=0.232, lr=0.0001]
Steps: 9%|▉ | 66/700 [00:28<04:36, 2.29it/s, loss=0.204, lr=0.0001]
Steps: 10%|▉ | 67/700 [00:29<04:35, 2.30it/s, loss=0.204, lr=0.0001]
Steps: 10%|▉ | 67/700 [00:29<04:35, 2.30it/s, loss=0.0839, lr=0.0001]
Steps: 10%|▉ | 68/700 [00:29<04:34, 2.30it/s, loss=0.0839, lr=0.0001]
Steps: 10%|▉ | 68/700 [00:29<04:34, 2.30it/s, loss=0.163, lr=0.0001]
Steps: 10%|▉ | 69/700 [00:30<04:33, 2.30it/s, loss=0.163, lr=0.0001]
Steps: 10%|▉ | 69/700 [00:30<04:33, 2.30it/s, loss=0.117, lr=0.0001]
Steps: 10%|█ | 70/700 [00:30<04:33, 2.31it/s, loss=0.117, lr=0.0001]
Steps: 10%|█ | 70/700 [00:30<04:33, 2.31it/s, loss=0.116, lr=0.0001]
Steps: 10%|█ | 71/700 [00:30<04:32, 2.31it/s, loss=0.116, lr=0.0001]
Steps: 10%|█ | 71/700 [00:31<04:32, 2.31it/s, loss=0.273, lr=0.0001]
Steps: 10%|█ | 72/700 [00:31<04:32, 2.31it/s, loss=0.273, lr=0.0001]
Steps: 10%|█ | 72/700 [00:31<04:32, 2.31it/s, loss=0.2, lr=0.0001]
Steps: 10%|█ | 73/700 [00:31<04:31, 2.31it/s, loss=0.2, lr=0.0001]
Steps: 10%|█ | 73/700 [00:31<04:31, 2.31it/s, loss=0.189, lr=0.0001]
Steps: 11%|█ | 74/700 [00:32<04:31, 2.31it/s, loss=0.189, lr=0.0001]
Steps: 11%|█ | 74/700 [00:32<04:31, 2.31it/s, loss=0.201, lr=0.0001]
Steps: 11%|█ | 75/700 [00:32<04:30, 2.31it/s, loss=0.201, lr=0.0001]
Steps: 11%|█ | 75/700 [00:32<04:30, 2.31it/s, loss=0.13, lr=0.0001]
Steps: 11%|█ | 76/700 [00:33<04:30, 2.31it/s, loss=0.13, lr=0.0001]
Steps: 11%|█ | 76/700 [00:33<04:30, 2.31it/s, loss=0.128, lr=0.0001]
Steps: 11%|█ | 77/700 [00:33<04:29, 2.31it/s, loss=0.128, lr=0.0001]
Steps: 11%|█ | 77/700 [00:33<04:29, 2.31it/s, loss=0.19, lr=0.0001]
Steps: 11%|█ | 78/700 [00:34<04:29, 2.31it/s, loss=0.19, lr=0.0001]
Steps: 11%|█ | 78/700 [00:34<04:29, 2.31it/s, loss=0.117, lr=0.0001]
Steps: 11%|█▏ | 79/700 [00:34<04:28, 2.31it/s, loss=0.117, lr=0.0001]
Steps: 11%|█▏ | 79/700 [00:34<04:28, 2.31it/s, loss=0.0576, lr=0.0001]
Steps: 11%|█▏ | 80/700 [00:34<04:28, 2.31it/s, loss=0.0576, lr=0.0001]
Steps: 11%|█▏ | 80/700 [00:34<04:28, 2.31it/s, loss=0.0391, lr=0.0001]
Steps: 12%|█▏ | 81/700 [00:35<04:29, 2.30it/s, loss=0.0391, lr=0.0001]
Steps: 12%|█▏ | 81/700 [00:35<04:29, 2.30it/s, loss=0.157, lr=0.0001]
Steps: 12%|█▏ | 82/700 [00:35<04:28, 2.30it/s, loss=0.157, lr=0.0001]
Steps: 12%|█▏ | 82/700 [00:35<04:28, 2.30it/s, loss=0.0326, lr=0.0001]
Steps: 12%|█▏ | 83/700 [00:36<04:27, 2.30it/s, loss=0.0326, lr=0.0001]
Steps: 12%|█▏ | 83/700 [00:36<04:27, 2.30it/s, loss=0.0692, lr=0.0001]
Steps: 12%|█▏ | 84/700 [00:36<04:27, 2.30it/s, loss=0.0692, lr=0.0001]
Steps: 12%|█▏ | 84/700 [00:36<04:27, 2.30it/s, loss=0.175, lr=0.0001]
Steps: 12%|█▏ | 85/700 [00:37<04:26, 2.31it/s, loss=0.175, lr=0.0001]
Steps: 12%|█▏ | 85/700 [00:37<04:26, 2.31it/s, loss=0.134, lr=0.0001]
Steps: 12%|█▏ | 86/700 [00:37<04:26, 2.31it/s, loss=0.134, lr=0.0001]
Steps: 12%|█▏ | 86/700 [00:37<04:26, 2.31it/s, loss=0.137, lr=0.0001]
Steps: 12%|█▏ | 87/700 [00:37<04:26, 2.30it/s, loss=0.137, lr=0.0001]
Steps: 12%|█▏ | 87/700 [00:37<04:26, 2.30it/s, loss=0.0814, lr=0.0001]
Steps: 13%|█▎ | 88/700 [00:38<04:25, 2.30it/s, loss=0.0814, lr=0.0001]
Steps: 13%|█▎ | 88/700 [00:38<04:25, 2.30it/s, loss=0.29, lr=0.0001]
Steps: 13%|█▎ | 89/700 [00:38<04:25, 2.31it/s, loss=0.29, lr=0.0001]
Steps: 13%|█▎ | 89/700 [00:38<04:25, 2.31it/s, loss=0.122, lr=0.0001]
Steps: 13%|█▎ | 90/700 [00:39<04:24, 2.31it/s, loss=0.122, lr=0.0001]
Steps: 13%|█▎ | 90/700 [00:39<04:24, 2.31it/s, loss=0.0188, lr=0.0001]
Steps: 13%|█▎ | 91/700 [00:39<04:24, 2.31it/s, loss=0.0188, lr=0.0001]
Steps: 13%|█▎ | 91/700 [00:39<04:24, 2.31it/s, loss=0.146, lr=0.0001]
Steps: 13%|█▎ | 92/700 [00:40<04:23, 2.31it/s, loss=0.146, lr=0.0001]
Steps: 13%|█▎ | 92/700 [00:40<04:23, 2.31it/s, loss=0.0699, lr=0.0001]
Steps: 13%|█▎ | 93/700 [00:40<04:22, 2.31it/s, loss=0.0699, lr=0.0001]
Steps: 13%|█▎ | 93/700 [00:40<04:22, 2.31it/s, loss=0.0927, lr=0.0001]
Steps: 13%|█▎ | 94/700 [00:40<04:22, 2.31it/s, loss=0.0927, lr=0.0001]
Steps: 13%|█▎ | 94/700 [00:40<04:22, 2.31it/s, loss=0.147, lr=0.0001]
Steps: 14%|█▎ | 95/700 [00:41<04:21, 2.31it/s, loss=0.147, lr=0.0001]
Steps: 14%|█▎ | 95/700 [00:41<04:21, 2.31it/s, loss=0.0597, lr=0.0001]
Steps: 14%|█▎ | 96/700 [00:41<04:21, 2.31it/s, loss=0.0597, lr=0.0001]
Steps: 14%|█▎ | 96/700 [00:41<04:21, 2.31it/s, loss=0.107, lr=0.0001]
Steps: 14%|█▍ | 97/700 [00:42<04:22, 2.30it/s, loss=0.107, lr=0.0001]
Steps: 14%|█▍ | 97/700 [00:42<04:22, 2.30it/s, loss=0.103, lr=0.0001]
Steps: 14%|█▍ | 98/700 [00:42<04:21, 2.30it/s, loss=0.103, lr=0.0001]
Steps: 14%|█▍ | 98/700 [00:42<04:21, 2.30it/s, loss=0.127, lr=0.0001]
Steps: 14%|█▍ | 99/700 [00:43<04:21, 2.30it/s, loss=0.127, lr=0.0001]
Steps: 14%|█▍ | 99/700 [00:43<04:21, 2.30it/s, loss=0.0597, lr=0.0001]
Steps: 14%|█▍ | 100/700 [00:43<04:21, 2.30it/s, loss=0.0597, lr=0.0001]
Steps: 14%|█▍ | 100/700 [00:43<04:21, 2.30it/s, loss=0.0843, lr=0.0001]
Steps: 14%|█▍ | 101/700 [00:44<04:20, 2.30it/s, loss=0.0843, lr=0.0001]
Steps: 14%|█▍ | 101/700 [00:44<04:20, 2.30it/s, loss=0.0791, lr=0.0001]
Steps: 15%|█▍ | 102/700 [00:44<04:19, 2.30it/s, loss=0.0791, lr=0.0001]
Steps: 15%|█▍ | 102/700 [00:44<04:19, 2.30it/s, loss=0.0923, lr=0.0001]
Steps: 15%|█▍ | 103/700 [00:44<04:19, 2.30it/s, loss=0.0923, lr=0.0001]
Steps: 15%|█▍ | 103/700 [00:44<04:19, 2.30it/s, loss=0.159, lr=0.0001]
Steps: 15%|█▍ | 104/700 [00:45<04:18, 2.30it/s, loss=0.159, lr=0.0001]
Steps: 15%|█▍ | 104/700 [00:45<04:18, 2.30it/s, loss=0.304, lr=0.0001]
Steps: 15%|█▌ | 105/700 [00:45<04:18, 2.30it/s, loss=0.304, lr=0.0001]
Steps: 15%|█▌ | 105/700 [00:45<04:18, 2.30it/s, loss=0.0677, lr=0.0001]
Steps: 15%|█▌ | 106/700 [00:46<04:17, 2.31it/s, loss=0.0677, lr=0.0001]
Steps: 15%|█▌ | 106/700 [00:46<04:17, 2.31it/s, loss=0.102, lr=0.0001]
Steps: 15%|█▌ | 107/700 [00:46<04:17, 2.31it/s, loss=0.102, lr=0.0001]
Steps: 15%|█▌ | 107/700 [00:46<04:17, 2.31it/s, loss=0.129, lr=0.0001]
Steps: 15%|█▌ | 108/700 [00:47<04:16, 2.31it/s, loss=0.129, lr=0.0001]
Steps: 15%|█▌ | 108/700 [00:47<04:16, 2.31it/s, loss=0.131, lr=0.0001]
Steps: 16%|█▌ | 109/700 [00:47<04:16, 2.31it/s, loss=0.131, lr=0.0001]
Steps: 16%|█▌ | 109/700 [00:47<04:16, 2.31it/s, loss=0.0958, lr=0.0001]
Steps: 16%|█▌ | 110/700 [00:47<04:15, 2.31it/s, loss=0.0958, lr=0.0001]
Steps: 16%|█▌ | 110/700 [00:47<04:15, 2.31it/s, loss=0.244, lr=0.0001]
Steps: 16%|█▌ | 111/700 [00:48<04:15, 2.31it/s, loss=0.244, lr=0.0001]
Steps: 16%|█▌ | 111/700 [00:48<04:15, 2.31it/s, loss=0.278, lr=0.0001]
Steps: 16%|█▌ | 112/700 [00:48<04:14, 2.31it/s, loss=0.278, lr=0.0001]
Steps: 16%|█▌ | 112/700 [00:48<04:14, 2.31it/s, loss=0.1, lr=0.0001]
Steps: 16%|█▌ | 113/700 [00:49<04:15, 2.30it/s, loss=0.1, lr=0.0001]
Steps: 16%|█▌ | 113/700 [00:49<04:15, 2.30it/s, loss=0.133, lr=0.0001]
Steps: 16%|█▋ | 114/700 [00:49<04:14, 2.30it/s, loss=0.133, lr=0.0001]
Steps: 16%|█▋ | 114/700 [00:49<04:14, 2.30it/s, loss=0.253, lr=0.0001]
Steps: 16%|█▋ | 115/700 [00:50<04:14, 2.30it/s, loss=0.253, lr=0.0001]
Steps: 16%|█▋ | 115/700 [00:50<04:14, 2.30it/s, loss=0.114, lr=0.0001]
Steps: 17%|█▋ | 116/700 [00:50<04:13, 2.30it/s, loss=0.114, lr=0.0001]
Steps: 17%|█▋ | 116/700 [00:50<04:13, 2.30it/s, loss=0.154, lr=0.0001]
Steps: 17%|█▋ | 117/700 [00:50<04:14, 2.29it/s, loss=0.154, lr=0.0001]
Steps: 17%|█▋ | 117/700 [00:50<04:14, 2.29it/s, loss=0.202, lr=0.0001]
Steps: 17%|█▋ | 118/700 [00:51<04:14, 2.29it/s, loss=0.202, lr=0.0001]
Steps: 17%|█▋ | 118/700 [00:51<04:14, 2.29it/s, loss=0.0992, lr=0.0001]
Steps: 17%|█▋ | 119/700 [00:51<04:13, 2.29it/s, loss=0.0992, lr=0.0001]
Steps: 17%|█▋ | 119/700 [00:51<04:13, 2.29it/s, loss=0.166, lr=0.0001]
Steps: 17%|█▋ | 120/700 [00:52<04:12, 2.30it/s, loss=0.166, lr=0.0001]
Steps: 17%|█▋ | 120/700 [00:52<04:12, 2.30it/s, loss=0.124, lr=0.0001]
Steps: 17%|█▋ | 121/700 [00:52<04:11, 2.30it/s, loss=0.124, lr=0.0001]
Steps: 17%|█▋ | 121/700 [00:52<04:11, 2.30it/s, loss=0.0382, lr=0.0001]
Steps: 17%|█▋ | 122/700 [00:53<04:11, 2.29it/s, loss=0.0382, lr=0.0001]
Steps: 17%|█▋ | 122/700 [00:53<04:11, 2.29it/s, loss=0.0882, lr=0.0001]
Steps: 18%|█▊ | 123/700 [00:53<04:11, 2.30it/s, loss=0.0882, lr=0.0001]
Steps: 18%|█▊ | 123/700 [00:53<04:11, 2.30it/s, loss=0.0856, lr=0.0001]
Steps: 18%|█▊ | 124/700 [00:54<04:10, 2.30it/s, loss=0.0856, lr=0.0001]
Steps: 18%|█▊ | 124/700 [00:54<04:10, 2.30it/s, loss=0.145, lr=0.0001]
Steps: 18%|█▊ | 125/700 [00:54<04:10, 2.29it/s, loss=0.145, lr=0.0001]
Steps: 18%|█▊ | 125/700 [00:54<04:10, 2.29it/s, loss=0.14, lr=0.0001]
Steps: 18%|█▊ | 126/700 [00:54<04:09, 2.30it/s, loss=0.14, lr=0.0001]
Steps: 18%|█▊ | 126/700 [00:54<04:09, 2.30it/s, loss=0.194, lr=0.0001]
Steps: 18%|█▊ | 127/700 [00:55<04:08, 2.31it/s, loss=0.194, lr=0.0001]
Steps: 18%|█▊ | 127/700 [00:55<04:08, 2.31it/s, loss=0.101, lr=0.0001]
Steps: 18%|█▊ | 128/700 [00:55<04:07, 2.31it/s, loss=0.101, lr=0.0001]
Steps: 18%|█▊ | 128/700 [00:55<04:07, 2.31it/s, loss=0.106, lr=0.0001]
Steps: 18%|█▊ | 129/700 [00:56<04:08, 2.30it/s, loss=0.106, lr=0.0001]
Steps: 18%|█▊ | 129/700 [00:56<04:08, 2.30it/s, loss=0.138, lr=0.0001]
Steps: 19%|█▊ | 130/700 [00:56<04:07, 2.30it/s, loss=0.138, lr=0.0001]
Steps: 19%|█▊ | 130/700 [00:56<04:07, 2.30it/s, loss=0.229, lr=0.0001]
Steps: 19%|█▊ | 131/700 [00:57<04:07, 2.30it/s, loss=0.229, lr=0.0001]
Steps: 19%|█▊ | 131/700 [00:57<04:07, 2.30it/s, loss=0.125, lr=0.0001]
Steps: 19%|█▉ | 132/700 [00:57<04:06, 2.30it/s, loss=0.125, lr=0.0001]
Steps: 19%|█▉ | 132/700 [00:57<04:06, 2.30it/s, loss=0.251, lr=0.0001]
Steps: 19%|█▉ | 133/700 [00:57<04:06, 2.30it/s, loss=0.251, lr=0.0001]
Steps: 19%|█▉ | 133/700 [00:57<04:06, 2.30it/s, loss=0.111, lr=0.0001]
Steps: 19%|█▉ | 134/700 [00:58<04:05, 2.30it/s, loss=0.111, lr=0.0001]
Steps: 19%|█▉ | 134/700 [00:58<04:05, 2.30it/s, loss=0.0731, lr=0.0001]
Steps: 19%|█▉ | 135/700 [00:58<04:05, 2.30it/s, loss=0.0731, lr=0.0001]
Steps: 19%|█▉ | 135/700 [00:58<04:05, 2.30it/s, loss=0.146, lr=0.0001]
Steps: 19%|█▉ | 136/700 [00:59<04:05, 2.30it/s, loss=0.146, lr=0.0001]
Steps: 19%|█▉ | 136/700 [00:59<04:05, 2.30it/s, loss=0.0851, lr=0.0001]
Steps: 20%|█▉ | 137/700 [00:59<04:04, 2.30it/s, loss=0.0851, lr=0.0001]
Steps: 20%|█▉ | 137/700 [00:59<04:04, 2.30it/s, loss=0.245, lr=0.0001]
Steps: 20%|█▉ | 138/700 [01:00<04:03, 2.31it/s, loss=0.245, lr=0.0001]
Steps: 20%|█▉ | 138/700 [01:00<04:03, 2.31it/s, loss=0.113, lr=0.0001]
Steps: 20%|█▉ | 139/700 [01:00<04:03, 2.30it/s, loss=0.113, lr=0.0001]
Steps: 20%|█▉ | 139/700 [01:00<04:03, 2.30it/s, loss=0.158, lr=0.0001]
Steps: 20%|██ | 140/700 [01:00<04:02, 2.31it/s, loss=0.158, lr=0.0001]
Steps: 20%|██ | 140/700 [01:00<04:02, 2.31it/s, loss=0.0694, lr=0.0001]
Steps: 20%|██ | 141/700 [01:01<04:02, 2.31it/s, loss=0.0694, lr=0.0001]
Steps: 20%|██ | 141/700 [01:01<04:02, 2.31it/s, loss=0.0592, lr=0.0001]
Steps: 20%|██ | 142/700 [01:01<04:02, 2.31it/s, loss=0.0592, lr=0.0001]
Steps: 20%|██ | 142/700 [01:01<04:02, 2.31it/s, loss=0.0842, lr=0.0001]
Steps: 20%|██ | 143/700 [01:02<04:01, 2.31it/s, loss=0.0842, lr=0.0001]
Steps: 20%|██ | 143/700 [01:02<04:01, 2.31it/s, loss=0.286, lr=0.0001]
Steps: 21%|██ | 144/700 [01:02<04:00, 2.31it/s, loss=0.286, lr=0.0001]
Steps: 21%|██ | 144/700 [01:02<04:00, 2.31it/s, loss=0.153, lr=0.0001]
Steps: 21%|██ | 145/700 [01:03<04:01, 2.30it/s, loss=0.153, lr=0.0001]
Steps: 21%|██ | 145/700 [01:03<04:01, 2.30it/s, loss=0.128, lr=0.0001]
Steps: 21%|██ | 146/700 [01:03<04:00, 2.30it/s, loss=0.128, lr=0.0001]
Steps: 21%|██ | 146/700 [01:03<04:00, 2.30it/s, loss=0.135, lr=0.0001]
Steps: 21%|██ | 147/700 [01:03<03:59, 2.30it/s, loss=0.135, lr=0.0001]
Steps: 21%|██ | 147/700 [01:04<03:59, 2.30it/s, loss=0.133, lr=0.0001]
Steps: 21%|██ | 148/700 [01:04<03:59, 2.31it/s, loss=0.133, lr=0.0001]
Steps: 21%|██ | 148/700 [01:04<03:59, 2.31it/s, loss=0.139, lr=0.0001]
Steps: 21%|██▏ | 149/700 [01:04<03:58, 2.31it/s, loss=0.139, lr=0.0001]
Steps: 21%|██▏ | 149/700 [01:04<03:58, 2.31it/s, loss=0.0741, lr=0.0001]
Steps: 21%|██▏ | 150/700 [01:05<03:58, 2.31it/s, loss=0.0741, lr=0.0001]
Steps: 21%|██▏ | 150/700 [01:05<03:58, 2.31it/s, loss=0.26, lr=0.0001]
Steps: 22%|██▏ | 151/700 [01:05<03:57, 2.31it/s, loss=0.26, lr=0.0001]
Steps: 22%|██▏ | 151/700 [01:05<03:57, 2.31it/s, loss=0.14, lr=0.0001]
Steps: 22%|██▏ | 152/700 [01:06<03:57, 2.31it/s, loss=0.14, lr=0.0001]
Steps: 22%|██▏ | 152/700 [01:06<03:57, 2.31it/s, loss=0.118, lr=0.0001]
Steps: 22%|██▏ | 153/700 [01:06<03:56, 2.31it/s, loss=0.118, lr=0.0001]
Steps: 22%|██▏ | 153/700 [01:06<03:56, 2.31it/s, loss=0.119, lr=0.0001]
Steps: 22%|██▏ | 154/700 [01:07<03:56, 2.31it/s, loss=0.119, lr=0.0001]
Steps: 22%|██▏ | 154/700 [01:07<03:56, 2.31it/s, loss=0.0301, lr=0.0001]
Steps: 22%|██▏ | 155/700 [01:07<03:55, 2.31it/s, loss=0.0301, lr=0.0001]
Steps: 22%|██▏ | 155/700 [01:07<03:55, 2.31it/s, loss=0.147, lr=0.0001]
Steps: 22%|██▏ | 156/700 [01:07<03:55, 2.31it/s, loss=0.147, lr=0.0001]
Steps: 22%|██▏ | 156/700 [01:07<03:55, 2.31it/s, loss=0.246, lr=0.0001]
Steps: 22%|██▏ | 157/700 [01:08<03:55, 2.31it/s, loss=0.246, lr=0.0001]
Steps: 22%|██▏ | 157/700 [01:08<03:55, 2.31it/s, loss=0.281, lr=0.0001]
Steps: 23%|██▎ | 158/700 [01:08<03:54, 2.31it/s, loss=0.281, lr=0.0001]
Steps: 23%|██▎ | 158/700 [01:08<03:54, 2.31it/s, loss=0.114, lr=0.0001]
Steps: 23%|██▎ | 159/700 [01:09<03:54, 2.31it/s, loss=0.114, lr=0.0001]
Steps: 23%|██▎ | 159/700 [01:09<03:54, 2.31it/s, loss=0.0437, lr=0.0001]
Steps: 23%|██▎ | 160/700 [01:09<03:53, 2.31it/s, loss=0.0437, lr=0.0001]
Steps: 23%|██▎ | 160/700 [01:09<03:53, 2.31it/s, loss=0.0781, lr=0.0001]
Steps: 23%|██▎ | 161/700 [01:10<03:54, 2.30it/s, loss=0.0781, lr=0.0001]
Steps: 23%|██▎ | 161/700 [01:10<03:54, 2.30it/s, loss=0.0544, lr=0.0001]
Steps: 23%|██▎ | 162/700 [01:10<03:53, 2.30it/s, loss=0.0544, lr=0.0001]
Steps: 23%|██▎ | 162/700 [01:10<03:53, 2.30it/s, loss=0.199, lr=0.0001]
Steps: 23%|██▎ | 163/700 [01:10<03:53, 2.30it/s, loss=0.199, lr=0.0001]
Steps: 23%|██▎ | 163/700 [01:10<03:53, 2.30it/s, loss=0.164, lr=0.0001]
Steps: 23%|██▎ | 164/700 [01:11<03:52, 2.31it/s, loss=0.164, lr=0.0001]
Steps: 23%|██▎ | 164/700 [01:11<03:52, 2.31it/s, loss=0.0932, lr=0.0001]
Steps: 24%|██▎ | 165/700 [01:11<03:51, 2.31it/s, loss=0.0932, lr=0.0001]
Steps: 24%|██▎ | 165/700 [01:11<03:51, 2.31it/s, loss=0.116, lr=0.0001]
Steps: 24%|██▎ | 166/700 [01:12<03:51, 2.31it/s, loss=0.116, lr=0.0001]
Steps: 24%|██▎ | 166/700 [01:12<03:51, 2.31it/s, loss=0.0942, lr=0.0001]
Steps: 24%|██▍ | 167/700 [01:12<03:50, 2.31it/s, loss=0.0942, lr=0.0001]
Steps: 24%|██▍ | 167/700 [01:12<03:50, 2.31it/s, loss=0.105, lr=0.0001]
Steps: 24%|██▍ | 168/700 [01:13<03:50, 2.31it/s, loss=0.105, lr=0.0001]
Steps: 24%|██▍ | 168/700 [01:13<03:50, 2.31it/s, loss=0.141, lr=0.0001]
Steps: 24%|██▍ | 169/700 [01:13<03:50, 2.31it/s, loss=0.141, lr=0.0001]
Steps: 24%|██▍ | 169/700 [01:13<03:50, 2.31it/s, loss=0.146, lr=0.0001]
Steps: 24%|██▍ | 170/700 [01:13<03:49, 2.31it/s, loss=0.146, lr=0.0001]
Steps: 24%|██▍ | 170/700 [01:13<03:49, 2.31it/s, loss=0.0638, lr=0.0001]
Steps: 24%|██▍ | 171/700 [01:14<03:49, 2.31it/s, loss=0.0638, lr=0.0001]
Steps: 24%|██▍ | 171/700 [01:14<03:49, 2.31it/s, loss=0.16, lr=0.0001]
Steps: 25%|██▍ | 172/700 [01:14<03:48, 2.31it/s, loss=0.16, lr=0.0001]
Steps: 25%|██▍ | 172/700 [01:14<03:48, 2.31it/s, loss=0.215, lr=0.0001]
Steps: 25%|██▍ | 173/700 [01:15<03:48, 2.31it/s, loss=0.215, lr=0.0001]
Steps: 25%|██▍ | 173/700 [01:15<03:48, 2.31it/s, loss=0.21, lr=0.0001]
Steps: 25%|██▍ | 174/700 [01:15<03:47, 2.31it/s, loss=0.21, lr=0.0001]
Steps: 25%|██▍ | 174/700 [01:15<03:47, 2.31it/s, loss=0.174, lr=0.0001]
Steps: 25%|██▌ | 175/700 [01:16<03:47, 2.31it/s, loss=0.174, lr=0.0001]
Steps: 25%|██▌ | 175/700 [01:16<03:47, 2.31it/s, loss=0.117, lr=0.0001]
Steps: 25%|██▌ | 176/700 [01:16<03:46, 2.31it/s, loss=0.117, lr=0.0001]
Steps: 25%|██▌ | 176/700 [01:16<03:46, 2.31it/s, loss=0.169, lr=0.0001]
Steps: 25%|██▌ | 177/700 [01:16<03:47, 2.30it/s, loss=0.169, lr=0.0001]
Steps: 25%|██▌ | 177/700 [01:17<03:47, 2.30it/s, loss=0.0948, lr=0.0001]
Steps: 25%|██▌ | 178/700 [01:17<03:46, 2.30it/s, loss=0.0948, lr=0.0001]
Steps: 25%|██▌ | 178/700 [01:17<03:46, 2.30it/s, loss=0.275, lr=0.0001]
Steps: 26%|██▌ | 179/700 [01:17<03:46, 2.30it/s, loss=0.275, lr=0.0001]
Steps: 26%|██▌ | 179/700 [01:17<03:46, 2.30it/s, loss=0.109, lr=0.0001]
Steps: 26%|██▌ | 180/700 [01:18<03:45, 2.31it/s, loss=0.109, lr=0.0001]
Steps: 26%|██▌ | 180/700 [01:18<03:45, 2.31it/s, loss=0.0641, lr=0.0001]
Steps: 26%|██▌ | 181/700 [01:18<03:45, 2.30it/s, loss=0.0641, lr=0.0001]
Steps: 26%|██▌ | 181/700 [01:18<03:45, 2.30it/s, loss=0.245, lr=0.0001]
Steps: 26%|██▌ | 182/700 [01:19<03:44, 2.31it/s, loss=0.245, lr=0.0001]
Steps: 26%|██▌ | 182/700 [01:19<03:44, 2.31it/s, loss=0.133, lr=0.0001]
Steps: 26%|██▌ | 183/700 [01:19<03:44, 2.31it/s, loss=0.133, lr=0.0001]
Steps: 26%|██▌ | 183/700 [01:19<03:44, 2.31it/s, loss=0.0986, lr=0.0001]
Steps: 26%|██▋ | 184/700 [01:20<03:43, 2.30it/s, loss=0.0986, lr=0.0001]
Steps: 26%|██▋ | 184/700 [01:20<03:43, 2.30it/s, loss=0.152, lr=0.0001]
Steps: 26%|██▋ | 185/700 [01:20<03:43, 2.31it/s, loss=0.152, lr=0.0001]
Steps: 26%|██▋ | 185/700 [01:20<03:43, 2.31it/s, loss=0.136, lr=0.0001]
Steps: 27%|██▋ | 186/700 [01:20<03:42, 2.31it/s, loss=0.136, lr=0.0001]
Steps: 27%|██▋ | 186/700 [01:20<03:42, 2.31it/s, loss=0.172, lr=0.0001]
Steps: 27%|██▋ | 187/700 [01:21<03:42, 2.31it/s, loss=0.172, lr=0.0001]
Steps: 27%|██▋ | 187/700 [01:21<03:42, 2.31it/s, loss=0.31, lr=0.0001]
Steps: 27%|██▋ | 188/700 [01:21<03:42, 2.30it/s, loss=0.31, lr=0.0001]
Steps: 27%|██▋ | 188/700 [01:21<03:42, 2.30it/s, loss=0.124, lr=0.0001]
Steps: 27%|██▋ | 189/700 [01:22<03:41, 2.30it/s, loss=0.124, lr=0.0001]
Steps: 27%|██▋ | 189/700 [01:22<03:41, 2.30it/s, loss=0.049, lr=0.0001]
Steps: 27%|██▋ | 190/700 [01:22<03:41, 2.30it/s, loss=0.049, lr=0.0001]
Steps: 27%|██▋ | 190/700 [01:22<03:41, 2.30it/s, loss=0.0852, lr=0.0001]
Steps: 27%|██▋ | 191/700 [01:23<03:41, 2.30it/s, loss=0.0852, lr=0.0001]
Steps: 27%|██▋ | 191/700 [01:23<03:41, 2.30it/s, loss=0.0649, lr=0.0001]
Steps: 27%|██▋ | 192/700 [01:23<03:40, 2.31it/s, loss=0.0649, lr=0.0001]
Steps: 27%|██▋ | 192/700 [01:23<03:40, 2.31it/s, loss=0.0476, lr=0.0001]
Steps: 28%|██▊ | 193/700 [01:23<03:41, 2.29it/s, loss=0.0476, lr=0.0001]
Steps: 28%|██▊ | 193/700 [01:23<03:41, 2.29it/s, loss=0.0807, lr=0.0001]
Steps: 28%|██▊ | 194/700 [01:24<03:40, 2.29it/s, loss=0.0807, lr=0.0001]
Steps: 28%|██▊ | 194/700 [01:24<03:40, 2.29it/s, loss=0.207, lr=0.0001]
Steps: 28%|██▊ | 195/700 [01:24<03:39, 2.30it/s, loss=0.207, lr=0.0001]
Steps: 28%|██▊ | 195/700 [01:24<03:39, 2.30it/s, loss=0.153, lr=0.0001]
Steps: 28%|██▊ | 196/700 [01:25<03:38, 2.30it/s, loss=0.153, lr=0.0001]
Steps: 28%|██▊ | 196/700 [01:25<03:38, 2.30it/s, loss=0.0468, lr=0.0001]
Steps: 28%|██▊ | 197/700 [01:25<03:38, 2.31it/s, loss=0.0468, lr=0.0001]
Steps: 28%|██▊ | 197/700 [01:25<03:38, 2.31it/s, loss=0.194, lr=0.0001]
Steps: 28%|██▊ | 198/700 [01:26<03:37, 2.31it/s, loss=0.194, lr=0.0001]
Steps: 28%|██▊ | 198/700 [01:26<03:37, 2.31it/s, loss=0.341, lr=0.0001]
Steps: 28%|██▊ | 199/700 [01:26<03:37, 2.31it/s, loss=0.341, lr=0.0001]
Steps: 28%|██▊ | 199/700 [01:26<03:37, 2.31it/s, loss=0.0981, lr=0.0001]
Steps: 29%|██▊ | 200/700 [01:26<03:36, 2.31it/s, loss=0.0981, lr=0.0001]
Steps: 29%|██▊ | 200/700 [01:27<03:36, 2.31it/s, loss=0.193, lr=0.0001]
Steps: 29%|██▊ | 201/700 [01:27<03:36, 2.30it/s, loss=0.193, lr=0.0001]
Steps: 29%|██▊ | 201/700 [01:27<03:36, 2.30it/s, loss=0.0917, lr=0.0001]
Steps: 29%|██▉ | 202/700 [01:27<03:35, 2.31it/s, loss=0.0917, lr=0.0001]
Steps: 29%|██▉ | 202/700 [01:27<03:35, 2.31it/s, loss=0.149, lr=0.0001]
Steps: 29%|██▉ | 203/700 [01:28<03:35, 2.31it/s, loss=0.149, lr=0.0001]
Steps: 29%|██▉ | 203/700 [01:28<03:35, 2.31it/s, loss=0.0842, lr=0.0001]
Steps: 29%|██▉ | 204/700 [01:28<03:34, 2.31it/s, loss=0.0842, lr=0.0001]
Steps: 29%|██▉ | 204/700 [01:28<03:34, 2.31it/s, loss=0.27, lr=0.0001]
Steps: 29%|██▉ | 205/700 [01:29<03:34, 2.31it/s, loss=0.27, lr=0.0001]
Steps: 29%|██▉ | 205/700 [01:29<03:34, 2.31it/s, loss=0.234, lr=0.0001]
Steps: 29%|██▉ | 206/700 [01:29<03:34, 2.31it/s, loss=0.234, lr=0.0001]
Steps: 29%|██▉ | 206/700 [01:29<03:34, 2.31it/s, loss=0.125, lr=0.0001]
Steps: 30%|██▉ | 207/700 [01:30<03:33, 2.31it/s, loss=0.125, lr=0.0001]
Steps: 30%|██▉ | 207/700 [01:30<03:33, 2.31it/s, loss=0.0958, lr=0.0001]
Steps: 30%|██▉ | 208/700 [01:30<03:33, 2.31it/s, loss=0.0958, lr=0.0001]
Steps: 30%|██▉ | 208/700 [01:30<03:33, 2.31it/s, loss=0.0906, lr=0.0001]
Steps: 30%|██▉ | 209/700 [01:30<03:33, 2.30it/s, loss=0.0906, lr=0.0001]
Steps: 30%|██▉ | 209/700 [01:30<03:33, 2.30it/s, loss=0.0941, lr=0.0001]
Steps: 30%|███ | 210/700 [01:31<03:32, 2.30it/s, loss=0.0941, lr=0.0001]
Steps: 30%|███ | 210/700 [01:31<03:32, 2.30it/s, loss=0.0909, lr=0.0001]
Steps: 30%|███ | 211/700 [01:31<03:32, 2.30it/s, loss=0.0909, lr=0.0001]
Steps: 30%|███ | 211/700 [01:31<03:32, 2.30it/s, loss=0.126, lr=0.0001]
Steps: 30%|███ | 212/700 [01:32<03:31, 2.30it/s, loss=0.126, lr=0.0001]
Steps: 30%|███ | 212/700 [01:32<03:31, 2.30it/s, loss=0.148, lr=0.0001]
Steps: 30%|███ | 213/700 [01:32<03:31, 2.31it/s, loss=0.148, lr=0.0001]
Steps: 30%|███ | 213/700 [01:32<03:31, 2.31it/s, loss=0.259, lr=0.0001]
Steps: 31%|███ | 214/700 [01:33<03:30, 2.31it/s, loss=0.259, lr=0.0001]
Steps: 31%|███ | 214/700 [01:33<03:30, 2.31it/s, loss=0.233, lr=0.0001]
Steps: 31%|███ | 215/700 [01:33<03:30, 2.31it/s, loss=0.233, lr=0.0001]
Steps: 31%|███ | 215/700 [01:33<03:30, 2.31it/s, loss=0.0979, lr=0.0001]
Steps: 31%|███ | 216/700 [01:33<03:29, 2.31it/s, loss=0.0979, lr=0.0001]
Steps: 31%|███ | 216/700 [01:33<03:29, 2.31it/s, loss=0.167, lr=0.0001]
Steps: 31%|███ | 217/700 [01:34<03:29, 2.31it/s, loss=0.167, lr=0.0001]
Steps: 31%|███ | 217/700 [01:34<03:29, 2.31it/s, loss=0.136, lr=0.0001]
Steps: 31%|███ | 218/700 [01:34<03:28, 2.31it/s, loss=0.136, lr=0.0001]
Steps: 31%|███ | 218/700 [01:34<03:28, 2.31it/s, loss=0.112, lr=0.0001]
Steps: 31%|███▏ | 219/700 [01:35<03:28, 2.31it/s, loss=0.112, lr=0.0001]
Steps: 31%|███▏ | 219/700 [01:35<03:28, 2.31it/s, loss=0.0973, lr=0.0001]
Steps: 31%|███▏ | 220/700 [01:35<03:27, 2.31it/s, loss=0.0973, lr=0.0001]
Steps: 31%|███▏ | 220/700 [01:35<03:27, 2.31it/s, loss=0.113, lr=0.0001]
Steps: 32%|███▏ | 221/700 [01:36<03:27, 2.31it/s, loss=0.113, lr=0.0001]
Steps: 32%|███▏ | 221/700 [01:36<03:27, 2.31it/s, loss=0.094, lr=0.0001]
Steps: 32%|███▏ | 222/700 [01:36<03:26, 2.31it/s, loss=0.094, lr=0.0001]
Steps: 32%|███▏ | 222/700 [01:36<03:26, 2.31it/s, loss=0.141, lr=0.0001]
Steps: 32%|███▏ | 223/700 [01:36<03:26, 2.31it/s, loss=0.141, lr=0.0001]
Steps: 32%|███▏ | 223/700 [01:36<03:26, 2.31it/s, loss=0.148, lr=0.0001]
Steps: 32%|███▏ | 224/700 [01:37<03:25, 2.31it/s, loss=0.148, lr=0.0001]
Steps: 32%|███▏ | 224/700 [01:37<03:25, 2.31it/s, loss=0.105, lr=0.0001]
Steps: 32%|███▏ | 225/700 [01:37<03:26, 2.30it/s, loss=0.105, lr=0.0001]
Steps: 32%|███▏ | 225/700 [01:37<03:26, 2.30it/s, loss=0.255, lr=0.0001]
Steps: 32%|███▏ | 226/700 [01:38<03:25, 2.30it/s, loss=0.255, lr=0.0001]
Steps: 32%|███▏ | 226/700 [01:38<03:25, 2.30it/s, loss=0.189, lr=0.0001]
Steps: 32%|███▏ | 227/700 [01:38<03:25, 2.30it/s, loss=0.189, lr=0.0001]
Steps: 32%|███▏ | 227/700 [01:38<03:25, 2.30it/s, loss=0.117, lr=0.0001]
Steps: 33%|███▎ | 228/700 [01:39<03:24, 2.31it/s, loss=0.117, lr=0.0001]
Steps: 33%|███▎ | 228/700 [01:39<03:24, 2.31it/s, loss=0.0894, lr=0.0001]
Steps: 33%|███▎ | 229/700 [01:39<03:24, 2.31it/s, loss=0.0894, lr=0.0001]
Steps: 33%|███▎ | 229/700 [01:39<03:24, 2.31it/s, loss=0.107, lr=0.0001]
Steps: 33%|███▎ | 230/700 [01:39<03:23, 2.31it/s, loss=0.107, lr=0.0001]
Steps: 33%|███▎ | 230/700 [01:40<03:23, 2.31it/s, loss=0.0873, lr=0.0001]
Steps: 33%|███▎ | 231/700 [01:40<03:23, 2.31it/s, loss=0.0873, lr=0.0001]
Steps: 33%|███▎ | 231/700 [01:40<03:23, 2.31it/s, loss=0.0671, lr=0.0001]
Steps: 33%|███▎ | 232/700 [01:40<03:22, 2.31it/s, loss=0.0671, lr=0.0001]
Steps: 33%|███▎ | 232/700 [01:40<03:22, 2.31it/s, loss=0.094, lr=0.0001]
Steps: 33%|███▎ | 233/700 [01:41<03:22, 2.31it/s, loss=0.094, lr=0.0001]
Steps: 33%|███▎ | 233/700 [01:41<03:22, 2.31it/s, loss=0.124, lr=0.0001]
Steps: 33%|███▎ | 234/700 [01:41<03:21, 2.31it/s, loss=0.124, lr=0.0001]
Steps: 33%|███▎ | 234/700 [01:41<03:21, 2.31it/s, loss=0.0847, lr=0.0001]
Steps: 34%|███▎ | 235/700 [01:42<03:21, 2.31it/s, loss=0.0847, lr=0.0001]
Steps: 34%|███▎ | 235/700 [01:42<03:21, 2.31it/s, loss=0.236, lr=0.0001]
Steps: 34%|███▎ | 236/700 [01:42<03:20, 2.31it/s, loss=0.236, lr=0.0001]
Steps: 34%|███▎ | 236/700 [01:42<03:20, 2.31it/s, loss=0.0215, lr=0.0001]
Steps: 34%|███▍ | 237/700 [01:43<03:20, 2.31it/s, loss=0.0215, lr=0.0001]
Steps: 34%|███▍ | 237/700 [01:43<03:20, 2.31it/s, loss=0.0918, lr=0.0001]
Steps: 34%|███▍ | 238/700 [01:43<03:19, 2.31it/s, loss=0.0918, lr=0.0001]
Steps: 34%|███▍ | 238/700 [01:43<03:19, 2.31it/s, loss=0.152, lr=0.0001]
Steps: 34%|███▍ | 239/700 [01:43<03:19, 2.31it/s, loss=0.152, lr=0.0001]
Steps: 34%|███▍ | 239/700 [01:43<03:19, 2.31it/s, loss=0.0908, lr=0.0001]
Steps: 34%|███▍ | 240/700 [01:44<03:18, 2.31it/s, loss=0.0908, lr=0.0001]
Steps: 34%|███▍ | 240/700 [01:44<03:18, 2.31it/s, loss=0.0664, lr=0.0001]
Steps: 34%|███▍ | 241/700 [01:44<03:19, 2.30it/s, loss=0.0664, lr=0.0001]
Steps: 34%|███▍ | 241/700 [01:44<03:19, 2.30it/s, loss=0.0761, lr=0.0001]
Steps: 35%|███▍ | 242/700 [01:45<03:18, 2.30it/s, loss=0.0761, lr=0.0001]
Steps: 35%|███▍ | 242/700 [01:45<03:18, 2.30it/s, loss=0.0773, lr=0.0001]
Steps: 35%|███▍ | 243/700 [01:45<03:18, 2.31it/s, loss=0.0773, lr=0.0001]
Steps: 35%|███▍ | 243/700 [01:45<03:18, 2.31it/s, loss=0.127, lr=0.0001]
Steps: 35%|███▍ | 244/700 [01:46<03:17, 2.31it/s, loss=0.127, lr=0.0001]
Steps: 35%|███▍ | 244/700 [01:46<03:17, 2.31it/s, loss=0.16, lr=0.0001]
Steps: 35%|███▌ | 245/700 [01:46<03:17, 2.31it/s, loss=0.16, lr=0.0001]
Steps: 35%|███▌ | 245/700 [01:46<03:17, 2.31it/s, loss=0.0749, lr=0.0001]
Steps: 35%|███▌ | 246/700 [01:46<03:16, 2.31it/s, loss=0.0749, lr=0.0001]
Steps: 35%|███▌ | 246/700 [01:46<03:16, 2.31it/s, loss=0.143, lr=0.0001]
Steps: 35%|███▌ | 247/700 [01:47<03:16, 2.31it/s, loss=0.143, lr=0.0001]
Steps: 35%|███▌ | 247/700 [01:47<03:16, 2.31it/s, loss=0.221, lr=0.0001]
Steps: 35%|███▌ | 248/700 [01:47<03:15, 2.31it/s, loss=0.221, lr=0.0001]
Steps: 35%|███▌ | 248/700 [01:47<03:15, 2.31it/s, loss=0.0879, lr=0.0001]
Steps: 36%|███▌ | 249/700 [01:48<03:15, 2.31it/s, loss=0.0879, lr=0.0001]
Steps: 36%|███▌ | 249/700 [01:48<03:15, 2.31it/s, loss=0.0838, lr=0.0001]
Steps: 36%|███▌ | 250/700 [01:48<03:14, 2.31it/s, loss=0.0838, lr=0.0001]
Steps: 36%|███▌ | 250/700 [01:48<03:14, 2.31it/s, loss=0.166, lr=0.0001]
Steps: 36%|███▌ | 251/700 [01:49<03:14, 2.31it/s, loss=0.166, lr=0.0001]
Steps: 36%|███▌ | 251/700 [01:49<03:14, 2.31it/s, loss=0.156, lr=0.0001]
Steps: 36%|███▌ | 252/700 [01:49<03:13, 2.31it/s, loss=0.156, lr=0.0001]
Steps: 36%|███▌ | 252/700 [01:49<03:13, 2.31it/s, loss=0.256, lr=0.0001]
Steps: 36%|███▌ | 253/700 [01:49<03:13, 2.31it/s, loss=0.256, lr=0.0001]
Steps: 36%|███▌ | 253/700 [01:49<03:13, 2.31it/s, loss=0.044, lr=0.0001]
Steps: 36%|███▋ | 254/700 [01:50<03:12, 2.31it/s, loss=0.044, lr=0.0001]
Steps: 36%|███▋ | 254/700 [01:50<03:12, 2.31it/s, loss=0.182, lr=0.0001]
Steps: 36%|███▋ | 255/700 [01:50<03:12, 2.31it/s, loss=0.182, lr=0.0001]
Steps: 36%|███▋ | 255/700 [01:50<03:12, 2.31it/s, loss=0.102, lr=0.0001]
Steps: 37%|███▋ | 256/700 [01:51<03:12, 2.31it/s, loss=0.102, lr=0.0001]
Steps: 37%|███▋ | 256/700 [01:51<03:12, 2.31it/s, loss=0.151, lr=0.0001]
Steps: 37%|███▋ | 257/700 [01:51<03:12, 2.30it/s, loss=0.151, lr=0.0001]
Steps: 37%|███▋ | 257/700 [01:51<03:12, 2.30it/s, loss=0.0976, lr=0.0001]
Steps: 37%|███▋ | 258/700 [01:52<03:11, 2.30it/s, loss=0.0976, lr=0.0001]
Steps: 37%|███▋ | 258/700 [01:52<03:11, 2.30it/s, loss=0.193, lr=0.0001]
Steps: 37%|███▋ | 259/700 [01:52<03:11, 2.31it/s, loss=0.193, lr=0.0001]
Steps: 37%|███▋ | 259/700 [01:52<03:11, 2.31it/s, loss=0.0853, lr=0.0001]
Steps: 37%|███▋ | 260/700 [01:52<03:10, 2.31it/s, loss=0.0853, lr=0.0001]
Steps: 37%|███▋ | 260/700 [01:53<03:10, 2.31it/s, loss=0.201, lr=0.0001]
Steps: 37%|███▋ | 261/700 [01:53<03:10, 2.31it/s, loss=0.201, lr=0.0001]
Steps: 37%|███▋ | 261/700 [01:53<03:10, 2.31it/s, loss=0.191, lr=0.0001]
Steps: 37%|███▋ | 262/700 [01:53<03:09, 2.31it/s, loss=0.191, lr=0.0001]
Steps: 37%|███▋ | 262/700 [01:53<03:09, 2.31it/s, loss=0.0494, lr=0.0001]
Steps: 38%|███▊ | 263/700 [01:54<03:09, 2.31it/s, loss=0.0494, lr=0.0001]
Steps: 38%|███▊ | 263/700 [01:54<03:09, 2.31it/s, loss=0.0995, lr=0.0001]
Steps: 38%|███▊ | 264/700 [01:54<03:08, 2.31it/s, loss=0.0995, lr=0.0001]
Steps: 38%|███▊ | 264/700 [01:54<03:08, 2.31it/s, loss=0.204, lr=0.0001]
Steps: 38%|███▊ | 265/700 [01:55<03:08, 2.31it/s, loss=0.204, lr=0.0001]
Steps: 38%|███▊ | 265/700 [01:55<03:08, 2.31it/s, loss=0.18, lr=0.0001]
Steps: 38%|███▊ | 266/700 [01:55<03:07, 2.31it/s, loss=0.18, lr=0.0001]
Steps: 38%|███▊ | 266/700 [01:55<03:07, 2.31it/s, loss=0.107, lr=0.0001]
Steps: 38%|███▊ | 267/700 [01:56<03:07, 2.31it/s, loss=0.107, lr=0.0001]
Steps: 38%|███▊ | 267/700 [01:56<03:07, 2.31it/s, loss=0.243, lr=0.0001]
Steps: 38%|███▊ | 268/700 [01:56<03:06, 2.31it/s, loss=0.243, lr=0.0001]
Steps: 38%|███▊ | 268/700 [01:56<03:06, 2.31it/s, loss=0.0764, lr=0.0001]
Steps: 38%|███▊ | 269/700 [01:56<03:06, 2.31it/s, loss=0.0764, lr=0.0001]
Steps: 38%|███▊ | 269/700 [01:56<03:06, 2.31it/s, loss=0.103, lr=0.0001]
Steps: 39%|███▊ | 270/700 [01:57<03:06, 2.31it/s, loss=0.103, lr=0.0001]
Steps: 39%|███▊ | 270/700 [01:57<03:06, 2.31it/s, loss=0.114, lr=0.0001]
Steps: 39%|███▊ | 271/700 [01:57<03:05, 2.31it/s, loss=0.114, lr=0.0001]
Steps: 39%|███▊ | 271/700 [01:57<03:05, 2.31it/s, loss=0.206, lr=0.0001]
Steps: 39%|███▉ | 272/700 [01:58<03:05, 2.31it/s, loss=0.206, lr=0.0001]
Steps: 39%|███▉ | 272/700 [01:58<03:05, 2.31it/s, loss=0.108, lr=0.0001]
Steps: 39%|███▉ | 273/700 [01:58<03:05, 2.30it/s, loss=0.108, lr=0.0001]
Steps: 39%|███▉ | 273/700 [01:58<03:05, 2.30it/s, loss=0.14, lr=0.0001]
Steps: 39%|███▉ | 274/700 [01:59<03:04, 2.30it/s, loss=0.14, lr=0.0001]
Steps: 39%|███▉ | 274/700 [01:59<03:04, 2.30it/s, loss=0.0251, lr=0.0001]
Steps: 39%|███▉ | 275/700 [01:59<03:04, 2.31it/s, loss=0.0251, lr=0.0001]
Steps: 39%|███▉ | 275/700 [01:59<03:04, 2.31it/s, loss=0.151, lr=0.0001]
Steps: 39%|███▉ | 276/700 [01:59<03:03, 2.31it/s, loss=0.151, lr=0.0001]
Steps: 39%|███▉ | 276/700 [01:59<03:03, 2.31it/s, loss=0.128, lr=0.0001]
Steps: 40%|███▉ | 277/700 [02:00<03:03, 2.31it/s, loss=0.128, lr=0.0001]
Steps: 40%|███▉ | 277/700 [02:00<03:03, 2.31it/s, loss=0.097, lr=0.0001]
Steps: 40%|███▉ | 278/700 [02:00<03:02, 2.31it/s, loss=0.097, lr=0.0001]
Steps: 40%|███▉ | 278/700 [02:00<03:02, 2.31it/s, loss=0.293, lr=0.0001]
Steps: 40%|███▉ | 279/700 [02:01<03:02, 2.31it/s, loss=0.293, lr=0.0001]
Steps: 40%|███▉ | 279/700 [02:01<03:02, 2.31it/s, loss=0.286, lr=0.0001]
Steps: 40%|████ | 280/700 [02:01<03:01, 2.31it/s, loss=0.286, lr=0.0001]
Steps: 40%|████ | 280/700 [02:01<03:01, 2.31it/s, loss=0.171, lr=0.0001]
Steps: 40%|████ | 281/700 [02:02<03:01, 2.31it/s, loss=0.171, lr=0.0001]
Steps: 40%|████ | 281/700 [02:02<03:01, 2.31it/s, loss=0.2, lr=0.0001]
Steps: 40%|████ | 282/700 [02:02<03:00, 2.31it/s, loss=0.2, lr=0.0001]
Steps: 40%|████ | 282/700 [02:02<03:00, 2.31it/s, loss=0.153, lr=0.0001]
Steps: 40%|████ | 283/700 [02:02<03:00, 2.31it/s, loss=0.153, lr=0.0001]
Steps: 40%|████ | 283/700 [02:02<03:00, 2.31it/s, loss=0.132, lr=0.0001]
Steps: 41%|████ | 284/700 [02:03<02:59, 2.31it/s, loss=0.132, lr=0.0001]
Steps: 41%|████ | 284/700 [02:03<02:59, 2.31it/s, loss=0.115, lr=0.0001]
Steps: 41%|████ | 285/700 [02:03<02:59, 2.31it/s, loss=0.115, lr=0.0001]
Steps: 41%|████ | 285/700 [02:03<02:59, 2.31it/s, loss=0.159, lr=0.0001]
Steps: 41%|████ | 286/700 [02:04<02:59, 2.31it/s, loss=0.159, lr=0.0001]
Steps: 41%|████ | 286/700 [02:04<02:59, 2.31it/s, loss=0.0701, lr=0.0001]
Steps: 41%|████ | 287/700 [02:04<02:58, 2.31it/s, loss=0.0701, lr=0.0001]
Steps: 41%|████ | 287/700 [02:04<02:58, 2.31it/s, loss=0.134, lr=0.0001]
Steps: 41%|████ | 288/700 [02:05<02:58, 2.31it/s, loss=0.134, lr=0.0001]
Steps: 41%|████ | 288/700 [02:05<02:58, 2.31it/s, loss=0.188, lr=0.0001]
Steps: 41%|████▏ | 289/700 [02:05<02:58, 2.30it/s, loss=0.188, lr=0.0001]
Steps: 41%|████▏ | 289/700 [02:05<02:58, 2.30it/s, loss=0.0311, lr=0.0001]
Steps: 41%|████▏ | 290/700 [02:05<02:58, 2.30it/s, loss=0.0311, lr=0.0001]
Steps: 41%|████▏ | 290/700 [02:05<02:58, 2.30it/s, loss=0.13, lr=0.0001]
Steps: 42%|████▏ | 291/700 [02:06<02:57, 2.30it/s, loss=0.13, lr=0.0001]
Steps: 42%|████▏ | 291/700 [02:06<02:57, 2.30it/s, loss=0.286, lr=0.0001]
Steps: 42%|████▏ | 292/700 [02:06<02:57, 2.30it/s, loss=0.286, lr=0.0001]
Steps: 42%|████▏ | 292/700 [02:06<02:57, 2.30it/s, loss=0.136, lr=0.0001]
Steps: 42%|████▏ | 293/700 [02:07<02:56, 2.31it/s, loss=0.136, lr=0.0001]
Steps: 42%|████▏ | 293/700 [02:07<02:56, 2.31it/s, loss=0.0702, lr=0.0001]
Steps: 42%|████▏ | 294/700 [02:07<02:55, 2.31it/s, loss=0.0702, lr=0.0001]
Steps: 42%|████▏ | 294/700 [02:07<02:55, 2.31it/s, loss=0.161, lr=0.0001]
Steps: 42%|████▏ | 295/700 [02:08<02:55, 2.31it/s, loss=0.161, lr=0.0001]
Steps: 42%|████▏ | 295/700 [02:08<02:55, 2.31it/s, loss=0.0911, lr=0.0001]
Steps: 42%|████▏ | 296/700 [02:08<02:54, 2.31it/s, loss=0.0911, lr=0.0001]
Steps: 42%|████▏ | 296/700 [02:08<02:54, 2.31it/s, loss=0.074, lr=0.0001]
Steps: 42%|████▏ | 297/700 [02:08<02:54, 2.31it/s, loss=0.074, lr=0.0001]
Steps: 42%|████▏ | 297/700 [02:09<02:54, 2.31it/s, loss=0.112, lr=0.0001]
Steps: 43%|████▎ | 298/700 [02:09<02:54, 2.31it/s, loss=0.112, lr=0.0001]
Steps: 43%|████▎ | 298/700 [02:09<02:54, 2.31it/s, loss=0.0824, lr=0.0001]
Steps: 43%|████▎ | 299/700 [02:09<02:53, 2.31it/s, loss=0.0824, lr=0.0001]
Steps: 43%|████▎ | 299/700 [02:09<02:53, 2.31it/s, loss=0.124, lr=0.0001]
Steps: 43%|████▎ | 300/700 [02:10<02:53, 2.31it/s, loss=0.124, lr=0.0001]
Steps: 43%|████▎ | 300/700 [02:10<02:53, 2.31it/s, loss=0.129, lr=0.0001]
Steps: 43%|████▎ | 301/700 [02:10<02:53, 2.31it/s, loss=0.129, lr=0.0001]
Steps: 43%|████▎ | 301/700 [02:10<02:53, 2.31it/s, loss=0.148, lr=0.0001]
Steps: 43%|████▎ | 302/700 [02:11<02:52, 2.31it/s, loss=0.148, lr=0.0001]
Steps: 43%|████▎ | 302/700 [02:11<02:52, 2.31it/s, loss=0.0999, lr=0.0001]
Steps: 43%|████▎ | 303/700 [02:11<02:51, 2.31it/s, loss=0.0999, lr=0.0001]
Steps: 43%|████▎ | 303/700 [02:11<02:51, 2.31it/s, loss=0.0991, lr=0.0001]
Steps: 43%|████▎ | 304/700 [02:12<02:51, 2.31it/s, loss=0.0991, lr=0.0001]
Steps: 43%|████▎ | 304/700 [02:12<02:51, 2.31it/s, loss=0.206, lr=0.0001]
Steps: 44%|████▎ | 305/700 [02:12<02:51, 2.30it/s, loss=0.206, lr=0.0001]
Steps: 44%|████▎ | 305/700 [02:12<02:51, 2.30it/s, loss=0.0953, lr=0.0001]
Steps: 44%|████▎ | 306/700 [02:12<02:51, 2.30it/s, loss=0.0953, lr=0.0001]
Steps: 44%|████▎ | 306/700 [02:12<02:51, 2.30it/s, loss=0.132, lr=0.0001]
Steps: 44%|████▍ | 307/700 [02:13<02:50, 2.31it/s, loss=0.132, lr=0.0001]
Steps: 44%|████▍ | 307/700 [02:13<02:50, 2.31it/s, loss=0.0862, lr=0.0001]
Steps: 44%|████▍ | 308/700 [02:13<02:49, 2.31it/s, loss=0.0862, lr=0.0001]
Steps: 44%|████▍ | 308/700 [02:13<02:49, 2.31it/s, loss=0.0361, lr=0.0001]
Steps: 44%|████▍ | 309/700 [02:14<02:49, 2.31it/s, loss=0.0361, lr=0.0001]
Steps: 44%|████▍ | 309/700 [02:14<02:49, 2.31it/s, loss=0.229, lr=0.0001]
Steps: 44%|████▍ | 310/700 [02:14<02:49, 2.31it/s, loss=0.229, lr=0.0001]
Steps: 44%|████▍ | 310/700 [02:14<02:49, 2.31it/s, loss=0.133, lr=0.0001]
Steps: 44%|████▍ | 311/700 [02:15<02:48, 2.31it/s, loss=0.133, lr=0.0001]
Steps: 44%|████▍ | 311/700 [02:15<02:48, 2.31it/s, loss=0.163, lr=0.0001]
Steps: 45%|████▍ | 312/700 [02:15<02:47, 2.31it/s, loss=0.163, lr=0.0001]
Steps: 45%|████▍ | 312/700 [02:15<02:47, 2.31it/s, loss=0.116, lr=0.0001]
Steps: 45%|████▍ | 313/700 [02:15<02:47, 2.31it/s, loss=0.116, lr=0.0001]
Steps: 45%|████▍ | 313/700 [02:15<02:47, 2.31it/s, loss=0.309, lr=0.0001]
Steps: 45%|████▍ | 314/700 [02:16<02:47, 2.31it/s, loss=0.309, lr=0.0001]
Steps: 45%|████▍ | 314/700 [02:16<02:47, 2.31it/s, loss=0.0657, lr=0.0001]
Steps: 45%|████▌ | 315/700 [02:16<02:46, 2.31it/s, loss=0.0657, lr=0.0001]
Steps: 45%|████▌ | 315/700 [02:16<02:46, 2.31it/s, loss=0.0988, lr=0.0001]
Steps: 45%|████▌ | 316/700 [02:17<02:46, 2.31it/s, loss=0.0988, lr=0.0001]
Steps: 45%|████▌ | 316/700 [02:17<02:46, 2.31it/s, loss=0.103, lr=0.0001]
Steps: 45%|████▌ | 317/700 [02:17<02:45, 2.31it/s, loss=0.103, lr=0.0001]
Steps: 45%|████▌ | 317/700 [02:17<02:45, 2.31it/s, loss=0.282, lr=0.0001]
Steps: 45%|████▌ | 318/700 [02:18<02:45, 2.31it/s, loss=0.282, lr=0.0001]
Steps: 45%|████▌ | 318/700 [02:18<02:45, 2.31it/s, loss=0.162, lr=0.0001]
Steps: 46%|████▌ | 319/700 [02:18<02:45, 2.31it/s, loss=0.162, lr=0.0001]
Steps: 46%|████▌ | 319/700 [02:18<02:45, 2.31it/s, loss=0.11, lr=0.0001]
Steps: 46%|████▌ | 320/700 [02:18<02:44, 2.31it/s, loss=0.11, lr=0.0001]
Steps: 46%|████▌ | 320/700 [02:18<02:44, 2.31it/s, loss=0.165, lr=0.0001]
Steps: 46%|████▌ | 321/700 [02:19<02:44, 2.30it/s, loss=0.165, lr=0.0001]
Steps: 46%|████▌ | 321/700 [02:19<02:44, 2.30it/s, loss=0.105, lr=0.0001]
Steps: 46%|████▌ | 322/700 [02:19<02:44, 2.30it/s, loss=0.105, lr=0.0001]
Steps: 46%|████▌ | 322/700 [02:19<02:44, 2.30it/s, loss=0.246, lr=0.0001]
Steps: 46%|████▌ | 323/700 [02:20<02:43, 2.30it/s, loss=0.246, lr=0.0001]
Steps: 46%|████▌ | 323/700 [02:20<02:43, 2.30it/s, loss=0.0769, lr=0.0001]
Steps: 46%|████▋ | 324/700 [02:20<02:43, 2.31it/s, loss=0.0769, lr=0.0001]
Steps: 46%|████▋ | 324/700 [02:20<02:43, 2.31it/s, loss=0.101, lr=0.0001]
Steps: 46%|████▋ | 325/700 [02:21<02:42, 2.31it/s, loss=0.101, lr=0.0001]
Steps: 46%|████▋ | 325/700 [02:21<02:42, 2.31it/s, loss=0.161, lr=0.0001]
Steps: 47%|████▋ | 326/700 [02:21<02:42, 2.31it/s, loss=0.161, lr=0.0001]
Steps: 47%|████▋ | 326/700 [02:21<02:42, 2.31it/s, loss=0.175, lr=0.0001]
Steps: 47%|████▋ | 327/700 [02:22<02:41, 2.31it/s, loss=0.175, lr=0.0001]
Steps: 47%|████▋ | 327/700 [02:22<02:41, 2.31it/s, loss=0.147, lr=0.0001]
Steps: 47%|████▋ | 328/700 [02:22<02:40, 2.31it/s, loss=0.147, lr=0.0001]
Steps: 47%|████▋ | 328/700 [02:22<02:40, 2.31it/s, loss=0.258, lr=0.0001]
Steps: 47%|████▋ | 329/700 [02:22<02:40, 2.31it/s, loss=0.258, lr=0.0001]
Steps: 47%|████▋ | 329/700 [02:22<02:40, 2.31it/s, loss=0.117, lr=0.0001]
Steps: 47%|████▋ | 330/700 [02:23<02:40, 2.31it/s, loss=0.117, lr=0.0001]
Steps: 47%|████▋ | 330/700 [02:23<02:40, 2.31it/s, loss=0.0967, lr=0.0001]
Steps: 47%|████▋ | 331/700 [02:23<02:39, 2.31it/s, loss=0.0967, lr=0.0001]
Steps: 47%|████▋ | 331/700 [02:23<02:39, 2.31it/s, loss=0.0688, lr=0.0001]
Steps: 47%|████▋ | 332/700 [02:24<02:39, 2.31it/s, loss=0.0688, lr=0.0001]
Steps: 47%|████▋ | 332/700 [02:24<02:39, 2.31it/s, loss=0.102, lr=0.0001]
Steps: 48%|████▊ | 333/700 [02:24<02:38, 2.31it/s, loss=0.102, lr=0.0001]
Steps: 48%|████▊ | 333/700 [02:24<02:38, 2.31it/s, loss=0.0854, lr=0.0001]
Steps: 48%|████▊ | 334/700 [02:25<02:38, 2.31it/s, loss=0.0854, lr=0.0001]
Steps: 48%|████▊ | 334/700 [02:25<02:38, 2.31it/s, loss=0.0907, lr=0.0001]
Steps: 48%|████▊ | 335/700 [02:25<02:37, 2.31it/s, loss=0.0907, lr=0.0001]
Steps: 48%|████▊ | 335/700 [02:25<02:37, 2.31it/s, loss=0.243, lr=0.0001]
Steps: 48%|████▊ | 336/700 [02:25<02:37, 2.31it/s, loss=0.243, lr=0.0001]
Steps: 48%|████▊ | 336/700 [02:25<02:37, 2.31it/s, loss=0.182, lr=0.0001]
Steps: 48%|████▊ | 337/700 [02:26<02:37, 2.30it/s, loss=0.182, lr=0.0001]
Steps: 48%|████▊ | 337/700 [02:26<02:37, 2.30it/s, loss=0.165, lr=0.0001]
Steps: 48%|████▊ | 338/700 [02:26<02:37, 2.30it/s, loss=0.165, lr=0.0001]
Steps: 48%|████▊ | 338/700 [02:26<02:37, 2.30it/s, loss=0.116, lr=0.0001]
Steps: 48%|████▊ | 339/700 [02:27<02:36, 2.31it/s, loss=0.116, lr=0.0001]
Steps: 48%|████▊ | 339/700 [02:27<02:36, 2.31it/s, loss=0.0656, lr=0.0001]
Steps: 49%|████▊ | 340/700 [02:27<02:36, 2.31it/s, loss=0.0656, lr=0.0001]
Steps: 49%|████▊ | 340/700 [02:27<02:36, 2.31it/s, loss=0.0485, lr=0.0001]
Steps: 49%|████▊ | 341/700 [02:28<02:35, 2.31it/s, loss=0.0485, lr=0.0001]
Steps: 49%|████▊ | 341/700 [02:28<02:35, 2.31it/s, loss=0.0723, lr=0.0001]
Steps: 49%|████▉ | 342/700 [02:28<02:34, 2.31it/s, loss=0.0723, lr=0.0001]
Steps: 49%|████▉ | 342/700 [02:28<02:34, 2.31it/s, loss=0.057, lr=0.0001]
Steps: 49%|████▉ | 343/700 [02:28<02:34, 2.31it/s, loss=0.057, lr=0.0001]
Steps: 49%|████▉ | 343/700 [02:28<02:34, 2.31it/s, loss=0.159, lr=0.0001]
Steps: 49%|████▉ | 344/700 [02:29<02:34, 2.31it/s, loss=0.159, lr=0.0001]
Steps: 49%|████▉ | 344/700 [02:29<02:34, 2.31it/s, loss=0.193, lr=0.0001]
Steps: 49%|████▉ | 345/700 [02:29<02:33, 2.31it/s, loss=0.193, lr=0.0001]
Steps: 49%|████▉ | 345/700 [02:29<02:33, 2.31it/s, loss=0.236, lr=0.0001]
Steps: 49%|████▉ | 346/700 [02:30<02:33, 2.31it/s, loss=0.236, lr=0.0001]
Steps: 49%|████▉ | 346/700 [02:30<02:33, 2.31it/s, loss=0.108, lr=0.0001]
Steps: 50%|████▉ | 347/700 [02:30<02:33, 2.31it/s, loss=0.108, lr=0.0001]
Steps: 50%|████▉ | 347/700 [02:30<02:33, 2.31it/s, loss=0.0848, lr=0.0001]
Steps: 50%|████▉ | 348/700 [02:31<02:32, 2.31it/s, loss=0.0848, lr=0.0001]
Steps: 50%|████▉ | 348/700 [02:31<02:32, 2.31it/s, loss=0.135, lr=0.0001]
Steps: 50%|████▉ | 349/700 [02:31<02:32, 2.31it/s, loss=0.135, lr=0.0001]
Steps: 50%|████▉ | 349/700 [02:31<02:32, 2.31it/s, loss=0.141, lr=0.0001]
Steps: 50%|█████ | 350/700 [02:31<02:31, 2.31it/s, loss=0.141, lr=0.0001]
Steps: 50%|█████ | 350/700 [02:31<02:31, 2.31it/s, loss=0.0529, lr=0.0001]
Steps: 50%|█████ | 351/700 [02:32<02:31, 2.31it/s, loss=0.0529, lr=0.0001]
Steps: 50%|█████ | 351/700 [02:32<02:31, 2.31it/s, loss=0.0894, lr=0.0001]
Steps: 50%|█████ | 352/700 [02:32<02:30, 2.31it/s, loss=0.0894, lr=0.0001]
Steps: 50%|█████ | 352/700 [02:32<02:30, 2.31it/s, loss=0.343, lr=0.0001]
Steps: 50%|█████ | 353/700 [02:33<02:30, 2.30it/s, loss=0.343, lr=0.0001]
Steps: 50%|█████ | 353/700 [02:33<02:30, 2.30it/s, loss=0.195, lr=0.0001]
Steps: 51%|█████ | 354/700 [02:33<02:30, 2.30it/s, loss=0.195, lr=0.0001]
Steps: 51%|█████ | 354/700 [02:33<02:30, 2.30it/s, loss=0.107, lr=0.0001]
Steps: 51%|█████ | 355/700 [02:34<02:29, 2.30it/s, loss=0.107, lr=0.0001]
Steps: 51%|█████ | 355/700 [02:34<02:29, 2.30it/s, loss=0.0284, lr=0.0001]
Steps: 51%|█████ | 356/700 [02:34<02:29, 2.31it/s, loss=0.0284, lr=0.0001]
Steps: 51%|█████ | 356/700 [02:34<02:29, 2.31it/s, loss=0.167, lr=0.0001]
Steps: 51%|█████ | 357/700 [02:35<02:28, 2.31it/s, loss=0.167, lr=0.0001]
Steps: 51%|█████ | 357/700 [02:35<02:28, 2.31it/s, loss=0.14, lr=0.0001]
Steps: 51%|█████ | 358/700 [02:35<02:28, 2.31it/s, loss=0.14, lr=0.0001]
Steps: 51%|█████ | 358/700 [02:35<02:28, 2.31it/s, loss=0.111, lr=0.0001]
Steps: 51%|█████▏ | 359/700 [02:35<02:27, 2.31it/s, loss=0.111, lr=0.0001]
Steps: 51%|█████▏ | 359/700 [02:35<02:27, 2.31it/s, loss=0.199, lr=0.0001]
Steps: 51%|█████▏ | 360/700 [02:36<02:27, 2.31it/s, loss=0.199, lr=0.0001]
Steps: 51%|█████▏ | 360/700 [02:36<02:27, 2.31it/s, loss=0.2, lr=0.0001]
Steps: 52%|█████▏ | 361/700 [02:36<02:26, 2.31it/s, loss=0.2, lr=0.0001]
Steps: 52%|█████▏ | 361/700 [02:36<02:26, 2.31it/s, loss=0.0617, lr=0.0001]
Steps: 52%|█████▏ | 362/700 [02:37<02:26, 2.31it/s, loss=0.0617, lr=0.0001]
Steps: 52%|█████▏ | 362/700 [02:37<02:26, 2.31it/s, loss=0.202, lr=0.0001]
Steps: 52%|█████▏ | 363/700 [02:37<02:25, 2.31it/s, loss=0.202, lr=0.0001]
Steps: 52%|█████▏ | 363/700 [02:37<02:25, 2.31it/s, loss=0.081, lr=0.0001]
Steps: 52%|█████▏ | 364/700 [02:38<02:25, 2.31it/s, loss=0.081, lr=0.0001]
Steps: 52%|█████▏ | 364/700 [02:38<02:25, 2.31it/s, loss=0.158, lr=0.0001]
Steps: 52%|█████▏ | 365/700 [02:38<02:25, 2.31it/s, loss=0.158, lr=0.0001]
Steps: 52%|█████▏ | 365/700 [02:38<02:25, 2.31it/s, loss=0.111, lr=0.0001]
Steps: 52%|█████▏ | 366/700 [02:38<02:24, 2.31it/s, loss=0.111, lr=0.0001]
Steps: 52%|█████▏ | 366/700 [02:38<02:24, 2.31it/s, loss=0.166, lr=0.0001]
Steps: 52%|█████▏ | 367/700 [02:39<02:24, 2.31it/s, loss=0.166, lr=0.0001]
Steps: 52%|█████▏ | 367/700 [02:39<02:24, 2.31it/s, loss=0.261, lr=0.0001]
Steps: 53%|█████▎ | 368/700 [02:39<02:23, 2.31it/s, loss=0.261, lr=0.0001]
Steps: 53%|█████▎ | 368/700 [02:39<02:23, 2.31it/s, loss=0.119, lr=0.0001]
Steps: 53%|█████▎ | 369/700 [02:40<02:24, 2.30it/s, loss=0.119, lr=0.0001]
Steps: 53%|█████▎ | 369/700 [02:40<02:24, 2.30it/s, loss=0.0896, lr=0.0001]
Steps: 53%|█████▎ | 370/700 [02:40<02:23, 2.30it/s, loss=0.0896, lr=0.0001]
Steps: 53%|█████▎ | 370/700 [02:40<02:23, 2.30it/s, loss=0.101, lr=0.0001]
Steps: 53%|█████▎ | 371/700 [02:41<02:23, 2.30it/s, loss=0.101, lr=0.0001]
Steps: 53%|█████▎ | 371/700 [02:41<02:23, 2.30it/s, loss=0.112, lr=0.0001]
Steps: 53%|█████▎ | 372/700 [02:41<02:22, 2.30it/s, loss=0.112, lr=0.0001]
Steps: 53%|█████▎ | 372/700 [02:41<02:22, 2.30it/s, loss=0.132, lr=0.0001]
Steps: 53%|█████▎ | 373/700 [02:41<02:21, 2.30it/s, loss=0.132, lr=0.0001]
Steps: 53%|█████▎ | 373/700 [02:41<02:21, 2.30it/s, loss=0.15, lr=0.0001]
Steps: 53%|█████▎ | 374/700 [02:42<02:21, 2.31it/s, loss=0.15, lr=0.0001]
Steps: 53%|█████▎ | 374/700 [02:42<02:21, 2.31it/s, loss=0.326, lr=0.0001]
Steps: 54%|█████▎ | 375/700 [02:42<02:20, 2.31it/s, loss=0.326, lr=0.0001]
Steps: 54%|█████▎ | 375/700 [02:42<02:20, 2.31it/s, loss=0.117, lr=0.0001]
Steps: 54%|█████▎ | 376/700 [02:43<02:20, 2.30it/s, loss=0.117, lr=0.0001]
Steps: 54%|█████▎ | 376/700 [02:43<02:20, 2.30it/s, loss=0.128, lr=0.0001]
Steps: 54%|█████▍ | 377/700 [02:43<02:20, 2.30it/s, loss=0.128, lr=0.0001]
Steps: 54%|█████▍ | 377/700 [02:43<02:20, 2.30it/s, loss=0.146, lr=0.0001]
Steps: 54%|█████▍ | 378/700 [02:44<02:19, 2.31it/s, loss=0.146, lr=0.0001]
Steps: 54%|█████▍ | 378/700 [02:44<02:19, 2.31it/s, loss=0.219, lr=0.0001]
Steps: 54%|█████▍ | 379/700 [02:44<02:19, 2.31it/s, loss=0.219, lr=0.0001]
Steps: 54%|█████▍ | 379/700 [02:44<02:19, 2.31it/s, loss=0.0741, lr=0.0001]
Steps: 54%|█████▍ | 380/700 [02:44<02:18, 2.31it/s, loss=0.0741, lr=0.0001]
Steps: 54%|█████▍ | 380/700 [02:45<02:18, 2.31it/s, loss=0.104, lr=0.0001]
Steps: 54%|█████▍ | 381/700 [02:45<02:18, 2.31it/s, loss=0.104, lr=0.0001]
Steps: 54%|█████▍ | 381/700 [02:45<02:18, 2.31it/s, loss=0.0772, lr=0.0001]
Steps: 55%|█████▍ | 382/700 [02:45<02:18, 2.30it/s, loss=0.0772, lr=0.0001]
Steps: 55%|█████▍ | 382/700 [02:45<02:18, 2.30it/s, loss=0.213, lr=0.0001]
Steps: 55%|█████▍ | 383/700 [02:46<02:29, 2.11it/s, loss=0.213, lr=0.0001]
Steps: 55%|█████▍ | 383/700 [02:46<02:29, 2.11it/s, loss=0.197, lr=0.0001]
Steps: 55%|█████▍ | 384/700 [02:46<02:25, 2.16it/s, loss=0.197, lr=0.0001]
Steps: 55%|█████▍ | 384/700 [02:46<02:25, 2.16it/s, loss=0.172, lr=0.0001]
Steps: 55%|█████▌ | 385/700 [02:47<02:23, 2.20it/s, loss=0.172, lr=0.0001]
Steps: 55%|█████▌ | 385/700 [02:47<02:23, 2.20it/s, loss=0.108, lr=0.0001]
Steps: 55%|█████▌ | 386/700 [02:47<02:20, 2.23it/s, loss=0.108, lr=0.0001]
Steps: 55%|█████▌ | 386/700 [02:47<02:20, 2.23it/s, loss=0.0851, lr=0.0001]
Steps: 55%|█████▌ | 387/700 [02:48<02:18, 2.25it/s, loss=0.0851, lr=0.0001]
Steps: 55%|█████▌ | 387/700 [02:48<02:18, 2.25it/s, loss=0.037, lr=0.0001]
Steps: 55%|█████▌ | 388/700 [02:48<02:17, 2.27it/s, loss=0.037, lr=0.0001]
Steps: 55%|█████▌ | 388/700 [02:48<02:17, 2.27it/s, loss=0.278, lr=0.0001]
Steps: 56%|█████▌ | 389/700 [02:49<02:16, 2.28it/s, loss=0.278, lr=0.0001]
Steps: 56%|█████▌ | 389/700 [02:49<02:16, 2.28it/s, loss=0.0438, lr=0.0001]
Steps: 56%|█████▌ | 390/700 [02:49<02:15, 2.29it/s, loss=0.0438, lr=0.0001]
Steps: 56%|█████▌ | 390/700 [02:49<02:15, 2.29it/s, loss=0.171, lr=0.0001]
Steps: 56%|█████▌ | 391/700 [02:49<02:14, 2.29it/s, loss=0.171, lr=0.0001]
Steps: 56%|█████▌ | 391/700 [02:49<02:14, 2.29it/s, loss=0.0965, lr=0.0001]
Steps: 56%|█████▌ | 392/700 [02:50<02:14, 2.30it/s, loss=0.0965, lr=0.0001]
Steps: 56%|█████▌ | 392/700 [02:50<02:14, 2.30it/s, loss=0.061, lr=0.0001]
Steps: 56%|█████▌ | 393/700 [02:50<02:13, 2.30it/s, loss=0.061, lr=0.0001]
Steps: 56%|█████▌ | 393/700 [02:50<02:13, 2.30it/s, loss=0.0909, lr=0.0001]
Steps: 56%|█████▋ | 394/700 [02:51<02:12, 2.30it/s, loss=0.0909, lr=0.0001]
Steps: 56%|█████▋ | 394/700 [02:51<02:12, 2.30it/s, loss=0.0822, lr=0.0001]
Steps: 56%|█████▋ | 395/700 [02:51<02:12, 2.31it/s, loss=0.0822, lr=0.0001]
Steps: 56%|█████▋ | 395/700 [02:51<02:12, 2.31it/s, loss=0.0202, lr=0.0001]
Steps: 57%|█████▋ | 396/700 [02:52<02:11, 2.31it/s, loss=0.0202, lr=0.0001]
Steps: 57%|█████▋ | 396/700 [02:52<02:11, 2.31it/s, loss=0.084, lr=0.0001]
Steps: 57%|█████▋ | 397/700 [02:52<02:11, 2.31it/s, loss=0.084, lr=0.0001]
Steps: 57%|█████▋ | 397/700 [02:52<02:11, 2.31it/s, loss=0.165, lr=0.0001]
Steps: 57%|█████▋ | 398/700 [02:52<02:10, 2.31it/s, loss=0.165, lr=0.0001]
Steps: 57%|█████▋ | 398/700 [02:52<02:10, 2.31it/s, loss=0.121, lr=0.0001]
Steps: 57%|█████▋ | 399/700 [02:53<02:10, 2.31it/s, loss=0.121, lr=0.0001]
Steps: 57%|█████▋ | 399/700 [02:53<02:10, 2.31it/s, loss=0.17, lr=0.0001]
Steps: 57%|█████▋ | 400/700 [02:53<02:09, 2.31it/s, loss=0.17, lr=0.0001]
Steps: 57%|█████▋ | 400/700 [02:53<02:09, 2.31it/s, loss=0.176, lr=0.0001]
Steps: 57%|█████▋ | 401/700 [02:54<02:10, 2.30it/s, loss=0.176, lr=0.0001]
Steps: 57%|█████▋ | 401/700 [02:54<02:10, 2.30it/s, loss=0.165, lr=0.0001]
Steps: 57%|█████▋ | 402/700 [02:54<02:09, 2.30it/s, loss=0.165, lr=0.0001]
Steps: 57%|█████▋ | 402/700 [02:54<02:09, 2.30it/s, loss=0.0535, lr=0.0001]
Steps: 58%|█████▊ | 403/700 [02:55<02:08, 2.31it/s, loss=0.0535, lr=0.0001]
Steps: 58%|█████▊ | 403/700 [02:55<02:08, 2.31it/s, loss=0.15, lr=0.0001]
Steps: 58%|█████▊ | 404/700 [02:55<02:08, 2.31it/s, loss=0.15, lr=0.0001]
Steps: 58%|█████▊ | 404/700 [02:55<02:08, 2.31it/s, loss=0.122, lr=0.0001]
Steps: 58%|█████▊ | 405/700 [02:55<02:07, 2.31it/s, loss=0.122, lr=0.0001]
Steps: 58%|█████▊ | 405/700 [02:55<02:07, 2.31it/s, loss=0.111, lr=0.0001]
Steps: 58%|█████▊ | 406/700 [02:56<02:07, 2.31it/s, loss=0.111, lr=0.0001]
Steps: 58%|█████▊ | 406/700 [02:56<02:07, 2.31it/s, loss=0.148, lr=0.0001]
Steps: 58%|█████▊ | 407/700 [02:56<02:06, 2.31it/s, loss=0.148, lr=0.0001]
Steps: 58%|█████▊ | 407/700 [02:56<02:06, 2.31it/s, loss=0.135, lr=0.0001]
Steps: 58%|█████▊ | 408/700 [02:57<02:06, 2.31it/s, loss=0.135, lr=0.0001]
Steps: 58%|█████▊ | 408/700 [02:57<02:06, 2.31it/s, loss=0.0779, lr=0.0001]
Steps: 58%|█████▊ | 409/700 [02:57<02:05, 2.31it/s, loss=0.0779, lr=0.0001]
Steps: 58%|█████▊ | 409/700 [02:57<02:05, 2.31it/s, loss=0.125, lr=0.0001]
Steps: 59%|█████▊ | 410/700 [02:58<02:05, 2.31it/s, loss=0.125, lr=0.0001]
Steps: 59%|█████▊ | 410/700 [02:58<02:05, 2.31it/s, loss=0.116, lr=0.0001]
Steps: 59%|█████▊ | 411/700 [02:58<02:05, 2.31it/s, loss=0.116, lr=0.0001]
Steps: 59%|█████▊ | 411/700 [02:58<02:05, 2.31it/s, loss=0.187, lr=0.0001]
Steps: 59%|█████▉ | 412/700 [02:58<02:04, 2.31it/s, loss=0.187, lr=0.0001]
Steps: 59%|█████▉ | 412/700 [02:59<02:04, 2.31it/s, loss=0.0657, lr=0.0001]
Steps: 59%|█████▉ | 413/700 [02:59<02:04, 2.31it/s, loss=0.0657, lr=0.0001]
Steps: 59%|█████▉ | 413/700 [02:59<02:04, 2.31it/s, loss=0.0886, lr=0.0001]
Steps: 59%|█████▉ | 414/700 [02:59<02:03, 2.31it/s, loss=0.0886, lr=0.0001]
Steps: 59%|█████▉ | 414/700 [02:59<02:03, 2.31it/s, loss=0.127, lr=0.0001]
Steps: 59%|█████▉ | 415/700 [03:00<02:03, 2.31it/s, loss=0.127, lr=0.0001]
Steps: 59%|█████▉ | 415/700 [03:00<02:03, 2.31it/s, loss=0.0474, lr=0.0001]
Steps: 59%|█████▉ | 416/700 [03:00<02:02, 2.31it/s, loss=0.0474, lr=0.0001]
Steps: 59%|█████▉ | 416/700 [03:00<02:02, 2.31it/s, loss=0.135, lr=0.0001]
Steps: 60%|█████▉ | 417/700 [03:01<02:03, 2.30it/s, loss=0.135, lr=0.0001]
Steps: 60%|█████▉ | 417/700 [03:01<02:03, 2.30it/s, loss=0.127, lr=0.0001]
Steps: 60%|█████▉ | 418/700 [03:01<02:02, 2.30it/s, loss=0.127, lr=0.0001]
Steps: 60%|█████▉ | 418/700 [03:01<02:02, 2.30it/s, loss=0.136, lr=0.0001]
Steps: 60%|█████▉ | 419/700 [03:02<02:01, 2.31it/s, loss=0.136, lr=0.0001]
Steps: 60%|█████▉ | 419/700 [03:02<02:01, 2.31it/s, loss=0.197, lr=0.0001]
Steps: 60%|██████ | 420/700 [03:02<02:01, 2.31it/s, loss=0.197, lr=0.0001]
Steps: 60%|██████ | 420/700 [03:02<02:01, 2.31it/s, loss=0.0675, lr=0.0001]
Steps: 60%|██████ | 421/700 [03:02<02:00, 2.31it/s, loss=0.0675, lr=0.0001]
Steps: 60%|██████ | 421/700 [03:02<02:00, 2.31it/s, loss=0.0898, lr=0.0001]
Steps: 60%|██████ | 422/700 [03:03<02:00, 2.31it/s, loss=0.0898, lr=0.0001]
Steps: 60%|██████ | 422/700 [03:03<02:00, 2.31it/s, loss=0.118, lr=0.0001]
Steps: 60%|██████ | 423/700 [03:03<01:59, 2.31it/s, loss=0.118, lr=0.0001]
Steps: 60%|██████ | 423/700 [03:03<01:59, 2.31it/s, loss=0.14, lr=0.0001]
Steps: 61%|██████ | 424/700 [03:04<01:59, 2.31it/s, loss=0.14, lr=0.0001]
Steps: 61%|██████ | 424/700 [03:04<01:59, 2.31it/s, loss=0.0937, lr=0.0001]
Steps: 61%|██████ | 425/700 [03:04<01:59, 2.31it/s, loss=0.0937, lr=0.0001]
Steps: 61%|██████ | 425/700 [03:04<01:59, 2.31it/s, loss=0.138, lr=0.0001]
Steps: 61%|██████ | 426/700 [03:05<01:58, 2.31it/s, loss=0.138, lr=0.0001]
Steps: 61%|██████ | 426/700 [03:05<01:58, 2.31it/s, loss=0.158, lr=0.0001]
Steps: 61%|██████ | 427/700 [03:05<01:58, 2.31it/s, loss=0.158, lr=0.0001]
Steps: 61%|██████ | 427/700 [03:05<01:58, 2.31it/s, loss=0.0508, lr=0.0001]
Steps: 61%|██████ | 428/700 [03:05<01:57, 2.31it/s, loss=0.0508, lr=0.0001]
Steps: 61%|██████ | 428/700 [03:05<01:57, 2.31it/s, loss=0.0954, lr=0.0001]
Steps: 61%|██████▏ | 429/700 [03:06<01:57, 2.31it/s, loss=0.0954, lr=0.0001]
Steps: 61%|██████▏ | 429/700 [03:06<01:57, 2.31it/s, loss=0.315, lr=0.0001]
Steps: 61%|██████▏ | 430/700 [03:06<01:56, 2.31it/s, loss=0.315, lr=0.0001]
Steps: 61%|██████▏ | 430/700 [03:06<01:56, 2.31it/s, loss=0.166, lr=0.0001]
Steps: 62%|██████▏ | 431/700 [03:07<01:56, 2.31it/s, loss=0.166, lr=0.0001]
Steps: 62%|██████▏ | 431/700 [03:07<01:56, 2.31it/s, loss=0.09, lr=0.0001]
Steps: 62%|██████▏ | 432/700 [03:07<01:55, 2.31it/s, loss=0.09, lr=0.0001]
Steps: 62%|██████▏ | 432/700 [03:07<01:55, 2.31it/s, loss=0.0611, lr=0.0001]
Steps: 62%|██████▏ | 433/700 [03:08<01:56, 2.30it/s, loss=0.0611, lr=0.0001]
Steps: 62%|██████▏ | 433/700 [03:08<01:56, 2.30it/s, loss=0.23, lr=0.0001]
Steps: 62%|██████▏ | 434/700 [03:08<01:55, 2.30it/s, loss=0.23, lr=0.0001]
Steps: 62%|██████▏ | 434/700 [03:08<01:55, 2.30it/s, loss=0.221, lr=0.0001]
Steps: 62%|██████▏ | 435/700 [03:08<01:55, 2.30it/s, loss=0.221, lr=0.0001]
Steps: 62%|██████▏ | 435/700 [03:08<01:55, 2.30it/s, loss=0.0432, lr=0.0001]
Steps: 62%|██████▏ | 436/700 [03:09<01:54, 2.31it/s, loss=0.0432, lr=0.0001]
Steps: 62%|██████▏ | 436/700 [03:09<01:54, 2.31it/s, loss=0.127, lr=0.0001]
Steps: 62%|██████▏ | 437/700 [03:09<01:53, 2.31it/s, loss=0.127, lr=0.0001]
Steps: 62%|██████▏ | 437/700 [03:09<01:53, 2.31it/s, loss=0.121, lr=0.0001]
Steps: 63%|██████▎ | 438/700 [03:10<01:53, 2.31it/s, loss=0.121, lr=0.0001]
Steps: 63%|██████▎ | 438/700 [03:10<01:53, 2.31it/s, loss=0.104, lr=0.0001]
Steps: 63%|██████▎ | 439/700 [03:10<01:53, 2.31it/s, loss=0.104, lr=0.0001]
Steps: 63%|██████▎ | 439/700 [03:10<01:53, 2.31it/s, loss=0.0318, lr=0.0001]
Steps: 63%|██████▎ | 440/700 [03:11<01:52, 2.31it/s, loss=0.0318, lr=0.0001]
Steps: 63%|██████▎ | 440/700 [03:11<01:52, 2.31it/s, loss=0.109, lr=0.0001]
Steps: 63%|██████▎ | 441/700 [03:11<01:52, 2.31it/s, loss=0.109, lr=0.0001]
Steps: 63%|██████▎ | 441/700 [03:11<01:52, 2.31it/s, loss=0.0869, lr=0.0001]
Steps: 63%|██████▎ | 442/700 [03:11<01:51, 2.31it/s, loss=0.0869, lr=0.0001]
Steps: 63%|██████▎ | 442/700 [03:12<01:51, 2.31it/s, loss=0.0479, lr=0.0001]
Steps: 63%|██████▎ | 443/700 [03:12<01:51, 2.31it/s, loss=0.0479, lr=0.0001]
Steps: 63%|██████▎ | 443/700 [03:12<01:51, 2.31it/s, loss=0.0615, lr=0.0001]
Steps: 63%|██████▎ | 444/700 [03:12<01:50, 2.31it/s, loss=0.0615, lr=0.0001]
Steps: 63%|██████▎ | 444/700 [03:12<01:50, 2.31it/s, loss=0.0695, lr=0.0001]
Steps: 64%|██████▎ | 445/700 [03:13<01:50, 2.31it/s, loss=0.0695, lr=0.0001]
Steps: 64%|██████▎ | 445/700 [03:13<01:50, 2.31it/s, loss=0.109, lr=0.0001]
Steps: 64%|██████▎ | 446/700 [03:13<01:49, 2.31it/s, loss=0.109, lr=0.0001]
Steps: 64%|██████▎ | 446/700 [03:13<01:49, 2.31it/s, loss=0.155, lr=0.0001]
Steps: 64%|██████▍ | 447/700 [03:14<01:49, 2.31it/s, loss=0.155, lr=0.0001]
Steps: 64%|██████▍ | 447/700 [03:14<01:49, 2.31it/s, loss=0.0106, lr=0.0001]
Steps: 64%|██████▍ | 448/700 [03:14<01:49, 2.31it/s, loss=0.0106, lr=0.0001]
Steps: 64%|██████▍ | 448/700 [03:14<01:49, 2.31it/s, loss=0.176, lr=0.0001]
Steps: 64%|██████▍ | 449/700 [03:15<01:49, 2.30it/s, loss=0.176, lr=0.0001]
Steps: 64%|██████▍ | 449/700 [03:15<01:49, 2.30it/s, loss=0.193, lr=0.0001]
Steps: 64%|██████▍ | 450/700 [03:15<01:48, 2.30it/s, loss=0.193, lr=0.0001]
Steps: 64%|██████▍ | 450/700 [03:15<01:48, 2.30it/s, loss=0.104, lr=0.0001]
Steps: 64%|██████▍ | 451/700 [03:15<01:47, 2.31it/s, loss=0.104, lr=0.0001]
Steps: 64%|██████▍ | 451/700 [03:15<01:47, 2.31it/s, loss=0.0734, lr=0.0001]
Steps: 65%|██████▍ | 452/700 [03:16<01:47, 2.31it/s, loss=0.0734, lr=0.0001]
Steps: 65%|██████▍ | 452/700 [03:16<01:47, 2.31it/s, loss=0.272, lr=0.0001]
Steps: 65%|██████▍ | 453/700 [03:16<01:47, 2.31it/s, loss=0.272, lr=0.0001]
Steps: 65%|██████▍ | 453/700 [03:16<01:47, 2.31it/s, loss=0.0395, lr=0.0001]
Steps: 65%|██████▍ | 454/700 [03:17<01:46, 2.31it/s, loss=0.0395, lr=0.0001]
Steps: 65%|██████▍ | 454/700 [03:17<01:46, 2.31it/s, loss=0.118, lr=0.0001]
Steps: 65%|██████▌ | 455/700 [03:17<01:46, 2.31it/s, loss=0.118, lr=0.0001]
Steps: 65%|██████▌ | 455/700 [03:17<01:46, 2.31it/s, loss=0.0978, lr=0.0001]
Steps: 65%|██████▌ | 456/700 [03:18<01:45, 2.31it/s, loss=0.0978, lr=0.0001]
Steps: 65%|██████▌ | 456/700 [03:18<01:45, 2.31it/s, loss=0.152, lr=0.0001]
Steps: 65%|██████▌ | 457/700 [03:18<01:45, 2.31it/s, loss=0.152, lr=0.0001]
Steps: 65%|██████▌ | 457/700 [03:18<01:45, 2.31it/s, loss=0.095, lr=0.0001]
Steps: 65%|██████▌ | 458/700 [03:18<01:44, 2.31it/s, loss=0.095, lr=0.0001]
Steps: 65%|██████▌ | 458/700 [03:18<01:44, 2.31it/s, loss=0.178, lr=0.0001]
Steps: 66%|██████▌ | 459/700 [03:19<01:44, 2.31it/s, loss=0.178, lr=0.0001]
Steps: 66%|██████▌ | 459/700 [03:19<01:44, 2.31it/s, loss=0.161, lr=0.0001]
Steps: 66%|██████▌ | 460/700 [03:19<01:43, 2.31it/s, loss=0.161, lr=0.0001]
Steps: 66%|██████▌ | 460/700 [03:19<01:43, 2.31it/s, loss=0.135, lr=0.0001]
Steps: 66%|██████▌ | 461/700 [03:20<01:43, 2.31it/s, loss=0.135, lr=0.0001]
Steps: 66%|██████▌ | 461/700 [03:20<01:43, 2.31it/s, loss=0.165, lr=0.0001]
Steps: 66%|██████▌ | 462/700 [03:20<01:43, 2.31it/s, loss=0.165, lr=0.0001]
Steps: 66%|██████▌ | 462/700 [03:20<01:43, 2.31it/s, loss=0.162, lr=0.0001]
Steps: 66%|██████▌ | 463/700 [03:21<01:42, 2.31it/s, loss=0.162, lr=0.0001]
Steps: 66%|██████▌ | 463/700 [03:21<01:42, 2.31it/s, loss=0.177, lr=0.0001]
Steps: 66%|██████▋ | 464/700 [03:21<01:42, 2.31it/s, loss=0.177, lr=0.0001]
Steps: 66%|██████▋ | 464/700 [03:21<01:42, 2.31it/s, loss=0.158, lr=0.0001]
Steps: 66%|██████▋ | 465/700 [03:21<01:42, 2.30it/s, loss=0.158, lr=0.0001]
Steps: 66%|██████▋ | 465/700 [03:21<01:42, 2.30it/s, loss=0.203, lr=0.0001]
Steps: 67%|██████▋ | 466/700 [03:22<01:41, 2.30it/s, loss=0.203, lr=0.0001]
Steps: 67%|██████▋ | 466/700 [03:22<01:41, 2.30it/s, loss=0.0449, lr=0.0001]
Steps: 67%|██████▋ | 467/700 [03:22<01:41, 2.31it/s, loss=0.0449, lr=0.0001]
Steps: 67%|██████▋ | 467/700 [03:22<01:41, 2.31it/s, loss=0.259, lr=0.0001]
Steps: 67%|██████▋ | 468/700 [03:23<01:40, 2.31it/s, loss=0.259, lr=0.0001]
Steps: 67%|██████▋ | 468/700 [03:23<01:40, 2.31it/s, loss=0.177, lr=0.0001]
Steps: 67%|██████▋ | 469/700 [03:23<01:40, 2.31it/s, loss=0.177, lr=0.0001]
Steps: 67%|██████▋ | 469/700 [03:23<01:40, 2.31it/s, loss=0.118, lr=0.0001]
Steps: 67%|██████▋ | 470/700 [03:24<01:39, 2.31it/s, loss=0.118, lr=0.0001]
Steps: 67%|██████▋ | 470/700 [03:24<01:39, 2.31it/s, loss=0.164, lr=0.0001]
Steps: 67%|██████▋ | 471/700 [03:24<01:39, 2.31it/s, loss=0.164, lr=0.0001]
Steps: 67%|██████▋ | 471/700 [03:24<01:39, 2.31it/s, loss=0.0637, lr=0.0001]
Steps: 67%|██████▋ | 472/700 [03:24<01:38, 2.31it/s, loss=0.0637, lr=0.0001]
Steps: 67%|██████▋ | 472/700 [03:25<01:38, 2.31it/s, loss=0.101, lr=0.0001]
Steps: 68%|██████▊ | 473/700 [03:25<01:38, 2.31it/s, loss=0.101, lr=0.0001]
Steps: 68%|██████▊ | 473/700 [03:25<01:38, 2.31it/s, loss=0.197, lr=0.0001]
Steps: 68%|██████▊ | 474/700 [03:25<01:37, 2.31it/s, loss=0.197, lr=0.0001]
Steps: 68%|██████▊ | 474/700 [03:25<01:37, 2.31it/s, loss=0.246, lr=0.0001]
Steps: 68%|██████▊ | 475/700 [03:26<01:37, 2.31it/s, loss=0.246, lr=0.0001]
Steps: 68%|██████▊ | 475/700 [03:26<01:37, 2.31it/s, loss=0.0803, lr=0.0001]
Steps: 68%|██████▊ | 476/700 [03:26<01:36, 2.31it/s, loss=0.0803, lr=0.0001]
Steps: 68%|██████▊ | 476/700 [03:26<01:36, 2.31it/s, loss=0.131, lr=0.0001]
Steps: 68%|██████▊ | 477/700 [03:27<01:36, 2.31it/s, loss=0.131, lr=0.0001]
Steps: 68%|██████▊ | 477/700 [03:27<01:36, 2.31it/s, loss=0.0571, lr=0.0001]
Steps: 68%|██████▊ | 478/700 [03:27<01:36, 2.31it/s, loss=0.0571, lr=0.0001]
Steps: 68%|██████▊ | 478/700 [03:27<01:36, 2.31it/s, loss=0.126, lr=0.0001]
Steps: 68%|██████▊ | 479/700 [03:27<01:35, 2.31it/s, loss=0.126, lr=0.0001]
Steps: 68%|██████▊ | 479/700 [03:28<01:35, 2.31it/s, loss=0.148, lr=0.0001]
Steps: 69%|██████▊ | 480/700 [03:28<01:35, 2.31it/s, loss=0.148, lr=0.0001]
Steps: 69%|██████▊ | 480/700 [03:28<01:35, 2.31it/s, loss=0.0757, lr=0.0001]
Steps: 69%|██████▊ | 481/700 [03:28<01:35, 2.30it/s, loss=0.0757, lr=0.0001]
Steps: 69%|██████▊ | 481/700 [03:28<01:35, 2.30it/s, loss=0.118, lr=0.0001]
Steps: 69%|██████▉ | 482/700 [03:29<01:34, 2.30it/s, loss=0.118, lr=0.0001]
Steps: 69%|██████▉ | 482/700 [03:29<01:34, 2.30it/s, loss=0.233, lr=0.0001]
Steps: 69%|██████▉ | 483/700 [03:29<01:34, 2.30it/s, loss=0.233, lr=0.0001]
Steps: 69%|██████▉ | 483/700 [03:29<01:34, 2.30it/s, loss=0.146, lr=0.0001]
Steps: 69%|██████▉ | 484/700 [03:30<01:33, 2.31it/s, loss=0.146, lr=0.0001]
Steps: 69%|██████▉ | 484/700 [03:30<01:33, 2.31it/s, loss=0.129, lr=0.0001]
Steps: 69%|██████▉ | 485/700 [03:30<01:33, 2.31it/s, loss=0.129, lr=0.0001]
Steps: 69%|██████▉ | 485/700 [03:30<01:33, 2.31it/s, loss=0.179, lr=0.0001]
Steps: 69%|██████▉ | 486/700 [03:31<01:32, 2.31it/s, loss=0.179, lr=0.0001]
Steps: 69%|██████▉ | 486/700 [03:31<01:32, 2.31it/s, loss=0.0674, lr=0.0001]
Steps: 70%|██████▉ | 487/700 [03:31<01:32, 2.31it/s, loss=0.0674, lr=0.0001]
Steps: 70%|██████▉ | 487/700 [03:31<01:32, 2.31it/s, loss=0.187, lr=0.0001]
Steps: 70%|██████▉ | 488/700 [03:31<01:31, 2.31it/s, loss=0.187, lr=0.0001]
Steps: 70%|██████▉ | 488/700 [03:31<01:31, 2.31it/s, loss=0.106, lr=0.0001]
Steps: 70%|██████▉ | 489/700 [03:32<01:31, 2.31it/s, loss=0.106, lr=0.0001]
Steps: 70%|██████▉ | 489/700 [03:32<01:31, 2.31it/s, loss=0.0499, lr=0.0001]
Steps: 70%|███████ | 490/700 [03:32<01:30, 2.31it/s, loss=0.0499, lr=0.0001]
Steps: 70%|███████ | 490/700 [03:32<01:30, 2.31it/s, loss=0.11, lr=0.0001]
Steps: 70%|███████ | 491/700 [03:33<01:30, 2.31it/s, loss=0.11, lr=0.0001]
Steps: 70%|███████ | 491/700 [03:33<01:30, 2.31it/s, loss=0.0632, lr=0.0001]
Steps: 70%|███████ | 492/700 [03:33<01:30, 2.31it/s, loss=0.0632, lr=0.0001]
Steps: 70%|███████ | 492/700 [03:33<01:30, 2.31it/s, loss=0.0964, lr=0.0001]
Steps: 70%|███████ | 493/700 [03:34<01:29, 2.31it/s, loss=0.0964, lr=0.0001]
Steps: 70%|███████ | 493/700 [03:34<01:29, 2.31it/s, loss=0.0333, lr=0.0001]
Steps: 71%|███████ | 494/700 [03:34<01:29, 2.31it/s, loss=0.0333, lr=0.0001]
Steps: 71%|███████ | 494/700 [03:34<01:29, 2.31it/s, loss=0.094, lr=0.0001]
Steps: 71%|███████ | 495/700 [03:34<01:28, 2.31it/s, loss=0.094, lr=0.0001]
Steps: 71%|███████ | 495/700 [03:34<01:28, 2.31it/s, loss=0.115, lr=0.0001]
Steps: 71%|███████ | 496/700 [03:35<01:28, 2.31it/s, loss=0.115, lr=0.0001]
Steps: 71%|███████ | 496/700 [03:35<01:28, 2.31it/s, loss=0.0327, lr=0.0001]
Steps: 71%|███████ | 497/700 [03:35<01:28, 2.30it/s, loss=0.0327, lr=0.0001]
Steps: 71%|███████ | 497/700 [03:35<01:28, 2.30it/s, loss=0.14, lr=0.0001]
Steps: 71%|███████ | 498/700 [03:36<01:27, 2.30it/s, loss=0.14, lr=0.0001]
Steps: 71%|███████ | 498/700 [03:36<01:27, 2.30it/s, loss=0.0866, lr=0.0001]
Steps: 71%|███████▏ | 499/700 [03:36<01:27, 2.31it/s, loss=0.0866, lr=0.0001]
Steps: 71%|███████▏ | 499/700 [03:36<01:27, 2.31it/s, loss=0.132, lr=0.0001]
Steps: 71%|███████▏ | 500/700 [03:37<01:26, 2.31it/s, loss=0.132, lr=0.0001]
Steps: 71%|███████▏ | 500/700 [03:37<01:26, 2.31it/s, loss=0.119, lr=0.0001]
Steps: 72%|███████▏ | 501/700 [03:37<01:26, 2.31it/s, loss=0.119, lr=0.0001]
Steps: 72%|███████▏ | 501/700 [03:37<01:26, 2.31it/s, loss=0.129, lr=0.0001]
Steps: 72%|███████▏ | 502/700 [03:37<01:25, 2.31it/s, loss=0.129, lr=0.0001]
Steps: 72%|███████▏ | 502/700 [03:37<01:25, 2.31it/s, loss=0.128, lr=0.0001]
Steps: 72%|███████▏ | 503/700 [03:38<01:25, 2.31it/s, loss=0.128, lr=0.0001]
Steps: 72%|███████▏ | 503/700 [03:38<01:25, 2.31it/s, loss=0.121, lr=0.0001]
Steps: 72%|███████▏ | 504/700 [03:38<01:24, 2.31it/s, loss=0.121, lr=0.0001]
Steps: 72%|███████▏ | 504/700 [03:38<01:24, 2.31it/s, loss=0.134, lr=0.0001]
Steps: 72%|███████▏ | 505/700 [03:39<01:24, 2.31it/s, loss=0.134, lr=0.0001]
Steps: 72%|███████▏ | 505/700 [03:39<01:24, 2.31it/s, loss=0.108, lr=0.0001]
Steps: 72%|███████▏ | 506/700 [03:39<01:24, 2.31it/s, loss=0.108, lr=0.0001]
Steps: 72%|███████▏ | 506/700 [03:39<01:24, 2.31it/s, loss=0.06, lr=0.0001]
Steps: 72%|███████▏ | 507/700 [03:40<01:23, 2.31it/s, loss=0.06, lr=0.0001]
Steps: 72%|███████▏ | 507/700 [03:40<01:23, 2.31it/s, loss=0.144, lr=0.0001]
Steps: 73%|███████▎ | 508/700 [03:40<01:23, 2.31it/s, loss=0.144, lr=0.0001]
Steps: 73%|███████▎ | 508/700 [03:40<01:23, 2.31it/s, loss=0.0841, lr=0.0001]
Steps: 73%|███████▎ | 509/700 [03:40<01:22, 2.31it/s, loss=0.0841, lr=0.0001]
Steps: 73%|███████▎ | 509/700 [03:41<01:22, 2.31it/s, loss=0.104, lr=0.0001]
Steps: 73%|███████▎ | 510/700 [03:41<01:22, 2.31it/s, loss=0.104, lr=0.0001]
Steps: 73%|███████▎ | 510/700 [03:41<01:22, 2.31it/s, loss=0.0856, lr=0.0001]
Steps: 73%|███████▎ | 511/700 [03:41<01:21, 2.31it/s, loss=0.0856, lr=0.0001]
Steps: 73%|███████▎ | 511/700 [03:41<01:21, 2.31it/s, loss=0.16, lr=0.0001]
Steps: 73%|███████▎ | 512/700 [03:42<01:21, 2.31it/s, loss=0.16, lr=0.0001]
Steps: 73%|███████▎ | 512/700 [03:42<01:21, 2.31it/s, loss=0.0192, lr=0.0001]
Steps: 73%|███████▎ | 513/700 [03:42<01:21, 2.30it/s, loss=0.0192, lr=0.0001]
Steps: 73%|███████▎ | 513/700 [03:42<01:21, 2.30it/s, loss=0.0949, lr=0.0001]
Steps: 73%|███████▎ | 514/700 [03:43<01:20, 2.30it/s, loss=0.0949, lr=0.0001]
Steps: 73%|███████▎ | 514/700 [03:43<01:20, 2.30it/s, loss=0.223, lr=0.0001]
Steps: 74%|███████▎ | 515/700 [03:43<01:20, 2.30it/s, loss=0.223, lr=0.0001]
Steps: 74%|███████▎ | 515/700 [03:43<01:20, 2.30it/s, loss=0.164, lr=0.0001]
Steps: 74%|███████▎ | 516/700 [03:44<01:19, 2.31it/s, loss=0.164, lr=0.0001]
Steps: 74%|███████▎ | 516/700 [03:44<01:19, 2.31it/s, loss=0.0825, lr=0.0001]
Steps: 74%|███████▍ | 517/700 [03:44<01:19, 2.31it/s, loss=0.0825, lr=0.0001]
Steps: 74%|███████▍ | 517/700 [03:44<01:19, 2.31it/s, loss=0.133, lr=0.0001]
Steps: 74%|███████▍ | 518/700 [03:44<01:18, 2.31it/s, loss=0.133, lr=0.0001]
Steps: 74%|███████▍ | 518/700 [03:44<01:18, 2.31it/s, loss=0.0874, lr=0.0001]
Steps: 74%|███████▍ | 519/700 [03:45<01:18, 2.31it/s, loss=0.0874, lr=0.0001]
Steps: 74%|███████▍ | 519/700 [03:45<01:18, 2.31it/s, loss=0.162, lr=0.0001]
Steps: 74%|███████▍ | 520/700 [03:45<01:18, 2.30it/s, loss=0.162, lr=0.0001]
Steps: 74%|███████▍ | 520/700 [03:45<01:18, 2.30it/s, loss=0.102, lr=0.0001]
Steps: 74%|███████▍ | 521/700 [03:46<01:17, 2.31it/s, loss=0.102, lr=0.0001]
Steps: 74%|███████▍ | 521/700 [03:46<01:17, 2.31it/s, loss=0.145, lr=0.0001]
Steps: 75%|███████▍ | 522/700 [03:46<01:17, 2.31it/s, loss=0.145, lr=0.0001]
Steps: 75%|███████▍ | 522/700 [03:46<01:17, 2.31it/s, loss=0.0441, lr=0.0001]
Steps: 75%|███████▍ | 523/700 [03:47<01:16, 2.31it/s, loss=0.0441, lr=0.0001]
Steps: 75%|███████▍ | 523/700 [03:47<01:16, 2.31it/s, loss=0.119, lr=0.0001]
Steps: 75%|███████▍ | 524/700 [03:47<01:16, 2.31it/s, loss=0.119, lr=0.0001]
Steps: 75%|███████▍ | 524/700 [03:47<01:16, 2.31it/s, loss=0.0832, lr=0.0001]
Steps: 75%|███████▌ | 525/700 [03:47<01:16, 2.30it/s, loss=0.0832, lr=0.0001]
Steps: 75%|███████▌ | 525/700 [03:47<01:16, 2.30it/s, loss=0.136, lr=0.0001]
Steps: 75%|███████▌ | 526/700 [03:48<01:15, 2.30it/s, loss=0.136, lr=0.0001]
Steps: 75%|███████▌ | 526/700 [03:48<01:15, 2.30it/s, loss=0.124, lr=0.0001]
Steps: 75%|███████▌ | 527/700 [03:48<01:15, 2.30it/s, loss=0.124, lr=0.0001]
Steps: 75%|███████▌ | 527/700 [03:48<01:15, 2.30it/s, loss=0.0421, lr=0.0001]
Steps: 75%|███████▌ | 528/700 [03:49<01:14, 2.31it/s, loss=0.0421, lr=0.0001]
Steps: 75%|███████▌ | 528/700 [03:49<01:14, 2.31it/s, loss=0.0114, lr=0.0001]
Steps: 76%|███████▌ | 529/700 [03:49<01:14, 2.30it/s, loss=0.0114, lr=0.0001]
Steps: 76%|███████▌ | 529/700 [03:49<01:14, 2.30it/s, loss=0.134, lr=0.0001]
Steps: 76%|███████▌ | 530/700 [03:50<01:13, 2.30it/s, loss=0.134, lr=0.0001]
Steps: 76%|███████▌ | 530/700 [03:50<01:13, 2.30it/s, loss=0.0501, lr=0.0001]
Steps: 76%|███████▌ | 531/700 [03:50<01:13, 2.30it/s, loss=0.0501, lr=0.0001]
Steps: 76%|███████▌ | 531/700 [03:50<01:13, 2.30it/s, loss=0.0874, lr=0.0001]
Steps: 76%|███████▌ | 532/700 [03:50<01:12, 2.31it/s, loss=0.0874, lr=0.0001]
Steps: 76%|███████▌ | 532/700 [03:51<01:12, 2.31it/s, loss=0.0677, lr=0.0001]
Steps: 76%|███████▌ | 533/700 [03:51<01:12, 2.31it/s, loss=0.0677, lr=0.0001]
Steps: 76%|███████▌ | 533/700 [03:51<01:12, 2.31it/s, loss=0.299, lr=0.0001]
Steps: 76%|███████▋ | 534/700 [03:51<01:12, 2.30it/s, loss=0.299, lr=0.0001]
Steps: 76%|███████▋ | 534/700 [03:51<01:12, 2.30it/s, loss=0.12, lr=0.0001]
Steps: 76%|███████▋ | 535/700 [03:52<01:11, 2.31it/s, loss=0.12, lr=0.0001]
Steps: 76%|███████▋ | 535/700 [03:52<01:11, 2.31it/s, loss=0.279, lr=0.0001]
Steps: 77%|███████▋ | 536/700 [03:52<01:11, 2.31it/s, loss=0.279, lr=0.0001]
Steps: 77%|███████▋ | 536/700 [03:52<01:11, 2.31it/s, loss=0.109, lr=0.0001]
Steps: 77%|███████▋ | 537/700 [03:53<01:10, 2.31it/s, loss=0.109, lr=0.0001]
Steps: 77%|███████▋ | 537/700 [03:53<01:10, 2.31it/s, loss=0.0592, lr=0.0001]
Steps: 77%|███████▋ | 538/700 [03:53<01:10, 2.31it/s, loss=0.0592, lr=0.0001]
Steps: 77%|███████▋ | 538/700 [03:53<01:10, 2.31it/s, loss=0.101, lr=0.0001]
Steps: 77%|███████▋ | 539/700 [03:54<01:09, 2.30it/s, loss=0.101, lr=0.0001]
Steps: 77%|███████▋ | 539/700 [03:54<01:09, 2.30it/s, loss=0.0438, lr=0.0001]
Steps: 77%|███████▋ | 540/700 [03:54<01:09, 2.30it/s, loss=0.0438, lr=0.0001]
Steps: 77%|███████▋ | 540/700 [03:54<01:09, 2.30it/s, loss=0.101, lr=0.0001]
Steps: 77%|███████▋ | 541/700 [03:54<01:09, 2.30it/s, loss=0.101, lr=0.0001]
Steps: 77%|███████▋ | 541/700 [03:54<01:09, 2.30it/s, loss=0.139, lr=0.0001]
Steps: 77%|███████▋ | 542/700 [03:55<01:08, 2.30it/s, loss=0.139, lr=0.0001]
Steps: 77%|███████▋ | 542/700 [03:55<01:08, 2.30it/s, loss=0.198, lr=0.0001]
Steps: 78%|███████▊ | 543/700 [03:55<01:08, 2.30it/s, loss=0.198, lr=0.0001]
Steps: 78%|███████▊ | 543/700 [03:55<01:08, 2.30it/s, loss=0.171, lr=0.0001]
Steps: 78%|███████▊ | 544/700 [03:56<01:07, 2.31it/s, loss=0.171, lr=0.0001]
Steps: 78%|███████▊ | 544/700 [03:56<01:07, 2.31it/s, loss=0.11, lr=0.0001]
Steps: 78%|███████▊ | 545/700 [03:56<01:07, 2.30it/s, loss=0.11, lr=0.0001]
Steps: 78%|███████▊ | 545/700 [03:56<01:07, 2.30it/s, loss=0.117, lr=0.0001]
Steps: 78%|███████▊ | 546/700 [03:57<01:06, 2.30it/s, loss=0.117, lr=0.0001]
Steps: 78%|███████▊ | 546/700 [03:57<01:06, 2.30it/s, loss=0.0327, lr=0.0001]
Steps: 78%|███████▊ | 547/700 [03:57<01:06, 2.30it/s, loss=0.0327, lr=0.0001]
Steps: 78%|███████▊ | 547/700 [03:57<01:06, 2.30it/s, loss=0.0536, lr=0.0001]
Steps: 78%|███████▊ | 548/700 [03:57<01:05, 2.31it/s, loss=0.0536, lr=0.0001]
Steps: 78%|███████▊ | 548/700 [03:57<01:05, 2.31it/s, loss=0.1, lr=0.0001]
Steps: 78%|███████▊ | 549/700 [03:58<01:05, 2.31it/s, loss=0.1, lr=0.0001]
Steps: 78%|███████▊ | 549/700 [03:58<01:05, 2.31it/s, loss=0.113, lr=0.0001]
Steps: 79%|███████▊ | 550/700 [03:58<01:04, 2.31it/s, loss=0.113, lr=0.0001]
Steps: 79%|███████▊ | 550/700 [03:58<01:04, 2.31it/s, loss=0.0923, lr=0.0001]
Steps: 79%|███████▊ | 551/700 [03:59<01:04, 2.31it/s, loss=0.0923, lr=0.0001]
Steps: 79%|███████▊ | 551/700 [03:59<01:04, 2.31it/s, loss=0.13, lr=0.0001]
Steps: 79%|███████▉ | 552/700 [03:59<01:04, 2.31it/s, loss=0.13, lr=0.0001]
Steps: 79%|███████▉ | 552/700 [03:59<01:04, 2.31it/s, loss=0.0919, lr=0.0001]
Steps: 79%|███████▉ | 553/700 [04:00<01:03, 2.31it/s, loss=0.0919, lr=0.0001]
Steps: 79%|███████▉ | 553/700 [04:00<01:03, 2.31it/s, loss=0.125, lr=0.0001]
Steps: 79%|███████▉ | 554/700 [04:00<01:03, 2.31it/s, loss=0.125, lr=0.0001]
Steps: 79%|███████▉ | 554/700 [04:00<01:03, 2.31it/s, loss=0.0459, lr=0.0001]
Steps: 79%|███████▉ | 555/700 [04:00<01:02, 2.31it/s, loss=0.0459, lr=0.0001]
Steps: 79%|███████▉ | 555/700 [04:00<01:02, 2.31it/s, loss=0.178, lr=0.0001]
Steps: 79%|███████▉ | 556/700 [04:01<01:02, 2.31it/s, loss=0.178, lr=0.0001]
Steps: 79%|███████▉ | 556/700 [04:01<01:02, 2.31it/s, loss=0.0118, lr=0.0001]
Steps: 80%|███████▉ | 557/700 [04:01<01:01, 2.31it/s, loss=0.0118, lr=0.0001]
Steps: 80%|███████▉ | 557/700 [04:01<01:01, 2.31it/s, loss=0.105, lr=0.0001]
Steps: 80%|███████▉ | 558/700 [04:02<01:01, 2.31it/s, loss=0.105, lr=0.0001]
Steps: 80%|███████▉ | 558/700 [04:02<01:01, 2.31it/s, loss=0.141, lr=0.0001]
Steps: 80%|███████▉ | 559/700 [04:02<01:01, 2.31it/s, loss=0.141, lr=0.0001]
Steps: 80%|███████▉ | 559/700 [04:02<01:01, 2.31it/s, loss=0.135, lr=0.0001]
Steps: 80%|████████ | 560/700 [04:03<01:00, 2.31it/s, loss=0.135, lr=0.0001]
Steps: 80%|████████ | 560/700 [04:03<01:00, 2.31it/s, loss=0.118, lr=0.0001]
Steps: 80%|████████ | 561/700 [04:03<01:00, 2.30it/s, loss=0.118, lr=0.0001]
Steps: 80%|████████ | 561/700 [04:03<01:00, 2.30it/s, loss=0.162, lr=0.0001]
Steps: 80%|████████ | 562/700 [04:03<00:59, 2.30it/s, loss=0.162, lr=0.0001]
Steps: 80%|████████ | 562/700 [04:04<00:59, 2.30it/s, loss=0.0823, lr=0.0001]
Steps: 80%|████████ | 563/700 [04:04<00:59, 2.30it/s, loss=0.0823, lr=0.0001]
Steps: 80%|████████ | 563/700 [04:04<00:59, 2.30it/s, loss=0.182, lr=0.0001]
Steps: 81%|████████ | 564/700 [04:04<00:59, 2.30it/s, loss=0.182, lr=0.0001]
Steps: 81%|████████ | 564/700 [04:04<00:59, 2.30it/s, loss=0.118, lr=0.0001]
Steps: 81%|████████ | 565/700 [04:05<00:58, 2.31it/s, loss=0.118, lr=0.0001]
Steps: 81%|████████ | 565/700 [04:05<00:58, 2.31it/s, loss=0.0902, lr=0.0001]
Steps: 81%|████████ | 566/700 [04:05<00:58, 2.31it/s, loss=0.0902, lr=0.0001]
Steps: 81%|████████ | 566/700 [04:05<00:58, 2.31it/s, loss=0.0953, lr=0.0001]
Steps: 81%|████████ | 567/700 [04:06<00:57, 2.31it/s, loss=0.0953, lr=0.0001]
Steps: 81%|████████ | 567/700 [04:06<00:57, 2.31it/s, loss=0.126, lr=0.0001]
Steps: 81%|████████ | 568/700 [04:06<00:57, 2.31it/s, loss=0.126, lr=0.0001]
Steps: 81%|████████ | 568/700 [04:06<00:57, 2.31it/s, loss=0.0431, lr=0.0001]
Steps: 81%|████████▏ | 569/700 [04:07<00:56, 2.31it/s, loss=0.0431, lr=0.0001]
Steps: 81%|████████▏ | 569/700 [04:07<00:56, 2.31it/s, loss=0.0227, lr=0.0001]
Steps: 81%|████████▏ | 570/700 [04:07<00:56, 2.31it/s, loss=0.0227, lr=0.0001]
Steps: 81%|████████▏ | 570/700 [04:07<00:56, 2.31it/s, loss=0.192, lr=0.0001]
Steps: 82%|████████▏ | 571/700 [04:07<00:55, 2.31it/s, loss=0.192, lr=0.0001]
Steps: 82%|████████▏ | 571/700 [04:07<00:55, 2.31it/s, loss=0.189, lr=0.0001]
Steps: 82%|████████▏ | 572/700 [04:08<00:55, 2.31it/s, loss=0.189, lr=0.0001]
Steps: 82%|████████▏ | 572/700 [04:08<00:55, 2.31it/s, loss=0.116, lr=0.0001]
Steps: 82%|████████▏ | 573/700 [04:08<00:55, 2.31it/s, loss=0.116, lr=0.0001]
Steps: 82%|████████▏ | 573/700 [04:08<00:55, 2.31it/s, loss=0.156, lr=0.0001]
Steps: 82%|████████▏ | 574/700 [04:09<00:54, 2.31it/s, loss=0.156, lr=0.0001]
Steps: 82%|████████▏ | 574/700 [04:09<00:54, 2.31it/s, loss=0.133, lr=0.0001]
Steps: 82%|████████▏ | 575/700 [04:09<00:54, 2.31it/s, loss=0.133, lr=0.0001]
Steps: 82%|████████▏ | 575/700 [04:09<00:54, 2.31it/s, loss=0.0888, lr=0.0001]
Steps: 82%|████████▏ | 576/700 [04:10<00:53, 2.31it/s, loss=0.0888, lr=0.0001]
Steps: 82%|████████▏ | 576/700 [04:10<00:53, 2.31it/s, loss=0.128, lr=0.0001]
Steps: 82%|████████▏ | 577/700 [04:10<00:53, 2.30it/s, loss=0.128, lr=0.0001]
Steps: 82%|████████▏ | 577/700 [04:10<00:53, 2.30it/s, loss=0.154, lr=0.0001]
Steps: 83%|████████▎ | 578/700 [04:10<00:53, 2.30it/s, loss=0.154, lr=0.0001]
Steps: 83%|████████▎ | 578/700 [04:10<00:53, 2.30it/s, loss=0.062, lr=0.0001]
Steps: 83%|████████▎ | 579/700 [04:11<00:52, 2.30it/s, loss=0.062, lr=0.0001]
Steps: 83%|████████▎ | 579/700 [04:11<00:52, 2.30it/s, loss=0.11, lr=0.0001]
Steps: 83%|████████▎ | 580/700 [04:11<00:52, 2.31it/s, loss=0.11, lr=0.0001]
Steps: 83%|████████▎ | 580/700 [04:11<00:52, 2.31it/s, loss=0.0333, lr=0.0001]
Steps: 83%|████████▎ | 581/700 [04:12<00:51, 2.31it/s, loss=0.0333, lr=0.0001]
Steps: 83%|████████▎ | 581/700 [04:12<00:51, 2.31it/s, loss=0.0944, lr=0.0001]
Steps: 83%|████████▎ | 582/700 [04:12<00:51, 2.31it/s, loss=0.0944, lr=0.0001]
Steps: 83%|████████▎ | 582/700 [04:12<00:51, 2.31it/s, loss=0.106, lr=0.0001]
Steps: 83%|████████▎ | 583/700 [04:13<00:50, 2.31it/s, loss=0.106, lr=0.0001]
Steps: 83%|████████▎ | 583/700 [04:13<00:50, 2.31it/s, loss=0.125, lr=0.0001]
Steps: 83%|████████▎ | 584/700 [04:13<00:50, 2.31it/s, loss=0.125, lr=0.0001]
Steps: 83%|████████▎ | 584/700 [04:13<00:50, 2.31it/s, loss=0.0806, lr=0.0001]
Steps: 84%|████████▎ | 585/700 [04:13<00:49, 2.31it/s, loss=0.0806, lr=0.0001]
Steps: 84%|████████▎ | 585/700 [04:13<00:49, 2.31it/s, loss=0.157, lr=0.0001]
Steps: 84%|████████▎ | 586/700 [04:14<00:49, 2.31it/s, loss=0.157, lr=0.0001]
Steps: 84%|████████▎ | 586/700 [04:14<00:49, 2.31it/s, loss=0.0135, lr=0.0001]
Steps: 84%|████████▍ | 587/700 [04:14<00:48, 2.31it/s, loss=0.0135, lr=0.0001]
Steps: 84%|████████▍ | 587/700 [04:14<00:48, 2.31it/s, loss=0.244, lr=0.0001]
Steps: 84%|████████▍ | 588/700 [04:15<00:48, 2.31it/s, loss=0.244, lr=0.0001]
Steps: 84%|████████▍ | 588/700 [04:15<00:48, 2.31it/s, loss=0.148, lr=0.0001]
Steps: 84%|████████▍ | 589/700 [04:15<00:48, 2.31it/s, loss=0.148, lr=0.0001]
Steps: 84%|████████▍ | 589/700 [04:15<00:48, 2.31it/s, loss=0.118, lr=0.0001]
Steps: 84%|████████▍ | 590/700 [04:16<00:47, 2.31it/s, loss=0.118, lr=0.0001]
Steps: 84%|████████▍ | 590/700 [04:16<00:47, 2.31it/s, loss=0.128, lr=0.0001]
Steps: 84%|████████▍ | 591/700 [04:16<00:47, 2.31it/s, loss=0.128, lr=0.0001]
Steps: 84%|████████▍ | 591/700 [04:16<00:47, 2.31it/s, loss=0.148, lr=0.0001]
Steps: 85%|████████▍ | 592/700 [04:16<00:46, 2.31it/s, loss=0.148, lr=0.0001]
Steps: 85%|████████▍ | 592/700 [04:17<00:46, 2.31it/s, loss=0.278, lr=0.0001]
Steps: 85%|████████▍ | 593/700 [04:17<00:46, 2.30it/s, loss=0.278, lr=0.0001]
Steps: 85%|████████▍ | 593/700 [04:17<00:46, 2.30it/s, loss=0.134, lr=0.0001]
Steps: 85%|████████▍ | 594/700 [04:17<00:46, 2.30it/s, loss=0.134, lr=0.0001]
Steps: 85%|████████▍ | 594/700 [04:17<00:46, 2.30it/s, loss=0.0929, lr=0.0001]
Steps: 85%|████████▌ | 595/700 [04:18<00:45, 2.30it/s, loss=0.0929, lr=0.0001]
Steps: 85%|████████▌ | 595/700 [04:18<00:45, 2.30it/s, loss=0.102, lr=0.0001]
Steps: 85%|████████▌ | 596/700 [04:18<00:45, 2.31it/s, loss=0.102, lr=0.0001]
Steps: 85%|████████▌ | 596/700 [04:18<00:45, 2.31it/s, loss=0.0314, lr=0.0001]
Steps: 85%|████████▌ | 597/700 [04:19<00:44, 2.31it/s, loss=0.0314, lr=0.0001]
Steps: 85%|████████▌ | 597/700 [04:19<00:44, 2.31it/s, loss=0.15, lr=0.0001]
Steps: 85%|████████▌ | 598/700 [04:19<00:44, 2.31it/s, loss=0.15, lr=0.0001]
Steps: 85%|████████▌ | 598/700 [04:19<00:44, 2.31it/s, loss=0.104, lr=0.0001]
Steps: 86%|████████▌ | 599/700 [04:20<00:43, 2.31it/s, loss=0.104, lr=0.0001]
Steps: 86%|████████▌ | 599/700 [04:20<00:43, 2.31it/s, loss=0.0743, lr=0.0001]
Steps: 86%|████████▌ | 600/700 [04:20<00:43, 2.31it/s, loss=0.0743, lr=0.0001]
Steps: 86%|████████▌ | 600/700 [04:20<00:43, 2.31it/s, loss=0.128, lr=0.0001]
Steps: 86%|████████▌ | 601/700 [04:20<00:42, 2.31it/s, loss=0.128, lr=0.0001]
Steps: 86%|████████▌ | 601/700 [04:20<00:42, 2.31it/s, loss=0.123, lr=0.0001]
Steps: 86%|████████▌ | 602/700 [04:21<00:42, 2.31it/s, loss=0.123, lr=0.0001]
Steps: 86%|████████▌ | 602/700 [04:21<00:42, 2.31it/s, loss=0.111, lr=0.0001]
Steps: 86%|████████▌ | 603/700 [04:21<00:41, 2.31it/s, loss=0.111, lr=0.0001]
Steps: 86%|████████▌ | 603/700 [04:21<00:41, 2.31it/s, loss=0.071, lr=0.0001]
Steps: 86%|████████▋ | 604/700 [04:22<00:41, 2.31it/s, loss=0.071, lr=0.0001]
Steps: 86%|████████▋ | 604/700 [04:22<00:41, 2.31it/s, loss=0.255, lr=0.0001]
Steps: 86%|████████▋ | 605/700 [04:22<00:41, 2.31it/s, loss=0.255, lr=0.0001]
Steps: 86%|████████▋ | 605/700 [04:22<00:41, 2.31it/s, loss=0.069, lr=0.0001]
Steps: 87%|████████▋ | 606/700 [04:23<00:40, 2.31it/s, loss=0.069, lr=0.0001]
Steps: 87%|████████▋ | 606/700 [04:23<00:40, 2.31it/s, loss=0.127, lr=0.0001]
Steps: 87%|████████▋ | 607/700 [04:23<00:40, 2.31it/s, loss=0.127, lr=0.0001]
Steps: 87%|████████▋ | 607/700 [04:23<00:40, 2.31it/s, loss=0.176, lr=0.0001]
Steps: 87%|████████▋ | 608/700 [04:23<00:39, 2.31it/s, loss=0.176, lr=0.0001]
Steps: 87%|████████▋ | 608/700 [04:23<00:39, 2.31it/s, loss=0.131, lr=0.0001]
Steps: 87%|████████▋ | 609/700 [04:24<00:39, 2.29it/s, loss=0.131, lr=0.0001]
Steps: 87%|████████▋ | 609/700 [04:24<00:39, 2.29it/s, loss=0.265, lr=0.0001]
Steps: 87%|████████▋ | 610/700 [04:24<00:39, 2.30it/s, loss=0.265, lr=0.0001]
Steps: 87%|████████▋ | 610/700 [04:24<00:39, 2.30it/s, loss=0.19, lr=0.0001]
Steps: 87%|████████▋ | 611/700 [04:25<00:38, 2.30it/s, loss=0.19, lr=0.0001]
Steps: 87%|████████▋ | 611/700 [04:25<00:38, 2.30it/s, loss=0.143, lr=0.0001]
Steps: 87%|████████▋ | 612/700 [04:25<00:38, 2.30it/s, loss=0.143, lr=0.0001]
Steps: 87%|████████▋ | 612/700 [04:25<00:38, 2.30it/s, loss=0.11, lr=0.0001]
Steps: 88%|████████▊ | 613/700 [04:26<00:37, 2.31it/s, loss=0.11, lr=0.0001]
Steps: 88%|████████▊ | 613/700 [04:26<00:37, 2.31it/s, loss=0.327, lr=0.0001]
Steps: 88%|████████▊ | 614/700 [04:26<00:37, 2.31it/s, loss=0.327, lr=0.0001]
Steps: 88%|████████▊ | 614/700 [04:26<00:37, 2.31it/s, loss=0.127, lr=0.0001]
Steps: 88%|████████▊ | 615/700 [04:26<00:36, 2.31it/s, loss=0.127, lr=0.0001]
Steps: 88%|████████▊ | 615/700 [04:26<00:36, 2.31it/s, loss=0.0661, lr=0.0001]
Steps: 88%|████████▊ | 616/700 [04:27<00:36, 2.31it/s, loss=0.0661, lr=0.0001]
Steps: 88%|████████▊ | 616/700 [04:27<00:36, 2.31it/s, loss=0.0279, lr=0.0001]
Steps: 88%|████████▊ | 617/700 [04:27<00:35, 2.31it/s, loss=0.0279, lr=0.0001]
Steps: 88%|████████▊ | 617/700 [04:27<00:35, 2.31it/s, loss=0.0887, lr=0.0001]
Steps: 88%|████████▊ | 618/700 [04:28<00:35, 2.31it/s, loss=0.0887, lr=0.0001]
Steps: 88%|████████▊ | 618/700 [04:28<00:35, 2.31it/s, loss=0.222, lr=0.0001]
Steps: 88%|████████▊ | 619/700 [04:28<00:35, 2.31it/s, loss=0.222, lr=0.0001]
Steps: 88%|████████▊ | 619/700 [04:28<00:35, 2.31it/s, loss=0.253, lr=0.0001]
Steps: 89%|████████▊ | 620/700 [04:29<00:34, 2.31it/s, loss=0.253, lr=0.0001]
Steps: 89%|████████▊ | 620/700 [04:29<00:34, 2.31it/s, loss=0.0884, lr=0.0001]
Steps: 89%|████████▊ | 621/700 [04:29<00:34, 2.30it/s, loss=0.0884, lr=0.0001]
Steps: 89%|████████▊ | 621/700 [04:29<00:34, 2.30it/s, loss=0.0895, lr=0.0001]
Steps: 89%|████████▉ | 622/700 [04:29<00:33, 2.31it/s, loss=0.0895, lr=0.0001]
Steps: 89%|████████▉ | 622/700 [04:30<00:33, 2.31it/s, loss=0.113, lr=0.0001]
Steps: 89%|████████▉ | 623/700 [04:30<00:33, 2.31it/s, loss=0.113, lr=0.0001]
Steps: 89%|████████▉ | 623/700 [04:30<00:33, 2.31it/s, loss=0.0678, lr=0.0001]
Steps: 89%|████████▉ | 624/700 [04:30<00:32, 2.31it/s, loss=0.0678, lr=0.0001]
Steps: 89%|████████▉ | 624/700 [04:30<00:32, 2.31it/s, loss=0.147, lr=0.0001]
Steps: 89%|████████▉ | 625/700 [04:31<00:32, 2.30it/s, loss=0.147, lr=0.0001]
Steps: 89%|████████▉ | 625/700 [04:31<00:32, 2.30it/s, loss=0.087, lr=0.0001]
Steps: 89%|████████▉ | 626/700 [04:31<00:32, 2.30it/s, loss=0.087, lr=0.0001]
Steps: 89%|████████▉ | 626/700 [04:31<00:32, 2.30it/s, loss=0.0731, lr=0.0001]
Steps: 90%|████████▉ | 627/700 [04:32<00:31, 2.30it/s, loss=0.0731, lr=0.0001]
Steps: 90%|████████▉ | 627/700 [04:32<00:31, 2.30it/s, loss=0.137, lr=0.0001]
Steps: 90%|████████▉ | 628/700 [04:32<00:31, 2.31it/s, loss=0.137, lr=0.0001]
Steps: 90%|████████▉ | 628/700 [04:32<00:31, 2.31it/s, loss=0.117, lr=0.0001]
Steps: 90%|████████▉ | 629/700 [04:33<00:30, 2.31it/s, loss=0.117, lr=0.0001]
Steps: 90%|████████▉ | 629/700 [04:33<00:30, 2.31it/s, loss=0.102, lr=0.0001]
Steps: 90%|█████████ | 630/700 [04:33<00:30, 2.31it/s, loss=0.102, lr=0.0001]
Steps: 90%|█████████ | 630/700 [04:33<00:30, 2.31it/s, loss=0.276, lr=0.0001]
Steps: 90%|█████████ | 631/700 [04:33<00:29, 2.31it/s, loss=0.276, lr=0.0001]
Steps: 90%|█████████ | 631/700 [04:33<00:29, 2.31it/s, loss=0.12, lr=0.0001]
Steps: 90%|█████████ | 632/700 [04:34<00:29, 2.31it/s, loss=0.12, lr=0.0001]
Steps: 90%|█████████ | 632/700 [04:34<00:29, 2.31it/s, loss=0.171, lr=0.0001]
Steps: 90%|█████████ | 633/700 [04:34<00:28, 2.31it/s, loss=0.171, lr=0.0001]
Steps: 90%|█████████ | 633/700 [04:34<00:28, 2.31it/s, loss=0.0859, lr=0.0001]
Steps: 91%|█████████ | 634/700 [04:35<00:28, 2.31it/s, loss=0.0859, lr=0.0001]
Steps: 91%|█████████ | 634/700 [04:35<00:28, 2.31it/s, loss=0.0891, lr=0.0001]
Steps: 91%|█████████ | 635/700 [04:35<00:28, 2.31it/s, loss=0.0891, lr=0.0001]
Steps: 91%|█████████ | 635/700 [04:35<00:28, 2.31it/s, loss=0.122, lr=0.0001]
Steps: 91%|█████████ | 636/700 [04:36<00:27, 2.31it/s, loss=0.122, lr=0.0001]
Steps: 91%|█████████ | 636/700 [04:36<00:27, 2.31it/s, loss=0.147, lr=0.0001]
Steps: 91%|█████████ | 637/700 [04:36<00:27, 2.31it/s, loss=0.147, lr=0.0001]
Steps: 91%|█████████ | 637/700 [04:36<00:27, 2.31it/s, loss=0.103, lr=0.0001]
Steps: 91%|█████████ | 638/700 [04:36<00:26, 2.31it/s, loss=0.103, lr=0.0001]
Steps: 91%|█████████ | 638/700 [04:36<00:26, 2.31it/s, loss=0.212, lr=0.0001]
Steps: 91%|█████████▏| 639/700 [04:37<00:26, 2.31it/s, loss=0.212, lr=0.0001]
Steps: 91%|█████████▏| 639/700 [04:37<00:26, 2.31it/s, loss=0.125, lr=0.0001]
Steps: 91%|█████████▏| 640/700 [04:37<00:25, 2.31it/s, loss=0.125, lr=0.0001]
Steps: 91%|█████████▏| 640/700 [04:37<00:25, 2.31it/s, loss=0.222, lr=0.0001]
Steps: 92%|█████████▏| 641/700 [04:38<00:25, 2.30it/s, loss=0.222, lr=0.0001]
Steps: 92%|█████████▏| 641/700 [04:38<00:25, 2.30it/s, loss=0.145, lr=0.0001]
Steps: 92%|█████████▏| 642/700 [04:38<00:25, 2.30it/s, loss=0.145, lr=0.0001]
Steps: 92%|█████████▏| 642/700 [04:38<00:25, 2.30it/s, loss=0.0954, lr=0.0001]
Steps: 92%|█████████▏| 643/700 [04:39<00:24, 2.31it/s, loss=0.0954, lr=0.0001]
Steps: 92%|█████████▏| 643/700 [04:39<00:24, 2.31it/s, loss=0.288, lr=0.0001]
Steps: 92%|█████████▏| 644/700 [04:39<00:24, 2.31it/s, loss=0.288, lr=0.0001]
Steps: 92%|█████████▏| 644/700 [04:39<00:24, 2.31it/s, loss=0.115, lr=0.0001]
Steps: 92%|█████████▏| 645/700 [04:39<00:23, 2.31it/s, loss=0.115, lr=0.0001]
Steps: 92%|█████████▏| 645/700 [04:39<00:23, 2.31it/s, loss=0.111, lr=0.0001]
Steps: 92%|█████████▏| 646/700 [04:40<00:23, 2.31it/s, loss=0.111, lr=0.0001]
Steps: 92%|█████████▏| 646/700 [04:40<00:23, 2.31it/s, loss=0.111, lr=0.0001]
Steps: 92%|█████████▏| 647/700 [04:40<00:22, 2.31it/s, loss=0.111, lr=0.0001]
Steps: 92%|█████████▏| 647/700 [04:40<00:22, 2.31it/s, loss=0.16, lr=0.0001]
Steps: 93%|█████████▎| 648/700 [04:41<00:22, 2.31it/s, loss=0.16, lr=0.0001]
Steps: 93%|█████████▎| 648/700 [04:41<00:22, 2.31it/s, loss=0.08, lr=0.0001]
Steps: 93%|█████████▎| 649/700 [04:41<00:22, 2.31it/s, loss=0.08, lr=0.0001]
Steps: 93%|█████████▎| 649/700 [04:41<00:22, 2.31it/s, loss=0.145, lr=0.0001]
Steps: 93%|█████████▎| 650/700 [04:42<00:21, 2.31it/s, loss=0.145, lr=0.0001]
Steps: 93%|█████████▎| 650/700 [04:42<00:21, 2.31it/s, loss=0.105, lr=0.0001]
Steps: 93%|█████████▎| 651/700 [04:42<00:21, 2.31it/s, loss=0.105, lr=0.0001]
Steps: 93%|█████████▎| 651/700 [04:42<00:21, 2.31it/s, loss=0.142, lr=0.0001]
Steps: 93%|█████████▎| 652/700 [04:42<00:20, 2.31it/s, loss=0.142, lr=0.0001]
Steps: 93%|█████████▎| 652/700 [04:43<00:20, 2.31it/s, loss=0.177, lr=0.0001]
Steps: 93%|█████████▎| 653/700 [04:43<00:20, 2.31it/s, loss=0.177, lr=0.0001]
Steps: 93%|█████████▎| 653/700 [04:43<00:20, 2.31it/s, loss=0.0607, lr=0.0001]
Steps: 93%|█████████▎| 654/700 [04:43<00:19, 2.31it/s, loss=0.0607, lr=0.0001]
Steps: 93%|█████████▎| 654/700 [04:43<00:19, 2.31it/s, loss=0.131, lr=0.0001]
Steps: 94%|█████████▎| 655/700 [04:44<00:19, 2.31it/s, loss=0.131, lr=0.0001]
Steps: 94%|█████████▎| 655/700 [04:44<00:19, 2.31it/s, loss=0.0542, lr=0.0001]
Steps: 94%|█████████▎| 656/700 [04:44<00:19, 2.31it/s, loss=0.0542, lr=0.0001]
Steps: 94%|█████████▎| 656/700 [04:44<00:19, 2.31it/s, loss=0.113, lr=0.0001]
Steps: 94%|█████████▍| 657/700 [04:45<00:18, 2.30it/s, loss=0.113, lr=0.0001]
Steps: 94%|█████████▍| 657/700 [04:45<00:18, 2.30it/s, loss=0.173, lr=0.0001]
Steps: 94%|█████████▍| 658/700 [04:45<00:18, 2.30it/s, loss=0.173, lr=0.0001]
Steps: 94%|█████████▍| 658/700 [04:45<00:18, 2.30it/s, loss=0.0329, lr=0.0001]
Steps: 94%|█████████▍| 659/700 [04:46<00:17, 2.31it/s, loss=0.0329, lr=0.0001]
Steps: 94%|█████████▍| 659/700 [04:46<00:17, 2.31it/s, loss=0.161, lr=0.0001]
Steps: 94%|█████████▍| 660/700 [04:46<00:17, 2.31it/s, loss=0.161, lr=0.0001]
Steps: 94%|█████████▍| 660/700 [04:46<00:17, 2.31it/s, loss=0.0519, lr=0.0001]
Steps: 94%|█████████▍| 661/700 [04:46<00:16, 2.31it/s, loss=0.0519, lr=0.0001]
Steps: 94%|█████████▍| 661/700 [04:46<00:16, 2.31it/s, loss=0.0884, lr=0.0001]
Steps: 95%|█████████▍| 662/700 [04:47<00:16, 2.31it/s, loss=0.0884, lr=0.0001]
Steps: 95%|█████████▍| 662/700 [04:47<00:16, 2.31it/s, loss=0.108, lr=0.0001]
Steps: 95%|█████████▍| 663/700 [04:47<00:16, 2.31it/s, loss=0.108, lr=0.0001]
Steps: 95%|█████████▍| 663/700 [04:47<00:16, 2.31it/s, loss=0.0557, lr=0.0001]
Steps: 95%|█████████▍| 664/700 [04:48<00:15, 2.31it/s, loss=0.0557, lr=0.0001]
Steps: 95%|█████████▍| 664/700 [04:48<00:15, 2.31it/s, loss=0.12, lr=0.0001]
Steps: 95%|█████████▌| 665/700 [04:48<00:15, 2.31it/s, loss=0.12, lr=0.0001]
Steps: 95%|█████████▌| 665/700 [04:48<00:15, 2.31it/s, loss=0.0976, lr=0.0001]
Steps: 95%|█████████▌| 666/700 [04:49<00:14, 2.31it/s, loss=0.0976, lr=0.0001]
Steps: 95%|█████████▌| 666/700 [04:49<00:14, 2.31it/s, loss=0.175, lr=0.0001]
Steps: 95%|█████████▌| 667/700 [04:49<00:14, 2.31it/s, loss=0.175, lr=0.0001]
Steps: 95%|█████████▌| 667/700 [04:49<00:14, 2.31it/s, loss=0.0758, lr=0.0001]
Steps: 95%|█████████▌| 668/700 [04:49<00:13, 2.31it/s, loss=0.0758, lr=0.0001]
Steps: 95%|█████████▌| 668/700 [04:49<00:13, 2.31it/s, loss=0.154, lr=0.0001]
Steps: 96%|█████████▌| 669/700 [04:50<00:13, 2.31it/s, loss=0.154, lr=0.0001]
Steps: 96%|█████████▌| 669/700 [04:50<00:13, 2.31it/s, loss=0.0661, lr=0.0001]
Steps: 96%|█████████▌| 670/700 [04:50<00:12, 2.31it/s, loss=0.0661, lr=0.0001]
Steps: 96%|█████████▌| 670/700 [04:50<00:12, 2.31it/s, loss=0.222, lr=0.0001]
Steps: 96%|█████████▌| 671/700 [04:51<00:12, 2.31it/s, loss=0.222, lr=0.0001]
Steps: 96%|█████████▌| 671/700 [04:51<00:12, 2.31it/s, loss=0.125, lr=0.0001]
Steps: 96%|█████████▌| 672/700 [04:51<00:12, 2.31it/s, loss=0.125, lr=0.0001]
Steps: 96%|█████████▌| 672/700 [04:51<00:12, 2.31it/s, loss=0.117, lr=0.0001]
Steps: 96%|█████████▌| 673/700 [04:52<00:11, 2.30it/s, loss=0.117, lr=0.0001]
Steps: 96%|█████████▌| 673/700 [04:52<00:11, 2.30it/s, loss=0.163, lr=0.0001]
Steps: 96%|█████████▋| 674/700 [04:52<00:11, 2.30it/s, loss=0.163, lr=0.0001]
Steps: 96%|█████████▋| 674/700 [04:52<00:11, 2.30it/s, loss=0.0756, lr=0.0001]
Steps: 96%|█████████▋| 675/700 [04:52<00:10, 2.31it/s, loss=0.0756, lr=0.0001]
Steps: 96%|█████████▋| 675/700 [04:52<00:10, 2.31it/s, loss=0.178, lr=0.0001]
Steps: 97%|█████████▋| 676/700 [04:53<00:10, 2.31it/s, loss=0.178, lr=0.0001]
Steps: 97%|█████████▋| 676/700 [04:53<00:10, 2.31it/s, loss=0.104, lr=0.0001]
Steps: 97%|█████████▋| 677/700 [04:53<00:09, 2.31it/s, loss=0.104, lr=0.0001]
Steps: 97%|█████████▋| 677/700 [04:53<00:09, 2.31it/s, loss=0.139, lr=0.0001]
Steps: 97%|█████████▋| 678/700 [04:54<00:09, 2.31it/s, loss=0.139, lr=0.0001]
Steps: 97%|█████████▋| 678/700 [04:54<00:09, 2.31it/s, loss=0.0792, lr=0.0001]
Steps: 97%|█████████▋| 679/700 [04:54<00:09, 2.31it/s, loss=0.0792, lr=0.0001]
Steps: 97%|█████████▋| 679/700 [04:54<00:09, 2.31it/s, loss=0.214, lr=0.0001]
Steps: 97%|█████████▋| 680/700 [04:55<00:08, 2.31it/s, loss=0.214, lr=0.0001]
Steps: 97%|█████████▋| 680/700 [04:55<00:08, 2.31it/s, loss=0.105, lr=0.0001]
Steps: 97%|█████████▋| 681/700 [04:55<00:08, 2.31it/s, loss=0.105, lr=0.0001]
Steps: 97%|█████████▋| 681/700 [04:55<00:08, 2.31it/s, loss=0.233, lr=0.0001]
Steps: 97%|█████████▋| 682/700 [04:55<00:07, 2.31it/s, loss=0.233, lr=0.0001]
Steps: 97%|█████████▋| 682/700 [04:56<00:07, 2.31it/s, loss=0.107, lr=0.0001]
Steps: 98%|█████████▊| 683/700 [04:56<00:07, 2.31it/s, loss=0.107, lr=0.0001]
Steps: 98%|█████████▊| 683/700 [04:56<00:07, 2.31it/s, loss=0.125, lr=0.0001]
Steps: 98%|█████████▊| 684/700 [04:56<00:06, 2.31it/s, loss=0.125, lr=0.0001]
Steps: 98%|█████████▊| 684/700 [04:56<00:06, 2.31it/s, loss=0.176, lr=0.0001]
Steps: 98%|█████████▊| 685/700 [04:57<00:06, 2.31it/s, loss=0.176, lr=0.0001]
Steps: 98%|█████████▊| 685/700 [04:57<00:06, 2.31it/s, loss=0.0955, lr=0.0001]
Steps: 98%|█████████▊| 686/700 [04:57<00:06, 2.31it/s, loss=0.0955, lr=0.0001]
Steps: 98%|█████████▊| 686/700 [04:57<00:06, 2.31it/s, loss=0.11, lr=0.0001]
Steps: 98%|█████████▊| 687/700 [04:58<00:05, 2.31it/s, loss=0.11, lr=0.0001]
Steps: 98%|█████████▊| 687/700 [04:58<00:05, 2.31it/s, loss=0.139, lr=0.0001]
Steps: 98%|█████████▊| 688/700 [04:58<00:05, 2.31it/s, loss=0.139, lr=0.0001]
Steps: 98%|█████████▊| 688/700 [04:58<00:05, 2.31it/s, loss=0.0515, lr=0.0001]
Steps: 98%|█████████▊| 689/700 [04:59<00:04, 2.30it/s, loss=0.0515, lr=0.0001]
Steps: 98%|█████████▊| 689/700 [04:59<00:04, 2.30it/s, loss=0.102, lr=0.0001]
Steps: 99%|█████████▊| 690/700 [04:59<00:04, 2.30it/s, loss=0.102, lr=0.0001]
Steps: 99%|█████████▊| 690/700 [04:59<00:04, 2.30it/s, loss=0.174, lr=0.0001]
Steps: 99%|█████████▊| 691/700 [04:59<00:03, 2.31it/s, loss=0.174, lr=0.0001]
Steps: 99%|█████████▊| 691/700 [04:59<00:03, 2.31it/s, loss=0.161, lr=0.0001]
Steps: 99%|█████████▉| 692/700 [05:00<00:03, 2.31it/s, loss=0.161, lr=0.0001]
Steps: 99%|█████████▉| 692/700 [05:00<00:03, 2.31it/s, loss=0.103, lr=0.0001]
Steps: 99%|█████████▉| 693/700 [05:00<00:03, 2.31it/s, loss=0.103, lr=0.0001]
Steps: 99%|█████████▉| 693/700 [05:00<00:03, 2.31it/s, loss=0.0503, lr=0.0001]
Steps: 99%|█████████▉| 694/700 [05:01<00:02, 2.31it/s, loss=0.0503, lr=0.0001]
Steps: 99%|█████████▉| 694/700 [05:01<00:02, 2.31it/s, loss=0.079, lr=0.0001]
Steps: 99%|█████████▉| 695/700 [05:01<00:02, 2.31it/s, loss=0.079, lr=0.0001]
Steps: 99%|█████████▉| 695/700 [05:01<00:02, 2.31it/s, loss=0.0907, lr=0.0001]
Steps: 99%|█████████▉| 696/700 [05:02<00:01, 2.31it/s, loss=0.0907, lr=0.0001]
Steps: 99%|█████████▉| 696/700 [05:02<00:01, 2.31it/s, loss=0.108, lr=0.0001]
Steps: 100%|█████████▉| 697/700 [05:02<00:01, 2.31it/s, loss=0.108, lr=0.0001]
Steps: 100%|█████████▉| 697/700 [05:02<00:01, 2.31it/s, loss=0.165, lr=0.0001]
Steps: 100%|█████████▉| 698/700 [05:02<00:00, 2.31it/s, loss=0.165, lr=0.0001]
Steps: 100%|█████████▉| 698/700 [05:02<00:00, 2.31it/s, loss=0.194, lr=0.0001]
Steps: 100%|█████████▉| 699/700 [05:03<00:00, 2.31it/s, loss=0.194, lr=0.0001]
Steps: 100%|█████████▉| 699/700 [05:03<00:00, 2.31it/s, loss=0.229, lr=0.0001]
Steps: 100%|██████████| 700/700 [05:03<00:00, 2.31it/s, loss=0.229, lr=0.0001]
Steps: 100%|██████████| 700/700 [05:03<00:00, 2.31it/s, loss=0.141, lr=0.0001]Model weights saved in /tmp/train/output/sd35_large_train_replicate/pytorch_lora_weights.safetensors
Loading pipeline components...: 0%| | 0/9 [00:00<?, ?it/s]{'base_image_seq_len', 'base_shift', 'max_shift', 'max_image_seq_len', 'use_dynamic_shifting'} was not found in config. Values will be initialized to default values.
Loaded scheduler as FlowMatchEulerDiscreteScheduler from `scheduler` subfolder of stable-diffusion-3.5-large.
Loaded text_encoder as CLIPTextModelWithProjection from `text_encoder` subfolder of stable-diffusion-3.5-large.
Loading pipeline components...: 22%|██▏ | 2/9 [00:00<00:01, 5.30it/s]
Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]
Loading checkpoint shards: 50%|█████ | 1/2 [00:04<00:04, 4.98s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:09<00:00, 4.75s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:09<00:00, 4.79s/it]
Loaded text_encoder_3 as T5EncoderModel from `text_encoder_3` subfolder of stable-diffusion-3.5-large.
Loading pipeline components...: 33%|███▎ | 3/9 [00:09<00:24, 4.12s/it]{'dual_attention_layers'} was not found in config. Values will be initialized to default values.
Loaded transformer as SD3Transformer2DModel from `transformer` subfolder of stable-diffusion-3.5-large.
Loading pipeline components...: 44%|████▍ | 4/9 [00:11<00:16, 3.27s/it]Loaded tokenizer as CLIPTokenizer from `tokenizer` subfolder of stable-diffusion-3.5-large.
Loaded tokenizer_3 as T5TokenizerFast from `tokenizer_3` subfolder of stable-diffusion-3.5-large.
Loading pipeline components...: 67%|██████▋ | 6/9 [00:12<00:04, 1.64s/it]Loaded tokenizer_2 as CLIPTokenizer from `tokenizer_2` subfolder of stable-diffusion-3.5-large.
Loaded text_encoder_2 as CLIPTextModelWithProjection from `text_encoder_2` subfolder of stable-diffusion-3.5-large.
Loading pipeline components...: 89%|████████▉ | 8/9 [00:13<00:01, 1.30s/it]Loaded vae as AutoencoderKL from `vae` subfolder of stable-diffusion-3.5-large.
Loading pipeline components...: 100%|██████████| 9/9 [00:13<00:00, 1.53s/it]
Steps: 100%|██████████| 700/700 [05:18<00:00, 2.20it/s, loss=0.141, lr=0.0001]
./
./output/
./output/sd35_large_train_replicate/
./output/sd35_large_train_replicate/lora.safetensors
This output was created using a different version of the model, lucataco/stable-diffusion-3.5-large-lora-trainer:cd6419a5.