Input
Run this model in Node.js with one line of code:
npm install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
import Replicate from "replicate";
import fs from "node:fs/promises";
const replicate = new Replicate({
auth: process.env.REPLICATE_API_TOKEN,
});
Run zsxkib/diffbir using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
const output = await replicate.run(
"zsxkib/diffbir:51ed1464d8bbbaca811153b051d3b09ab42f0bdeb85804ae26ba323d7a66a4ac",
{
input: {
seed: 231,
input: "https://replicate.delivery/pbxt/JgdmREudlAXBDFZnIvZjfgSxwxtNd3aHk7gXHScaLGFltLGe/0427.png",
steps: 50,
tiled: false,
tile_size: 512,
has_aligned: true,
tile_stride: 256,
repeat_times: 1,
use_guidance: false,
color_fix_type: "wavelet",
guidance_scale: 0,
guidance_space: "latent",
guidance_repeat: 5,
only_center_face: false,
guidance_time_stop: -1,
guidance_time_start: 1001,
background_upsampler: "RealESRGAN",
face_detection_model: "retinaface_resnet50",
upscaling_model_type: "faces",
restoration_model_type: "general_scenes",
super_resolution_factor: 1,
disable_preprocess_model: false,
reload_restoration_model: false,
background_upsampler_tile: 400,
background_upsampler_tile_stride: 400
}
}
);
// To access the file URL:
console.log(output[0].url()); //=> "http://example.com"
// To write the file to disk:
await fs.writeFile("my-image.png", output[0]);
To learn more, take a look at the guide on getting started with Node.js.
pip install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
import replicate
Run zsxkib/diffbir using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
output = replicate.run(
"zsxkib/diffbir:51ed1464d8bbbaca811153b051d3b09ab42f0bdeb85804ae26ba323d7a66a4ac",
input={
"seed": 231,
"input": "https://replicate.delivery/pbxt/JgdmREudlAXBDFZnIvZjfgSxwxtNd3aHk7gXHScaLGFltLGe/0427.png",
"steps": 50,
"tiled": False,
"tile_size": 512,
"has_aligned": True,
"tile_stride": 256,
"repeat_times": 1,
"use_guidance": False,
"color_fix_type": "wavelet",
"guidance_scale": 0,
"guidance_space": "latent",
"guidance_repeat": 5,
"only_center_face": False,
"guidance_time_stop": -1,
"guidance_time_start": 1001,
"background_upsampler": "RealESRGAN",
"face_detection_model": "retinaface_resnet50",
"upscaling_model_type": "faces",
"restoration_model_type": "general_scenes",
"super_resolution_factor": 1,
"disable_preprocess_model": False,
"reload_restoration_model": False,
"background_upsampler_tile": 400,
"background_upsampler_tile_stride": 400
}
)
print(output)
To learn more, take a look at the guide on getting started with Python.
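The run() call blocks until the prediction finishes and returns the model's output, which for this model is a list containing the restored image. Below is a minimal sketch for saving the first result to disk; it assumes each output item is a URL string (newer versions of the client may return file-like objects instead, whose string form is still the URL).

import urllib.request

# `output` is the value returned by replicate.run(...) above.
# Assumption: output is a list and each item's str() is a downloadable URL.
result_url = str(output[0])
urllib.request.urlretrieve(result_url, "restored_face.png")
print(f"Saved {result_url} to restored_face.png")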
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
Run zsxkib/diffbir using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
curl -s -X POST \
-H "Authorization: Bearer $REPLICATE_API_TOKEN" \
-H "Content-Type: application/json" \
-H "Prefer: wait" \
-d $'{
"version": "51ed1464d8bbbaca811153b051d3b09ab42f0bdeb85804ae26ba323d7a66a4ac",
"input": {
"seed": 231,
"input": "https://replicate.delivery/pbxt/JgdmREudlAXBDFZnIvZjfgSxwxtNd3aHk7gXHScaLGFltLGe/0427.png",
"steps": 50,
"tiled": false,
"tile_size": 512,
"has_aligned": true,
"tile_stride": 256,
"repeat_times": 1,
"use_guidance": false,
"color_fix_type": "wavelet",
"guidance_scale": 0,
"guidance_space": "latent",
"guidance_repeat": 5,
"only_center_face": false,
"guidance_time_stop": -1,
"guidance_time_start": 1001,
"background_upsampler": "RealESRGAN",
"face_detection_model": "retinaface_resnet50",
"upscaling_model_type": "faces",
"restoration_model_type": "general_scenes",
"super_resolution_factor": 1,
"disable_preprocess_model": false,
"reload_restoration_model": false,
"background_upsampler_tile": 400,
"background_upsampler_tile_stride": 400
}
}' \
https://api.replicate.com/v1/predictions
To learn more, take a look at Replicate’s HTTP API reference docs.
Output
{ "input": "https://replicate.delivery/pbxt/JgdmREudlAXBDFZnIvZjfgSxwxtNd3aHk7gXHScaLGFltLGe/0427.png", "outut": "https://replicate.delivery/pbxt/tjBj5e8QUiSAHaJhYwLUV2Sb5fmmp9VuIvfb6X4fG6UCHp1GB/tmpbr7p39dy0427.png" }
{
"completed_at": "2023-10-12T13:19:45.432677Z",
"created_at": "2023-10-12T13:17:41.439299Z",
"data_removed": false,
"error": null,
"id": "77euyklbgcyarhaczq7uwxulai",
"input": {
"seed": 231,
"input": "https://replicate.delivery/pbxt/JgdmREudlAXBDFZnIvZjfgSxwxtNd3aHk7gXHScaLGFltLGe/0427.png",
"steps": 50,
"tiled": false,
"tile_size": 512,
"has_aligned": true,
"tile_stride": 256,
"repeat_times": 1,
"use_guidance": false,
"color_fix_type": "wavelet",
"guidance_scale": 0,
"guidance_space": "latent",
"guidance_repeat": 5,
"only_center_face": false,
"guidance_time_stop": -1,
"guidance_time_start": 1001,
"background_upsampler": "RealESRGAN",
"face_detection_model": "retinaface_resnet50",
"upscaling_model_type": "faces",
"restoration_model_type": "general_scenes",
"super_resolution_factor": 1,
"disable_preprocess_model": false,
"reload_restoration_model": false,
"background_upsampler_tile": 400,
"background_upsampler_tile_stride": 400
},
"logs": "ckptckptckpt weights/face_full_v1.ckpt\nSwitching from mode 'FULL' to 'FACE'...\nBuilding and loading 'FACE' mode model...\nControlLDM: Running in eps-prediction mode\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. 
Query dim is 320, context_dim is 1024 and using 5 heads.\nDiffusionWrapper has 865.91 M params.\nmaking attention of type 'vanilla-xformers' with 512 in_channels\nbuilding MemoryEfficientAttnBlock with 512 in_channels...\nWorking with z of shape (1, 4, 32, 32) = 4096 dimensions.\nmaking attention of type 'vanilla-xformers' with 512 in_channels\nbuilding MemoryEfficientAttnBlock with 512 in_channels...\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]\nLoading model from: /root/.pyenv/versions/3.9.18/lib/python3.9/site-packages/lpips/weights/v0.1/alex.pth\nreload swinir model from weights/general_swinir_v1.ckpt\nENABLE XFORMERS!\nModel successfully switched to 'FACE' mode.\n{'bg_tile': 400,\n'bg_tile_stride': 400,\n'bg_upsampler': 'RealESRGAN',\n'ckpt': 'weights/face_full_v1.ckpt',\n'color_fix_type': 'wavelet',\n'config': 'configs/model/cldm.yaml',\n'detection_model': 'retinaface_resnet50',\n'device': 'cuda',\n'disable_preprocess_model': False,\n'g_repeat': 5,\n'g_scale': 0.0,\n'g_space': 'latent',\n 'g_t_start': 1001,\n 'g_t_stop': -1,\n 'has_aligned': True,\n'image_size': 512,\n'input': '/tmp/tmpbr7p39dy0427.png',\n 'only_center_face': False,\n 'output': '.',\n 'reload_swinir': False,\n'repeat_times': 1,\n 'seed': 231,\n 'show_lq': False,\n 'skip_if_exist': False,\n 'sr_scale': 1,\n'steps': 50,\n 'swinir_ckpt': 'weights/general_swinir_v1.ckpt',\n'tile_size': 512,\n'tile_stride': 256,\n 'tiled': False,\n 'use_guidance': False}\nGlobal seed set to 231\n/root/.pyenv/versions/3.9.18/lib/python3.9/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. 
The current behavior is equivalent to passing `weights=None`.\nwarnings.warn(msg)\nDownloading: \"https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_Resnet50_Final.pth\" to /root/.pyenv/versions/3.9.18/lib/python3.9/site-packages/facexlib/weights/detection_Resnet50_Final.pth\n 0%| | 0.00/104M [00:00<?, ?B/s]\n 4%|▎ | 3.81M/104M [00:00<00:02, 39.8MB/s]\n 8%|▊ | 8.60M/104M [00:00<00:02, 45.9MB/s]\n 14%|█▎ | 14.1M/104M [00:00<00:01, 51.3MB/s]\n 20%|█▉ | 20.6M/104M [00:00<00:01, 57.8MB/s]\n 27%|██▋ | 28.1M/104M [00:00<00:01, 65.5MB/s]\n 34%|███▍ | 35.7M/104M [00:00<00:01, 70.4MB/s]\n 43%|████▎ | 45.0M/104M [00:00<00:00, 79.3MB/s]\n 53%|█████▎ | 54.9M/104M [00:00<00:00, 86.8MB/s]\n 63%|██████▎ | 65.8M/104M [00:00<00:00, 95.7MB/s]\n 74%|███████▍ | 77.1M/104M [00:01<00:00, 103MB/s] \n 85%|████████▌ | 89.2M/104M [00:01<00:00, 110MB/s]\n 97%|█████████▋| 102M/104M [00:01<00:00, 116MB/s] \n100%|██████████| 104M/104M [00:01<00:00, 89.6MB/s]\nDownloading: \"https://github.com/xinntao/facexlib/releases/download/v0.2.2/parsing_parsenet.pth\" to /root/.pyenv/versions/3.9.18/lib/python3.9/site-packages/facexlib/weights/parsing_parsenet.pth\n 0%| | 0.00/81.4M [00:00<?, ?B/s]\n 5%|▌ | 4.19M/81.4M [00:00<00:01, 43.6MB/s]\n 13%|█▎ | 10.6M/81.4M [00:00<00:01, 57.4MB/s]\n 22%|██▏ | 18.3M/81.4M [00:00<00:00, 67.9MB/s]\n 36%|███▌ | 29.2M/81.4M [00:00<00:00, 86.4MB/s]\n 53%|█████▎ | 43.3M/81.4M [00:00<00:00, 108MB/s] \n 68%|██████▊ | 55.1M/81.4M [00:00<00:00, 114MB/s]\n 83%|████████▎ | 67.5M/81.4M [00:00<00:00, 119MB/s]\n100%|██████████| 81.4M/81.4M [00:00<00:00, 107MB/s]\nLoading RealESRGAN_x2plus.pth for background upsampling...\ntimesteps used in spaced sampler:\n[0, 20, 41, 61, 82, 102, 122, 143, 163, 183, 204, 224, 245, 265, 285, 306, 326, 347, 367, 387, 408, 428, 449, 469, 489, 510, 530, 550, 571, 591, 612, 632, 652, 673, 693, 714, 734, 754, 775, 795, 816, 836, 856, 877, 897, 917, 938, 958, 979, 999]\nSpaced Sampler: 0%| | 0/50 [00:00<?, ?it/s]\nSpaced Sampler: 2%|▏ | 1/50 [00:00<00:10, 4.78it/s]\nSpaced Sampler: 6%|▌ | 3/50 [00:00<00:05, 8.71it/s]\nSpaced Sampler: 10%|█ | 5/50 [00:00<00:04, 10.22it/s]\nSpaced Sampler: 14%|█▍ | 7/50 [00:00<00:03, 11.02it/s]\nSpaced Sampler: 18%|█▊ | 9/50 [00:00<00:03, 11.47it/s]\nSpaced Sampler: 22%|██▏ | 11/50 [00:01<00:03, 11.76it/s]\nSpaced Sampler: 26%|██▌ | 13/50 [00:01<00:03, 11.94it/s]\nSpaced Sampler: 30%|███ | 15/50 [00:01<00:02, 12.07it/s]\nSpaced Sampler: 34%|███▍ | 17/50 [00:01<00:02, 12.15it/s]\nSpaced Sampler: 38%|███▊ | 19/50 [00:01<00:02, 12.16it/s]\nSpaced Sampler: 42%|████▏ | 21/50 [00:01<00:02, 12.19it/s]\nSpaced Sampler: 46%|████▌ | 23/50 [00:02<00:02, 12.22it/s]\nSpaced Sampler: 50%|█████ | 25/50 [00:02<00:02, 12.23it/s]\nSpaced Sampler: 54%|█████▍ | 27/50 [00:02<00:01, 12.26it/s]\nSpaced Sampler: 58%|█████▊ | 29/50 [00:02<00:01, 12.27it/s]\nSpaced Sampler: 62%|██████▏ | 31/50 [00:02<00:01, 12.27it/s]\nSpaced Sampler: 66%|██████▌ | 33/50 [00:02<00:01, 12.27it/s]\nSpaced Sampler: 70%|███████ | 35/50 [00:02<00:01, 12.24it/s]\nSpaced Sampler: 74%|███████▍ | 37/50 [00:03<00:01, 12.14it/s]\nSpaced Sampler: 78%|███████▊ | 39/50 [00:03<00:00, 12.18it/s]\nSpaced Sampler: 82%|████████▏ | 41/50 [00:03<00:00, 12.19it/s]\nSpaced Sampler: 86%|████████▌ | 43/50 [00:03<00:00, 12.19it/s]\nSpaced Sampler: 90%|█████████ | 45/50 [00:03<00:00, 12.20it/s]\nSpaced Sampler: 94%|█████████▍| 47/50 [00:03<00:00, 12.17it/s]\nSpaced Sampler: 98%|█████████▊| 49/50 [00:04<00:00, 12.12it/s]\nSpaced Sampler: 100%|██████████| 50/50 [00:04<00:00, 11.86it/s]\nFace image 
tmpbr7p39dy0427 saved to ./..",
"metrics": {
"predict_time": 36.889872,
"total_time": 123.993378
},
"output": [
"https://replicate.delivery/pbxt/tjBj5e8QUiSAHaJhYwLUV2Sb5fmmp9VuIvfb6X4fG6UCHp1GB/tmpbr7p39dy0427.png"
],
"started_at": "2023-10-12T13:19:08.542805Z",
"status": "succeeded",
"urls": {
"get": "https://api.replicate.com/v1/predictions/77euyklbgcyarhaczq7uwxulai",
"cancel": "https://api.replicate.com/v1/predictions/77euyklbgcyarhaczq7uwxulai/cancel"
},
"version": "51ed1464d8bbbaca811153b051d3b09ab42f0bdeb85804ae26ba323d7a66a4ac"
}
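The urls.get endpoint in the example response above can be polled until the prediction's status becomes succeeded (or failed or canceled), which is useful when you create predictions without the Prefer: wait header or the model runs longer than the wait window. A minimal polling sketch in Python, assuming the third-party requests package is installed and reusing the prediction ID from the example response:

import os
import time
import requests

prediction_url = "https://api.replicate.com/v1/predictions/77euyklbgcyarhaczq7uwxulai"
headers = {"Authorization": f"Bearer {os.environ['REPLICATE_API_TOKEN']}"}

while True:
    prediction = requests.get(prediction_url, headers=headers).json()
    # Terminal states for a Replicate prediction
    if prediction["status"] in ("succeeded", "failed", "canceled"):
        break
    time.sleep(2)  # wait briefly between polls

print(prediction["status"], prediction.get("output"))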