Input
Run this model in Node.js with one line of code:
npm install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
import Replicate from "replicate";
import fs from "node:fs";
const replicate = new Replicate({
auth: process.env.REPLICATE_API_TOKEN,
});
Run astramlco/diffbir using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
const output = await replicate.run(
"astramlco/diffbir:f7a6e7832fee8d2593be566723295b80ed14b424f8365f8647e19775f617e205",
{
input: {
seed: 231,
input: "https://replicate.delivery/pbxt/Ketx7Rc2HPkgBgsBAmZ14gWWKc6aiKsA783giEEf4qiHDtOb/C.L.A.I.R.E._everhart_s.jpg",
steps: 50,
tiled: false,
tile_size: 512,
has_aligned: false,
tile_stride: 256,
repeat_times: 1,
use_guidance: false,
color_fix_type: "wavelet",
guidance_scale: 0,
guidance_space: "latent",
guidance_repeat: 5,
only_center_face: false,
guidance_time_stop: -1,
guidance_time_start: 1001,
background_upsampler: "RealESRGAN",
face_detection_model: "retinaface_resnet50",
upscaling_model_type: "faces",
restoration_model_type: "faces",
super_resolution_factor: 2,
disable_preprocess_model: false,
reload_restoration_model: false,
background_upsampler_tile: 400,
background_upsampler_tile_stride: 400
}
}
);
// To access the file URL:
console.log(output[0].url()); //=> "http://example.com"
// To write the file to disk (using the promise-based fs API):
await fs.promises.writeFile("my-image.png", output[0]);
To learn more, take a look at the guide on getting started with Node.js.
Install Replicate's Python client library:
pip install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
import replicate
Run astramlco/diffbir using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
output = replicate.run(
"astramlco/diffbir:f7a6e7832fee8d2593be566723295b80ed14b424f8365f8647e19775f617e205",
input={
"seed": 231,
"input": "https://replicate.delivery/pbxt/Ketx7Rc2HPkgBgsBAmZ14gWWKc6aiKsA783giEEf4qiHDtOb/C.L.A.I.R.E._everhart_s.jpg",
"steps": 50,
"tiled": False,
"tile_size": 512,
"has_aligned": False,
"tile_stride": 256,
"repeat_times": 1,
"use_guidance": False,
"color_fix_type": "wavelet",
"guidance_scale": 0,
"guidance_space": "latent",
"guidance_repeat": 5,
"only_center_face": False,
"guidance_time_stop": -1,
"guidance_time_start": 1001,
"background_upsampler": "RealESRGAN",
"face_detection_model": "retinaface_resnet50",
"upscaling_model_type": "faces",
"restoration_model_type": "faces",
"super_resolution_factor": 2,
"disable_preprocess_model": False,
"reload_restoration_model": False,
"background_upsampler_tile": 400,
"background_upsampler_tile_stride": 400
}
)
print(output)
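The Python client returns this model's output directly; in the example prediction shown under Output below, it is a list of image URLs. As a minimal sketch (assuming the output is a list of URL strings), you could save each image locally using only the standard library:

import urllib.request

# Hedged sketch: assumes `output` is a list of image URLs, as in the
# example prediction shown under "Output" below.
for i, url in enumerate(output):
    urllib.request.urlretrieve(url, f"restored_{i}.png")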
To learn more, take a look at the guide on getting started with Python.
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
Run astramlco/diffbir using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
curl -s -X POST \
-H "Authorization: Bearer $REPLICATE_API_TOKEN" \
-H "Content-Type: application/json" \
-H "Prefer: wait" \
-d $'{
"version": "astramlco/diffbir:f7a6e7832fee8d2593be566723295b80ed14b424f8365f8647e19775f617e205",
"input": {
"seed": 231,
"input": "https://replicate.delivery/pbxt/Ketx7Rc2HPkgBgsBAmZ14gWWKc6aiKsA783giEEf4qiHDtOb/C.L.A.I.R.E._everhart_s.jpg",
"steps": 50,
"tiled": false,
"tile_size": 512,
"has_aligned": false,
"tile_stride": 256,
"repeat_times": 1,
"use_guidance": false,
"color_fix_type": "wavelet",
"guidance_scale": 0,
"guidance_space": "latent",
"guidance_repeat": 5,
"only_center_face": false,
"guidance_time_stop": -1,
"guidance_time_start": 1001,
"background_upsampler": "RealESRGAN",
"face_detection_model": "retinaface_resnet50",
"upscaling_model_type": "faces",
"restoration_model_type": "faces",
"super_resolution_factor": 2,
"disable_preprocess_model": false,
"reload_restoration_model": false,
"background_upsampler_tile": 400,
"background_upsampler_tile_stride": 400
}
}' \
https://api.replicate.com/v1/predictions
To learn more, take a look at Replicate’s HTTP API reference docs.
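The "Prefer: wait" header asks the API to hold the request open until the prediction finishes. If the response comes back before the prediction is done, it still includes a urls.get endpoint (visible in the example output below) that you can poll until status becomes succeeded, failed, or canceled. A minimal polling sketch in Python, assuming the POST response has already been parsed into a prediction dict:

import json
import os
import time
import urllib.request

# Hedged sketch: poll the prediction's "get" URL until it reaches a
# terminal status, then print its output. Assumes `prediction` holds the
# parsed JSON from the POST request above and REPLICATE_API_TOKEN is set.
headers = {"Authorization": f"Bearer {os.environ['REPLICATE_API_TOKEN']}"}
while prediction["status"] not in ("succeeded", "failed", "canceled"):
    time.sleep(2)
    req = urllib.request.Request(prediction["urls"]["get"], headers=headers)
    with urllib.request.urlopen(req) as resp:
        prediction = json.load(resp)
print(prediction["output"])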
Output
{
"completed_at": "2024-03-30T07:49:57.477422Z",
"created_at": "2024-03-30T07:46:10.719345Z",
"data_removed": false,
"error": null,
"id": "lg6q6ydbytjofvmc7o474gdleu",
"input": {
"seed": 231,
"input": "https://replicate.delivery/pbxt/Ketx7Rc2HPkgBgsBAmZ14gWWKc6aiKsA783giEEf4qiHDtOb/C.L.A.I.R.E._everhart_s.jpg",
"steps": 50,
"tiled": false,
"tile_size": 512,
"has_aligned": false,
"tile_stride": 256,
"repeat_times": 1,
"use_guidance": false,
"color_fix_type": "wavelet",
"guidance_scale": 0,
"guidance_space": "latent",
"guidance_repeat": 5,
"only_center_face": false,
"guidance_time_stop": -1,
"guidance_time_start": 1001,
"background_upsampler": "RealESRGAN",
"face_detection_model": "retinaface_resnet50",
"upscaling_model_type": "faces",
"restoration_model_type": "faces",
"super_resolution_factor": 2,
"disable_preprocess_model": false,
"reload_restoration_model": false,
"background_upsampler_tile": 400,
"background_upsampler_tile_stride": 400
},
"logs": "ckptckptckpt weights/face_full_v1.ckpt\nSwitching from mode 'FULL' to 'FACE'...\nBuilding and loading 'FACE' mode model...\nControlLDM: Running in eps-prediction mode\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. 
Query dim is 320, context_dim is 1024 and using 5 heads.\nDiffusionWrapper has 865.91 M params.\nmaking attention of type 'vanilla-xformers' with 512 in_channels\nbuilding MemoryEfficientAttnBlock with 512 in_channels...\nWorking with z of shape (1, 4, 32, 32) = 4096 dimensions.\nmaking attention of type 'vanilla-xformers' with 512 in_channels\nbuilding MemoryEfficientAttnBlock with 512 in_channels...\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]\nLoading model from: /root/.pyenv/versions/3.9.19/lib/python3.9/site-packages/lpips/weights/v0.1/alex.pth\nreload swinir model from weights/face_swinir_v1.ckpt\nENABLE XFORMERS!\nModel successfully switched to 'FACE' mode.\n{'bg_tile': 400,\n'bg_tile_stride': 400,\n'bg_upsampler': 'RealESRGAN',\n'ckpt': 'weights/face_full_v1.ckpt',\n'color_fix_type': 'wavelet',\n'config': 'configs/model/cldm.yaml',\n'detection_model': 'retinaface_resnet50',\n'device': 'cuda',\n'disable_preprocess_model': False,\n'g_repeat': 5,\n'g_scale': 0.0,\n'g_space': 'latent',\n'g_t_start': 1001,\n'g_t_stop': -1,\n'has_aligned': False,\n'image_size': 512,\n'input': '/tmp/tmpcdoq1b8cC.L.A.I.R.E._everhart_s.jpg',\n'only_center_face': False,\n'output': '.',\n'reload_swinir': False,\n'repeat_times': 1,\n'seed': 231,\n'show_lq': False,\n'skip_if_exist': False,\n'sr_scale': 2,\n'steps': 50,\n'swinir_ckpt': 'weights/face_swinir_v1.ckpt',\n'tile_size': 512,\n'tile_stride': 256,\n'tiled': False,\n'use_guidance': False}\nGlobal seed set to 231\n/root/.pyenv/versions/3.9.19/lib/python3.9/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. 
The current behavior is equivalent to passing `weights=None`.\nwarnings.warn(msg)\nDownloading: \"https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_Resnet50_Final.pth\" to /root/.pyenv/versions/3.9.19/lib/python3.9/site-packages/facexlib/weights/detection_Resnet50_Final.pth\n 0%| | 0.00/104M [00:00<?, ?B/s]\n 39%|███▉ | 40.5M/104M [00:00<00:00, 425MB/s]\n 78%|███████▊ | 81.1M/104M [00:00<00:00, 98.1MB/s]\n100%|██████████| 104M/104M [00:00<00:00, 127MB/s]\nDownloading: \"https://github.com/xinntao/facexlib/releases/download/v0.2.2/parsing_parsenet.pth\" to /root/.pyenv/versions/3.9.19/lib/python3.9/site-packages/facexlib/weights/parsing_parsenet.pth\n 0%| | 0.00/81.4M [00:00<?, ?B/s]\n 54%|█████▍ | 43.8M/81.4M [00:00<00:00, 459MB/s]\n100%|██████████| 81.4M/81.4M [00:00<00:00, 464MB/s]\nLoading RealESRGAN_x2plus.pth for background upsampling...\nDownloading: \"https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth\" to /src/weights/realesrgan/RealESRGAN_x2plus.pth\n 0%| | 0.00/64.0M [00:00<?, ?B/s]\n 52%|█████▏ | 33.4M/64.0M [00:00<00:00, 351MB/s]\n100%|██████████| 64.0M/64.0M [00:00<00:00, 386MB/s]\ntimesteps used in spaced sampler:\n[0, 20, 41, 61, 82, 102, 122, 143, 163, 183, 204, 224, 245, 265, 285, 306, 326, 347, 367, 387, 408, 428, 449, 469, 489, 510, 530, 550, 571, 591, 612, 632, 652, 673, 693, 714, 734, 754, 775, 795, 816, 836, 856, 877, 897, 917, 938, 958, 979, 999]\nSpaced Sampler: 0%| | 0/50 [00:00<?, ?it/s]\nSpaced Sampler: 2%|▏ | 1/50 [00:00<00:10, 4.79it/s]\nSpaced Sampler: 6%|▌ | 3/50 [00:00<00:05, 8.70it/s]\nSpaced Sampler: 10%|█ | 5/50 [00:00<00:04, 10.19it/s]\nSpaced Sampler: 14%|█▍ | 7/50 [00:00<00:03, 10.93it/s]\nSpaced Sampler: 18%|█▊ | 9/50 [00:00<00:03, 11.34it/s]\nSpaced Sampler: 22%|██▏ | 11/50 [00:01<00:03, 11.59it/s]\nSpaced Sampler: 26%|██▌ | 13/50 [00:01<00:03, 11.81it/s]\nSpaced Sampler: 30%|███ | 15/50 [00:01<00:02, 11.96it/s]\nSpaced Sampler: 34%|███▍ | 17/50 [00:01<00:02, 12.07it/s]\nSpaced Sampler: 38%|███▊ | 19/50 [00:01<00:02, 12.13it/s]\nSpaced Sampler: 42%|████▏ | 21/50 [00:01<00:02, 12.17it/s]\nSpaced Sampler: 46%|████▌ | 23/50 [00:02<00:02, 12.14it/s]\nSpaced Sampler: 50%|█████ | 25/50 [00:02<00:02, 12.13it/s]\nSpaced Sampler: 54%|█████▍ | 27/50 [00:02<00:01, 12.11it/s]\nSpaced Sampler: 58%|█████▊ | 29/50 [00:02<00:01, 12.10it/s]\nSpaced Sampler: 62%|██████▏ | 31/50 [00:02<00:01, 12.07it/s]\nSpaced Sampler: 66%|██████▌ | 33/50 [00:02<00:01, 12.08it/s]\nSpaced Sampler: 70%|███████ | 35/50 [00:03<00:01, 12.06it/s]\nSpaced Sampler: 74%|███████▍ | 37/50 [00:03<00:01, 12.05it/s]\nSpaced Sampler: 78%|███████▊ | 39/50 [00:03<00:00, 11.95it/s]\nSpaced Sampler: 82%|████████▏ | 41/50 [00:03<00:00, 11.97it/s]\nSpaced Sampler: 86%|████████▌ | 43/50 [00:03<00:00, 12.01it/s]\nSpaced Sampler: 90%|█████████ | 45/50 [00:03<00:00, 12.06it/s]\nSpaced Sampler: 94%|█████████▍| 47/50 [00:04<00:00, 12.12it/s]\nSpaced Sampler: 98%|█████████▊| 49/50 [00:04<00:00, 12.17it/s]\nSpaced Sampler: 100%|██████████| 50/50 [00:04<00:00, 11.77it/s]\nupsampling the background image using RealESRGAN...\nFace image tmpcdoq1b8cC.L.A.I.R.E._everhart_s saved to ./..",
"metrics": {
"predict_time": 43.867453,
"total_time": 226.758077
},
"output": [
"https://replicate.delivery/pbxt/ZebgzumoHLy2ZCL5ppfJnICvW5SBKGZ2emNQxPeS35KNidVKB/tmpcdoq1b8cC.L.A.I.R.E._everhart_s_00.png",
"https://replicate.delivery/pbxt/e1wupnukQOzFYaCzdEemITU2qoHPTUcDRHyfiWepxncNidVKB/tmpcdoq1b8cC.L.A.I.R.E._everhart_s_00.png",
"https://replicate.delivery/pbxt/a35MV4BcyI7wEdbFav8hrbX8CuOt3IU8jEr39S6E9geSsrSJA/tmpcdoq1b8cC.L.A.I.R.E._everhart_s.png"
],
"started_at": "2024-03-30T07:49:13.609969Z",
"status": "succeeded",
"urls": {
"get": "https://api.replicate.com/v1/predictions/lg6q6ydbytjofvmc7o474gdleu",
"cancel": "https://api.replicate.com/v1/predictions/lg6q6ydbytjofvmc7o474gdleu/cancel"
},
"version": "f7a6e7832fee8d2593be566723295b80ed14b424f8365f8647e19775f617e205"
}
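You can also retrieve this prediction record later by its id. A hedged sketch using the Python client (assuming the replicate package is installed and REPLICATE_API_TOKEN is set):

import replicate

# Hedged sketch: fetch an existing prediction by id and read its status
# and output; the id here is the one from the example record above.
prediction = replicate.predictions.get("lg6q6ydbytjofvmc7o474gdleu")
print(prediction.status)  # "succeeded"
print(prediction.output)  # list of output image URLs

The raw logs for this prediction follow.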
ckptckptckpt weights/face_full_v1.ckpt
Switching from mode 'FULL' to 'FACE'...
Building and loading 'FACE' mode model...
ControlLDM: Running in eps-prediction mode
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.
DiffusionWrapper has 865.91 M params.
making attention of type 'vanilla-xformers' with 512 in_channels
building MemoryEfficientAttnBlock with 512 in_channels...
Working with z of shape (1, 4, 32, 32) = 4096 dimensions.
making attention of type 'vanilla-xformers' with 512 in_channels
building MemoryEfficientAttnBlock with 512 in_channels...
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /root/.pyenv/versions/3.9.19/lib/python3.9/site-packages/lpips/weights/v0.1/alex.pth
reload swinir model from weights/face_swinir_v1.ckpt
ENABLE XFORMERS!
Model successfully switched to 'FACE' mode.
{'bg_tile': 400,
'bg_tile_stride': 400,
'bg_upsampler': 'RealESRGAN',
'ckpt': 'weights/face_full_v1.ckpt',
'color_fix_type': 'wavelet',
'config': 'configs/model/cldm.yaml',
'detection_model': 'retinaface_resnet50',
'device': 'cuda',
'disable_preprocess_model': False,
'g_repeat': 5,
'g_scale': 0.0,
'g_space': 'latent',
'g_t_start': 1001,
'g_t_stop': -1,
'has_aligned': False,
'image_size': 512,
'input': '/tmp/tmpcdoq1b8cC.L.A.I.R.E._everhart_s.jpg',
'only_center_face': False,
'output': '.',
'reload_swinir': False,
'repeat_times': 1,
'seed': 231,
'show_lq': False,
'skip_if_exist': False,
'sr_scale': 2,
'steps': 50,
'swinir_ckpt': 'weights/face_swinir_v1.ckpt',
'tile_size': 512,
'tile_stride': 256,
'tiled': False,
'use_guidance': False}
Global seed set to 231
/root/.pyenv/versions/3.9.19/lib/python3.9/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=None`.
warnings.warn(msg)
Downloading: "https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_Resnet50_Final.pth" to /root/.pyenv/versions/3.9.19/lib/python3.9/site-packages/facexlib/weights/detection_Resnet50_Final.pth
0%| | 0.00/104M [00:00<?, ?B/s]
39%|███▉ | 40.5M/104M [00:00<00:00, 425MB/s]
78%|███████▊ | 81.1M/104M [00:00<00:00, 98.1MB/s]
100%|██████████| 104M/104M [00:00<00:00, 127MB/s]
Downloading: "https://github.com/xinntao/facexlib/releases/download/v0.2.2/parsing_parsenet.pth" to /root/.pyenv/versions/3.9.19/lib/python3.9/site-packages/facexlib/weights/parsing_parsenet.pth
0%| | 0.00/81.4M [00:00<?, ?B/s]
54%|█████▍ | 43.8M/81.4M [00:00<00:00, 459MB/s]
100%|██████████| 81.4M/81.4M [00:00<00:00, 464MB/s]
Loading RealESRGAN_x2plus.pth for background upsampling...
Downloading: "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth" to /src/weights/realesrgan/RealESRGAN_x2plus.pth
0%| | 0.00/64.0M [00:00<?, ?B/s]
52%|█████▏ | 33.4M/64.0M [00:00<00:00, 351MB/s]
100%|██████████| 64.0M/64.0M [00:00<00:00, 386MB/s]
timesteps used in spaced sampler:
[0, 20, 41, 61, 82, 102, 122, 143, 163, 183, 204, 224, 245, 265, 285, 306, 326, 347, 367, 387, 408, 428, 449, 469, 489, 510, 530, 550, 571, 591, 612, 632, 652, 673, 693, 714, 734, 754, 775, 795, 816, 836, 856, 877, 897, 917, 938, 958, 979, 999]
Spaced Sampler: 0%| | 0/50 [00:00<?, ?it/s]
Spaced Sampler: 2%|▏ | 1/50 [00:00<00:10, 4.79it/s]
Spaced Sampler: 6%|▌ | 3/50 [00:00<00:05, 8.70it/s]
Spaced Sampler: 10%|█ | 5/50 [00:00<00:04, 10.19it/s]
Spaced Sampler: 14%|█▍ | 7/50 [00:00<00:03, 10.93it/s]
Spaced Sampler: 18%|█▊ | 9/50 [00:00<00:03, 11.34it/s]
Spaced Sampler: 22%|██▏ | 11/50 [00:01<00:03, 11.59it/s]
Spaced Sampler: 26%|██▌ | 13/50 [00:01<00:03, 11.81it/s]
Spaced Sampler: 30%|███ | 15/50 [00:01<00:02, 11.96it/s]
Spaced Sampler: 34%|███▍ | 17/50 [00:01<00:02, 12.07it/s]
Spaced Sampler: 38%|███▊ | 19/50 [00:01<00:02, 12.13it/s]
Spaced Sampler: 42%|████▏ | 21/50 [00:01<00:02, 12.17it/s]
Spaced Sampler: 46%|████▌ | 23/50 [00:02<00:02, 12.14it/s]
Spaced Sampler: 50%|█████ | 25/50 [00:02<00:02, 12.13it/s]
Spaced Sampler: 54%|█████▍ | 27/50 [00:02<00:01, 12.11it/s]
Spaced Sampler: 58%|█████▊ | 29/50 [00:02<00:01, 12.10it/s]
Spaced Sampler: 62%|██████▏ | 31/50 [00:02<00:01, 12.07it/s]
Spaced Sampler: 66%|██████▌ | 33/50 [00:02<00:01, 12.08it/s]
Spaced Sampler: 70%|███████ | 35/50 [00:03<00:01, 12.06it/s]
Spaced Sampler: 74%|███████▍ | 37/50 [00:03<00:01, 12.05it/s]
Spaced Sampler: 78%|███████▊ | 39/50 [00:03<00:00, 11.95it/s]
Spaced Sampler: 82%|████████▏ | 41/50 [00:03<00:00, 11.97it/s]
Spaced Sampler: 86%|████████▌ | 43/50 [00:03<00:00, 12.01it/s]
Spaced Sampler: 90%|█████████ | 45/50 [00:03<00:00, 12.06it/s]
Spaced Sampler: 94%|█████████▍| 47/50 [00:04<00:00, 12.12it/s]
Spaced Sampler: 98%|█████████▊| 49/50 [00:04<00:00, 12.17it/s]
Spaced Sampler: 100%|██████████| 50/50 [00:04<00:00, 11.77it/s]
upsampling the background image using RealESRGAN...
Face image tmpcdoq1b8cC.L.A.I.R.E._everhart_s saved to ./..