DiffBIR: Towards Blind Image Restoration with Generative Diffusion Prior
Run this model in Node.js with one line of code:
npm install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
import Replicate from "replicate";
import fs from "node:fs";
const replicate = new Replicate({
auth: process.env.REPLICATE_API_TOKEN,
});
Run astramlco/diffbir using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
const output = await replicate.run(
"astramlco/diffbir:f7a6e7832fee8d2593be566723295b80ed14b424f8365f8647e19775f617e205",
{
input: {
seed: 231,
input: "https://replicate.delivery/pbxt/Ketx7Rc2HPkgBgsBAmZ14gWWKc6aiKsA783giEEf4qiHDtOb/C.L.A.I.R.E._everhart_s.jpg",
steps: 50,
tiled: false,
tile_size: 512,
has_aligned: false,
tile_stride: 256,
repeat_times: 1,
use_guidance: false,
color_fix_type: "wavelet",
guidance_scale: 0,
guidance_space: "latent",
guidance_repeat: 5,
only_center_face: false,
guidance_time_stop: -1,
guidance_time_start: 1001,
background_upsampler: "RealESRGAN",
face_detection_model: "retinaface_resnet50",
upscaling_model_type: "faces",
restoration_model_type: "faces",
super_resolution_factor: 2,
disable_preprocess_model: false,
reload_restoration_model: false,
background_upsampler_tile: 400,
background_upsampler_tile_stride: 400
}
}
);
// To access the file URL:
console.log(output[0].url()); //=> "http://example.com"
// To write the file to disk (use the promises API; the output item is a streamable file):
await fs.promises.writeFile("my-image.png", output[0]);
To learn more, take a look at the guide on getting started with Node.js.
Install Replicate's Python client library:
pip install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
Import the client:
import replicate
Run astramlco/diffbir using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
output = replicate.run(
"astramlco/diffbir:f7a6e7832fee8d2593be566723295b80ed14b424f8365f8647e19775f617e205",
input={
"seed": 231,
"input": "https://replicate.delivery/pbxt/Ketx7Rc2HPkgBgsBAmZ14gWWKc6aiKsA783giEEf4qiHDtOb/C.L.A.I.R.E._everhart_s.jpg",
"steps": 50,
"tiled": False,
"tile_size": 512,
"has_aligned": False,
"tile_stride": 256,
"repeat_times": 1,
"use_guidance": False,
"color_fix_type": "wavelet",
"guidance_scale": 0,
"guidance_space": "latent",
"guidance_repeat": 5,
"only_center_face": False,
"guidance_time_stop": -1,
"guidance_time_start": 1001,
"background_upsampler": "RealESRGAN",
"face_detection_model": "retinaface_resnet50",
"upscaling_model_type": "faces",
"restoration_model_type": "faces",
"super_resolution_factor": 2,
"disable_preprocess_model": False,
"reload_restoration_model": False,
"background_upsampler_tile": 400,
"background_upsampler_tile_stride": 400
}
)
# To access the file URL:
print(output[0].url())
#=> "http://example.com"
# To write the file to disk:
with open("my-image.png", "wb") as file:
file.write(output[0].read())
To learn more, take a look at the guide on getting started with Python.
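Inputs you omit fall back to the defaults in the model's schema, so you only need to pass the values you want to change. As an illustrative sketch (the parameter names come from the example above, but the specific values are assumptions; check the schema for allowed ranges), here is how you might enable tiled sampling for a large image, request a higher upscaling factor, and write every returned file to disk:
output = replicate.run(
    "astramlco/diffbir:f7a6e7832fee8d2593be566723295b80ed14b424f8365f8647e19775f617e205",
    input={
        "input": "https://replicate.delivery/pbxt/Ketx7Rc2HPkgBgsBAmZ14gWWKc6aiKsA783giEEf4qiHDtOb/C.L.A.I.R.E._everhart_s.jpg",
        "steps": 50,
        "tiled": True,                  # process in overlapping tiles (tile_size/tile_stride) to limit memory use
        "tile_size": 512,
        "tile_stride": 256,
        "super_resolution_factor": 4,   # illustrative: upscale 4x instead of the 2x used above
        "color_fix_type": "wavelet",
    },
)
# Save every output file returned by the model.
for i, item in enumerate(output):
    with open(f"restored_{i}.png", "wb") as f:
        f.write(item.read())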
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
Run astramlco/diffbir using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
curl -s -X POST \
-H "Authorization: Bearer $REPLICATE_API_TOKEN" \
-H "Content-Type: application/json" \
-H "Prefer: wait" \
-d $'{
"version": "astramlco/diffbir:f7a6e7832fee8d2593be566723295b80ed14b424f8365f8647e19775f617e205",
"input": {
"seed": 231,
"input": "https://replicate.delivery/pbxt/Ketx7Rc2HPkgBgsBAmZ14gWWKc6aiKsA783giEEf4qiHDtOb/C.L.A.I.R.E._everhart_s.jpg",
"steps": 50,
"tiled": false,
"tile_size": 512,
"has_aligned": false,
"tile_stride": 256,
"repeat_times": 1,
"use_guidance": false,
"color_fix_type": "wavelet",
"guidance_scale": 0,
"guidance_space": "latent",
"guidance_repeat": 5,
"only_center_face": false,
"guidance_time_stop": -1,
"guidance_time_start": 1001,
"background_upsampler": "RealESRGAN",
"face_detection_model": "retinaface_resnet50",
"upscaling_model_type": "faces",
"restoration_model_type": "faces",
"super_resolution_factor": 2,
"disable_preprocess_model": false,
"reload_restoration_model": false,
"background_upsampler_tile": 400,
"background_upsampler_tile_stride": 400
}
}' \
https://api.replicate.com/v1/predictions
To learn more, take a look at Replicate’s HTTP API reference docs.
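The Prefer: wait header asks the API to hold the connection open until the prediction finishes. If you prefer to create the prediction and poll it yourself, you can fetch the prediction's "get" URL until its status leaves the in-progress states. A minimal polling sketch in Python, assuming the requests library and the same request body as the curl example above:
import os
import time
import requests

headers = {
    "Authorization": f"Bearer {os.environ['REPLICATE_API_TOKEN']}",
    "Content-Type": "application/json",
}
body = {
    "version": "astramlco/diffbir:f7a6e7832fee8d2593be566723295b80ed14b424f8365f8647e19775f617e205",
    "input": {
        "input": "https://replicate.delivery/pbxt/Ketx7Rc2HPkgBgsBAmZ14gWWKc6aiKsA783giEEf4qiHDtOb/C.L.A.I.R.E._everhart_s.jpg",
        "steps": 50,
    },
}

# Create the prediction without Prefer: wait, then poll its "get" URL.
prediction = requests.post("https://api.replicate.com/v1/predictions", headers=headers, json=body).json()
while prediction["status"] in ("starting", "processing"):
    time.sleep(2)
    prediction = requests.get(prediction["urls"]["get"], headers=headers).json()

print(prediction["status"], prediction.get("output"))
The JSON that follows is an example of a completed prediction for this model.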
{
"completed_at": "2024-03-30T07:49:57.477422Z",
"created_at": "2024-03-30T07:46:10.719345Z",
"data_removed": false,
"error": null,
"id": "lg6q6ydbytjofvmc7o474gdleu",
"input": {
"seed": 231,
"input": "https://replicate.delivery/pbxt/Ketx7Rc2HPkgBgsBAmZ14gWWKc6aiKsA783giEEf4qiHDtOb/C.L.A.I.R.E._everhart_s.jpg",
"steps": 50,
"tiled": false,
"tile_size": 512,
"has_aligned": false,
"tile_stride": 256,
"repeat_times": 1,
"use_guidance": false,
"color_fix_type": "wavelet",
"guidance_scale": 0,
"guidance_space": "latent",
"guidance_repeat": 5,
"only_center_face": false,
"guidance_time_stop": -1,
"guidance_time_start": 1001,
"background_upsampler": "RealESRGAN",
"face_detection_model": "retinaface_resnet50",
"upscaling_model_type": "faces",
"restoration_model_type": "faces",
"super_resolution_factor": 2,
"disable_preprocess_model": false,
"reload_restoration_model": false,
"background_upsampler_tile": 400,
"background_upsampler_tile_stride": 400
},
"logs": "ckptckptckpt weights/face_full_v1.ckpt\nSwitching from mode 'FULL' to 'FACE'...\nBuilding and loading 'FACE' mode model...\nControlLDM: Running in eps-prediction mode\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. 
Query dim is 320, context_dim is 1024 and using 5 heads.\nDiffusionWrapper has 865.91 M params.\nmaking attention of type 'vanilla-xformers' with 512 in_channels\nbuilding MemoryEfficientAttnBlock with 512 in_channels...\nWorking with z of shape (1, 4, 32, 32) = 4096 dimensions.\nmaking attention of type 'vanilla-xformers' with 512 in_channels\nbuilding MemoryEfficientAttnBlock with 512 in_channels...\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]\nLoading model from: /root/.pyenv/versions/3.9.19/lib/python3.9/site-packages/lpips/weights/v0.1/alex.pth\nreload swinir model from weights/face_swinir_v1.ckpt\nENABLE XFORMERS!\nModel successfully switched to 'FACE' mode.\n{'bg_tile': 400,\n'bg_tile_stride': 400,\n'bg_upsampler': 'RealESRGAN',\n'ckpt': 'weights/face_full_v1.ckpt',\n'color_fix_type': 'wavelet',\n'config': 'configs/model/cldm.yaml',\n'detection_model': 'retinaface_resnet50',\n'device': 'cuda',\n'disable_preprocess_model': False,\n'g_repeat': 5,\n'g_scale': 0.0,\n'g_space': 'latent',\n'g_t_start': 1001,\n'g_t_stop': -1,\n'has_aligned': False,\n'image_size': 512,\n'input': '/tmp/tmpcdoq1b8cC.L.A.I.R.E._everhart_s.jpg',\n'only_center_face': False,\n'output': '.',\n'reload_swinir': False,\n'repeat_times': 1,\n'seed': 231,\n'show_lq': False,\n'skip_if_exist': False,\n'sr_scale': 2,\n'steps': 50,\n'swinir_ckpt': 'weights/face_swinir_v1.ckpt',\n'tile_size': 512,\n'tile_stride': 256,\n'tiled': False,\n'use_guidance': False}\nGlobal seed set to 231\n/root/.pyenv/versions/3.9.19/lib/python3.9/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. 
The current behavior is equivalent to passing `weights=None`.\nwarnings.warn(msg)\nDownloading: \"https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_Resnet50_Final.pth\" to /root/.pyenv/versions/3.9.19/lib/python3.9/site-packages/facexlib/weights/detection_Resnet50_Final.pth\n 0%| | 0.00/104M [00:00<?, ?B/s]\n 39%|███▉ | 40.5M/104M [00:00<00:00, 425MB/s]\n 78%|███████▊ | 81.1M/104M [00:00<00:00, 98.1MB/s]\n100%|██████████| 104M/104M [00:00<00:00, 127MB/s]\nDownloading: \"https://github.com/xinntao/facexlib/releases/download/v0.2.2/parsing_parsenet.pth\" to /root/.pyenv/versions/3.9.19/lib/python3.9/site-packages/facexlib/weights/parsing_parsenet.pth\n 0%| | 0.00/81.4M [00:00<?, ?B/s]\n 54%|█████▍ | 43.8M/81.4M [00:00<00:00, 459MB/s]\n100%|██████████| 81.4M/81.4M [00:00<00:00, 464MB/s]\nLoading RealESRGAN_x2plus.pth for background upsampling...\nDownloading: \"https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth\" to /src/weights/realesrgan/RealESRGAN_x2plus.pth\n 0%| | 0.00/64.0M [00:00<?, ?B/s]\n 52%|█████▏ | 33.4M/64.0M [00:00<00:00, 351MB/s]\n100%|██████████| 64.0M/64.0M [00:00<00:00, 386MB/s]\ntimesteps used in spaced sampler:\n[0, 20, 41, 61, 82, 102, 122, 143, 163, 183, 204, 224, 245, 265, 285, 306, 326, 347, 367, 387, 408, 428, 449, 469, 489, 510, 530, 550, 571, 591, 612, 632, 652, 673, 693, 714, 734, 754, 775, 795, 816, 836, 856, 877, 897, 917, 938, 958, 979, 999]\nSpaced Sampler: 0%| | 0/50 [00:00<?, ?it/s]\nSpaced Sampler: 2%|▏ | 1/50 [00:00<00:10, 4.79it/s]\nSpaced Sampler: 6%|▌ | 3/50 [00:00<00:05, 8.70it/s]\nSpaced Sampler: 10%|█ | 5/50 [00:00<00:04, 10.19it/s]\nSpaced Sampler: 14%|█▍ | 7/50 [00:00<00:03, 10.93it/s]\nSpaced Sampler: 18%|█▊ | 9/50 [00:00<00:03, 11.34it/s]\nSpaced Sampler: 22%|██▏ | 11/50 [00:01<00:03, 11.59it/s]\nSpaced Sampler: 26%|██▌ | 13/50 [00:01<00:03, 11.81it/s]\nSpaced Sampler: 30%|███ | 15/50 [00:01<00:02, 11.96it/s]\nSpaced Sampler: 34%|███▍ | 17/50 [00:01<00:02, 12.07it/s]\nSpaced Sampler: 38%|███▊ | 19/50 [00:01<00:02, 12.13it/s]\nSpaced Sampler: 42%|████▏ | 21/50 [00:01<00:02, 12.17it/s]\nSpaced Sampler: 46%|████▌ | 23/50 [00:02<00:02, 12.14it/s]\nSpaced Sampler: 50%|█████ | 25/50 [00:02<00:02, 12.13it/s]\nSpaced Sampler: 54%|█████▍ | 27/50 [00:02<00:01, 12.11it/s]\nSpaced Sampler: 58%|█████▊ | 29/50 [00:02<00:01, 12.10it/s]\nSpaced Sampler: 62%|██████▏ | 31/50 [00:02<00:01, 12.07it/s]\nSpaced Sampler: 66%|██████▌ | 33/50 [00:02<00:01, 12.08it/s]\nSpaced Sampler: 70%|███████ | 35/50 [00:03<00:01, 12.06it/s]\nSpaced Sampler: 74%|███████▍ | 37/50 [00:03<00:01, 12.05it/s]\nSpaced Sampler: 78%|███████▊ | 39/50 [00:03<00:00, 11.95it/s]\nSpaced Sampler: 82%|████████▏ | 41/50 [00:03<00:00, 11.97it/s]\nSpaced Sampler: 86%|████████▌ | 43/50 [00:03<00:00, 12.01it/s]\nSpaced Sampler: 90%|█████████ | 45/50 [00:03<00:00, 12.06it/s]\nSpaced Sampler: 94%|█████████▍| 47/50 [00:04<00:00, 12.12it/s]\nSpaced Sampler: 98%|█████████▊| 49/50 [00:04<00:00, 12.17it/s]\nSpaced Sampler: 100%|██████████| 50/50 [00:04<00:00, 11.77it/s]\nupsampling the background image using RealESRGAN...\nFace image tmpcdoq1b8cC.L.A.I.R.E._everhart_s saved to ./..",
"metrics": {
"predict_time": 43.867453,
"total_time": 226.758077
},
"output": [
"https://replicate.delivery/pbxt/ZebgzumoHLy2ZCL5ppfJnICvW5SBKGZ2emNQxPeS35KNidVKB/tmpcdoq1b8cC.L.A.I.R.E._everhart_s_00.png",
"https://replicate.delivery/pbxt/e1wupnukQOzFYaCzdEemITU2qoHPTUcDRHyfiWepxncNidVKB/tmpcdoq1b8cC.L.A.I.R.E._everhart_s_00.png",
"https://replicate.delivery/pbxt/a35MV4BcyI7wEdbFav8hrbX8CuOt3IU8jEr39S6E9geSsrSJA/tmpcdoq1b8cC.L.A.I.R.E._everhart_s.png"
],
"started_at": "2024-03-30T07:49:13.609969Z",
"status": "succeeded",
"urls": {
"get": "https://api.replicate.com/v1/predictions/lg6q6ydbytjofvmc7o474gdleu",
"cancel": "https://api.replicate.com/v1/predictions/lg6q6ydbytjofvmc7o474gdleu/cancel"
},
"version": "f7a6e7832fee8d2593be566723295b80ed14b424f8365f8647e19775f617e205"
}
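The fields you will usually consume programmatically are output, a list of plain HTTPS file URLs, and metrics.predict_time, which reports how long the prediction ran in seconds. Below is a small helper sketch (the save_outputs name is hypothetical, and it uses the requests library rather than the Replicate client) that downloads every output image from a payload shaped like the example above:
import os
import requests

def save_outputs(prediction, dest_dir="."):
    # "prediction" is a dict shaped like the example response above.
    print(f"predict_time: {prediction['metrics']['predict_time']:.1f}s")
    for i, url in enumerate(prediction["output"]):
        # Prefix with the index: different outputs can share the same file name.
        name = f"{i}_{os.path.basename(url)}"
        resp = requests.get(url, timeout=60)
        resp.raise_for_status()
        with open(os.path.join(dest_dir, name), "wb") as f:
            f.write(resp.content)
        print("saved", name)
The lines that follow are the same prediction's logs.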
ckptckptckpt weights/face_full_v1.ckpt
Switching from mode 'FULL' to 'FACE'...
Building and loading 'FACE' mode model...
ControlLDM: Running in eps-prediction mode
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.
DiffusionWrapper has 865.91 M params.
making attention of type 'vanilla-xformers' with 512 in_channels
building MemoryEfficientAttnBlock with 512 in_channels...
Working with z of shape (1, 4, 32, 32) = 4096 dimensions.
making attention of type 'vanilla-xformers' with 512 in_channels
building MemoryEfficientAttnBlock with 512 in_channels...
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /root/.pyenv/versions/3.9.19/lib/python3.9/site-packages/lpips/weights/v0.1/alex.pth
reload swinir model from weights/face_swinir_v1.ckpt
ENABLE XFORMERS!
Model successfully switched to 'FACE' mode.
{'bg_tile': 400,
'bg_tile_stride': 400,
'bg_upsampler': 'RealESRGAN',
'ckpt': 'weights/face_full_v1.ckpt',
'color_fix_type': 'wavelet',
'config': 'configs/model/cldm.yaml',
'detection_model': 'retinaface_resnet50',
'device': 'cuda',
'disable_preprocess_model': False,
'g_repeat': 5,
'g_scale': 0.0,
'g_space': 'latent',
'g_t_start': 1001,
'g_t_stop': -1,
'has_aligned': False,
'image_size': 512,
'input': '/tmp/tmpcdoq1b8cC.L.A.I.R.E._everhart_s.jpg',
'only_center_face': False,
'output': '.',
'reload_swinir': False,
'repeat_times': 1,
'seed': 231,
'show_lq': False,
'skip_if_exist': False,
'sr_scale': 2,
'steps': 50,
'swinir_ckpt': 'weights/face_swinir_v1.ckpt',
'tile_size': 512,
'tile_stride': 256,
'tiled': False,
'use_guidance': False}
Global seed set to 231
/root/.pyenv/versions/3.9.19/lib/python3.9/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=None`.
warnings.warn(msg)
Downloading: "https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_Resnet50_Final.pth" to /root/.pyenv/versions/3.9.19/lib/python3.9/site-packages/facexlib/weights/detection_Resnet50_Final.pth
0%| | 0.00/104M [00:00<?, ?B/s]
39%|███▉ | 40.5M/104M [00:00<00:00, 425MB/s]
78%|███████▊ | 81.1M/104M [00:00<00:00, 98.1MB/s]
100%|██████████| 104M/104M [00:00<00:00, 127MB/s]
Downloading: "https://github.com/xinntao/facexlib/releases/download/v0.2.2/parsing_parsenet.pth" to /root/.pyenv/versions/3.9.19/lib/python3.9/site-packages/facexlib/weights/parsing_parsenet.pth
0%| | 0.00/81.4M [00:00<?, ?B/s]
54%|█████▍ | 43.8M/81.4M [00:00<00:00, 459MB/s]
100%|██████████| 81.4M/81.4M [00:00<00:00, 464MB/s]
Loading RealESRGAN_x2plus.pth for background upsampling...
Downloading: "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth" to /src/weights/realesrgan/RealESRGAN_x2plus.pth
0%| | 0.00/64.0M [00:00<?, ?B/s]
52%|█████▏ | 33.4M/64.0M [00:00<00:00, 351MB/s]
100%|██████████| 64.0M/64.0M [00:00<00:00, 386MB/s]
timesteps used in spaced sampler:
[0, 20, 41, 61, 82, 102, 122, 143, 163, 183, 204, 224, 245, 265, 285, 306, 326, 347, 367, 387, 408, 428, 449, 469, 489, 510, 530, 550, 571, 591, 612, 632, 652, 673, 693, 714, 734, 754, 775, 795, 816, 836, 856, 877, 897, 917, 938, 958, 979, 999]
Spaced Sampler: 0%| | 0/50 [00:00<?, ?it/s]
Spaced Sampler: 2%|▏ | 1/50 [00:00<00:10, 4.79it/s]
Spaced Sampler: 6%|▌ | 3/50 [00:00<00:05, 8.70it/s]
Spaced Sampler: 10%|█ | 5/50 [00:00<00:04, 10.19it/s]
Spaced Sampler: 14%|█▍ | 7/50 [00:00<00:03, 10.93it/s]
Spaced Sampler: 18%|█▊ | 9/50 [00:00<00:03, 11.34it/s]
Spaced Sampler: 22%|██▏ | 11/50 [00:01<00:03, 11.59it/s]
Spaced Sampler: 26%|██▌ | 13/50 [00:01<00:03, 11.81it/s]
Spaced Sampler: 30%|███ | 15/50 [00:01<00:02, 11.96it/s]
Spaced Sampler: 34%|███▍ | 17/50 [00:01<00:02, 12.07it/s]
Spaced Sampler: 38%|███▊ | 19/50 [00:01<00:02, 12.13it/s]
Spaced Sampler: 42%|████▏ | 21/50 [00:01<00:02, 12.17it/s]
Spaced Sampler: 46%|████▌ | 23/50 [00:02<00:02, 12.14it/s]
Spaced Sampler: 50%|█████ | 25/50 [00:02<00:02, 12.13it/s]
Spaced Sampler: 54%|█████▍ | 27/50 [00:02<00:01, 12.11it/s]
Spaced Sampler: 58%|█████▊ | 29/50 [00:02<00:01, 12.10it/s]
Spaced Sampler: 62%|██████▏ | 31/50 [00:02<00:01, 12.07it/s]
Spaced Sampler: 66%|██████▌ | 33/50 [00:02<00:01, 12.08it/s]
Spaced Sampler: 70%|███████ | 35/50 [00:03<00:01, 12.06it/s]
Spaced Sampler: 74%|███████▍ | 37/50 [00:03<00:01, 12.05it/s]
Spaced Sampler: 78%|███████▊ | 39/50 [00:03<00:00, 11.95it/s]
Spaced Sampler: 82%|████████▏ | 41/50 [00:03<00:00, 11.97it/s]
Spaced Sampler: 86%|████████▌ | 43/50 [00:03<00:00, 12.01it/s]
Spaced Sampler: 90%|█████████ | 45/50 [00:03<00:00, 12.06it/s]
Spaced Sampler: 94%|█████████▍| 47/50 [00:04<00:00, 12.12it/s]
Spaced Sampler: 98%|█████████▊| 49/50 [00:04<00:00, 12.17it/s]
Spaced Sampler: 100%|██████████| 50/50 [00:04<00:00, 11.77it/s]
upsampling the background image using RealESRGAN...
Face image tmpcdoq1b8cC.L.A.I.R.E._everhart_s saved to ./..
This model costs approximately $0.22 to run on Replicate, or 4 runs per $1, but this varies depending on your inputs. It is also open source and you can run it on your own computer with Docker.
This model runs on Nvidia L40S GPU hardware. Predictions typically complete within 4 minutes. The predict time for this model varies significantly based on the inputs.