astramlco / diffbir
DiffBIR: Towards Blind Image Restoration with Generative Diffusion Prior
Prediction
astramlco/diffbir:f7a6e7832fee8d2593be566723295b80ed14b424f8365f8647e19775f617e205
ID lg6q6ydbytjofvmc7o474gdleu · Status Succeeded · Source Web · Hardware A40
Input
- seed
- 231
- steps
- 50
- tiled
- tile_size
- 512
- has_aligned
- tile_stride
- 256
- repeat_times
- 1
- use_guidance
- color_fix_type
- wavelet
- guidance_scale
- 0
- guidance_space
- latent
- guidance_repeat
- 5
- only_center_face
- guidance_time_stop
- -1
- guidance_time_start
- 1001
- background_upsampler
- RealESRGAN
- face_detection_model
- retinaface_resnet50
- upscaling_model_type
- faces
- restoration_model_type
- faces
- super_resolution_factor
- 2
- disable_preprocess_model
- reload_restoration_model
- background_upsampler_tile
- 400
- background_upsampler_tile_stride
- 400
{ "seed": 231, "input": "https://replicate.delivery/pbxt/Ketx7Rc2HPkgBgsBAmZ14gWWKc6aiKsA783giEEf4qiHDtOb/C.L.A.I.R.E._everhart_s.jpg", "steps": 50, "tiled": false, "tile_size": 512, "has_aligned": false, "tile_stride": 256, "repeat_times": 1, "use_guidance": false, "color_fix_type": "wavelet", "guidance_scale": 0, "guidance_space": "latent", "guidance_repeat": 5, "only_center_face": false, "guidance_time_stop": -1, "guidance_time_start": 1001, "background_upsampler": "RealESRGAN", "face_detection_model": "retinaface_resnet50", "upscaling_model_type": "faces", "restoration_model_type": "faces", "super_resolution_factor": 2, "disable_preprocess_model": false, "reload_restoration_model": false, "background_upsampler_tile": 400, "background_upsampler_tile_stride": 400 }
Install Replicate’s Node.js client library:

npm install replicate
Import and set up the client:

import Replicate from "replicate";

const replicate = new Replicate({
  auth: process.env.REPLICATE_API_TOKEN,
});
Run astramlco/diffbir using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
// Run astramlco/diffbir via the Replicate API.
// Assumes `replicate` is the client configured in the setup step above.
// NOTE: the original snippet used `fs.writeFile(...)` without ever importing
// `fs`, and the callback-style API without a callback throws in Node —
// use the promise-based API and await it instead.
import { writeFile } from "node:fs/promises";

const output = await replicate.run(
  "astramlco/diffbir:f7a6e7832fee8d2593be566723295b80ed14b424f8365f8647e19775f617e205",
  {
    input: {
      seed: 231,
      input: "https://replicate.delivery/pbxt/Ketx7Rc2HPkgBgsBAmZ14gWWKc6aiKsA783giEEf4qiHDtOb/C.L.A.I.R.E._everhart_s.jpg",
      steps: 50,
      tiled: false,
      tile_size: 512,
      has_aligned: false,
      tile_stride: 256,
      repeat_times: 1,
      use_guidance: false,
      color_fix_type: "wavelet",
      guidance_scale: 0,
      guidance_space: "latent",
      guidance_repeat: 5,
      only_center_face: false,
      guidance_time_stop: -1,
      guidance_time_start: 1001,
      background_upsampler: "RealESRGAN",
      face_detection_model: "retinaface_resnet50",
      upscaling_model_type: "faces",
      restoration_model_type: "faces",
      super_resolution_factor: 2,
      disable_preprocess_model: false,
      reload_restoration_model: false,
      background_upsampler_tile: 400,
      background_upsampler_tile_stride: 400,
    },
  },
);

// To access the file URL:
console.log(output[0].url()); //=> "http://example.com"

// To write the file to disk:
await writeFile("my-image.png", output[0]);
To learn more, take a look at the guide on getting started with Node.js.
Install Replicate’s Python client library:

pip install replicate
Import the client:

import replicate
Run astramlco/diffbir using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# Run astramlco/diffbir via the Replicate API (requires the `replicate`
# client library to be installed and imported, with REPLICATE_API_TOKEN set).
output = replicate.run(
    "astramlco/diffbir:f7a6e7832fee8d2593be566723295b80ed14b424f8365f8647e19775f617e205",
    input={
        "seed": 231,
        "input": "https://replicate.delivery/pbxt/Ketx7Rc2HPkgBgsBAmZ14gWWKc6aiKsA783giEEf4qiHDtOb/C.L.A.I.R.E._everhart_s.jpg",
        "steps": 50,
        "tiled": False,
        "tile_size": 512,
        "has_aligned": False,
        "tile_stride": 256,
        "repeat_times": 1,
        "use_guidance": False,
        "color_fix_type": "wavelet",
        "guidance_scale": 0,
        "guidance_space": "latent",
        "guidance_repeat": 5,
        "only_center_face": False,
        "guidance_time_stop": -1,
        "guidance_time_start": 1001,
        "background_upsampler": "RealESRGAN",
        "face_detection_model": "retinaface_resnet50",
        "upscaling_model_type": "faces",
        "restoration_model_type": "faces",
        "super_resolution_factor": 2,
        "disable_preprocess_model": False,
        "reload_restoration_model": False,
        "background_upsampler_tile": 400,
        "background_upsampler_tile_stride": 400,
    },
)
print(output)
To learn more, take a look at the guide on getting started with Python.
Run astramlco/diffbir using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# Create a prediction with Replicate's HTTP API.
# Requires REPLICATE_API_TOKEN in the environment; the "Prefer: wait" header
# asks the API to hold the connection open until the prediction completes.
# The payload uses $'...' (ANSI-C quoting) so the JSON can be passed as one arg.
curl -s -X POST \ -H "Authorization: Bearer $REPLICATE_API_TOKEN" \ -H "Content-Type: application/json" \ -H "Prefer: wait" \ -d $'{ "version": "astramlco/diffbir:f7a6e7832fee8d2593be566723295b80ed14b424f8365f8647e19775f617e205", "input": { "seed": 231, "input": "https://replicate.delivery/pbxt/Ketx7Rc2HPkgBgsBAmZ14gWWKc6aiKsA783giEEf4qiHDtOb/C.L.A.I.R.E._everhart_s.jpg", "steps": 50, "tiled": false, "tile_size": 512, "has_aligned": false, "tile_stride": 256, "repeat_times": 1, "use_guidance": false, "color_fix_type": "wavelet", "guidance_scale": 0, "guidance_space": "latent", "guidance_repeat": 5, "only_center_face": false, "guidance_time_stop": -1, "guidance_time_start": 1001, "background_upsampler": "RealESRGAN", "face_detection_model": "retinaface_resnet50", "upscaling_model_type": "faces", "restoration_model_type": "faces", "super_resolution_factor": 2, "disable_preprocess_model": false, "reload_restoration_model": false, "background_upsampler_tile": 400, "background_upsampler_tile_stride": 400 } }' \ https://api.replicate.com/v1/predictions
To learn more, take a look at Replicate’s HTTP API reference docs.
Output
{ "completed_at": "2024-03-30T07:49:57.477422Z", "created_at": "2024-03-30T07:46:10.719345Z", "data_removed": false, "error": null, "id": "lg6q6ydbytjofvmc7o474gdleu", "input": { "seed": 231, "input": "https://replicate.delivery/pbxt/Ketx7Rc2HPkgBgsBAmZ14gWWKc6aiKsA783giEEf4qiHDtOb/C.L.A.I.R.E._everhart_s.jpg", "steps": 50, "tiled": false, "tile_size": 512, "has_aligned": false, "tile_stride": 256, "repeat_times": 1, "use_guidance": false, "color_fix_type": "wavelet", "guidance_scale": 0, "guidance_space": "latent", "guidance_repeat": 5, "only_center_face": false, "guidance_time_stop": -1, "guidance_time_start": 1001, "background_upsampler": "RealESRGAN", "face_detection_model": "retinaface_resnet50", "upscaling_model_type": "faces", "restoration_model_type": "faces", "super_resolution_factor": 2, "disable_preprocess_model": false, "reload_restoration_model": false, "background_upsampler_tile": 400, "background_upsampler_tile_stride": 400 }, "logs": "ckptckptckpt weights/face_full_v1.ckpt\nSwitching from mode 'FULL' to 'FACE'...\nBuilding and loading 'FACE' mode model...\nControlLDM: Running in eps-prediction mode\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. 
Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. 
Query dim is 320, context_dim is 1024 and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.\nDiffusionWrapper has 865.91 M params.\nmaking attention of type 'vanilla-xformers' with 512 in_channels\nbuilding MemoryEfficientAttnBlock with 512 in_channels...\nWorking with z of shape (1, 4, 32, 32) = 4096 dimensions.\nmaking attention of type 'vanilla-xformers' with 512 in_channels\nbuilding MemoryEfficientAttnBlock with 512 in_channels...\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. 
Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads.\nSetting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads.\nSetting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]\nLoading model from: /root/.pyenv/versions/3.9.19/lib/python3.9/site-packages/lpips/weights/v0.1/alex.pth\nreload swinir model from weights/face_swinir_v1.ckpt\nENABLE XFORMERS!\nModel successfully switched to 'FACE' mode.\n{'bg_tile': 400,\n'bg_tile_stride': 400,\n'bg_upsampler': 'RealESRGAN',\n'ckpt': 'weights/face_full_v1.ckpt',\n'color_fix_type': 'wavelet',\n'config': 'configs/model/cldm.yaml',\n'detection_model': 'retinaface_resnet50',\n'device': 'cuda',\n'disable_preprocess_model': False,\n'g_repeat': 5,\n'g_scale': 0.0,\n'g_space': 'latent',\n'g_t_start': 1001,\n'g_t_stop': -1,\n'has_aligned': False,\n'image_size': 512,\n'input': '/tmp/tmpcdoq1b8cC.L.A.I.R.E._everhart_s.jpg',\n'only_center_face': False,\n'output': '.',\n'reload_swinir': False,\n'repeat_times': 1,\n'seed': 231,\n'show_lq': False,\n'skip_if_exist': False,\n'sr_scale': 2,\n'steps': 50,\n'swinir_ckpt': 'weights/face_swinir_v1.ckpt',\n'tile_size': 512,\n'tile_stride': 256,\n'tiled': False,\n'use_guidance': False}\nGlobal seed set to 231\n/root/.pyenv/versions/3.9.19/lib/python3.9/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. 
The current behavior is equivalent to passing `weights=None`.\nwarnings.warn(msg)\nDownloading: \"https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_Resnet50_Final.pth\" to /root/.pyenv/versions/3.9.19/lib/python3.9/site-packages/facexlib/weights/detection_Resnet50_Final.pth\n 0%| | 0.00/104M [00:00<?, ?B/s]\n 39%|███▉ | 40.5M/104M [00:00<00:00, 425MB/s]\n 78%|███████▊ | 81.1M/104M [00:00<00:00, 98.1MB/s]\n100%|██████████| 104M/104M [00:00<00:00, 127MB/s]\nDownloading: \"https://github.com/xinntao/facexlib/releases/download/v0.2.2/parsing_parsenet.pth\" to /root/.pyenv/versions/3.9.19/lib/python3.9/site-packages/facexlib/weights/parsing_parsenet.pth\n 0%| | 0.00/81.4M [00:00<?, ?B/s]\n 54%|█████▍ | 43.8M/81.4M [00:00<00:00, 459MB/s]\n100%|██████████| 81.4M/81.4M [00:00<00:00, 464MB/s]\nLoading RealESRGAN_x2plus.pth for background upsampling...\nDownloading: \"https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth\" to /src/weights/realesrgan/RealESRGAN_x2plus.pth\n 0%| | 0.00/64.0M [00:00<?, ?B/s]\n 52%|█████▏ | 33.4M/64.0M [00:00<00:00, 351MB/s]\n100%|██████████| 64.0M/64.0M [00:00<00:00, 386MB/s]\ntimesteps used in spaced sampler:\n[0, 20, 41, 61, 82, 102, 122, 143, 163, 183, 204, 224, 245, 265, 285, 306, 326, 347, 367, 387, 408, 428, 449, 469, 489, 510, 530, 550, 571, 591, 612, 632, 652, 673, 693, 714, 734, 754, 775, 795, 816, 836, 856, 877, 897, 917, 938, 958, 979, 999]\nSpaced Sampler: 0%| | 0/50 [00:00<?, ?it/s]\nSpaced Sampler: 2%|▏ | 1/50 [00:00<00:10, 4.79it/s]\nSpaced Sampler: 6%|▌ | 3/50 [00:00<00:05, 8.70it/s]\nSpaced Sampler: 10%|█ | 5/50 [00:00<00:04, 10.19it/s]\nSpaced Sampler: 14%|█▍ | 7/50 [00:00<00:03, 10.93it/s]\nSpaced Sampler: 18%|█▊ | 9/50 [00:00<00:03, 11.34it/s]\nSpaced Sampler: 22%|██▏ | 11/50 [00:01<00:03, 11.59it/s]\nSpaced Sampler: 26%|██▌ | 13/50 [00:01<00:03, 11.81it/s]\nSpaced Sampler: 30%|███ | 15/50 [00:01<00:02, 11.96it/s]\nSpaced Sampler: 34%|███▍ | 17/50 [00:01<00:02, 
12.07it/s]\nSpaced Sampler: 38%|███▊ | 19/50 [00:01<00:02, 12.13it/s]\nSpaced Sampler: 42%|████▏ | 21/50 [00:01<00:02, 12.17it/s]\nSpaced Sampler: 46%|████▌ | 23/50 [00:02<00:02, 12.14it/s]\nSpaced Sampler: 50%|█████ | 25/50 [00:02<00:02, 12.13it/s]\nSpaced Sampler: 54%|█████▍ | 27/50 [00:02<00:01, 12.11it/s]\nSpaced Sampler: 58%|█████▊ | 29/50 [00:02<00:01, 12.10it/s]\nSpaced Sampler: 62%|██████▏ | 31/50 [00:02<00:01, 12.07it/s]\nSpaced Sampler: 66%|██████▌ | 33/50 [00:02<00:01, 12.08it/s]\nSpaced Sampler: 70%|███████ | 35/50 [00:03<00:01, 12.06it/s]\nSpaced Sampler: 74%|███████▍ | 37/50 [00:03<00:01, 12.05it/s]\nSpaced Sampler: 78%|███████▊ | 39/50 [00:03<00:00, 11.95it/s]\nSpaced Sampler: 82%|████████▏ | 41/50 [00:03<00:00, 11.97it/s]\nSpaced Sampler: 86%|████████▌ | 43/50 [00:03<00:00, 12.01it/s]\nSpaced Sampler: 90%|█████████ | 45/50 [00:03<00:00, 12.06it/s]\nSpaced Sampler: 94%|█████████▍| 47/50 [00:04<00:00, 12.12it/s]\nSpaced Sampler: 98%|█████████▊| 49/50 [00:04<00:00, 12.17it/s]\nSpaced Sampler: 100%|██████████| 50/50 [00:04<00:00, 11.77it/s]\nupsampling the background image using RealESRGAN...\nFace image tmpcdoq1b8cC.L.A.I.R.E._everhart_s saved to ./..", "metrics": { "predict_time": 43.867453, "total_time": 226.758077 }, "output": [ "https://replicate.delivery/pbxt/ZebgzumoHLy2ZCL5ppfJnICvW5SBKGZ2emNQxPeS35KNidVKB/tmpcdoq1b8cC.L.A.I.R.E._everhart_s_00.png", "https://replicate.delivery/pbxt/e1wupnukQOzFYaCzdEemITU2qoHPTUcDRHyfiWepxncNidVKB/tmpcdoq1b8cC.L.A.I.R.E._everhart_s_00.png", "https://replicate.delivery/pbxt/a35MV4BcyI7wEdbFav8hrbX8CuOt3IU8jEr39S6E9geSsrSJA/tmpcdoq1b8cC.L.A.I.R.E._everhart_s.png" ], "started_at": "2024-03-30T07:49:13.609969Z", "status": "succeeded", "urls": { "get": "https://api.replicate.com/v1/predictions/lg6q6ydbytjofvmc7o474gdleu", "cancel": "https://api.replicate.com/v1/predictions/lg6q6ydbytjofvmc7o474gdleu/cancel" }, "version": "f7a6e7832fee8d2593be566723295b80ed14b424f8365f8647e19775f617e205" }
Generated inckptckptckpt weights/face_full_v1.ckpt Switching from mode 'FULL' to 'FACE'... Building and loading 'FACE' mode model... ControlLDM: Running in eps-prediction mode Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads. Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads. Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads. Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads. Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads. Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads. Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads. Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads. Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads. Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads. Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads. Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads. Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads. Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads. Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads. Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads. Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads. Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads. 
Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads. Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads. Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads. Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads. Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads. Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads. Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads. Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads. Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads. Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads. Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads. Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads. Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads. Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads. DiffusionWrapper has 865.91 M params. making attention of type 'vanilla-xformers' with 512 in_channels building MemoryEfficientAttnBlock with 512 in_channels... Working with z of shape (1, 4, 32, 32) = 4096 dimensions. making attention of type 'vanilla-xformers' with 512 in_channels building MemoryEfficientAttnBlock with 512 in_channels... Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is None and using 5 heads. Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads. Setting up MemoryEfficientCrossAttention. 
Query dim is 320, context_dim is None and using 5 heads. Setting up MemoryEfficientCrossAttention. Query dim is 320, context_dim is 1024 and using 5 heads. Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads. Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads. Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is None and using 10 heads. Setting up MemoryEfficientCrossAttention. Query dim is 640, context_dim is 1024 and using 10 heads. Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads. Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads. Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads. Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads. Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is None and using 20 heads. Setting up MemoryEfficientCrossAttention. Query dim is 1280, context_dim is 1024 and using 20 heads. Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off] Loading model from: /root/.pyenv/versions/3.9.19/lib/python3.9/site-packages/lpips/weights/v0.1/alex.pth reload swinir model from weights/face_swinir_v1.ckpt ENABLE XFORMERS! Model successfully switched to 'FACE' mode. 
{'bg_tile': 400, 'bg_tile_stride': 400, 'bg_upsampler': 'RealESRGAN', 'ckpt': 'weights/face_full_v1.ckpt', 'color_fix_type': 'wavelet', 'config': 'configs/model/cldm.yaml', 'detection_model': 'retinaface_resnet50', 'device': 'cuda', 'disable_preprocess_model': False, 'g_repeat': 5, 'g_scale': 0.0, 'g_space': 'latent', 'g_t_start': 1001, 'g_t_stop': -1, 'has_aligned': False, 'image_size': 512, 'input': '/tmp/tmpcdoq1b8cC.L.A.I.R.E._everhart_s.jpg', 'only_center_face': False, 'output': '.', 'reload_swinir': False, 'repeat_times': 1, 'seed': 231, 'show_lq': False, 'skip_if_exist': False, 'sr_scale': 2, 'steps': 50, 'swinir_ckpt': 'weights/face_swinir_v1.ckpt', 'tile_size': 512, 'tile_stride': 256, 'tiled': False, 'use_guidance': False} Global seed set to 231 /root/.pyenv/versions/3.9.19/lib/python3.9/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=None`. warnings.warn(msg) Downloading: "https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_Resnet50_Final.pth" to /root/.pyenv/versions/3.9.19/lib/python3.9/site-packages/facexlib/weights/detection_Resnet50_Final.pth 0%| | 0.00/104M [00:00<?, ?B/s] 39%|███▉ | 40.5M/104M [00:00<00:00, 425MB/s] 78%|███████▊ | 81.1M/104M [00:00<00:00, 98.1MB/s] 100%|██████████| 104M/104M [00:00<00:00, 127MB/s] Downloading: "https://github.com/xinntao/facexlib/releases/download/v0.2.2/parsing_parsenet.pth" to /root/.pyenv/versions/3.9.19/lib/python3.9/site-packages/facexlib/weights/parsing_parsenet.pth 0%| | 0.00/81.4M [00:00<?, ?B/s] 54%|█████▍ | 43.8M/81.4M [00:00<00:00, 459MB/s] 100%|██████████| 81.4M/81.4M [00:00<00:00, 464MB/s] Loading RealESRGAN_x2plus.pth for background upsampling... 
Downloading: "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth" to /src/weights/realesrgan/RealESRGAN_x2plus.pth 0%| | 0.00/64.0M [00:00<?, ?B/s] 52%|█████▏ | 33.4M/64.0M [00:00<00:00, 351MB/s] 100%|██████████| 64.0M/64.0M [00:00<00:00, 386MB/s] timesteps used in spaced sampler: [0, 20, 41, 61, 82, 102, 122, 143, 163, 183, 204, 224, 245, 265, 285, 306, 326, 347, 367, 387, 408, 428, 449, 469, 489, 510, 530, 550, 571, 591, 612, 632, 652, 673, 693, 714, 734, 754, 775, 795, 816, 836, 856, 877, 897, 917, 938, 958, 979, 999] Spaced Sampler: 0%| | 0/50 [00:00<?, ?it/s] Spaced Sampler: 2%|▏ | 1/50 [00:00<00:10, 4.79it/s] Spaced Sampler: 6%|▌ | 3/50 [00:00<00:05, 8.70it/s] Spaced Sampler: 10%|█ | 5/50 [00:00<00:04, 10.19it/s] Spaced Sampler: 14%|█▍ | 7/50 [00:00<00:03, 10.93it/s] Spaced Sampler: 18%|█▊ | 9/50 [00:00<00:03, 11.34it/s] Spaced Sampler: 22%|██▏ | 11/50 [00:01<00:03, 11.59it/s] Spaced Sampler: 26%|██▌ | 13/50 [00:01<00:03, 11.81it/s] Spaced Sampler: 30%|███ | 15/50 [00:01<00:02, 11.96it/s] Spaced Sampler: 34%|███▍ | 17/50 [00:01<00:02, 12.07it/s] Spaced Sampler: 38%|███▊ | 19/50 [00:01<00:02, 12.13it/s] Spaced Sampler: 42%|████▏ | 21/50 [00:01<00:02, 12.17it/s] Spaced Sampler: 46%|████▌ | 23/50 [00:02<00:02, 12.14it/s] Spaced Sampler: 50%|█████ | 25/50 [00:02<00:02, 12.13it/s] Spaced Sampler: 54%|█████▍ | 27/50 [00:02<00:01, 12.11it/s] Spaced Sampler: 58%|█████▊ | 29/50 [00:02<00:01, 12.10it/s] Spaced Sampler: 62%|██████▏ | 31/50 [00:02<00:01, 12.07it/s] Spaced Sampler: 66%|██████▌ | 33/50 [00:02<00:01, 12.08it/s] Spaced Sampler: 70%|███████ | 35/50 [00:03<00:01, 12.06it/s] Spaced Sampler: 74%|███████▍ | 37/50 [00:03<00:01, 12.05it/s] Spaced Sampler: 78%|███████▊ | 39/50 [00:03<00:00, 11.95it/s] Spaced Sampler: 82%|████████▏ | 41/50 [00:03<00:00, 11.97it/s] Spaced Sampler: 86%|████████▌ | 43/50 [00:03<00:00, 12.01it/s] Spaced Sampler: 90%|█████████ | 45/50 [00:03<00:00, 12.06it/s] Spaced Sampler: 94%|█████████▍| 47/50 
[00:04<00:00, 12.12it/s] Spaced Sampler: 98%|█████████▊| 49/50 [00:04<00:00, 12.17it/s] Spaced Sampler: 100%|██████████| 50/50 [00:04<00:00, 11.77it/s] upsampling the background image using RealESRGAN... Face image tmpcdoq1b8cC.L.A.I.R.E._everhart_s saved to ./..
Want to make some of these yourself?
Run this model