wglint / 3_rv

Stable Diffusion 2.1 - Realistic Vision 5.1 - VAE

  • Public
  • 1.2K runs
  • GitHub

Input

Output

Run time and cost

This model costs approximately $0.014 to run on Replicate, or 71 runs per $1, but this varies depending on your inputs. It is also open source and you can run it on your own computer with Docker.

This model runs on Nvidia A100 (80GB) GPU hardware. Predictions typically complete within 11 seconds.

Readme

What this model does and how it works

What this model does

This model, named 3_Rv, generates pictures with the Realistic Vision 5.1 model, which you can find on Hugging Face here.

When generating pictures, you can choose the following options:

  • NSFW: Choose whether or not to use an NSFW filter.

  • VAE: Choose whether or not to use a VAE. The available options are:

    noVAE: Use the basic VAE of Stable Diffusion.
    VAE: Use this VAE from Hugging Face, the VAE published by stabilityai.
    VAE and noVAE: Create pictures both with and without the stabilityai VAE.

How this model works

Before starting, you need to have Cog and Docker installed. To learn about Cog, click here for the GitHub docs. To get started, use brew to install Cog:

brew install cog

For this model, I use only 2 files:

All the code is in this GitHub repo.

Or, let's look at all the code here:

cog.yaml

# Configuration for Cog ⚙️
# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md

build:
  # set to true if your model requires a GPU
  gpu: true
  cuda: "11.8"
  python_version: "3.9"
  # Python dependencies needed by predict.py, pinned to exact versions.
  python_packages:
    - "torch==2.0.1"
    - "torchvision==0.15.2"
    - "transformers==4.26.1"
    - "safetensors==0.3.1"
    - "diffusers==0.19.0"
    - "accelerate==0.21.0"
    - "numpy==1.25.1"
    - "omegaconf==2.3.0"
    # NOTE(review): xformers is the only unpinned package here; presumably it
    # should be pinned to a release compatible with torch 2.0.1 so that builds
    # stay reproducible — confirm.
    - "xformers"

  # Extra shell commands executed while building the image.
  run : 
    - "pip install --upgrade pip"

# Entry point: the Predictor class defined in predict.py.
predict: "predict.py:Predictor"
# Name (including registry) given to the built image.
image: "r8.im/wglint/3_rv"

predict.py

# Prediction interface for Cog ⚙️
# https://github.com/replicate/cog/blob/main/docs/python.md

from cog import BasePredictor, Input, Path
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler, AutoencoderKL
from diffusers.pipelines.stable_diffusion.safety_checker import (
    StableDiffusionSafetyChecker,
)

import torch
from PIL import Image
from transformers import AutoModelForImageClassification, ViTImageProcessor

from typing import List

# Local directory passed as cache_dir when loading the Stable Diffusion pipelines.
MODEL_PIPELINE_CACHE = "diffusers-cache"
# Hugging Face Hub id of the Realistic Vision 5.1 checkpoint; both pipelines load from it.
MODEL_noVAE = "SG161222/Realistic_Vision_V5.1_noVAE"
# URL of the stabilityai VAE weights, distributed as a single .safetensors file.
MODEL_VAE = "https://huggingface.co/stabilityai/sd-vae-ft-mse-original/blob/main/vae-ft-mse-840000-ema-pruned.safetensors"
# Local directory passed as cache_dir when loading the VAE file.
VAE_CACHE = "vae-cache"
# Hugging Face Hub id of the Stable Diffusion safety checker.
SAFETY_MODEL_ID = "CompVis/stable-diffusion-safety-checker"

class Predictor(BasePredictor):
    """Cog predictor that generates pictures with Realistic Vision 5.1.

    Two Stable Diffusion pipelines are kept on the GPU: ``rv_noVAE`` is the
    checkpoint loaded as published, ``rv_VAE`` is the same checkpoint with its
    VAE replaced by the one downloaded from ``MODEL_VAE``. An image classifier
    is also loaded so generated pictures can optionally be screened for NSFW
    content.
    """

    def setup(self) -> None:
        """Load the model into memory to make running multiple predictions efficient"""
        vae = AutoencoderKL.from_single_file(
            MODEL_VAE,
            cache_dir=VAE_CACHE,
        )

        ## NSFW Filter (classifier used by check_nsfw)
        self.model_nsfw = AutoModelForImageClassification.from_pretrained("Falconsai/nsfw_image_detection")
        self.processor_nsfw = ViTImageProcessor.from_pretrained("Falconsai/nsfw_image_detection")

        ## VAE and no VAE model pipeline
        self.rv_noVAE = StableDiffusionPipeline.from_pretrained(
            MODEL_noVAE,
            cache_dir=MODEL_PIPELINE_CACHE,
        ).to("cuda")

        self.rv_VAE = StableDiffusionPipeline.from_pretrained(
            MODEL_noVAE,
            vae=vae,
            cache_dir=MODEL_PIPELINE_CACHE,
        ).to("cuda")

    def check_nsfw(self, path: Path) -> str:
        """Classify the image stored at ``path``.

        Args:
            path: Location of an image file readable by PIL.

        Returns:
            The label the classifier assigns to the image, taken from the
            model's ``id2label`` mapping ("nsfw" or "normal").
        """
        # The context manager closes the file handle once the processor has
        # converted the pixels into tensors.
        with Image.open(path) as img_check:
            inputs = self.processor_nsfw(images=img_check, return_tensors="pt")

        outputs = self.model_nsfw(**inputs)
        predict_label = outputs.logits.argmax(-1).item()
        return self.model_nsfw.config.id2label[predict_label]  # "nsfw" or "normal"

    def _generate(self, pipeline, parameters, seed):
        """Run ``pipeline`` with a DPMSolverMultistepScheduler and return its images."""
        pipeline.scheduler = DPMSolverMultistepScheduler.from_config(
            pipeline.scheduler.config
        )
        # A generator seeded per run means every pipeline starts from the same
        # random state for a given seed, whichever VAE mode is selected.
        generator = torch.Generator("cuda").manual_seed(seed)
        return pipeline(**parameters, generator=generator).images

    def _keep_picture(self, picture, output_path, nsfw_filter) -> bool:
        """Save ``picture`` to ``output_path`` and tell whether it passes the NSFW filter."""
        picture.save(output_path)
        # The classifier only runs when the filter is enabled.
        if nsfw_filter and self.check_nsfw(output_path) == "nsfw":
            print("NSFW picture detected !! Take car about this !!!")
            return False
        return True

    @torch.inference_mode()
    def predict(
        self,
        NSFW: bool = Input(description="Choice a option for NSFW", default=False),
        VAE: str = Input(description="Choice a option for VAE", choices=["noVAE", "VAE", "VAE and noVAE"], default="noVAE"),
        prompt: str = Input(description="Enter a prompt", default="RAW photo, a portrait photo of a latina woman in casual clothes, natural skin, 8k uhd, high quality, film grain, Fujifilm XT3"),
        negative_prompt: str = Input(description="Enter a negative prompt", default="(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime:1.4), text, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck"),
        width: int = Input(description="Enter a width", default=512),
        height: int = Input(description="Enter a height", default=768),
        guidance_scale: int = Input(description="Enter a guidance scale", default=7),
        num_inference_steps: int = Input(description="Enter a number of inference steps", default=20),
        seed: int = Input(description="Enter a seed", default=42),
        number_picture: int = Input(description="Enter a number of picture", default=1, le=4, ge=1),
    ) -> List[Path]:
        """Generate pictures and return the paths of the PNG files written to /tmp.

        Args:
            NSFW: When True, pictures classified as "nsfw" are dropped from the
                output; when False every picture is returned unchecked.
            VAE: "noVAE" or "VAE" runs the matching pipeline; "VAE and noVAE"
                runs both and interleaves the results (VAE picture first,
                then the noVAE picture with the same index).
            prompt: Text prompt, repeated once per requested picture.
            negative_prompt: Negative prompt, repeated once per requested picture.
            width: Picture width passed to the pipeline.
            height: Picture height passed to the pipeline.
            guidance_scale: Guidance scale passed to the pipeline.
            num_inference_steps: Number of inference steps passed to the pipeline.
            seed: Seed of the random generator; each pipeline run is seeded
                with it independently.
            number_picture: Number of pictures generated per pipeline (1 to 4).

        Returns:
            The paths of the pictures that were kept.

        Raises:
            RuntimeError: If the NSFW filter rejected every generated picture.
        """
        parameters = {
            "prompt": [prompt] * number_picture,
            "negative_prompt": [negative_prompt] * number_picture,
            "width": width,
            "height": height,
            "guidance_scale": guidance_scale,
            "num_inference_steps": num_inference_steps,
        }

        # Each run is (pipeline, output file stem, label used in log lines).
        # Single-pipeline modes log no label, as before.
        if VAE == "VAE":
            runs = [(self.rv_VAE, "picture", None)]
        elif VAE == "noVAE":
            runs = [(self.rv_noVAE, "picture", None)]
        else:
            runs = [
                (self.rv_VAE, "picture_vae", "VAE"),
                (self.rv_noVAE, "picture_novae", "noVAE"),
            ]

        batches = []
        for pipeline, _, label in runs:
            if label is not None:
                print(f"Creating {label} image")
            batches.append(self._generate(pipeline, parameters, seed))

        output = []
        # zip(*batches) groups the pictures sharing an index across pipelines.
        for i, pictures in enumerate(zip(*batches)):
            for (_, stem, label), picture in zip(runs, pictures):
                output_path = f"/tmp/{stem}_{i}.png"
                if not self._keep_picture(picture, output_path, NSFW):
                    continue
                output.append(Path(output_path))
                if label is not None:
                    print(f"Picture {label} : {output_path}")

        if not output:
            # Only reachable when the filter dropped everything. The declared
            # output type is List[Path], so the message is raised as an error
            # rather than returned as a string.
            raise RuntimeError(
                "All picture you generate are NSFW, please change your prompt or negative prompt"
            )
        return output

Let’s check out my other model!