haor committed
Commit a45bc86
1 Parent(s): b1d96f3

Update app.py

Files changed (1)
  1. app.py +40 -32
app.py CHANGED
@@ -9,14 +9,17 @@ from PIL import Image
 import onnxruntime as ort
 import requests
 
-def _binary_array_to_hex(arr):
-    bit_string = ''.join(str(b) for b in 1 * arr.flatten())
+def binary_array_to_hex(arr: np.ndarray) -> str:
+    """Convert a binary array to a hex string."""
+    bit_string = ''.join(str(b) for b in 1 * arr.flatten())  # 1 * casts bools to 0/1 ints
     width = int(np.ceil(len(bit_string) / 4))
     return '{:0>{width}x}'.format(int(bit_string, 2), width=width)
 
-def phashstr(image, hash_size=8, highfreq_factor=4):
+def phash(image: Image.Image, hash_size: int = 8, highfreq_factor: int = 4) -> str:
+    """Calculate the perceptual hash of an image."""
     if hash_size < 2:
         raise ValueError('Hash size must be greater than or equal to 2')
+
     import scipy.fftpack
     img_size = hash_size * highfreq_factor
     image = image.convert('L').resize((img_size, img_size), Image.Resampling.LANCZOS)
@@ -25,14 +28,16 @@ def phashstr(image, hash_size=8, highfreq_factor=4):
     dctlowfreq = dct[:hash_size, :hash_size]
     med = np.median(dctlowfreq)
     diff = dctlowfreq > med
-    return _binary_array_to_hex(diff.flatten())
+    return binary_array_to_hex(diff)
 
-def normalized(a, axis=-1, order=2):
+def normalize(a: np.ndarray, axis: int = -1, order: int = 2) -> np.ndarray:
+    """Normalize a numpy array."""
     l2 = np.atleast_1d(np.linalg.norm(a, order, axis))
     l2[l2 == 0] = 1
     return a / np.expand_dims(l2, axis)
 
 def convert_numpy_types(data):
+    """Convert numpy types to Python native types."""
     if isinstance(data, dict):
         return {key: convert_numpy_types(value) for key, value in data.items()}
     elif isinstance(data, list):
@@ -44,50 +49,53 @@ def convert_numpy_types(data):
     else:
         return data
 
-def download_onnx_model(url, filename):
+def download_model(url: str, path: str) -> None:
+    """Download a model from a URL and save it to a file."""
     response = requests.get(url)
-    with open(filename, 'wb') as f:
+    with open(path, 'wb') as f:
         f.write(response.content)
 
-def predict(image):
-    onnx_url = "https://huggingface.co/haor/aesthetics/resolve/main/aesthetic_score_mlp.onnx"
-    onnx_path = "aesthetic_score_mlp.onnx"
-    download_onnx_model(onnx_url, onnx_path)
-
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    ort_session = ort.InferenceSession(onnx_path)
-
-    model2, preprocess = clip.load("ViT-L/14", device=device)
-    image = Image.fromarray(image)
+# Load models outside the function
+onnx_url = "https://huggingface.co/haor/aesthetics/resolve/main/aesthetic_score_mlp.onnx"
+onnx_path = "aesthetic_score_mlp.onnx"
+download_model(onnx_url, onnx_path)
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+ort_session = ort.InferenceSession(onnx_path)
+model, preprocess = clip.load("ViT-L/14", device=device)
+
+def predict(image: np.ndarray) -> dict:
+    """Predict the aesthetic score of an image using CLIP."""
+    image = Image.fromarray(image)
     image_np = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
     laplacian_variance = cv2.Laplacian(image_np, cv2.CV_64F).var()
-    phash = phashstr(image)
-    md5 = hashlib.md5(image.tobytes()).hexdigest()
-    sha1 = hashlib.sha1(image.tobytes()).hexdigest()
+    phash_str = phash(image)
+    md5_hash = hashlib.md5(image.tobytes()).hexdigest()
+    sha1_hash = hashlib.sha1(image.tobytes()).hexdigest()
 
     inputs = preprocess(image).unsqueeze(0).to(device)
+
     with torch.no_grad():
-        img_emb = model2.encode_image(inputs)
-    img_emb = normalized(img_emb.cpu().numpy())
-
-    ort_inputs = {ort_session.get_inputs()[0].name: img_emb.astype(np.float32)}
-    ort_outs = ort_session.run(None, ort_inputs)
-    prediction = ort_outs[0].item()
-
+        img_emb = model.encode_image(inputs)
+        img_emb = normalize(img_emb.cpu().numpy())
+        ort_inputs = {ort_session.get_inputs()[0].name: img_emb.astype(np.float32)}
+        ort_outs = ort_session.run(None, ort_inputs)
+        prediction = ort_outs[0].item()
+
     result = {
         "clip_aesthetic": prediction,
-        "phash": phash,
-        "md5": md5,
-        "sha1": sha1,
+        "phash": phash_str,
+        "md5": md5_hash,
+        "sha1": sha1_hash,
         "laplacian_variance": laplacian_variance
     }
     return convert_numpy_types(result)
 
 title = "CLIP Aesthetic Score"
-description = "Upload an image to predict its aesthetic score using the CLIP model and calculate other image metrics."
+description = "Upload an image to predict its aesthetic score using the CLIP model and other metrics."
 
 gr.Interface(
-    fn=predict,
+    fn=predict,
     inputs=gr.Image(type="numpy"),
     outputs=gr.JSON(label="Result"),
     title=title,
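
A quick, self-contained sanity check of the revised hash helpers in this commit. Two caveats: the DCT computation (original lines 26-27) is elided from the diff, so the two lines marked below are assumed to follow the standard imagehash-style two-pass DCT, and the random test image is purely hypothetical. Note the 1 * multiplication, which casts the boolean DCT mask to 0/1 integers before joining; without it, str(b) yields 'True'/'False' and int(bit_string, 2) raises ValueError.

import numpy as np
from PIL import Image

def binary_array_to_hex(arr: np.ndarray) -> str:
    """Convert a binary array to a hex string."""
    bit_string = ''.join(str(b) for b in 1 * arr.flatten())  # 1 * casts bools to 0/1 ints
    width = int(np.ceil(len(bit_string) / 4))
    return '{:0>{width}x}'.format(int(bit_string, 2), width=width)

def phash(image: Image.Image, hash_size: int = 8, highfreq_factor: int = 4) -> str:
    """Calculate the perceptual hash of an image."""
    if hash_size < 2:
        raise ValueError('Hash size must be greater than or equal to 2')

    import scipy.fftpack
    img_size = hash_size * highfreq_factor
    image = image.convert('L').resize((img_size, img_size), Image.Resampling.LANCZOS)
    # Assumed reconstruction of the two lines the diff elides:
    pixels = np.asarray(image, dtype=np.float64)
    dct = scipy.fftpack.dct(scipy.fftpack.dct(pixels, axis=0), axis=1)
    dctlowfreq = dct[:hash_size, :hash_size]
    med = np.median(dctlowfreq)
    diff = dctlowfreq > med
    return binary_array_to_hex(diff)

if __name__ == '__main__':
    # Hypothetical input: a random 64x64 RGB image standing in for an upload
    img = Image.fromarray(np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8))
    print(phash(img))  # default 8x8 hash -> 16 hex characters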