SeeMoreDetails

Sleeping

App Files Files Community

Eduard-Sebastian Zamfir committed on May 29

Commit

9080570

•

1 Parent(s): 022d36d

add gradio app

Browse files

Files changed (15) hide show

.gitignore +2 -0
README.md +6 -6
app.py +153 -0
assets/arch.svg +0 -0
configs/eval_seemore_t_x4.yml +14 -0
images/img002x4.png +0 -0
images/img003x4.png +0 -0
images/img004x4.png +0 -0
images/img035x4.png +0 -0
images/img053x4.png +0 -0
images/img064x4.png +0 -0
images/img083x4.png +0 -0
images/img092x4.png +0 -0
models/seemore.py +416 -0
requirements.txt +6 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ __pycache__/
2	+ flagged/

README.md CHANGED Viewed

@@ -1,13 +1,13 @@
 ---
-title: SeeMoreDetails
-emoji: 🏢
-colorFrom: indigo
-colorTo: purple
 sdk: gradio
-sdk_version: 4.31.5
 app_file: app.py
 pinned: false
-license: apache-2.0
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: SeemoRe
+emoji: 💻
+colorFrom: purple
+colorTo: blue
 sdk: gradio
+sdk_version: 4.16.0
 app_file: app.py
 pinned: false
+license: mit
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,153 @@

+import os
+import yaml
+import torch
+import argparse
+import numpy as np
+import gradio as gr
+from PIL import Image
+from copy import deepcopy
+from torch.nn.parallel import DataParallel, DistributedDataParallel
+from huggingface_hub import hf_hub_download
+from gradio_imageslider import ImageSlider
+## local code
+from models import seemore
+def dict2namespace(config):
+    """Recursively turn a (possibly nested) dict into an ``argparse.Namespace``.
+    Each key becomes an attribute of the namespace. Values that are dicts are
+    converted the same way; every other value is stored unchanged.
+    """
+    ns = argparse.Namespace()
+    for name, entry in config.items():
+        converted = dict2namespace(entry) if isinstance(entry, dict) else entry
+        setattr(ns, name, converted)
+    return ns
+def load_img(filename, norm=True):
+    """Open ``filename`` with PIL, convert it to RGB and return a NumPy array.
+    When ``norm`` is truthy the values are divided by 255 and cast to
+    ``float32``; otherwise the array is returned exactly as NumPy built it.
+    """
+    rgb = Image.open(filename).convert("RGB")
+    pixels = np.array(rgb)
+    if not norm:
+        return pixels
+    return (pixels / 255.).astype(np.float32)
+def process_img(image):
+    """Run the module-level ``model`` on one image and return ``(image, restored)``.
+    The input is converted to a float32 array scaled by 1/255, permuted with
+    ``(2, 0, 1)`` (assumes a height x width x channels array), given a leading
+    batch axis and moved to ``device``. Inference runs under ``torch.no_grad()``.
+    The prediction is clamped to [0, 1], scaled to 0-255, rounded and cast to
+    ``uint8`` before being wrapped in a PIL image. The first tuple element is
+    the untouched input.
+    """
+    scaled = (np.array(image) / 255.).astype(np.float32)
+    batch = torch.tensor(scaled).permute(2, 0, 1).unsqueeze(0).to(device)
+    with torch.no_grad():
+        prediction = model(batch)
+    # squeeze() removes the singleton batch axis (and any other size-1 axis);
+    # the permute puts channels last again for PIL.
+    restored = prediction.squeeze().permute(1, 2, 0).clamp_(0, 1).cpu().detach().numpy()
+    restored = np.clip(restored, 0., 1.)
+    restored_u8 = (restored * 255.0).round().astype(np.uint8)  # float32 to uint8
+    return (image, Image.fromarray(restored_u8))
+def load_network(net, load_path, strict=True, param_key='params'):
+    """Load a checkpoint file into ``net``.
+    Args:
+        net: Network to fill. A ``DataParallel`` / ``DistributedDataParallel``
+            wrapper is unwrapped to its ``.module`` first.
+        load_path: Path handed to ``torch.load``.
+        strict: Forwarded to ``load_state_dict``.
+        param_key: Key of the state dict inside the checkpoint. If it is
+            missing but ``'params'`` exists, ``'params'`` is used instead.
+            ``None`` means the loaded object itself is the state dict.
+    """
+    if isinstance(net, (DataParallel, DistributedDataParallel)):
+        net = net.module
+    # NOTE(review): torch.load unpickles the file, so only trusted checkpoints
+    # should be loaded here (consider weights_only=True where supported).
+    load_net = torch.load(load_path, map_location=lambda storage, loc: storage)
+    if param_key is not None:
+        if param_key not in load_net and 'params' in load_net:
+            param_key = 'params'
+        load_net = load_net[param_key]
+    # remove unnecessary 'module.'
+    # Iterate over a snapshot of the keys rather than a deepcopy of the whole
+    # state dict, so the weight tensors are not duplicated in memory.
+    for k in list(load_net):
+        if k.startswith('module.'):
+            load_net[k[7:]] = load_net.pop(k)
+    net.load_state_dict(load_net, strict=strict)
+# Paths of the evaluation config and of the checkpoint loaded at import time.
+# NOTE(review): the checkpoint is read from this relative path and nothing in
+# this file downloads it -- confirm the file is present in the deployment.
+CONFIG     = "configs/eval_seemore_t_x4.yml"
+MODEL_NAME = "checkpoints/SeemoRe_T/X4/net_g_latest.pth"
+# parse config file
+with open(os.path.join(CONFIG), "r") as f:
+    config = yaml.safe_load(f)
+# Expose the parsed YAML as attributes (cfg.model.scale, ...).
+cfg = dict2namespace(config)
+# Inference is pinned to the CPU.
+device = torch.device("cpu")
+# Build the network from the `model` section of the config.
+model = seemore.SeemoRe(scale=cfg.model.scale, in_chans=cfg.model.in_chans,
+                        num_experts=cfg.model.num_experts, num_layers=cfg.model.num_layers, embedding_dim=cfg.model.embedding_dim,
+                        img_range=cfg.model.img_range, use_shuffle=cfg.model.use_shuffle, global_kernel_size=cfg.model.global_kernel_size,
+                        recursive=cfg.model.recursive, lr_space=cfg.model.lr_space, topk=cfg.model.topk)
+model = model.to(device)
+print ("IMAGE MODEL CKPT:", MODEL_NAME)
+# strict=True: checkpoint keys must match the model's state dict exactly.
+load_network(model, MODEL_NAME, strict=True, param_key='params')
+# Text shown by the Gradio interface: page title, Markdown/HTML description
+# (authors, abstract, BibTeX entry) and the footer link.
+title = "See More Details"
+description = ''' ### See More Details: Efficient Image Super-Resolution by Experts Mining
+#### [Eduard Zamfir<sup>1</sup>](https://eduardzamfir.github.io), [Zongwei Wu<sup>1*</sup>](https://sites.google.com/view/zwwu/accueil), [Nancy Mehta<sup>1</sup>](https://scholar.google.com/citations?user=WwdYdlUAAAAJ&hl=en&oi=ao),  [Yulun Zhang<sup>2,3*</sup>](http://yulunzhang.com/) and [Radu Timofte<sup>1</sup>](https://www.informatik.uni-wuerzburg.de/computervision/)
+#### **<sup>1</sup> University of Würzburg, Germany - <sup>2</sup> Shanghai Jiao Tong University, China - <sup>3</sup> ETH Zürich, Switzerland**
+#### **<sup>*</sup> Corresponding authors**
+<details>
+<summary> <b> Abstract</b> (click me to read)</summary>
+<p>
+Reconstructing high-resolution (HR) images from low-resolution (LR) inputs poses a significant challenge in image super-resolution (SR). While recent approaches have demonstrated the efficacy of intricate operations customized for various objectives, the straightforward stacking of these disparate operations can result in a substantial computational burden, hampering their practical utility. In response, we introduce **S**eemo**R**e, an efficient SR model employing expert mining. Our approach strategically incorporates experts at different levels, adopting a collaborative methodology. At the macro scale, our experts address rank-wise and spatial-wise informative features, providing a holistic understanding. Subsequently, the model delves into the subtleties of rank choice by leveraging a mixture of low-rank experts. By tapping into experts specialized in distinct key factors crucial for accurate SR, our model excels in uncovering intricate intra-feature details. This collaborative approach is reminiscent of the concept of **see more**, allowing our model to achieve an optimal performance with minimal computational costs in efficient settings
+</p>
+</details>
+<br>
+<code>
+@inproceedings{zamfir2024details,
+  title={See More Details: Efficient Image Super-Resolution by Experts Mining},
+  author={Eduard Zamfir and Zongwei Wu and Nancy Mehta and Yulun Zhang and Radu Timofte},
+  booktitle={International Conference on Machine Learning},
+  year={2024},
+  organization={PMLR}
+}
+</code>
+<br>
+'''
+article = "<p style='text-align: center'><a href='https://eduardzamfir.github.io/seemore' target='_blank'>See More Details: Efficient Image Super-Resolution by Experts Mining</a></p>"
+# Example inputs offered in the UI: one image path per row.
+examples = [['images/img002x4.png'],
+            ['images/img003x4.png'],
+            ['images/img004x4.png'],
+            ['images/img035x4.png'],
+            ['images/img053x4.png'],
+            ['images/img064x4.png'],
+            ['images/img083x4.png'],
+            ['images/img092x4.png'],
+            ]
+# CSS override passed to the interface for the image containers.
+css = """
+    .image-frame img, .image-container img {
+        width: auto;
+        height: auto;
+        max-width: none;
+    }
+"""
+# One PIL image in; process_img returns an (input, restored) pair that is
+# handed to the ImageSlider output component.
+demo = gr.Interface(
+    fn=process_img,
+    inputs=[gr.Image(type="pil", label="Input", value="images/img002x4.png"),],
+    outputs=ImageSlider(label="Super-Resolved Image", type="pil"), #[gr.Image(type="pil", label="Ouput", min_width=500)],
+    title=title,
+    description=description,
+    article=article,
+    examples=examples,
+    css=css,
+)
+# Start the Gradio server only when the file is run as a script.
+if __name__ == "__main__":
+    demo.launch()

assets/arch.svg ADDED Viewed

configs/eval_seemore_t_x4.yml ADDED Viewed

	@@ -0,0 +1,14 @@

+model:
+    arch: "SeemoRe"
+    scale: 4
+    in_chans: 3
+    num_experts: 3
+    img_range: 1.0
+    num_layers: 6
+    embedding_dim: 36
+    use_shuffle: True
+    lr_space: exp
+    topk: 1
+    recursive: 2
+    global_kernel_size: 11

images/img002x4.png ADDED Viewed

images/img003x4.png ADDED Viewed

images/img004x4.png ADDED Viewed

images/img035x4.png ADDED Viewed

images/img053x4.png ADDED Viewed

images/img064x4.png ADDED Viewed

images/img083x4.png ADDED Viewed

images/img092x4.png ADDED Viewed

models/seemore.py ADDED Viewed

	@@ -0,0 +1,416 @@

+from typing import Tuple, List
+from torch import Tensor
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from einops.layers.torch import Rearrange
+######################
+# Meta Architecture
+######################
+class SeemoRe(nn.Module):
+    """Top-level network: shallow conv, a stack of ``ResGroup`` layers, pixel-shuffle upsampler.
+    Args:
+        scale: Upscaling factor used by the ``PixelShuffle`` head.
+        in_chans: Number of input channels; the output has the same number.
+        num_experts, use_shuffle, global_kernel_size, recursive, lr_space, topk:
+            Forwarded unchanged to every ``ResGroup``.
+        num_layers: Number of ``ResGroup`` layers in the body.
+        embedding_dim: Feature width used throughout the network.
+        img_range: Factor applied to the mean-subtracted input and divided out
+            again at the output.
+    """
+    def __init__(self,
+                 scale: int = 4,
+                 in_chans: int = 3,
+                 num_experts: int = 6,
+                 num_layers: int = 6,
+                 embedding_dim: int = 64,
+                 img_range: float = 1.0,
+                 use_shuffle: bool = False,
+                 global_kernel_size: int = 11,
+                 recursive: int = 2,
+                 # Name of the low-rank growth rule understood by MoEBlock
+                 # ("linear", "exp", "double"); the former int default of 1
+                 # was always rejected with NotImplementedError.
+                 lr_space: str = "linear",
+                 topk: int = 2,):
+        super().__init__()
+        self.scale = scale
+        self.num_in_channels = in_chans
+        self.num_out_channels = in_chans
+        self.img_range = img_range
+        # NOTE(review): the mean has exactly three entries, so inputs are
+        # presumably expected to have three channels -- confirm for in_chans != 3.
+        rgb_mean = (0.4488, 0.4371, 0.4040)
+        # Non-persistent buffer: follows .to()/.cuda() with the module but is
+        # kept out of the state dict, so existing checkpoints still load strictly.
+        self.register_buffer("mean", torch.Tensor(rgb_mean).view(1, 3, 1, 1), persistent=False)
+        # -- SHALLOW FEATURES --
+        self.conv_1 = nn.Conv2d(self.num_in_channels, embedding_dim, kernel_size=3, padding=1)
+        # -- DEEP FEATURES --
+        self.body = nn.ModuleList(
+            [ResGroup(in_ch=embedding_dim,
+                       num_experts=num_experts,
+                       use_shuffle=use_shuffle,
+                       topk=topk,
+                       lr_space=lr_space,
+                       recursive=recursive,
+                       global_kernel_size=global_kernel_size) for _ in range(num_layers)]
+        )
+        # -- UPSCALE --
+        self.norm = LayerNorm(embedding_dim, data_format='channels_first')
+        self.conv_2 = nn.Conv2d(embedding_dim, embedding_dim, kernel_size=3, padding=1)
+        self.upsampler = nn.Sequential(
+            nn.Conv2d(embedding_dim, (scale**2) * self.num_out_channels, kernel_size=3, padding=1),
+            nn.PixelShuffle(scale)
+        )
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Return the upscaled version of ``x``."""
+        # Match the input's dtype/device locally instead of rebinding
+        # self.mean on every call.
+        mean = self.mean.type_as(x)
+        x = (x - mean) * self.img_range
+        # -- SHALLOW FEATURES --
+        x = self.conv_1(x)
+        res = x
+        # -- DEEP FEATURES --
+        for layer in self.body:
+            x = layer(x)
+        x = self.norm(x)
+        # -- HR IMAGE RECONSTRUCTION --
+        x = self.conv_2(x) + res
+        x = self.upsampler(x)
+        # Undo the input normalisation.
+        x = x / self.img_range + mean
+        return x
+#############################
+# Components
+#############################
+class ResGroup(nn.Module):
+    """One residual group: a local ``RME`` block followed by a global ``SME`` block.
+    Args:
+        in_ch: Channel count passed to both sub-blocks.
+        num_experts, lr_space, topk, recursive, use_shuffle: Forwarded to ``RME``.
+        global_kernel_size: Forwarded to ``SME`` as ``kernel_size``.
+    """
+    def __init__(self,
+                 in_ch: int,
+                 num_experts: int,
+                 global_kernel_size: int = 11,
+                 # Growth-rule name understood by MoEBlock ("linear", "exp",
+                 # "double"); the former int default of 1 was always rejected.
+                 lr_space: str = "linear",
+                 topk: int = 2,
+                 recursive: int = 2,
+                 use_shuffle: bool = False):
+        super().__init__()
+        self.local_block = RME(in_ch=in_ch,
+                               num_experts=num_experts,
+                               use_shuffle=use_shuffle,
+                               lr_space=lr_space,
+                               topk=topk,
+                               recursive=recursive)
+        self.global_block = SME(in_ch=in_ch,
+                                kernel_size=global_kernel_size)
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Apply the local block, then the global block."""
+        x = self.local_block(x)
+        x = self.global_block(x)
+        return x
+#############################
+# Global Block
+#############################
+class SME(nn.Module):
+    """Global block: a ``StripedConvFormer`` and a ``GatedFFN``, each applied
+    to a layer-normalised input and added back to that input.
+    """
+    def __init__(self,
+                 in_ch: int,
+                 kernel_size: int = 11):
+        super().__init__()
+        self.norm_1 = LayerNorm(in_ch, data_format='channels_first')
+        self.block = StripedConvFormer(in_ch=in_ch, kernel_size=kernel_size)
+        self.norm_2 = LayerNorm(in_ch, data_format='channels_first')
+        self.ffn = GatedFFN(in_ch, mlp_ratio=2, kernel_size=3, act_layer=nn.GELU())
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Run both residual sub-layers in sequence."""
+        mixed = self.block(self.norm_1(x)) + x
+        refined = self.ffn(self.norm_2(mixed)) + mixed
+        return refined
+class StripedConvFormer(nn.Module):
+    """Gated mixer: a 1x1 conv + GELU yields two halves ``q`` and ``v``;
+    ``q`` goes through a depthwise ``StripedConv2d``, is multiplied with ``v``
+    and projected by a final 1x1 conv.
+    """
+    def __init__(self,
+                 in_ch: int,
+                 kernel_size: int):
+        super().__init__()
+        self.in_ch = in_ch
+        self.kernel_size = kernel_size
+        self.padding = kernel_size // 2
+        self.proj = nn.Conv2d(in_ch, in_ch, kernel_size=1, padding=0)
+        expand = nn.Conv2d(in_ch, in_ch * 2, kernel_size=1, padding=0)
+        self.to_qv = nn.Sequential(expand, nn.GELU())
+        self.attn = StripedConv2d(in_ch, kernel_size=kernel_size, depthwise=True)
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Return ``proj(attn(q) * v)`` with ``q, v`` split from ``to_qv(x)``."""
+        query, value = torch.chunk(self.to_qv(x), 2, dim=1)
+        gated = self.attn(query) * value
+        return self.proj(gated)
+#############################
+# Local Blocks
+#############################
+class RME(nn.Module):
+    """Local block: a ``MoEBlock`` and a ``GatedFFN``, each applied to a
+    layer-normalised input and added back to that input.
+    Args:
+        in_ch: Channel count of the input and output.
+        num_experts, topk, lr_space, recursive, use_shuffle: Forwarded to ``MoEBlock``.
+    """
+    def __init__(self,
+                 in_ch: int,
+                 num_experts: int,
+                 topk: int,
+                 # Growth-rule name understood by MoEBlock ("linear", "exp",
+                 # "double"); the former int default of 1 was always rejected.
+                 lr_space: str = "linear",
+                 recursive: int = 2,
+                 use_shuffle: bool = False,):
+        super().__init__()
+        self.norm_1 = LayerNorm(in_ch, data_format='channels_first')
+        self.block = MoEBlock(in_ch=in_ch, num_experts=num_experts, topk=topk, use_shuffle=use_shuffle, recursive=recursive, lr_space=lr_space,)
+        self.norm_2 = LayerNorm(in_ch, data_format='channels_first')
+        self.ffn = GatedFFN(in_ch, mlp_ratio=2, kernel_size=3, act_layer=nn.GELU())
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Run both residual sub-layers in sequence."""
+        x = self.block(self.norm_1(x)) + x
+        x = self.ffn(self.norm_2(x)) + x
+        return x
+#################
+# MoE Layer
+#################
+class MoEBlock(nn.Module):
+    """Mixture-of-experts block.
+    ``conv_1`` doubles the channels; the result is optionally channel-shuffled
+    and split into a feature half and a key half. The feature half passes
+    through a striped conv, the key half through ``calibrate``; both are fed
+    to the ``MoELayer`` and the result is projected by a 1x1 conv.
+    ``lr_space`` names the rule giving each expert's low dimension:
+    "linear" -> i+2, "exp" -> 2**(i+1), "double" -> 2*i+2.
+    """
+    def __init__(self,
+                 in_ch: int,
+                 num_experts: int,
+                 topk: int,
+                 use_shuffle: bool = False,
+                 lr_space: str = "linear",
+                 recursive: int = 2):
+        super().__init__()
+        self.use_shuffle = use_shuffle
+        self.recursive = recursive
+        self.conv_1 = nn.Sequential(
+            nn.Conv2d(in_ch, in_ch, kernel_size=3, padding=1),
+            nn.GELU(),
+            nn.Conv2d(in_ch, 2*in_ch, kernel_size=1, padding=0)
+        )
+        self.agg_conv = nn.Sequential(
+            nn.Conv2d(in_ch, in_ch, kernel_size=4, stride=4, groups=in_ch),
+            nn.GELU())
+        self.conv = nn.Sequential(
+            nn.Conv2d(in_ch, in_ch, kernel_size=3, stride=1, padding=1, groups=in_ch),
+            nn.Conv2d(in_ch, in_ch, kernel_size=1, padding=0)
+        )
+        self.conv_2 = nn.Sequential(
+            StripedConv2d(in_ch, kernel_size=3, depthwise=True),
+            nn.GELU())
+        # Pick the growth rule by name; unknown names are rejected.
+        growth_rules = (
+            ("linear", lambda i: i+2),
+            ("exp", lambda i: 2**(i+1)),
+            ("double", lambda i: 2*i+2),
+        )
+        for rule_name, rule in growth_rules:
+            if lr_space == rule_name:
+                grow_func = rule
+                break
+        else:
+            raise NotImplementedError(f"lr_space {lr_space} not implemented")
+        # Experts are built before the router, one per index with its own low dim.
+        expert_bank = [Expert(in_ch=in_ch, low_dim=grow_func(i)) for i in range(num_experts)]
+        router = Router(in_ch=in_ch, num_experts=num_experts)
+        self.moe_layer = MoELayer(experts=expert_bank, gate=router, num_expert=topk)
+        self.proj = nn.Conv2d(in_ch, in_ch, kernel_size=1, padding=0)
+    def calibrate(self, x: torch.Tensor) -> torch.Tensor:
+        """Add a downsampled-then-upsampled summary of ``x`` back onto ``x``.
+        ``agg_conv`` (stride 4) is applied ``self.recursive`` times, followed
+        by ``conv`` and a bilinear resize back to the input's height and width.
+        """
+        _, _, height, width = x.shape
+        coarse = x
+        for _ in range(self.recursive):
+            coarse = self.agg_conv(coarse)
+        coarse = self.conv(coarse)
+        coarse = F.interpolate(coarse, size=(height, width), mode="bilinear", align_corners=False)
+        return x + coarse
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Split, transform both halves, mix them through the experts and project."""
+        expanded = self.conv_1(x)
+        if self.use_shuffle:
+            expanded = channel_shuffle(expanded, groups=2)
+        feats, keys = torch.chunk(expanded, chunks=2, dim=1)
+        feats = self.conv_2(feats)
+        keys = self.calibrate(keys)
+        return self.proj(self.moe_layer(feats, keys))
+class MoELayer(nn.Module):
+    """Routes an input through its top-``num_expert`` experts and adds the
+    weighted expert outputs to a copy of the input.
+    Args:
+        experts: Non-empty list of modules called as ``expert(inputs, k)``.
+        gate: Module mapping ``inputs`` to one score per expert and sample.
+        num_expert: How many of the highest-weighted experts each sample uses.
+    """
+    def __init__(self, experts: List[nn.Module], gate: nn.Module, num_expert: int = 1):
+        super().__init__()
+        assert len(experts) > 0
+        self.experts = nn.ModuleList(experts)
+        self.gate = gate
+        self.num_expert = num_expert
+    def forward(self, inputs: torch.Tensor, k: torch.Tensor):
+        """Return ``inputs`` plus the softmax-weighted outputs of the chosen experts."""
+        out = self.gate(inputs)
+        weights = F.softmax(out, dim=1, dtype=torch.float).to(inputs.dtype)
+        topk_weights, topk_experts = torch.topk(weights, self.num_expert)
+        out = inputs.clone()
+        if self.training:
+            # Dense path: every expert runs; weights outside the top-k are zero.
+            exp_weights = torch.zeros_like(weights)
+            exp_weights.scatter_(1, topk_experts, weights.gather(1, topk_experts))
+            for i, expert in enumerate(self.experts):
+                out += expert(inputs, k) * exp_weights[:, i:i+1, None, None]
+        elif inputs.shape[0] == 1:
+            # Single sample: run only its selected experts, in top-k order.
+            for slot, expert_idx in enumerate(topk_experts[0].tolist()):
+                out += self.experts[expert_idx](inputs, k) * topk_weights[:, slot:slot+1, None, None]
+        else:
+            # Batched inference: samples may choose different experts, so run
+            # each expert chosen by at least one sample and mask the others
+            # with zero weights. (The squeeze-based lookup used previously
+            # only worked for a batch of one.)
+            exp_weights = torch.zeros_like(weights)
+            exp_weights.scatter_(1, topk_experts, topk_weights)
+            for expert_idx in torch.unique(topk_experts).tolist():
+                out += self.experts[expert_idx](inputs, k) * exp_weights[:, expert_idx:expert_idx+1, None, None]
+        return out
+class Expert(nn.Module):
+    """Low-rank expert: ``x`` and ``k`` are each projected to ``low_dim``
+    channels by a 1x1 conv, multiplied element-wise, and projected back to
+    ``in_ch`` channels.
+    """
+    def __init__(self,
+                 in_ch: int,
+                 low_dim: int,):
+        super().__init__()
+        self.conv_1 = nn.Conv2d(in_ch, low_dim, kernel_size=1, padding=0)
+        self.conv_2 = nn.Conv2d(in_ch, low_dim, kernel_size=1, padding=0)
+        self.conv_3 = nn.Conv2d(low_dim, in_ch, kernel_size=1, padding=0)
+    def forward(self, x: torch.Tensor, k: torch.Tensor) -> torch.Tensor:
+        """Return ``conv_3(conv_2(k) * conv_1(x))``."""
+        low_x = self.conv_1(x)
+        modulated = self.conv_2(k) * low_x  # here no more sigmoid
+        return self.conv_3(modulated)
+class Router(nn.Module):
+    """Gate producing one score per expert: global average pooling, a
+    rearrange from ``b c 1 1`` to ``b c``, then a bias-free linear layer
+    from ``in_ch`` to ``num_experts``.
+    """
+    def __init__(self,
+                 in_ch: int,
+                 num_experts: int):
+        super().__init__()
+        stages = [
+            nn.AdaptiveAvgPool2d(1),
+            Rearrange('b c 1 1 -> b c'),
+            nn.Linear(in_ch, num_experts, bias=False),
+        ]
+        self.body = nn.Sequential(*stages)
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Return the output of the pooling/linear pipeline for ``x``."""
+        scores = self.body(x)
+        return scores
+#################
+# Utilities
+#################
+class StripedConv2d(nn.Module):
+    """A ``(1, k)`` convolution followed by a ``(k, 1)`` convolution, both
+    padded by ``k // 2`` along their long side. With ``depthwise=True`` each
+    conv uses ``groups=in_ch``, otherwise ``groups=1``.
+    """
+    def __init__(self,
+                 in_ch: int,
+                 kernel_size: int,
+                 depthwise: bool = False):
+        super().__init__()
+        self.in_ch = in_ch
+        self.kernel_size = kernel_size
+        self.padding = kernel_size // 2
+        n_groups = in_ch if depthwise else 1
+        row_conv = nn.Conv2d(in_ch, in_ch, kernel_size=(1, self.kernel_size), padding=(0, self.padding), groups=n_groups)
+        col_conv = nn.Conv2d(in_ch, in_ch, kernel_size=(self.kernel_size, 1), padding=(self.padding, 0), groups=n_groups)
+        self.conv = nn.Sequential(row_conv, col_conv)
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Apply the two striped convolutions in sequence."""
+        striped = self.conv(x)
+        return striped
+def channel_shuffle(x, groups=2):
+    """Interleave the channels of a 4-D tensor across ``groups``.
+    The channel axis is viewed as ``(groups, channels // groups)``, the two
+    axes are swapped, and the result is flattened back to one channel axis.
+    """
+    n, ch, dim_a, dim_b = x.shape
+    per_group = ch // groups
+    grouped = x.view(n, groups, per_group, dim_a, dim_b)
+    swapped = grouped.transpose(1, 2).contiguous()
+    return swapped.view(n, -1, dim_a, dim_b)
+class GatedFFN(nn.Module):
+    """Gated feed-forward block.
+    ``fn_1`` expands ``in_ch`` to ``in_ch * mlp_ratio`` channels; the result is
+    split in two halves, one half is passed through a depthwise conv and
+    multiplied onto the other, and ``fn_2`` projects back to ``in_ch``.
+    Args:
+        in_ch: Input and output channel count.
+        mlp_ratio: Expansion factor of the hidden width.
+        kernel_size: Kernel size of the depthwise gate conv.
+        act_layer: Activation module instance appended to ``fn_1`` and ``fn_2``.
+    """
+    def __init__(self,
+                 in_ch,
+                 mlp_ratio,
+                 kernel_size,
+                 act_layer,):
+        super().__init__()
+        mlp_ch = in_ch * mlp_ratio
+        self.fn_1 = nn.Sequential(
+            nn.Conv2d(in_ch, mlp_ch, kernel_size=1, padding=0),
+            act_layer,
+        )
+        # fn_2 receives one half of the expanded features, i.e. mlp_ch // 2
+        # channels. Hard-coding in_ch here only worked for mlp_ratio == 2
+        # (where both are equal), so parameter shapes are unchanged in that case.
+        self.fn_2 = nn.Sequential(
+            nn.Conv2d(mlp_ch // 2, in_ch, kernel_size=1, padding=0),
+            act_layer,
+        )
+        self.gate = nn.Conv2d(mlp_ch // 2, mlp_ch // 2,
+                              kernel_size=kernel_size, padding=kernel_size // 2, groups=mlp_ch // 2)
+    def feat_decompose(self, x):
+        # NOTE(review): self.sigma is never defined in this class, so calling
+        # this method raises AttributeError; forward() does not use it.
+        s = x - self.gate(x)
+        x = x + self.sigma * s
+        return x
+    def forward(self, x: torch.Tensor):
+        """Return the gated, re-projected features for ``x``."""
+        x = self.fn_1(x)
+        x, gate = torch.chunk(x, 2, dim=1)
+        gate = self.gate(gate)
+        x = x * gate
+        x = self.fn_2(x)
+        return x
+class LayerNorm(nn.Module):
+    r"""Layer normalisation over the channel axis for two memory layouts.
+    ``channels_last`` (default) expects inputs shaped
+    (batch_size, height, width, channels) and delegates to ``F.layer_norm``.
+    ``channels_first`` expects (batch_size, channels, height, width) and
+    normalises over dimension 1 by hand. Any other ``data_format`` raises
+    ``NotImplementedError`` at construction time.
+    """
+    def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
+        super().__init__()
+        self.weight = nn.Parameter(torch.ones(normalized_shape))
+        self.bias = nn.Parameter(torch.zeros(normalized_shape))
+        self.eps = eps
+        self.data_format = data_format
+        supported = ("channels_last", "channels_first")
+        if self.data_format not in supported:
+            raise NotImplementedError
+        self.normalized_shape = (normalized_shape, )
+    def forward(self, x):
+        """Normalise ``x`` according to ``self.data_format``."""
+        if self.data_format == "channels_last":
+            return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
+        if self.data_format == "channels_first":
+            # Mean and (biased) variance over the channel axis.
+            mean = x.mean(1, keepdim=True)
+            centered = x - mean
+            var = centered.pow(2).mean(1, keepdim=True)
+            normed = centered / torch.sqrt(var + self.eps)
+            return self.weight[:, None, None] * normed + self.bias[:, None, None]

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+torch
+numpy
+PyYAML
+Pillow>=6.2.2
+gradio==4.16.0
+gradio_imageslider==0.0.18