Spaces: Running on T4

thomasht86 committed • commit 2034346 • 1 parent: 5d22e58

Upload folder using huggingface_hub
Changed files:
- README.md +2 -143
- backend/colpali.py +236 -270
- backend/stopwords.py +2 -1
- backend/vespa_app.py +3 -2
- frontend/app.py +71 -24
- frontend/layout.py +2 -1
- globals.css +65 -51
- icons.py +1 -1
- main.py +63 -73
- output.css +145 -61
- requirements.txt +1 -1
- static/.DS_Store +0 -0
README.md CHANGED
@@ -9,152 +9,11 @@ sdk_version: 4.44.0
 app_file: main.py
 pinned: false
 license: apache-2.0
+suggested_hardware: t4-small
 models:
 - vidore/colpaligemma-3b-pt-448-base
 - vidore/colpali-v1.2
 preload_from_hub:
 - vidore/colpaligemma-3b-pt-448-base config.json,model-00001-of-00002.safetensors,model-00002-of-00002.safetensors,model.safetensors.index.json,preprocessor_config.json,special_tokens_map.json,tokenizer.json,tokenizer_config.json 12c59eb7e23bc4c26876f7be7c17760d5d3a1ffa
 - vidore/colpali-v1.2 adapter_config.json,adapter_model.safetensors,preprocessor_config.json,special_tokens_map.json,tokenizer.json,tokenizer_config.json 9912ce6f8a462d8cf2269f5606eabbd2784e764f
 ---
-
-<!-- Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
-
-<picture>
-  <source media="(prefers-color-scheme: dark)" srcset="https://assets.vespa.ai/logos/Vespa-logo-green-RGB.svg">
-  <source media="(prefers-color-scheme: light)" srcset="https://assets.vespa.ai/logos/Vespa-logo-dark-RGB.svg">
-  <img alt="#Vespa" width="200" src="https://assets.vespa.ai/logos/Vespa-logo-dark-RGB.svg" style="margin-bottom: 25px;">
-</picture>
-
-# Visual Retrieval ColPali
-
-# Prepare data and Vespa application
-
-First, install `uv`:
-
-```bash
-curl -LsSf https://astral.sh/uv/install.sh | sh
-```
-
-Then, run:
-
-```bash
-uv sync --extra dev --extra feed
-```
-
-Convert the `prepare_feed_deploy.py` to notebook to:
-
-```bash
-jupytext --to notebook prepare_feed_deploy.py
-```
-
-And launch a Jupyter instance, see https://docs.astral.sh/uv/guides/integration/jupyter/ for recommended approach.
-
-Open and follow the `prepare_feed_deploy.ipynb` notebook to prepare the data and deploy the Vespa application.
-
-# Developing on the web app
-
-
-Then, in this directory, run:
-
-```bash
-uv sync --extra dev
-```
-
-This will generate a virtual environment with the required dependencies at `.venv`.
-
-To activate the virtual environment, run:
-
-```bash
-source .venv/bin/activate
-```
-
-And run development server:
-
-```bash
-python hello.py
-```
-
-## Preparation
-
-First, set up your `.env` file by renaming `.env.example` to `.env` and filling in the required values.
-(Token can be shared with 1password, `HF_TOKEN` is personal and must be created at huggingface)
-
-### Deploying the Vespa app
-
-To deploy the Vespa app, run:
-
-```bash
-python deploy_vespa_app.py --tenant_name mytenant --vespa_application_name myapp --token_id_write mytokenid_write --token_id_read mytokenid_read
-```
-
-You should get an output like:
-
-```bash
-Found token endpoint: https://abcde.z.vespa-app.cloud
-````
-
-### Feeding the data
-
-#### Dependencies
-
-In addition to the python dependencies, you also need `poppler`
-On Mac:
-
-```bash
-brew install poppler
-```
-
-First, you need to create a huggingface token, after you have accepted the term to use the model
-at https://huggingface.co/google/paligemma-3b-mix-448.
-Add the token to your environment variables as `HF_TOKEN`:
-
-```bash
-export HF_TOKEN=yourtoken
-```
-
-To feed the data, run:
-
-```bash
-python feed_vespa.py --vespa_app_url https://myapp.z.vespa-app.cloud --vespa_cloud_secret_token mysecrettoken
-```
-
-### Starting the front-end
-
-```bash
-python main.py
-```
-
-## Deploy to huggingface 🤗
-
-### Compiling dependencies
-
-Before a deploy, make sure to run this to compile the `uv` lock file to `requirements.txt` if you have made changes to the dependencies:
-
-```bash
-uv pip compile pyproject.toml -o requirements.txt
-```
-
-### Deploying to huggingface
-
-To deploy, run
-
-```bash
-huggingface-cli upload vespa-engine/colpali-vespa-visual-retrieval . . --repo-type=space
-```
-
-Note that you need to set `HF_TOKEN` environment variable first.
-This is personal, and must be created at [huggingface](https://huggingface.co/settings/tokens).
-Make sure the token has `write` access.
-Be aware that this will not delete existing files, only modify or add,
-see [huggingface-cli](https://huggingface.co/docs/huggingface_hub/en/guides/upload#upload-from-the-cli) for more
-information.
-
-### Making changes to CSS
-
-To make changes to output.css apply, run
-
-```bash
-shad4fast watch # watches all files passed through the tailwind.config.js content section
-
-shad4fast build # minifies the current output.css file to reduce bundle size in production.
-```
backend/colpali.py CHANGED
@@ -1,308 +1,274 @@
-#!/usr/bin/env python3
-
 import torch
 from PIL import Image
 import numpy as np
-from typing import
+from typing import Generator, Tuple, List, Union, Dict
 from pathlib import Path
 import base64
 from io import BytesIO
-from typing import Union, Tuple, List
-import matplotlib
-import matplotlib.cm as cm
 import re
 import io
-
-import time
-import backend.testquery as testquery
+import matplotlib.cm as cm
 
 from colpali_engine.models import ColPali, ColPaliProcessor
 from colpali_engine.utils.torch_utils import get_torch_device
-from einops import rearrange
 from vidore_benchmark.interpretability.torch_utils import (
     normalize_similarity_map_per_query_token,
 )
-from vidore_benchmark.interpretability.vit_configs import VIT_CONFIG
-
-matplotlib.use("Agg")
-# Prepare the colormap once to avoid recomputation
-colormap = cm.get_cmap("viridis")
-
-COLPALI_GEMMA_MODEL_NAME = "vidore/colpaligemma-3b-pt-448-base"
-
-print(f"Using device: {device}")
 
 
+class SimMapGenerator:
+    """
+    Generates similarity maps based on query embeddings and image patches using the ColPali model.
+    """
+
+    COLPALI_GEMMA_MODEL_NAME = "vidore/colpaligemma-3b-pt-448-base"
+    colormap = cm.get_cmap("viridis")  # Preload colormap for efficiency
+
+    def __init__(self, model_name: str = "vidore/colpali-v1.2", n_patch: int = 32):
+        """
+        Initializes the SimMapGenerator class with a specified model and patch dimension.
+
+        Args:
+            model_name (str): The model name for loading the ColPali model.
+            n_patch (int): The number of patches per dimension.
+        """
+        self.model_name = model_name
+        self.n_patch = n_patch
+        self.device = get_torch_device("auto")
+        print(f"Using device: {self.device}")
+        self.model, self.processor = self.load_model()
+
+    def load_model(self) -> Tuple[ColPali, ColPaliProcessor]:
+        """
+        Loads the ColPali model and processor.
+
+        Returns:
+            Tuple[ColPali, ColPaliProcessor]: Loaded model and processor.
+        """
+        model = ColPali.from_pretrained(
+            self.model_name,
             torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
-            device_map=device,
-        )
+            device_map=self.device,
+        ).eval()
+
+        processor = ColPaliProcessor.from_pretrained(self.model_name)
+        return model, processor
 
-    Yields:
-        Tuple[int, str, str]: A tuple containing the image index, the selected token, and the base64-encoded image.
+    def gen_similarity_maps(
+        self,
+        query: str,
+        query_embs: torch.Tensor,
+        token_idx_map: Dict[int, str],
+        images: List[Union[Path, str]],
+        vespa_sim_maps: List[Dict],
+    ) -> Generator[Tuple[int, str, str], None, None]:
+        """
+        Generates similarity maps for the provided images and query, and returns base64-encoded blended images.
+
+        Args:
+            query (str): The query string.
+            query_embs (torch.Tensor): Query embeddings tensor.
+            token_idx_map (dict): Mapping from indices to tokens.
+            images (List[Union[Path, str]]): List of image paths or base64-encoded strings.
+            vespa_sim_maps (List[Dict]): List of Vespa similarity maps.
+
+        Yields:
+            Tuple[int, str, str]: A tuple containing the image index, selected token, and base64-encoded image.
+        """
+        processed_images, original_images, original_sizes = [], [], []
+        for img in images:
+            img_pil = self._load_image(img)
+            original_images.append(img_pil.copy())
+            original_sizes.append(img_pil.size)
+            processed_images.append(img_pil)
+
+        vespa_sim_map_tensor = self._prepare_similarity_map_tensor(
+            query_embs, vespa_sim_maps
+        )
+        similarity_map_normalized = normalize_similarity_map_per_query_token(
+            vespa_sim_map_tensor
+        )
 
+        for idx, img in enumerate(original_images):
+            for token_idx, token in token_idx_map.items():
+                if self.should_filter_token(token):
+                    continue
+
+                sim_map = similarity_map_normalized[idx, token_idx, :, :]
+                blended_img_base64 = self._blend_image(
+                    img, sim_map, original_sizes[idx]
+                )
+                yield idx, token, token_idx, blended_img_base64
+
+    def _load_image(self, img: Union[Path, str]) -> Image:
+        """
+        Loads an image from a file path or a base64-encoded string.
+
+        Args:
+            img (Union[Path, str]): The image to load.
+
+        Returns:
+            Image: The loaded PIL image.
+        """
+        try:
+            if isinstance(img, Path):
+                return Image.open(img).convert("RGB")
+            elif isinstance(img, str):
+                return Image.open(BytesIO(base64.b64decode(img))).convert("RGB")
+        except Exception as e:
+            raise ValueError(f"Failed to load image: {e}")
+
+    def _prepare_similarity_map_tensor(
+        self, query_embs: torch.Tensor, vespa_sim_maps: List[Dict]
+    ) -> torch.Tensor:
+        """
+        Prepares a similarity map tensor from Vespa similarity maps.
+
+        Args:
+            query_embs (torch.Tensor): Query embeddings tensor.
+            vespa_sim_maps (List[Dict]): List of Vespa similarity maps.
+
+        Returns:
+            torch.Tensor: The prepared similarity map tensor.
+        """
         vespa_sim_map_tensor = torch.zeros(
-            (
-                len(vespa_sim_maps),
-                query_embs.size(dim=1),
-                vit_config.n_patch_per_dim,
-                vit_config.n_patch_per_dim,
-            )
+            (len(vespa_sim_maps), query_embs.size(1), self.n_patch, self.n_patch)
         )
         for idx, vespa_sim_map in enumerate(vespa_sim_maps):
             for cell in vespa_sim_map["quantized"]["cells"]:
                 patch = int(cell["address"]["patch"])
-                if hasattr(processor, "image_seq_length"):
-                    image_seq_length = processor.image_seq_length
+                query_token = int(cell["address"]["querytoken"])
+                value = cell["value"]
+                if hasattr(self.processor, "image_seq_length"):
+                    image_seq_length = self.processor.image_seq_length
                 else:
                     image_seq_length = 1024
 
                 if patch >= image_seq_length:
                     continue
-                query_token = int(cell["address"]["querytoken"])
-                value = cell["value"]
                 vespa_sim_map_tensor[
                     idx,
+                    query_token,
+                    patch // self.n_patch,
+                    patch % self.n_patch,
                 ] = value
-        else:
-            # Preprocess inputs
-            print("Computing similarity maps")
-            start2 = time.perf_counter()
-            input_image_processed = processor.process_images(processed_images).to(device)
-
-            # Forward passes
-            with torch.no_grad():
-                output_image = model.forward(**input_image_processed)
-
-            # Remove the special tokens from the output
-            output_image = output_image[:, : processor.image_seq_length, :]
-
-                "b (h w) c -> b h w c",
-                h=vit_config.n_patch_per_dim,
-                w=vit_config.n_patch_per_dim,
-            )
+        return vespa_sim_map_tensor
 
-    for idx, img in enumerate(original_images):
-        SCALING_FACTOR = 8
-        sim_map_resolution = (
-            max(32, int(original_sizes[idx][0] / SCALING_FACTOR)),
-            max(32, int(original_sizes[idx][1] / SCALING_FACTOR)),
-        )
-
-        for token_idx, token in token_idx_map.items():
-            if should_filter_token(token):
-                continue
-
-            # Get the similarity map for this image and the selected token
-            sim_map = similarity_map_normalized[idx, token_idx, :, :]  # Shape: (h, w)
-
-            # Move the similarity map to CPU, convert to float (as BFloat16 not supported by Numpy) and convert to NumPy array
-            sim_map_np = sim_map.cpu().float().numpy()
-
-            # Resize the similarity map to the original image size
-            sim_map_img = Image.fromarray(sim_map_np)
-            sim_map_resized = sim_map_img.resize(
-                sim_map_resolution, resample=Image.BICUBIC
-            )
-
-            # Convert the resized similarity map to a NumPy array
-            sim_map_resized_np = np.array(sim_map_resized, dtype=np.float32)
-
-            # Normalize the similarity map to range [0, 1]
-            sim_map_min = sim_map_resized_np.min()
-            sim_map_max = sim_map_resized_np.max()
-            if sim_map_max - sim_map_min > 1e-6:
-                sim_map_normalized = (sim_map_resized_np - sim_map_min) / (
-                    sim_map_max - sim_map_min
-                )
-            else:
-                sim_map_normalized = np.zeros_like(sim_map_resized_np)
-
-            # Apply a colormap to the normalized similarity map
-            heatmap = colormap(sim_map_normalized)  # Returns an RGBA array
-
-            # Convert the heatmap to a PIL Image
-            heatmap_uint8 = (heatmap * 255).astype(np.uint8)
-            heatmap_img = Image.fromarray(heatmap_uint8)
-            heatmap_img_rgba = heatmap_img.convert("RGBA")
-
-            # Save the image to a BytesIO buffer
-            buffer = io.BytesIO()
-            heatmap_img_rgba.save(buffer, format="PNG")
-            buffer.seek(0)
-
-            # Encode the image to base64
-            blended_img_base64 = base64.b64encode(buffer.read()).decode("utf-8")
-
-            # Store the base64-encoded image
-            result_per_image[token] = blended_img_base64
-            yield idx, token, token_idx, blended_img_base64
-    end3 = time.perf_counter()
-    print(f"Blending images took: {end3 - start3} s")
+    def _blend_image(
+        self, img: Image, sim_map: torch.Tensor, original_size: Tuple[int, int]
+    ) -> str:
+        """
+        Blends an image with a similarity map and encodes it to base64.
+
+        Args:
+            img (Image): The original image.
+            sim_map (torch.Tensor): The similarity map tensor.
+            original_size (Tuple[int, int]): The original size of the image.
+
+        Returns:
+            str: The base64-encoded blended image.
+        """
+        SCALING_FACTOR = 8
+        sim_map_resolution = (
+            max(32, int(original_size[0] / SCALING_FACTOR)),
+            max(32, int(original_size[1] / SCALING_FACTOR)),
+        )
 
+        sim_map_np = sim_map.cpu().float().numpy()
+        sim_map_img = Image.fromarray(sim_map_np).resize(
+            sim_map_resolution, resample=Image.BICUBIC
+        )
+        sim_map_resized_np = np.array(sim_map_img, dtype=np.float32)
+        sim_map_normalized = self._normalize_sim_map(sim_map_resized_np)
+
+        heatmap = self.colormap(sim_map_normalized)
+        heatmap_img = Image.fromarray((heatmap * 255).astype(np.uint8)).convert("RGBA")
+
+        buffer = io.BytesIO()
+        heatmap_img.save(buffer, format="PNG")
+        return base64.b64encode(buffer.getvalue()).decode("utf-8")
+
+    @staticmethod
+    def _normalize_sim_map(sim_map: np.ndarray) -> np.ndarray:
+        """
+        Normalizes a similarity map to range [0, 1].
+
+        Args:
+            sim_map (np.ndarray): The similarity map.
+
+        Returns:
+            np.ndarray: The normalized similarity map.
+        """
+        sim_map_min, sim_map_max = sim_map.min(), sim_map.max()
+        if sim_map_max - sim_map_min > 1e-6:
+            return (sim_map - sim_map_min) / (sim_map_max - sim_map_min)
+        return np.zeros_like(sim_map)
 
-def get_query_embeddings_and_token_map(
-    processor, model, query
-) -> Tuple[torch.Tensor, dict]:
-    if model is None:  # use static test query data (saves time when testing)
-        return testquery.q_embs, testquery.idx_to_token
-
-    start_time = time.perf_counter()
-    inputs = processor.process_queries([query]).to(model.device)
-    with torch.no_grad():
-        embeddings_query = model(**inputs)
-        q_emb = embeddings_query.to("cpu")[0]  # Extract the single embedding
-    # Use this cell output to choose a token using its index
-    query_tokens = processor.tokenizer.tokenize(processor.decode(inputs.input_ids[0]))
-    # reverse key, values in dictionary
-    print(query_tokens)
-    idx_to_token = {idx: val for idx, val in enumerate(query_tokens)}
-    end_time = time.perf_counter()
-    print(f"Query inference took: {end_time - start_time} s")
-    return q_emb, idx_to_token
-
-
-def should_filter_token(token: str) -> bool:
-    # Pattern to match tokens that start with '<', numbers, whitespace, special characters (except ▁), or the string 'Question'
-    # Will exclude these tokens from the similarity map generation
-    # Does NOT match:
-    # 2
-    # 0
-    # 2
-    # 3
-    # ▁2
-    # ▁hi
-    #
-    # Do match:
-    # <bos>
-    # Question
-    # :
-    # _Percentage
-    # <pad>
-    # \n
-    # ▁
-    # ?
-    # )
-    # %
-    # /)
-    pattern = re.compile(r"^<.*$|^\s+$|^(?!.*\d)(?!▁)\S+$|^Question$|^▁$")
-    if pattern.match(token):
-        return True
-    return False
+    @staticmethod
+    def should_filter_token(token: str) -> bool:
+        """
+        Determines if a token should be filtered out based on predefined patterns.
+
+        The function filters out tokens that:
+
+        - Start with '<' (e.g., '<bos>')
+        - Consist entirely of whitespace
+        - Are purely punctuation (excluding tokens that contain digits or start with '▁')
+        - Start with an underscore '_'
+        - Exactly match the word 'Question'
+        - Are exactly the single character '▁'
+
+        Output of test:
+        Token: '2' | False
+        Token: '0' | False
+        Token: '2' | False
+        Token: '3' | False
+        Token: '▁2' | False
+        Token: '▁hi' | False
+        Token: 'norwegian' | False
+        Token: 'unlisted' | False
+        Token: '<bos>' | True
+        Token: 'Question' | True
+        Token: ':' | True
+        Token: '<pad>' | True
+        Token: '\n' | True
+        Token: '▁' | True
+        Token: '?' | True
+        Token: ')' | True
+        Token: '%' | True
+        Token: '/)' | True
+
+        Args:
+            token (str): The token to check.
+
+        Returns:
+            bool: True if the token should be filtered out, False otherwise.
+        """
+        pattern = re.compile(
+            r"^<.*$|^\s+$|^(?!.*\d)(?!▁)[^\w\s]+$|^_.*$|^Question$|^▁$"
+        )
+        return bool(pattern.match(token))
+
+    # TODO: Would be nice to @lru_cache this method.
+    def get_query_embeddings_and_token_map(
+        self, query: str
+    ) -> Tuple[torch.Tensor, dict]:
+        """
+        Retrieves query embeddings and a token index map.
+
+        Args:
+            query (str): The query string.
+
+        Returns:
+            Tuple[torch.Tensor, dict]: Query embeddings and token index map.
+        """
+        inputs = self.processor.process_queries([query]).to(self.model.device)
+        with torch.no_grad():
+            q_emb = self.model(**inputs).to("cpu")[0]
+
+        query_tokens = self.processor.tokenizer.tokenize(
+            self.processor.decode(inputs.input_ids[0])
+        )
+        idx_to_token = {idx: token for idx, token in enumerate(query_tokens)}
+        return q_emb, idx_to_token
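A minimal usage sketch of the new `SimMapGenerator` API above. The query, the image path, and the one-cell Vespa response stub are hypothetical; in the app, `vespa_sim_maps` comes from `VespaQueryClient.get_sim_maps_from_query`, and the model weights are downloaded on first use.

```python
from pathlib import Path

from backend.colpali import SimMapGenerator

sim_map_gen = SimMapGenerator()  # defaults: vidore/colpali-v1.2, n_patch=32
query = "percentage of female population?"  # hypothetical query
q_embs, idx_to_token = sim_map_gen.get_query_embeddings_and_token_map(query)

# Hypothetical one-cell stand-in for a real Vespa similarity-map response.
vespa_sim_maps = [
    {"quantized": {"cells": [
        {"address": {"patch": "0", "querytoken": "0"}, "value": 0.7},
    ]}}
]

# Note: gen_similarity_maps yields 4-tuples; Path inputs are read from disk,
# while plain strings are treated as base64-encoded images.
for idx, token, token_idx, img_b64 in sim_map_gen.gen_similarity_maps(
    query=query,
    query_embs=q_embs,
    token_idx_map=idx_to_token,
    images=[Path("page_0.jpg")],  # hypothetical image file
    vespa_sim_maps=vespa_sim_maps,
):
    print(f"image {idx}, token {token!r} (#{token_idx}): {len(img_b64)} b64 chars")
```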
backend/stopwords.py CHANGED
@@ -6,6 +6,7 @@ if not spacy.util.is_package("en_core_web_sm"):
     spacy.cli.download("en_core_web_sm")
 nlp = spacy.load("en_core_web_sm")
 
+
 # It would be possible to remove bolding for stopwords without removing them from the query,
 # but that would require a java plugin which we didn't want to complicate this sample app with.
 def filter(text):
@@ -14,4 +15,4 @@ def filter(text):
     if len(tokens) == 0:
         # if we remove all the words we don't have a query at all, so use the original
         return text
-    return " ".join(tokens)
+    return " ".join(tokens)
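A quick sketch of the fallback behavior described in the comment above. The sample queries are illustrative, and the exact surviving tokens depend on the spaCy `en_core_web_sm` model:

```python
from backend.stopwords import filter as remove_stopwords

# Stopwords are stripped before the query is sent to Vespa.
print(remove_stopwords("what is the percentage of female population?"))
# e.g. "percentage female population"

# If every token is a stopword, the original text is returned unchanged,
# so the app never ends up issuing an empty query.
print(remove_stopwords("what is the"))
# "what is the"
```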
backend/vespa_app.py CHANGED
@@ -7,9 +7,10 @@ import torch
 from dotenv import load_dotenv
 from vespa.application import Vespa
 from vespa.io import VespaQueryResponse
-from .colpali import
+from .colpali import SimMapGenerator
 import backend.stopwords
 
+
 class VespaQueryClient:
     MAX_QUERY_TERMS = 64
     VESPA_SCHEMA_NAME = "pdf_page"
@@ -364,7 +365,7 @@ class VespaQueryClient:
         fields_to_add = [
             f"sim_map_{token}_{idx}"
             for idx, token in idx_to_token.items()
-            if not should_filter_token(token)
+            if not SimMapGenerator.should_filter_token(token)
         ]
         for child in result["root"]["children"]:
             for sim_map_key in fields_to_add:
frontend/app.py CHANGED
@@ -1,7 +1,7 @@
 from typing import Optional
 from urllib.parse import quote_plus
 
-from fasthtml.components import H1, H2, Div, Form, Img, NotStr, P, Span
+from fasthtml.components import H1, H2, H3, Br, Div, Form, Img, NotStr, P, Span
 from fasthtml.xtend import A, Script
 from lucide_fasthtml import Lucide
 from shad4fast import Badge, Button, Input, Label, RadioGroup, RadioGroupItem, Separator
@@ -154,7 +154,7 @@ def SearchBox(with_border=False, query_value="", ranking_value="nn+colpali"):
             name="query",
             value=query_value,
             id="search-input",
-            cls="text-base pl-10 border-transparent ring-offset-transparent ring-0 focus-visible:ring-transparent awesomplete",
+            cls="text-base pl-10 border-transparent ring-offset-transparent ring-0 focus-visible:ring-transparent bg-white dark:bg-background awesomplete",
             data_list="#suggestions",
             style="font-size: 1rem",
             autofocus=True,
@@ -366,7 +366,23 @@ def SimMapButtonPoll(query_id, idx, token, token_idx):
     )
 
 
-def SearchResult(results: list, query_id: Optional[str] = None):
+def SearchInfo(search_time, total_count):
+    return (
+        Div(
+            NotStr(
+                f"<span>Found <strong>{total_count}</strong> results in <strong>{search_time}</strong> seconds.</span>"
+            ),
+            cls="grid bg-background border-t text-sm text-center p-3",
+        ),
+    )
+
+
+def SearchResult(
+    results: list,
+    query: str,
+    query_id: Optional[str] = None,
+    search_time: float = 0,
+    total_count: int = 0,
+):
     if not results:
         return Div(
             P(
@@ -376,10 +392,13 @@ def SearchResult(results: list, query_id: Optional[str] = None):
             cls="grid p-10",
         )
 
+    doc_ids = []
     # Otherwise, display the search results
     result_items = []
     for idx, result in enumerate(results):
         fields = result["fields"]  # Extract the 'fields' part of each result
+        doc_id = fields["id"]
+        doc_ids.append(doc_id)
         blur_image_base64 = f"data:image/jpeg;base64,{fields['blur_image']}"
 
         sim_map_fields = {
@@ -472,7 +491,7 @@ def SearchResult(results: list, query_id: Optional[str] = None):
                 Div(
                     Img(
                         src=blur_image_base64,
-                        hx_get=f"/full_image?
+                        hx_get=f"/full_image?doc_id={doc_id}",
                        style="backdrop-filter: blur(5px);",
                         hx_trigger="load",
                         hx_swap="outerHTML",
@@ -493,9 +512,12 @@ def SearchResult(results: list, query_id: Optional[str] = None):
                 ),
                 Div(
                     Div(
+                        A(
+                            Lucide(icon="external-link", size="18"),
+                            f"PDF Source (Page {fields['page_number']})",
+                            href=f"{fields['url']}#page={fields['page_number'] + 1}",
+                            target="_blank",
+                            cls="flex items-center gap-1.5 font-mono bold text-sm",
+                        ),
                     ),
                     cls="flex items-center justify-end",
                 ),
@@ -504,7 +526,10 @@ def SearchResult(results: list, query_id: Optional[str] = None):
                 Div(
                     Div(
                         Div(
-                            H3(
+                            H3(
+                                "Dynamic summary",
+                                cls="text-base font-semibold",
+                            ),
                             P(
                                 NotStr(fields.get("snippet", "")),
                                 cls="text-highlight text-muted-foreground",
@@ -517,23 +542,28 @@ def SearchResult(results: list, query_id: Optional[str] = None):
                 Div(
                     Div(
                         Div(
-                            H3(
+                            H3(
+                                "Full text",
+                                cls="text-base font-semibold",
+                            ),
                             Div(
                                 P(
                                     NotStr(fields.get("text", "")),
                                     cls="text-highlight text-muted-foreground",
                                 ),
-                                Br()
+                                Br(),
                             ),
                             cls="grid grid-rows-[auto_0px] content-start gap-y-3",
                         ),
                         id=f"result-text-full-{idx}",
                         cls="grid gap-y-3 p-8 border border-dashed",
                     ),
-                    Div(
+                    Div(
+                        cls="absolute inset-x-0 bottom-0 bg-gradient-to-t from-[#fcfcfd] dark:from-[#1c2024] pt-[7%]"
+                    ),
+                    cls="relative grid",
                 ),
-                cls="grid grid-rows-[1fr_1fr] gap-y-8 p-8 text-sm",
+                cls="grid grid-rows-[1fr_1fr] xl:grid-rows-[1fr_2fr] gap-y-8 p-8 text-sm",
             ),
             cls="grid bg-background",
         ),
@@ -545,11 +575,13 @@ def SearchResult(results: list, query_id: Optional[str] = None):
                 id=f"image-text-columns-{idx}",
                 cls="relative grid grid-cols-1 border-t grid-image-text-columns",
             ),
-            cls="grid grid-cols-1 grid-rows-[
+            cls="grid grid-cols-1 grid-rows-[auto_auto_1fr]",
         ),
     )
 
-    return
+    return [
+        Div(
+            SearchInfo(search_time, total_count),
             *result_items,
             image_swapping,
             toggle_text_content,
@@ -559,22 +591,37 @@ def SearchResult(results: list, query_id: Optional[str] = None):
         )
 
 
+        ,
+        Div(
+            ChatResult(query_id=query_id, query=query, doc_ids=doc_ids),
+            hx_swap_oob="true",
+            id="chat_messages",
+        ),
+    ]
+
+
+def ChatResult(query_id: str, query: str, doc_ids: Optional[list] = None):
+    messages = Div(LoadingSkeleton())
+
+    if doc_ids:
+        messages = Div(
+            LoadingSkeleton(),
+            hx_ext="sse",
+            sse_connect=f"/get-message?query_id={query_id}&doc_ids={','.join(doc_ids)}&query={quote_plus(query)}",
+            sse_swap="message",
+            sse_close="close",
+            hx_swap="innerHTML",
+        )
+
     return Div(
         Div("AI-response (Gemini-8B)", cls="text-xl font-semibold p-5"),
         Div(
             Div(
-                LoadingSkeleton(),
-                hx_ext="sse",
-                sse_connect=f"/get-message?query_id={query_id}&query={quote_plus(query)}",
-                sse_swap="message",
-                sse_close="close",
-                hx_swap="innerHTML",
-            ),
+                messages,
             ),
             id="chat-messages",
             cls="overflow-auto min-h-0 grid items-end px-5",
         ),
+        id="chat_messages",
         cls="h-full grid grid-rows-[auto_1fr_auto] min-h-0 gap-3",
     )
frontend/layout.py CHANGED
@@ -151,7 +151,7 @@ def Links():
     )
 
 
-def Layout(*c, **kwargs):
+def Layout(*c, is_home=False, **kwargs):
     return (
         Title("Visual Retrieval ColPali"),
         Body(
@@ -162,6 +162,7 @@ def Layout(*c, **kwargs):
             ),
             *c,
             **kwargs,
+            data_is_home=str(is_home).lower(),
             cls="grid grid-rows-[minmax(0,55px)_minmax(0,1fr)] min-h-0",
         ),
         layout_script,
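The new `is_home` flag is serialized onto `<body>` as a `data-is-home` attribute, which the `body[data-is-home="true"]` rules added to `globals.css` below key off to apply the home-page gradient. A sketch of how a route might pass it (the route body is illustrative):

```python
from fasthtml.components import Main, P
from frontend.layout import Layout

# Renders <body data-is-home="true" ...>; non-home pages get data-is-home="false".
page = Layout(Main(P("home page content")), is_home=True)
```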
globals.css CHANGED
@@ -5,58 +5,57 @@
 
 @layer base {
   :root {
-    --background:
-    --foreground:
-    --card:
-    --card-foreground:
-    --popover:
-    --popover-foreground:
-    --primary:
-    --primary-foreground:
-    --secondary:
-    --secondary-foreground:
-    --muted:
-    --muted-foreground:
-    --accent:
-    --accent-foreground:
-    --destructive:
-    --destructive-foreground:
-    --border:
-    --input:
-    --ring:
-    --chart-
-    --chart-
-    --chart-
-    --chart-
-    --chart-5: 27 87% 67%;
+    --background: 240 20% 99%; /* 1 */
+    --foreground: 210 13% 13%; /* 12 */
+    --card: 240 20% 99%; /* 1 */
+    --card-foreground: 210 13% 13%; /* 12 */
+    --popover: 240 20% 99%; /* 1 */
+    --popover-foreground: 210 13% 13%; /* 12 */
+    --primary: 210 13% 13%; /* 12 */
+    --primary-foreground: 240 20% 98%; /* 2 */
+    --secondary: 240 11% 95%; /* 3 */
+    --secondary-foreground: 210 13% 13%; /* 12 */
+    --muted: 240 11% 95%; /* 3 */
+    --muted-foreground: 220 6% 40%; /* 11 */
+    --accent: 240 11% 95%; /* 3 */
+    --accent-foreground: 210 13% 13%; /* 12 */
+    --destructive: 358 75% 59%; /* 9 - red */
+    --destructive-foreground: 240 20% 98%; /* 2 */
+    --border: 240 10% 86%; /* 6 */
+    --input: 240 10% 86%; /* 6 */
+    --ring: 210 13% 13%; /* 12 */
+    --chart-1: 10 78% 54%; /* 9 - tomato */
+    --chart-2: 173 80% 36%; /* 9 - teal */
+    --chart-3: 206 100% 50%; /* 9 - blue */
+    --chart-4: 42 100% 62%; /* 9 - amber */
+    --chart-5: 23 93% 53%; /* 9 - orange */
   }
 
   .dark {
-    --background:
-    --foreground:
-    --card:
-    --card-foreground:
-    --popover:
-    --popover-foreground:
-    --primary:
-    --primary-foreground:
-    --secondary:
-    --secondary-foreground:
-    --muted:
-    --muted-foreground:
-    --accent:
-    --accent-foreground:
-    --destructive:
-    --destructive-foreground:
-    --border:
-    --input:
-    --ring:
-    --chart-1:
-    --chart-2:
-    --chart-3:
-    --chart-4:
-    --chart-5:
+    --background: 240 6% 7%; /* 1 */
+    --foreground: 220 9% 94%; /* 12 */
+    --card: 240 6% 7%; /* 1 */
+    --card-foreground: 220 9% 94%; /* 12 */
+    --popover: 240 6% 7%; /* 1 */
+    --popover-foreground: 220 9% 94%; /* 12 */
+    --primary: 220 9% 94%; /* 12 */
+    --primary-foreground: 220 6% 10%; /* 2 */
+    --secondary: 225 6% 14%; /* 3 */
+    --secondary-foreground: 220 9% 94%; /* 12 */
+    --muted: 225 6% 14%; /* 3 */
+    --muted-foreground: 216 7% 71%; /* 11 */
+    --accent: 225 6% 14%; /* 3 */
+    --accent-foreground: 220 9% 94%; /* 12 */
+    --destructive: 358 75% 59%; /* 9 - red */
+    --destructive-foreground: 220 9% 94%; /* 12 */
+    --border: 213 8% 23%; /* 6 */
+    --input: 213 8% 23%; /* 6 */
+    --ring: 220 9% 94%; /* 12 */
+    --chart-1: 10 78% 54%; /* 9 - tomato */
+    --chart-2: 173 80% 36%; /* 9 - teal */
+    --chart-3: 206 100% 50%; /* 9 - blue */
+    --chart-4: 42 100% 62%; /* 9 - amber */
+    --chart-5: 23 93% 53%; /* 9 - orange */
   }
 }
 
@@ -193,6 +192,16 @@ header {
   grid-column: 1/-1;
 }
 
+body {
+  &[data-is-home="true"] {
+    background: radial-gradient(circle at 50% 100%, #fcfcfd, #fcfcfd, #fdfdfe, #fdfdfe, #fefefe, #fefefe, #ffffff, #ffffff);
+
+    .dark & {
+      background: radial-gradient(circle at 50% 50%, #272a2d, #242629, #212326, #1e1f22, #1b1c1e, #18181b, #151517, #111113);
+    }
+  }
+}
+
 main {
   overflow: auto;
 }
@@ -236,14 +245,19 @@ aside {
 }
 
 .awesomplete > ul {
-  @apply text-sm space-y-
+  @apply text-sm space-y-1;
   margin: 0;
   border-top: none;
   border-left: 1px solid hsl(var(--input));
   border-right: 1px solid hsl(var(--input));
   border-bottom: 1px solid hsl(var(--input));
   border-radius: 0 0 calc(var(--radius) - 2px) calc(var(--radius) - 2px);
-  background:
+  background: white;
+
+  .dark & {
+    background: hsl(var(--background));
+  }
+
   box-shadow: none;
   text-shadow: none;
 }
icons.py CHANGED
@@ -1 +1 @@
-ICONS = {"chevrons-right": "<path d=\"m6 17 5-5-5-5\"></path><path d=\"m13 17 5-5-5-5\"></path>", "moon": "<path d=\"M12 3a6 6 0 0 0 9 9 9 9 0 1 1-9-9Z\"></path>", "sun": "<circle cx=\"12\" cy=\"12\" r=\"4\"></circle><path d=\"M12 2v2\"></path><path d=\"M12 20v2\"></path><path d=\"m4.93 4.93 1.41 1.41\"></path><path d=\"m17.66 17.66 1.41 1.41\"></path><path d=\"M2 12h2\"></path><path d=\"M20 12h2\"></path><path d=\"m6.34 17.66-1.41 1.41\"></path><path d=\"m19.07 4.93-1.41 1.41\"></path>", "github": "<path d=\"M15 22v-4a4.8 4.8 0 0 0-1-3.5c3 0 6-2 6-5.5.08-1.25-.27-2.48-1-3.5.28-1.15.28-2.35 0-3.5 0 0-1 0-3 1.5-2.64-.5-5.36-.5-8 0C6 2 5 2 5 2c-.3 1.15-.3 2.35 0 3.5A5.403 5.403 0 0 0 4 9c0 3.5 3 5.5 6 5.5-.39.49-.68 1.05-.85 1.65-.17.6-.22 1.23-.15 1.85v4\"></path><path d=\"M9 18c-4.51 2-5-2-7-2\"></path>", "slack": "<rect height=\"8\" rx=\"1.5\" width=\"3\" x=\"13\" y=\"2\"></rect><path d=\"M19 8.5V10h1.5A1.5 1.5 0 1 0 19 8.5\"></path><rect height=\"8\" rx=\"1.5\" width=\"3\" x=\"8\" y=\"14\"></rect><path d=\"M5 15.5V14H3.5A1.5 1.5 0 1 0 5 15.5\"></path><rect height=\"3\" rx=\"1.5\" width=\"8\" x=\"14\" y=\"13\"></rect><path d=\"M15.5 19H14v1.5a1.5 1.5 0 1 0 1.5-1.5\"></path><rect height=\"3\" rx=\"1.5\" width=\"8\" x=\"2\" y=\"8\"></rect><path d=\"M8.5 5H10V3.5A1.5 1.5 0 1 0 8.5 5\"></path>", "settings": "<path d=\"M12.22 2h-.44a2 2 0 0 0-2 2v.18a2 2 0 0 1-1 1.73l-.43.25a2 2 0 0 1-2 0l-.15-.08a2 2 0 0 0-2.73.73l-.22.38a2 2 0 0 0 .73 2.73l.15.1a2 2 0 0 1 1 1.72v.51a2 2 0 0 1-1 1.74l-.15.09a2 2 0 0 0-.73 2.73l.22.38a2 2 0 0 0 2.73.73l.15-.08a2 2 0 0 1 2 0l.43.25a2 2 0 0 1 1 1.73V20a2 2 0 0 0 2 2h.44a2 2 0 0 0 2-2v-.18a2 2 0 0 1 1-1.73l.43-.25a2 2 0 0 1 2 0l.15.08a2 2 0 0 0 2.73-.73l.22-.39a2 2 0 0 0-.73-2.73l-.15-.08a2 2 0 0 1-1-1.74v-.5a2 2 0 0 1 1-1.74l.15-.09a2 2 0 0 0 .73-2.73l-.22-.38a2 2 0 0 0-2.73-.73l-.15.08a2 2 0 0 1-2 0l-.43-.25a2 2 0 0 1-1-1.73V4a2 2 0 0 0-2-2z\"></path><circle cx=\"12\" cy=\"12\" r=\"3\"></circle>", "arrow-right": "<path d=\"M5 12h14\"></path><path d=\"m12 5 7 7-7 7\"></path>", "search": "<circle cx=\"11\" cy=\"11\" r=\"8\"></circle><path d=\"m21 21-4.3-4.3\"></path>", "file-search": "<path d=\"M14 2v4a2 2 0 0 0 2 2h4\"></path><path d=\"M4.268 21a2 2 0 0 0 1.727 1H18a2 2 0 0 0 2-2V7l-5-5H6a2 2 0 0 0-2 2v3\"></path><path d=\"m9 18-1.5-1.5\"></path><circle cx=\"5\" cy=\"14\" r=\"3\"></circle>", "message-circle-question": "<path d=\"M7.9 20A9 9 0 1 0 4 16.1L2 22Z\"></path><path d=\"M9.09 9a3 3 0 0 1 5.83 1c0 2-3 3-3 3\"></path><path d=\"M12 17h.01\"></path>", "text-search": "<path d=\"M21 6H3\"></path><path d=\"M10 12H3\"></path><path d=\"M10 18H3\"></path><circle cx=\"17\" cy=\"15\" r=\"3\"></circle><path d=\"m21 19-1.9-1.9\"></path>", "maximize": "<path d=\"M8 3H5a2 2 0 0 0-2 2v3\"></path><path d=\"M21 8V5a2 2 0 0 0-2-2h-3\"></path><path d=\"M3 16v3a2 2 0 0 0 2 2h3\"></path><path d=\"M16 21h3a2 2 0 0 0 2-2v-3\"></path>", "expand": "<path d=\"m21 21-6-6m6 6v-4.8m0 4.8h-4.8\"></path><path d=\"M3 16.2V21m0 0h4.8M3 21l6-6\"></path><path d=\"M21 7.8V3m0 0h-4.8M21 3l-6 6\"></path><path d=\"M3 7.8V3m0 0h4.8M3 3l6 6\"></path>", "fullscreen": "<path d=\"M3 7V5a2 2 0 0 1 2-2h2\"></path><path d=\"M17 3h2a2 2 0 0 1 2 2v2\"></path><path d=\"M21 17v2a2 2 0 0 1-2 2h-2\"></path><path d=\"M7 21H5a2 2 0 0 1-2-2v-2\"></path><rect height=\"8\" rx=\"1\" width=\"10\" x=\"7\" y=\"8\"></rect>", "images": "<path d=\"M18 22H4a2 2 0 0 1-2-2V6\"></path><path d=\"m22 13-1.296-1.296a2.41 2.41 0 0 0-3.408 0L11 18\"></path><circle cx=\"12\" cy=\"8\" r=\"2\"></circle><rect height=\"16\" rx=\"2\" width=\"16\" x=\"6\" y=\"2\"></rect>", "circle": "<circle cx=\"12\" cy=\"12\" r=\"10\"></circle>", "loader-circle": "<path d=\"M21 12a9 9 0 1 1-6.219-8.56\"></path>", "file-text": "<path d=\"M15 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V7Z\"></path><path d=\"M14 2v4a2 2 0 0 0 2 2h4\"></path><path d=\"M10 9H8\"></path><path d=\"M16 13H8\"></path><path d=\"M16 17H8\"></path>", "file-question": "<path d=\"M12 17h.01\"></path><path d=\"M15 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V7z\"></path><path d=\"M9.1 9a3 3 0 0 1 5.82 1c0 2-3 3-3 3\"></path>"}
+ICONS = {"chevrons-right": "<path d=\"m6 17 5-5-5-5\"></path><path d=\"m13 17 5-5-5-5\"></path>", "moon": "<path d=\"M12 3a6 6 0 0 0 9 9 9 9 0 1 1-9-9Z\"></path>", "sun": "<circle cx=\"12\" cy=\"12\" r=\"4\"></circle><path d=\"M12 2v2\"></path><path d=\"M12 20v2\"></path><path d=\"m4.93 4.93 1.41 1.41\"></path><path d=\"m17.66 17.66 1.41 1.41\"></path><path d=\"M2 12h2\"></path><path d=\"M20 12h2\"></path><path d=\"m6.34 17.66-1.41 1.41\"></path><path d=\"m19.07 4.93-1.41 1.41\"></path>", "github": "<path d=\"M15 22v-4a4.8 4.8 0 0 0-1-3.5c3 0 6-2 6-5.5.08-1.25-.27-2.48-1-3.5.28-1.15.28-2.35 0-3.5 0 0-1 0-3 1.5-2.64-.5-5.36-.5-8 0C6 2 5 2 5 2c-.3 1.15-.3 2.35 0 3.5A5.403 5.403 0 0 0 4 9c0 3.5 3 5.5 6 5.5-.39.49-.68 1.05-.85 1.65-.17.6-.22 1.23-.15 1.85v4\"></path><path d=\"M9 18c-4.51 2-5-2-7-2\"></path>", "slack": "<rect height=\"8\" rx=\"1.5\" width=\"3\" x=\"13\" y=\"2\"></rect><path d=\"M19 8.5V10h1.5A1.5 1.5 0 1 0 19 8.5\"></path><rect height=\"8\" rx=\"1.5\" width=\"3\" x=\"8\" y=\"14\"></rect><path d=\"M5 15.5V14H3.5A1.5 1.5 0 1 0 5 15.5\"></path><rect height=\"3\" rx=\"1.5\" width=\"8\" x=\"14\" y=\"13\"></rect><path d=\"M15.5 19H14v1.5a1.5 1.5 0 1 0 1.5-1.5\"></path><rect height=\"3\" rx=\"1.5\" width=\"8\" x=\"2\" y=\"8\"></rect><path d=\"M8.5 5H10V3.5A1.5 1.5 0 1 0 8.5 5\"></path>", "settings": "<path d=\"M12.22 2h-.44a2 2 0 0 0-2 2v.18a2 2 0 0 1-1 1.73l-.43.25a2 2 0 0 1-2 0l-.15-.08a2 2 0 0 0-2.73.73l-.22.38a2 2 0 0 0 .73 2.73l.15.1a2 2 0 0 1 1 1.72v.51a2 2 0 0 1-1 1.74l-.15.09a2 2 0 0 0-.73 2.73l.22.38a2 2 0 0 0 2.73.73l.15-.08a2 2 0 0 1 2 0l.43.25a2 2 0 0 1 1 1.73V20a2 2 0 0 0 2 2h.44a2 2 0 0 0 2-2v-.18a2 2 0 0 1 1-1.73l.43-.25a2 2 0 0 1 2 0l.15.08a2 2 0 0 0 2.73-.73l.22-.39a2 2 0 0 0-.73-2.73l-.15-.08a2 2 0 0 1-1-1.74v-.5a2 2 0 0 1 1-1.74l.15-.09a2 2 0 0 0 .73-2.73l-.22-.38a2 2 0 0 0-2.73-.73l-.15.08a2 2 0 0 1-2 0l-.43-.25a2 2 0 0 1-1-1.73V4a2 2 0 0 0-2-2z\"></path><circle cx=\"12\" cy=\"12\" r=\"3\"></circle>", "arrow-right": "<path d=\"M5 12h14\"></path><path d=\"m12 5 7 7-7 7\"></path>", "search": "<circle cx=\"11\" cy=\"11\" r=\"8\"></circle><path d=\"m21 21-4.3-4.3\"></path>", "file-search": "<path d=\"M14 2v4a2 2 0 0 0 2 2h4\"></path><path d=\"M4.268 21a2 2 0 0 0 1.727 1H18a2 2 0 0 0 2-2V7l-5-5H6a2 2 0 0 0-2 2v3\"></path><path d=\"m9 18-1.5-1.5\"></path><circle cx=\"5\" cy=\"14\" r=\"3\"></circle>", "message-circle-question": "<path d=\"M7.9 20A9 9 0 1 0 4 16.1L2 22Z\"></path><path d=\"M9.09 9a3 3 0 0 1 5.83 1c0 2-3 3-3 3\"></path><path d=\"M12 17h.01\"></path>", "text-search": "<path d=\"M21 6H3\"></path><path d=\"M10 12H3\"></path><path d=\"M10 18H3\"></path><circle cx=\"17\" cy=\"15\" r=\"3\"></circle><path d=\"m21 19-1.9-1.9\"></path>", "maximize": "<path d=\"M8 3H5a2 2 0 0 0-2 2v3\"></path><path d=\"M21 8V5a2 2 0 0 0-2-2h-3\"></path><path d=\"M3 16v3a2 2 0 0 0 2 2h3\"></path><path d=\"M16 21h3a2 2 0 0 0 2-2v-3\"></path>", "expand": "<path d=\"m21 21-6-6m6 6v-4.8m0 4.8h-4.8\"></path><path d=\"M3 16.2V21m0 0h4.8M3 21l6-6\"></path><path d=\"M21 7.8V3m0 0h-4.8M21 3l-6 6\"></path><path d=\"M3 7.8V3m0 0h4.8M3 3l6 6\"></path>", "fullscreen": "<path d=\"M3 7V5a2 2 0 0 1 2-2h2\"></path><path d=\"M17 3h2a2 2 0 0 1 2 2v2\"></path><path d=\"M21 17v2a2 2 0 0 1-2 2h-2\"></path><path d=\"M7 21H5a2 2 0 0 1-2-2v-2\"></path><rect height=\"8\" rx=\"1\" width=\"10\" x=\"7\" y=\"8\"></rect>", "images": "<path d=\"M18 22H4a2 2 0 0 1-2-2V6\"></path><path d=\"m22 13-1.296-1.296a2.41 2.41 0 0 0-3.408 0L11 18\"></path><circle cx=\"12\" cy=\"8\" r=\"2\"></circle><rect height=\"16\" rx=\"2\" width=\"16\" x=\"6\" y=\"2\"></rect>", "circle": "<circle cx=\"12\" cy=\"12\" r=\"10\"></circle>", "loader-circle": "<path d=\"M21 12a9 9 0 1 1-6.219-8.56\"></path>", "file-text": "<path d=\"M15 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V7Z\"></path><path d=\"M14 2v4a2 2 0 0 0 2 2h4\"></path><path d=\"M10 9H8\"></path><path d=\"M16 13H8\"></path><path d=\"M16 17H8\"></path>", "file-question": "<path d=\"M12 17h.01\"></path><path d=\"M15 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V7z\"></path><path d=\"M9.1 9a3 3 0 0 1 5.82 1c0 2-3 3-3 3\"></path>", "external-link": "<path d=\"M15 3h6v6\"></path><path d=\"M10 14 21 3\"></path><path d=\"M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6\"></path>"}
main.py
CHANGED
@@ -1,36 +1,37 @@
|
|
1 |
import asyncio
|
|
|
2 |
import os
|
3 |
import time
|
4 |
-
from pathlib import Path
|
5 |
-
from concurrent.futures import ThreadPoolExecutor
|
6 |
import uuid
|
|
|
|
|
|
|
7 |
import google.generativeai as genai
|
|
|
8 |
from fasthtml.common import (
|
|
|
9 |
Div,
|
|
|
|
|
10 |
Img,
|
|
|
|
|
11 |
Main,
|
12 |
P,
|
13 |
-
Script,
|
14 |
-
Link,
|
15 |
-
fast_app,
|
16 |
-
HighlightJS,
|
17 |
-
FileResponse,
|
18 |
RedirectResponse,
|
19 |
-
|
20 |
StreamingResponse,
|
21 |
-
|
22 |
serve,
|
23 |
)
|
|
|
24 |
from shad4fast import ShadHead
|
25 |
from vespa.application import Vespa
|
26 |
-
import base64
|
27 |
-
from fastcore.parallel import threaded
|
28 |
-
from PIL import Image
|
29 |
|
30 |
-
from backend.colpali import
|
31 |
-
from backend.modelmanager import ModelManager
|
32 |
from backend.vespa_app import VespaQueryClient
|
33 |
from frontend.app import (
|
|
|
34 |
ChatResult,
|
35 |
Home,
|
36 |
Search,
|
@@ -38,7 +39,6 @@ from frontend.app import (
|
|
38 |
SearchResult,
|
39 |
SimMapButtonPoll,
|
40 |
SimMapButtonReady,
|
41 |
-
AboutThisDemo,
|
42 |
)
|
43 |
from frontend.layout import Layout
|
44 |
|
@@ -90,10 +90,10 @@ thread_pool = ThreadPoolExecutor()
|
|
90 |
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
|
91 |
GEMINI_SYSTEM_PROMPT = """If the user query is a question, try your best to answer it based on the provided images.
|
92 |
If the user query can not be interpreted as a question, or if the answer to the query can not be inferred from the images,
|
93 |
-
answer with the exact phrase "I am sorry, I
|
94 |
Your response should be HTML formatted, but only simple tags, such as <b>. <p>, <i>, <br> <ul> and <li> are allowed. No HTML tables.
|
95 |
This means that newlines will be replaced with <br> tags, bold text will be enclosed in <b> tags, and so on.
|
96 |
-
|
97 |
"""
|
98 |
gemini_model = genai.GenerativeModel(
|
99 |
"gemini-1.5-flash-8b", system_instruction=GEMINI_SYSTEM_PROMPT
|
@@ -107,7 +107,7 @@ os.makedirs(SIM_MAP_DIR, exist_ok=True)
|
|
107 |
|
108 |
@app.on_event("startup")
|
109 |
def load_model_on_startup():
|
110 |
-
app.
|
111 |
return
|
112 |
|
113 |
|
```diff
@@ -131,7 +131,7 @@ def serve_static(filepath: str):
 def get(session):
     if "session_id" not in session:
         session["session_id"] = str(uuid.uuid4())
-    return Layout(Main(Home()))
+    return Layout(Main(Home()), is_home=True)


 @rt("/about-this-demo")
@@ -140,19 +140,16 @@ def get():


 @rt("/search")
-def get(request):
-
-    query_value = request.query_params.get("query", "").strip()
-    ranking_value = request.query_params.get("ranking", "nn+colpali")
-    print("/search: Fetching results for ranking_value:", ranking_value)
+def get(request, query: str = "", ranking: str = "nn+colpali"):
+    print("/search: Fetching results for ranking_value:", ranking)

     # Always render the SearchBox first
-    if not …
+    if not query:
         # Show SearchBox and a message for missing query
         return Layout(
             Main(
                 Div(
-                    SearchBox(query_value=…
+                    SearchBox(query_value=query, ranking_value=ranking),
                     Div(
                         P(
                             "No query provided. Please enter a query.",
@@ -165,35 +162,17 @@ def get(request):
             )
         )
     # Generate a unique query_id based on the query and ranking value
-    query_id = generate_query_id(…
+    query_id = generate_query_id(query, ranking)
     # Show the loading message if a query is provided
     return Layout(
         Main(Search(request), data_overlayscrollbars_initialize=True, cls="border-t"),
         Aside(
-            ChatResult(query_id=query_id, query=…
+            ChatResult(query_id=query_id, query=query),
             cls="border-t border-l hidden md:block",
         ),
     )  # Show SearchBox and Loading message initially


-@rt("/fetch_results2")
-def get(query: str, ranking: str):
-    # 1. Get the results from Vespa (without sim_maps and full_images)
-    # Call search-endpoint in Vespa sync.
-
-    # 2. Kick off tasks to fetch sim_maps and full_images
-    # Sim maps - call search endpoint async.
-    # (A) New rank_profile that does not calculate sim_maps.
-    # (A) Make vespa endpoints take select_fields as a parameter.
-    # One sim map per image per token.
-    # the filename query_id_result_idx_token_idx.png
-    # Full image. based on the doc_id.
-    # Each of these tasks saves to disk.
-    # Need a cleanup task to delete old files.
-    # Polling endpoints for sim_maps and full_images checks if file exists and returns it.
-    pass
-
-
 @rt("/fetch_results")
 async def get(session, request, query: str, ranking: str):
     if "hx-request" not in request.headers:
```
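Both `/search` and `/fetch_results` call `generate_query_id(query, ranking)` to key on-disk artifacts, so the two handlers must derive the same id from the same inputs. Its body is not part of this diff; a hypothetical implementation consistent with its call sites could be as simple as a stable hash:

```python
# Hypothetical sketch only -- the repository's actual generate_query_id is not
# shown in this diff; any stable digest of (query, ranking) would satisfy the
# way /search and /fetch_results use it.
import hashlib

def generate_query_id(query: str, ranking: str) -> str:
    return hashlib.md5(f"{query}:{ranking}".encode("utf-8")).hexdigest()
```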
```diff
@@ -203,9 +182,10 @@ async def get(session, request, query: str, ranking: str):
     query_id = generate_query_id(query, ranking)
     print(f"Query id in /fetch_results: {query_id}")
     # Run the embedding and query against Vespa app
-    …
-    …
-    …
+
+    q_embs, idx_to_token = app.sim_map_generator.get_query_embeddings_and_token_map(
+        query
+    )

     start = time.perf_counter()
     # Fetch real search results from Vespa
@@ -219,15 +199,20 @@ async def get(session, request, query: str, ranking: str):
     print(
         f"Search results fetched in {end - start:.2f} seconds, Vespa says searchtime was {result['timing']['searchtime']} seconds"
     )
+    search_time = result["timing"]["searchtime"]
+    total_count = result["root"]["fields"]["totalCount"]
+
     search_results = vespa_app.results_to_search_results(result, idx_to_token)
+
     get_and_store_sim_maps(
         query_id=query_id,
         query=query,
         q_embs=q_embs,
         ranking=ranking,
         idx_to_token=idx_to_token,
+        doc_ids=[result["fields"]["id"] for result in search_results],
     )
-    return SearchResult(search_results, query_id)
+    return SearchResult(search_results, query, query_id, search_time, total_count)


 def get_results_children(result):
@@ -247,7 +232,9 @@ async def poll_vespa_keepalive():


 @threaded
-def get_and_store_sim_maps(query_id, query: str, q_embs, ranking, idx_to_token):
+def get_and_store_sim_maps(
+    query_id, query: str, q_embs, ranking, idx_to_token, doc_ids
+):
     ranking_sim = ranking + "_sim"
     vespa_sim_maps = vespa_app.get_sim_maps_from_query(
         query=query,
@@ -255,9 +242,7 @@ def get_and_store_sim_maps(query_id, query: str, q_embs, ranking, idx_to_token):
         ranking=ranking_sim,
         idx_to_token=idx_to_token,
     )
-    img_paths = [
-        IMG_DIR / f"{query_id}_{idx}.jpg" for idx in range(len(vespa_sim_maps))
-    ]
+    img_paths = [IMG_DIR / f"{doc_id}.jpg" for doc_id in doc_ids]
     # All images should be downloaded, but best to wait 5 secs
     max_wait = 5
     start_time = time.time()
@@ -269,10 +254,7 @@ def get_and_store_sim_maps(query_id, query: str, q_embs, ranking, idx_to_token):
     if not all([os.path.exists(img_path) for img_path in img_paths]):
        print(f"Images not ready in 5 seconds for query_id: {query_id}")
        return False
-    sim_map_generator = gen_similarity_maps(
-        model=app.manager.model,
-        processor=app.manager.processor,
-        device=app.manager.device,
+    sim_map_generator = app.sim_map_generator.gen_similarity_maps(
         query=query,
         query_embs=q_embs,
         token_idx_map=idx_to_token,
```
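`get_and_store_sim_maps` is fired and forgotten via fastcore's `@threaded` decorator; the frontend's `SimMapButtonPoll` then polls until the PNGs land on disk. A sketch of that readiness check, using the `query_id_result_idx_token_idx.png` naming scheme noted in the removed `/fetch_results2` stub (the `SIM_MAP_DIR` value is an assumption; main.py only shows its `os.makedirs` call):

```python
from pathlib import Path

SIM_MAP_DIR = Path("static/sim_maps")  # assumed value, not confirmed by the diff

def sim_map_ready(query_id: str, idx: int, token_idx: int) -> Path | None:
    """Return the sim-map path once the background thread has written it, else None."""
    path = SIM_MAP_DIR / f"{query_id}_{idx}_{token_idx}.png"
    return path if path.exists() else None
```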
```diff
@@ -312,17 +294,17 @@ async def get_sim_map(query_id: str, idx: int, token: str, token_idx: int):


 @app.get("/full_image")
-async def full_image(docid: str, query_id: str, idx: int):
+async def full_image(doc_id: str):
     """
     Endpoint to get the full quality image for a given result id.
     """
-    img_path = IMG_DIR / f"{query_id}_{idx}.jpg"
+    img_path = IMG_DIR / f"{doc_id}.jpg"
     if not os.path.exists(img_path):
-        image_data = await vespa_app.get_full_image_from_vespa(…
+        image_data = await vespa_app.get_full_image_from_vespa(doc_id)
         # image data is base 64 encoded string. Save it to disk as jpg.
         with open(img_path, "wb") as f:
             f.write(base64.b64decode(image_data))
-        print(f"Full image saved to disk for …
+        print(f"Full image saved to disk for doc_id: {doc_id}")
     else:
         with open(img_path, "rb") as f:
             image_data = base64.b64encode(f.read()).decode("utf-8")
@@ -334,8 +316,9 @@ async def full_image(docid: str, query_id: str, idx: int):


 @rt("/suggestions")
-async def get_suggestions(request):
-    …
+async def get_suggestions(query: str = ""):
+    """Endpoint to get suggestions as user types in the search box"""
+    query = query.lower().strip()

     if query:
         suggestions = await vespa_app.get_suggestions(query)
@@ -345,15 +328,20 @@ async def get_suggestions(request):
     return JSONResponse({"suggestions": []})


-async def message_generator(query_id: str, query: str):
-    …
+async def message_generator(query_id: str, query: str, doc_ids: list):
+    """Generator function to yield SSE messages for chat response"""
+    images = {}
     num_images = 3  # Number of images before firing chat request
     max_wait = 10  # seconds
     start_time = time.time()
     # Check if full images are ready on disk
-    while …
+    while (
+        len(images) < min(num_images, len(doc_ids))
+        and time.time() - start_time < max_wait
+    ):
         for idx in range(num_images):
-            …
+            image_filename = IMG_DIR / f"{doc_ids[idx]}.jpg"
+            if not os.path.exists(image_filename):
                 print(
                     f"Message generator: Full image not ready for query_id: {query_id}, idx: {idx}"
                 )
@@ -362,12 +350,14 @@ async def message_generator(query_id: str, query: str):
             print(
                 f"Message generator: image ready for query_id: {query_id}, idx: {idx}"
             )
-            images…
+            images[image_filename] = Image.open(image_filename)
         await asyncio.sleep(0.2)
+
+    images = list(images.values())
     # yield message with number of images ready
-    yield f"event: message\ndata: Generating response based on {len(images)} images…
+    yield f"event: message\ndata: Generating response based on {len(images)} images...\n\n"
     if not images:
-        yield "event: message\ndata: …
+        yield "event: message\ndata: Failed to send images to Gemini-8B!\n\n"
         yield "event: close\ndata: \n\n"
         return

@@ -388,9 +378,9 @@ async def message_generator(query_id: str, query: str):


 @app.get("/get-message")
-async def get_message(query_id: str, query: str):
+async def get_message(query_id: str, query: str, doc_ids: str):
     return StreamingResponse(
-        message_generator(query_id=query_id, query=query),
+        message_generator(query_id=query_id, query=query, doc_ids=doc_ids.split(",")),
         media_type="text/event-stream",
     )

```
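Taken together, `/fetch_results` returns the result list immediately while sim maps and full images stream in afterwards, and the chat pane consumes `/get-message` as a server-sent-events stream, passing `doc_ids` as a comma-separated string that is split server-side. A minimal client sketch of that contract (host, port, and all parameter values are placeholders; FastHTML's `serve()` defaults to port 5001, but verify against your deployment):

```python
# Hypothetical client for the /get-message SSE endpoint above.
import httpx

params = {
    "query_id": "example-query-id",   # would come from generate_query_id(query, ranking)
    "query": "what is colpali?",
    "doc_ids": "doc1,doc2,doc3",      # comma-separated; split server-side
}
with httpx.stream("GET", "http://localhost:5001/get-message", params=params, timeout=30.0) as r:
    for line in r.iter_lines():
        if line.startswith("data: "):
            print(line.removeprefix("data: "))  # status lines, then streamed tokens
        elif line.startswith("event: close"):
            break
```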
output.css
CHANGED
```diff
@@ -555,58 +555,105 @@ video {
 }

 :root {
-  --background: …
-  …
+  --background: 240 20% 99%;
+  /* 1 */
+  --foreground: 210 13% 13%;
+  /* 12 */
+  --card: 240 20% 99%;
+  /* 1 */
+  --card-foreground: 210 13% 13%;
+  /* 12 */
+  --popover: 240 20% 99%;
+  /* 1 */
+  --popover-foreground: 210 13% 13%;
+  /* 12 */
+  --primary: 210 13% 13%;
+  /* 12 */
+  --primary-foreground: 240 20% 98%;
+  /* 2 */
+  --secondary: 240 11% 95%;
+  /* 3 */
+  --secondary-foreground: 210 13% 13%;
+  /* 12 */
+  --muted: 240 11% 95%;
+  /* 3 */
+  --muted-foreground: 220 6% 40%;
+  /* 11 */
+  --accent: 240 11% 95%;
+  /* 3 */
+  --accent-foreground: 210 13% 13%;
+  /* 12 */
+  --destructive: 358 75% 59%;
+  /* 9 - red */
+  --destructive-foreground: 240 20% 98%;
+  /* 2 */
+  --border: 240 10% 86%;
+  /* 6 */
+  --input: 240 10% 86%;
+  /* 6 */
+  --ring: 210 13% 13%;
+  /* 12 */
+  --chart-1: 10 78% 54%;
+  /* 9 - tomato */
+  --chart-2: 173 80% 36%;
+  /* 9 - teal */
+  --chart-3: 206 100% 50%;
+  /* 9 - blue */
+  --chart-4: 42 100% 62%;
+  /* 9 - amber */
+  --chart-5: 23 93% 53%;
+  /* 9 - orange */
 }

 .dark {
-  --background: …
-  …
+  --background: 240 6% 7%;
+  /* 1 */
+  --foreground: 220 9% 94%;
+  /* 12 */
+  --card: 240 6% 7%;
+  /* 1 */
+  --card-foreground: 220 9% 94%;
+  /* 12 */
+  --popover: 240 6% 7%;
+  /* 1 */
+  --popover-foreground: 220 9% 94%;
+  /* 12 */
+  --primary: 220 9% 94%;
+  /* 12 */
+  --primary-foreground: 220 6% 10%;
+  /* 2 */
+  --secondary: 225 6% 14%;
+  /* 3 */
+  --secondary-foreground: 220 9% 94%;
+  /* 12 */
+  --muted: 225 6% 14%;
+  /* 3 */
+  --muted-foreground: 216 7% 71%;
+  /* 11 */
+  --accent: 225 6% 14%;
+  /* 3 */
+  --accent-foreground: 220 9% 94%;
+  /* 12 */
+  --destructive: 358 75% 59%;
+  /* 9 - red */
+  --destructive-foreground: 220 9% 94%;
+  /* 12 */
+  --border: 213 8% 23%;
+  /* 6 */
+  --input: 213 8% 23%;
+  /* 6 */
+  --ring: 220 9% 94%;
+  /* 12 */
+  --chart-1: 10 78% 54%;
+  /* 9 - tomato */
+  --chart-2: 173 80% 36%;
+  /* 9 - teal */
+  --chart-3: 206 100% 50%;
+  /* 9 - blue */
+  --chart-4: 42 100% 62%;
+  /* 9 - amber */
+  --chart-5: 23 93% 53%;
+  /* 9 - orange */
 }

 :root:has(.no-bg-scroll) {
@@ -1134,6 +1181,10 @@ body {
   grid-template-rows: minmax(0,55px) minmax(0,1fr);
 }

+.grid-rows-\[auto_auto_1fr\] {
+  grid-template-rows: auto auto 1fr;
+}
+
 .flex-col {
   flex-direction: column;
 }
@@ -1248,6 +1299,12 @@ body {
   margin-bottom: calc(0.5rem * var(--tw-space-y-reverse));
 }

+.space-x-1 > :not([hidden]) ~ :not([hidden]) {
+  --tw-space-x-reverse: 0;
+  margin-right: calc(0.25rem * var(--tw-space-x-reverse));
+  margin-left: calc(0.25rem * calc(1 - var(--tw-space-x-reverse)));
+}
+
 .self-stretch {
   align-self: stretch;
 }
@@ -1407,6 +1464,11 @@ body {
   background-color: hsl(var(--secondary));
 }

+.bg-white {
+  --tw-bg-opacity: 1;
+  background-color: rgb(255 255 255 / var(--tw-bg-opacity));
+}
+
 .bg-gradient-to-r {
   background-image: linear-gradient(to right, var(--tw-gradient-stops));
 }
@@ -1415,15 +1477,15 @@ body {
   background-image: linear-gradient(to top, var(--tw-gradient-stops));
 }

-.from-…
-  --tw-gradient-from: #…
-  --tw-gradient-to: rgb(…
+.from-\[\#fcfcfd\] {
+  --tw-gradient-from: #fcfcfd var(--tw-gradient-from-position);
+  --tw-gradient-to: rgb(252 252 253 / 0) var(--tw-gradient-to-position);
   --tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to);
 }

-.from-…
-  --tw-gradient-from: #…
-  --tw-gradient-to: rgb(…
+.from-black {
+  --tw-gradient-from: #000 var(--tw-gradient-from-position);
+  --tw-gradient-to: rgb(0 0 0 / 0) var(--tw-gradient-to-position);
   --tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to);
 }
@@ -2084,6 +2146,15 @@ header {
   grid-column: 1/-1;
 }

+body {
+  &[data-is-home="true"] {
+    background: radial-gradient(circle at 50% 100%, #fcfcfd, #fcfcfd, #fdfdfe, #fdfdfe, #fefefe, #fefefe, #ffffff, #ffffff);
+    .dark & {
+      background: radial-gradient(circle at 50% 50%, #272a2d, #242629, #212326, #1e1f22, #1b1c1e, #18181b, #151517, #111113);
+    }
+  }
+}
+
 main {
   overflow: auto;
 }
@@ -2139,8 +2210,8 @@ aside {

 .awesomplete > ul > :not([hidden]) ~ :not([hidden]) {
   --tw-space-y-reverse: 0;
-  margin-top: calc(0.…
-  margin-bottom: calc(0.…
+  margin-top: calc(0.25rem * calc(1 - var(--tw-space-y-reverse)));
+  margin-bottom: calc(0.25rem * var(--tw-space-y-reverse));
 }

 .awesomplete > ul {
@@ -2152,7 +2223,10 @@ aside {
   border-right: 1px solid hsl(var(--input));
   border-bottom: 1px solid hsl(var(--input));
   border-radius: 0 0 calc(var(--radius) - 2px) calc(var(--radius) - 2px);
-  background: …
+  background: white;
+  .dark & {
+    background: hsl(var(--background));
+  }
   box-shadow: none;
   text-shadow: none;
 }
@@ -2700,6 +2774,12 @@ aside {
 }
 }

+@media (min-width: 1280px) {
+  .xl\:grid-rows-\[1fr_2fr\] {
+    grid-template-rows: 1fr 2fr;
+  }
+}
+
 .dark\:block:where(.dark, .dark *) {
   display: block;
 }
@@ -2716,9 +2796,13 @@ aside {
   border-color: hsl(var(--destructive));
 }

-.dark\:…
-  …
-  …
+.dark\:bg-background:where(.dark, .dark *) {
+  background-color: hsl(var(--background));
+}
+
+.dark\:from-\[\#1c2024\]:where(.dark, .dark *) {
+  --tw-gradient-from: #1c2024 var(--tw-gradient-from-position);
+  --tw-gradient-to: rgb(28 32 36 / 0) var(--tw-gradient-to-position);
   --tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to);
 }

```
requirements.txt
CHANGED
```diff
@@ -1,5 +1,5 @@
 # This file was autogenerated by uv via the following command:
-#    uv pip compile pyproject.toml -o requirements.txt
+#    uv pip compile pyproject.toml -o src/requirements.txt
 accelerate==0.34.2
     # via peft
 aiohappyeyeballs==2.4.3
```
static/.DS_Store
CHANGED
Binary files a/static/.DS_Store and b/static/.DS_Store differ