Spaces:
Runtime error
Runtime error
upload from local pc
Browse files- Dockerfile +12 -0
- README.md +2 -12
- aicovers_topics.csv +97 -0
- caption.py +43 -0
- image_processing.py +23 -0
- img_upload.py +58 -0
- main.py +65 -0
- moderator.py +53 -0
- moderator_mc.py +39 -0
- requirements.txt +99 -0
- test.ann +0 -0
- vector_search.py +48 -0
Dockerfile
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.10

# Hugging Face Spaces runs Docker containers as UID 1000, and the app writes
# its Annoy index (test.ann) into the working directory at import time, so the
# working directory must be writable by that user.
RUN useradd -m -u 1000 user && mkdir -p /app && chown user:user /app

WORKDIR /app

# Install dependencies first so this layer is reused when only the code changes.
COPY --chown=user requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# COPY is preferred over ADD for plain local files (no archive extraction / URL fetch).
COPY --chown=user . /app

USER user

EXPOSE 8000

# Run main.py when the container launches
CMD ["python", "main.py"]
|
README.md
CHANGED
@@ -1,12 +1,2 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
emoji: 🏢
|
4 |
-
colorFrom: purple
|
5 |
-
colorTo: indigo
|
6 |
-
sdk: docker
|
7 |
-
app_port: 8000
|
8 |
-
pinned: false
|
9 |
-
license: apache-2.0
|
10 |
-
---
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
+
# beam-image-captioning
|
2 |
+
Repo for the Image Captioning task for the Beam campaign
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
aicovers_topics.csv
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Topic,topic_cleaned,Topic group
|
2 |
+
- Urban skyline at sunset,Urban skyline at sunset,City/Street Views
|
3 |
+
- Historical district cityscape,Historical district cityscape,City/Street Views
|
4 |
+
- City under the night lights,City under the night lights,City/Street Views
|
5 |
+
- Busy city main street,Busy city main street,City/Street Views
|
6 |
+
- A quiet pedestrian street,A quiet pedestrian street,City/Street Views
|
7 |
+
- Festive decorations on a city street,Festive decorations on a city street,City/Street Views
|
8 |
+
- Calm morning in a residential street,Calm morning in a residential street,City/Street Views
|
9 |
+
- City street under night lights,City street under night lights,City/Street Views
|
10 |
+
- Sunrise over the city panorama,Sunrise over the city panorama,City/Street Views
|
11 |
+
- City at twilight,City at twilight,City/Street Views
|
12 |
+
- Majestic historical building,Majestic historical building,Architecture: Classic and Modern
|
13 |
+
- Classic museum exterior,Classic museum exterior,Architecture: Classic and Modern
|
14 |
+
- Colorful cathedral,Colorful cathedral,Architecture: Classic and Modern
|
15 |
+
- Modern skyscraper tower,Modern skyscraper tower,Architecture: Classic and Modern
|
16 |
+
- Futuristic business center,Futuristic business center,Architecture: Classic and Modern
|
17 |
+
- Ornate grand cathedral,Ornate grand cathedral,Architecture: Classic and Modern
|
18 |
+
- Imperial palace exterior,Imperial palace exterior,Architecture: Classic and Modern
|
19 |
+
- Contemporary design building,Contemporary design building,Architecture: Classic and Modern
|
20 |
+
- Modern airport exterior,Modern airport exterior,Architecture: Classic and Modern
|
21 |
+
- Innovative technology park,Innovative technology park,Architecture: Classic and Modern
|
22 |
+
- Tranquil lake scenery,Tranquil lake scenery,Nature and Landscapes
|
23 |
+
- Beautiful botanic garden,Beautiful botanic garden,Nature and Landscapes
|
24 |
+
- Majestic mountains,Majestic mountains,Nature and Landscapes
|
25 |
+
- Winter in pine forest,Winter in pine forest,Nature and Landscapes
|
26 |
+
- Sunrise over a field,Sunrise over a field,Nature and Landscapes
|
27 |
+
- Traditional breakfast spread,Traditional breakfast spread,Cuisine and Dining
|
28 |
+
- Scene in a cafe,Scene in a cafe,Cuisine and Dining
|
29 |
+
- Instagram food shot,Instagram food shot,Cuisine and Dining
|
30 |
+
- Local market produce,Local market produce,Cuisine and Dining
|
31 |
+
- Exotic regional dishes,Exotic regional dishes,Cuisine and Dining
|
32 |
+
- Ice hockey game action,Ice hockey game action,Sports Activities
|
33 |
+
- Snowy ski tracks,Snowy ski tracks,Sports Activities
|
34 |
+
- Football match,Football match,Sports Activities
|
35 |
+
- Rafting on a river,Rafting on a river,Sports Activities
|
36 |
+
- Scenic railway journey / Train scene,Scenic railway journey / Train scene,Sports Activities
|
37 |
+
- City's annual carnival,City's annual carnival,Social Gatherings
|
38 |
+
- Networking event,Networking event,Social Gatherings
|
39 |
+
- Open-air concert crowd,Open-air concert crowd,Social Gatherings
|
40 |
+
- Friends in a bar,Friends in a bar,Social Gatherings
|
41 |
+
- Family gathering,Family gathering,Social Gatherings
|
42 |
+
- Iconic craft making,Iconic craft making,Traditional and Folklore
|
43 |
+
- Traditional folk dance,Traditional folk dance,Traditional and Folklore
|
44 |
+
- Tea ceremony with a traditional kettle,Tea ceremony with a traditional kettle,Traditional and Folklore
|
45 |
+
- New Year celebration / Fireworks,New Year celebration / Fireworks,Traditional and Folklore
|
46 |
+
- Day out in a traditional architecture complex,Day out in a traditional architecture complex,Traditional and Folklore
|
47 |
+
- Playful dog in a park,Playful dog in a park,Pets
|
48 |
+
- Cat lounging in a cozy home,Cat lounging in a cozy home,Pets
|
49 |
+
- Parakeet in a colorful cage,Parakeet in a colorful cage,Pets
|
50 |
+
- A child feeding her hamster,A child feeding her hamster,Pets
|
51 |
+
- Aquarium scene with exotic fishes,Aquarium scene with exotic fishes,Pets
|
52 |
+
- Trendy street style,Trendy street style,Fashion and Lifestyle
|
53 |
+
- High-end fashion boutique,High-end fashion boutique,Fashion and Lifestyle
|
54 |
+
- Eclectic vintage clothing store,Eclectic vintage clothing store,Fashion and Lifestyle
|
55 |
+
- Chic home decor,Chic home decor,Fashion and Lifestyle
|
56 |
+
- Lively beauty salon interior,Lively beauty salon interior,Fashion and Lifestyle
|
57 |
+
- Tranquil seaside panorama,Tranquil seaside panorama,Travel and Adventure
|
58 |
+
- Rustic camping site,Rustic camping site,Travel and Adventure
|
59 |
+
- Snapshot of a road trip,Snapshot of a road trip,Travel and Adventure
|
60 |
+
- Exciting amusement park,Exciting amusement park,Travel and Adventure
|
61 |
+
- Captivating hiking trail,Captivating hiking trail,Travel and Adventure
|
62 |
+
- Inspiring street mural,Inspiring street mural,Art and Creativity
|
63 |
+
- Quaint pottery studio,Quaint pottery studio,Art and Creativity
|
64 |
+
- Gallery exhibition,Gallery exhibition,Art and Creativity
|
65 |
+
- Creative DIY craft project,Creative DIY craft project,Art and Creativity
|
66 |
+
- Dramatic theater scene,Dramatic theater scene,Art and Creativity
|
67 |
+
- Modern workspace with tech gadgets,Modern workspace with tech gadgets,Technology and Gaming
|
68 |
+
- Immersive virtual reality gaming,Immersive virtual reality gaming,Technology and Gaming
|
69 |
+
- E-sports event,E-sports event,Technology and Gaming
|
70 |
+
- Robots,Robots,Technology and Gaming
|
71 |
+
- Drone flying against city skyline,Drone flying against city skyline,Technology and Gaming
|
72 |
+
- Outdoor yoga session,Outdoor yoga session,Health and Well-being
|
73 |
+
- Running scene,Running scene,Health and Well-being
|
74 |
+
- Fitness class,Fitness class,Health and Well-being
|
75 |
+
"- Group sports (football, hockey)","Group sports (football, hockey)",Health and Well-being
|
76 |
+
- Buzzing train station,Buzzing train station,Transportation
|
77 |
+
- Airport with airplanes,Airport with airplanes,Transportation
|
78 |
+
- Cars in a busy city,Cars in a busy city,Transportation
|
79 |
+
- Busy harbor with ships,Busy harbor with ships,Transportation
|
80 |
+
- Metro ride during peak hours,Metro ride during peak hours,Transportation
|
81 |
+
"- Kitchenware: pots, pans, cutlery","Kitchenware: pots, pans, cutlery",Home Categories
|
82 |
+
"- Bathroom: skincare, cosmetics, bath accessories","Bathroom: skincare, cosmetics, bath accessories",Home Categories
|
83 |
+
"- Interior: décor elements, types of furniture","Interior: décor elements, types of furniture",Home Categories
|
84 |
+
"- New Year theme: decorations, gifts, New Year parties","New Year theme: decorations, gifts, New Year parties",Home Categories
|
85 |
+
"- People in the frame: home comfort, family scenes, domestic life","People in the frame: home comfort, family scenes, domestic life",Home Categories
|
86 |
+
"- Hobbies: art tools, musical instruments, hobbies","Hobbies: art tools, musical instruments, hobbies",Home Categories
|
87 |
+
"- Appliances: house appliances, cleaning, home maintenance","Appliances: house appliances, cleaning, home maintenance",Home Categories
|
88 |
+
"- Workstation: computers, office supplies, workstations","Workstation: computers, office supplies, workstations",Office
|
89 |
+
"- Team moments: meetings, brainstorming, team events","Team moments: meetings, brainstorming, team events",Office
|
90 |
+
"- Office space: office interior, space design, working atmosphere","Office space: office interior, space design, working atmosphere",Office
|
91 |
+
"- Coffee break: coffee breaks, lunchtime, informal communication","Coffee break: coffee breaks, lunchtime, informal communication",Office
|
92 |
+
"- Fruits and vegetables: fresh produce, farmers market, vegetarian products","Fruits and vegetables: fresh produce, farmers market, vegetarian products",Grocery Store
|
93 |
+
"- Dairy products: dairy production, cheese, milk","Dairy products: dairy production, cheese, milk",Grocery Store
|
94 |
+
"- Meats and seafood: meat products, fish, deli","Meats and seafood: meat products, fish, deli",Grocery Store
|
95 |
+
"- Grains and pasta: variety of grains, cereals, pasta","Grains and pasta: variety of grains, cereals, pasta",Grocery Store
|
96 |
+
"- Waters and other drinks: water, non-alcoholic beverages, carbonated drinks","Waters and other drinks: water, non-alcoholic beverages, carbonated drinks",Grocery Store
|
97 |
+
"- Beauty and hygiene: personal care, cosmetic products, hygiene products","Beauty and hygiene: personal care, cosmetic products, hygiene products",Grocery Store
|
caption.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import requests
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
|
5 |
+
load_dotenv()
|
6 |
+
|
7 |
+
|
8 |
+
def caption_from_url(image_url, timeout=30):
    """
    Generates a caption for an image using the Azure Computer Vision API.

    Parameters:
    image_url (str): The URL of the image for which a caption should be generated.
    timeout (float): Seconds to wait for Azure before giving up (default 30).

    Returns:
    str: The generated caption for the image.

    Raises:
    requests.exceptions.HTTPError: If the request to the Azure API fails.
    requests.exceptions.Timeout: If Azure does not answer within ``timeout`` seconds.
    KeyError: If the response body has no ``captionResult.text`` entry.
    """
    subscription_key = os.getenv('AZURE_SUBSCRIPTION_KEY')
    endpoint = 'https://icmvp.cognitiveservices.azure.com/'

    analyze_url = endpoint + "computervision/imageanalysis:analyze?api-version=2023-10-01"

    headers = {
        "Content-Type": "application/json",
        'Ocp-Apim-Subscription-Key': subscription_key,
    }
    params = {'features': 'caption'}
    data = {'url': image_url}

    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(
        analyze_url,
        headers=headers,
        params=params,
        json=data,
        timeout=timeout,
    )
    response.raise_for_status()

    analysis = response.json()

    # Extract the description from the returned JSON
    return analysis['captionResult']['text']
|
image_processing.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# works with uploaded image URLs
|
2 |
+
|
3 |
+
from moderator_mc import moderate_image # uses moderate-content api
|
4 |
+
from caption import caption_from_url # generates captions
|
5 |
+
from vector_search import topic_from_caption
|
6 |
+
|
7 |
+
|
8 |
+
def process_image(image_url):
    """
    Moderate an uploaded image and, if it passes, caption it and match a topic.

    Args:
    - image_url (str): URL of the uploaded image.

    Returns:
    - str: "moderated" when the moderator rates the image 3, otherwise a
      "Caption: ... Topic: ..." summary string.
    """
    rating = moderate_image(image_url)

    # Only a rating of exactly 3 blocks the image; every other value, including
    # None, continues to captioning.
    # NOTE(review): 3 is presumably the strictest rating of the moderation API - confirm.
    if rating == 3:  # mc api
        return "moderated"

    caption_text = caption_from_url(image_url)
    matched_topic = topic_from_caption(caption_text)
    return f"Caption: {caption_text}. Topic: {matched_topic}"
|
img_upload.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from azure.storage.blob import BlobServiceClient, BlobClient, ContentSettings, generate_blob_sas, BlobSasPermissions
|
2 |
+
from datetime import datetime, timedelta
|
3 |
+
import os
|
4 |
+
import dotenv
|
5 |
+
|
6 |
+
dotenv.load_dotenv()
|
7 |
+
|
8 |
+
storage_account_name = os.environ['AZURE_STORAGE_ACCOUNT_NAME']
|
9 |
+
storage_account_key = os.environ['AZURE_STORAGE_KEY']
|
10 |
+
connection_string = os.environ['AZURE_STORAGE_CONNECTION_STRING']
|
11 |
+
container_name = os.environ['AZURE_STORAGE_CONTAINER_NAME']
|
12 |
+
|
13 |
+
|
14 |
+
def upload_image_to_blob(image_data, image_name):
    """
    Upload an image to Azure Blob Storage and return a read-only SAS URL for it.

    Args:
    - image_data (bytes): Content of the image to upload.
    - image_name (str): Name of the blob to create; its extension selects the
      stored content type (.png, .gif, otherwise JPEG).

    Returns:
    - str: URL of the uploaded blob carrying a read-only SAS token that stays
      valid for 10 hours.
    """
    from urllib.parse import quote

    account_url = f"https://{storage_account_name}.blob.core.windows.net"

    # Create a BlobServiceClient and drill down to the blob for this image
    blob_service_client = BlobServiceClient(account_url=account_url,
                                            credential=storage_account_key)
    container_client = blob_service_client.get_container_client(container_name)
    blob_name = image_name
    blob_client = container_client.get_blob_client(blob_name)

    # Determine the content type from the image name
    lowered_name = image_name.lower()
    if lowered_name.endswith(".png"):
        content_type = "image/png"
    elif lowered_name.endswith(".gif"):
        content_type = "image/gif"
    else:
        content_type = "image/jpeg"  # Default to JPEG

    # Upload the image together with its content type
    content_settings = ContentSettings(content_type=content_type)
    blob_client.upload_blob(image_data, content_settings=content_settings)

    # Generate a SAS token for the blob
    sas_token = generate_blob_sas(
        account_name=storage_account_name,
        container_name=container_name,
        blob_name=blob_name,
        account_key=storage_account_key,
        permission=BlobSasPermissions(read=True),
        expiry=datetime.utcnow() + timedelta(hours=10),  # The SAS token will be valid for 10 hours
    )

    # Percent-encode the blob name so names containing spaces, '#', '?' etc.
    # still produce a usable URL ('/' is kept for virtual directories).
    return f"{account_url}/{container_name}/{quote(blob_name)}?{sas_token}"
|
main.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# works with gradio file upload, not image upload
|
2 |
+
import base64
|
3 |
+
from fastapi import FastAPI #, UploadFile, File
|
4 |
+
from img_upload import upload_image_to_blob
|
5 |
+
from image_processing import process_image
|
6 |
+
from pydantic import BaseModel, validator
|
7 |
+
from PIL import Image
|
8 |
+
import io
|
9 |
+
import gradio as gr
|
10 |
+
import uuid
|
11 |
+
|
12 |
+
app = FastAPI()
|
13 |
+
|
14 |
+
class FileUpload(BaseModel):
    # Request body of the /upload endpoint.

    # Name the image is stored under in blob storage.
    filename: str
    # Image content as a base64-encoded string.
    data: str

    # Disabled validation of the uploaded image, kept for reference.
    # NOTE(review): it references BytesIO, while this module only imports io.
    #
    # @validator('data')
    # def validate_image(cls, data: str):
    #     try:
    #         image_data = base64.b64decode(data)
    #         image = Image.open(BytesIO(image_data))
    #         if image.format not in ['JPEG', 'PNG']:
    #             raise ValueError('Invalid file type')
    #         if max(image.size) > 5000:
    #             raise ValueError('Image dimensions are too large')
    #         if len(data) > 5000 * 5000:  # adjust this value based on your needs
    #             raise ValueError('File size is too large')
    #         return data
    #     except Exception as e:
    #         raise ValueError('Invalid image') from e
|
33 |
+
|
34 |
+
class Response(BaseModel):
    # Response body of the /upload endpoint.

    # Text produced by the image processing step.
    result: str
|
36 |
+
|
37 |
+
|
38 |
+
def _upload_and_process(filename, image_bytes):
    # Store the raw image bytes in blob storage, then process the stored copy.
    sas_url = upload_image_to_blob(image_bytes, filename)
    return process_image(sas_url)


@app.post("/upload", response_model=Response)
async def create_upload_file(file: FileUpload):
    """
    Handle a JSON upload: decode the base64 image, store it and process it.

    Args:
    - file (FileUpload): Target filename plus base64-encoded image data.

    Returns:
    - Response: The processing result wrapped in the declared response model.
      Returning the bare string would not match ``response_model=Response``.
    """
    image_bytes = base64.b64decode(file.data)
    return Response(result=_upload_and_process(file.filename, image_bytes))


async def gradio_interface(image: Image.Image):
    """
    Gradio callback: store the submitted image as a JPEG and process it.

    Args:
    - image (PIL.Image.Image): Image supplied by the Gradio image input.

    Returns:
    - str: The processing result shown in the Gradio text output.
    """
    if image is None:
        # Nothing was submitted, so there is nothing to upload.
        return "No image provided."

    # Convert PIL Image to bytes
    buffer = io.BytesIO()
    image.save(buffer, format="JPEG")

    # Generate a unique ID for the image
    unique_id = str(uuid.uuid4())

    # Use the shared helper directly: no base64 round trip is needed here.
    return _upload_and_process(unique_id + ".jpg", buffer.getvalue())
|
59 |
+
|
60 |
+
iface = gr.Interface(fn=gradio_interface, inputs=gr.Image(type="pil"), outputs="text")
|
61 |
+
app = gr.mount_gradio_app(app, iface, "/gradio")
|
62 |
+
|
63 |
+
if __name__ == "__main__":
|
64 |
+
import uvicorn
|
65 |
+
uvicorn.run(app, host="0.0.0.0", port=8000, )
|
moderator.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import requests
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
|
5 |
+
load_dotenv()
|
6 |
+
|
7 |
+
|
8 |
+
def moderate_image(image_url, timeout=30):
    """
    Uses Microsoft Azure Content Moderator API to evaluate an image's content.

    Args:
    - image_url (str): The URL of the image to be moderated.
    - timeout (float): Seconds to wait for the API before giving up (default 30).

    Returns:
    - bool: True if the image is classified as adult or racy, otherwise False.

    Raises:
    - requests.exceptions.HTTPError: If the API answers with an error status.
    - requests.exceptions.Timeout: If the API does not answer within ``timeout`` seconds.
    """
    subscription_key = os.getenv('AZURE_SUBSCRIPTION_KEY')
    endpoint = "https://eastus.api.cognitive.microsoft.com"

    moderator_url = endpoint + "/contentmoderator/moderate/v1.0/ProcessImage/Evaluate"

    # Define the headers for the HTTP request
    headers = {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": subscription_key,
    }

    data = {
        "DataRepresentation": 'URL',
        'Value': image_url,
    }

    # Send the image to the API
    response = requests.post(moderator_url, headers=headers, json=data, timeout=timeout)

    # Fail with the HTTP error itself rather than a KeyError on an error payload.
    response.raise_for_status()

    # Parse the response
    response_json = response.json()

    # Check if the image is classified as adult or racy
    return bool(response_json["IsImageAdultClassified"] or response_json["IsImageRacyClassified"])
|
47 |
+
|
48 |
+
# Example usage
|
49 |
+
#
|
50 |
+
#
|
51 |
+
# url = "https://www.rainforest-alliance.org/wp-content/uploads/2021/06/capybara-square-1-400x400.jpg.webp"
|
52 |
+
# result = moderate_image(url)
|
53 |
+
# print(result)
|
moderator_mc.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
# import json
|
3 |
+
import requests
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
|
6 |
+
load_dotenv()
|
7 |
+
|
8 |
+
|
9 |
+
def moderate_image(image_url, timeout=30):
    """
    Rate an image with the moderatecontent.com API.

    Args:
    - image_url (str): URL of the image to be rated.
    - timeout (float): Seconds to wait for the API before giving up (default 30).

    Returns:
    - The ``rating_index`` value from the API response, or None when the API
      answers with a non-200 status or the response carries no ``rating_index``.

    Raises:
    - requests.exceptions.Timeout: If the API does not answer within ``timeout`` seconds.
    """
    mc_key = os.getenv('MODERATE_CONTENT_KEY')
    payload = {
        'key': mc_key,
        'url': image_url,
    }
    endpoint = 'https://api.moderatecontent.com/moderate/'
    response = requests.post(endpoint, data=payload, timeout=timeout)

    if response.status_code != 200:
        print(response.status_code)
        return None

    # .get keeps a 200 response without a rating on the same None path as an
    # HTTP failure instead of raising KeyError.
    return response.json().get('rating_index')
|
33 |
+
|
34 |
+
# Example usage
|
35 |
+
|
36 |
+
|
37 |
+
# url = "https://www.rainforest-alliance.org/wp-content/uploads/2021/06/capybara-square-1-400x400.jpg.webp"
|
38 |
+
# result = moderate_image(url)
|
39 |
+
# print(result)
|
requirements.txt
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiofiles==23.2.1
|
2 |
+
aiohttp==3.9.1
|
3 |
+
aiosignal==1.3.1
|
4 |
+
altair==5.2.0
|
5 |
+
annotated-types==0.6.0
|
6 |
+
annoy==1.17.3
|
7 |
+
anyio==3.7.1
|
8 |
+
asks==3.0.0
|
9 |
+
async-generator==1.10
|
10 |
+
async-timeout==4.0.3
|
11 |
+
attrs==23.1.0
|
12 |
+
azure-core==1.29.6
|
13 |
+
azure-identity==1.15.0
|
14 |
+
azure-storage-blob==12.19.0
|
15 |
+
backoff==2.2.1
|
16 |
+
beautifulsoup4==4.12.2
|
17 |
+
certifi==2023.11.17
|
18 |
+
cffi==1.16.0
|
19 |
+
charset-normalizer==3.3.2
|
20 |
+
click==8.1.7
|
21 |
+
cohere==4.39
|
22 |
+
colorama==0.4.6
|
23 |
+
contourpy==1.2.0
|
24 |
+
cryptography==41.0.7
|
25 |
+
cycler==0.12.1
|
26 |
+
exceptiongroup==1.2.0
|
27 |
+
fastapi==0.105.0
|
28 |
+
fastavro==1.9.2
|
29 |
+
ffmpy==0.3.1
|
30 |
+
filelock==3.13.1
|
31 |
+
fonttools==4.46.0
|
32 |
+
frozenlist==1.4.1
|
33 |
+
fsspec==2023.12.2
|
34 |
+
gradio==4.10.0
|
35 |
+
gradio_client==0.7.3
|
36 |
+
h11==0.14.0
|
37 |
+
httpcore==1.0.2
|
38 |
+
httpx==0.25.2
|
39 |
+
huggingface-hub==0.19.4
|
40 |
+
idna==3.6
|
41 |
+
importlib-metadata==6.11.0
|
42 |
+
importlib-resources==6.1.1
|
43 |
+
isodate==0.6.1
|
44 |
+
Jinja2==3.1.2
|
45 |
+
jsonschema==4.20.0
|
46 |
+
jsonschema-specifications==2023.11.2
|
47 |
+
kiwisolver==1.4.5
|
48 |
+
lxml==5.0.0
|
49 |
+
markdown-it-py==3.0.0
|
50 |
+
MarkupSafe==2.1.3
|
51 |
+
matplotlib==3.8.2
|
52 |
+
mdurl==0.1.2
|
53 |
+
msal==1.26.0
|
54 |
+
msal-extensions==1.1.0
|
55 |
+
multidict==6.0.4
|
56 |
+
numpy==1.26.2
|
57 |
+
orjson==3.9.10
|
58 |
+
outcome==1.3.0.post0
|
59 |
+
packaging==23.2
|
60 |
+
pandas==2.1.4
|
61 |
+
Pillow==10.1.0
|
62 |
+
ply==3.11
|
63 |
+
portalocker==2.8.2
|
64 |
+
pycparser==2.21
|
65 |
+
pydantic==2.5.2
|
66 |
+
pydantic_core==2.14.5
|
67 |
+
pydub==0.25.1
|
68 |
+
Pygments==2.17.2
|
69 |
+
PyJWT==2.8.0
|
70 |
+
pyparsing==3.1.1
|
71 |
+
python-dateutil==2.8.2
|
72 |
+
python-dotenv==1.0.0
|
73 |
+
python-multipart==0.0.6
|
74 |
+
pytz==2023.3.post1
|
75 |
+
PyYAML==6.0.1
|
76 |
+
referencing==0.32.0
|
77 |
+
requests==2.31.0
|
78 |
+
rich==13.7.0
|
79 |
+
rpds-py==0.14.1
|
80 |
+
semantic-version==2.10.0
|
81 |
+
shellingham==1.5.4
|
82 |
+
six==1.16.0
|
83 |
+
sniffio==1.3.0
|
84 |
+
sortedcontainers==2.4.0
|
85 |
+
soupsieve==2.5
|
86 |
+
starlette==0.27.0
|
87 |
+
stone==3.3.1
|
88 |
+
tomlkit==0.12.0
|
89 |
+
toolz==0.12.0
|
90 |
+
tqdm==4.66.1
|
91 |
+
trio==0.23.2
|
92 |
+
typer==0.9.0
|
93 |
+
typing_extensions==4.9.0
|
94 |
+
tzdata==2023.3
|
95 |
+
urllib3==2.1.0
|
96 |
+
uvicorn==0.24.0.post1
|
97 |
+
websockets==11.0.3
|
98 |
+
yarl==1.9.4
|
99 |
+
zipp==3.17.0
|
test.ann
ADDED
Binary file (477 kB). View file
|
|
vector_search.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cohere
|
2 |
+
from annoy import AnnoyIndex
|
3 |
+
import numpy as np
|
4 |
+
import dotenv
|
5 |
+
import os
|
6 |
+
import pandas as pd
|
7 |
+
|
8 |
+
# Load environment variables (COHERE_API_KEY is read below) from a .env file.
dotenv.load_dotenv()

model_name = "embed-english-v3.0"
api_key = os.environ['COHERE_API_KEY']
input_type_embed = "search_document"

# Set up the cohere client
co = cohere.Client(api_key)

# Get the dataset of topics
topics = pd.read_csv("aicovers_topics.csv")

# Embed every cleaned topic once, at import time
list_embeds = co.embed(
    texts=topics['topic_cleaned'].tolist(),
    model=model_name,
    input_type=input_type_embed,
).embeddings

# Create the search index; its dimension is the length of one embedding
embedding_dim = np.array(list_embeds).shape[1]
search_index = AnnoyIndex(embedding_dim, metric='angular')

# Add vectors to the search index; item ids are the row positions in `topics`
for i, embedding in enumerate(list_embeds):
    search_index.add_item(i, embedding)

search_index.build(10)  # 10 trees
search_index.save('test.ann')
|
31 |
+
|
32 |
+
|
33 |
+
def topic_from_caption(caption):
    """
    Returns a topic from an uploaded list that is semantically similar to the input caption.

    Args:
    - caption (str): The image caption generated by MS Azure.

    Returns:
    - str: The ``topic_cleaned`` value of the topic nearest to the caption.
    """
    input_type_query = "search_query"

    # Embed the caption with the same model used for the topic index
    caption_embed = co.embed(
        texts=[caption],
        model=model_name,
        input_type=input_type_query,
    ).embeddings

    # Retrieve the single nearest topic; distances are not needed here
    nearest_ids = search_index.get_nns_by_vector(caption_embed[0], n=1)

    # Index ids are row positions in `topics`, so read the cell directly
    # instead of going through the Series display formatter.
    return str(topics['topic_cleaned'].iloc[nearest_ids[0]])
|
48 |
+
|