# Load the trained model.
# NOTE: load_learner unpickles the file, so only load model files from a trusted source.
learn = load_learner('nsfw_model.pkl')
labels = learn.dls.vocab

def analyze(url):
    """Classify the images found on the web page at `url`.

    The page is downloaded, every `<img>` attribute value that looks like an
    image URL (ends in jpg/png/gif/jpeg) is collected, and each image is
    downloaded and run through the module-level `learn` model.

    Args:
        url: Address of the web page to scan. If it does not start with
            'http://' or 'https://', 'http://' is prepended.

    Returns:
        'NOT safe' as soon as one image is predicted as "unsafe_searches",
        otherwise 'safe' (including when no image could be fetched or decoded).

    Raises:
        requests.RequestException: if the page itself cannot be fetched
            (connection error or timeout). Failures on individual images are
            skipped, not raised.
    """
    # Local import so this cell stays runnable without editing the imports cell.
    from urllib.parse import urljoin

    # Make sure URL starts with http or https
    # TODO: confirm that the url points to a web page, and not some resource.
    #       Regex could be useful here
    if not url.startswith(('http://', 'https://')):
        url = 'http://' + url

    # requests has no default timeout; without one a stalled server would
    # hang the app forever.
    timeout_seconds = 10

    # Extract html and all img tags
    page = requests.get(url, timeout=timeout_seconds)
    soup = BeautifulSoup(page.text, "html.parser")

    # Save all attribute values that we can clearly tell are img urls.
    # A better approach would be to use regex here
    image_suffixes = ('jpg', 'png', 'gif', 'jpeg')
    srcs = []
    for img in soup.find_all("img"):
        for value in img.attrs.values():
            if isinstance(value, str) and value.lower().endswith(image_suffixes):
                # Resolve relative ("/img/a.png") and protocol-relative
                # ("//cdn/a.png") references against the final page URL
                # (after redirects); absolute URLs pass through unchanged.
                srcs.append(urljoin(page.url, value))

    # Drop duplicates (order preserved) so the same image is not downloaded
    # and classified more than once.
    srcs = list(dict.fromkeys(srcs))

    # Get the images from the urls and classify.
    # If there is a single unsafe image, report it.
    for src_url in srcs:
        try:
            response = requests.get(src_url, timeout=timeout_seconds)
            response.raise_for_status()
            # PILImage.create accepts raw bytes, so the image is decoded in
            # memory: no shared temp file that concurrent requests could
            # clobber or that could be left behind when prediction fails.
            prediction = learn.predict(PILImage.create(response.content))[0]
        except Exception:
            # Best effort: an image that cannot be downloaded or decoded is
            # skipped so that one broken link does not fail the whole scan.
            continue
        if prediction == "unsafe_searches":
            return 'NOT safe'
    return 'safe'
" ], "text/plain": [ "