Spaces:

Bestever
/

st_report_2

Runtime error

App Files Files Community

st_report_2 / app.py

bestpedro

Update app.py

5312fd8 verified 7 months ago

raw

history blame

19.5 kB

	import streamlit as st
	import numpy as np
	import plotly.figure_factory as ff

	import plotly.graph_objects as go
	import plotly.express as px


	import requests
	import json
	import pandas as pd
	import shutil
	import os
	from openai import AzureOpenAI
	import base64


	# st.page_link("report.py", label="Home", icon="🏠")
	# st.page_link("pages/page_1.py", label="Page 1", icon="1️⃣")
	# st.page_link("pages/page_2.py", label="Page 2", icon="2️⃣", disabled=True)

	# --- Meta (Facebook) Graph API settings -------------------------------------
	# Ad account and page identifiers are hard-coded; the secrets come from the
	# environment so they never live in the repository.
	ACCOUNT_ID = "act_416207949073936"
	PAGE_ID = "63257509478"
	OPENAI_API = os.getenv("OPENAI_API")
	ACCESS_TOKEN = os.getenv("ACCESS_TOKEN")
	BIG_DATASET = None

	# Never echo the token itself: stdout ends up in the hosting platform's logs.
	# Report only whether it is configured, which is all the debug line needs.
	print(f"ACCESS_TOKEN configured: {bool(ACCESS_TOKEN)}")

	# Maps a campaign objective to the kind of analysis `perform_analysis` runs.
	ANALYSIS_TYPE = {
	    "OUTCOME_SALES": "ROAS",
	}

	# --- Azure OpenAI (vision deployment) settings -------------------------------
	API_BASE = 'https://bestever-vision.openai.azure.com/'
	DEPLOYMENT_NAME = 'vision'
	API_VERSION = '2023-12-01-preview'  # this might change in the future
	API_URL = f"{API_BASE}openai/deployments/{DEPLOYMENT_NAME}/extensions"

	client = AzureOpenAI(
	    api_key=OPENAI_API,
	    api_version=API_VERSION,
	    base_url=API_URL,
	)

	def encode_image(image_path):
	    """Read the file at ``image_path`` and return its bytes as base64 text.

	    Args:
	        image_path: Path of the file to read (opened in binary mode).

	    Returns:
	        The base64 encoding of the file contents, decoded to a ``str``.
	    """
	    with open(image_path, "rb") as image_file:
	        raw_bytes = image_file.read()
	    encoded = base64.b64encode(raw_bytes)
	    return encoded.decode('utf-8')


	def call_gpt_vision(client, images_path, user_prompt):
	    """Send a text prompt plus zero or more inline images to the chat API.

	    Args:
	        client: Object exposing ``chat.completions.create`` (the module builds
	            an ``AzureOpenAI`` client for this).
	        images_path: Iterable of file paths; each file is base64-encoded with
	            ``encode_image`` and embedded as a ``data:image/jpeg`` URL,
	            whatever the file's real type is.
	        user_prompt: Text placed first in the single user message.

	    Returns:
	        The response object returned by ``client.chat.completions.create``.
	    """
	    # The text part always comes first, followed by one entry per image.
	    user_content = [{"type": "text", "text": user_prompt}]
	    for image_path in images_path:
	        data_url = f"data:image/jpeg;base64,{encode_image(image_path)}"
	        user_content.append(
	            {
	                "type": "image_url",
	                "image_url": {"url": data_url},
	            }
	        )

	    conversation = [{"role": "user", "content": user_content}]
	    return client.chat.completions.create(
	        model=DEPLOYMENT_NAME,
	        messages=conversation,
	        max_tokens=2000,
	    )


	def parse_tags_from_content(response):
	    """Parse ``<name>:<details>`` lines from a chat completion into tag dicts.

	    Only the first choice of ``response`` is read. Each line is split at its
	    first colon; lines without a colon are ignored. Surrounding whitespace
	    (including a trailing carriage return) is removed from both parts.

	    Args:
	        response: Object with ``choices[0].message.content`` holding the
	            model's text, or ``None`` when the model produced no text.

	    Returns:
	        A list of ``{"name": ..., "metadata": {"details": ...}}`` dicts, one
	        per parsed line, in order. Empty when there is no content.
	    """
	    content = response.choices[0].message.content
	    if not content:
	        # The API may return no text at all; treat that as "no tags".
	        return []

	    tags = []
	    for line in content.splitlines():
	        # Split on the first colon only so details may contain colons.
	        name, separator, details = line.partition(":")
	        if not separator:
	            continue
	        tags.append({"name": name.strip(), "metadata": {"details": details.strip()}})
	    return tags


	def get_campaigns(account_id):
	    """Request campaign-level insights for an ad account (preset ``last_90d``).

	    Args:
	        account_id: Identifier placed in front of ``/insights`` in the path.

	    Returns:
	        Whatever ``call_graph_api`` returns for that request.
	    """
	    query = dict(
	        date_preset="last_90d",
	        fields="campaign_id,campaign_name,impressions,spend,objective",
	        level="campaign",
	        access_token=ACCESS_TOKEN,
	    )
	    return call_graph_api(f"{account_id}/insights", query)


	def get_adsets(campaign_id):
	    """Request adset-level insights for a campaign (preset ``last_90d``).

	    Args:
	        campaign_id: Identifier placed in front of ``/insights`` in the path.

	    Returns:
	        Whatever ``call_graph_api`` returns for that request.
	    """
	    endpoint = f"{campaign_id}/insights"
	    query = {"date_preset": "last_90d"}
	    query["fields"] = "adset_id,adset_name,impressions,spend"
	    query["level"] = "adset"
	    query["access_token"] = ACCESS_TOKEN
	    return call_graph_api(endpoint, query)


	def get_ads(adset_id):
	    """Request ad-level insights for an adset, broken down by age and gender.

	    The request uses the ``last_90d`` preset, asks for up to 1000 rows and
	    includes the video-play and ``purchase_roas`` fields.

	    Args:
	        adset_id: Identifier placed in front of ``/insights`` in the path.

	    Returns:
	        Whatever ``call_graph_api`` returns for that request.
	    """
	    requested_fields = "ad_name,ad_id,impressions,spend,video_play_actions,video_p25_watched_actions,video_p50_watched_actions,video_p75_watched_actions,video_p100_watched_actions,video_play_curve_actions,purchase_roas"
	    query = dict(
	        date_preset="last_90d",
	        fields=requested_fields,
	        breakdowns="age,gender",
	        limit=1000,
	        level="ad",
	        access_token=ACCESS_TOKEN,
	    )
	    return call_graph_api(f"{adset_id}/insights", query)


	def save_image_from_url(url, filename):
	    """Download ``url`` and write the response body to ``filename``.

	    Despite the name this is used for videos as well as images. The parent
	    directory of ``filename`` is created when missing.

	    Args:
	        url: Address to fetch with an HTTP GET.
	        filename: Destination path, opened in binary write mode.

	    Returns:
	        ``True`` when the server answered 200 and the body was written,
	        ``False`` for any other status code.

	    Raises:
	        requests.exceptions.RequestException: On connection problems or when
	            the server does not respond within the timeout.
	    """
	    # (connect, read) timeouts so a stalled CDN cannot hang the app forever.
	    # The context manager returns the streamed connection to the pool.
	    with requests.get(url, stream=True, timeout=(10, 60)) as res:
	        if res.status_code != 200:
	            return False

	        target_dir = os.path.dirname(filename)
	        if target_dir:
	            os.makedirs(target_dir, exist_ok=True)

	        with open(filename, 'wb') as f:
	            # iter_content undoes gzip/deflate transfer encoding, which a
	            # raw socket copy would write to disk undecoded.
	            for chunk in res.iter_content(chunk_size=64 * 1024):
	                f.write(chunk)
	    return True

	def _url_extension(url, default):
	    """Return the text after the last dot of ``url`` (query string ignored), or ``default`` if it is longer than 4 characters."""
	    ext = url.split("?")[0].split(".")[-1]
	    return default if len(ext) > 4 else ext


	def _save_asset(ad_id, url, default_ext):
	    """Download ``url`` to ``assets/<ad_id>.<ext>`` and return ``save_image_from_url``'s result."""
	    ext = _url_extension(url, default_ext)
	    return save_image_from_url(url, os.path.join("assets", f'{ad_id}.{ext}'))


	def get_creative_assets(ad_id):
	    """Download the creative (video, image or thumbnail) of an ad into ``assets/``.

	    Nothing is done when ``assets/<ad_id>.png``, ``.mp4`` or ``.jpg`` already
	    exists. Otherwise the ad's creative is looked up and the first available
	    source is tried: its video, its image URL, or the media attached to its
	    object story. If none of those was saved, the creative's 512x512
	    thumbnail is downloaded instead.

	    Args:
	        ad_id: Ad identifier; also used as the file name stem.

	    Returns:
	        None. The result is the file written under ``assets/``.

	    Raises:
	        KeyError: When a Graph API answer lacks an expected key (for example
	            an error payload without ``"creative"``).
	    """
	    # checking if the asset already exists
	    # NOTE(review): only these three extensions are recognised here and by the
	    # code that displays assets; a download saved as e.g. ".jpeg" or ".webp"
	    # will be fetched again on every call.
	    if any(os.path.exists(f'assets/{ad_id}.{known}') for known in ("png", "mp4", "jpg")):
	        return

	    creative = call_graph_api(
	        f"{ad_id}",
	        {
	            "fields": "creative{video_id,id,effective_object_story_id,image_url}",
	            "access_token": ACCESS_TOKEN,
	        },
	    )["creative"]
	    saved = False
	    print("-" * 10)

	    if "video_id" in creative:
	        # download video
	        video_source = call_graph_api(
	            f'{creative["video_id"]}',
	            {"fields": "source", "access_token": ACCESS_TOKEN},
	        )["source"]
	        saved = _save_asset(ad_id, video_source, "mp4")

	    elif "image_url" in creative:
	        saved = _save_asset(ad_id, creative["image_url"], "png")

	    elif "effective_object_story_id" in creative:
	        attachments = call_graph_api(
	            creative["effective_object_story_id"],
	            {"fields": "attachments", "access_token": ACCESS_TOKEN},
	        )["attachments"]
	        # NOTE(review): this expects "media" directly on the attachments
	        # object; confirm against the real payload shape.
	        if "media" in attachments:
	            media = attachments["media"]
	            if "video" in media:
	                # Videos fall back to ".mp4" (the original used "png" here).
	                saved = _save_asset(ad_id, media["video"]["source"], "mp4")
	            elif "source" in media:
	                # Previously this case indexed media["video"] and raised
	                # KeyError. Presumably "source" is the video URL itself —
	                # TODO confirm.
	                saved = _save_asset(ad_id, media["source"], "mp4")
	            elif "image" in media:
	                # Images fall back to ".png" (the original used "mp4" here).
	                saved = _save_asset(ad_id, media["image"]["src"], "png")

	    if not saved:
	        # Last resort: the creative's thumbnail.
	        thumbnail_url = call_graph_api(
	            f'{creative["id"]}',
	            {
	                "fields": "thumbnail_url",
	                "access_token": ACCESS_TOKEN,
	                "thumbnail_width": 512,
	                "thumbnail_height": 512,
	            },
	        )["thumbnail_url"]
	        saved = _save_asset(ad_id, thumbnail_url, "jpg")

	def call_graph_api(url, params):
	    """GET a path on the Graph API (v19.0) and return the decoded JSON body.

	    The HTTP status is not checked, so an error payload is returned to the
	    caller like any other body.

	    Args:
	        url: Path appended to ``https://graph.facebook.com/v19.0/``.
	        params: Query parameters, passed through to ``requests.get``.

	    Returns:
	        The response body parsed with ``json.loads``.

	    Raises:
	        requests.exceptions.RequestException: On connection problems or when
	            the API does not answer within the timeout.
	        json.JSONDecodeError: When the body is not valid JSON.
	    """
	    base_url = "https://graph.facebook.com/v19.0/"
	    # Without a timeout a stalled request blocks the whole script run.
	    response = requests.get(base_url + url, params=params, timeout=30)
	    return json.loads(response.text)


	def top_n_ads(df, n=5):
	    """Return the local asset paths of the first ``n`` ads in ``df``.

	    For each ``ad_id`` in the first ``n`` rows, the first existing file among
	    ``assets/<ad_id>.png``, ``.mp4`` and ``.jpg`` (checked in that order) is
	    collected. Ads with no such file are skipped.

	    Args:
	        df: DataFrame with an ``ad_id`` column, already in ranking order.
	        n: Number of leading rows to consider.

	    Returns:
	        List of the paths found, in row order.
	    """
	    found = []
	    for ad_id in df.head(n)["ad_id"].values:
	        candidates = (f'assets/{ad_id}.{ext}' for ext in ("png", "mp4", "jpg"))
	        match = next((path for path in candidates if os.path.exists(path)), None)
	        if match is not None:
	            found.append(match)
	    return found


	def _roas_audience_report(df, audience):
	    """Rank ads by summed ROAS per unit of spend and ask the model what the top ones share.

	    Args:
	        df: Rows with ``ad_id``, ``purchase_roas`` and ``spend`` columns.
	        audience: Phrase inserted into the prompt after "ads" (for example
	            ``"published to men"``).

	    Returns:
	        ``{"keywords": [...], "insights": str}`` where the keywords are the
	        concept names parsed from the first model answer and the insights are
	        the text of the second (summary) answer.
	    """
	    # Sum only the two columns the ranking needs. Summing every column also
	    # tries to add names, breakdown labels and list-valued cells, which newer
	    # pandas versions may refuse.
	    ranked = df.groupby("ad_id")[["purchase_roas", "spend"]].sum().reset_index()
	    # Zero spend yields NaN/inf here; NaN rows sort last by default.
	    ranked["relative_roas"] = ranked["purchase_roas"] / ranked["spend"]
	    ranked = ranked.sort_values("relative_roas", ascending=False)

	    image_paths = top_n_ads(ranked)
	    # NOTE(review): "For each like" in the prompt is presumably a typo for
	    # "For each line"; left untouched so model behaviour does not change.
	    response = call_gpt_vision(client, image_paths, f"You are a marketing analyst and your task is to find common features between the most performatives ads {audience}. You are given the top 5 most perfomative ads, and we expect you to return 5 keywords and its explanation that defines what makes a good ad that show an excellent ROAS. Return it as a list of 5 concepts and its explanation, using the provided ads as example. Try to use nice categories to describe the features (you can use some names like `minimalist design`, `Clear message`, etc). Also, pay attention if the ads are mostly images or videos, this is important to say. The output MUST contain one concept per line. For each like, follow the structure: <concept>:<explanation>.")
	    image_winner_concepts = parse_tags_from_content(response)

	    # Second, text-only call: condense the parsed concepts into a paragraph.
	    response = call_gpt_vision(client, [], f"Following, you have the key features that makes an ad a performative ad. Your task is to group this information and summarize in a nice paragraph that will be presented to the marketing team. Be concise. Features:\n{image_winner_concepts}")
	    insights = response.choices[0].message.content

	    return {"keywords": [concept["name"] for concept in image_winner_concepts], "insights": insights}


	def perform_analysis(df, objective):
	    """Run the analysis configured for ``objective`` on the ad rows in ``df``.

	    Only the ``"ROAS"`` analysis exists. It is produced three times: for all
	    rows, for rows whose ``gender`` is ``"male"`` and for rows whose
	    ``gender`` is ``"female"``. Each report costs two model calls.

	    Args:
	        df: Ad rows with ``ad_id``, ``gender``, ``purchase_roas`` and
	            ``spend`` columns.
	        objective: Key into ``ANALYSIS_TYPE``.

	    Returns:
	        A dict with the keys ``"General"``, ``"Male"`` and ``"Female"``, each
	        mapping to ``{"keywords": [...], "insights": str}``; ``None`` when the
	        objective maps to an analysis other than ``"ROAS"``.

	    Raises:
	        KeyError: When ``objective`` is not in ``ANALYSIS_TYPE`` or a required
	            column is missing.
	    """
	    # - TS to CTR ratio analysis
	    # - ROAS analysis (I will see the better metric here to use)
	    # - Video drop off analysis
	    if ANALYSIS_TYPE[objective] == "ROAS":
	        # Dict values are evaluated top to bottom, so the model is queried in
	        # the order general, male, female.
	        return {
	            "General": _roas_audience_report(df, "of the company"),
	            "Male": _roas_audience_report(df[df["gender"] == "male"], "published to men"),
	            "Female": _roas_audience_report(df[df["gender"] == "female"], "published to women"),
	        }
	    return None

	def format_adsets(campaign_id):
	    """Button callback: replace the campaign list with the campaign's adsets.

	    Clears the ``st_campaigns`` placeholder, fetches the adsets with
	    ``get_adsets`` and renders one popover per adset inside ``st_adsets``,
	    each with its impressions, spend and a "View Ads" button wired to
	    ``format_ads``.

	    Args:
	        campaign_id: Campaign whose adsets are listed.
	    """
	    st_campaigns.empty()
	    adsets = get_adsets(campaign_id)
	    with st_adsets.container():
	        st.title("Adsets")
	        for adset in adsets["data"]:
	            with st.popover(adset["adset_name"]):
	                st.markdown("Impressions: " + str(adset["impressions"]))
	                st.markdown("Total Spend: US$" + str(adset["spend"]))
	                st.button(
	                    "View Ads",
	                    # Keyed on the id, not the name: two adsets may share a
	                    # name, and duplicate widget keys make Streamlit raise.
	                    key=f"adset_{adset['adset_id']}",
	                    on_click=format_ads,
	                    kwargs={"adset_id": adset["adset_id"]},
	                )


	# Button callback: clears the adset list, loads the ads of `adset_id` with
	# get_ads, normalises the numeric columns, and renders one popover per ad with
	# three tabs (Creative, Analytics, Video Analysis) plus a "See analysis"
	# expander filled from perform_analysis.
	# NOTE(review): the indentation of this function was lost in this copy of the
	# file, so the nesting described in the comments below is inferred — confirm
	# against the real source before editing.
	def format_ads(adset_id):
	st_adsets.empty()
	# NOTE(review): there is no `global BIG_DATASET` statement in this function,
	# so this assignment creates a local name; the module-level BIG_DATASET is
	# never updated from here.
	BIG_DATASET = None
	ads = get_ads(adset_id)
	df_ads = pd.DataFrame(ads["data"])
	# Breakdown columns used for the charts; fixed to gender, the interactive
	# selector below is disabled.
	options = ["gender"] #st.multiselect(
	# "Which breakdowns do you want to see?", ["gender", "age"], ["gender"]
	# )
	df_ads["spend"] = df_ads["spend"].astype(float)
	df_ads["impressions"] = df_ads["impressions"].astype(float)
	video_cols = ["video_play_actions","video_p25_watched_actions","video_p50_watched_actions","video_p75_watched_actions","video_p100_watched_actions"]
	# List-valued cells are reduced to the float "value" of their first entry;
	# anything that is not a list becomes 0.
	for col in video_cols:
	if col in df_ads.columns:
	df_ads[col] = df_ads[col].apply(lambda x: float(x[0].get("value", 0)) if isinstance(x, list) else 0)

	# Same reduction for the ROAS column, when the API returned it.
	if "purchase_roas" in df_ads.columns:
	df_ads["purchase_roas"] = df_ads["purchase_roas"].apply(lambda x: float(x[0].get("value", 0)) if isinstance(x, list) else 0)

	# NOTE(review): BIG_DATASET was set to None a few lines above, so the first
	# branch is always taken and the concat branch never runs.
	if BIG_DATASET is None:
	BIG_DATASET = df_ads
	else:
	BIG_DATASET = pd.concat([BIG_DATASET, df_ads])
	# NOTE(review): whether this write belongs to the else branch or runs
	# unconditionally cannot be told from this copy.
	BIG_DATASET.to_csv("big_dataset.csv")
	with st_ads.container():
	with st.expander("See analysis", expanded=False):
	# Placeholder that is filled with the analysis tabs near the end.
	analysis = st.empty()

	# One popover per distinct ad; the creative is downloaded first if needed.
	for i, ad in enumerate(df_ads["ad_id"].unique()):
	get_creative_assets(ad)
	ad_name = df_ads[df_ads["ad_id"] == ad]["ad_name"].values[0]
	with st.popover(ad_name):
	tab1, tab2, tab3 = st.tabs(["Creative", "Analytics", "Video Analysis"])
	# Rows of the current ad only.
	df_tmp = df_ads[df_ads["ad_id"] == ad]
	with tab2:
	if len(options) >= 1:
	# Sankey data: node 0 is "Total impressions"; every breakdown value gets
	# its own node and a link from node 0 weighted by its impressions.
	label = ["Total impressions"]
	source = []
	target = []
	value = []
	for option in options:
	df_g_tmp = df_tmp.groupby(option).sum()
	df_g_tmp = df_g_tmp.reset_index()
	for imp, v in df_g_tmp[["impressions", option]].values:
	label.append(v)
	source.append(0)
	target.append(len(label) - 1)
	value.append(imp)

	fig = go.Figure(
	data=[
	go.Sankey(
	node=dict(
	pad=15,
	thickness=20,
	line=dict(color="black", width=0.5),
	label=label,
	color="blue",
	),
	link=dict(
	source=source, target=target, value=value
	),
	)
	]
	)
	fig.update_layout(title_text="Basic Sankey Diagram", font_size=10)
	st.plotly_chart(fig, use_container_width=True)

	# Grouped bar chart of summed ROAS and spend per breakdown value.
	if "purchase_roas" in df_tmp.columns:
	df_roas = df_tmp.groupby(options)[["spend","purchase_roas"]].sum().reset_index().sort_values("purchase_roas", ascending=False)
	print(df_roas)
	# NOTE(review): the x labels are read from df_tmp (unaggregated rows) while
	# the y values come from the aggregated, sorted df_roas, so their length and
	# order may not match — probably meant df_roas[options].
	values = [str(v) for v in df_tmp[options].values]
	fig = go.Figure(data=[
	go.Bar(name='ROAS', x=values, y=df_roas["purchase_roas"]),
	go.Bar(name='Spend', x=values, y=df_roas["spend"])
	])
	# Change the bar mode
	fig.update_layout(barmode='group')
	st.plotly_chart(fig, use_container_width=True)

	with tab3:
	if "video_play_actions" in df_tmp.columns:
	# Column sums for the current ad; [1:] drops the first entry, which is the
	# summed ad_id column.
	values = df_ads[["ad_id","video_play_actions","video_p25_watched_actions","video_p50_watched_actions","video_p75_watched_actions","video_p100_watched_actions"]].groupby("ad_id").get_group(ad).sum().values[1:]
	labels = ["Total video plays","Video plays until 25%","Video plays until 50%","Video plays until 75%","Video plays until 100%"]
	print(values)
	# The funnel is drawn only when the ad has at least one video play.
	if values[0] > 0:
	st.plotly_chart(create_video_plays_funnel(values, labels), use_container_width=True)
	with tab1:
	# Show whichever downloaded asset exists, checked in the order png, mp4, jpg.
	if os.path.exists(f'assets/{ad}.png'):
	st.image(f'assets/{ad}.png', caption='Creative', use_column_width=True)
	elif os.path.exists(f'assets/{ad}.mp4'):
	st.video(f'assets/{ad}.mp4')
	elif os.path.exists(f'assets/{ad}.jpg'):
	st.image(f'assets/{ad}.jpg', caption='Creative', use_column_width=True)

	# NOTE(review): it is unclear from this copy whether this block runs once per
	# ad or once after the loop. Either way it passes df_tmp (the rows of a single
	# ad) to perform_analysis — confirm whether df_ads was intended.
	with analysis.container():
	report = perform_analysis(df_tmp, "OUTCOME_SALES")
	tabs = st.tabs(report.keys())
	tabs_names = list(report.keys())
	# This loop reuses `i`, shadowing the index of the ad loop above.
	for i, tab in enumerate(tabs):
	with tab:
	st.multiselect("", report[tabs_names[i]]["keywords"], report[tabs_names[i]]["keywords"], key=f"{ad}_{i}")
	st.write(report[tabs_names[i]]["insights"])

	def create_video_plays_funnel(funnel_data, funnel_title):
	    """Build a Plotly funnel figure.

	    Args:
	        funnel_data: Values passed as the funnel's ``x``.
	        funnel_title: Stage labels passed as the funnel's ``y``.

	    Returns:
	        A ``go.Figure`` containing a single ``go.Funnel`` trace.
	    """
	    funnel_trace = go.Funnel(y=funnel_title, x=funnel_data)
	    return go.Figure(funnel_trace)

	# Script entry: on the first run of a session, create the three placeholders
	# (campaigns, adsets, ads) and render the campaign list. Later runs skip this
	# block because the "initiated" flag is kept in the session state.
	if "initiated" not in st.session_state:
	    st.session_state["initiated"] = False

	if not st.session_state["initiated"]:
	    st_campaigns = st.empty()
	    st_adsets = st.empty()
	    st_ads = st.empty()
	    st.session_state["initiated"] = True
	    with st_campaigns.container():
	        st.title("Campaigns")
	        # Fetch once and reuse the result for both the debug print and the
	        # rendering loop (the API used to be called twice).
	        campaigns = get_campaigns(ACCOUNT_ID)
	        print(campaigns)
	        for c in campaigns["data"]:
	            with st.popover(c["campaign_name"]):
	                st.markdown("Impressions: " + str(c["impressions"]))
	                st.markdown("Total Spend: US$" + str(c["spend"]))
	                st.markdown("Objective: " + str(c["objective"]))
	                st.button(
	                    "View Adsets",
	                    # Keyed on the id, not the name: two campaigns may share
	                    # a name, and duplicate widget keys make Streamlit raise.
	                    key=f"campaign_{c['campaign_id']}",
	                    on_click=format_adsets,
	                    kwargs={"campaign_id": c["campaign_id"]},
	                )