Spaces:

Bestever
/

st_report_2

Runtime error

File size: 19,513 Bytes

import streamlit as st
import numpy as np
import plotly.figure_factory as ff

import plotly.graph_objects as go
import plotly.express as px


import requests
import json
import pandas as pd
import shutil
import os
from openai import AzureOpenAI
import base64


# st.page_link("report.py", label="Home", icon="🏠")
# st.page_link("pages/page_1.py", label="Page 1", icon="1️⃣")
# st.page_link("pages/page_2.py", label="Page 2", icon="2️⃣", disabled=True)

# Meta (Facebook) ad account and page this report is built for.
ACCOUNT_ID = "act_416207949073936"
PAGE_ID = "63257509478"

# Secrets are read from the environment; both are None when the variable is unset.
# SECURITY: never print or log these values - the access token grants API access
# to the ad account and would otherwise end up in the hosting platform's logs.
OPENAI_API = os.getenv("OPENAI_API")
ACCESS_TOKEN = os.getenv("ACCESS_TOKEN")
BIG_DATASET = None

# Maps a campaign objective to the kind of analysis perform_analysis() runs for it.
ANALYSIS_TYPE = {
    "OUTCOME_SALES": "ROAS",
}

# Azure OpenAI endpoint settings for the vision deployment.
API_BASE = 'https://bestever-vision.openai.azure.com/'
DEPLOYMENT_NAME = 'vision'
API_VERSION = '2023-12-01-preview' # this might change in the future
API_URL = f"{API_BASE}openai/deployments/{DEPLOYMENT_NAME}/extensions"

# Shared client used by every call_gpt_vision() invocation in this file.
client = AzureOpenAI(
    api_key=OPENAI_API,
    api_version=API_VERSION,
    base_url=API_URL,
)

def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 text string."""
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')


def call_gpt_vision(client, images_path, user_prompt):
    """Send a prompt plus optional images to the vision chat deployment.

    The user message starts with ``user_prompt`` as a text part, followed by
    one ``image_url`` part per entry of ``images_path``. Every file is
    base64-encoded with ``encode_image`` and embedded as a
    ``data:image/jpeg`` URL, whatever its real extension is.

    Returns the object produced by ``client.chat.completions.create``
    unchanged; the request is capped at 2000 completion tokens.
    """
    # Encode every image up front so a bad path fails before any request is made.
    data_urls = [
        f"data:image/jpeg;base64,{encode_image(image_path)}"
        for image_path in images_path
    ]

    message_parts = [{"type": "text", "text": user_prompt}]
    for data_url in data_urls:
        message_parts.append({"type": "image_url", "image_url": {"url": data_url}})

    user_message = {"role": "user", "content": message_parts}
    return client.chat.completions.create(
        model=DEPLOYMENT_NAME,
        messages=[user_message],
        max_tokens=2000,
    )


def parse_tags_from_content(response):
    """Parse ``<concept>:<explanation>`` lines out of a chat completion.

    Reads ``response.choices[0].message.content`` and turns every line that
    contains a colon into ``{"name": ..., "metadata": {"details": ...}}``.
    Only the first colon separates name from details, so explanations may
    contain further colons. Names and details are stripped of surrounding
    whitespace, and lines without a colon or with an empty name are skipped.

    Returns an empty list when the message has no content (``None`` or empty
    string) instead of raising.
    """
    content = response.choices[0].message.content
    if not content:
        # A completion can come back without text; treat that as "no tags".
        return []

    tags = []
    # splitlines() also copes with "\r\n" endings, which would otherwise
    # leave a trailing "\r" in every explanation.
    for line in content.splitlines():
        name, separator, details = line.partition(":")
        name = name.strip()
        if not separator or not name:
            continue
        tags.append({"name": name, "metadata": {"details": details.strip()}})
    return tags


def get_campaigns(account_id):
    """Request campaign-level insights for ``account_id`` (``last_90d`` preset).

    Returns whatever ``call_graph_api`` returns for the ``<account_id>/insights``
    endpoint.
    """
    insight_fields = ["campaign_id", "campaign_name", "impressions", "spend", "objective"]
    query = dict(
        date_preset="last_90d",
        fields=",".join(insight_fields),
        level="campaign",
        access_token=ACCESS_TOKEN,
    )
    return call_graph_api(f"{account_id}/insights", query)


def get_adsets(campaign_id):
    """Request ad-set-level insights for ``campaign_id`` (``last_90d`` preset).

    Returns whatever ``call_graph_api`` returns for the
    ``<campaign_id>/insights`` endpoint.
    """
    insight_fields = ["adset_id", "adset_name", "impressions", "spend"]
    query = dict(
        date_preset="last_90d",
        fields=",".join(insight_fields),
        level="adset",
        access_token=ACCESS_TOKEN,
    )
    return call_graph_api(f"{campaign_id}/insights", query)


def get_ads(adset_id):
    """Request ad-level insights for ``adset_id``, broken down by age and gender.

    Asks for up to 1000 rows over the ``last_90d`` preset, including the video
    watch metrics and ``purchase_roas``. Returns whatever ``call_graph_api``
    returns for the ``<adset_id>/insights`` endpoint.
    """
    insight_fields = [
        "ad_name",
        "ad_id",
        "impressions",
        "spend",
        "video_play_actions",
        "video_p25_watched_actions",
        "video_p50_watched_actions",
        "video_p75_watched_actions",
        "video_p100_watched_actions",
        "video_play_curve_actions",
        "purchase_roas",
    ]
    query = dict(
        date_preset="last_90d",
        fields=",".join(insight_fields),
        breakdowns="age,gender",
        limit=1000,
        level="ad",
        access_token=ACCESS_TOKEN,
    )
    return call_graph_api(f"{adset_id}/insights", query)


def save_image_from_url(url, filename):
    """Download ``url`` and write the response body to ``filename``.

    Despite the name this is used for both images and videos. The parent
    directory of ``filename`` is created when it does not exist yet.

    Returns True when the server answered with HTTP 200 and the file was
    written, False for any other status code. Network errors and timeouts
    from ``requests`` propagate to the caller.
    """
    # The context manager releases the streamed connection even when the
    # status is not 200 or the copy fails; the timeout keeps a stalled
    # server from blocking the app forever.
    with requests.get(url, stream=True, timeout=60) as res:
        if res.status_code != 200:
            return False

        # res.raw bypasses requests' content decoding; without this flag a
        # gzip/deflate-encoded body would be written to disk still compressed.
        res.raw.decode_content = True

        parent_dir = os.path.dirname(filename)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        with open(filename, 'wb') as f:
            shutil.copyfileobj(res.raw, f)
    return True

def _asset_extension(url, default):
    """Return the file extension found in ``url``, or ``default`` when unusable.

    The query string is ignored and only the last path segment is inspected.
    Extensions longer than four characters (or missing ones) are treated as
    not being a real extension. The result is lower-cased and ``jpeg`` is
    mapped to ``jpg`` because the rest of this file only looks for ``.png``,
    ``.mp4`` and ``.jpg`` assets.
    """
    last_segment = url.split("?")[0].rsplit("/", 1)[-1]
    _, dot, ext = last_segment.rpartition(".")
    if not dot or not ext or len(ext) > 4:
        return default
    ext = ext.lower()
    return "jpg" if ext == "jpeg" else ext


def _download_asset(ad_id, url, default_ext):
    """Save ``url`` as ``assets/<ad_id>.<ext>`` and return whether it succeeded."""
    ext = _asset_extension(url, default_ext)
    return save_image_from_url(url, os.path.join("assets", f'{ad_id}.{ext}'))


def _story_media_urls(media):
    """Return ``(video_url, image_url)`` found in an attachment ``media`` dict."""
    video_url = media.get("source")
    if not isinstance(video_url, str):
        video = media.get("video")
        video_url = video.get("source") if isinstance(video, dict) else None
    image = media.get("image")
    image_url = image.get("src") if isinstance(image, dict) else None
    return video_url, image_url


def get_creative_assets(ad_id):
    """Download the creative (video, image or thumbnail) of ``ad_id`` into ``assets/``.

    Nothing is downloaded when ``assets/<ad_id>.png``, ``.mp4`` or ``.jpg``
    already exists. Otherwise the ad's creative is looked up through the
    Graph API and the first available source is saved, tried in this order:
    the creative's video, its ``image_url``, the media of its object story,
    and finally a 512x512 thumbnail of the creative.

    Missing fields in any Graph API answer (for example an error payload)
    make the function fall through to the next source instead of raising.
    Returns None.
    """
    # checking if the asset already exists
    if any(os.path.exists(f'assets/{ad_id}.{ext}') for ext in ("png", "mp4", "jpg")):
        return
    os.makedirs("assets", exist_ok=True)

    params = {
        "fields": "creative{video_id,id,effective_object_story_id,image_url}",
        "access_token": ACCESS_TOKEN,
    }
    creative = call_graph_api(f"{ad_id}", params).get("creative")
    if not creative:
        return

    saved = False
    if "video_id" in creative:
        # download video
        video_params = {
            "fields": "source",
            "access_token": ACCESS_TOKEN,
        }
        video_source = call_graph_api(f'{creative["video_id"]}', video_params).get("source")
        if video_source:
            saved = _download_asset(ad_id, video_source, "mp4")

    elif "image_url" in creative:
        saved = _download_asset(ad_id, creative["image_url"], "png")

    elif "effective_object_story_id" in creative:
        object_story_params = {
            "fields": "attachments",
            "access_token": ACCESS_TOKEN,
        }
        story = call_graph_api(creative["effective_object_story_id"], object_story_params)
        attachments = story.get("attachments") or {}
        # NOTE(review): Graph API edges are normally wrapped as {"data": [...]};
        # the flat shape the original code expected is still accepted - confirm
        # against a real response.
        wrapped = attachments.get("data")
        entries = wrapped if isinstance(wrapped, list) else [attachments]
        for entry in entries:
            video_url, image_url = _story_media_urls(entry.get("media") or {})
            # Video falls back to "mp4" and image to "png"; the original had
            # these two defaults swapped.
            if video_url:
                saved = _download_asset(ad_id, video_url, "mp4")
            elif image_url:
                saved = _download_asset(ad_id, image_url, "png")
            if saved:
                break

    if not saved and "id" in creative:
        # Last resort: a rendered thumbnail of the creative.
        creative_params = {
            "fields": "thumbnail_url",
            "access_token": ACCESS_TOKEN,
            "thumbnail_width": 512,
            "thumbnail_height": 512,
        }
        thumbnail_url = call_graph_api(f'{creative["id"]}', creative_params).get("thumbnail_url")
        if thumbnail_url:
            _download_asset(ad_id, thumbnail_url, "jpg")

def call_graph_api(url, params):
    """GET ``url`` (relative to the Graph API v19.0 root) and return the decoded JSON.

    ``params`` is sent as the query string. The HTTP status is not checked:
    whatever JSON body the server returns is handed back, so callers must be
    prepared for payloads that lack the keys they expect.

    Raises ``json.JSONDecodeError`` when the body is not valid JSON and lets
    ``requests`` network/timeout errors propagate.
    """
    base_url = "https://graph.facebook.com/v19.0/"
    # Without a timeout a stalled connection would block the app indefinitely.
    response = requests.get(base_url + url, params=params, timeout=30)
    return json.loads(response.text)


def top_n_ads(df, n=5):
    """Return the asset paths of the first ``n`` ads in ``df`` that have one on disk.

    For each ``ad_id`` in the first ``n`` rows, the first existing file among
    ``assets/<ad_id>.png``, ``.mp4`` and ``.jpg`` (in that order) is collected.
    Ads with no downloaded asset are skipped, so the result may be shorter
    than ``n``.
    """
    found_paths = []
    for ad_id in df.head(n)["ad_id"].values:
        for extension in ("png", "mp4", "jpg"):
            candidate = f'assets/{ad_id}.{extension}'
            if os.path.exists(candidate):
                found_paths.append(candidate)
                break
    return found_paths


def _roas_segment_report(df, audience):
    """Rank the ads in ``df`` by relative ROAS and ask the model what the best share.

    ``audience`` is the phrase inserted into the prompt after "the most
    performatives ads" (for example ``"published to men"``).

    Returns ``{"keywords": [...], "insights": str}`` where the keywords are
    the concept names parsed from the first model answer and the insights are
    the text of a second, summarising answer.
    """
    # Aggregate only the two columns the ranking needs; summing the whole
    # frame would also try to add up names, breakdown labels and list cells.
    totals = df.groupby("ad_id")[["spend", "purchase_roas"]].sum().reset_index()
    totals["relative_roas"] = totals["purchase_roas"] / totals["spend"]
    totals = totals.sort_values("relative_roas", ascending=False)

    image_paths = top_n_ads(totals)
    keywords_prompt = (
        "You are a marketing analyst and your task is to find common features between the most performatives ads "
        f"{audience}. "
        "You are given the top 5 most perfomative ads, and we expect you to return 5 keywords and its explanation that defines what makes a good ad that show an excellent ROAS. "
        "Return it as a list of 5 concepts and its explanation, using the provided ads as example. "
        "Try to use nice categories to describe the features (you can use some names like `minimalist design`, `Clear message`, etc). "
        "Also, pay attention if the ads are mostly images or videos, this is important to say. "
        "The output MUST contain one concept per line. "
        "For each like, follow the structure: <concept>:<explanation>."
    )
    response = call_gpt_vision(client, image_paths, keywords_prompt)
    image_winner_concepts = parse_tags_from_content(response)

    # Second, text-only round trip: condense the parsed concepts into a paragraph.
    response = call_gpt_vision(client, [], f"Following, you have the key features that makes an ad a performative ad. Your task is to group this information and summarize in a nice paragraph that will be presented to the marketing team. Be concise. Features:\n{image_winner_concepts}")
    insights = response.choices[0].message.content

    return {
        "keywords": [concept["name"] for concept in image_winner_concepts],
        "insights": insights,
    }


def perform_analysis(df, objective):
    """Run the analysis configured for ``objective`` on the ad insights in ``df``.

    ``objective`` must be a key of ``ANALYSIS_TYPE`` (a ``KeyError`` is raised
    otherwise). For the ``"ROAS"`` analysis, ``df`` needs the columns
    ``ad_id``, ``gender``, ``spend`` and ``purchase_roas``; three reports are
    produced - all rows, rows with gender ``"male"`` and rows with gender
    ``"female"`` - and returned as
    ``{"General": ..., "Male": ..., "Female": ...}``, each value being the
    ``{"keywords", "insights"}`` dict built by ``_roas_segment_report``.

    Returns None for analysis types other than ``"ROAS"``.
    """
    # - TS to CTR ratio analysis
    # - ROAS analysis (I will see the better metric here to use)
    # - Video drop off analysis
    if ANALYSIS_TYPE[objective] != "ROAS":
        return None

    return {
        "General": _roas_segment_report(df, "of the company"),
        "Male": _roas_segment_report(df[df["gender"] == "male"], "published to men"),
        "Female": _roas_segment_report(df[df["gender"] == "female"], "published to women"),
    }

def format_adsets(campaign_id):
    """Button callback: replace the campaign list with the ad sets of ``campaign_id``.

    Clears the ``st_campaigns`` placeholder and renders one popover per ad set
    into ``st_adsets``, each with its impressions, spend and a "View Ads"
    button that triggers ``format_ads`` for that ad set. When the Graph API
    answer carries no ``"data"`` list an error message is shown instead.
    """
    st_campaigns.empty()
    adsets = get_adsets(campaign_id)
    with st_adsets.container():
        st.title("Adsets")
        if "data" not in adsets:
            # Typically an API error payload; surface it instead of crashing.
            error_message = (adsets.get("error") or {}).get("message", "unknown error")
            st.error(f"Could not load ad sets: {error_message}")
            return
        for adset in adsets["data"]:
            with st.popover(adset["adset_name"]):
                st.markdown("**Impressions**: " + str(adset["impressions"]))
                st.markdown("**Total Spend**: US$" + str(adset["spend"]))
                st.button(
                    "View Ads",
                    # Widget keys must be unique; ad set names can repeat, ids cannot.
                    key=f'adset_{adset["adset_id"]}',
                    on_click=format_ads,
                    kwargs={"adset_id": adset["adset_id"]},
                )


def _first_action_value(cell):
    """Return the ``"value"`` of the first entry of an action list as float, else 0.0.

    Cells that are not a list (for example missing values) or that are an
    empty list yield 0.0.
    """
    if isinstance(cell, list) and cell:
        return float(cell[0].get("value", 0))
    return 0.0


def _impressions_sankey(df_ad, breakdowns):
    """Build a Sankey figure splitting total impressions by each breakdown column."""
    labels = ["Total impressions"]
    sources = []
    targets = []
    values = []
    for breakdown in breakdowns:
        # Sum only the impressions; other columns hold text or lists.
        per_group = df_ad.groupby(breakdown)["impressions"].sum()
        for group_name, impressions in per_group.items():
            labels.append(group_name)
            sources.append(0)
            targets.append(len(labels) - 1)
            values.append(impressions)

    fig = go.Figure(
        data=[
            go.Sankey(
                node=dict(
                    pad=15,
                    thickness=20,
                    line=dict(color="black", width=0.5),
                    label=labels,
                    color="blue",
                ),
                link=dict(source=sources, target=targets, value=values),
            )
        ]
    )
    fig.update_layout(title_text="Basic Sankey Diagram", font_size=10)
    return fig


def _roas_bars(df_ad, breakdowns):
    """Build a grouped bar figure of summed ROAS and spend per breakdown group."""
    df_roas = (
        df_ad.groupby(breakdowns)[["spend", "purchase_roas"]]
        .sum()
        .reset_index()
        .sort_values("purchase_roas", ascending=False)
    )
    # Take the x labels from the grouped frame so they line up with the bars.
    x_labels = [" / ".join(str(part) for part in row) for row in df_roas[breakdowns].values]
    fig = go.Figure(data=[
        go.Bar(name='ROAS', x=x_labels, y=df_roas["purchase_roas"]),
        go.Bar(name='Spend', x=x_labels, y=df_roas["spend"])
    ])
    # Change the bar mode
    fig.update_layout(barmode='group')
    return fig


def format_ads(adset_id):
    """Button callback: replace the ad-set list with the ads of ``adset_id``.

    Fetches the ad insights, converts the numeric columns, writes the frame to
    ``big_dataset.csv`` and renders into ``st_ads``:

    * an expander holding the model-generated analysis of the whole ad set
      (tabs "General" / "Male" / "Female"), and
    * one popover per ad with its creative, impression/ROAS charts and a
      video-play funnel.

    Creative assets are downloaded on demand through ``get_creative_assets``.
    When the API returns no rows an info message is shown instead.
    """
    st_adsets.empty()
    ads = get_ads(adset_id)
    df_ads = pd.DataFrame(ads.get("data", []))
    if df_ads.empty:
        with st_ads.container():
            st.info("No ad insights were returned for this ad set.")
        return

    options = ["gender"] #st.multiselect(
    #     "Which breakdowns do you want to see?", ["gender", "age"], ["gender"]
    # )
    df_ads["spend"] = df_ads["spend"].astype(float)
    df_ads["impressions"] = df_ads["impressions"].astype(float)
    video_cols = ["video_play_actions","video_p25_watched_actions","video_p50_watched_actions","video_p75_watched_actions","video_p100_watched_actions"]
    video_labels = ["Total video plays","Video plays until 25%","Video plays until 50%","Video plays until 75%","Video plays until 100%"]
    for col in video_cols + ["purchase_roas"]:
        if col in df_ads.columns:
            df_ads[col] = df_ads[col].apply(_first_action_value)

    # Snapshot of the ad set currently shown. The module-level BIG_DATASET is
    # not updated here (the original only ever assigned a local of that name).
    df_ads.to_csv("big_dataset.csv")

    with st_ads.container():
        with st.expander("See analysis", expanded=False):
            analysis = st.empty()

        for ad in df_ads["ad_id"].unique():
            get_creative_assets(ad)
            df_tmp = df_ads[df_ads["ad_id"] == ad]
            ad_name = df_tmp["ad_name"].values[0]
            with st.popover(ad_name):
                tab1, tab2, tab3 = st.tabs(["Creative", "Analytics", "Video Analysis"])
                with tab2:
                    if options:
                        st.plotly_chart(_impressions_sankey(df_tmp, options), use_container_width=True)
                        if "purchase_roas" in df_tmp.columns:
                            st.plotly_chart(_roas_bars(df_tmp, options), use_container_width=True)

                with tab3:
                    if "video_play_actions" in df_tmp.columns:
                        # Columns the API did not return count as zero plays.
                        values = [
                            df_tmp[col].sum() if col in df_tmp.columns else 0
                            for col in video_cols
                        ]
                        if values[0] > 0:
                            st.plotly_chart(create_video_plays_funnel(values, video_labels), use_container_width=True)
                with tab1:
                    if os.path.exists(f'assets/{ad}.png'):
                        st.image(f'assets/{ad}.png', caption='Creative', use_column_width=True)
                    elif os.path.exists(f'assets/{ad}.mp4'):
                        st.video(f'assets/{ad}.mp4')
                    elif os.path.exists(f'assets/{ad}.jpg'):
                        st.image(f'assets/{ad}.jpg', caption='Creative', use_column_width=True)

        with analysis.container():
            # Analyse the whole ad set: the ranking inside perform_analysis
            # compares ads, so passing a single ad's rows would be meaningless.
            report = perform_analysis(df_ads, "OUTCOME_SALES")
            tabs_names = list(report.keys())
            tabs = st.tabs(tabs_names)
            for i, (tab, tab_name) in enumerate(zip(tabs, tabs_names)):
                with tab:
                    st.multiselect("", report[tab_name]["keywords"], report[tab_name]["keywords"], key=f"analysis_{adset_id}_{i}")
                    st.write(report[tab_name]["insights"])

def create_video_plays_funnel(funnel_data, funnel_title):
    """Return a plotly funnel figure.

    ``funnel_data`` supplies the x values (stage sizes) and ``funnel_title``
    the matching y values (stage labels).
    """
    funnel_trace = go.Funnel(x=funnel_data, y=funnel_title)
    return go.Figure(funnel_trace)

# First script run of a session: create the three placeholders the callbacks
# draw into and render the campaign list. Later runs skip this section; the
# button callbacks (format_adsets / format_ads) do the drawing instead.
if "initiated" not in st.session_state:
    st.session_state["initiated"] = False

if not st.session_state["initiated"]:
    st_campaigns = st.empty()
    st_adsets = st.empty()
    st_ads = st.empty()
    st.session_state["initiated"] = True
    with st_campaigns.container():
        st.title("Campaigns")
        # Fetch once and reuse; the original queried the API twice, once only to print.
        campaigns = get_campaigns(ACCOUNT_ID)
        if "data" not in campaigns:
            # Typically an API error payload (e.g. invalid or missing token).
            error_message = (campaigns.get("error") or {}).get("message", "unknown error")
            st.error(f"Could not load campaigns: {error_message}")
        for c in campaigns.get("data", []):
            with st.popover(c["campaign_name"]):
                st.markdown("**Impressions**: " + str(c["impressions"]))
                st.markdown("**Total Spend**: US$" + str(c["spend"]))
                st.markdown("**Objective**: " + str(c["objective"]))
                st.button(
                    "View Adsets",
                    # Widget keys must be unique; campaign names can repeat, ids cannot.
                    key=f'campaign_{c["campaign_id"]}',
                    on_click=format_adsets,
                    kwargs={"campaign_id": c["campaign_id"]},
                )