Romain Graux committed on
Commit
0a7969d
•
1 Parent(s): de2f52a

V0 of the app with streamlit

Browse files
Files changed (4) hide show
  1. Dockerfile +13 -0
  2. README.md +3 -5
  3. app.py +167 -9
  4. requirements.txt +1 -1
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11

# Create the unprivileged account (uid 1000) that will own and run the app.
RUN useradd -m -u 1000 user
WORKDIR /app
# WORKDIR is created as root; hand it to the app user so the running app can
# still write into its working directory once privileges are dropped.
RUN chown user:user /app

# Install the Python dependencies first so this layer is reused when only the
# application sources change.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Install the bundled spock package in editable mode.
COPY --chown=user ./spock /app/spock
RUN pip install --no-cache-dir --upgrade -e /app/spock

COPY --chown=user . /app

# Drop root privileges: the account created above was previously never used,
# so the server ran as root.
USER user
CMD ["streamlit", "run", "/app/app.py"]
README.md CHANGED
@@ -3,9 +3,7 @@ title: Volcano plots
3
  emoji: 🌋
4
  colorFrom: blue
5
  colorTo: green
6
- sdk: gradio
7
- sdk_version: 4.37.2
8
- python_version: 3.11.9
9
- app_file: app.py
10
  pinned: true
11
- ---
 
3
  emoji: 🌋
4
  colorFrom: blue
5
  colorTo: green
6
+ sdk: docker
7
+ app_port: 8501
 
 
8
  pinned: true
9
+ ---
app.py CHANGED
@@ -1,17 +1,175 @@
1
  #!/usr/bin/env python3
2
  # -*- coding: utf-8 -*-
3
 
4
- import gradio as gr
 
 
 
 
 
 
5
 
6
- CSS = """
7
- """
 
 
8
 
9
 
10
- with gr.Blocks(css=CSS) as block:
11
- gr.Markdown(
12
- """
13
- ## Volcano plots
14
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  )
16
 
17
- block.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  #!/usr/bin/env python3
2
  # -*- coding: utf-8 -*-
3
 
4
+ import io
5
+ import os
6
+ import sys
7
+ import contextlib
8
+ import pandas as pd
9
+ import streamlit as st
10
+ from navicat_spock.spock import run_spock_from_args
11
 
12
# Add spock directory to system path if not already present.
# The path is built with os.path.join so the separator is correct on every
# platform instead of hard-coding "/".
# NOTE(review): this runs after the `navicat_spock` import above, so it cannot
# help that import resolve -- confirm whether it is still needed.
spock_dir: str = os.path.join(os.path.dirname(os.path.abspath(__file__)), "spock")
if spock_dir not in sys.path:
    sys.path.append(spock_dir)
16
 
17
 
18
# Check if the dataframe contains a target column
def check_columns(df: pd.DataFrame) -> None:
    """Ensure at least one column name of ``df`` contains ``target``.

    The match is case-insensitive. Labels are converted with ``str`` before
    matching so that non-string labels (for example integer headers) are
    simply treated as non-matching instead of raising ``AttributeError``.

    Args:
        df: Dataframe whose column labels are inspected.

    Raises:
        ValueError: If no column label contains ``target``.
    """
    if not any("target" in str(col).lower() for col in df.columns):
        raise ValueError(
            "Missing the target column. Please add a column that contains `target` in the name."
        )
24
+
25
+
26
# Build a stable cache key for a dataframe
def _hash_dataframe(df: pd.DataFrame) -> bytes:
    """Return bytes identifying the labels, index and values of ``df``.

    ``df.to_numpy().tobytes()`` is not a reliable key: it ignores the column
    names and the index, and for object-dtype frames (e.g. any text column)
    it serialises object pointers rather than the values themselves.
    ``pd.util.hash_pandas_object`` hashes the actual cell contents per row.
    """
    header = "\x1f".join(map(str, df.columns)).encode("utf-8")
    row_hashes = pd.util.hash_pandas_object(df, index=True).to_numpy().tobytes()
    return header + b"\x00" + row_hashes


# Cache the function to run spock with the provided dataframe and arguments
@st.cache_data(show_spinner=False, hash_funcs={pd.DataFrame: _hash_dataframe})
def run_fn(df, *args, **kwargs):
    """Validate ``df`` and run spock on it, returning the generated figure.

    Args:
        df: Input dataframe; must pass ``check_columns``.
        *args: Forwarded unchanged to ``run_spock_from_args``.
        **kwargs: Forwarded unchanged to ``run_spock_from_args``.

    Returns:
        The first element (the figure) of the pair returned by
        ``run_spock_from_args``; the second element is discarded.

    Raises:
        ValueError: If ``df`` has no column whose name contains ``target``.
    """
    check_columns(df)
    fig, _ax = run_spock_from_args(df, *args, **kwargs)
    return fig
34
+
35
+
36
# Mock function for testing purposes
def mock_fn(df, *args, **kwargs):
    """Stand-in for the real runner: validate ``df`` and plot random values.

    Extra positional and keyword arguments are accepted and ignored.
    Returns the matplotlib figure holding a line plot of ten random samples.
    """
    import numpy as np
    import matplotlib.pyplot as plt

    check_columns(df)
    print("WORKING")

    figure, axes = plt.subplots()
    samples = np.random.rand(10)
    axes.plot(samples)
    return figure
46
+
47
+
48
# Load data from the uploaded file
def load_data(file):
    """Read an uploaded CSV or Excel file into a dataframe.

    The file type is decided from the extension of ``file.name``, compared
    case-insensitively so that names such as ``DATA.CSV`` are accepted.

    Args:
        file: File-like object exposing a ``name`` attribute.

    Returns:
        The dataframe parsed by ``pd.read_csv`` (``.csv``) or
        ``pd.read_excel`` (``.xlsx``).

    Raises:
        ValueError: If the extension is neither ``.csv`` nor ``.xlsx``.
    """
    extension = os.path.splitext(file.name)[1].lower()
    if extension == ".csv":
        return pd.read_csv(file)
    if extension == ".xlsx":
        return pd.read_excel(file)
    raise ValueError("Invalid file type. Please upload a CSV or Excel file.")
54
+
55
+
56
# Context manager to capture stdout with a timestamp
@contextlib.contextmanager
def capture_stdout_with_timestamp():
    """Temporarily redirect ``sys.stdout`` into a timestamping buffer.

    Yields:
        An ``io.StringIO`` subclass instance. Every non-blank chunk written
        to it is prefixed with ``[<pd.Timestamp.now()>] ``; whitespace-only
        chunks (such as the newline ``print`` writes separately) are stored
        unchanged. The buffer stays readable after the block exits.

    The previous ``sys.stdout`` is restored on exit, even if the body raises.
    """

    class TimestampedIO(io.StringIO):
        def write(self, msg):
            if msg.strip():  # Only add a timestamp if the message is not just a newline
                timestamped_msg = f"[{pd.Timestamp.now()}] {msg}"
            else:
                timestamped_msg = msg
            super().write(timestamped_msg)
            # Text streams must report how many characters they accepted;
            # report the caller's length, not the length with the prefix.
            return len(msg)

    new_stdout = TimestampedIO()
    old_stdout = sys.stdout
    sys.stdout = new_stdout
    try:
        yield new_stdout
    finally:
        sys.stdout = old_stdout
74
+
75
+
76
# Main function to run the Streamlit app
def main():
    """Render the Streamlit page: settings sidebar, upload, plot and logs.

    The sidebar collects the options passed to ``run_fn``. Once a file is
    uploaded it is loaded and shown as a table; pressing "Run Plot" runs the
    computation while capturing stdout, then shows the figure and the
    captured logs in two tabs. Any exception raised while loading or running
    is reported to the user as a toast instead of crashing the page.
    """
    st.title("Navicat Spock")
    st.subheader("A tool for generating volcano plots from your data")

    with st.sidebar:
        st.header("Settings")

        wp = st.number_input(
            "Weighting Power",
            min_value=0,
            value=2,
            help="Weighting power used to adjust the target values",
        )
        verb = st.number_input(
            "Verbosity",
            min_value=0,
            max_value=7,
            value=1,
            help="Verbosity level (0-7) for the logs",
        )

        # Maps the label shown in the select box to the value passed on.
        # The ``None`` key covers the "nothing selected" state of the widget.
        imputer_strat_dict = {
            None: "none",
            "Iterative": "iterative",
            "Simple": "simple",
            "KNN": "knn",
        }
        imputer_strat_value = st.selectbox(
            "Imputer Strategy",
            # Offer only the real strategies; ``None`` is not a choice.
            [label for label in imputer_strat_dict if label],
            index=None,
            help="Imputer Strategy used to fill missing values",
        )

        imputer_strat = imputer_strat_dict[imputer_strat_value]

        plotmode = st.number_input(
            "Plot Mode",
            min_value=0,
            max_value=3,
            value=1,
            help="Different plot modes",
        )
        seed = st.number_input(
            "Seed", min_value=0, value=None, help="Seed number to fix the random state"
        )
        prefit = st.toggle("Prefit", value=False)
        setcbms = st.toggle("CBMS", value=True)

    with st.expander("Instructions"):
        st.markdown(
            """
            1. Upload your data in an Excel or CSV file.
            2. View and curate your data in the table below.
            3. Click "Run Plot" to generate your plot.
            4. View the generated plot and all the associated logs in the respective tabs.
            """
        )

    uploaded_file = st.file_uploader(
        "Choose a file", type=["csv", "xlsx"], accept_multiple_files=False
    )

    if uploaded_file is not None:
        # Top-level UI boundary: any failure is surfaced to the user as a
        # toast rather than as a stack trace.
        try:
            df = load_data(uploaded_file)
            st.markdown("### Data")
            st.dataframe(df, use_container_width=True)

            if st.button("Run Plot"):
                with st.spinner(
                    "Generating plot..."
                ), capture_stdout_with_timestamp() as stdout_io:
                    result = run_fn(
                        df,
                        wp=wp,
                        verb=verb,
                        imputer_strat=imputer_strat,
                        plotmode=plotmode,
                        seed=seed,
                        prefit=prefit,
                        setcbms=setcbms,
                        fig=None,
                        ax=None,
                    )

                st.markdown("### Result")
                plot, logs = st.tabs(["Plot", "Logs"])
                with plot:
                    st.pyplot(result)
                with logs:
                    st.code(stdout_io.getvalue(), language="bash")
        except Exception as e:
            # The icon must be a real emoji; the mis-encoded text was not one.
            st.toast(f":red[{e}]", icon="🚨")
    else:
        st.write("Please first upload a file to generate the volcano plot.")


if __name__ == "__main__":
    main()
requirements.txt CHANGED
@@ -1 +1 @@
1
- gradio==4.37.2
 
1
+ streamlit==1.36.0