zdxpan committed on
Commit
999d054
1 Parent(s): e460b37

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -32
app.py CHANGED
@@ -8,8 +8,8 @@ import textwrap
8
  import traceback
9
 
10
  # 爬虫-------------
11
- from save_cookie import save_cookie, get_cookie, cookie_f
12
- from scrap_util import getDriver, titleLocInfo, find_key_paragrap, extract_from_driver, table_record_doc
13
  import helium as hm
14
  from postDouyin import senDouyin
15
  # 模型-------------
@@ -54,12 +54,12 @@ HEIGHT = 1400
54
  WIDTH,HEIGHT = a_.size
55
 
56
  # should load from files and build new from file
57
- cookie_fns = ["抖音北京人事考试","抖音广东人事考试","抖音四川人事考试","抖音浙江人事考试","抖音江苏人事考试","抖音山东人事考试","抖音河南人事考试"]
58
- cookie_fns = os.listdir("./cookie_list/")
59
- cookie_fn = cookie_fns[0]
60
  description = "URL--> 爬取-->解析--> 音频--> 图片--> 视频"
61
 
62
- driver = getDriver()
63
  # sub_url ="https://www.js.msa.gov.cn/art/2023/2/24/art_11436_1391666.html"
64
  # hm.set_driver(driver) # 给它一个selnuim driver
65
  # hm.go_to(sub_url)
@@ -84,30 +84,6 @@ def custom_predict(context, question):
84
  score = answer_result["score"]
85
  return answer, score
86
 
87
- def get_content_from_driver(task_link, content=None):
88
- driver = getDriver()
89
- hm.set_driver(driver) # 给它一个selnuim driver
90
- if content:
91
- pass
92
- hm.go_to(task_link)
93
- print("hm.go_to(task_link)")
94
- time.sleep(1)
95
- items_ = driver.find_elements_by_xpath("//p")
96
- items_ = [i.text for i in items_ if i.text != ""]
97
- context_to_label = "\n".join(items_)
98
- doc = extract_from_driver(driver)
99
- doc["url"] = task_link
100
- doc["content"] = context_to_label
101
- n_bm_sj,_ = custom_predict(context = context_to_label, question="报名时间")
102
- n_fee_sj,_ = custom_predict(context = context_to_label, question="缴费时间")
103
- n_ks_sj,_ = custom_predict(context = context_to_label, question="考试时间")
104
- n_zkz_sj,_ = custom_predict(context = context_to_label, question="准考证时间")
105
- need = [doc["title"],doc["zwlx"], doc["zwk_sheng"], doc["zwk_diqu"],
106
- doc["tidy_bm_sj"], doc["tidy_fee_sj"], doc["tidy_ks_sj"], doc["tidy_zkz_sj"],context_to_label,
107
- n_bm_sj, n_fee_sj, n_ks_sj, n_zkz_sj
108
- ]
109
- return need
110
-
111
  def image_preview(orimage=None, text="Hello Ai", x=10, y=20, w=500, h=100, bac_color = "#FFbbFF",
112
  txt_color = "#000000",front="simsun.ttc", size = 50):
113
  if orimage is None:
@@ -473,8 +449,8 @@ with gr.Blocks() as demo:
473
  # outputs=[img_files])
474
  # outputs=[movie_file, img_files])
475
  # exit_.click(fn=exit_func)
476
- login_.click(fn=loginDouyin, outputs=[login_qr])
477
- login_save.click(fn=run_save_cookie, inputs = [account_name_new])
478
  # post_.click(fn=mySendDouyin, inputs = [account_fn, movie_file])
479
  # 绑定clear点击函数
480
  # clear.click(fn=clear_input, inputs=[], outputs=[context, question, answer, score])
 
8
  import traceback
9
 
10
  # 爬虫-------------
11
+ # from save_cookie import save_cookie, get_cookie, cookie_f
12
+ # from scrap_util import getDriver, titleLocInfo, find_key_paragrap, extract_from_driver, table_record_doc
13
  import helium as hm
14
  from postDouyin import senDouyin
15
  # 模型-------------
 
54
  WIDTH,HEIGHT = a_.size
55
 
56
  # should load from files and build new from file
57
+ # cookie_fns = ["抖音北京人事考试","抖音广东人事考试","抖音四川人事考试","抖音浙江人事考试","抖音江苏人事考试","抖音山东人事考试","抖音河南人事考试"]
58
+ # cookie_fns = os.listdir("./cookie_list/")
59
+ # cookie_fn = cookie_fns[0]
60
  description = "URL--> 爬取-->解析--> 音频--> 图片--> 视频"
61
 
62
+ # driver = getDriver()
63
  # sub_url ="https://www.js.msa.gov.cn/art/2023/2/24/art_11436_1391666.html"
64
  # hm.set_driver(driver) # 给它一个selnuim driver
65
  # hm.go_to(sub_url)
 
84
  score = answer_result["score"]
85
  return answer, score
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  def image_preview(orimage=None, text="Hello Ai", x=10, y=20, w=500, h=100, bac_color = "#FFbbFF",
88
  txt_color = "#000000",front="simsun.ttc", size = 50):
89
  if orimage is None:
 
449
  # outputs=[img_files])
450
  # outputs=[movie_file, img_files])
451
  # exit_.click(fn=exit_func)
452
+ # login_.click(fn=loginDouyin, outputs=[login_qr])
453
+ # login_save.click(fn=run_save_cookie, inputs = [account_name_new])
454
  # post_.click(fn=mySendDouyin, inputs = [account_fn, movie_file])
455
  # 绑定clear点击函数
456
  # clear.click(fn=clear_input, inputs=[], outputs=[context, question, answer, score])