Yijun-Yang committed on
Commit
ac3d7d8
1 Parent(s): b90b0df

update success parameter

Browse files
huixiangdou/service/findarticles.py CHANGED
@@ -60,7 +60,7 @@ class ArticleRetrieval:
60
  if not os.path.exists(self.repo_dir):
61
  os.makedirs(self.repo_dir)
62
  print(f"Saving articles to {self.repo_dir}.")
63
- success = 0
64
  for id in tqdm(self.pmc_ids, desc="Fetching full texts", unit="article"):
65
  base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
66
  params = {
@@ -71,19 +71,20 @@ class ArticleRetrieval:
71
  }
72
  response = requests.get(base_url, params=params)
73
  full_text = self._clean_xml(response.text)
74
- if full_text == '':
75
  continue
76
  else:
 
77
  with open(os.path.join(self.repo_dir,f'PMC{id}.txt'), 'w') as f:
78
  f.write(full_text)
79
- success += 1
80
 
81
  def save_config(self):
82
  config = {
83
  'keywords': self.keywords,
84
  'repo_dir': self.repo_dir,
85
  'pmc_ids': self.pmc_ids,
86
- 'len': success,
87
  'retmax': self.retmax
88
  }
89
  with open(os.path.join(self.repo_dir, 'config.json'), 'w') as f:
 
60
  if not os.path.exists(self.repo_dir):
61
  os.makedirs(self.repo_dir)
62
  print(f"Saving articles to {self.repo_dir}.")
63
+ self.success = 0
64
  for id in tqdm(self.pmc_ids, desc="Fetching full texts", unit="article"):
65
  base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
66
  params = {
 
71
  }
72
  response = requests.get(base_url, params=params)
73
  full_text = self._clean_xml(response.text)
74
+ if full_text.strip() == '':
75
  continue
76
  else:
77
+ logger.info(full_text[:1000])
78
  with open(os.path.join(self.repo_dir,f'PMC{id}.txt'), 'w') as f:
79
  f.write(full_text)
80
+ self.success += 1
81
 
82
  def save_config(self):
83
  config = {
84
  'keywords': self.keywords,
85
  'repo_dir': self.repo_dir,
86
  'pmc_ids': self.pmc_ids,
87
+ 'len': self.success,
88
  'retmax': self.retmax
89
  }
90
  with open(os.path.join(self.repo_dir, 'config.json'), 'w') as f: