heymenn committed on
Commit
460f7e9
·
1 Parent(s): 93545e3

fix regex issues

Browse files
Files changed (1) hide show
  1. classes.py +17 -10
classes.py CHANGED
@@ -219,7 +219,8 @@ class ETSISpecFinder:
219
  except (ValueError, IndexError):
220
  return []
221
 
222
- def fetch_for_type(spec_type):
 
223
  params = {
224
  "view": "data",
225
  "format": "json",
@@ -232,15 +233,16 @@ class ETSISpecFinder:
232
  proxies=_get_proxies())
233
  data = resp.json()
234
  if data and isinstance(data, list):
235
- return [str(item["wki_id"]) for item in data if "wki_id" in item]
 
 
 
 
236
  except Exception as e:
237
  print(f"Error getting wki_id for {doc_id}: {e}")
238
  return []
239
 
240
- candidates = []
241
- with ThreadPoolExecutor(max_workers=2) as executor:
242
- for result in executor.map(fetch_for_type, ["TS", "TR"]):
243
- candidates.extend(result)
244
  return candidates
245
 
246
  def _authenticate_eol(self, wki_id: str) -> requests.Session:
@@ -341,11 +343,16 @@ class ETSISpecFinder:
341
  print(f" wki_id={wki_id}: success")
342
  return tmp_path
343
 
344
- with ThreadPoolExecutor(max_workers=min(len(candidates), 4)) as executor:
345
- future_to_wki = {executor.submit(try_wki, wki_id): wki_id for wki_id in candidates}
346
- for future in as_completed(future_to_wki):
 
347
  result = future.result()
348
  if result is not None:
 
 
349
  return result
 
 
350
 
351
- return f"Specification {doc_id}: all {len(candidates)} wki_id candidate(s) rejected by ETSI portal"
 
219
  except (ValueError, IndexError):
220
  return []
221
 
222
+ def fetch_candidates():
223
+ spec_num = doc_id.split("-")[0].replace(" ", "")
224
  params = {
225
  "view": "data",
226
  "format": "json",
 
233
  proxies=_get_proxies())
234
  data = resp.json()
235
  if data and isinstance(data, list):
236
+ return [
237
+ str(item["wki_id"])
238
+ for item in data
239
+ if "wki_id" in item and spec_num in json.dumps(item)
240
+ ]
241
  except Exception as e:
242
  print(f"Error getting wki_id for {doc_id}: {e}")
243
  return []
244
 
245
+ candidates = list(dict.fromkeys(fetch_candidates())) # single call, deduped
 
 
 
246
  return candidates
247
 
248
  def _authenticate_eol(self, wki_id: str) -> requests.Session:
 
343
  print(f" wki_id={wki_id}: success")
344
  return tmp_path
345
 
346
+ executor = ThreadPoolExecutor(max_workers=min(len(candidates), 4))
347
+ try:
348
+ futures = {executor.submit(try_wki, wki_id): wki_id for wki_id in candidates}
349
+ for future in as_completed(futures):
350
  result = future.result()
351
  if result is not None:
352
+ for f in futures:
353
+ f.cancel()
354
  return result
355
+ finally:
356
+ executor.shutdown(wait=False)
357
 
358
+ return f"Specification {doc_id}: all {len(candidates)} wki_id candidate(s) rejected"