goodmodeler committed on
Commit
9339c96
·
1 Parent(s): 5d0d255

ADD: test log

Browse files
Files changed (1) hide show
  1. exp_pipeline/pipeline.py +3 -0
exp_pipeline/pipeline.py CHANGED
@@ -15,6 +15,7 @@ def run_pipeline(split: str = "train"):
15
  data_loader = DataLoader()
16
  raw_data = data_loader.get_passage_dataset(split)
17
  logger.info(f"Loaded {len(raw_data)} samples from MS MARCO Passage Ranking [{split}]")
 
18
 
19
  # 2. 预处理数据
20
  preprocessor = Preprocessor()
@@ -22,6 +23,7 @@ def run_pipeline(split: str = "train"):
22
  if hasattr(raw_data, "to_dict"):
23
  raw_data = raw_data.to_dict()
24
  raw_data = [dict(zip(raw_data.keys(), v)) for v in zip(*raw_data.values())]
 
25
 
26
  # MS MARCO Passage v2.1: 用passages["passage_text"]字段
27
  passages = []
@@ -30,6 +32,7 @@ def run_pipeline(split: str = "train"):
30
  passages.extend(item["passages"]["passage_text"])
31
  processed = preprocessor.preprocess_passages(passages)
32
  texts = [p["text"] for p in processed]
 
33
 
34
  logger.info(f"Processed {len(texts)} passages")
35
 
 
15
  data_loader = DataLoader()
16
  raw_data = data_loader.get_passage_dataset(split)
17
  logger.info(f"Loaded {len(raw_data)} samples from MS MARCO Passage Ranking [{split}]")
18
+ print("data_loader\n")
19
 
20
  # 2. 预处理数据
21
  preprocessor = Preprocessor()
 
23
  if hasattr(raw_data, "to_dict"):
24
  raw_data = raw_data.to_dict()
25
  raw_data = [dict(zip(raw_data.keys(), v)) for v in zip(*raw_data.values())]
26
+ print("raw_data\n")
27
 
28
  # MS MARCO Passage v2.1: 用passages["passage_text"]字段
29
  passages = []
 
32
  passages.extend(item["passages"]["passage_text"])
33
  processed = preprocessor.preprocess_passages(passages)
34
  texts = [p["text"] for p in processed]
35
+ print("texts\n")
36
 
37
  logger.info(f"Processed {len(texts)} passages")
38