| """ |
| Log File Position Bias |
| Find an error message at varying positions in a log file. |
| """ |
import logging
import os
import random
import time
from typing import Any, Dict, List, Optional

from tqdm import tqdm

from src.generator import generate_text
from src.utils import ensure_dir, save_jsonl, save_json
|
|
logger = logging.getLogger(__name__)


# Filler log levels for non-target lines. INFO and DEBUG are listed multiple
# times so random.choice draws them more often than WARNING.
LOG_LEVELS = ["INFO", "DEBUG", "WARNING", "INFO", "DEBUG", "INFO"]
# Pool of benign filler messages; one is sampled per non-target log line.
LOG_MESSAGES = [
    "Connection established to server-01",
    "Cache hit for key user_prefs",
    "Processing batch job #4521",
    "Database query completed in 12ms",
    "Index rebuild started",
    "Memory usage at 45%",
    "Request served in 3ms",
    "Background task scheduled",
    "Config file reloaded",
    "Metrics flushed to disk",
]
|
|
|
|
| def _make_log(n: int, target_line: str, target_pos: int) -> str: |
| """Generate log file with target error at position.""" |
| lines = [] |
| for i in range(n): |
| if i == target_pos: |
| lines.append(target_line) |
| else: |
| ts = f"2024-01-{random.randint(1,28):02d} {random.randint(0,23):02d}:{random.randint(0,59):02d}:{random.randint(0,59):02d}" |
| level = random.choice(LOG_LEVELS) |
| msg = random.choice(LOG_MESSAGES) |
| lines.append(f"{ts} [{level}] {msg}") |
| return "\n".join(lines) |
|
|
|
|
def run_log_retrieval(
    model_name: str,
    num_lines: int,
    num_examples: int,
    out_dir: str,
    depths: Optional[List[float]] = None,
) -> Dict[str, Any]:
    """Measure retrieval accuracy for an error line at varying log depths.

    For each depth fraction, builds ``num_examples`` synthetic logs with a
    single [ERROR] line inserted at that relative position, prompts the model
    for the error code, and scores a case-insensitive substring match.

    Args:
        model_name: Model identifier forwarded to ``generate_text``.
        num_lines: Total number of lines in each synthetic log file.
        num_examples: Number of prompts evaluated per depth.
        out_dir: Directory for per-depth JSONL predictions and the summary.
        depths: Relative positions in [0, 1]; defaults to 9 evenly spaced
            points from 0.0 to 1.0.

    Returns:
        Summary dict with per-depth accuracy and elapsed time in minutes.
    """
    ensure_dir(out_dir)
    if depths is None:
        depths = [0.0, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 1.0]

    results = {}
    start = time.time()

    for depth in depths:
        # Lazy %-args: formatting is skipped when the level is disabled.
        logger.info("[LOG] Depth %.1f%%", depth * 100)
        # Target position depends only on depth -> hoist out of the
        # per-example loop instead of recomputing it every iteration.
        pos = int(depth * (num_lines - 1))
        preds = []
        for _ in tqdm(range(num_examples), desc=f"Log {depth:.1%}", leave=False):
            error_code = f"ERR-{random.randint(1000,9999)}"
            target_line = f"2024-01-15 14:30:00 [ERROR] Critical failure: {error_code} - Service halted"
            log_str = _make_log(num_lines, target_line, pos)

            prompt = (
                f"Find the error code in the log file below.\n\n"
                f"```\n{log_str}\n```\n\n"
                f"Error code:"
            )
            ans = generate_text(
                [{"role": "user", "content": prompt}],
                model_name=model_name,
                max_new_tokens=15,
            )
            # Substring match is case-insensitive to tolerate model casing.
            correct = 1.0 if error_code.lower() in ans.lower() else 0.0
            preds.append({
                "model_answer": ans,
                "correct": correct,
                "error_code": error_code,
                "depth": depth,
            })

        save_jsonl(os.path.join(out_dir, f"log_depth_{depth}.jsonl"), preds)
        acc = sum(p["correct"] for p in preds) / len(preds) if preds else 0.0
        results[depth] = {"accuracy": acc, "predictions": preds}
        logger.info("[LOG] Depth %.1f%%: acc=%.3f", depth * 100, acc)

    summary = {
        "experiment": "log_retrieval",
        "num_lines": num_lines,
        "num_examples": num_examples,
        "depths": {str(d): results[d]["accuracy"] for d in depths},
        "time_minutes": (time.time() - start) / 60,
    }
    save_json(os.path.join(out_dir, "log_summary.json"), summary)
    logger.info("[LOG] Time=%.1f min", (time.time() - start) / 60)
    return summary
|
|