|
| 1 | +# Copyright 2019-2026 CERN and copyright holders of ALICE O2. |
| 2 | +# See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. |
| 3 | +# All rights not expressly granted are reserved. |
| 4 | +# |
| 5 | +# This software is distributed under the terms of the GNU General Public |
| 6 | +# License v3 (GPL Version 3), copied verbatim in the file "COPYING". |
| 7 | +# |
| 8 | +# In applying this license CERN does not waive the privileges and immunities |
| 9 | +# granted to it by virtue of its status as an Intergovernmental Organization |
| 10 | +# or submit itself to any jurisdiction. |
| 11 | +"""Log tools for the Hyperloop perf MCP server. |
| 12 | +
|
| 13 | +A train/device log (e.g. ``stdout.log``) is fetched once through the alimonitor |
| 14 | +proxy and cached on disk; subsequent ``grep_log`` calls run regex queries over |
| 15 | +the cached copy and return at most ``max_results`` matches (with optional |
| 16 | +context), so a multi-MB log never has to come back over the wire — or into the |
| 17 | +model's context — in full. |
| 18 | +""" |
| 19 | + |
| 20 | +from __future__ import annotations |
| 21 | + |
| 22 | +import gzip |
| 23 | +import hashlib |
| 24 | +import os |
| 25 | +import re |
| 26 | +from dataclasses import dataclass |
| 27 | + |
| 28 | +from hl_common import fetch_bytes |
| 29 | + |
# Directory holding the cached log copies. Taken from the LOG_MCP_CACHE
# environment variable when set, otherwise ~/.cache/log-mcp; a leading "~" is
# expanded to the user's home directory.
_CACHE_DIR = os.path.expanduser(os.environ.get("LOG_MCP_CACHE", "~/.cache/log-mcp"))
_MAX_LINE = 2000  # truncate individual lines in the output to keep results bounded
| 32 | + |
| 33 | + |
@dataclass
class LogReport:
    """Bookkeeping record for one log that has been fetched and cached."""

    # URL the log was fetched from.
    url: str
    # Label the log is registered under (the key used to look it up).
    name: str
    # Filesystem path of the cached, decompressed copy.
    path: str
    # Number of lines counted when the log was loaded.
    n_lines: int
    # Size in bytes of the log content after any gzip decompression.
    n_bytes: int
| 41 | + |
| 42 | + |
# In-process registry of loaded logs, keyed by the name they were loaded under.
_logs: dict[str, LogReport] = {}
| 44 | + |
| 45 | + |
def _get(name: str) -> LogReport:
    """Look up a loaded log by name.

    Args:
        name: Label the log was registered under.

    Returns:
        The matching LogReport.

    Raises:
        ValueError: If no log with that name is loaded; the message lists the
            names that are available.
    """
    if name not in _logs:
        if _logs:
            avail = ", ".join(_logs)
        else:
            avail = "(none)"
        raise ValueError(f"No log '{name}'. Loaded: {avail}. Use load_log first.")
    return _logs[name]
| 52 | + |
| 53 | + |
def _clip(line: str) -> str:
    """Return *line* cut to _MAX_LINE characters, with a marker when shortened."""
    if len(line) > _MAX_LINE:
        return line[:_MAX_LINE] + " …[truncated]"
    return line
| 56 | + |
| 57 | + |
async def load_log(url: str, name: str = "", proxy_token: str = "") -> str:
    """Fetch a log file and cache it for regex querying with grep_log.

    The file is downloaded (via the alimonitor proxy for ``alimonitor.cern.ch``
    URLs), decompressed if gzip'd, and cached on disk; grep_log then reads that
    cached copy and never re-fetches. Loading again under an existing name
    replaces that entry.

    Args:
        url: Direct URL to a log file (e.g. .../stdout.log or a .gz log).
        name: Label (defaults to the filename portion of the URL).
        proxy_token: Bearer token for the local proxy (else PROXY_TOKEN env).

    Returns:
        A one-line summary with the registered name, line count and byte size.
    """
    raw = await fetch_bytes(url, proxy_token=proxy_token)
    # Decide on the payload, not the URL: every gzip stream starts with the
    # magic bytes 1f 8b, whereas a ".gz" URL whose body was already decoded in
    # transit would make gzip.decompress raise.
    data = gzip.decompress(raw) if raw[:2] == b"\x1f\x8b" else raw
    text = data.decode("utf-8", errors="replace")

    os.makedirs(_CACHE_DIR, exist_ok=True)
    # The cache file is keyed by the URL, so the same URL always maps to the
    # same file regardless of the label it is loaded under.
    h = hashlib.sha1(url.encode()).hexdigest()[:12]
    path = os.path.join(_CACHE_DIR, f"{h}.log")
    # NOTE: written with the platform default text encoding; the cached copy
    # is read back the same way, so the two must stay in sync.
    with open(path, "w", errors="replace") as f:
        f.write(text)

    # Count lines with str.splitlines() so the total agrees with the line
    # numbers produced when the cached copy is split the same way for searching.
    n_lines = len(text.splitlines())
    pname = name or url.rstrip("/").split("/")[-1]

    previous = _logs.get(pname)
    _logs[pname] = LogReport(url, pname, path, n_lines, len(data))

    # Re-using a label for a different URL would otherwise orphan the old
    # cache file. Remove it unless another loaded log still points at it.
    if previous is not None and previous.path != path:
        if all(other.path != previous.path for other in _logs.values()):
            try:
                os.remove(previous.path)
            except OSError:
                pass  # best-effort cleanup; a leftover cache file is harmless

    return f"Loaded log '{pname}': {n_lines:,} lines, {len(data):,} bytes."
| 82 | + |
| 83 | + |
def grep_log(
    name: str,
    pattern: str,
    max_results: int = 50,
    ignore_case: bool = False,
    invert: bool = False,
    context: int = 0,
) -> str:
    """Regex-search a cached log and return at most max_results matching lines.

    Output lines are prefixed with their 1-based line number followed by ``:``
    for a matching line or ``-`` for a context line; ``--`` separates blocks
    that are not contiguous in the log. Each line is truncated to a bounded
    length.

    Args:
        name: Log name as returned by load_log.
        pattern: Python regex (re.search semantics, matches anywhere in a line).
        max_results: Maximum number of matching lines to return (default 50).
        ignore_case: Case-insensitive match.
        invert: Return non-matching lines instead.
        context: Lines of context to show before and after each match (like grep -C).

    Returns:
        A header with the total match count followed by the selected lines, or
        a short message for an invalid regex, an invalid max_results, or no
        matches.

    Raises:
        ValueError: If no log named ``name`` is loaded.
    """
    r = _get(name)
    try:
        rx = re.compile(pattern, re.IGNORECASE if ignore_case else 0)
    except re.error as e:
        return f"bad regex: {e}"
    if max_results < 1:
        return "max_results must be >= 1"

    with open(r.path, errors="replace") as f:
        lines = f.read().splitlines()

    total = 0
    hits: list[int] = []  # line indices of the first max_results matches
    for i, line in enumerate(lines):
        # A line is selected when "did not match" equals "invert".
        if (rx.search(line) is None) == invert:
            total += 1
            if len(hits) < max_results:
                hits.append(i)

    if total == 0:
        # Report the number of lines actually searched.
        return f"[{name}] no matches for /{pattern}/ in {len(lines):,} lines"

    ctx = max(0, context)
    last = len(lines) - 1
    # Membership set so a hit that lands inside an earlier match's trailing
    # context is still marked as a match rather than as plain context.
    hit_set = set(hits)
    out: list[str] = []
    prev_end = -1  # last printed line index, to insert separators / avoid dup
    for idx in hits:
        lo, hi = max(0, idx - ctx), min(last, idx + ctx)
        if lo <= prev_end:  # overlap with previous block: continue from there
            lo = prev_end + 1
        elif prev_end >= 0 and lo > prev_end + 1:
            # Only a real gap gets a separator; directly adjacent blocks are
            # contiguous output (as with grep -C).
            out.append("--")
        for j in range(lo, hi + 1):
            mark = ":" if j in hit_set else "-"  # ':' = a match line, '-' = context
            out.append(f"{j + 1}{mark} {_clip(lines[j])}")
        prev_end = hi

    shown = min(total, max_results)
    header = f"[{name}] {total} match(es) for /{pattern}/" + (
        f"; showing first {shown}" if total > shown else ""
    )
    return header + "\n" + "\n".join(out)
| 146 | + |
| 147 | + |
def list_logs() -> str:
    """Return one summary line per loaded log, or a hint when none is loaded."""
    if _logs:
        rows = []
        for label, report in _logs.items():
            rows.append(
                f"{label}: {report.n_lines:,} lines, {report.n_bytes:,} bytes, url={report.url}"
            )
        return "\n".join(rows)
    return "No logs loaded. Use load_log first."
| 155 | + |
| 156 | + |
def drop_log(name: str) -> str:
    """Free a log and delete its cached copy.

    The cached file is only deleted when no other loaded log refers to the
    same file, so dropping one label never breaks another.

    Args:
        name: Log name as returned by load_log.

    Returns:
        A confirmation message naming the dropped log.

    Raises:
        ValueError: If no log named ``name`` is loaded.
    """
    r = _get(name)
    # Two labels can share one cache file (same path); keep the file while any
    # other entry still needs it.
    shared = any(
        other.path == r.path for label, other in _logs.items() if label != name
    )
    if not shared:
        try:
            os.remove(r.path)
        except FileNotFoundError:
            pass  # already gone; nothing to clean up
    del _logs[name]
    return f"Dropped log '{name}'."
| 168 | + |
| 169 | + |
def register(mcp) -> None:
    """Expose load_log, grep_log, list_logs and drop_log as tools on *mcp*.

    Args:
        mcp: Shared FastMCP instance; its ``tool()`` decorator factory is
            called once per function.
    """
    tools = (load_log, grep_log, list_logs, drop_log)
    for tool_fn in tools:
        decorate = mcp.tool()
        decorate(tool_fn)
0 commit comments