diff --git a/.github/workflows/version_scanner.yml b/.github/workflows/version_scanner.yml new file mode 100644 index 000000000000..6b7af7d22301 --- /dev/null +++ b/.github/workflows/version_scanner.yml @@ -0,0 +1,76 @@ +name: Version Scan + +on: + push: + branches: + - main + - '**version-scanner**' + schedule: + - cron: '0 * * * *' # Run hourly at the top of the hour + workflow_dispatch: + +permissions: + contents: read + issues: write + +jobs: + scan: + name: Version Scan + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.14' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pyyaml + + - name: Run Version Scanner + run: | + # Use -o to output the raw CSV to a file, and --stdout to print the summary to the GitHub Actions UI + python scripts/version_scanner/version_scanner.py -d python -v 3.7 --stdout -o version_scanner_output.csv --soft-fail + + - name: Upload CSV Results + if: always() + uses: actions/upload-artifact@v7 + with: + name: version-scanner-results + path: version_scanner_output.csv + + - name: Create or update issue on finding + if: failure() + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + TITLE="Version Scanner found deprecated dependencies" + RUN_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" + + # Read the first 50 lines to prevent blowing up the issue body if it's massive + CSV_PREVIEW=$(head -n 50 version_scanner_output.csv) + + BODY="The [Version Scanner]($RUN_URL) found deprecated dependencies in the repository. + + **Matches Found:** + \`\`\`csv + $CSV_PREVIEW + \`\`\` + *(If there are more than 50 matches, see the workflow logs for the full list)*" + + # Mirroring regenerate-all.yml: check if an issue already exists to prevent spam + EXISTING_ISSUE=$(gh issue list --state open --search "in:title \"$TITLE\"" --json number --jq '.[0].number') + + if [ -z "$EXISTING_ISSUE" ]; then + echo "WOULD HAVE CREATED ISSUE:" + echo "gh issue create --title \"$TITLE\" --body \"$BODY\"" + # gh issue create --title "$TITLE" --body "$BODY" + else + echo "Issue #$EXISTING_ISSUE already exists." + echo "WOULD HAVE ADDED COMMENT:" + echo "gh issue comment \"$EXISTING_ISSUE\" --body \"Another scanner run found deprecated dependencies: $RUN_URL\"" + # gh issue comment "$EXISTING_ISSUE" --body "Another scanner run found deprecated dependencies: $RUN_URL" + fi diff --git a/scripts/version_scanner/regex_config.yaml b/scripts/version_scanner/regex_config.yaml index 07196c63edeb..95e62fe002aa 100644 --- a/scripts/version_scanner/regex_config.yaml +++ b/scripts/version_scanner/regex_config.yaml @@ -58,15 +58,15 @@ rules: - | sys\.version_info\s*<\s*\(3,\s*{minor_plus_one}\) - | - sys\.version_info\.minor\s*==\s*{minor} + sys\.version_info\.minor\s*==\s*{minor}(?!\d) - | - sys\.version_info\.minor\s*>=\s*{minor} + sys\.version_info\.minor\s*>=\s*{minor}(?!\d) - | - sys\.version_info\.minor\s*<=\s*{minor} + sys\.version_info\.minor\s*<=\s*{minor}(?!\d) - | - sys\.version_info\.minor\s*>\s*{minor_minus_one} + sys\.version_info\.minor\s*>\s*{minor_minus_one}(?!\d) - | - sys\.version_info\.minor\s*<\s*{minor_plus_one} + sys\.version_info\.minor\s*<\s*{minor_plus_one}(?!\d) - name: python_env_short description: Finds short python environment names often used in tox or nox. @@ -87,7 +87,7 @@ rules: - "Python3.7" rules: - | - python3\.{minor} + python3\.{minor}(?!\d) - name: combined_version_string description: Finds combined version strings often used in class or variable names. @@ -97,6 +97,6 @@ rules: - "Python37DeprecationWarning" rules: - | - Python{major}{minor} + Python{major}{minor}(?!\d) diff --git a/scripts/version_scanner/tests/unit/test_version_scanner.py b/scripts/version_scanner/tests/unit/test_version_scanner.py index f2d6ce66735e..f5a909e849e8 100644 --- a/scripts/version_scanner/tests/unit/test_version_scanner.py +++ b/scripts/version_scanner/tests/unit/test_version_scanner.py @@ -19,7 +19,18 @@ from unittest.mock import patch import pytest import yaml -from version_scanner import ConfigManager, scan_file, write_csv_report +from version_scanner import ( + ConfigManager, + scan_file, + write_csv_report, + _truncate_context, + _wrap_sheet_hyperlink, + _wrap_sheet_string, + _safe_int, + format_for_raw_csv, + format_for_spreadsheet, + format_for_console +) # Test ConfigManager @pytest.mark.parametrize("dependency, version, expected", [ @@ -246,44 +257,8 @@ def test_main_package_file_not_found(capsys): assert excinfo.value.code == 1 captured = capsys.readouterr() assert "Error: Package file not found" in captured.err -def test_format_match_for_csv(): - from version_scanner import format_match_for_csv - match = { - "file_path": "google-cloud-python/main/packages/pkg_a/setup.py", - "repo_path": "packages/pkg_a/setup.py", - "line_number": 123, - "rule_name": "test_rule" - } - - # Test without github_repo - formatted = format_match_for_csv(match) - assert formatted["line_number"] == 123 - - # Test with github_repo - formatted = format_match_for_csv(match, github_repo="https://github.com/user/repo", branch="main") - expected_url = "https://github.com/user/repo/blob/main/packages/pkg_a/setup.py#L123" - assert formatted["line_number"] == f'=HYPERLINK("{expected_url}", "123")' -def test_format_match_for_csv_truncates_long_line(): - from version_scanner import format_match_for_csv - - long_line = "a" * 1000 + "PY37" + "b" * 1000 - match = { - "file_path": "test.py", - "line_number": 1, - "rule_name": "test_rule", - "matched_string": "PY37", - "context_line": long_line - } - - formatted = format_match_for_csv(match) - context = formatted["context_line"] - - assert len(context) <= 600 - assert "PY37" in context - assert "..." in context - def test_get_match_counts(): from version_scanner import get_match_counts @@ -315,30 +290,7 @@ def test_scan_file_removes_newline_from_match(tmp_path): assert "\n" not in results[0]["matched_string"] -def test_write_csv_report_with_links(tmp_path): - output_file = tmp_path / "report.csv" - matches = [ - { - "file_path": "google-cloud-python/main/packages/pkg_a/setup.py", - "repo_path": "packages/pkg_a/setup.py", - "line_number": 1, - "rule_name": "python_requires_check", - "matched_string": "python_requires = '>=3.7'", - "context_line": "python_requires = '>=3.7'" - } - ] - - from version_scanner import write_csv_report - write_csv_report(str(output_file), matches, github_repo="https://github.com/user/repo", branch="main") - - assert output_file.exists() - - with open(output_file, 'r', encoding='utf-8', newline='') as f: - reader = csv.DictReader(f) - rows = list(reader) - - assert len(rows) == 1 - assert "HYPERLINK" in rows[0]["line_number"] + def test_scan_repository_ignores_version_scanner(tmp_path): vs_dir = tmp_path / "version_scanner" vs_dir.mkdir() @@ -376,7 +328,8 @@ def test_main_loads_ignore_from_script_dir(mock_scan, mock_load_ignore): with mock.patch('sys.argv', test_args): from version_scanner import main - main() + with pytest.raises(SystemExit): + main() mock_load_ignore.assert_called_once() args, kwargs = mock_load_ignore.call_args @@ -385,9 +338,17 @@ def test_main_loads_ignore_from_script_dir(mock_scan, mock_load_ignore): assert "scripts/version_scanner" in path +try: + import googleapiclient + HAS_GOOGLE_API = True +except ImportError: + HAS_GOOGLE_API = False + +@pytest.mark.skipif(not HAS_GOOGLE_API, reason="Requires googleapiclient") @mock.patch('googleapiclient.discovery.build') @mock.patch('google.auth.default') def test_upload_to_drive(mock_auth, mock_build): + """Test the ability to upload results to drive for visibility in gSheets.""" from unittest import mock mock_creds = mock.Mock() @@ -479,6 +440,108 @@ def test_regex_examples_from_config(): break assert matched, f"Example '{example}' in group '{name}' did not match any pattern." +def test_main_exit_code_1(): + """Test that main() calls sys.exit(1) when matches are found.""" + # We can mock scan_repository to return a dummy match + test_args = ['version_scanner.py', '-d', 'python', '-v', '3.7'] + with mock.patch('sys.argv', test_args): + from version_scanner import main + with mock.patch('version_scanner.scan_repository', return_value=[{'file_path': 'test', 'line_number': 1, 'matched_string': '3.7', 'rule_name': 'test'}]): + with pytest.raises(SystemExit) as excinfo: + main() + assert excinfo.value.code == 1 + + +def test_main_soft_fail_exit_code_0(): + """Test that main() calls sys.exit(0) when matches are found but --soft-fail is set.""" + test_args = ['version_scanner.py', '-d', 'python', '-v', '3.7', '--soft-fail'] + with mock.patch('sys.argv', test_args): + from version_scanner import main + with mock.patch('version_scanner.scan_repository', return_value=[{'file_path': 'test', 'line_number': 1, 'matched_string': '3.7', 'rule_name': 'test'}]): + with pytest.raises(SystemExit) as excinfo: + main() + assert excinfo.value.code == 0 + + +def test_main_stdout(capsys): + """Test that --stdout prints the CSV output to stdout.""" + test_args = ['version_scanner.py', '-d', 'python', '-v', '3.7', '--stdout'] + with mock.patch('sys.argv', test_args): + from version_scanner import main + with mock.patch('version_scanner.scan_repository', return_value=[{'file_path': 'test.py', 'line_number': 1, 'matched_string': '3.7', 'rule_name': 'test'}]): + with pytest.raises(SystemExit): + main() + + captured = capsys.readouterr() + assert "test.py:1 [test] 3.7" in captured.out + + +def test_main_without_stdout_limits_output(capsys): + """Test that main() without --stdout prints only 10 matches and shows a suffix.""" + test_args = ['version_scanner.py', '-d', 'python', '-v', '3.7'] + matches = [{'file_path': f'test_{i}.py', 'line_number': i, 'matched_string': '3.7', 'rule_name': 'test'} for i in range(15)] + with mock.patch('sys.argv', test_args): + from version_scanner import main + with mock.patch('version_scanner.scan_repository', return_value=matches): + with pytest.raises(SystemExit): + main() + + captured = capsys.readouterr() + # Should only print first 10 matches + for i in range(10): + assert f"test_{i}.py:{i} [test] 3.7" in captured.out + for i in range(10, 15): + assert f"test_{i}.py:{i} [test] 3.7" not in captured.out + assert "... and 5 more matches." in captured.out + + +def test_main_with_stdout_prints_all(capsys): + """Test that main() with --stdout prints all matches without limiting.""" + test_args = ['version_scanner.py', '-d', 'python', '-v', '3.7', '--stdout'] + matches = [{'file_path': f'test_{i}.py', 'line_number': i, 'matched_string': '3.7', 'rule_name': 'test'} for i in range(15)] + with mock.patch('sys.argv', test_args): + from version_scanner import main + with mock.patch('version_scanner.scan_repository', return_value=matches): + with pytest.raises(SystemExit): + main() + + captured = capsys.readouterr() + # Should print all 15 matches + for i in range(15): + assert f"test_{i}.py:{i} [test] 3.7" in captured.out + assert "... and 5 more matches." not in captured.out + + +def test_main_does_not_print_rules(capsys): + """Test that main() does not print the list of loaded rules to stdout.""" + test_args = ['version_scanner.py', '-d', 'python', '-v', '3.7'] + with mock.patch('sys.argv', test_args): + from version_scanner import main + with mock.patch('version_scanner.scan_repository', return_value=[]): + with pytest.raises(SystemExit): + main() + captured = capsys.readouterr() + assert "explicit_version_string" not in captured.out + + +def test_scan_file_truncation_bug(tmp_path): + """Test that searching for 3.1 does NOT match 3.10 (truncation bug).""" + # Create a file with 3.10 + test_file = tmp_path / "test_file.py" + test_file.write_text("python_requires = '>=3.10'\npython3.10\nPython310\n") + + from version_scanner import ConfigManager, scan_file + + # Init config for 3.1 + config_manager = ConfigManager("regex_config.yaml", "python", "3.1") + rules = config_manager.load_config() + import re + compiled_rules = [{"name": r["name"], "pattern": re.compile(r["pattern"], re.IGNORECASE)} for r in rules] + + # It should not match anything because all strings are 3.10, not 3.1 + matches = scan_file(str(test_file), compiled_rules) + assert len(matches) == 0, f"Expected 0 matches for 3.1 in 3.10 content, but got {len(matches)}: {matches}" + def test_scan_repository_layout_agnostic(tmp_path): # Create directories under different roots @@ -525,3 +588,110 @@ def test_scan_repository_package_name_roots(tmp_path): assert len(results) == 1 assert results[0]["package_name"] == "pkg_third" assert "third_party/pkg_third/setup.py" in results[0]["file_path"] + + +# --- Decoupled Formatters Tests (TDD) --- + +def test_truncate_context(): + # Context shorter than 500 characters shouldn't be truncated + assert _truncate_context("short context", "short") == "short context" + + # Context longer than 500 characters should be truncated around the matched string + matched = "TARGET_VERSION" + long_prefix = "a" * 300 + long_suffix = "b" * 300 + long_context = long_prefix + matched + long_suffix + + truncated = _truncate_context(long_context, matched) + assert len(truncated) <= 500 + assert matched in truncated + assert truncated.startswith("...") + assert truncated.endswith("...") + +def test_wrap_sheet_hyperlink(): + assert _wrap_sheet_hyperlink("https://github.com/foo", "12") == '=HYPERLINK("https://github.com/foo", "12")' + +def test_wrap_sheet_string(): + assert _wrap_sheet_string("3.10") == '="3.10"' + assert _wrap_sheet_string('python_requires = ">=3.7"') == '="python_requires = "">=3.7"""' + assert _wrap_sheet_string("") == "" + assert _wrap_sheet_string(None) == "" + +def test_safe_int(): + assert _safe_int("123") == 123 + assert _safe_int("") == 0 + assert _safe_int(None) == 0 + assert _safe_int("abc") == 0 + +def test_format_for_raw_csv_handles_empty_line_number(): + match = { + "file_path": "google-cloud-python/main/packages/pkg_a/setup.py", + "repo_path": "packages/pkg_a/setup.py", + "package_name": "pkg_a", + "rule_name": "python_requires_check", + "line_number": "", + "matched_string": "3.7", + "context_line": "python_requires = '>=3.7'" + } + formatted = format_for_raw_csv(match) + assert formatted["line_number"] == 0 + +def test_format_for_raw_csv(): + match = { + "file_path": "google-cloud-python/main/packages/pkg_a/setup.py", + "repo_path": "packages/pkg_a/setup.py", + "package_name": "pkg_a", + "rule_name": "python_requires_check", + "line_number": "123", + "matched_string": "3.7", + "context_line": "python_requires = '>=3.7'" + } + + formatted = format_for_raw_csv(match) + + assert formatted["file_path"] == "google-cloud-python/main/packages/pkg_a/setup.py" + assert formatted["package_name"] == "pkg_a" + assert formatted["rule_name"] == "python_requires_check" + assert formatted["line_number"] == 123 # Int conversion + assert formatted["matched_string"] == "3.7" # No formula wrapping + assert formatted["context_line"] == "python_requires = '>=3.7'" + +def test_format_for_spreadsheet(): + match = { + "file_path": "google-cloud-python/main/packages/pkg_a/setup.py", + "repo_path": "packages/pkg_a/setup.py", + "package_name": "pkg_a", + "rule_name": "python_requires_check", + "line_number": 123, + "matched_string": "3.7", + "context_line": "python_requires = '>=3.7'" + } + + # Without github_repo + formatted_no_repo = format_for_spreadsheet(match) + assert formatted_no_repo["line_number"] == 123 + assert formatted_no_repo["matched_string"] == '="3.7"' # Decimal protection formula + + # With github_repo + formatted_repo = format_for_spreadsheet(match, github_repo="https://github.com/user/repo", branch="main") + expected_url = "https://github.com/user/repo/blob/main/packages/pkg_a/setup.py#L123" + assert formatted_repo["line_number"] == f'=HYPERLINK("{expected_url}", "123")' + assert formatted_repo["matched_string"] == '="3.7"' + +def test_format_for_console(): + match = { + "file_path": "google-cloud-python/main/packages/pkg_a/setup.py", + "repo_path": "packages/pkg_a/setup.py", + "package_name": "pkg_a", + "rule_name": "python_requires_check", + "line_number": 123, + "matched_string": "3.7", + "context_line": "python_requires = '>=3.7'" + } + + log_str = format_for_console(match) + assert "google-cloud-python/main/packages/pkg_a/setup.py:123" in log_str + assert "[python_requires_check]" in log_str + assert "3.7" in log_str + assert "python_requires = " not in log_str # Slim format doesn't print context line + diff --git a/scripts/version_scanner/version_scanner.py b/scripts/version_scanner/version_scanner.py index 1d24c8fceced..90234a967665 100644 --- a/scripts/version_scanner/version_scanner.py +++ b/scripts/version_scanner/version_scanner.py @@ -23,7 +23,7 @@ import os import re import sys -from typing import Dict, List, Tuple +from typing import Dict, List, Tuple, Any import yaml class ConfigManager: @@ -186,61 +186,100 @@ def scan_file(file_path: str, compiled_rules: List[Dict[str, re.Pattern]]) -> Li return results -def format_match_for_csv( +def _truncate_context(context: str, matched: str) -> str: + """Safely truncates context around the match location to prevent overflow.""" + if len(context) > 500: + match_start = context.find(matched) + if match_start != -1: + start = max(0, match_start - 200) + end = min(len(context), match_start + len(matched) + 200) + prefix = "..." if start > 0 else "" + suffix = "..." if end < len(context) else "" + return prefix + context[start:end] + suffix + else: + return context[:500] + "..." + return context + + +def _wrap_sheet_hyperlink(url: str, label: str) -> str: + """Wraps a URL and label into a Google Sheets HYPERLINK formula. + + This ensures that when output is imported into spreadsheet software, the + resulting cells contain clickable hyperlinks pointing directly to GitHub file + locations and line numbers. + """ + return f'=HYPERLINK("{url}", "{label}")' + + +def _wrap_sheet_string(value: str) -> str: + """Wraps a string value inside a spreadsheet string formula to prevent float parsing. + + This forces spreadsheet software (such as Google Sheets) to treat numeric + string patterns (like python runtime version "3.10") as literal strings, + preventing auto-truncation to floats (which would display "3.1"). Double + quotes inside the value are escaped by doubling them to avoid formula syntax + errors on import. + """ + if value is None: + return "" + escaped_value = value.replace('"', '""') + return f'="{escaped_value}"' if value else "" + + +def _safe_int(value: Any, default: int = 0) -> int: + """Safely converts a value to an integer, falling back to a default value. + + Used primarily during raw data formatting for spreadsheet ingestion. If a + value (like a line number) is missing or contains non-integer text (e.g. empty + strings for filename-only matches), this avoids crashing the scanner. + """ + try: + return int(value) + except (ValueError, TypeError): + return default + + +def format_for_raw_csv(match: Dict[str, str]) -> Dict[str, str]: + """Prepares a full raw dataset (n + x columns) with clean text values.""" + return { + "file_path": match.get("file_path", ""), + "package_name": match.get("package_name", ""), + "rule_name": match.get("rule_name", ""), + "line_number": _safe_int(match.get("line_number")), + "matched_string": match.get("matched_string", ""), + "context_line": _truncate_context(match.get("context_line", ""), match.get("matched_string", "")) + } + + +def format_for_spreadsheet( match: Dict[str, str], github_repo: str = None, branch: str = "main" ) -> Dict[str, str]: - """ - Formats a raw match dictionary for clean CSV presentation and imports. - - Cleans long context lines by truncating them around the match location to prevent - extreme cell overflow in spreadsheets. Optionally transforms line numbers into - clickable `=HYPERLINK(...)` formulas linking directly to the exact file and line - number in GitHub. - - Args: - match: A match dictionary containing 'file_path', 'repo_path', 'rule_name', - 'line_number', 'matched_string', and 'context_line'. - github_repo: Optional GitHub repository base URL (e.g., "https://github.com/user/repo"). - If provided, triggers the hyperlink generation. - branch: Optional branch name to build the GitHub blob URL (defaults to "main"). - - Returns: - A copy of the match dictionary with formatted/truncated values, suitable for CSV writing. - """ - formatted = match.copy() + """Builds on top of raw CSV but applies Sheets-specific formulas.""" + formatted = format_for_raw_csv(match) + # Override fields with spreadsheet formatting if github_repo: - # Use repo_path if available, fallback to file_path file_path = match.get("repo_path", match.get("file_path", "")) line_number = match.get("line_number", "") - - # Construct URL url = f"{github_repo}/blob/{branch}/{file_path}#L{line_number}" + formatted["line_number"] = _wrap_sheet_hyperlink(url, str(line_number)) - # Format as Google Sheets formula - formatted["line_number"] = f'=HYPERLINK("{url}", "{line_number}")' - - context = formatted.get("context_line", "") - matched = formatted.get("matched_string", "") - - if len(context) > 500: - match_start = context.find(matched) - if match_start != -1: - start = max(0, match_start - 200) - end = min(len(context), match_start + len(matched) + 200) - - prefix = "..." if start > 0 else "" - suffix = "..." if end < len(context) else "" - - formatted["context_line"] = prefix + context[start:end] + suffix - else: - formatted["context_line"] = context[:500] + "..." - + formatted["matched_string"] = _wrap_sheet_string(match.get("matched_string", "")) return formatted +def format_for_console(match: Dict[str, str]) -> str: + """Prepares a slim, readable string representation (n columns) for stdout/logs.""" + file_path = match.get("file_path", "") + line_number = match.get("line_number", "") + rule_name = match.get("rule_name", "") + matched_string = match.get("matched_string", "") + return f" {file_path}:{line_number} [{rule_name}] {matched_string}" + + + def get_match_counts(matches: List[Dict[str, str]]) -> Tuple[Dict[str, int], Dict[str, int]]: """ Aggregate matches by rule and by package. @@ -294,9 +333,7 @@ def load_ignore_file(file_path: str) -> List[str]: def write_csv_report( output_path: str, - matches: List[Dict[str, str]], - github_repo: str = None, - branch: str = "main" + matches: List[Dict[str, str]] ) -> None: """ Write the collected matches to a CSV file. @@ -304,8 +341,6 @@ def write_csv_report( Args: output_path: Path to the output CSV file. matches: A list of dictionaries containing match details. - github_repo: Optional GitHub repository URL base. - branch: GitHub branch for links (defaults to main). """ fieldnames = ["file_path", "package_name", "rule_name", "line_number", "matched_string", "context_line"] @@ -315,7 +350,7 @@ def write_csv_report( writer.writeheader() for match in matches: - formatted_match = format_match_for_csv(match, github_repo, branch) + formatted_match = format_for_raw_csv(match) # Ensure only specified fields are written row = {field: formatted_match.get(field, "") for field in fieldnames} writer.writerow(row) @@ -358,7 +393,7 @@ def upload_to_drive(csv_path: str, matches: List[Dict[str, str]], github_repo: s # Prepare data values = [["file_path", "package_name", "rule_name", "line_number", "matched_string", "context_line"]] for m in matches: - formatted_m = format_match_for_csv(m, github_repo=github_repo, branch=branch) + formatted_m = format_for_spreadsheet(m, github_repo=github_repo, branch=branch) values.append([ formatted_m.get("file_path", ""), formatted_m.get("package_name", ""), @@ -601,6 +636,18 @@ def main(): help="Upload results to a Google Sheet in Drive" ) + parser.add_argument( + "--stdout", + action="store_true", + help="Print the full CSV report to stdout instead of/in addition to writing to a file" + ) + + parser.add_argument( + "--soft-fail", + action="store_true", + help="Exit with code 0 even if matches are found (useful during development and testing runs)" + ) + args = parser.parse_args() # Resolve target packages if filtering is requested @@ -628,10 +675,7 @@ def main(): config_manager = ConfigManager(args.config, args.dependency, args.version) rules = config_manager.load_config() - print(f"\nLoaded {len(rules)} rules:") - for rule in rules: - print(f" - {rule['name']}: {rule['pattern']}") - + # Load ignore file from script directory (Option A) @@ -645,10 +689,11 @@ def main(): all_matches = scan_repository(args.path, rules, target_packages, ignore_dirs, version_string=args.version) print(f"\nFound {len(all_matches)} matches.") - for m in all_matches[:10]: # Show first 10 - print(f" {m['file_path']}:{m['line_number']} [{m['rule_name']}] {m['matched_string']}") + display_matches = all_matches if args.stdout else all_matches[:10] + for m in display_matches: + print(format_for_console(m)) - if len(all_matches) > 10: + if not args.stdout and len(all_matches) > 10: print(f" ... and {len(all_matches) - 10} more matches.") # Get and print summary counts @@ -665,10 +710,18 @@ def main(): os.makedirs(results_dir, exist_ok=True) output_path = os.path.join(results_dir, f"{args.dependency}-{args.version}-{timestamp}.csv") - write_csv_report(output_path, all_matches, github_repo=args.github_repo, branch=args.branch) + write_csv_report(output_path, all_matches) if args.upload: upload_to_drive(output_path, all_matches, github_repo=args.github_repo, branch=args.branch) + + + # Distinct exit codes for CI/CD + if all_matches and not args.soft_fail: + sys.exit(1) + else: + sys.exit(0) + if __name__ == "__main__": main()