From 09527292d5173a1889bc3e1c45958d80dd68af0b Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Thu, 11 Jun 2026 08:12:09 -0400 Subject: [PATCH 01/19] feat: complete Phase 1 of version_scanner project Adds distinct exit codes, --stdout support, workflow file, and fixes 3.10 truncation. --- .github/workflows/version_scanner.yml | 28 ++++++++++ scripts/version_scanner/regex_config.yaml | 4 +- .../tests/unit/test_version_scanner.py | 54 ++++++++++++++++++- scripts/version_scanner/version_scanner.py | 21 ++++++++ 4 files changed, 104 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/version_scanner.yml diff --git a/.github/workflows/version_scanner.yml b/.github/workflows/version_scanner.yml new file mode 100644 index 000000000000..4bf33b52f21d --- /dev/null +++ b/.github/workflows/version_scanner.yml @@ -0,0 +1,28 @@ +name: Version Scanner + +on: + workflow_dispatch: + schedule: + - cron: '0 0 * * *' # Run daily at midnight UTC + pull_request: + +jobs: + scan: + name: Run Version Scanner + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pyyaml + + - name: Run Version Scanner + run: | + python scripts/version_scanner/version_scanner.py -d python -v 3.7 --stdout diff --git a/scripts/version_scanner/regex_config.yaml b/scripts/version_scanner/regex_config.yaml index 07196c63edeb..33feb746aef6 100644 --- a/scripts/version_scanner/regex_config.yaml +++ b/scripts/version_scanner/regex_config.yaml @@ -87,7 +87,7 @@ rules: - "Python3.7" rules: - | - python3\.{minor} + python3\.{minor}(?!\d) - name: combined_version_string description: Finds combined version strings often used in class or variable names. @@ -97,6 +97,6 @@ rules: - "Python37DeprecationWarning" rules: - | - Python{major}{minor} + Python{major}{minor}(?!\d) diff --git a/scripts/version_scanner/tests/unit/test_version_scanner.py b/scripts/version_scanner/tests/unit/test_version_scanner.py index f2d6ce66735e..421eb0de15c1 100644 --- a/scripts/version_scanner/tests/unit/test_version_scanner.py +++ b/scripts/version_scanner/tests/unit/test_version_scanner.py @@ -376,7 +376,10 @@ def test_main_loads_ignore_from_script_dir(mock_scan, mock_load_ignore): with mock.patch('sys.argv', test_args): from version_scanner import main - main() + try: + main() + except SystemExit: + pass mock_load_ignore.assert_called_once() args, kwargs = mock_load_ignore.call_args @@ -385,6 +388,13 @@ def test_main_loads_ignore_from_script_dir(mock_scan, mock_load_ignore): assert "scripts/version_scanner" in path +try: + import googleapiclient + HAS_GOOGLE_API = True +except ImportError: + HAS_GOOGLE_API = False + +@pytest.mark.skipif(not HAS_GOOGLE_API, reason="Requires googleapiclient") @mock.patch('googleapiclient.discovery.build') @mock.patch('google.auth.default') def test_upload_to_drive(mock_auth, mock_build): @@ -479,6 +489,48 @@ def test_regex_examples_from_config(): break assert matched, f"Example '{example}' in group '{name}' did not match any pattern." +def test_main_exit_code_1(): + """Test that main() calls sys.exit(1) when matches are found.""" + # We can mock scan_repository to return a dummy match + test_args = ['version_scanner.py', '-d', 'python', '-v', '3.7'] + with mock.patch('sys.argv', test_args): + from version_scanner import main + with mock.patch('version_scanner.scan_repository', return_value=[{'file_path': 'test', 'line_number': 1, 'matched_string': '3.7', 'rule_name': 'test'}]): + with pytest.raises(SystemExit) as excinfo: + main() + assert excinfo.value.code == 1 + +def test_main_stdout(capsys): + """Test that --stdout prints the CSV output to stdout.""" + test_args = ['version_scanner.py', '-d', 'python', '-v', '3.7', '--stdout'] + with mock.patch('sys.argv', test_args): + from version_scanner import main + with mock.patch('version_scanner.scan_repository', return_value=[{'file_path': 'test.py', 'line_number': 1, 'matched_string': '3.7', 'rule_name': 'test'}]): + with pytest.raises(SystemExit): + main() + + captured = capsys.readouterr() + assert "=== CSV Output ===" in captured.out + assert "test.py," in captured.out + assert "test," in captured.out + assert "3.7" in captured.out + +def test_scan_file_truncation_bug(tmp_path): + """Test that searching for 3.1 does NOT match 3.10 (truncation bug).""" + # Create a file with 3.10 + test_file = tmp_path / "test_file.py" + test_file.write_text("python_requires = '>=3.10'\npython3.10\nPython310\n") + + from version_scanner import ConfigManager, scan_file + + # Init config for 3.1 + config_manager = ConfigManager("python", "3.1") + config_manager.load_config("regex_config.yaml") + + # It should not match anything because all strings are 3.10, not 3.1 + matches = scan_file(str(test_file), config_manager.rules) + assert len(matches) == 0, f"Expected 0 matches for 3.1 in 3.10 content, but got {len(matches)}: {matches}" + def test_scan_repository_layout_agnostic(tmp_path): # Create directories under different roots diff --git a/scripts/version_scanner/version_scanner.py b/scripts/version_scanner/version_scanner.py index 1d24c8fceced..aadbcb550be5 100644 --- a/scripts/version_scanner/version_scanner.py +++ b/scripts/version_scanner/version_scanner.py @@ -601,6 +601,12 @@ def main(): help="Upload results to a Google Sheet in Drive" ) + parser.add_argument( + "--stdout", + action="store_true", + help="Print the full CSV report to stdout instead of/in addition to writing to a file" + ) + args = parser.parse_args() # Resolve target packages if filtering is requested @@ -670,5 +676,20 @@ def main(): if args.upload: upload_to_drive(output_path, all_matches, github_repo=args.github_repo, branch=args.branch) + if args.stdout: + print("\n=== CSV Output ===") + import io + output = io.StringIO() + write_csv_report(output_path, all_matches, github_repo=args.github_repo, branch=args.branch) # this writes to the file, but we want stdout too + # Let's just read the file and print it + with open(output_path, 'r', encoding='utf-8') as f: + print(f.read(), end='') + + # Distinct exit codes for CI/CD + if all_matches: + sys.exit(1) + else: + sys.exit(0) + if __name__ == "__main__": main() From 163b773f2ccec37ac83075a83f6d6c655ab33d8b Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Thu, 11 Jun 2026 08:12:34 -0400 Subject: [PATCH 02/19] test: fix ConfigManager signature and regex assertions --- scripts/version_scanner/tests/unit/test_version_scanner.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/version_scanner/tests/unit/test_version_scanner.py b/scripts/version_scanner/tests/unit/test_version_scanner.py index 421eb0de15c1..b4b6a092147e 100644 --- a/scripts/version_scanner/tests/unit/test_version_scanner.py +++ b/scripts/version_scanner/tests/unit/test_version_scanner.py @@ -524,11 +524,11 @@ def test_scan_file_truncation_bug(tmp_path): from version_scanner import ConfigManager, scan_file # Init config for 3.1 - config_manager = ConfigManager("python", "3.1") - config_manager.load_config("regex_config.yaml") + config_manager = ConfigManager("regex_config.yaml", "python", "3.1") + rules = config_manager.load_config() # It should not match anything because all strings are 3.10, not 3.1 - matches = scan_file(str(test_file), config_manager.rules) + matches = scan_file(str(test_file), rules) assert len(matches) == 0, f"Expected 0 matches for 3.1 in 3.10 content, but got {len(matches)}: {matches}" From 3fd95aae1a99a0d92826b87ed6448cef2eefe233 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Thu, 11 Jun 2026 08:12:53 -0400 Subject: [PATCH 03/19] test: fix regex rule compilation in tests --- scripts/version_scanner/tests/unit/test_version_scanner.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/version_scanner/tests/unit/test_version_scanner.py b/scripts/version_scanner/tests/unit/test_version_scanner.py index b4b6a092147e..83b5cb342667 100644 --- a/scripts/version_scanner/tests/unit/test_version_scanner.py +++ b/scripts/version_scanner/tests/unit/test_version_scanner.py @@ -526,9 +526,11 @@ def test_scan_file_truncation_bug(tmp_path): # Init config for 3.1 config_manager = ConfigManager("regex_config.yaml", "python", "3.1") rules = config_manager.load_config() + import re + compiled_rules = [{"name": r["name"], "pattern": re.compile(r["pattern"], re.IGNORECASE)} for r in rules] # It should not match anything because all strings are 3.10, not 3.1 - matches = scan_file(str(test_file), rules) + matches = scan_file(str(test_file), compiled_rules) assert len(matches) == 0, f"Expected 0 matches for 3.1 in 3.10 content, but got {len(matches)}: {matches}" From 632e3dadc95ad1dde6013d146b1b06a0b9af425c Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Thu, 11 Jun 2026 08:28:44 -0400 Subject: [PATCH 04/19] fix: prevent truncation in sys.version_info.minor regexes --- scripts/version_scanner/regex_config.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/version_scanner/regex_config.yaml b/scripts/version_scanner/regex_config.yaml index 33feb746aef6..95e62fe002aa 100644 --- a/scripts/version_scanner/regex_config.yaml +++ b/scripts/version_scanner/regex_config.yaml @@ -58,15 +58,15 @@ rules: - | sys\.version_info\s*<\s*\(3,\s*{minor_plus_one}\) - | - sys\.version_info\.minor\s*==\s*{minor} + sys\.version_info\.minor\s*==\s*{minor}(?!\d) - | - sys\.version_info\.minor\s*>=\s*{minor} + sys\.version_info\.minor\s*>=\s*{minor}(?!\d) - | - sys\.version_info\.minor\s*<=\s*{minor} + sys\.version_info\.minor\s*<=\s*{minor}(?!\d) - | - sys\.version_info\.minor\s*>\s*{minor_minus_one} + sys\.version_info\.minor\s*>\s*{minor_minus_one}(?!\d) - | - sys\.version_info\.minor\s*<\s*{minor_plus_one} + sys\.version_info\.minor\s*<\s*{minor_plus_one}(?!\d) - name: python_env_short description: Finds short python environment names often used in tox or nox. From 6b844c6bd18cf7a3705bbd9726cb183351399b1e Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Thu, 11 Jun 2026 08:38:01 -0400 Subject: [PATCH 05/19] fix: force string format in CSV output to prevent spreadsheet truncation --- scripts/version_scanner/tests/unit/test_version_scanner.py | 4 ++-- scripts/version_scanner/version_scanner.py | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/version_scanner/tests/unit/test_version_scanner.py b/scripts/version_scanner/tests/unit/test_version_scanner.py index 83b5cb342667..0805dd0a73c5 100644 --- a/scripts/version_scanner/tests/unit/test_version_scanner.py +++ b/scripts/version_scanner/tests/unit/test_version_scanner.py @@ -156,7 +156,7 @@ def test_write_csv_report(tmp_path): assert rows[0]["file_path"] == "./setup.py" assert rows[0]["rule_name"] == "python_requires_check" assert rows[0]["line_number"] == "1" - assert rows[0]["matched_string"] == "python_requires = '>=3.7'" + assert rows[0]["matched_string"] == '="python_requires = \'>=3.7\'"' assert rows[0]["context_line"] == "python_requires = '>=3.7'" @@ -513,7 +513,7 @@ def test_main_stdout(capsys): assert "=== CSV Output ===" in captured.out assert "test.py," in captured.out assert "test," in captured.out - assert "3.7" in captured.out + assert '="3.7"' in captured.out def test_scan_file_truncation_bug(tmp_path): """Test that searching for 3.1 does NOT match 3.10 (truncation bug).""" diff --git a/scripts/version_scanner/version_scanner.py b/scripts/version_scanner/version_scanner.py index aadbcb550be5..b7fffc5809df 100644 --- a/scripts/version_scanner/version_scanner.py +++ b/scripts/version_scanner/version_scanner.py @@ -225,6 +225,11 @@ def format_match_for_csv( context = formatted.get("context_line", "") matched = formatted.get("matched_string", "") + # Force spreadsheet apps (Google Sheets, Excel) to treat the match as a string. + # Otherwise, they parse "3.10" as a number and drop the trailing zero, displaying "3.1". + if matched: + formatted["matched_string"] = f'="{matched}"' + if len(context) > 500: match_start = context.find(matched) if match_start != -1: From f679fd7f47408146a62f9890b5086026f871269c Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Thu, 11 Jun 2026 08:51:06 -0400 Subject: [PATCH 06/19] build: update version_scanner.yml triggers to match repo standards --- .github/workflows/version_scanner.yml | 10 ++++++---- .../version_scanner/tests/unit/test_version_scanner.py | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/version_scanner.yml b/.github/workflows/version_scanner.yml index 4bf33b52f21d..18151e3040ab 100644 --- a/.github/workflows/version_scanner.yml +++ b/.github/workflows/version_scanner.yml @@ -1,10 +1,12 @@ name: Version Scanner on: - workflow_dispatch: + push: + branches: + - main schedule: - - cron: '0 0 * * *' # Run daily at midnight UTC - pull_request: + - cron: '0 7 * * 2' # Run weekly on Tuesdays at 7 AM UTC (mirrors main.yml) + workflow_dispatch: jobs: scan: @@ -16,7 +18,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.12' + python-version: '3.14' - name: Install dependencies run: | diff --git a/scripts/version_scanner/tests/unit/test_version_scanner.py b/scripts/version_scanner/tests/unit/test_version_scanner.py index 0805dd0a73c5..b63a0f6604f0 100644 --- a/scripts/version_scanner/tests/unit/test_version_scanner.py +++ b/scripts/version_scanner/tests/unit/test_version_scanner.py @@ -513,7 +513,7 @@ def test_main_stdout(capsys): assert "=== CSV Output ===" in captured.out assert "test.py," in captured.out assert "test," in captured.out - assert '="3.7"' in captured.out + assert '"=""3.7"""' in captured.out def test_scan_file_truncation_bug(tmp_path): """Test that searching for 3.1 does NOT match 3.10 (truncation bug).""" From 23569361d64c9411a31cc3cc1e8dc23e6963bc2c Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Thu, 11 Jun 2026 09:37:16 -0400 Subject: [PATCH 07/19] chore: test workflow on push --- .github/workflows/version_scanner.yml | 41 ++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/.github/workflows/version_scanner.yml b/.github/workflows/version_scanner.yml index 18151e3040ab..47af0cf7cea5 100644 --- a/.github/workflows/version_scanner.yml +++ b/.github/workflows/version_scanner.yml @@ -4,10 +4,15 @@ on: push: branches: - main + - feat/add-version-scanner schedule: - cron: '0 7 * * 2' # Run weekly on Tuesdays at 7 AM UTC (mirrors main.yml) workflow_dispatch: +permissions: + contents: read + issues: write + jobs: scan: name: Run Version Scanner @@ -27,4 +32,38 @@ jobs: - name: Run Version Scanner run: | - python scripts/version_scanner/version_scanner.py -d python -v 3.7 --stdout + # We pipe the output to a file so we can read it in the next step + python scripts/version_scanner/version_scanner.py -d python -v 3.7 --stdout > scanner_output.csv + + - name: Create or update issue on finding + if: failure() + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + TITLE="Version Scanner found deprecated dependencies" + RUN_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" + + # Read the first 50 lines to prevent blowing up the issue body if it's massive + CSV_PREVIEW=$(head -n 50 scanner_output.csv) + + BODY="The [Version Scanner]($RUN_URL) found deprecated dependencies in the repository. + + **Matches Found:** + \`\`\`csv + $CSV_PREVIEW + \`\`\` + *(If there are more than 50 matches, see the workflow logs for the full list)*" + + # Mirroring regenerate-all.yml: check if an issue already exists to prevent spam + EXISTING_ISSUE=$(gh issue list --state open --search "in:title \"$TITLE\"" --json number --jq '.[0].number') + + if [ -z "$EXISTING_ISSUE" ]; then + echo "WOULD HAVE CREATED ISSUE:" + echo "gh issue create --title \"$TITLE\" --body \"$BODY\"" + # gh issue create --title "$TITLE" --body "$BODY" + else + echo "Issue #$EXISTING_ISSUE already exists." + echo "WOULD HAVE ADDED COMMENT:" + echo "gh issue comment \"$EXISTING_ISSUE\" --body \"Another scanner run found deprecated dependencies: $RUN_URL\"" + # gh issue comment "$EXISTING_ISSUE" --body "Another scanner run found deprecated dependencies: $RUN_URL" + fi From 7b8f3c31ee53b50fd9359e73e37f8a35c013d9f3 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Thu, 11 Jun 2026 09:51:47 -0400 Subject: [PATCH 08/19] chore: update workflow trigger for new branch --- .github/workflows/version_scanner.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/version_scanner.yml b/.github/workflows/version_scanner.yml index 47af0cf7cea5..9771b7d3251e 100644 --- a/.github/workflows/version_scanner.yml +++ b/.github/workflows/version_scanner.yml @@ -4,7 +4,7 @@ on: push: branches: - main - - feat/add-version-scanner + - feat/version-scanner-cicd-upgrades schedule: - cron: '0 7 * * 2' # Run weekly on Tuesdays at 7 AM UTC (mirrors main.yml) workflow_dispatch: From dc6b01112394715bf811f98db11f61767a26ef23 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Thu, 11 Jun 2026 10:06:28 -0400 Subject: [PATCH 09/19] build: fix scanner output redirection and add artifact upload --- .github/workflows/version_scanner.yml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/version_scanner.yml b/.github/workflows/version_scanner.yml index 9771b7d3251e..07dde4843f02 100644 --- a/.github/workflows/version_scanner.yml +++ b/.github/workflows/version_scanner.yml @@ -32,8 +32,15 @@ jobs: - name: Run Version Scanner run: | - # We pipe the output to a file so we can read it in the next step - python scripts/version_scanner/version_scanner.py -d python -v 3.7 --stdout > scanner_output.csv + # Use -o to output the raw CSV to a file, and --stdout to print the summary to the GitHub Actions UI + python scripts/version_scanner/version_scanner.py -d python -v 3.7 --stdout -o scanner_output.csv + + - name: Upload CSV Results + if: always() + uses: actions/upload-artifact@v4 + with: + name: version-scanner-results + path: scanner_output.csv - name: Create or update issue on finding if: failure() From f357200d7dc1d8ec0a4f55c89710e8f11499c1c5 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Thu, 11 Jun 2026 14:06:28 -0400 Subject: [PATCH 10/19] chore: updates github action versions and scanner config --- .github/workflows/version_scanner.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/version_scanner.yml b/.github/workflows/version_scanner.yml index 07dde4843f02..f371e6d426bf 100644 --- a/.github/workflows/version_scanner.yml +++ b/.github/workflows/version_scanner.yml @@ -1,4 +1,4 @@ -name: Version Scanner +name: Version Scan on: push: @@ -15,13 +15,13 @@ permissions: jobs: scan: - name: Run Version Scanner + name: Version Scan runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.14' @@ -33,14 +33,14 @@ jobs: - name: Run Version Scanner run: | # Use -o to output the raw CSV to a file, and --stdout to print the summary to the GitHub Actions UI - python scripts/version_scanner/version_scanner.py -d python -v 3.7 --stdout -o scanner_output.csv + python scripts/version_scanner/version_scanner.py -d python -v 3.7 --stdout -o version_scanner_output.csv - name: Upload CSV Results if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: version-scanner-results - path: scanner_output.csv + path: version_scanner_output.csv - name: Create or update issue on finding if: failure() From ae291adda07689d20907a4303ceb78251798f414 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Thu, 11 Jun 2026 14:13:44 -0400 Subject: [PATCH 11/19] chore: update filename --- .github/workflows/version_scanner.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/version_scanner.yml b/.github/workflows/version_scanner.yml index f371e6d426bf..e56949dc6948 100644 --- a/.github/workflows/version_scanner.yml +++ b/.github/workflows/version_scanner.yml @@ -51,7 +51,7 @@ jobs: RUN_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" # Read the first 50 lines to prevent blowing up the issue body if it's massive - CSV_PREVIEW=$(head -n 50 scanner_output.csv) + CSV_PREVIEW=$(head -n 50 version_scanner_output.csv) BODY="The [Version Scanner]($RUN_URL) found deprecated dependencies in the repository. From 8b64db710ad8662ad7efbb66b9d6b8296d128bd4 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 12 Jun 2026 06:46:32 -0400 Subject: [PATCH 12/19] feat(scanner): add --soft-fail CLI flag and integrate in GHA workflow --- .github/workflows/version_scanner.yml | 2 +- .../tests/unit/test_version_scanner.py | 180 +++++++++++------- scripts/version_scanner/version_scanner.py | 130 +++++++------ 3 files changed, 182 insertions(+), 130 deletions(-) diff --git a/.github/workflows/version_scanner.yml b/.github/workflows/version_scanner.yml index e56949dc6948..312cda3cf7b0 100644 --- a/.github/workflows/version_scanner.yml +++ b/.github/workflows/version_scanner.yml @@ -33,7 +33,7 @@ jobs: - name: Run Version Scanner run: | # Use -o to output the raw CSV to a file, and --stdout to print the summary to the GitHub Actions UI - python scripts/version_scanner/version_scanner.py -d python -v 3.7 --stdout -o version_scanner_output.csv + python scripts/version_scanner/version_scanner.py -d python -v 3.7 --stdout -o version_scanner_output.csv --soft-fail - name: Upload CSV Results if: always() diff --git a/scripts/version_scanner/tests/unit/test_version_scanner.py b/scripts/version_scanner/tests/unit/test_version_scanner.py index b63a0f6604f0..19aeeba9a6fc 100644 --- a/scripts/version_scanner/tests/unit/test_version_scanner.py +++ b/scripts/version_scanner/tests/unit/test_version_scanner.py @@ -19,7 +19,17 @@ from unittest.mock import patch import pytest import yaml -from version_scanner import ConfigManager, scan_file, write_csv_report +from version_scanner import ( + ConfigManager, + scan_file, + write_csv_report, + _truncate_context, + _wrap_sheet_hyperlink, + _wrap_sheet_string, + format_for_raw_csv, + format_for_spreadsheet, + format_for_console +) # Test ConfigManager @pytest.mark.parametrize("dependency, version, expected", [ @@ -156,7 +166,7 @@ def test_write_csv_report(tmp_path): assert rows[0]["file_path"] == "./setup.py" assert rows[0]["rule_name"] == "python_requires_check" assert rows[0]["line_number"] == "1" - assert rows[0]["matched_string"] == '="python_requires = \'>=3.7\'"' + assert rows[0]["matched_string"] == "python_requires = '>=3.7'" assert rows[0]["context_line"] == "python_requires = '>=3.7'" @@ -246,43 +256,7 @@ def test_main_package_file_not_found(capsys): assert excinfo.value.code == 1 captured = capsys.readouterr() assert "Error: Package file not found" in captured.err -def test_format_match_for_csv(): - from version_scanner import format_match_for_csv - match = { - "file_path": "google-cloud-python/main/packages/pkg_a/setup.py", - "repo_path": "packages/pkg_a/setup.py", - "line_number": 123, - "rule_name": "test_rule" - } - - # Test without github_repo - formatted = format_match_for_csv(match) - assert formatted["line_number"] == 123 - - # Test with github_repo - formatted = format_match_for_csv(match, github_repo="https://github.com/user/repo", branch="main") - expected_url = "https://github.com/user/repo/blob/main/packages/pkg_a/setup.py#L123" - assert formatted["line_number"] == f'=HYPERLINK("{expected_url}", "123")' - -def test_format_match_for_csv_truncates_long_line(): - from version_scanner import format_match_for_csv - - long_line = "a" * 1000 + "PY37" + "b" * 1000 - match = { - "file_path": "test.py", - "line_number": 1, - "rule_name": "test_rule", - "matched_string": "PY37", - "context_line": long_line - } - - formatted = format_match_for_csv(match) - context = formatted["context_line"] - - assert len(context) <= 600 - assert "PY37" in context - assert "..." in context def test_get_match_counts(): @@ -315,30 +289,7 @@ def test_scan_file_removes_newline_from_match(tmp_path): assert "\n" not in results[0]["matched_string"] -def test_write_csv_report_with_links(tmp_path): - output_file = tmp_path / "report.csv" - matches = [ - { - "file_path": "google-cloud-python/main/packages/pkg_a/setup.py", - "repo_path": "packages/pkg_a/setup.py", - "line_number": 1, - "rule_name": "python_requires_check", - "matched_string": "python_requires = '>=3.7'", - "context_line": "python_requires = '>=3.7'" - } - ] - - from version_scanner import write_csv_report - write_csv_report(str(output_file), matches, github_repo="https://github.com/user/repo", branch="main") - - assert output_file.exists() - - with open(output_file, 'r', encoding='utf-8', newline='') as f: - reader = csv.DictReader(f) - rows = list(reader) - - assert len(rows) == 1 - assert "HYPERLINK" in rows[0]["line_number"] + def test_scan_repository_ignores_version_scanner(tmp_path): vs_dir = tmp_path / "version_scanner" vs_dir.mkdir() @@ -500,6 +451,18 @@ def test_main_exit_code_1(): main() assert excinfo.value.code == 1 + +def test_main_soft_fail_exit_code_0(): + """Test that main() calls sys.exit(0) when matches are found but --soft-fail is set.""" + test_args = ['version_scanner.py', '-d', 'python', '-v', '3.7', '--soft-fail'] + with mock.patch('sys.argv', test_args): + from version_scanner import main + with mock.patch('version_scanner.scan_repository', return_value=[{'file_path': 'test', 'line_number': 1, 'matched_string': '3.7', 'rule_name': 'test'}]): + with pytest.raises(SystemExit) as excinfo: + main() + assert excinfo.value.code == 0 + + def test_main_stdout(capsys): """Test that --stdout prints the CSV output to stdout.""" test_args = ['version_scanner.py', '-d', 'python', '-v', '3.7', '--stdout'] @@ -510,10 +473,8 @@ def test_main_stdout(capsys): main() captured = capsys.readouterr() - assert "=== CSV Output ===" in captured.out - assert "test.py," in captured.out - assert "test," in captured.out - assert '"=""3.7"""' in captured.out + assert "=== Scan Results ===" in captured.out + assert "test.py:1 [test] 3.7" in captured.out def test_scan_file_truncation_bug(tmp_path): """Test that searching for 3.1 does NOT match 3.10 (truncation bug).""" @@ -579,3 +540,90 @@ def test_scan_repository_package_name_roots(tmp_path): assert len(results) == 1 assert results[0]["package_name"] == "pkg_third" assert "third_party/pkg_third/setup.py" in results[0]["file_path"] + + +# --- Decoupled Formatters Tests (TDD) --- + +def test_truncate_context(): + # Context shorter than 500 characters shouldn't be truncated + assert _truncate_context("short context", "short") == "short context" + + # Context longer than 500 characters should be truncated around the matched string + matched = "TARGET_VERSION" + long_prefix = "a" * 300 + long_suffix = "b" * 300 + long_context = long_prefix + matched + long_suffix + + truncated = _truncate_context(long_context, matched) + assert len(truncated) <= 500 + assert matched in truncated + assert truncated.startswith("...") + assert truncated.endswith("...") + +def test_wrap_sheet_hyperlink(): + assert _wrap_sheet_hyperlink("https://github.com/foo", "12") == '=HYPERLINK("https://github.com/foo", "12")' + +def test_wrap_sheet_string(): + assert _wrap_sheet_string("3.10") == '="3.10"' + assert _wrap_sheet_string("") == "" + assert _wrap_sheet_string(None) == "" + +def test_format_for_raw_csv(): + match = { + "file_path": "google-cloud-python/main/packages/pkg_a/setup.py", + "repo_path": "packages/pkg_a/setup.py", + "package_name": "pkg_a", + "rule_name": "python_requires_check", + "line_number": "123", + "matched_string": "3.7", + "context_line": "python_requires = '>=3.7'" + } + + formatted = format_for_raw_csv(match) + + assert formatted["file_path"] == "google-cloud-python/main/packages/pkg_a/setup.py" + assert formatted["package_name"] == "pkg_a" + assert formatted["rule_name"] == "python_requires_check" + assert formatted["line_number"] == 123 # Int conversion + assert formatted["matched_string"] == "3.7" # No formula wrapping + assert formatted["context_line"] == "python_requires = '>=3.7'" + +def test_format_for_spreadsheet(): + match = { + "file_path": "google-cloud-python/main/packages/pkg_a/setup.py", + "repo_path": "packages/pkg_a/setup.py", + "package_name": "pkg_a", + "rule_name": "python_requires_check", + "line_number": 123, + "matched_string": "3.7", + "context_line": "python_requires = '>=3.7'" + } + + # Without github_repo + formatted_no_repo = format_for_spreadsheet(match) + assert formatted_no_repo["line_number"] == 123 + assert formatted_no_repo["matched_string"] == '="3.7"' # Decimal protection formula + + # With github_repo + formatted_repo = format_for_spreadsheet(match, github_repo="https://github.com/user/repo", branch="main") + expected_url = "https://github.com/user/repo/blob/main/packages/pkg_a/setup.py#L123" + assert formatted_repo["line_number"] == f'=HYPERLINK("{expected_url}", "123")' + assert formatted_repo["matched_string"] == '="3.7"' + +def test_format_for_console(): + match = { + "file_path": "google-cloud-python/main/packages/pkg_a/setup.py", + "repo_path": "packages/pkg_a/setup.py", + "package_name": "pkg_a", + "rule_name": "python_requires_check", + "line_number": 123, + "matched_string": "3.7", + "context_line": "python_requires = '>=3.7'" + } + + log_str = format_for_console(match) + assert "google-cloud-python/main/packages/pkg_a/setup.py:123" in log_str + assert "[python_requires_check]" in log_str + assert "3.7" in log_str + assert "python_requires = " not in log_str # Slim format doesn't print context line + diff --git a/scripts/version_scanner/version_scanner.py b/scripts/version_scanner/version_scanner.py index b7fffc5809df..e9d121fd8df6 100644 --- a/scripts/version_scanner/version_scanner.py +++ b/scripts/version_scanner/version_scanner.py @@ -186,66 +186,72 @@ def scan_file(file_path: str, compiled_rules: List[Dict[str, re.Pattern]]) -> Li return results -def format_match_for_csv( +def _truncate_context(context: str, matched: str) -> str: + """Safely truncates context around the match location to prevent overflow.""" + if len(context) > 500: + match_start = context.find(matched) + if match_start != -1: + start = max(0, match_start - 200) + end = min(len(context), match_start + len(matched) + 200) + prefix = "..." if start > 0 else "" + suffix = "..." if end < len(context) else "" + return prefix + context[start:end] + suffix + else: + return context[:500] + "..." + return context + + +def _wrap_sheet_hyperlink(url: str, label: str) -> str: + return f'=HYPERLINK("{url}", "{label}")' + + +def _wrap_sheet_string(value: str) -> str: + if value is None: + return "" + return f'="{value}"' if value else "" + + +def format_for_raw_csv(match: Dict[str, str]) -> Dict[str, str]: + """Prepares a full raw dataset (n + x columns) with clean text values.""" + return { + "file_path": match.get("file_path", ""), + "package_name": match.get("package_name", ""), + "rule_name": match.get("rule_name", ""), + "line_number": int(match.get("line_number", 0)) if match.get("line_number") is not None else 0, + "matched_string": match.get("matched_string", ""), + "context_line": _truncate_context(match.get("context_line", ""), match.get("matched_string", "")) + } + + +def format_for_spreadsheet( match: Dict[str, str], github_repo: str = None, branch: str = "main" ) -> Dict[str, str]: - """ - Formats a raw match dictionary for clean CSV presentation and imports. - - Cleans long context lines by truncating them around the match location to prevent - extreme cell overflow in spreadsheets. Optionally transforms line numbers into - clickable `=HYPERLINK(...)` formulas linking directly to the exact file and line - number in GitHub. - - Args: - match: A match dictionary containing 'file_path', 'repo_path', 'rule_name', - 'line_number', 'matched_string', and 'context_line'. - github_repo: Optional GitHub repository base URL (e.g., "https://github.com/user/repo"). - If provided, triggers the hyperlink generation. - branch: Optional branch name to build the GitHub blob URL (defaults to "main"). - - Returns: - A copy of the match dictionary with formatted/truncated values, suitable for CSV writing. - """ - formatted = match.copy() + """Builds on top of raw CSV but applies Sheets-specific formulas.""" + formatted = format_for_raw_csv(match) + # Override fields with spreadsheet formatting if github_repo: - # Use repo_path if available, fallback to file_path file_path = match.get("repo_path", match.get("file_path", "")) line_number = match.get("line_number", "") - - # Construct URL url = f"{github_repo}/blob/{branch}/{file_path}#L{line_number}" + formatted["line_number"] = _wrap_sheet_hyperlink(url, str(line_number)) - # Format as Google Sheets formula - formatted["line_number"] = f'=HYPERLINK("{url}", "{line_number}")' - - context = formatted.get("context_line", "") - matched = formatted.get("matched_string", "") - - # Force spreadsheet apps (Google Sheets, Excel) to treat the match as a string. - # Otherwise, they parse "3.10" as a number and drop the trailing zero, displaying "3.1". - if matched: - formatted["matched_string"] = f'="{matched}"' - - if len(context) > 500: - match_start = context.find(matched) - if match_start != -1: - start = max(0, match_start - 200) - end = min(len(context), match_start + len(matched) + 200) - - prefix = "..." if start > 0 else "" - suffix = "..." if end < len(context) else "" - - formatted["context_line"] = prefix + context[start:end] + suffix - else: - formatted["context_line"] = context[:500] + "..." - + formatted["matched_string"] = _wrap_sheet_string(match.get("matched_string", "")) return formatted +def format_for_console(match: Dict[str, str]) -> str: + """Prepares a slim, readable string representation (n columns) for stdout/logs.""" + file_path = match.get("file_path", "") + line_number = match.get("line_number", "") + rule_name = match.get("rule_name", "") + matched_string = match.get("matched_string", "") + return f" {file_path}:{line_number} [{rule_name}] {matched_string}" + + + def get_match_counts(matches: List[Dict[str, str]]) -> Tuple[Dict[str, int], Dict[str, int]]: """ Aggregate matches by rule and by package. @@ -299,9 +305,7 @@ def load_ignore_file(file_path: str) -> List[str]: def write_csv_report( output_path: str, - matches: List[Dict[str, str]], - github_repo: str = None, - branch: str = "main" + matches: List[Dict[str, str]] ) -> None: """ Write the collected matches to a CSV file. @@ -309,8 +313,6 @@ def write_csv_report( Args: output_path: Path to the output CSV file. matches: A list of dictionaries containing match details. - github_repo: Optional GitHub repository URL base. - branch: GitHub branch for links (defaults to main). """ fieldnames = ["file_path", "package_name", "rule_name", "line_number", "matched_string", "context_line"] @@ -320,7 +322,7 @@ def write_csv_report( writer.writeheader() for match in matches: - formatted_match = format_match_for_csv(match, github_repo, branch) + formatted_match = format_for_raw_csv(match) # Ensure only specified fields are written row = {field: formatted_match.get(field, "") for field in fieldnames} writer.writerow(row) @@ -363,7 +365,7 @@ def upload_to_drive(csv_path: str, matches: List[Dict[str, str]], github_repo: s # Prepare data values = [["file_path", "package_name", "rule_name", "line_number", "matched_string", "context_line"]] for m in matches: - formatted_m = format_match_for_csv(m, github_repo=github_repo, branch=branch) + formatted_m = format_for_spreadsheet(m, github_repo=github_repo, branch=branch) values.append([ formatted_m.get("file_path", ""), formatted_m.get("package_name", ""), @@ -612,6 +614,12 @@ def main(): help="Print the full CSV report to stdout instead of/in addition to writing to a file" ) + parser.add_argument( + "--soft-fail", + action="store_true", + help="Exit with code 0 even if matches are found (useful for advisory runs in CI/CD)" + ) + args = parser.parse_args() # Resolve target packages if filtering is requested @@ -676,22 +684,18 @@ def main(): os.makedirs(results_dir, exist_ok=True) output_path = os.path.join(results_dir, f"{args.dependency}-{args.version}-{timestamp}.csv") - write_csv_report(output_path, all_matches, github_repo=args.github_repo, branch=args.branch) + write_csv_report(output_path, all_matches) if args.upload: upload_to_drive(output_path, all_matches, github_repo=args.github_repo, branch=args.branch) if args.stdout: - print("\n=== CSV Output ===") - import io - output = io.StringIO() - write_csv_report(output_path, all_matches, github_repo=args.github_repo, branch=args.branch) # this writes to the file, but we want stdout too - # Let's just read the file and print it - with open(output_path, 'r', encoding='utf-8') as f: - print(f.read(), end='') + print("\n=== Scan Results ===") + for m in all_matches: + print(format_for_console(m)) # Distinct exit codes for CI/CD - if all_matches: + if all_matches and not args.soft_fail: sys.exit(1) else: sys.exit(0) From 265ba67b5faf6fc7b02e8fa74a82a1b384345670 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 12 Jun 2026 06:53:19 -0400 Subject: [PATCH 13/19] chore(scanner): expand branch triggers to match any version-scanner branch --- .github/workflows/version_scanner.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/version_scanner.yml b/.github/workflows/version_scanner.yml index 312cda3cf7b0..c9cf9b49c6f3 100644 --- a/.github/workflows/version_scanner.yml +++ b/.github/workflows/version_scanner.yml @@ -4,7 +4,7 @@ on: push: branches: - main - - feat/version-scanner-cicd-upgrades + - '**version-scanner**' schedule: - cron: '0 7 * * 2' # Run weekly on Tuesdays at 7 AM UTC (mirrors main.yml) workflow_dispatch: From e1c6391bbbbaa34c3c3d04ce3be895f6f13430b8 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 12 Jun 2026 06:57:25 -0400 Subject: [PATCH 14/19] chore(scanner): update cron trigger to run hourly --- .github/workflows/version_scanner.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/version_scanner.yml b/.github/workflows/version_scanner.yml index c9cf9b49c6f3..6b7af7d22301 100644 --- a/.github/workflows/version_scanner.yml +++ b/.github/workflows/version_scanner.yml @@ -6,7 +6,7 @@ on: - main - '**version-scanner**' schedule: - - cron: '0 7 * * 2' # Run weekly on Tuesdays at 7 AM UTC (mirrors main.yml) + - cron: '0 * * * *' # Run hourly at the top of the hour workflow_dispatch: permissions: From 4f67a1a51aecf322a4d03e7184ab3c230bdbea69 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 12 Jun 2026 07:11:36 -0400 Subject: [PATCH 15/19] chore(scanner): simplify console output by removing rule listing and duplication --- .../tests/unit/test_version_scanner.py | 14 +++++++++++++- scripts/version_scanner/version_scanner.py | 17 ++++------------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/scripts/version_scanner/tests/unit/test_version_scanner.py b/scripts/version_scanner/tests/unit/test_version_scanner.py index 19aeeba9a6fc..e477801caa75 100644 --- a/scripts/version_scanner/tests/unit/test_version_scanner.py +++ b/scripts/version_scanner/tests/unit/test_version_scanner.py @@ -473,9 +473,21 @@ def test_main_stdout(capsys): main() captured = capsys.readouterr() - assert "=== Scan Results ===" in captured.out assert "test.py:1 [test] 3.7" in captured.out + +def test_main_does_not_print_rules(capsys): + """Test that main() does not print the list of loaded rules to stdout.""" + test_args = ['version_scanner.py', '-d', 'python', '-v', '3.7'] + with mock.patch('sys.argv', test_args): + from version_scanner import main + with mock.patch('version_scanner.scan_repository', return_value=[]): + with pytest.raises(SystemExit): + main() + captured = capsys.readouterr() + assert "explicit_version_string" not in captured.out + + def test_scan_file_truncation_bug(tmp_path): """Test that searching for 3.1 does NOT match 3.10 (truncation bug).""" # Create a file with 3.10 diff --git a/scripts/version_scanner/version_scanner.py b/scripts/version_scanner/version_scanner.py index e9d121fd8df6..8be59ddadad8 100644 --- a/scripts/version_scanner/version_scanner.py +++ b/scripts/version_scanner/version_scanner.py @@ -647,10 +647,7 @@ def main(): config_manager = ConfigManager(args.config, args.dependency, args.version) rules = config_manager.load_config() - print(f"\nLoaded {len(rules)} rules:") - for rule in rules: - print(f" - {rule['name']}: {rule['pattern']}") - + # Load ignore file from script directory (Option A) @@ -664,11 +661,8 @@ def main(): all_matches = scan_repository(args.path, rules, target_packages, ignore_dirs, version_string=args.version) print(f"\nFound {len(all_matches)} matches.") - for m in all_matches[:10]: # Show first 10 - print(f" {m['file_path']}:{m['line_number']} [{m['rule_name']}] {m['matched_string']}") - - if len(all_matches) > 10: - print(f" ... and {len(all_matches) - 10} more matches.") + for m in all_matches: + print(format_for_console(m)) # Get and print summary counts rule_counts, package_counts = get_match_counts(all_matches) @@ -689,10 +683,7 @@ def main(): if args.upload: upload_to_drive(output_path, all_matches, github_repo=args.github_repo, branch=args.branch) - if args.stdout: - print("\n=== Scan Results ===") - for m in all_matches: - print(format_for_console(m)) + # Distinct exit codes for CI/CD if all_matches and not args.soft_fail: From fe5c56a0d55592bb0736b5182c6a1e513c0f8575 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 12 Jun 2026 07:36:05 -0400 Subject: [PATCH 16/19] chore(scanner): update soft-fail help text and test docstring --- scripts/version_scanner/tests/unit/test_version_scanner.py | 1 + scripts/version_scanner/version_scanner.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/version_scanner/tests/unit/test_version_scanner.py b/scripts/version_scanner/tests/unit/test_version_scanner.py index e477801caa75..a01ac5e8cf02 100644 --- a/scripts/version_scanner/tests/unit/test_version_scanner.py +++ b/scripts/version_scanner/tests/unit/test_version_scanner.py @@ -349,6 +349,7 @@ def test_main_loads_ignore_from_script_dir(mock_scan, mock_load_ignore): @mock.patch('googleapiclient.discovery.build') @mock.patch('google.auth.default') def test_upload_to_drive(mock_auth, mock_build): + """Test the ability to upload results to drive for visibility in gSheets.""" from unittest import mock mock_creds = mock.Mock() diff --git a/scripts/version_scanner/version_scanner.py b/scripts/version_scanner/version_scanner.py index 8be59ddadad8..c48aee7c9713 100644 --- a/scripts/version_scanner/version_scanner.py +++ b/scripts/version_scanner/version_scanner.py @@ -617,7 +617,7 @@ def main(): parser.add_argument( "--soft-fail", action="store_true", - help="Exit with code 0 even if matches are found (useful for advisory runs in CI/CD)" + help="Exit with code 0 even if matches are found (useful during development and testing runs)" ) args = parser.parse_args() From cc8cd2df26c19a79d702fdc7da3a9b7e7d44ed74 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 12 Jun 2026 07:46:45 -0400 Subject: [PATCH 17/19] refactor(tests): use idiomatic pytest.raises instead of try-except for SystemExit --- scripts/version_scanner/tests/unit/test_version_scanner.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/version_scanner/tests/unit/test_version_scanner.py b/scripts/version_scanner/tests/unit/test_version_scanner.py index a01ac5e8cf02..6d19bd985406 100644 --- a/scripts/version_scanner/tests/unit/test_version_scanner.py +++ b/scripts/version_scanner/tests/unit/test_version_scanner.py @@ -327,10 +327,8 @@ def test_main_loads_ignore_from_script_dir(mock_scan, mock_load_ignore): with mock.patch('sys.argv', test_args): from version_scanner import main - try: + with pytest.raises(SystemExit): main() - except SystemExit: - pass mock_load_ignore.assert_called_once() args, kwargs = mock_load_ignore.call_args From a002eb129e289b26606b4e7e0d5dd05411e6b203 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 12 Jun 2026 08:46:32 -0400 Subject: [PATCH 18/19] chore(scanner): address PR review comments on limit, safe-int, and string escaping --- .../tests/unit/test_version_scanner.py | 57 +++++++++++++++++++ scripts/version_scanner/version_scanner.py | 20 +++++-- 2 files changed, 73 insertions(+), 4 deletions(-) diff --git a/scripts/version_scanner/tests/unit/test_version_scanner.py b/scripts/version_scanner/tests/unit/test_version_scanner.py index 6d19bd985406..f5a909e849e8 100644 --- a/scripts/version_scanner/tests/unit/test_version_scanner.py +++ b/scripts/version_scanner/tests/unit/test_version_scanner.py @@ -26,6 +26,7 @@ _truncate_context, _wrap_sheet_hyperlink, _wrap_sheet_string, + _safe_int, format_for_raw_csv, format_for_spreadsheet, format_for_console @@ -475,6 +476,42 @@ def test_main_stdout(capsys): assert "test.py:1 [test] 3.7" in captured.out +def test_main_without_stdout_limits_output(capsys): + """Test that main() without --stdout prints only 10 matches and shows a suffix.""" + test_args = ['version_scanner.py', '-d', 'python', '-v', '3.7'] + matches = [{'file_path': f'test_{i}.py', 'line_number': i, 'matched_string': '3.7', 'rule_name': 'test'} for i in range(15)] + with mock.patch('sys.argv', test_args): + from version_scanner import main + with mock.patch('version_scanner.scan_repository', return_value=matches): + with pytest.raises(SystemExit): + main() + + captured = capsys.readouterr() + # Should only print first 10 matches + for i in range(10): + assert f"test_{i}.py:{i} [test] 3.7" in captured.out + for i in range(10, 15): + assert f"test_{i}.py:{i} [test] 3.7" not in captured.out + assert "... and 5 more matches." in captured.out + + +def test_main_with_stdout_prints_all(capsys): + """Test that main() with --stdout prints all matches without limiting.""" + test_args = ['version_scanner.py', '-d', 'python', '-v', '3.7', '--stdout'] + matches = [{'file_path': f'test_{i}.py', 'line_number': i, 'matched_string': '3.7', 'rule_name': 'test'} for i in range(15)] + with mock.patch('sys.argv', test_args): + from version_scanner import main + with mock.patch('version_scanner.scan_repository', return_value=matches): + with pytest.raises(SystemExit): + main() + + captured = capsys.readouterr() + # Should print all 15 matches + for i in range(15): + assert f"test_{i}.py:{i} [test] 3.7" in captured.out + assert "... and 5 more matches." not in captured.out + + def test_main_does_not_print_rules(capsys): """Test that main() does not print the list of loaded rules to stdout.""" test_args = ['version_scanner.py', '-d', 'python', '-v', '3.7'] @@ -576,9 +613,29 @@ def test_wrap_sheet_hyperlink(): def test_wrap_sheet_string(): assert _wrap_sheet_string("3.10") == '="3.10"' + assert _wrap_sheet_string('python_requires = ">=3.7"') == '="python_requires = "">=3.7"""' assert _wrap_sheet_string("") == "" assert _wrap_sheet_string(None) == "" +def test_safe_int(): + assert _safe_int("123") == 123 + assert _safe_int("") == 0 + assert _safe_int(None) == 0 + assert _safe_int("abc") == 0 + +def test_format_for_raw_csv_handles_empty_line_number(): + match = { + "file_path": "google-cloud-python/main/packages/pkg_a/setup.py", + "repo_path": "packages/pkg_a/setup.py", + "package_name": "pkg_a", + "rule_name": "python_requires_check", + "line_number": "", + "matched_string": "3.7", + "context_line": "python_requires = '>=3.7'" + } + formatted = format_for_raw_csv(match) + assert formatted["line_number"] == 0 + def test_format_for_raw_csv(): match = { "file_path": "google-cloud-python/main/packages/pkg_a/setup.py", diff --git a/scripts/version_scanner/version_scanner.py b/scripts/version_scanner/version_scanner.py index c48aee7c9713..50c22c6596db 100644 --- a/scripts/version_scanner/version_scanner.py +++ b/scripts/version_scanner/version_scanner.py @@ -23,7 +23,7 @@ import os import re import sys -from typing import Dict, List, Tuple +from typing import Dict, List, Tuple, Any import yaml class ConfigManager: @@ -208,7 +208,15 @@ def _wrap_sheet_hyperlink(url: str, label: str) -> str: def _wrap_sheet_string(value: str) -> str: if value is None: return "" - return f'="{value}"' if value else "" + escaped_value = value.replace('"', '""') + return f'="{escaped_value}"' if value else "" + + +def _safe_int(value: Any, default: int = 0) -> int: + try: + return int(value) + except (ValueError, TypeError): + return default def format_for_raw_csv(match: Dict[str, str]) -> Dict[str, str]: @@ -217,7 +225,7 @@ def format_for_raw_csv(match: Dict[str, str]) -> Dict[str, str]: "file_path": match.get("file_path", ""), "package_name": match.get("package_name", ""), "rule_name": match.get("rule_name", ""), - "line_number": int(match.get("line_number", 0)) if match.get("line_number") is not None else 0, + "line_number": _safe_int(match.get("line_number")), "matched_string": match.get("matched_string", ""), "context_line": _truncate_context(match.get("context_line", ""), match.get("matched_string", "")) } @@ -661,9 +669,13 @@ def main(): all_matches = scan_repository(args.path, rules, target_packages, ignore_dirs, version_string=args.version) print(f"\nFound {len(all_matches)} matches.") - for m in all_matches: + display_matches = all_matches if args.stdout else all_matches[:10] + for m in display_matches: print(format_for_console(m)) + if not args.stdout and len(all_matches) > 10: + print(f" ... and {len(all_matches) - 10} more matches.") + # Get and print summary counts rule_counts, package_counts = get_match_counts(all_matches) print_summary_table(rule_counts, package_counts) From e6fe5f6f82b0cf4da6311b767d07c68bec4f5cdf Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 12 Jun 2026 08:57:46 -0400 Subject: [PATCH 19/19] docs(scanner): add descriptive docstrings to spreadsheet helpers --- scripts/version_scanner/version_scanner.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/scripts/version_scanner/version_scanner.py b/scripts/version_scanner/version_scanner.py index 50c22c6596db..90234a967665 100644 --- a/scripts/version_scanner/version_scanner.py +++ b/scripts/version_scanner/version_scanner.py @@ -202,10 +202,24 @@ def _truncate_context(context: str, matched: str) -> str: def _wrap_sheet_hyperlink(url: str, label: str) -> str: + """Wraps a URL and label into a Google Sheets HYPERLINK formula. + + This ensures that when output is imported into spreadsheet software, the + resulting cells contain clickable hyperlinks pointing directly to GitHub file + locations and line numbers. + """ return f'=HYPERLINK("{url}", "{label}")' def _wrap_sheet_string(value: str) -> str: + """Wraps a string value inside a spreadsheet string formula to prevent float parsing. + + This forces spreadsheet software (such as Google Sheets) to treat numeric + string patterns (like python runtime version "3.10") as literal strings, + preventing auto-truncation to floats (which would display "3.1"). Double + quotes inside the value are escaped by doubling them to avoid formula syntax + errors on import. + """ if value is None: return "" escaped_value = value.replace('"', '""') @@ -213,6 +227,12 @@ def _wrap_sheet_string(value: str) -> str: def _safe_int(value: Any, default: int = 0) -> int: + """Safely converts a value to an integer, falling back to a default value. + + Used primarily during raw data formatting for spreadsheet ingestion. If a + value (like a line number) is missing or contains non-integer text (e.g. empty + strings for filename-only matches), this avoids crashing the scanner. + """ try: return int(value) except (ValueError, TypeError):