From 0c1b379ac1df511b01987732ed221e425833f948 Mon Sep 17 00:00:00 2001 From: Taus Date: Tue, 29 Apr 2025 15:12:38 +0000 Subject: [PATCH 1/4] Python: Extract files in hidden dirs by default Changes the default behaviour of the Python extractor so files inside hidden directories are extracted by default. Also adds an extractor option, `skip_hidden_directories`, which can be set to `true` in order to revert to the old behaviour. Finally, I made the logic surrounding what is logged in various cases a bit more obvious. Technically this changes the behaviour of the extractor (in that hidden excluded files will now be logged as `(excluded)`), but I think this makes more sense anyway. --- python/codeql-extractor.yml | 7 +++++++ python/extractor/semmle/traverser.py | 16 ++++++++++------ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/python/codeql-extractor.yml b/python/codeql-extractor.yml index 97a9e1f2cf2f..2bd1a9c0aa76 100644 --- a/python/codeql-extractor.yml +++ b/python/codeql-extractor.yml @@ -44,3 +44,10 @@ options: Use this setting with caution, the Python extractor requires Python 3 to run. type: string pattern: "^(py|python|python3)$" + skip_hidden_directories: + title: Controls whether hidden directories are skipped during extraction. + description: > + By default, CodeQL will extract all Python files, including ones located in hidden directories. By setting this option to true, these hidden directories will be skipped instead. + Accepted values are true and false. 
+ type: string + pattern: "^(true|false)$" diff --git a/python/extractor/semmle/traverser.py b/python/extractor/semmle/traverser.py index ad8bd38ae735..0945d8ace4bf 100644 --- a/python/extractor/semmle/traverser.py +++ b/python/extractor/semmle/traverser.py @@ -83,11 +83,10 @@ def _treewalk(self, path): self.logger.debug("Ignoring %s (symlink)", fullpath) continue if isdir(fullpath): - if fullpath in self.exclude_paths or is_hidden(fullpath): - if is_hidden(fullpath): - self.logger.debug("Ignoring %s (hidden)", fullpath) - else: - self.logger.debug("Ignoring %s (excluded)", fullpath) + if fullpath in self.exclude_paths: + self.logger.debug("Ignoring %s (excluded)", fullpath) + elif is_hidden(fullpath): + self.logger.debug("Ignoring %s (hidden)", fullpath) else: empty = True for item in self._treewalk(fullpath): @@ -101,7 +100,12 @@ def _treewalk(self, path): self.logger.debug("Ignoring %s (filter)", fullpath) -if os.name== 'nt': +if os.environ.get("CODEQL_EXTRACTOR_PYTHON_OPTION_SKIP_HIDDEN_DIRECTORIES", "false") == "false": + + def is_hidden(path): + return False + +elif os.name== 'nt': import ctypes def is_hidden(path): From 605f2bff9ccf53b35751a371436d2ee62329b56e Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 2 May 2025 12:42:23 +0000 Subject: [PATCH 2/4] Python: Add integration test --- .../hidden-files/query-default.expected | 5 ++++ .../hidden-files/query-skipped.expected | 4 ++++ .../hidden-files/query.ql | 3 +++ .../.hidden_dir/visible_file_in_hidden_dir.py | 0 .../hidden-files/repo_dir/.hidden_file.py | 0 .../hidden-files/repo_dir/foo.py | 1 + .../cli-integration-test/hidden-files/test.sh | 24 +++++++++++++++++++ 7 files changed, 37 insertions(+) create mode 100644 python/extractor/cli-integration-test/hidden-files/query-default.expected create mode 100644 python/extractor/cli-integration-test/hidden-files/query-skipped.expected create mode 100644 python/extractor/cli-integration-test/hidden-files/query.ql create mode 100644 
python/extractor/cli-integration-test/hidden-files/repo_dir/.hidden_dir/visible_file_in_hidden_dir.py create mode 100644 python/extractor/cli-integration-test/hidden-files/repo_dir/.hidden_file.py create mode 100644 python/extractor/cli-integration-test/hidden-files/repo_dir/foo.py create mode 100755 python/extractor/cli-integration-test/hidden-files/test.sh diff --git a/python/extractor/cli-integration-test/hidden-files/query-default.expected b/python/extractor/cli-integration-test/hidden-files/query-default.expected new file mode 100644 index 000000000000..cc92af624b37 --- /dev/null +++ b/python/extractor/cli-integration-test/hidden-files/query-default.expected @@ -0,0 +1,5 @@ +| name | ++-------------------------------+ +| .hidden_file.py | +| foo.py | +| visible_file_in_hidden_dir.py | diff --git a/python/extractor/cli-integration-test/hidden-files/query-skipped.expected b/python/extractor/cli-integration-test/hidden-files/query-skipped.expected new file mode 100644 index 000000000000..688dbe00d570 --- /dev/null +++ b/python/extractor/cli-integration-test/hidden-files/query-skipped.expected @@ -0,0 +1,4 @@ +| name | ++-----------------+ +| .hidden_file.py | +| foo.py | diff --git a/python/extractor/cli-integration-test/hidden-files/query.ql b/python/extractor/cli-integration-test/hidden-files/query.ql new file mode 100644 index 000000000000..3b1b3c03849b --- /dev/null +++ b/python/extractor/cli-integration-test/hidden-files/query.ql @@ -0,0 +1,3 @@ +import python + +select any(File f).getShortName() as name order by name diff --git a/python/extractor/cli-integration-test/hidden-files/repo_dir/.hidden_dir/visible_file_in_hidden_dir.py b/python/extractor/cli-integration-test/hidden-files/repo_dir/.hidden_dir/visible_file_in_hidden_dir.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/extractor/cli-integration-test/hidden-files/repo_dir/.hidden_file.py b/python/extractor/cli-integration-test/hidden-files/repo_dir/.hidden_file.py new file 
mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/extractor/cli-integration-test/hidden-files/repo_dir/foo.py b/python/extractor/cli-integration-test/hidden-files/repo_dir/foo.py new file mode 100644 index 000000000000..517b47df53c2 --- /dev/null +++ b/python/extractor/cli-integration-test/hidden-files/repo_dir/foo.py @@ -0,0 +1 @@ +print(42) diff --git a/python/extractor/cli-integration-test/hidden-files/test.sh b/python/extractor/cli-integration-test/hidden-files/test.sh new file mode 100755 index 000000000000..77cb12664af6 --- /dev/null +++ b/python/extractor/cli-integration-test/hidden-files/test.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +set -Eeuo pipefail # see https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/ + +set -x + +CODEQL=${CODEQL:-codeql} + +SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +cd "$SCRIPTDIR" + +rm -rf db db-skipped + +# Test 1: Default behavior should be to extract files in hidden directories +$CODEQL database create db --language python --source-root repo_dir/ +$CODEQL query run --database db query.ql > query-default.actual +diff query-default.expected query-default.actual + +# Test 2: Setting the relevant extractor option to true skips files in hidden directories +$CODEQL database create db-skipped --language python --source-root repo_dir/ --extractor-option python.skip_hidden_directories=true +$CODEQL query run --database db-skipped query.ql > query-skipped.actual +diff query-skipped.expected query-skipped.actual + +rm -rf db db-skipped From 67d04d5477065a59f2bc0f706b5af4366b08293b Mon Sep 17 00:00:00 2001 From: Taus Date: Wed, 30 Apr 2025 12:38:31 +0000 Subject: [PATCH 3/4] Python: Add change note --- .../2025-04-30-extract-hidden-files-by-default.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 python/ql/lib/change-notes/2025-04-30-extract-hidden-files-by-default.md diff --git a/python/ql/lib/change-notes/2025-04-30-extract-hidden-files-by-default.md 
b/python/ql/lib/change-notes/2025-04-30-extract-hidden-files-by-default.md new file mode 100644 index 000000000000..96372513499f --- /dev/null +++ b/python/ql/lib/change-notes/2025-04-30-extract-hidden-files-by-default.md @@ -0,0 +1,5 @@ +--- +category: minorAnalysis +--- + +- The Python extractor now extracts files in hidden directories by default. A new extractor option, `skip_hidden_directories`, has been added as well. Setting it to `true` will make the extractor revert to the old behavior. From 2ded42c285151dfceb62597e5e767bfae0586f1c Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 2 May 2025 13:29:52 +0000 Subject: [PATCH 4/4] Python: Update extractor tests --- python/ql/test/2/extractor-tests/hidden/test.expected | 2 ++ python/ql/test/extractor-tests/filter-option/Test.expected | 1 + 2 files changed, 3 insertions(+) diff --git a/python/ql/test/2/extractor-tests/hidden/test.expected b/python/ql/test/2/extractor-tests/hidden/test.expected index ca72363d8f02..21bd0dfb2dd9 100644 --- a/python/ql/test/2/extractor-tests/hidden/test.expected +++ b/python/ql/test/2/extractor-tests/hidden/test.expected @@ -1,3 +1,5 @@ +| .hidden/inner/test.py | +| .hidden/module.py | | folder/module.py | | package | | package/__init__.py | diff --git a/python/ql/test/extractor-tests/filter-option/Test.expected b/python/ql/test/extractor-tests/filter-option/Test.expected index 7ade39a5998c..56b1e36c2a93 100644 --- a/python/ql/test/extractor-tests/filter-option/Test.expected +++ b/python/ql/test/extractor-tests/filter-option/Test.expected @@ -3,3 +3,4 @@ | Module foo.bar | | Module foo.include_test | | Package foo | +| Script hidden_foo.py |