import sys import os import subprocess from ast import literal_eval from semmle import logging from semmle import traverser from semmle import cmdline from semmle import worker from semmle.util import VERSION, update_analysis_version, get_analysis_major_version from buildtools.version import executable '''The populator generates trap files from a Python project. The populator consists of two parts: a traverser front end which traverses the file system and multiple worker back ends which extract information from the modules. ''' #NOTE: The front-end is simply an iterable of "extractables" and it should be easy to #plug-in new front-ends if needed. def cleanup_sys_path(path): '''Clean up sys.path removing duplicates and current working directory, making it safe for analysis. ''' #Remove duplicates path = [ p for i, p in enumerate(path) if i == 0 or p != path[i-1] ] #Remove curent working directory cwd = os.getcwd() if cwd in path: path.remove(cwd) return path def get_py2_sys_path(logger, py3_sys_path): '''Get the sys.path for Python 2, if it is available. If no Python 2 is available, simply return the Python 3 sys.path. Returns a tuple of the sys.path and a boolean indicating whether Python 2 is available.''' try: command = " ".join(executable(2) + ['-c "import sys; print(sys.path)"']) # We need `shell=True` here in order for the test framework to function correctly. For # whatever reason, the `PATH` variable is ignored if `shell=False`. # Also, this in turn forces us to give the whole command as a string, rather than a list. # Otherwise, the effect is that the Python interpreter is invoked _as a REPL_, rather than # with the given piece of code. output = subprocess.check_output(command, shell=True).decode(sys.getfilesystemencoding()) py2_sys_path = literal_eval(output) # Ensure that the first element of the sys.path is the same as the Python 3 sys.path -- # specifically a reference to our local `tools` directory. This ensures that the `six` stubs # are picked up from there. The item we're overwriting here is '', which would be cleaned up # later anyway. py2_sys_path[0] = py3_sys_path[0] return py2_sys_path, True except (subprocess.CalledProcessError, ValueError, SyntaxError) as e: logger.error("Error while getting Python 2 sys.path:") logger.error(e) logger.info("No Python 2 found. Using Python 3 sys.path.") return py3_sys_path, False def main(sys_path = sys.path[:]): options, args = cmdline.parse(sys.argv[1:]) logger = logging.Logger(options.verbosity, options.colorize) # This is not the prettiest way to do it, but when running tests we want to ensure that the # `--lang` flag influences the analysis version (e.g. so that we include the correct stdlib TRAP # file). So, we change the values of the appropriate variables (which would otherwise be based # on `CODEQL_EXTRACTOR_PYTHON_ANALYSIS_VERSION`), overwriting the previous values. if options.language_version: last_version = options.language_version[-1] update_analysis_version(last_version) found_py2 = False if get_analysis_major_version() == 2 and options.extract_stdlib: # Setup `sys_path` to use the Python 2 standard library sys_path, found_py2 = get_py2_sys_path(logger, sys_path) # use utf-8 as the character encoding for stdout/stderr to be able to properly # log/print things on systems that use bad default encodings (windows). sys.stdout.reconfigure(encoding='utf-8') sys.stderr.reconfigure(encoding='utf-8') sys.setrecursionlimit(2000) sys_path = cleanup_sys_path(sys_path) options.sys_path = sys_path[1:] if sys.version_info.major == 2: logger.error("Extraction using Python 2 is not supported.") logger.warning("To use the Python extractor, please ensure that Python 3 is available on your system.") logger.warning("For more information, see https://codeql.github.com/docs/codeql-overview/system-requirements/#additional-software-requirements") logger.warning("and https://codeql.github.com/docs/codeql-overview/supported-languages-and-frameworks/#languages-and-compilers") logger.close() logging.stop() sys.exit(1) elif found_py2: logger.info("Extraction will use the Python 2 standard library.") else: logger.info("Extraction will use the Python 3 standard library.") logger.info("sys_path is: %s", sys_path) try: the_traverser = traverser.Traverser(options, args, logger) except Exception as ex: logger.error("%s", ex) logger.close() logging.stop() sys.exit(1) run(options, args, the_traverser, logger) def run(options, args, the_traverser, logger: logging.Logger): logger.info("Python version %s", sys.version.split()[0]) logger.info("Python extractor version %s", VERSION) if 'CODEQL_EXTRACTOR_PYTHON_SOURCE_ARCHIVE_DIR' in os.environ: archive = os.environ['CODEQL_EXTRACTOR_PYTHON_SOURCE_ARCHIVE_DIR'] elif 'SOURCE_ARCHIVE' in os.environ: archive = os.environ['SOURCE_ARCHIVE'] else: archive = None trap_dir = cmdline.output_dir_from_options_and_env(options) try: pool = worker.ExtractorPool.from_options(options, trap_dir, archive, logger) except ValueError as ve: logger.error("%s", ve) logger.close() sys.exit(1) try: exitcode = 0 pool.extract(the_traverser) except worker.ExtractorFailure: exitcode = 1 except KeyboardInterrupt: exitcode = 2 logger.info("Keyboard interrupt") except BaseException as ex: exitcode = 3 logger.error("Unexpected exception: %s ", ex) logger.traceback(logging.WARN) finally: if exitcode: logger.debug("Stopping...") pool.stop() else: logger.debug("Writing interpreter trap") pool.close() logger.close() logging.stop() logger.write_message(logging.DEBUG, "Stopped." if exitcode else "Done.") if exitcode: sys.exit(exitcode) if __name__ == "__main__": main()