From 77a080f866d193f2756b5f89476be3c823b7e3c8 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 18 Oct 2023 16:36:18 -0500 Subject: [PATCH] chore: remove unneeded scripts, no longer need THIRD_PARTY_NOTICES --- .kokoro/release-nightly.sh | 44 +--- scripts/generate_third_party_notices.py | 332 ------------------------ scripts/update_firebase_docs_site.sh | 105 -------- scripts/update_x20_docs_site.sh | 106 -------- 4 files changed, 1 insertion(+), 586 deletions(-) delete mode 100644 scripts/generate_third_party_notices.py delete mode 100644 scripts/update_firebase_docs_site.sh delete mode 100644 scripts/update_x20_docs_site.sh diff --git a/.kokoro/release-nightly.sh b/.kokoro/release-nightly.sh index 488dbb9e13..5cc1275308 100755 --- a/.kokoro/release-nightly.sh +++ b/.kokoro/release-nightly.sh @@ -63,26 +63,6 @@ export PYTHONUNBUFFERED=1 # Install dependencies, as the following steps depend on it python3.10 -m pip install -e .[all] -# Generate third party notices and include it in the licenses in setup.cfg -# TODO(shobs): Don't include it in the package once vertex colab can pick it -# from elsewhere -THIRD_PARTY_NOTICES_FILE=THIRD_PARTY_NOTICES -python3.10 -m pip install pip-licenses -python3.10 scripts/generate_third_party_notices.py --output-file ${THIRD_PARTY_NOTICES_FILE} -if ! 
[ -s ${THIRD_PARTY_NOTICES_FILE} ]; then - echo "${THIRD_PARTY_NOTICES_FILE} was generated with zero size" - exit -1 -fi -SETUP_CFG_BKP=`mktemp` -cp -f setup.cfg ${SETUP_CFG_BKP} -cat >> setup.cfg << EOF - -[metadata] -license_files = - LICENSE - ${THIRD_PARTY_NOTICES_FILE} -EOF - # Update version string to include git hash and date CURRENT_DATE=$(date '+%Y%m%d') GIT_HASH=$(git rev-parse --short HEAD) @@ -101,33 +81,13 @@ if [ $num_wheel_files -ne 1 ] ; then exit -1 fi -# Make sure the wheel file has the third party notices included -# TODO(shobs): An utimate validation would be to create a virtual environment -# and install the wheel file, then verify that -# site-packages/bigframes-*.dist-info/ includes third party notices -python3.10 -c " -from zipfile import ZipFile -with ZipFile('$VERSION_WHEEL') as myzip: - third_party_licenses_info = [ - info - for info in myzip.infolist() - if info.filename.endswith('.dist-info/${THIRD_PARTY_NOTICES_FILE}') - ] - assert ( - len(third_party_licenses_info) == 1 - ), f'Found {len(third_party_licenses_info)} third party licenses' - assert ( - third_party_licenses_info[0].file_size > 0 - ), 'Package contains third party license of size 0' -" - # Create a copy of the wheel with a well known, version agnostic name LATEST_WHEEL=dist/bigframes-latest-py2.py3-none-any.whl cp $VERSION_WHEEL $LATEST_WHEEL cp dist/bigframes-*.tar.gz dist/bigframes-latest.tar.gz if ! [ ${DRY_RUN} ]; then - for gcs_path in gs://vertex_sdk_private_releases/bigframe/ \ +for gcs_path in gs://vertex_sdk_private_releases/bigframe/ \ gs://dl-platform-colab/bigframes/ \ gs://bigframes-wheels/; do @@ -155,8 +115,6 @@ fi # the changes were made but before this cleanup, because the script would # terminate with the failure itself. See if we can ensure the cleanup. sed -i -e "s/$RELEASE_VERSION/$BIGFRAMES_VERSION/g" bigframes/version.py -mv -f ${SETUP_CFG_BKP} setup.cfg -rm -f ${THIRD_PARTY_NOTICES_FILE} if ! 
[ ${DRY_RUN} ]; then # Copy docs and wheels to Google Drive diff --git a/scripts/generate_third_party_notices.py b/scripts/generate_third_party_notices.py deleted file mode 100644 index 7040bb2e5f..0000000000 --- a/scripts/generate_third_party_notices.py +++ /dev/null @@ -1,332 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import glob -import importlib.metadata -import json -import os.path -import re -import sys - -import piplicenses -import requests - -DEPENDENCY_INFO_SEPARATOR = "*" * 80 + "\n" -PACKAGE_NAME_EXTRACTOR = re.compile("^[a-zA-Z0-9._-]+") - -# These packages don't have LICENSE files distributed in their packages, -# but we have manually confirmed they have a compatible license and -# included it manually in our `third_party` directory. -# -# TODO(swast): We can remove this workaround once these packages bundle the -# license file. 
-# -# ipython-genutils and recommonmark are both in an archived state with no likely updates in the future -# -# Tracking issues: -# * https://github.com/grpc/grpc/issues/33557 -# * https://github.com/gsnedders/python-webencodings/issues/33 -# * https://github.com/pickleshare/pickleshare/issues/34 -DIRECT_LICENSE_MAPPINGS = { - "grpcio-status": "https://raw.githubusercontent.com/grpc/grpc/master/LICENSE", - "webencodings": "https://raw.githubusercontent.com/gsnedders/python-webencodings/master/LICENSE", - "ipython-genutils": "https://raw.githubusercontent.com/ipython/ipython_genutils/master/COPYING.md", - "pickleshare": "https://raw.githubusercontent.com/pickleshare/pickleshare/master/LICENSE", - "recommonmark": "https://raw.githubusercontent.com/readthedocs/recommonmark/master/license.md", -} - - -def get_package_dependencies(pkg_name): - """Get all package dependencies for a given package, both required and optional.""" - packages = set() - requirements = importlib.metadata.requires(pkg_name) - if requirements: - for req in requirements: - match = PACKAGE_NAME_EXTRACTOR.match(req) - assert match, f"Could not parse {req} for package name" - packages.add(match.group(0)) - return packages - - -# Inspired by third_party/colab/cleanup_filesets.py -def find_dependencies( - roots: set[str], ignore_missing_metadata=False -) -> dict[str, dict[str, set[str]]]: - """Return the transitive dependencies of a set of packages. - Args: - roots: List of package names, e.g. ["pkg1", "pkg2"] - Returns: - A dictionary of dependencies, e.g. - { - "pkg3" : { - "Requires" : set(["pkg4", "pkg5", "pkg6"]), - "RequiredBy": set(["pkg1"]) - }, - "pkg4" : { - "Requires" : set([]), - "RequiredBy": set(["pkg3"]) - }, - ... 
- } - """ - hops = set() - visited = set() - deps: dict[str, dict[str, set[str]]] = dict() - - # Initialize the start of the graph walk - for root in roots: - # Get the normalized package name - try: - pkg = importlib.metadata.metadata(root) - except importlib.metadata.PackageNotFoundError: - if not ignore_missing_metadata: - raise - continue - hops.add(pkg["Name"]) - - # Start the graph walk - while True: - if not hops: - break - hop = hops.pop() - if hop in visited: - continue - visited.add(hop) - - for dep in get_package_dependencies(hop): - # Get the normalized package name - try: - req_pkg = importlib.metadata.metadata(dep) - except importlib.metadata.PackageNotFoundError: - if not ignore_missing_metadata: - raise - continue - dep = req_pkg["Name"] - - # Create outgoing edge only for non root packages, for which an - # entry must have been created in the deps dictionary when we - # saw the package for the first time during the graph walk - if hop in deps: - deps[hop]["Requires"].add(dep) - - if dep in deps: - # We have already seen this requirement in the graph walk. - # Just update the incoming dependency and carry on. - deps[dep]["RequiredBy"].add(hop) - else: - # This is the first time we came across this requirement. - # Create a new entry with the incoming dependency. 
- deps[dep] = {"RequiredBy": {hop}, "Requires": set()} - - # Put it in the next hops for further graph traversal - hops.add(dep) - - return deps - - -def get_metadata_and_filename( - package_name: str, - metadata_name: str, - metadata_file: str, - metadata_text: str, - ignore_missing=True, -) -> tuple[str, str] | None: - """Get package metadata and corresponsing file name.""" - - # Check metadata file - metadata_filepath_known = metadata_file != piplicenses.LICENSE_UNKNOWN - if not metadata_filepath_known and not ignore_missing: - raise ValueError(f"No {metadata_name} file found for {package_name}") - - # Check metadata text - if metadata_text != piplicenses.LICENSE_UNKNOWN: - output_filename = metadata_name - if metadata_filepath_known: - output_filename = os.path.basename(metadata_file) - if not output_filename: - raise ValueError( - f"Need a file name to write {metadata_name} text for {package_name}." - ) - return metadata_text, output_filename - elif not ignore_missing: - raise ValueError(f"No {metadata_name} text found for {package_name}") - - return None - - -def fetch_license_and_notice_metadata(packages: list[str]): - """Fetch metadata including license and notice for given packages. - Returns a json object. - """ - parser = piplicenses.create_parser() - args = parser.parse_args( - [ - "--format", - "json", - "--with-license-file", - "--with-notice-file", - "--with-urls", - "--with-description", - "--packages", - *packages, - ] - ) - output_str = piplicenses.create_output_string(args) - metadatas = json.loads(output_str) - return metadatas - - -def write_lines_without_trailing_spaces(file, text: str, key: str): - """Write text lines to a file without the trailing spaces. 
- This will stop complaints by the trailing-whitespace pre-commit hook.""" - text = "\n".join([line.rstrip() for line in text.split("\n")]) - file.write(f"{key}:\n{text}\n") - - -def write_metadata_to_file( - file, metadata, with_version=False, requires_packages=[], packages_required_by=[] -): - """Write package metadata to a file object.""" - file.write(DEPENDENCY_INFO_SEPARATOR) - - info_keys = ["Name"] - if with_version: - info_keys.append("Version") - info_keys.extend(["License", "URL"]) - file.writelines([f"{key}: {metadata[key]}\n" for key in info_keys]) - - if requires_packages: - file.write(f"Requires: {', '.join(sorted(requires_packages))}\n") - - if packages_required_by: - file.write(f"Required By: {', '.join(sorted(packages_required_by))}\n") - - # Try to generate third party license - - license_info = get_metadata_and_filename( - metadata["Name"], - "LICENSE", - metadata["LicenseFile"], - metadata["LicenseText"], - ignore_missing=metadata["Name"] in DIRECT_LICENSE_MAPPINGS, - ) - - license_text = "" - if license_info: - license_text = license_info[0] - else: - license_text_response = requests.get(DIRECT_LICENSE_MAPPINGS[metadata["Name"]]) - license_text = license_text_response.text - - write_lines_without_trailing_spaces(file, license_text, "License") - - # Try to generate third party notice - notice_info = get_metadata_and_filename( - metadata["Name"], - "NOTICE", - metadata["NoticeFile"], - metadata["NoticeText"], - ignore_missing=True, - ) - - if notice_info: - write_lines_without_trailing_spaces(file, notice_info[0], "Notice") - - file.write(DEPENDENCY_INFO_SEPARATOR) - - -def write_third_party_vendored_license(file, path): - """Write license of a vendored third party library to notices file.""" - file.write(DEPENDENCY_INFO_SEPARATOR) - file.write(f"Vendored Code: {os.path.dirname(path)}\n") - notice_key = f"Notice ({os.path.basename(path)})" - write_lines_without_trailing_spaces(file, open(path).read(), notice_key) - 
file.write(DEPENDENCY_INFO_SEPARATOR) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Generate third party notices for bigframes dependencies." - ) - parser.add_argument( - "--with-version", - action="store_true", - default=False, - help="Include the version information for each package.", - ) - parser.add_argument( - "--with-requires", - action="store_true", - default=False, - help="Include for each package the packages it requires.", - ) - parser.add_argument( - "--with-required-by", - action="store_true", - default=False, - help="Include for each package the packages that require it.", - ) - parser.add_argument( - "--output-file", - action="store", - default="THIRD_PARTY_NOTICES", - help="The output file to write third party notices in.", - ) - args = parser.parse_args(sys.argv[1:]) - - # Initialize the root package - roots = {"bigframes"} - - # Find dependencies - # Let's ignore the packages that are not installed assuming they are - # just the optional dependencies that bigframes does not require. 
- # One example is the dependency path bigframes -> SQLAlchemy -> pg8000, - # where pg8000 is only an optional dependency for SQLAlchemy which bigframes - # is not depending on - # https://github.com/sqlalchemy/sqlalchemy/blob/7bc81947e22dc32368b0c49a41c398cd251d94af/setup.cfg#LL62C21-L62C27 - deps = find_dependencies(roots, ignore_missing_metadata=True) - - # Use third party solution to fetch dependency metadata - deps_metadata = fetch_license_and_notice_metadata(list(deps)) - deps_metadata = sorted(deps_metadata, key=lambda m: m["Name"]) - - # Write the file - with open(args.output_file, "w") as f: - # Generate third party metadata for each dependency - for metadata in deps_metadata: - dep = deps[metadata["Name"]] - write_metadata_to_file( - f, - metadata, - args.with_version, - dep["Requires"] if args.with_requires else [], - dep["RequiredBy"] if args.with_required_by else [], - ) - - # Generate third party vendored notices - notices = set() - for filename in [ - "LICENCE", - "LICENCE.txt", - "LICENSE", - "LICENSE.txt", - "NOTICE", - "NOTICE.txt", - "COPYING", - "COPYING.txt", - ]: - notices.update(glob.glob(f"third_party/bigframes_vendored/*/{filename}")) - for path in sorted(notices): - write_third_party_vendored_license(f, path) diff --git a/scripts/update_firebase_docs_site.sh b/scripts/update_firebase_docs_site.sh deleted file mode 100644 index f0ef866c90..0000000000 --- a/scripts/update_firebase_docs_site.sh +++ /dev/null @@ -1,105 +0,0 @@ -#!/bin/bash -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -set -e -o pipefail -set -x - -# Replace the docs version by the desired commit -BIGFRAMES_DOCS_VERSION=8ce2a3e - -BIGFRAMES_FIREBASE_PROJECT=bigframes-dev-d4d9a -BIGFRAMES_DOCS_GCS_BUCKET=bigframes-docs - -BIGFRAMES_DOCS_DIR=`mktemp -d` - - -# Install firebase if not already installed -if ! which firebase; then - npm install -g firebase-tools -fi - -# Prepare a working directory for firebase -mkdir -p ${BIGFRAMES_DOCS_DIR} -pushd ${BIGFRAMES_DOCS_DIR} - -# Copy the bigframes version -if [ ! -d ${BIGFRAMES_DOCS_VERSION} ]; then - gsutil -m cp -r gs://${BIGFRAMES_DOCS_GCS_BUCKET}/${BIGFRAMES_DOCS_VERSION} . -fi - -rm -f latest -ln -s ${BIGFRAMES_DOCS_VERSION} latest - -# Set up firebase -firebase login --no-localhost -firebase init hosting - -versions="${BIGFRAMES_DOCS_VERSION} latest" -for version in ${versions}; do - site_name=bigframes-docs-${version} - if ! firebase hosting:sites:list | grep ${site_name}; then - firebase hosting:sites:create ${site_name} - fi - - firebase target:apply hosting ${version} ${site_name} -done - -# Make sure the firebase json config is consistent with ${versions} -# TODO(shobs): Come up with a better way of updating the config than -# a hard overwrite -cat > firebase.json << EOF -{ - "hosting": [ - { - "target": "latest", - "public": "latest", - "ignore": [ - "firebase.json", - "**/.*", - "**/node_modules/**" - ] - } - , - { - "target": "${BIGFRAMES_DOCS_VERSION}", - "public": "${BIGFRAMES_DOCS_VERSION}", - "ignore": [ - "firebase.json", - "**/.*", - "**/node_modules/**" - ] - } - ] -} -EOF - -# Verify that the intended sites look good -for version in ${versions}; do - echo "Preview the local hosting of the docs site \"${version}\" before actually deploying (Press Ctrl+C to stop)" - firebase serve --only hosting:${version} -done - -echo -n "Go ahead and deploy? 
[y/N]: " -read deploy_consent - -# Deploy the sites -if [ "$deploy_consent" = y ]; then - echo "Deploying ..." - firebase deploy --only hosting -else - echo "Not Deploying anything." -fi - -popd diff --git a/scripts/update_x20_docs_site.sh b/scripts/update_x20_docs_site.sh deleted file mode 100644 index 31da116bdd..0000000000 --- a/scripts/update_x20_docs_site.sh +++ /dev/null @@ -1,106 +0,0 @@ -#!/bin/bash -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -################################################################################ -# This script uses corp credentials to sync the files to x20. -# Make sure you: -# 1. Have write permission to /x20/teams/bigframes-swe -# 2. Have run `gcert` before running the script -################################################################################ - -set -e -o pipefail -set -x - -GIT_DOCS_DIR=docs/_build/html/ -X20_BIGFRAMES_DIR=/x20/teams/bigframes-swe/bigframes/docs -MAX_BACKFILL=10 -REQ_PYTHON_VERSION=3.9 - -# Create a temporary directory -tempdir=`mktemp --directory --tmpdir bigframes.XXXXXXXXXX` - -# Clone the repository -git clone sso://team/bigquery-query-swe/bigframes ${tempdir} - -# Enter the temporary bigframes directory -pushd ${tempdir} - -# Python version 3.9 is required to build bigframes docs, install if not present -if ! 
python3 --version | grep ${REQ_PYTHON_VERSION}; then - # Install pyenv to install the required python version - ## https://github.com/pyenv/pyenv#basic-github-checkout - git clone https://github.com/pyenv/pyenv.git .pyenv - pushd .pyenv && src/configure && make -C src && popd - - ## https://github.com/pyenv/pyenv#set-up-your-shell-environment-for-pyenv - export PYENV_ROOT=${PWD}/.pyenv - PATH=${PYENV_ROOT}/bin:${PATH} - eval "$(pyenv init -)" - - ## Install the required python version - pyenv install ${REQ_PYTHON_VERSION} - - ## Make the required python version available - pyenv global ${REQ_PYTHON_VERSION} -fi - -# Create a virtual environment with nox installed -python3 -m venv venv -source venv/bin/activate -pip install nox - -# i = 0 means docs for the latest version, and i = 1 onwards means backfill -for i in `seq 0 ${MAX_BACKFILL}`; do - # If it is backfill turn, back off the version by 1 - if [ ${i} -ne 0 ]; then - git reset --hard HEAD~1 - - # Clean up any old docs - rm -rf ${GIT_DOCS_DIR} - fi - - # Construct a docs path in x20 - commit_hash=`git rev-parse --short HEAD` - x20_docs_dir_commit=${X20_BIGFRAMES_DIR}/${commit_hash} - - # If the x20 docs path already exists, let's assume that it was created - # properly in the previous attempt - if fileutil test -d ${x20_docs_dir_commit}; then - echo ${x20_docs_dir_commit} exists, skipping rebuilding it.. - continue - fi - - # Build the docs - echo Building docs for commit ${commit_hash}.. 
- nox -s docs - - # TODO(shobs): Check if a symlink can be created instead of another copy of - # the latest commit's docs, using fileutil CLI or otherwise - x20_docs_dirs=${x20_docs_dir_commit} - if [ ${i} -eq 0 ]; then - x20_docs_dirs="${x20_docs_dirs} ${X20_BIGFRAMES_DIR}/latest" - fi - - for x20_docs_dir in ${x20_docs_dirs}; do - fileutil mirror -parallelism=4 -force ${GIT_DOCS_DIR} ${x20_docs_dir} - x20_own request_change --recursive --path=${x20_docs_dir} --new_mode=a+r - done -done - -# Exit the temporary bigframes directory -popd - -# Clean up the temporary bigframes directory -rm -rf ${tempdir}