diff --git a/mypy.ini b/mypy.ini
index 7709eb200a..69f0e260cf 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -44,3 +44,6 @@ ignore_missing_imports = True
 
 [mypy-anywidget]
 ignore_missing_imports = True
+
+[mypy-pandas_gbq]
+ignore_missing_imports = True
diff --git a/scripts/run_and_publish_benchmark.py b/scripts/run_and_publish_benchmark.py
index 248322f619..4fdfaf09d6 100644
--- a/scripts/run_and_publish_benchmark.py
+++ b/scripts/run_and_publish_benchmark.py
@@ -89,6 +89,7 @@ def collect_benchmark_result(
         bq_seconds_files = sorted(path.rglob("*.bq_exec_time_seconds"))
         local_seconds_files = sorted(path.rglob("*.local_exec_time_seconds"))
         query_char_count_files = sorted(path.rglob("*.query_char_count"))
+        total_rows_files = sorted(path.rglob("*.totalrows"))
 
         error_files = sorted(path.rglob("*.error"))
 
@@ -109,6 +110,7 @@ def collect_benchmark_result(
         )
 
         has_full_metrics = len(bq_seconds_files) == len(local_seconds_files)
+        has_total_rows = len(total_rows_files) == len(local_seconds_files)
 
         for idx in range(len(local_seconds_files)):
             query_char_count_file = query_char_count_files[idx]
@@ -156,6 +158,19 @@ def collect_benchmark_result(
                     lines = file.read().splitlines()
                     bq_seconds = sum(float(line) for line in lines) / iterations
 
+            if not has_total_rows:
+                total_rows = None
+            else:
+                total_rows_file = total_rows_files[idx]
+                if filename != total_rows_file.relative_to(path).with_suffix(""):
+                    raise ValueError(
+                        "File name mismatch among query_char_count, bytes, and total_rows reports."
+                    )
+
+                with open(total_rows_file, "r") as file:
+                    lines = file.read().splitlines()
+                    total_rows = sum(int(line) for line in lines) / iterations
+
             results_dict[str(filename)] = [
                 query_count,
                 total_bytes,
@@ -163,6 +178,7 @@ def collect_benchmark_result(
                 local_seconds,
                 bq_seconds,
                 query_char_count,
+                total_rows,
             ]
     finally:
         for files_to_remove in (
@@ -171,6 +187,7 @@ def collect_benchmark_result(
             path.rglob("*.local_exec_time_seconds"),
             path.rglob("*.bq_exec_time_seconds"),
             path.rglob("*.query_char_count"),
+            path.rglob("*.totalrows"),
             path.rglob("*.error"),
         ):
             for log_file in files_to_remove:
@@ -183,6 +200,7 @@ def collect_benchmark_result(
         "Local_Execution_Time_Sec",
         "BigQuery_Execution_Time_Sec",
         "Query_Char_Count",
+        "Total_Rows",
     ]
 
     benchmark_metrics = pd.DataFrame.from_dict(
@@ -206,6 +224,11 @@ def collect_benchmark_result(
         print(
             f"{index} - query count: {row['Query_Count']},"
             + f" query char count: {row['Query_Char_Count']},"
+            + (
+                f" total rows: {row['Total_Rows']},"
+                if not pd.isna(row["Total_Rows"])
+                else ""
+            )
             + f" bytes processed sum: {row['Bytes_Processed']},"
             + (f" slot millis sum: {row['Slot_Millis']}," if has_full_metrics else "")
             + f" local execution time: {formatted_local_exec_time} seconds"
@@ -234,10 +257,14 @@ def collect_benchmark_result(
     geometric_mean_bq_seconds = geometric_mean_excluding_zeros(
         benchmark_metrics["BigQuery_Execution_Time_Sec"]
     )
+    geometric_mean_total_rows = geometric_mean_excluding_zeros(
+        benchmark_metrics["Total_Rows"]
+    )
 
     print(
         f"---Geometric mean of queries: {geometric_mean_queries},"
         + f" Geometric mean of queries char counts: {geometric_mean_query_char_count},"
+        + f" Geometric mean of total rows: {geometric_mean_total_rows},"
         + f" Geometric mean of bytes processed: {geometric_mean_bytes},"
         + (
             f" Geometric mean of slot millis: {geometric_mean_slot_millis},"
diff --git a/tests/system/small/test_run_and_publish_benchmark.py b/tests/system/small/test_run_and_publish_benchmark.py
new file mode 100644
index 0000000000..df81e390e3
--- /dev/null
+++ b/tests/system/small/test_run_and_publish_benchmark.py
@@ -0,0 +1,57 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pathlib import Path
+import sys
+
+# Add the project root to the Python path to allow for application-specific imports.
+sys.path.insert(0, str(Path(__file__).resolve().parents[3]))
+
+from scripts import run_and_publish_benchmark  # noqa: E402
+
+
+def test_collect_benchmark_result(tmp_path: Path):
+    """Tests the collect_benchmark_result function.
+
+    This test verifies that the function correctly reads benchmark result
+    files from a specified directory, processes them, and returns a
+    pandas DataFrame with the expected data and types.
+
+    Args:
+        tmp_path (Path): The pytest fixture providing a temporary directory path.
+    """
+    # Arrange: Create dummy log files with benchmark data.
+    (tmp_path / "benchmark1.bytesprocessed").write_text("100")
+    (tmp_path / "benchmark1.slotmillis").write_text("1000")
+    (tmp_path / "benchmark1.bq_exec_time_seconds").write_text("1.0")
+    (tmp_path / "benchmark1.local_exec_time_seconds").write_text("2.0")
+    (tmp_path / "benchmark1.query_char_count").write_text("50")
+    (tmp_path / "benchmark1.totalrows").write_text("10")
+
+    # Act: Collect the benchmark results from the temporary directory.
+    # The second argument is the number of iterations used to average the metrics.
+    df, error_message = run_and_publish_benchmark.collect_benchmark_result(
+        str(tmp_path), 1
+    )
+
+    # Assert: Verify the contents and structure of the resulting DataFrame.
+    assert error_message is None, "Expected no error messages."
+    assert len(df) == 1, "DataFrame should contain exactly one row."
+    assert df["Benchmark_Name"][0] == "benchmark1"
+    assert df["Bytes_Processed"][0] == 100
+    assert df["Slot_Millis"][0] == 1000
+    assert df["BigQuery_Execution_Time_Sec"][0] == 1.0
+    assert df["Local_Execution_Time_Sec"][0] == 2.0
+    assert df["Query_Char_Count"][0] == 50
+    assert df["Total_Rows"][0] == 10
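
Note on the producer side (outside the scope of this diff): collect_benchmark_result sums the lines of each *.totalrows file and divides by iterations, so it expects the benchmark runner to append one row-count value per iteration. A minimal sketch of such a writer follows, under that assumption; the helper name append_metric_reports and its signature are hypothetical and not part of this change.

    import pathlib


    def append_metric_reports(
        prefix: pathlib.Path,  # hypothetical, e.g. results_dir / "benchmark1"
        query_char_count: int,
        bytes_processed: int,
        slot_millis: int,
        total_rows: int,
    ) -> None:
        """Append one iteration's metrics, one line per iteration per report file."""
        reports = {
            ".query_char_count": query_char_count,
            ".bytesprocessed": bytes_processed,
            ".slotmillis": slot_millis,
            ".totalrows": total_rows,  # the report consumed by the new Total_Rows column
        }
        for suffix, value in reports.items():
            # Append rather than overwrite so repeated iterations accumulate one
            # line each; the collector averages the sum over the iteration count.
            with open(prefix.with_suffix(suffix), "a") as report:
                report.write(f"{value}\n")

Because has_total_rows only holds when every benchmark produced a *.totalrows report, older logs without that file still parse: total_rows is left as None, surfaces as NaN in the Total_Rows column, and the pd.isna check skips it in the per-query printout.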