From 56ae1fd3e8573dfc7f91958a3a2230d2e702108e Mon Sep 17 00:00:00 2001 From: Ryunosuke Sato Date: Fri, 6 Jan 2023 14:12:33 +0900 Subject: [PATCH 01/12] Fix example code in README - A method that has `?` suffix should return boolean, but this method return an object that respond to `valid?`. - All other examples don't have `?`. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b64675e..f0bbaa9 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ require "scientist" class MyWidget def allows?(user) experiment = Scientist::Default.new "widget-permissions" - experiment.use { model.check_user?(user).valid? } # old way + experiment.use { model.check_user(user).valid? } # old way experiment.try { user.can?(:read, model) } # new way experiment.run From a28dc4865a6ea5910664d849b6bc6a6510755576 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 Sep 2023 02:49:01 +0000 Subject: [PATCH 02/12] Bump actions/checkout from 3 to 4 Bumps [actions/checkout](https://github.com/actions/checkout) from 3 to 4. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 095758d..aa936d1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,7 +25,7 @@ jobs: - "3.1" - "3.2" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: ruby/setup-ruby@v1 with: ruby-version: ${{ matrix.ruby_version }} From dbc99ecf31834c979ca35a20edd9ccbd64dc0d57 Mon Sep 17 00:00:00 2001 From: Erwin Kroon <123574+ekroon@users.noreply.github.com> Date: Thu, 23 Nov 2023 09:02:24 +0000 Subject: [PATCH 03/12] Explain clean vs compare in README --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index b64675e..7d15a20 100644 --- a/README.md +++ b/README.md @@ -241,6 +241,19 @@ end Note that the `#clean` method will discard the previous cleaner block if you call it again. If for some reason you need to access the currently configured cleaner block, `Scientist::Experiment#cleaner` will return the block without further ado. _(This probably won't come up in normal usage, but comes in handy if you're writing, say, a custom experiment runner that provides default cleaners.)_ +The `#clean` method will not be used for comparison of the results, so in the following example it is not possible to remove the `#compare` method without the experiment failing: + +```ruby +def user_ids + science "user_ids" do + e.use { [1,2,3] } + e.try { [1,3,2] } + e.clean { |value| value.sort } + e.compare { |a, b| a.sort == b.sort } + end +end +``` + ### Ignoring mismatches During the early stages of an experiment, it's possible that some of your code will always generate a mismatch for reasons you know and understand but haven't yet fixed. Instead of these known cases always showing up as mismatches in your metrics or analysis, you can tell an experiment whether or not to ignore a mismatch using the `ignore` method. You may include more than one block if needed: From 2c297abea0d8a398d1884a241daf7e19c6b85fab Mon Sep 17 00:00:00 2001 From: Ryan Buckley Date: Tue, 26 Mar 2024 10:32:14 -0700 Subject: [PATCH 04/12] Fix bug with marshalling experiments --- lib/scientist/experiment.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/scientist/experiment.rb b/lib/scientist/experiment.rb index 7512c0b..591b3a0 100644 --- a/lib/scientist/experiment.rb +++ b/lib/scientist/experiment.rb @@ -345,7 +345,7 @@ def marshal_dump [@name, @result, @raise_on_mismatches] end - def marshal_load + def marshal_load(array) @name, @result, @raise_on_mismatches = array end end From b98d3f8ae36d3ecf257a0fea8a2b11d5d4feb747 Mon Sep 17 00:00:00 2001 From: Ryan Buckley Date: Tue, 26 Mar 2024 10:39:48 -0700 Subject: [PATCH 05/12] Add test for marshal load --- test/scientist/experiment_test.rb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/scientist/experiment_test.rb b/test/scientist/experiment_test.rb index 1452d9c..558a239 100644 --- a/test/scientist/experiment_test.rb +++ b/test/scientist/experiment_test.rb @@ -512,6 +512,10 @@ def @ex.raised(op, exception) assert_kind_of(String, Marshal.dump(mismatch)) end + it "can be marshal loaded" do + assert_kind_of(Fake, Marshal.load(Marshal.dump(@ex)) + end + describe "#raise_on_mismatches?" do it "raises when there is a mismatch if the experiment instance's raise on mismatches is enabled" do Fake.raise_on_mismatches = false From 2c4c6e929be2a1619d329e8b3bf8cda4c84cad07 Mon Sep 17 00:00:00 2001 From: Ryan Buckley Date: Tue, 26 Mar 2024 10:42:20 -0700 Subject: [PATCH 06/12] Fix syntax error --- test/scientist/experiment_test.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/scientist/experiment_test.rb b/test/scientist/experiment_test.rb index 558a239..e155043 100644 --- a/test/scientist/experiment_test.rb +++ b/test/scientist/experiment_test.rb @@ -513,7 +513,7 @@ def @ex.raised(op, exception) end it "can be marshal loaded" do - assert_kind_of(Fake, Marshal.load(Marshal.dump(@ex)) + assert_kind_of(Fake, Marshal.load(Marshal.dump(@ex))) end describe "#raise_on_mismatches?" do From 6bda93608b78b030a1c346093a51f34460c5acda Mon Sep 17 00:00:00 2001 From: Toshimaru Date: Wed, 19 Jun 2024 11:51:16 +0900 Subject: [PATCH 07/12] Ci against Ruby v3.3 --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index aa936d1..0930434 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,6 +24,7 @@ jobs: - "3.0" - "3.1" - "3.2" + - "3.3" steps: - uses: actions/checkout@v4 - uses: ruby/setup-ruby@v1 From 1987656d93349b7af2b361b908f8afe69c5dd2e4 Mon Sep 17 00:00:00 2001 From: Elena Tanasoiu Date: Tue, 10 Dec 2024 17:23:14 +0000 Subject: [PATCH 08/12] Report CPU Time alongside Wall Time We'd like to be able to quantify CPU time consumed while executing the candidate and control blocks during science experiments. Changes include: - Added `cpu_time` attribute to `Scientist::Observation`. - Refactored initialization to capture both start/end times for wall time and CPU time. - Updated tests to include CPU-intensive work so we can verify we're recording CPU time correctly. This allows us to track cpu time more precisely when making improvements. --- lib/scientist/observation.rb | 25 ++++++++++++++++++++++--- test/scientist/experiment_test.rb | 30 +++++++++++++++++++++++++----- test/scientist/observation_test.rb | 7 ++++++- 3 files changed, 53 insertions(+), 9 deletions(-) diff --git a/lib/scientist/observation.rb b/lib/scientist/observation.rb index eb55115..df948e0 100644 --- a/lib/scientist/observation.rb +++ b/lib/scientist/observation.rb @@ -20,19 +20,29 @@ class Scientist::Observation # The Float seconds elapsed. attr_reader :duration + # The Float CPU time elapsed, in seconds + attr_reader :cpu_time + def initialize(name, experiment, fabricated_duration: nil, &block) @name = name @experiment = experiment - starting = Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_second) unless fabricated_duration + start_wall_time, start_cpu_time = capture_times unless fabricated_duration + begin @value = block.call rescue *RESCUES => e @exception = e end - @duration = fabricated_duration || - Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_second) - starting + if fabricated_duration + @duration = fabricated_duration["duration"] + @cpu_time = fabricated_duration["cpu_time"] + else + end_wall_time, end_cpu_time = capture_times + @duration = end_wall_time - start_wall_time + @cpu_time = end_cpu_time - start_cpu_time + end freeze end @@ -89,4 +99,13 @@ def hash def raised? !exception.nil? end + + private + + def capture_times + [ + Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_second), + Process.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID, :float_second) + ] + end end diff --git a/test/scientist/experiment_test.rb b/test/scientist/experiment_test.rb index e155043..e9468e7 100644 --- a/test/scientist/experiment_test.rb +++ b/test/scientist/experiment_test.rb @@ -674,27 +674,47 @@ def @ex.enabled? it "allows a user to provide fabricated durations for testing purposes" do @ex.use { true } @ex.try { true } - @ex.fabricate_durations_for_testing_purposes( "control" => 0.5, "candidate" => 1.0 ) - + @ex.fabricate_durations_for_testing_purposes({ + "control" => { "duration" => 0.5, "cpu_time" => 0.4 }, + "candidate" => { "duration" => 1.0, "cpu_time" => 0.9 } + }) @ex.run cont = @ex.published_result.control cand = @ex.published_result.candidates.first + + # Wall Time assert_in_delta 0.5, cont.duration, 0.01 assert_in_delta 1.0, cand.duration, 0.01 + + # CPU Time + assert_equal 0.4, cont.cpu_time + assert_equal 0.9, cand.cpu_time end it "returns actual durations if fabricated ones are omitted for some blocks" do @ex.use { true } - @ex.try { sleep 0.1; true } - @ex.fabricate_durations_for_testing_purposes( "control" => 0.5 ) - + @ex.try do + start_time = Time.now + while Time.now - start_time < 0.1 + # Perform some CPU-intensive work + (1..1000).each { |i| i * i } + end + true + end + @ex.fabricate_durations_for_testing_purposes({ "control" => { "duration" => 0.5, "cpu_time" => 0.4 }}) @ex.run cont = @ex.published_result.control cand = @ex.published_result.candidates.first + + # Fabricated durations assert_in_delta 0.5, cont.duration, 0.01 + assert_in_delta 0.4, cont.cpu_time, 0.01 + + # Measured durations assert_in_delta 0.1, cand.duration, 0.01 + assert_in_delta 0.1, cand.cpu_time, 0.01 end end end diff --git a/test/scientist/observation_test.rb b/test/scientist/observation_test.rb index b788663..79455d3 100644 --- a/test/scientist/observation_test.rb +++ b/test/scientist/observation_test.rb @@ -6,13 +6,18 @@ it "observes and records the execution of a block" do ob = Scientist::Observation.new("test", @experiment) do - sleep 0.1 + start_time = Time.now + while Time.now - start_time < 0.1 + # Perform some CPU-intensive work + (1..1000).each { |i| i * i } + end "ret" end assert_equal "ret", ob.value refute ob.raised? assert_in_delta 0.1, ob.duration, 0.01 + assert_in_delta 0.1, ob.cpu_time, 0.01 end it "stashes exceptions" do From cb642e8b4c7880af813b2ae15250b8c512c74086 Mon Sep 17 00:00:00 2001 From: Elena Tanasoiu Date: Tue, 10 Dec 2024 17:32:54 +0000 Subject: [PATCH 09/12] Update README to mention CPU time tracking --- README.md | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index fc264ec..1914cd9 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ Wrap a `use` block around the code's original behavior, and wrap `try` around th * It decides whether or not to run the `try` block, * Randomizes the order in which `use` and `try` blocks are run, -* Measures the durations of all behaviors in seconds, +* Measures the wall time and cpu time of all behaviors in seconds, * Compares the result of `try` to the result of `use`, * Swallow and record exceptions raised in the `try` block when overriding `raised`, and * Publishes all this information. @@ -334,11 +334,18 @@ class MyExperiment def publish(result) + # Wall time # Store the timing for the control value, $statsd.timing "science.#{name}.control", result.control.duration # for the candidate (only the first, see "Breaking the rules" below, $statsd.timing "science.#{name}.candidate", result.candidates.first.duration + # CPU time + # Store the timing for the control value, + $statsd.timing "science.cpu.#{name}.control", result.control.cpu_time + # for the candidate (only the first, see "Breaking the rules" below, + $statsd.timing "science.cpu.#{name}.candidate", result.candidates.first.cpu_time + # and counts for match/ignore/mismatch: if result.matched? $statsd.increment "science.#{name}.matched" @@ -543,17 +550,22 @@ end #### Providing fake timing data -If you're writing tests that depend on specific timing values, you can provide canned durations using the `fabricate_durations_for_testing_purposes` method, and Scientist will report these in `Scientist::Observation#duration` instead of the actual execution times. +If you're writing tests that depend on specific timing values, you can provide canned durations using the `fabricate_durations_for_testing_purposes` method, and Scientist will report these in `Scientist::Observation#duration` and `Scientist::Observation#cpu_time` instead of the actual execution times. ```ruby science "absolutely-nothing-suspicious-happening-here" do |e| e.use { ... } # "control" e.try { ... } # "candidate" - e.fabricate_durations_for_testing_purposes( "control" => 1.0, "candidate" => 0.5 ) + e.fabricate_durations_for_testing_purposes({ + "control" => { "duration" => 1.0, "cpu_time" => 0.9 }, + "candidate" => { "duration" => 0.5, "cpu_time" => 0.4 } + }) end ``` -`fabricate_durations_for_testing_purposes` takes a Hash of duration values, keyed by behavior names. (By default, Scientist uses `"control"` and `"candidate"`, but if you override these as shown in [Trying more than one thing](#trying-more-than-one-thing) or [No control, just candidates](#no-control-just-candidates), use matching names here.) If a name is not provided, the actual execution time will be reported instead. +`fabricate_durations_for_testing_purposes` takes a Hash of duration & cpu_time values, keyed by behavior names. (By default, Scientist uses `"control"` and `"candidate"`, but if you override these as shown in [Trying more than one thing](#trying-more-than-one-thing) or [No control, just candidates](#no-control-just-candidates), use matching names here.) If a name is not provided, the actual execution time will be reported instead. + +We should mention these durations will be used both for the `duration` field and the `cpu_time` field. _Like `Scientist::Experiment#cleaner`, this probably won't come up in normal usage. It's here to make it easier to test code that extends Scientist._ From b2311bcd7587a07d4671eb6271f7961837d404b8 Mon Sep 17 00:00:00 2001 From: Elena Tanasoiu Date: Fri, 13 Dec 2024 16:50:22 +0000 Subject: [PATCH 10/12] Make the change backwards-compatible This supports both the old version (single value) and new version (hash-based). I've added back the tests from the old version to check that the code stills works when we provide a single value. I've also adapted the README a bit to explain both versions. --- README.md | 24 +++++++++++++++++++++--- lib/scientist/observation.rb | 5 ++++- test/scientist/experiment_test.rb | 30 ++++++++++++++++++++++++++++-- 3 files changed, 53 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 1914cd9..e01e8ec 100644 --- a/README.md +++ b/README.md @@ -121,7 +121,7 @@ class MyWidget e.try { UserService.slug_from_login login } # returns String instance or ArgumentError compare_error_message_and_class = -> (control, candidate) do - control.class == candidate.class && + control.class == candidate.class && control.message == candidate.message end @@ -129,7 +129,7 @@ class MyWidget control.class == ArgumentError && candidate.class == ArgumentError && control.message.start_with?("Input has invalid characters") && - candidate.message.start_with?("Invalid characters in input") + candidate.message.start_with?("Invalid characters in input") end e.compare_errors do |control, candidate| @@ -550,7 +550,25 @@ end #### Providing fake timing data -If you're writing tests that depend on specific timing values, you can provide canned durations using the `fabricate_durations_for_testing_purposes` method, and Scientist will report these in `Scientist::Observation#duration` and `Scientist::Observation#cpu_time` instead of the actual execution times. +If you're writing tests that depend on specific timing values, you can provide canned durations using the `fabricate_durations_for_testing_purposes` method. This can be done using either the old version (single value) or the new version (hash-based) to include both duration and cpu_time. + +##### Old version (Single Value) + +In the old version, you can provide a single value for the duration: + +```ruby +science "absolutely-nothing-suspicious-happening-here" do |e| + e.use { ... } # "control" + e.try { ... } # "candidate" + e.fabricate_durations_for_testing_purposes( "control" => 1.0, "candidate" => 0.5 ) +end +``` + +`fabricate_durations_for_testing_purposes` takes a Hash of duration values, keyed by behavior names. (By default, Scientist uses `"control"` and `"candidate"`, but if you override these as shown in [Trying more than one thing](#trying-more-than-one-thing) or [No control, just candidates](#no-control-just-candidates), use matching names here.) If a name is not provided, the actual execution time will be reported instead. + +##### New version (Hash-based) + +Scientist will report these in `Scientist::Observation#duration` and `Scientist::Observation#cpu_time` instead of the actual execution times. ```ruby science "absolutely-nothing-suspicious-happening-here" do |e| diff --git a/lib/scientist/observation.rb b/lib/scientist/observation.rb index df948e0..2b13903 100644 --- a/lib/scientist/observation.rb +++ b/lib/scientist/observation.rb @@ -35,9 +35,12 @@ def initialize(name, experiment, fabricated_duration: nil, &block) @exception = e end - if fabricated_duration + if fabricated_duration.is_a?(Hash) @duration = fabricated_duration["duration"] @cpu_time = fabricated_duration["cpu_time"] + elsif fabricated_duration + @duration = fabricated_duration + @cpu_time = 0.0 # setting a default value else end_wall_time, end_cpu_time = capture_times @duration = end_wall_time - start_wall_time diff --git a/test/scientist/experiment_test.rb b/test/scientist/experiment_test.rb index e9468e7..434e673 100644 --- a/test/scientist/experiment_test.rb +++ b/test/scientist/experiment_test.rb @@ -671,7 +671,20 @@ def @ex.enabled? end describe "testing hooks for extending code" do - it "allows a user to provide fabricated durations for testing purposes" do + it "allows a user to provide fabricated durations for testing purposes (old version)" do + @ex.use { true } + @ex.try { true } + @ex.fabricate_durations_for_testing_purposes( "control" => 0.5, "candidate" => 1.0 ) + + @ex.run + + cont = @ex.published_result.control + cand = @ex.published_result.candidates.first + assert_in_delta 0.5, cont.duration, 0.01 + assert_in_delta 1.0, cand.duration, 0.01 + end + + it "allows a user to provide fabricated durations for testing purposes (new version)" do @ex.use { true } @ex.try { true } @ex.fabricate_durations_for_testing_purposes({ @@ -692,7 +705,20 @@ def @ex.enabled? assert_equal 0.9, cand.cpu_time end - it "returns actual durations if fabricated ones are omitted for some blocks" do + it "returns actual durations if fabricated ones are omitted for some blocks (old version)" do + @ex.use { true } + @ex.try { sleep 0.1; true } + @ex.fabricate_durations_for_testing_purposes( "control" => 0.5 ) + + @ex.run + + cont = @ex.published_result.control + cand = @ex.published_result.candidates.first + assert_in_delta 0.5, cont.duration, 0.01 + assert_in_delta 0.1, cand.duration, 0.01 + end + + it "returns actual durations if fabricated ones are omitted for some blocks (new version)" do @ex.use { true } @ex.try do start_time = Time.now From 4a454123805810bb6b910a03066ace760f6316c6 Mon Sep 17 00:00:00 2001 From: Elena Tanasoiu Date: Fri, 13 Dec 2024 22:52:07 +0000 Subject: [PATCH 11/12] Only explain new version in the README --- README.md | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/README.md b/README.md index e01e8ec..e076498 100644 --- a/README.md +++ b/README.md @@ -550,25 +550,7 @@ end #### Providing fake timing data -If you're writing tests that depend on specific timing values, you can provide canned durations using the `fabricate_durations_for_testing_purposes` method. This can be done using either the old version (single value) or the new version (hash-based) to include both duration and cpu_time. - -##### Old version (Single Value) - -In the old version, you can provide a single value for the duration: - -```ruby -science "absolutely-nothing-suspicious-happening-here" do |e| - e.use { ... } # "control" - e.try { ... } # "candidate" - e.fabricate_durations_for_testing_purposes( "control" => 1.0, "candidate" => 0.5 ) -end -``` - -`fabricate_durations_for_testing_purposes` takes a Hash of duration values, keyed by behavior names. (By default, Scientist uses `"control"` and `"candidate"`, but if you override these as shown in [Trying more than one thing](#trying-more-than-one-thing) or [No control, just candidates](#no-control-just-candidates), use matching names here.) If a name is not provided, the actual execution time will be reported instead. - -##### New version (Hash-based) - -Scientist will report these in `Scientist::Observation#duration` and `Scientist::Observation#cpu_time` instead of the actual execution times. +If you're writing tests that depend on specific timing values, you can provide canned durations using the `fabricate_durations_for_testing_purposes` method, and Scientist will report these in `Scientist::Observation#duration` and `Scientist::Observation#cpu_time` instead of the actual execution times. ```ruby science "absolutely-nothing-suspicious-happening-here" do |e| From 504a396e987f655a21c6bf2ed57935aadaa40859 Mon Sep 17 00:00:00 2001 From: Nathan Witmer Date: Mon, 16 Dec 2024 09:41:07 -0700 Subject: [PATCH 12/12] update changelog and bump version --- doc/changelog.md | 44 ++++++++++++++++++++++++++++++++++++++++ lib/scientist/version.rb | 2 +- 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/doc/changelog.md b/doc/changelog.md index c32a272..978fde0 100644 --- a/doc/changelog.md +++ b/doc/changelog.md @@ -1,5 +1,49 @@ # Changes +## v1.6.5 (16 December 2024) + +- New: measure CPU time alongside wall time for experiments #275 + +## v1.6.4 (5 April 2023) + +- New: GitHub Actions for CI #171 +- New: add ruby 3.1 support #175 +- Fix: `compare_errors` in docs #178 +- Fix: remove outdated travis configs #179 +- Fix: typos #191 +- New: add support for `after_run` blocks #211 + +## v1.6.3 (9 December 2021) + +- Fix: improve marshaling implementation #169 + +## v1.6.2 (4 November 2021) + +- New: make `MismatchError` marshalable #168 + +## v1.6.1 (22 October 2021) + +- Fix: moving supported ruby versions from <=2.3 to >=2.6 #150 +- Fix: update docs to explain timeout handling #159 +- New: add support for comparing errors #77 + +## v1.6.0 (8 March 2021) + +- Fix: clarify unit for observations #124 +- New: enable support for truffleruby #143 +- Fix: don't default experiment when included in a module #144 + +## v1.5.0 (8 September 2020) + +- Fix: clearer explanation of exception handling #110 +- Fix: remove unused attribute from `Scientist::Observation` #119 +- New: Added internal extension point for generating experinet results #121 +- New: Add `Scientist::Experiment.register` helper #104 + +## v1.4.0 (20 September 2019) + +- New: Make `MismatchError` a base `Exception` #107 + ## v1.3.0 (2 April 2019) - New: Drop support for ruby <2.3 diff --git a/lib/scientist/version.rb b/lib/scientist/version.rb index 6f44cc0..261500f 100644 --- a/lib/scientist/version.rb +++ b/lib/scientist/version.rb @@ -1,3 +1,3 @@ module Scientist - VERSION = '1.6.4' + VERSION = '1.6.5' end