From 7768635dc269a99dc962387c93359efed70b813f Mon Sep 17 00:00:00 2001 From: Devin Howard Date: Tue, 18 Oct 2016 15:07:07 +0800 Subject: [PATCH 1/5] add count_threshold option adds the option to only check duplication if the code is found in more than just two locations. --- README.md | 21 ++++++++++++++++++--- lib/cc/engine/analyzers/analyzer_base.rb | 4 ++++ lib/cc/engine/analyzers/engine_config.rb | 5 +++++ lib/cc/engine/analyzers/javascript/main.rb | 1 + lib/cc/engine/analyzers/php/main.rb | 1 + lib/cc/engine/analyzers/python/main.rb | 1 + lib/cc/engine/analyzers/reporter.rb | 1 + lib/cc/engine/analyzers/ruby/main.rb | 1 + lib/cc/engine/analyzers/violation.rb | 8 ++++---- 9 files changed, 36 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index b07258ca..8e8af9e8 100644 --- a/README.md +++ b/README.md @@ -40,13 +40,13 @@ engines: This will tell the duplication engine to analyze Ruby and JavaScript files. -### Threshold +### Mass Threshold We set useful threshold defaults for the languages we support but you may want to adjust these settings based on your project guidelines. -The threshold configuration represents the minimum "mass" a code block must have -to be analyzed for duplication. If the engine is too easily reporting +The mass threshold configuration represents the minimum "mass" a code block must +have to be analyzed for duplication. If the engine is too easily reporting duplication, try raising the threshold. If you suspect that the engine isn't catching enough duplication, try lowering the threshold. The best setting tends to differ from language to language. @@ -68,6 +68,20 @@ engines: Note that you have the update the YAML structure under the `languages` key to the Hash type to support extra configuration. +### Count Threshold + +By default, the duplication engine will report code that has been duplicated in just two locations. You can be less strict by only raising a warning if code is duplicated in three or more locations only. To adjust this setting, add a `count_threshold` key to your language config. For instance, to use the default `mass_threshold` for ruby, but to enforce the [Rule of Three][rule-of-three], you could use this configuration: + +```yaml +engines: + duplication: + enabled: true + config: + languages: + ruby: + count_threshold: 3 +``` + ### Excluding files and directories As with any other Code Climate engine, you can exclude certain files or @@ -86,4 +100,5 @@ engines: [what-is-duplication]: https://docs.codeclimate.com/docs/duplication-concept [flay]: https://github.com/seattlerb/flay [cli]: https://github.com/codeclimate/codeclimate +[rule-of-three]: https://en.wikipedia.org/wiki/Rule_of_three_(computer_programming) [exclude-files-engine]: https://docs.codeclimate.com/docs/excluding-files-and-folders#section-exclude-paths-for-specific-engines diff --git a/lib/cc/engine/analyzers/analyzer_base.rb b/lib/cc/engine/analyzers/analyzer_base.rb index 8ec3e5e2..5499a9af 100644 --- a/lib/cc/engine/analyzers/analyzer_base.rb +++ b/lib/cc/engine/analyzers/analyzer_base.rb @@ -43,6 +43,10 @@ def mass_threshold engine_config.mass_threshold_for(self.class::LANGUAGE) || self.class::DEFAULT_MASS_THRESHOLD end + def count_threshold + engine_config.count_threshold_for(self.class::LANGUAGE) || self.class::DEFAULT_COUNT_THRESHOLD + end + def calculate_points(mass) overage = mass - mass_threshold base_points + (overage * points_per_overage) diff --git a/lib/cc/engine/analyzers/engine_config.rb b/lib/cc/engine/analyzers/engine_config.rb index e0d062d1..6d699b11 100644 --- a/lib/cc/engine/analyzers/engine_config.rb +++ b/lib/cc/engine/analyzers/engine_config.rb @@ -30,6 +30,11 @@ def mass_threshold_for(language) end end + def count_threshold_for(language) + threshold = fetch_language(language).fetch("count_threshold", nil) + threshold ? threshold.to_i : threshold + end + def fetch_language(language) language = config. fetch("languages", {}). diff --git a/lib/cc/engine/analyzers/javascript/main.rb b/lib/cc/engine/analyzers/javascript/main.rb index 016fbf89..ffb0dc13 100644 --- a/lib/cc/engine/analyzers/javascript/main.rb +++ b/lib/cc/engine/analyzers/javascript/main.rb @@ -17,6 +17,7 @@ class Main < CC::Engine::Analyzers::Base ].freeze LANGUAGE = "javascript" DEFAULT_MASS_THRESHOLD = 40 + DEFAULT_COUNT_THRESHOLD = 2 POINTS_PER_OVERAGE = 30_000 private diff --git a/lib/cc/engine/analyzers/php/main.rb b/lib/cc/engine/analyzers/php/main.rb index 61e46703..003384ed 100644 --- a/lib/cc/engine/analyzers/php/main.rb +++ b/lib/cc/engine/analyzers/php/main.rb @@ -15,6 +15,7 @@ class Main < CC::Engine::Analyzers::Base "**/*.module", ].freeze DEFAULT_MASS_THRESHOLD = 28 + DEFAULT_COUNT_THRESHOLD = 2 POINTS_PER_OVERAGE = 100_000 private diff --git a/lib/cc/engine/analyzers/python/main.rb b/lib/cc/engine/analyzers/python/main.rb index ca5a5ca9..9d7014f7 100644 --- a/lib/cc/engine/analyzers/python/main.rb +++ b/lib/cc/engine/analyzers/python/main.rb @@ -14,6 +14,7 @@ class Main < CC::Engine::Analyzers::Base PATTERNS = ["**/*.py"] DEFAULT_MASS_THRESHOLD = 32 DEFAULT_PYTHON_VERSION = 2 + DEFAULT_COUNT_THRESHOLD = 2 POINTS_PER_OVERAGE = 50_000 private diff --git a/lib/cc/engine/analyzers/reporter.rb b/lib/cc/engine/analyzers/reporter.rb index 5cd0233b..1c7bee61 100644 --- a/lib/cc/engine/analyzers/reporter.rb +++ b/lib/cc/engine/analyzers/reporter.rb @@ -51,6 +51,7 @@ def report violations = new_violations(issue) violations.each do |violation| + next if (violation.occurrences + 1) < language_strategy.count_threshold debug("Violation name=#{violation.report_name} mass=#{violation.mass}") unless reports.include?(violation.report_name) diff --git a/lib/cc/engine/analyzers/ruby/main.rb b/lib/cc/engine/analyzers/ruby/main.rb index b348db9b..a48e5c4e 100644 --- a/lib/cc/engine/analyzers/ruby/main.rb +++ b/lib/cc/engine/analyzers/ruby/main.rb @@ -17,6 +17,7 @@ class Main < CC::Engine::Analyzers::Base "**/*.gemspec", ].freeze DEFAULT_MASS_THRESHOLD = 18 + DEFAULT_COUNT_THRESHOLD = 2 POINTS_PER_OVERAGE = 100_000 TIMEOUT = 300 diff --git a/lib/cc/engine/analyzers/violation.rb b/lib/cc/engine/analyzers/violation.rb index 17490b23..49dc204d 100644 --- a/lib/cc/engine/analyzers/violation.rb +++ b/lib/cc/engine/analyzers/violation.rb @@ -35,6 +35,10 @@ def mass current_sexp.mass end + def occurrences + other_sexps.count + end + private attr_reader :language_strategy, :other_sexps, :current_sexp @@ -96,10 +100,6 @@ def description description += " (mass = #{mass})" description end - - def occurrences - other_sexps.count - end end end end From 6418c2dfb457264fa71a2914b03d1b915dc36084 Mon Sep 17 00:00:00 2001 From: Devin Howard Date: Fri, 21 Oct 2016 15:36:21 +0800 Subject: [PATCH 2/5] make count_threshold_for less confusing --- lib/cc/engine/analyzers/engine_config.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/cc/engine/analyzers/engine_config.rb b/lib/cc/engine/analyzers/engine_config.rb index 6d699b11..c71182e6 100644 --- a/lib/cc/engine/analyzers/engine_config.rb +++ b/lib/cc/engine/analyzers/engine_config.rb @@ -32,7 +32,10 @@ def mass_threshold_for(language) def count_threshold_for(language) threshold = fetch_language(language).fetch("count_threshold", nil) - threshold ? threshold.to_i : threshold + + if threshold + threshold.to_i + end end def fetch_language(language) From 8ff31435b1583b6f51b050f3f651d69fec271fd8 Mon Sep 17 00:00:00 2001 From: Devin Howard Date: Sat, 29 Oct 2016 18:22:47 +0800 Subject: [PATCH 3/5] allow changing global default count threshold --- README.md | 11 ++++++++++- lib/cc/engine/analyzers/engine_config.rb | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8e8af9e8..70197f36 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ the Hash type to support extra configuration. ### Count Threshold -By default, the duplication engine will report code that has been duplicated in just two locations. You can be less strict by only raising a warning if code is duplicated in three or more locations only. To adjust this setting, add a `count_threshold` key to your language config. For instance, to use the default `mass_threshold` for ruby, but to enforce the [Rule of Three][rule-of-three], you could use this configuration: +By default, the duplication engine will report code that has been duplicated in just two locations. You can be less strict by only raising a warning if code is duplicated in three or more locations only. To adjust this setting, add a `count_threshold` key to your config. For instance, to use the default `mass_threshold` for ruby, but to enforce the [Rule of Three][rule-of-three], you could use this configuration: ```yaml engines: @@ -82,6 +82,15 @@ engines: count_threshold: 3 ``` +You can also change the default count_threshold for all languages: + +```yaml +engines: + duplication: + enabled: true + count_threshold: 3 +``` + ### Excluding files and directories As with any other Code Climate engine, you can exclude certain files or diff --git a/lib/cc/engine/analyzers/engine_config.rb b/lib/cc/engine/analyzers/engine_config.rb index c71182e6..16f8bfd5 100644 --- a/lib/cc/engine/analyzers/engine_config.rb +++ b/lib/cc/engine/analyzers/engine_config.rb @@ -32,6 +32,7 @@ def mass_threshold_for(language) def count_threshold_for(language) threshold = fetch_language(language).fetch("count_threshold", nil) + threshold = config.fetch("count_threshold", nil) unless threshold if threshold threshold.to_i From b5c1a69da26773525372736af0623a22cb71dd98 Mon Sep 17 00:00:00 2001 From: Devin Howard Date: Sat, 5 Nov 2016 17:28:33 -0400 Subject: [PATCH 4/5] make count threshold default no longer language-specific --- lib/cc/engine/analyzers/analyzer_base.rb | 2 +- lib/cc/engine/analyzers/engine_config.rb | 7 +++++-- lib/cc/engine/analyzers/javascript/main.rb | 1 - lib/cc/engine/analyzers/php/main.rb | 1 - lib/cc/engine/analyzers/python/main.rb | 1 - lib/cc/engine/analyzers/ruby/main.rb | 1 - 6 files changed, 6 insertions(+), 7 deletions(-) diff --git a/lib/cc/engine/analyzers/analyzer_base.rb b/lib/cc/engine/analyzers/analyzer_base.rb index 5499a9af..a06b1408 100644 --- a/lib/cc/engine/analyzers/analyzer_base.rb +++ b/lib/cc/engine/analyzers/analyzer_base.rb @@ -44,7 +44,7 @@ def mass_threshold end def count_threshold - engine_config.count_threshold_for(self.class::LANGUAGE) || self.class::DEFAULT_COUNT_THRESHOLD + engine_config.count_threshold_for(self.class::LANGUAGE) end def calculate_points(mass) diff --git a/lib/cc/engine/analyzers/engine_config.rb b/lib/cc/engine/analyzers/engine_config.rb index 16f8bfd5..65ff75c5 100644 --- a/lib/cc/engine/analyzers/engine_config.rb +++ b/lib/cc/engine/analyzers/engine_config.rb @@ -2,6 +2,8 @@ module CC module Engine module Analyzers class EngineConfig + DEFAULT_COUNT_THRESHOLD = 2 + def initialize(hash) @config = normalize(hash) end @@ -31,8 +33,9 @@ def mass_threshold_for(language) end def count_threshold_for(language) - threshold = fetch_language(language).fetch("count_threshold", nil) - threshold = config.fetch("count_threshold", nil) unless threshold + threshold = fetch_language(language)["count_threshold"] || + config.fetch("config", {}).fetch("count_threshold", nil) || + DEFAULT_COUNT_THRESHOLD if threshold threshold.to_i diff --git a/lib/cc/engine/analyzers/javascript/main.rb b/lib/cc/engine/analyzers/javascript/main.rb index ffb0dc13..016fbf89 100644 --- a/lib/cc/engine/analyzers/javascript/main.rb +++ b/lib/cc/engine/analyzers/javascript/main.rb @@ -17,7 +17,6 @@ class Main < CC::Engine::Analyzers::Base ].freeze LANGUAGE = "javascript" DEFAULT_MASS_THRESHOLD = 40 - DEFAULT_COUNT_THRESHOLD = 2 POINTS_PER_OVERAGE = 30_000 private diff --git a/lib/cc/engine/analyzers/php/main.rb b/lib/cc/engine/analyzers/php/main.rb index 003384ed..61e46703 100644 --- a/lib/cc/engine/analyzers/php/main.rb +++ b/lib/cc/engine/analyzers/php/main.rb @@ -15,7 +15,6 @@ class Main < CC::Engine::Analyzers::Base "**/*.module", ].freeze DEFAULT_MASS_THRESHOLD = 28 - DEFAULT_COUNT_THRESHOLD = 2 POINTS_PER_OVERAGE = 100_000 private diff --git a/lib/cc/engine/analyzers/python/main.rb b/lib/cc/engine/analyzers/python/main.rb index 9d7014f7..ca5a5ca9 100644 --- a/lib/cc/engine/analyzers/python/main.rb +++ b/lib/cc/engine/analyzers/python/main.rb @@ -14,7 +14,6 @@ class Main < CC::Engine::Analyzers::Base PATTERNS = ["**/*.py"] DEFAULT_MASS_THRESHOLD = 32 DEFAULT_PYTHON_VERSION = 2 - DEFAULT_COUNT_THRESHOLD = 2 POINTS_PER_OVERAGE = 50_000 private diff --git a/lib/cc/engine/analyzers/ruby/main.rb b/lib/cc/engine/analyzers/ruby/main.rb index a48e5c4e..b348db9b 100644 --- a/lib/cc/engine/analyzers/ruby/main.rb +++ b/lib/cc/engine/analyzers/ruby/main.rb @@ -17,7 +17,6 @@ class Main < CC::Engine::Analyzers::Base "**/*.gemspec", ].freeze DEFAULT_MASS_THRESHOLD = 18 - DEFAULT_COUNT_THRESHOLD = 2 POINTS_PER_OVERAGE = 100_000 TIMEOUT = 300 From 96cd4efdce6c017fa156400c2e6faf9e8d03d142 Mon Sep 17 00:00:00 2001 From: Devin Howard Date: Mon, 7 Nov 2016 13:41:22 -0500 Subject: [PATCH 5/5] spec for count_threshold_for --- .../cc/engine/analyzers/engine_config_spec.rb | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/spec/cc/engine/analyzers/engine_config_spec.rb b/spec/cc/engine/analyzers/engine_config_spec.rb index 9e28c50b..a1974630 100644 --- a/spec/cc/engine/analyzers/engine_config_spec.rb +++ b/spec/cc/engine/analyzers/engine_config_spec.rb @@ -75,6 +75,47 @@ end end + describe "count_threshold_for" do + it "returns configured count threshold as integer" do + engine_config = CC::Engine::Analyzers::EngineConfig.new({ + "config" => { + "languages" => { + "EliXiR" => { + "count_threshold" => "3", + }, + }, + }, + }) + + expect(engine_config.count_threshold_for("elixir")).to eq(3) + end + + it "returns default value when language value is empty" do + engine_config = CC::Engine::Analyzers::EngineConfig.new({ + "config" => { + "count_threshold" => "4", + "languages" => { + "ruby" => "", + }, + }, + }) + + expect(engine_config.count_threshold_for("ruby")).to eq(4) + end + + it "returns 2 by default" do + engine_config = CC::Engine::Analyzers::EngineConfig.new({ + "config" => { + "languages" => { + "ruby" => "", + }, + }, + }) + + expect(engine_config.count_threshold_for("ruby")).to eq(2) + end + end + describe "include_paths" do it "returns given include paths" do engine_config = CC::Engine::Analyzers::EngineConfig.new({