diff --git a/README.md b/README.md index b07258ca..70197f36 100644 --- a/README.md +++ b/README.md @@ -40,13 +40,13 @@ engines: This will tell the duplication engine to analyze Ruby and JavaScript files. -### Threshold +### Mass Threshold We set useful threshold defaults for the languages we support but you may want to adjust these settings based on your project guidelines. -The threshold configuration represents the minimum "mass" a code block must have -to be analyzed for duplication. If the engine is too easily reporting +The mass threshold configuration represents the minimum "mass" a code block must +have to be analyzed for duplication. If the engine is too easily reporting duplication, try raising the threshold. If you suspect that the engine isn't catching enough duplication, try lowering the threshold. The best setting tends to differ from language to language. @@ -68,6 +68,29 @@ engines: Note that you have the update the YAML structure under the `languages` key to the Hash type to support extra configuration. +### Count Threshold + +By default, the duplication engine will report code that has been duplicated in just two locations. You can be less strict by only raising a warning if code is duplicated in three or more locations only. To adjust this setting, add a `count_threshold` key to your config. For instance, to use the default `mass_threshold` for ruby, but to enforce the [Rule of Three][rule-of-three], you could use this configuration: + +```yaml +engines: + duplication: + enabled: true + config: + languages: + ruby: + count_threshold: 3 +``` + +You can also change the default count_threshold for all languages: + +```yaml +engines: + duplication: + enabled: true + count_threshold: 3 +``` + ### Excluding files and directories As with any other Code Climate engine, you can exclude certain files or @@ -86,4 +109,5 @@ engines: [what-is-duplication]: https://docs.codeclimate.com/docs/duplication-concept [flay]: https://github.com/seattlerb/flay [cli]: https://github.com/codeclimate/codeclimate +[rule-of-three]: https://en.wikipedia.org/wiki/Rule_of_three_(computer_programming) [exclude-files-engine]: https://docs.codeclimate.com/docs/excluding-files-and-folders#section-exclude-paths-for-specific-engines diff --git a/lib/cc/engine/analyzers/analyzer_base.rb b/lib/cc/engine/analyzers/analyzer_base.rb index 8ec3e5e2..a06b1408 100644 --- a/lib/cc/engine/analyzers/analyzer_base.rb +++ b/lib/cc/engine/analyzers/analyzer_base.rb @@ -43,6 +43,10 @@ def mass_threshold engine_config.mass_threshold_for(self.class::LANGUAGE) || self.class::DEFAULT_MASS_THRESHOLD end + def count_threshold + engine_config.count_threshold_for(self.class::LANGUAGE) + end + def calculate_points(mass) overage = mass - mass_threshold base_points + (overage * points_per_overage) diff --git a/lib/cc/engine/analyzers/engine_config.rb b/lib/cc/engine/analyzers/engine_config.rb index e0d062d1..65ff75c5 100644 --- a/lib/cc/engine/analyzers/engine_config.rb +++ b/lib/cc/engine/analyzers/engine_config.rb @@ -2,6 +2,8 @@ module CC module Engine module Analyzers class EngineConfig + DEFAULT_COUNT_THRESHOLD = 2 + def initialize(hash) @config = normalize(hash) end @@ -30,6 +32,16 @@ def mass_threshold_for(language) end end + def count_threshold_for(language) + threshold = fetch_language(language)["count_threshold"] || + config.fetch("config", {}).fetch("count_threshold", nil) || + DEFAULT_COUNT_THRESHOLD + + if threshold + threshold.to_i + end + end + def fetch_language(language) language = config. fetch("languages", {}). diff --git a/lib/cc/engine/analyzers/reporter.rb b/lib/cc/engine/analyzers/reporter.rb index 5cd0233b..1c7bee61 100644 --- a/lib/cc/engine/analyzers/reporter.rb +++ b/lib/cc/engine/analyzers/reporter.rb @@ -51,6 +51,7 @@ def report violations = new_violations(issue) violations.each do |violation| + next if (violation.occurrences + 1) < language_strategy.count_threshold debug("Violation name=#{violation.report_name} mass=#{violation.mass}") unless reports.include?(violation.report_name) diff --git a/lib/cc/engine/analyzers/violation.rb b/lib/cc/engine/analyzers/violation.rb index 17490b23..49dc204d 100644 --- a/lib/cc/engine/analyzers/violation.rb +++ b/lib/cc/engine/analyzers/violation.rb @@ -35,6 +35,10 @@ def mass current_sexp.mass end + def occurrences + other_sexps.count + end + private attr_reader :language_strategy, :other_sexps, :current_sexp @@ -96,10 +100,6 @@ def description description += " (mass = #{mass})" description end - - def occurrences - other_sexps.count - end end end end diff --git a/spec/cc/engine/analyzers/engine_config_spec.rb b/spec/cc/engine/analyzers/engine_config_spec.rb index 9e28c50b..a1974630 100644 --- a/spec/cc/engine/analyzers/engine_config_spec.rb +++ b/spec/cc/engine/analyzers/engine_config_spec.rb @@ -75,6 +75,47 @@ end end + describe "count_threshold_for" do + it "returns configured count threshold as integer" do + engine_config = CC::Engine::Analyzers::EngineConfig.new({ + "config" => { + "languages" => { + "EliXiR" => { + "count_threshold" => "3", + }, + }, + }, + }) + + expect(engine_config.count_threshold_for("elixir")).to eq(3) + end + + it "returns default value when language value is empty" do + engine_config = CC::Engine::Analyzers::EngineConfig.new({ + "config" => { + "count_threshold" => "4", + "languages" => { + "ruby" => "", + }, + }, + }) + + expect(engine_config.count_threshold_for("ruby")).to eq(4) + end + + it "returns 2 by default" do + engine_config = CC::Engine::Analyzers::EngineConfig.new({ + "config" => { + "languages" => { + "ruby" => "", + }, + }, + }) + + expect(engine_config.count_threshold_for("ruby")).to eq(2) + end + end + describe "include_paths" do it "returns given include paths" do engine_config = CC::Engine::Analyzers::EngineConfig.new({