Skip to content

Commit 1e8e9f7

Browse files
guangyeypytorchmergebot
authored and committed
Introduce AcceleratorAllocatorConfig as the common class (#149601)
# Motivation This PR aims to generalize `AllocatorConfig` to be device-agnostic. Introduce the class `AcceleratorAllocatorConfig` to clarify its scope as a configuration manager for accelerator backends (e.g., CUDA, XPU). The another name `AllocatorConfig` is now reserved for a potential future base class that can unify configuration handling for both CPU and accelerator allocators, should similar requirements arise for the CPU path. # Design Rule ## Overall This class configures memory allocation for both device and host memory. A single `AcceleratorAllocatorConfig` instance is shared across all accelerator backends, such as CUDA and XPU, under the assumption that relevant environment variables apply uniformly to all accelerators. Device-specific configuration extensions are supported via hooks (see `registerDeviceConfigParserHook`). Introduce a new class `ConfigTokenizer` to help process the env variable config key-value pair ## Naming Convention: - Public API names in `AcceleratorAllocatorConfig` should be device-generic. - Members prefixed with `pinned_` are specific to the host/pinned allocator. - Environment variable names should be generic across backends. - Comma-separated key-value pairs in the format: `key:value`. Use square brackets `[]` for list values Example: `key1:123, key2:[val1,val2]` ## Environment Variables: - The default environment variable for configuration is `PYTORCH_ALLOC_CONF`. - For backward compatibility, `PYTORCH_CUDA_ALLOC_CONF` and `PYTORCH_HIP_ALLOC_CONF` are also supported with lower priority. Pull Request resolved: #149601 Approved by: https://github.com/albanD
1 parent af3d069 commit 1e8e9f7

File tree

3 files changed

+693
-0
lines changed

3 files changed

+693
-0
lines changed

c10/core/AllocatorConfig.cpp

Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
#include <c10/core/AllocatorConfig.h>
2+
#include <c10/core/DeviceType.h>
3+
#include <c10/util/env.h>
4+
#include <c10/util/irange.h>
5+
6+
namespace c10::CachingAllocator {
7+
8+
namespace {
// Number of power-of-two size buckets tracked by roundup_power2_divisions_.
// Must equal log2(kRoundUpPowerOfTwoEnd) - log2(kRoundUpPowerOfTwoStart);
// roundup_power2_divisions() TORCH_CHECKs this invariant.
constexpr size_t kRoundUpPowerOfTwoIntervals = 16;
// One mebibyte in bytes; env-var sizes are expressed in MB.
constexpr size_t kMB = 1024 * 1024ul;
// The bucketed interval range starts at 1MB and ends at 64GB.
constexpr size_t kRoundUpPowerOfTwoStart = 1 * kMB; // 1MB
constexpr size_t kRoundUpPowerOfTwoEnd = 64 * 1024ul * kMB; // 64GB
} // anonymous namespace
14+
15+
// Returns the process-wide singleton configuration, lazily parsing the
// allocator environment variables exactly once on first use (thread-safe via
// C++ static local initialization).
AcceleratorAllocatorConfig& AcceleratorAllocatorConfig::instance() {
  static AcceleratorAllocatorConfig instance;
// The macro expands to an early `return true` out of the lambda below, so
// only the highest-priority environment variable that is set gets parsed.
// Deprecated device-specific names emit a one-time warning.
#define C10_ALLOCATOR_CONFIG_PARSE_ENV(env, deprecated)                       \
  auto env##_name = c10::utils::get_env(#env);                                \
  if (env##_name.has_value()) {                                               \
    if (deprecated) {                                                         \
      TORCH_WARN_ONCE(#env " is deprecated, use PYTORCH_ALLOC_CONF instead"); \
    }                                                                         \
    instance.parseArgs(env##_name.value());                                   \
    return true;                                                              \
  }
  // env_flag is only used to trigger the one-time parse; its value (whether
  // any env var was found) is intentionally unused.
  static bool env_flag [[maybe_unused]] = []() {
    C10_ALLOCATOR_CONFIG_PARSE_ENV(PYTORCH_ALLOC_CONF, false)
    // Keep this for backwards compatibility
    C10_ALLOCATOR_CONFIG_PARSE_ENV(PYTORCH_CUDA_ALLOC_CONF, /*deprecated=*/true)
    C10_ALLOCATOR_CONFIG_PARSE_ENV(PYTORCH_HIP_ALLOC_CONF, /*deprecated=*/true)
    return false;
  }();
#undef C10_ALLOCATOR_CONFIG_PARSE_ENV
  return instance;
}
36+
37+
// Start with every power-of-two interval mapped to 0, i.e. rounding-up
// disabled until configured via roundup_power2_divisions.
AcceleratorAllocatorConfig::AcceleratorAllocatorConfig()
    : roundup_power2_divisions_(kRoundUpPowerOfTwoIntervals, 0) {}
40+
41+
size_t AcceleratorAllocatorConfig::roundup_power2_divisions(size_t size) {
42+
size_t log_size = (63 - llvm::countLeadingZeros(size));
43+
44+
// Our intervals start at 1MB and end at 64GB
45+
const size_t interval_start =
46+
63 - llvm::countLeadingZeros(kRoundUpPowerOfTwoStart);
47+
const size_t interval_end =
48+
63 - llvm::countLeadingZeros(kRoundUpPowerOfTwoEnd);
49+
TORCH_CHECK(
50+
interval_end - interval_start == kRoundUpPowerOfTwoIntervals,
51+
"kRoundUpPowerOfTwoIntervals mismatch");
52+
53+
size_t index =
54+
(log_size > interval_start) ? (log_size - interval_start) : 0ul;
55+
index = std::min(index, kRoundUpPowerOfTwoIntervals - 1);
56+
return instance().roundup_power2_divisions_[index];
57+
}
58+
59+
size_t AcceleratorAllocatorConfig::parseMaxSplitSize(
60+
const ConfigTokenizer& tokenizer,
61+
size_t i) {
62+
tokenizer.checkToken(++i, ":");
63+
constexpr size_t min_allowed_split_size_mb = kLargeBuffer / kMB;
64+
constexpr size_t max_allowed_split_size_mb =
65+
std::numeric_limits<size_t>::max() / kMB;
66+
67+
size_t val_env = tokenizer.toSizeT(++i);
68+
TORCH_CHECK(
69+
val_env >= min_allowed_split_size_mb,
70+
"CachingAllocator option max_split_size_mb too small, must be >= ",
71+
min_allowed_split_size_mb);
72+
val_env = std::min(val_env, max_allowed_split_size_mb);
73+
max_split_size_ = val_env * kMB;
74+
75+
return i;
76+
}
77+
78+
size_t AcceleratorAllocatorConfig::parseMaxNonSplitRoundingSize(
79+
const ConfigTokenizer& tokenizer,
80+
size_t i) {
81+
tokenizer.checkToken(++i, ":");
82+
constexpr size_t min_allowed_split_size_mb = kLargeBuffer / kMB;
83+
constexpr size_t max_allowed_split_size_mb =
84+
std::numeric_limits<size_t>::max() / kMB;
85+
86+
size_t val_env = tokenizer.toSizeT(++i);
87+
TORCH_CHECK(
88+
val_env >= min_allowed_split_size_mb,
89+
"CachingAllocator option max_non_split_rounding_mb too small, must be >= ",
90+
min_allowed_split_size_mb);
91+
val_env = std::min(val_env, max_allowed_split_size_mb);
92+
max_non_split_rounding_size_ = val_env * kMB;
93+
94+
return i;
95+
}
96+
97+
size_t AcceleratorAllocatorConfig::parseGarbageCollectionThreshold(
98+
const ConfigTokenizer& tokenizer,
99+
size_t i) {
100+
tokenizer.checkToken(++i, ":");
101+
double val_env = tokenizer.toDouble(++i);
102+
TORCH_CHECK(
103+
val_env > 0 && val_env < 1.0,
104+
"garbage_collect_threshold is invalid, set it in (0.0, 1.0)");
105+
garbage_collection_threshold_ = val_env;
106+
107+
return i;
108+
}
109+
110+
// Parses "roundup_power2_divisions:<value>" or
// "roundup_power2_divisions:[<size>:<value>,...,>:<value>]" starting at token
// index i, filling roundup_power2_divisions_ per size interval, and returns
// the index of the last token consumed.
size_t AcceleratorAllocatorConfig::parseRoundUpPower2Divisions(
    const ConfigTokenizer& tokenizer,
    size_t i) {
  tokenizer.checkToken(++i, ":");
  bool first_value = true;

  if (tokenizer[++i] == "[") {
    // List form: each entry is "<boundary>:<divisions>"; the special
    // boundary ">" applies the value to all intervals after the last one
    // configured so far.
    size_t last_index = 0;
    // NOLINTNEXTLINE(bugprone-inc-dec-in-conditions)
    while (++i < tokenizer.size() && tokenizer[i] != "]") {
      size_t value_index = i;
      tokenizer.checkToken(++i, ":");
      size_t value = tokenizer.toSizeT(++i);
      TORCH_CHECK(
          value == 0 || llvm::isPowerOf2_64(value),
          "For roundups, the divisions has to be power of 2 or 0 to disable roundup ");

      if (tokenizer[value_index] == ">") {
        // Fill every bucket past the most recently set one with this value.
        std::fill(
            std::next(
                roundup_power2_divisions_.begin(),
                static_cast<std::vector<size_t>::difference_type>(
                    last_index + 1)),
            roundup_power2_divisions_.end(),
            value);
      } else {
        // Explicit size boundary: must itself be a power of two.
        size_t boundary = tokenizer.toSizeT(value_index);
        TORCH_CHECK(
            llvm::isPowerOf2_64(boundary),
            "For roundups, the intervals have to be power of 2 ");

        // Map the boundary to its bucket index, clamped to the valid range.
        size_t index = 63 - llvm::countLeadingZeros(boundary);
        index =
            std::clamp(index, size_t{0}, roundup_power2_divisions_.size() - 1);

        // The first explicit entry also back-fills all buckets below it.
        if (first_value) {
          std::fill(
              roundup_power2_divisions_.begin(),
              std::next(
                  roundup_power2_divisions_.begin(),
                  static_cast<std::vector<size_t>::difference_type>(index)),
              value);
          first_value = false;
        }
        roundup_power2_divisions_[index] = value;
        last_index = index;
      }

      // Entries inside the brackets are comma-separated.
      if (tokenizer[i + 1] != "]") {
        tokenizer.checkToken(++i, ",");
      }
    }
    TORCH_INTERNAL_ASSERT(
        i < tokenizer.size(),
        "Expected closing bracket ']' in ConfigTokenizer but reached end of config");
  } else { // Keep this for backwards compatibility
    // Scalar form: one power-of-two value applied to every interval.
    size_t value = tokenizer.toSizeT(i);
    TORCH_CHECK(
        llvm::isPowerOf2_64(value),
        "For roundups, the divisions has to be power of 2 ");
    std::fill(
        roundup_power2_divisions_.begin(),
        roundup_power2_divisions_.end(),
        value);
  }
  return i;
}
177+
178+
size_t AcceleratorAllocatorConfig::parseExpandableSegments(
179+
const ConfigTokenizer& tokenizer,
180+
size_t i) {
181+
tokenizer.checkToken(++i, ":");
182+
use_expandable_segments_ = tokenizer.toBool(++i);
183+
184+
return i;
185+
}
186+
187+
size_t AcceleratorAllocatorConfig::parsePinnedUseBackgroundThreads(
188+
const ConfigTokenizer& tokenizer,
189+
size_t i) {
190+
tokenizer.checkToken(++i, ":");
191+
pinned_use_background_threads_ = tokenizer.toBool(++i);
192+
193+
return i;
194+
}
195+
196+
void AcceleratorAllocatorConfig::parseArgs(const std::string& env) {
197+
// The following option will be reset to its default value if not explicitly
198+
// set each time.
199+
max_split_size_ = std::numeric_limits<size_t>::max();
200+
roundup_power2_divisions_.assign(kRoundUpPowerOfTwoIntervals, 0);
201+
garbage_collection_threshold_ = 0;
202+
203+
{
204+
std::lock_guard<std::mutex> lock(last_allocator_settings_mutex_);
205+
last_allocator_settings_ = env;
206+
}
207+
208+
ConfigTokenizer tokenizer(env);
209+
for (size_t i = 0; i < tokenizer.size(); i++) {
210+
const auto& key = tokenizer[i];
211+
if (key == "max_split_size_mb") {
212+
i = parseMaxSplitSize(tokenizer, i);
213+
} else if (key == "max_non_split_rounding_mb") {
214+
i = parseMaxNonSplitRoundingSize(tokenizer, i);
215+
} else if (key == "garbage_collection_threshold") {
216+
i = parseGarbageCollectionThreshold(tokenizer, i);
217+
} else if (key == "roundup_power2_divisions") {
218+
i = parseRoundUpPower2Divisions(tokenizer, i);
219+
} else if (key == "expandable_segments") {
220+
i = parseExpandableSegments(tokenizer, i);
221+
} else if (key == "pinned_use_background_threads") {
222+
i = parsePinnedUseBackgroundThreads(tokenizer, i);
223+
} else {
224+
i = tokenizer.skipKey(i);
225+
}
226+
227+
if (i + 1 < tokenizer.size()) {
228+
tokenizer.checkToken(++i, ",");
229+
}
230+
}
231+
}
232+
233+
} // namespace c10::CachingAllocator

0 commit comments

Comments
 (0)