You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
[SPARK-11875][ML][PYSPARK] Update doc for PySpark HasCheckpointInterval
* Update doc for PySpark ```HasCheckpointInterval``` so that users can understand how to disable checkpointing.
* Update doc for PySpark ```cacheNodeIds``` of ```DecisionTreeParams``` to document the relationship between ```cacheNodeIds``` and ```checkpointInterval```.
Author: Yanbo Liang <ybliang8@gmail.com>
Closes apache#9856 from yanboliang/spark-11875.
(cherry picked from commit 7216f40)
Signed-off-by: Xiangrui Meng <meng@databricks.com>
Copy file name to clipboardExpand all lines: python/pyspark/ml/param/shared.py
+7 -7 Lines changed: 7 additions & 7 deletions
Original file line number
Diff line number
Diff line change
@@ -325,16 +325,16 @@ def getNumFeatures(self):
325
325
326
326
class HasCheckpointInterval(Params):
327
327
"""
328
-
Mixin for param checkpointInterval: checkpoint interval (>= 1).
328
+
Mixin for param checkpointInterval: set checkpoint interval (>= 1) or disable checkpoint (-1). E.g. 10 means that the cache will get checkpointed every 10 iterations.
329
329
"""
330
330
331
331
# a placeholder to make it appear in the generated doc
checkpointInterval=Param(Params._dummy(), "checkpointInterval", "set checkpoint interval (>= 1) or disable checkpoint (-1). E.g. 10 means that the cache will get checkpointed every 10 iterations.")
#: param for set checkpoint interval (>= 1) or disable checkpoint (-1). E.g. 10 means that the cache will get checkpointed every 10 iterations.
337
+
self.checkpointInterval=Param(self, "checkpointInterval", "set checkpoint interval (>= 1) or disable checkpoint (-1). E.g. 10 means that the cache will get checkpointed every 10 iterations.")
338
338
339
339
def setCheckpointInterval(self, value):
340
340
"""
@@ -636,7 +636,7 @@ class DecisionTreeParams(Params):
636
636
minInstancesPerNode=Param(Params._dummy(), "minInstancesPerNode", "Minimum number of instances each child must have after split. If a split causes the left or right child to have fewer than minInstancesPerNode, the split will be discarded as invalid. Should be >= 1.")
637
637
minInfoGain=Param(Params._dummy(), "minInfoGain", "Minimum information gain for a split to be considered at a tree node.")
638
638
maxMemoryInMB=Param(Params._dummy(), "maxMemoryInMB", "Maximum memory in MB allocated to histogram aggregation.")
639
-
cacheNodeIds=Param(Params._dummy(), "cacheNodeIds", "If false, the algorithm will pass trees to executors to match instances with nodes. If true, the algorithm will cache node IDs for each instance. Caching can speed up training of deeper trees.")
639
+
cacheNodeIds=Param(Params._dummy(), "cacheNodeIds", "If false, the algorithm will pass trees to executors to match instances with nodes. If true, the algorithm will cache node IDs for each instance. Caching can speed up training of deeper trees. Users can set how often should the cache be checkpointed or disable it by setting checkpointInterval.")
640
640
641
641
642
642
def __init__(self):
@@ -651,8 +651,8 @@ def __init__(self):
651
651
self.minInfoGain=Param(self, "minInfoGain", "Minimum information gain for a split to be considered at a tree node.")
652
652
#: param for Maximum memory in MB allocated to histogram aggregation.
653
653
self.maxMemoryInMB=Param(self, "maxMemoryInMB", "Maximum memory in MB allocated to histogram aggregation.")
654
-
#: param for If false, the algorithm will pass trees to executors to match instances with nodes. If true, the algorithm will cache node IDs for each instance. Caching can speed up training of deeper trees.
655
-
self.cacheNodeIds=Param(self, "cacheNodeIds", "If false, the algorithm will pass trees to executors to match instances with nodes. If true, the algorithm will cache node IDs for each instance. Caching can speed up training of deeper trees.")
654
+
#: param for If false, the algorithm will pass trees to executors to match instances with nodes. If true, the algorithm will cache node IDs for each instance. Caching can speed up training of deeper trees. Users can set how often should the cache be checkpointed or disable it by setting checkpointInterval.
655
+
self.cacheNodeIds=Param(self, "cacheNodeIds", "If false, the algorithm will pass trees to executors to match instances with nodes. If true, the algorithm will cache node IDs for each instance. Caching can speed up training of deeper trees. Users can set how often should the cache be checkpointed or disable it by setting checkpointInterval.")
0 commit comments