From 5e055e070aa4557063a0a2410da5ec6adc267298 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Merc=C3=A8=20Mart=C3=ADn=20Prats?= Date: Mon, 9 Sep 2024 18:44:39 +0200 Subject: [PATCH 01/19] Fixing get_leaves function and better documentation --- .readthedocs.yaml | 15 +- HISTORY.rst | 6 + bigml/generators/model.py | 9 +- bigml/tests/create_model_steps.py | 6 + bigml/tests/test_40_local_from_file.py | 8 +- bigml/version.py | 2 +- docs/101_anomaly.rst | 4 +- docs/101_association.rst | 4 +- docs/101_cluster.rst | 4 +- docs/101_deepnet.rst | 4 +- docs/101_ensemble.rst | 4 +- docs/101_fusion.rst | 4 +- docs/101_images_classification.rst | 4 +- docs/101_images_feature_extraction.rst | 4 +- docs/101_linear_regression.rst | 4 +- docs/101_logistic_regression.rst | 4 +- docs/101_model.rst | 4 +- docs/101_object_detection.rst | 4 +- docs/101_optiml.rst | 4 +- docs/101_pca.rst | 4 +- docs/101_scripting.rst | 4 +- docs/101_topic_model.rst | 4 +- docs/101_ts.rst | 4 +- docs/conf.py | 9 +- docs/index.rst | 367 +- docs/ml_resources.rst | 6086 +++++++++++++----------- docs/quick_start.rst | 284 ++ docs/reading_resources.rst | 120 +- docs/requirements.txt | 2 + 29 files changed, 3891 insertions(+), 3091 deletions(-) create mode 100644 docs/quick_start.rst create mode 100644 docs/requirements.txt diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 7e93ed20..9712e405 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -1,9 +1,22 @@ +# .readthedocs.yaml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required version: 2 +# Set the version of Python and other tools you might need build: os: ubuntu-22.04 tools: - python: "3.10" + python: "3.11" +# Build documentation in the docs/ directory with Sphinx sphinx: configuration: docs/conf.py + +# We recommend specifying your dependencies to enable reproducible builds: +# https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +python: + install: + - 
requirements: docs/requirements.txt diff --git a/HISTORY.rst b/HISTORY.rst index 2aa2591a..84612677 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,12 @@ History ------- +9.8.0 (2024-09-09) +------------------ + +- Fixing the get_leaves function for local decision trees. +- Changing documentation templates. + 9.8.0.dev1 (2024-02-28) ----------------------- diff --git a/bigml/generators/model.py b/bigml/generators/model.py index 16be16c2..d6130bba 100644 --- a/bigml/generators/model.py +++ b/bigml/generators/model.py @@ -135,8 +135,9 @@ def get_leaves(model, path=None, filter_function=None): offsets = model.offsets - def get_tree_leaves(tree, fields, path, leaves, filter_function=None): + def get_tree_leaves(tree, fields, path, filter_function=None): + leaves = [] node = get_node(tree) predicate = get_predicate(tree) if isinstance(predicate, list): @@ -149,10 +150,12 @@ def get_tree_leaves(tree, fields, path, leaves, filter_function=None): if children: for child in children: + leaves += get_tree_leaves(child, fields, - path[:], leaves, + path[:], filter_function=filter_function) else: + print("id:", node[offsets["id"]]) leaf = { 'id': node[offsets["id"]], 'confidence': node[offsets["confidence"]], @@ -171,7 +174,7 @@ def get_tree_leaves(tree, fields, path, leaves, filter_function=None): or filter_function(leaf)): leaves += [leaf] return leaves - return get_tree_leaves(model.tree, model.fields, path, leaves, + return get_tree_leaves(model.tree, model.fields, path, filter_function) diff --git a/bigml/tests/create_model_steps.py b/bigml/tests/create_model_steps.py index 359ade36..b95893fb 100644 --- a/bigml/tests/create_model_steps.py +++ b/bigml/tests/create_model_steps.py @@ -31,6 +31,7 @@ from bigml.deepnet import Deepnet from bigml.fusion import Fusion from bigml.ensemble import Ensemble +from bigml.generators.model import get_leaves from .read_resource_steps import wait_until_status_code_is @@ -690,3 +691,8 @@ def the_cloned_logistic_regression_is(step, 
logistic_regression): def check_deepnet_id_local_id(step): """Checking that deepnet ID and local deepnet ID match""" eq_(world.deepnet["resource"], step.bigml["local_deepnet"].resource_id) + + +def check_leaves_number(step, leaves_number): + """Checking the number of leaves in a tree local model""" + eq_(len(get_leaves(step.bigml["local_model"])), leaves_number) diff --git a/bigml/tests/test_40_local_from_file.py b/bigml/tests/test_40_local_from_file.py index eba94bec..41a3b0f6 100644 --- a/bigml/tests/test_40_local_from_file.py +++ b/bigml/tests/test_40_local_from_file.py @@ -66,17 +66,18 @@ def test_scenario1(self): When I create a local model from the file "" Then the model ID and the local model ID match And the prediction for "" is "" + And the number of leaves is "" """ show_doc(self.test_scenario1) headers = ["data", "source_wait", "dataset_wait", "model_wait", "pmml", "exported_file", "input_data", "prediction", - "model_conf"] + "model_conf", 'leaves#'] examples = [ ['data/iris.csv', '10', '10', '10', False, - './tmp/model.json', {}, "Iris-setosa", '{}'], + './tmp/model.json', {}, "Iris-setosa", '{}', 9], ['data/iris.csv', '10', '10', '10', False, './tmp/model_dft.json', {}, "Iris-versicolor", - '{"default_numeric_value": "mean"}']] + '{"default_numeric_value": "mean"}', 9]] for example in examples: example = dict(zip(headers, example)) show_method(self, self.bigml["method"], example) @@ -97,6 +98,7 @@ def test_scenario1(self): model_create.check_model_id_local_id(self) model_create.local_model_prediction_is( self, example["input_data"], example["prediction"]) + model_create.check_leaves_number(self, example["leaves#"]) def test_scenario2(self): """ diff --git a/bigml/version.py b/bigml/version.py index 9865fb2a..3cd81c28 100644 --- a/bigml/version.py +++ b/bigml/version.py @@ -1 +1 @@ -__version__ = '9.8.0.dev1' +__version__ = '9.8.0' diff --git a/docs/101_anomaly.rst b/docs/101_anomaly.rst index 3ad31416..03fc9c31 100644 --- a/docs/101_anomaly.rst +++ 
b/docs/101_anomaly.rst @@ -1,8 +1,8 @@ .. toctree:: :hidden: -BigML Bindings: 101 - Using an anomaly detector -=============================================== +101 - Anomaly detector usage +============================ Following the schema described in the `prediction workflow `_, document, this is the code snippet that shows the minimal workflow to diff --git a/docs/101_association.rst b/docs/101_association.rst index e7cdb7e4..371456a2 100644 --- a/docs/101_association.rst +++ b/docs/101_association.rst @@ -1,8 +1,8 @@ .. toctree:: :hidden: -BigML Bindings: 101 - Using Association Discovery -================================================= +101 - Association Discovery usage +================================= Following the schema described in the `prediction workflow `_, document, this is the code snippet that shows the minimal workflow to diff --git a/docs/101_cluster.rst b/docs/101_cluster.rst index a7da2d2e..d4998463 100644 --- a/docs/101_cluster.rst +++ b/docs/101_cluster.rst @@ -1,8 +1,8 @@ .. toctree:: :hidden: -BigML Bindings: 101 - Using a Cluster -===================================== +101 - Cluster Usage +=================== Following the schema described in the `prediction workflow `_, document, this is the code snippet that shows the minimal workflow to diff --git a/docs/101_deepnet.rst b/docs/101_deepnet.rst index 8fe4330d..c8f1d2c6 100644 --- a/docs/101_deepnet.rst +++ b/docs/101_deepnet.rst @@ -1,8 +1,8 @@ .. toctree:: :hidden: -BigML Bindings: 101 - Using a Deepnet Model -=========================================== +101 - Deepnet usage +=================== Following the schema described in the `prediction workflow `_, document, this is the code snippet that shows the minimal workflow to diff --git a/docs/101_ensemble.rst b/docs/101_ensemble.rst index d996bbd8..0ca3f747 100644 --- a/docs/101_ensemble.rst +++ b/docs/101_ensemble.rst @@ -1,8 +1,8 @@ .. 
toctree:: :hidden: -BigML Bindings: 101 - Using an Ensemble -======================================= +101 - Ensemble usage +==================== Following the schema described in the `prediction workflow `_, document, this is the code snippet that shows the minimal workflow to diff --git a/docs/101_fusion.rst b/docs/101_fusion.rst index 01352f6c..8b549759 100644 --- a/docs/101_fusion.rst +++ b/docs/101_fusion.rst @@ -1,8 +1,8 @@ .. toctree:: :hidden: -BigML Bindings: 101 - Using a Fusion Model -========================================== +101 - Fusion usage +================== Following the schema described in the `prediction workflow `_, document, this is the code snippet that shows the minimal workflow to diff --git a/docs/101_images_classification.rst b/docs/101_images_classification.rst index 22d268d1..dd6fc4eb 100644 --- a/docs/101_images_classification.rst +++ b/docs/101_images_classification.rst @@ -1,8 +1,8 @@ .. toctree:: :hidden: -BigML Bindings: 101 - Images Classification -=========================================== +101 - Images Classification +=========================== Following the schema described in the `prediction workflow `_, document, this is the code snippet that shows the minimal workflow to diff --git a/docs/101_images_feature_extraction.rst b/docs/101_images_feature_extraction.rst index 5d838e08..f649d650 100644 --- a/docs/101_images_feature_extraction.rst +++ b/docs/101_images_feature_extraction.rst @@ -1,8 +1,8 @@ .. toctree:: :hidden: -BigML Bindings: 101 - Images Feature Extraction -=============================================== +101 - Images Feature Extraction +=============================== Following the schema described in the `prediction workflow `_, document, this is the code snippet that shows the minimal workflow to diff --git a/docs/101_linear_regression.rst b/docs/101_linear_regression.rst index b610b6ea..08f87889 100644 --- a/docs/101_linear_regression.rst +++ b/docs/101_linear_regression.rst @@ -1,8 +1,8 @@ .. 
toctree:: :hidden: -BigML Bindings: 101 - Using a Linear Regression -================================================= +101 - Linear Regression usage +============================= Following the schema described in the `prediction workflow `_, document, this is the code snippet that shows the minimal workflow to diff --git a/docs/101_logistic_regression.rst b/docs/101_logistic_regression.rst index ed7fdec0..8cda0471 100644 --- a/docs/101_logistic_regression.rst +++ b/docs/101_logistic_regression.rst @@ -1,8 +1,8 @@ .. toctree:: :hidden: -BigML Bindings: 101 - Using a Logistic Regression -================================================= +101 - Logistic Regression usage +=============================== Following the schema described in the `prediction workflow `_, document, this is the code snippet that shows the minimal workflow to diff --git a/docs/101_model.rst b/docs/101_model.rst index 294d439a..a7bf1915 100644 --- a/docs/101_model.rst +++ b/docs/101_model.rst @@ -1,8 +1,8 @@ .. toctree:: :hidden: -BigML Bindings: 101 - Using a Decision Tree Model -================================================= +101 - Decision Tree usage +========================= Following the schema described in the `prediction workflow `_, document, this is the code snippet that shows the minimal workflow to diff --git a/docs/101_object_detection.rst b/docs/101_object_detection.rst index 0d98a0a9..b851366d 100644 --- a/docs/101_object_detection.rst +++ b/docs/101_object_detection.rst @@ -1,8 +1,8 @@ .. toctree:: :hidden: -BigML Bindings: 101 - Images Object Detection -============================================= +101 - Images Object Detection +============================= Following the schema described in the `prediction workflow `_, document, this is the code snippet that shows the minimal workflow to diff --git a/docs/101_optiml.rst b/docs/101_optiml.rst index ad96b959..cd1f7d2e 100644 --- a/docs/101_optiml.rst +++ b/docs/101_optiml.rst @@ -1,8 +1,8 @@ .. 
toctree:: :hidden: -BigML Bindings: 101 - Using an OptiML -===================================== +101 - OptiML usage +================== Following the schema described in the `prediction workflow `_, document, this is the code snippet that shows the minimal workflow to diff --git a/docs/101_pca.rst b/docs/101_pca.rst index 9e8ba268..2138470a 100644 --- a/docs/101_pca.rst +++ b/docs/101_pca.rst @@ -1,8 +1,8 @@ .. toctree:: :hidden: -BigML Bindings: 101 - Using a PCA -================================= +101 - PCA usage +=============== The PCA model is used to find the linear combination of your original features that best describes your data. In that sense, the goal of the model diff --git a/docs/101_scripting.rst b/docs/101_scripting.rst index 5e17ebec..aa0f05a2 100644 --- a/docs/101_scripting.rst +++ b/docs/101_scripting.rst @@ -1,8 +1,8 @@ .. toctree:: :hidden: -BigML Bindings: 101 - Creating and executing scripts -==================================================== +101 - Creating and executing scripts +==================================== The bindings offer methods to create and execute `WhizzML `_ scripts in the platform. diff --git a/docs/101_topic_model.rst b/docs/101_topic_model.rst index acabd178..065dcd2e 100644 --- a/docs/101_topic_model.rst +++ b/docs/101_topic_model.rst @@ -1,8 +1,8 @@ .. toctree:: :hidden: -BigML Bindings: 101 - Using a Topic Model -========================================= +101 - Topic Model usage +======================= Following the schema described in the `prediction workflow `_, document, this is the code snippet that shows the minimal workflow to diff --git a/docs/101_ts.rst b/docs/101_ts.rst index 9d349717..ff5388b0 100644 --- a/docs/101_ts.rst +++ b/docs/101_ts.rst @@ -1,8 +1,8 @@ .. 
toctree:: :hidden: -BigML Bindings: 101 - Using a Time Series -========================================= +101 - Time Series usage +======================= Following the schema described in the `prediction workflow `_, document, this is the code snippet that shows the minimal workflow to diff --git a/docs/conf.py b/docs/conf.py index 5815a33f..ac951f6b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -30,7 +30,9 @@ # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = [] +extensions = [ + 'sphinx_rtd_theme' +] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -46,7 +48,7 @@ # General information about the project. project = u'BigML' -copyright = u'2011 - 2020, The BigML Team' +copyright = u'2011 - 2024, The BigML Team' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -101,7 +103,8 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'default' +# a list of builtin themes. +html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the diff --git a/docs/index.rst b/docs/index.rst index 72f76660..62d26d86 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,16 +1,35 @@ +BigML Python Bindings +===================== + +`BigML `_ makes machine learning easy by taking care +of the details required to add data-driven decisions and predictive +power to your company. Unlike other machine learning services, BigML +creates +`beautiful predictive models `_ that +can be easily understood and interacted with. + +These BigML Python bindings allow you to interact with BigML.io, the API +for BigML. 
You can use it to easily create, retrieve, list, update, and +delete BigML resources (i.e., sources, datasets, models and, +predictions). + +This module is licensed under the `Apache License, Version +2.0 `_. + .. toctree:: + :maxdepth: 2 :hidden: + :caption: Basic Usage - ml_resources - creating_resources + quick_start 101_model 101_ensemble 101_deepnet 101_linear_regression 101_logistic_regression + 101_optiml 101_fusion 101_ts - 101_optiml 101_cluster 101_anomaly 101_topic_model @@ -20,40 +39,27 @@ 101_images_classification 101_images_feature_extraction 101_object_detection - reading_resources - updating_resources - deleting_resources - local_resources - whizzml_resources -BigML Python Bindings -===================== - -`BigML `_ makes machine learning easy by taking care -of the details required to add data-driven decisions and predictive -power to your company. Unlike other machine learning services, BigML -creates -`beautiful predictive models `_ that -can be easily understood and interacted with. - -These BigML Python bindings allow you to interact with BigML.io, the API -for BigML. You can use it to easily create, retrieve, list, update, and -delete BigML resources (i.e., sources, datasets, models and, -predictions). +.. toctree:: + :maxdepth: 2 + :hidden: + :caption: Resouce Management -This module is licensed under the `Apache License, Version -2.0 `_. + ml_resources + creating_resources + reading_resources + updating_resources + deleting_resources -Support -------- -Please report problems and bugs to our `BigML.io issue -tracker `_. +.. toctree:: + :maxdepth: 2 + :hidden: + :caption: Client and Server Automation -Discussions about the different bindings take place in the general -`BigML mailing list `_. Or join us -in our `Campfire chatroom `_. + local_resources + whizzml_resources Requirements ------------ @@ -78,7 +84,7 @@ installed, but that is optional: we fall back to Python's built-in JSON libraries is ``simplejson`` is not found. 
`Node.js `_ is not installed by default, but will be -needed for `Local Pipelines `_ to work when datasets containing new added features are part of the transformation workflow. @@ -320,291 +326,6 @@ created in this environment have been moved to a special project in the now unique ``Production Environment``, so this flag is no longer needed to work with them. -Quick Start ------------ - -Imagine that you want to use `this csv -file `_ containing the `Iris -flower dataset `_ to -predict the species of a flower whose ``petal length`` is ``2.45`` and -whose ``petal width`` is ``1.75``. A preview of the dataset is shown -below. It has 4 numeric fields: ``sepal length``, ``sepal width``, -``petal length``, ``petal width`` and a categorical field: ``species``. -By default, BigML considers the last field in the dataset as the -objective field (i.e., the field that you want to generate predictions -for). - -:: - - sepal length,sepal width,petal length,petal width,species - 5.1,3.5,1.4,0.2,Iris-setosa - 4.9,3.0,1.4,0.2,Iris-setosa - 4.7,3.2,1.3,0.2,Iris-setosa - ... - 5.8,2.7,3.9,1.2,Iris-versicolor - 6.0,2.7,5.1,1.6,Iris-versicolor - 5.4,3.0,4.5,1.5,Iris-versicolor - ... - 6.8,3.0,5.5,2.1,Iris-virginica - 5.7,2.5,5.0,2.0,Iris-virginica - 5.8,2.8,5.1,2.4,Iris-virginica - -You can easily generate a prediction following these steps: - -.. code-block:: python - - from bigml.api import BigML - - api = BigML() - - source = api.create_source('./data/iris.csv') - dataset = api.create_dataset(source) - model = api.create_model(dataset) - prediction = api.create_prediction(model, \ - {"petal width": 1.75, "petal length": 2.45}) - -You can then print the prediction using the ``pprint`` method: - -.. code-block:: python - - >>> api.pprint(prediction) - species for {"petal width": 1.75, "petal length": 2.45} is Iris-setosa - -Certainly, any of the resources created in BigML can be configured using -several arguments described in the `API documentation `_. 
-Any of these configuration arguments can be added to the ``create`` method -as a dictionary in the last optional argument of the calls: - -.. code-block:: python - - from bigml.api import BigML - - api = BigML() - - source_args = {"name": "my source", - "source_parser": {"missing_tokens": ["NULL"]}} - source = api.create_source('./data/iris.csv', source_args) - dataset_args = {"name": "my dataset"} - dataset = api.create_dataset(source, dataset_args) - model_args = {"objective_field": "species"} - model = api.create_model(dataset, model_args) - prediction_args = {"name": "my prediction"} - prediction = api.create_prediction(model, \ - {"petal width": 1.75, "petal length": 2.45}, - prediction_args) - -The ``iris`` dataset has a small number of instances, and usually will be -instantly created, so the ``api.create_`` calls will probably return the -finished resources outright. As BigML's API is asynchronous, -in general you will need to ensure -that objects are finished before using them by using ``api.ok``. - -.. code-block:: python - - from bigml.api import BigML - - api = BigML() - - source = api.create_source('./data/iris.csv') - api.ok(source) - dataset = api.create_dataset(source) - api.ok(dataset) - model = api.create_model(dataset) - api.ok(model) - prediction = api.create_prediction(model, \ - {"petal width": 1.75, "petal length": 2.45}) - -Note that the prediction -call is not followed by the ``api.ok`` method. Predictions are so quick to be -generated that, unlike the -rest of resouces, will be generated synchronously as a finished object. - -Alternatively to the ``api.ok`` method, BigML offers -`webhooks `_ that can be set -when creating a resource and will call the url of you choice when the -finished or failed event is reached. A secret can be included in the call to -verify the webhook call authenticity, and a - -.. code-block:: python - - bigml.webhooks.check_signature(request, signature) - -function is offered to that end. 
As an example, this snippet creates a source -and sets a webhook to call ``https://my_webhook.com/endpoint`` when finished: - -.. code-block:: python - - from bigml.api import BigML - api = BigML() - # using a webhook with a secret - api.create_source("https://static.bigml.com/csv/iris.csv", - {"webhook": {"url": "https://my_webhook.com/endpoint", - "secret": "mysecret"}}) - - -The ``iris`` prediction example assumed that your objective -field (the one you want to predict) is the last field in the dataset. -If that's not he case, you can explicitly -set the name of this field in the creation call using the ``objective_field`` -argument: - - -.. code-block:: python - - from bigml.api import BigML - - api = BigML() - - source = api.create_source('./data/iris.csv') - api.ok(source) - dataset = api.create_dataset(source) - api.ok(dataset) - model = api.create_model(dataset, {"objective_field": "species"}) - api.ok(model) - prediction = api.create_prediction(model, \ - {'sepal length': 5, 'sepal width': 2.5}) - - -You can also generate an evaluation for the model by using: - -.. code-block:: python - - test_source = api.create_source('./data/test_iris.csv') - api.ok(test_source) - test_dataset = api.create_dataset(test_source) - api.ok(test_dataset) - evaluation = api.create_evaluation(model, test_dataset) - api.ok(evaluation) - - -The API object also offers the ``create``, ``get``, ``update`` and ``delete`` -generic methods to manage all type of resources. The type of resource to be -created is passed as first argument to the ``create`` method; - -.. code-block:: python - - from bigml.api import BigML - - api = BigML() - - source = api.create('source', './data/iris.csv') - source = api.update(source, {"name": "my new source name"}) - -Note that these methods don't need the ``api.ok`` method to be called -to wait for the resource to be finished. -The method waits internally for it by default. -This can be avoided by using ``finished=False`` as one of the arguments. 
- - -.. code-block:: python - - from bigml.api import BigML - - api = BigML() - - source = api.create('source', './data/iris.csv') - dataset = api.create('dataset', source, finished=False) # unfinished - api.ok(dataset) # waiting explicitly for the dataset to finish - dataset = api.update(dataset, {"name": "my_new_dataset_name"}, - finised=False) - api.ok(dataset) - -As an example for the ``delete`` and ``get`` methods, we could -create a batch prediction, put the predictions in a -dataset object and delete the ``batch_prediction``. - -.. code-block:: python - - from bigml.api import BigML - - api = BigML() - - batch_prediction = api.create('batchprediction', - 'model/5f3c3d2b5299637102000882', - 'dataset/5f29a563529963736c0116e9', - args={"output_dataset": True}) - batch_prediction_dataset = api.get(batch_prediction["object"][ \ - "output_dataset_resource"]) - api.delete(batch_prediction) - -If you set the ``storage`` argument in the ``api`` instantiation: - -.. code-block:: python - - api = BigML(storage='./storage') - -all the generated, updated or retrieved resources will be automatically -saved to the chosen directory. Once they are stored locally, the -``retrieve_resource`` method will look for the resource information -first in the local storage before trying to download the information from -the API. - -.. code-block:: python - - dataset = api.retrieve_resource("dataset/5e8e5672c7736e3d830037b5", - query_string="limit=-1") - - -Alternatively, you can use the ``export`` method to explicitly -download the JSON information -that describes any of your resources in BigML to a particular file: - -.. code-block:: python - - api.export('model/5acea49a08b07e14b9001068', - filename="my_dir/my_model.json") - -This example downloads the JSON for the model and stores it in -the ``my_dir/my_model.json`` file. - -In the case of models that can be represented in a `PMML` syntax, the -export method can be used to produce the corresponding `PMML` file. - -.. 
code-block:: python - - api.export('model/5acea49a08b07e14b9001068', - filename="my_dir/my_model.pmml", - pmml=True) - -You can also retrieve the last resource with some previously given tag: - -.. code-block:: python - - api.export_last("foo", - resource_type="ensemble", - filename="my_dir/my_ensemble.json") - -which selects the last ensemble that has a ``foo`` tag. This mechanism can -be specially useful when retrieving retrained models that have been created -with a shared unique keyword as tag. - -For a descriptive overview of the steps that you will usually need to -follow to model -your data and obtain predictions, please see the `basic Workflow sketch -`_ -document. You can also check other simple examples in the following documents: - -- `model 101 <101_model.html>`_ -- `logistic regression 101 <101_logistic_regression.html>`_ -- `linear regression 101 <101_linear_regression.html>`_ -- `ensemble 101 <101_ensemble.html>`_ -- `cluster 101 <101_cluster>`_ -- `anomaly detector 101 <101_anomaly.html>`_ -- `association 101 <101_association.html>`_ -- `topic model 101 <101_topic_model.html>`_ -- `deepnet 101 <101_deepnet.html>`_ -- `time series 101 <101_ts.html>`_ -- `fusion 101 <101_fusion.html>`_ -- `optiml 101 <101_optiml.html>`_ -- `PCA 101 <101_pca.html>`_ -- `scripting 101 <101_scripting.html>`_ - -And for examples on Image Processing: - -- `Images Classification 101 <101_images_classification.html>`_ -- `Object Detection 101<101_object_detection.html>`_ -- `Images Feature Extraction 101 <101_images_feature_extraction.html>`_ - Fields Structure ---------------- @@ -974,6 +695,7 @@ Install the tools required to build the documentation: .. code-block:: bash $ pip install sphinx + $ pip install sphinx-rtd-theme To build the HTML version of the documentation: @@ -984,6 +706,17 @@ To build the HTML version of the documentation: Then launch ``docs/_build/html/index.html`` in your browser. 
+ +Support +------- + +Please report problems and bugs to our `BigML.io issue +tracker `_. + +Discussions about the different bindings take place in the general +`BigML mailing list `_. + + Additional Information ---------------------- diff --git a/docs/ml_resources.rst b/docs/ml_resources.rst index 195fb193..45ba0020 100644 --- a/docs/ml_resources.rst +++ b/docs/ml_resources.rst @@ -4,9 +4,28 @@ ML Resources ============ +This section describes the resources available in the BigML API. When retrieved +with the corresponding bindings ``get_[resource_type]`` method, they will +some common attributes, like: + +- ``resource`` which contains their ID +- ``category`` which can be set to the list of categories as defined in the + API documentation. +- ``creator`` which refers to the creator username. + +To name some. + +Beside, every resource type will have different properties as required +by its nature, that can be checked in the +`API documentation +`_. Here's a list of the different +resource types and their associated structures and properties. + +Data Ingestion and Preparation +------------------------------ External Connectors -------------------- +~~~~~~~~~~~~~~~~~~~ The ``Externalconnector`` object is is an abstract resource that helps you create ``Sources`` from several external data sources @@ -19,10 +38,10 @@ a Machine Learning resource, but a helper to connect your data repos to BigML. "externalconnector/5e30b685e476845dd901df83") You can check the external connector properties at the `API documentation -`_. +`_. Source ------- +~~~~~~ The ``Source`` is the first resource that you build in BigML when uploading a file. 
BigML infers the structure of the file, whether it has headers or not, @@ -33,58 +52,58 @@ the ``Source`` information: >>> source = api.get_source("source/5e30b685e476845dd901df83") >>> api.pprint(source["object"]) - { u'category': 0, - u'charset': u'UTF-8', - u'code': 200, - u'configuration': None, - u'configuration_status': False, - u'content_type': u'text/plain;UTF-8', - u'created': u'2020-01-28T22:32:37.290000', - u'creator': u'mmartin', - u'credits': 0, - u'description': u'', - u'disable_datetime': False, - u'field_types': { u'categorical': 0, - u'datetime': 0, - u'items': 0, - u'numeric': 4, - u'text': 1, - u'total': 5}, - u'fields': { u'000000': { u'column_number': 0, - u'name': u'sepal length', - u'optype': u'numeric', - u'order': 0}, - u'000001': { u'column_number': 1, - u'name': u'sepal width', - u'optype': u'numeric', - u'order': 1}, - u'000002': { u'column_number': 2, - u'name': u'petal length', - u'optype': u'numeric', - u'order': 2}, - u'000003': { u'column_number': 3, - u'name': u'petal width', - u'optype': u'numeric', - u'order': 3}, - u'000004': { u'column_number': 4, - u'name': u'species', - u'optype': u'text', - u'order': 4, - u'term_analysis': { u'enabled': True}}}, - u'fields_meta': { u'count': 5, - u'image': 0, - u'limit': 1000, - u'offset': 0, - u'query_total': 5, - u'total': 5}, + { 'category': 0, + 'charset': 'UTF-8', + 'code': 200, + 'configuration': None, + 'configuration_status': False, + 'content_type': 'text/plain;UTF-8', + 'created': '2020-01-28T22:32:37.290000', + 'creator': 'mmartin', + 'credits': 0, + 'description': '', + 'disable_datetime': False, + 'field_types': { 'categorical': 0, + 'datetime': 0, + 'items': 0, + 'numeric': 4, + 'text': 1, + 'total': 5}, + 'fields': { '000000': { 'column_number': 0, + 'name': 'sepal length', + 'optype': 'numeric', + 'order': 0}, + '000001': { 'column_number': 1, + 'name': 'sepal width', + 'optype': 'numeric', + 'order': 1}, + '000002': { 'column_number': 2, + 'name': 'petal length', + 'optype': 
'numeric', + 'order': 2}, + '000003': { 'column_number': 3, + 'name': 'petal width', + 'optype': 'numeric', + 'order': 3}, + '000004': { 'column_number': 4, + 'name': 'species', + 'optype': 'text', + 'order': 4, + 'term_analysis': { 'enabled': True}}}, + 'fields_meta': { 'count': 5, + 'image': 0, + 'limit': 1000, + 'offset': 0, + 'query_total': 5, + 'total': 5}, ... } You can check the source properties at the `API documentation -`_. +`_. Dataset -------- +~~~~~~~ If you want to get some basic statistics for each field you can retrieve the ``fields`` from the dataset as follows to get a dictionary keyed by @@ -94,32 +113,32 @@ field id: >>> dataset = api.get_dataset(dataset) >>> api.pprint(api.get_fields(dataset)) - { u'000000': { u'column_number': 0, - u'datatype': u'double', - u'name': u'sepal length', - u'optype': u'numeric', - u'summary': { u'maximum': 7.9, - u'median': 5.77889, - u'minimum': 4.3, - u'missing_count': 0, - u'population': 150, - u'splits': [ 4.51526, + { '000000': { 'column_number': 0, + 'datatype': 'double', + 'name': 'sepal length', + 'optype': 'numeric', + 'summary': { 'maximum': 7.9, + 'median': 5.77889, + 'minimum': 4.3, + 'missing_count': 0, + 'population': 150, + 'splits': [ 4.51526, 4.67252, 4.81113, [... snip ... ] - u'000004': { u'column_number': 4, - u'datatype': u'string', - u'name': u'species', - u'optype': u'categorical', - u'summary': { u'categories': [ [ u'Iris-versicolor', + '000004': { 'column_number': 4, + 'datatype': 'string', + 'name': 'species', + 'optype': 'categorical', + 'summary': { 'categories': [ [ 'Iris-versicolor', 50], - [u'Iris-setosa', 50], - [ u'Iris-virginica', + ['Iris-setosa', 50], + [ 'Iris-virginica', 50]], - u'missing_count': 0}}} + 'missing_count': 0}}} The field filtering options are also available using a query string expression, @@ -132,965 +151,422 @@ for instance: limits the number of fields that will be included in ``dataset`` to 20. You can check the dataset properties at the `API documentation -`_. 
+`_. -Model ------ - -One of the greatest things about BigML is that the models that it -generates for you are fully white-boxed. To get the explicit tree-like -predictive model for the example above: +Samples +~~~~~~~ -.. code-block:: python +To provide quick access to your row data you can create a ``sample``. Samples +are in-memory objects that can be queried for subsets of data by limiting +their size, the fields or the rows returned. The structure of a sample would +be: - >>> model = api.get_model(model) - >>> api.pprint(model['object']['model']['root']) - {u'children': [ - {u'children': [ - {u'children': [{u'count': 38, - u'distribution': [[u'Iris-virginica', 38]], - u'output': u'Iris-virginica', - u'predicate': {u'field': u'000002', - u'operator': u'>', - u'value': 5.05}}, - u'children': [ - [ ... ] +.. code-block:: python - {u'count': 50, - u'distribution': [[u'Iris-setosa', 50]], - u'output': u'Iris-setosa', - u'predicate': {u'field': u'000002', - u'operator': u'<=', - u'value': 2.45}}]}, - {u'count': 150, - u'distribution': [[u'Iris-virginica', 50], - [u'Iris-versicolor', 50], - [u'Iris-setosa', 50]], - u'output': u'Iris-virginica', - u'predicate': True}]}}} + >>> from bigml.api import BigML + >>> api = BigML() + >>> sample = api.create_sample('dataset/55b7a6749841fa2500000d41', + {"max_rows": 150}) + >>> api.ok(sample) + >>> api.pprint(sample['object']) + { + "category": 0, + "code": 201, + "columns": 0, + "configuration": null, + "configuration_status": false, + "created": "2021-03-02T14:32:59.603699", + "creator": "alfred", + "dataset": "dataset/603e20a91f386f43db000004", + "dataset_status": true, + "description": "", + "excluded_fields": [], + "fields_meta": { + "count": 0, + "limit": 1000, + "offset": 0, + "total": 0 + }, + "input_fields": [ + "000000", + "000001", + "000002", + "000003", + "000004" + ], + "locale": "en_US", + "max_columns": 0, + "max_rows": 150, + "name": "iris", + "name_options": "", + "private": true, + "project": null, + 
"resource": "sample/603e4c9b1f386fdea6000000", + "rows": 0, + "seed": "d1dc0a2819344a079af521507b7e7ea8", + "shared": false, + "size": 4608, + "status": { + "code": 1, + "message": "The sample creation request has been queued and will be processed soon", + "progress": 0 + }, + "subscription": true, + "tags": [], + "type": 0, + "updated": "2021-03-02T14:32:59.603751" + } -(Note that we have abbreviated the output in the snippet above for -readability: the full predictive model you'll get is going to contain -much more details). -Again, filtering options are also available using a query string expression, -for instance: +Samples are not permanent objects. Once they are created, they will be +available as long as GETs are requested within periods smaller than +a pre-established TTL (Time to Live). The expiration timer of a sample is +reset every time a new GET is received. -.. code-block:: python +If requested, a sample can also perform linear regression and compute +Pearson's and Spearman's correlations for either one numeric field +against all other numeric fields or between two specific numeric fields. - >>> model = api.get_model(model, "limit=5") +You can check the sample properties at the `API documentation +`_. -limits the number of fields that will be included in ``model`` to 5. +Correlations +~~~~~~~~~~~~ -You can check the model properties at the `API documentation -`_. +A ``correlation`` resource contains a series of computations that reflect the +degree of dependence between the field set as objective for your predictions +and the rest of fields in your dataset. The dependence degree is obtained by +comparing the distributions in every objective and non-objective field pair, +as independent fields should have probabilistic +independent distributions. 
Depending on the types of the fields to compare, +the metrics used to compute the correlation degree will be: -Evaluation ----------- +- for numeric to numeric pairs: + `Pearson's `_ + and `Spearman's correlation `_ + coefficients. +- for numeric to categorical pairs: + `One-way Analysis of Variance `_, with the + categorical field as the predictor variable. +- for categorical to categorical pairs: + `contingency table (or two-way table) `_, + `Chi-square test of independence `_ + , and `Cramer's V `_ + and `Tschuprow's T `_ coefficients. -The predictive performance of a model can be measured using many different -measures. In BigML these measures can be obtained by creating evaluations. To -create an evaluation you need the id of the model you are evaluating and the id -of the dataset that contains the data to be tested with. The result is shown -as: +An example of the correlation resource JSON structure is: .. code-block:: python - >>> evaluation = api.get_evaluation(evaluation) - >>> api.pprint(evaluation['object']['result']) - { 'class_names': ['0', '1'], - 'mode': { 'accuracy': 0.9802, - 'average_f_measure': 0.495, - 'average_phi': 0, - 'average_precision': 0.5, - 'average_recall': 0.4901, - 'confusion_matrix': [[99, 0], [2, 0]], - 'per_class_statistics': [ { 'accuracy': 0.9801980198019802, - 'class_name': '0', - 'f_measure': 0.99, - 'phi_coefficient': 0, - 'precision': 1.0, - 'present_in_test_data': True, - 'recall': 0.9801980198019802}, - { 'accuracy': 0.9801980198019802, - 'class_name': '1', - 'f_measure': 0, - 'phi_coefficient': 0, - 'precision': 0.0, - 'present_in_test_data': True, - 'recall': 0}]}, - 'model': { 'accuracy': 0.9901, - 'average_f_measure': 0.89746, - 'average_phi': 0.81236, - 'average_precision': 0.99495, - 'average_recall': 0.83333, - 'confusion_matrix': [[98, 1], [0, 2]], - 'per_class_statistics': [ { 'accuracy': 0.9900990099009901, - 'class_name': '0', - 'f_measure': 0.9949238578680203, - 'phi_coefficient': 0.8123623944599232, - 
'precision': 0.98989898989899, - 'present_in_test_data': True, - 'recall': 1.0}, - { 'accuracy': 0.9900990099009901, - 'class_name': '1', - 'f_measure': 0.8, - 'phi_coefficient': 0.8123623944599232, - 'precision': 1.0, - 'present_in_test_data': True, - 'recall': 0.6666666666666666}]}, - 'random': { 'accuracy': 0.50495, - 'average_f_measure': 0.36812, - 'average_phi': 0.13797, - 'average_precision': 0.74747, - 'average_recall': 0.51923, - 'confusion_matrix': [[49, 50], [0, 2]], - 'per_class_statistics': [ { 'accuracy': 0.504950495049505, - 'class_name': '0', - 'f_measure': 0.6621621621621622, - 'phi_coefficient': 0.1379728923974526, - 'precision': 0.494949494949495, - 'present_in_test_data': True, - 'recall': 1.0}, - { 'accuracy': 0.504950495049505, - 'class_name': '1', - 'f_measure': 0.07407407407407407, - 'phi_coefficient': 0.1379728923974526, - 'precision': 1.0, - 'present_in_test_data': True, - 'recall': 0.038461538461538464}]}} + >>> from bigml.api import BigML + >>> api = BigML() + >>> correlation = api.create_correlation('dataset/55b7a6749841fa2500000d41') + >>> api.ok(correlation) + >>> api.pprint(correlation['object']) + { 'category': 0, + 'clones': 0, + 'code': 200, + 'columns': 5, + 'correlations': { 'correlations': [ { 'name': 'one_way_anova', + 'result': { '000000': { 'eta_square': 0.61871, + 'f_ratio': 119.2645, + 'p_value': 0, + 'significant': [ True, + True, + True]}, + '000001': { 'eta_square': 0.40078, + 'f_ratio': 49.16004, + 'p_value': 0, + 'significant': [ True, + True, + True]}, + '000002': { 'eta_square': 0.94137, + 'f_ratio': 1180.16118, + 'p_value': 0, + 'significant': [ True, + True, + True]}, + '000003': { 'eta_square': 0.92888, + 'f_ratio': 960.00715, + 'p_value': 0, + 'significant': [ True, + True, + True]}}}], + 'fields': { '000000': { 'column_number': 0, + 'datatype': 'double', + 'idx': 0, + 'name': 'sepal length', + 'optype': 'numeric', + 'order': 0, + 'preferred': True, + 'summary': { 'bins': [ [ 4.3, + 1], + [ 4.425, + 4], + ... 
+ [ 7.9, + 1]], + 'kurtosis': -0.57357, + 'maximum': 7.9, + 'mean': 5.84333, + 'median': 5.8, + 'minimum': 4.3, + 'missing_count': 0, + 'population': 150, + 'skewness': 0.31175, + 'splits': [ 4.51526, + 4.67252, + 4.81113, + 4.89582, + 4.96139, + 5.01131, + ... + 6.92597, + 7.20423, + 7.64746], + 'standard_deviation': 0.82807, + 'sum': 876.5, + 'sum_squares': 5223.85, + 'variance': 0.68569}}, + '000001': { 'column_number': 1, + 'datatype': 'double', + 'idx': 1, + 'name': 'sepal width', + 'optype': 'numeric', + 'order': 1, + 'preferred': True, + 'summary': { 'counts': [ [ 2, + 1], + [ 2.2, + ... + '000004': { 'column_number': 4, + 'datatype': 'string', + 'idx': 4, + 'name': 'species', + 'optype': 'categorical', + 'order': 4, + 'preferred': True, + 'summary': { 'categories': [ [ 'Iris-setosa', + 50], + [ 'Iris-versicolor', + 50], + [ 'Iris-virginica', + 50]], + 'missing_count': 0}, + 'term_analysis': { 'enabled': True}}}, + 'significance_levels': [0.01, 0.05, 0.1]}, + 'created': '2015-07-28T18:07:37.010000', + 'credits': 0.017581939697265625, + 'dataset': 'dataset/55b7a6749841fa2500000d41', + 'dataset_status': True, + 'dataset_type': 0, + 'description': '', + 'excluded_fields': [], + 'fields_meta': { 'count': 5, + 'limit': 1000, + 'offset': 0, + 'query_total': 5, + 'total': 5}, + 'input_fields': ['000000', '000001', '000002', '000003'], + 'locale': 'en_US', + 'max_columns': 5, + 'max_rows': 150, + 'name': u"iris' dataset correlation", + 'objective_field_details': { 'column_number': 4, + 'datatype': 'string', + 'name': 'species', + 'optype': 'categorical', + 'order': 4}, + 'out_of_bag': False, + 'price': 0.0, + 'private': True, + 'project': None, + 'range': [1, 150], + 'replacement': False, + 'resource': 'correlation/55b7c4e99841fa24f20009bf', + 'rows': 150, + 'sample_rate': 1.0, + 'shared': False, + 'size': 4609, + 'source': 'source/55b7a6729841fa24f100036a', + 'source_status': True, + 'status': { 'code': 5, + 'elapsed': 274, + 'message': 'The correlation has been 
created', + 'progress': 1.0}, + 'subscription': True, + 'tags': [], + 'updated': '2015-07-28T18:07:49.057000', + 'white_box': False} -where two levels of detail are easily identified. For classifications, -the first level shows these keys: +Note that the output in the snippet above has been abbreviated. As you see, the +``correlations`` attribute contains the information about each field +correlation to the objective field. -- **class_names**: A list with the names of all the categories for the objective field (i.e., all the classes) -- **mode**: A detailed result object. Measures of the performance of the classifier that predicts the mode class for all the instances in the dataset -- **model**: A detailed result object. -- **random**: A detailed result object. Measures the performance of the classifier that predicts a random class for all the instances in the dataset. +You can check the correlations properties at the `API documentation +`_. -and the detailed result objects include ``accuracy``, ``average_f_measure``, ``average_phi``, -``average_precision``, ``average_recall``, ``confusion_matrix`` -and ``per_class_statistics``. -For regressions first level will contain these keys: +Statistical Tests +~~~~~~~~~~~~~~~~~ -- **mean**: A detailed result object. Measures the performance of the model that predicts the mean for all the instances in the dataset. -- **model**: A detailed result object. -- **random**: A detailed result object. Measures the performance of the model that predicts a random class for all the instances in the dataset. +A ``statisticaltest`` resource contains a series of tests +that compare the +distribution of data in each numeric field of a dataset +to certain canonical distributions, +such as the +`normal distribution `_ +or `Benford's law `_ +distribution. Statistical test are useful in tasks such as fraud, normality, +or outlier detection. 
-where the detailed result objects include ``mean_absolute_error``, -``mean_squared_error`` and ``r_squared`` (refer to -`developers documentation `_ for -more info on the meaning of these measures. +- Fraud Detection Tests: +Benford: This statistical test performs a comparison of the distribution of +first significant digits (FSDs) of each value of the field to the Benford's +law distribution. Benford's law applies to numerical distributions spanning +several orders of magnitude, such as the values found on financial balance +sheets. It states that the frequency distribution of leading, or first +significant digits (FSD) in such distributions is not uniform. +On the contrary, lower digits like 1 and 2 occur disproportionately +often as leading significant digits. The test compares the distribution +in the field to Bendford's distribution using a Chi-square goodness-of-fit +test, and Cho-Gaines d test. If a field has a dissimilar distribution, +it may contain anomalous or fraudulent values. -You can check the evaluation properties at the `API documentation -`_. +- Normality tests: +These tests can be used to confirm the assumption that the data in each field +of a dataset is distributed according to a normal distribution. The results +are relevant because many statistical and machine learning techniques rely on +this assumption. +Anderson-Darling: The Anderson-Darling test computes a test statistic based on +the difference between the observed cumulative distribution function (CDF) to +that of a normal distribution. A significant result indicates that the +assumption of normality is rejected. +Jarque-Bera: The Jarque-Bera test computes a test statistic based on the third +and fourth central moments (skewness and kurtosis) of the data. Again, a +significant result indicates that the normality assumption is rejected. 
+Z-score: For a given sample size, the maximum deviation from the mean that +would expected in a sampling of a normal distribution can be computed based +on the 68-95-99.7 rule. This test simply reports this expected deviation and +the actual deviation observed in the data, as a sort of sanity check. -Cluster -------- +- Outlier tests: +Grubbs: When the values of a field are normally distributed, a few values may +still deviate from the mean distribution. The outlier tests reports whether +at least one value in each numeric field differs significantly from the mean +using Grubb's test for outliers. If an outlier is found, then its value will +be returned. -For unsupervised learning problems, the cluster is used to classify in a -limited number of groups your training data. The cluster structure is defined -by the centers of each group of data, named centroids, and the data enclosed -in the group. As for in the model's case, the cluster is a white-box resource -and can be retrieved as a JSON: +The JSON structure for ``statisticaltest`` resources is similar to this one: .. code-block:: python - >>> cluster = api.get_cluster(cluster) - >>> api.pprint(cluster['object']) - { 'balance_fields': True, - 'category': 0, - 'cluster_datasets': { '000000': '', '000001': '', '000002': ''}, - 'cluster_datasets_ids': { '000000': '53739b9ae4b0dad82b0a65e6', - '000001': '53739b9ae4b0dad82b0a65e7', - '000002': '53739b9ae4b0dad82b0a65e8'}, - 'cluster_seed': '2c249dda00fbf54ab4cdd850532a584f286af5b6', - 'clusters': { 'clusters': [ { 'center': { '000000': 58.5, - '000001': 26.8314, - '000002': 44.27907, - '000003': 14.37209}, - 'count': 56, - 'distance': { 'bins': [ [ 0.69602, - 2], - [ ... 
] - [ 3.77052, - 1]], - 'maximum': 3.77052, - 'mean': 1.61711, - 'median': 1.52146, - 'minimum': 0.69237, - 'population': 56, - 'standard_deviation': 0.6161, - 'sum': 90.55805, - 'sum_squares': 167.31926, - 'variance': 0.37958}, - 'id': '000000', - 'name': 'Cluster 0'}, - { 'center': { '000000': 50.06, - '000001': 34.28, - '000002': 14.62, - '000003': 2.46}, - 'count': 50, - 'distance': { 'bins': [ [ 0.16917, - 1], - [ ... ] - [ 4.94699, - 1]], - 'maximum': 4.94699, - 'mean': 1.50725, - 'median': 1.3393, - 'minimum': 0.16917, - 'population': 50, - 'standard_deviation': 1.00994, - 'sum': 75.36252, - 'sum_squares': 163.56918, - 'variance': 1.01998}, - 'id': '000001', - 'name': 'Cluster 1'}, - { 'center': { '000000': 68.15625, - '000001': 31.25781, - '000002': 55.48438, - '000003': 19.96875}, - 'count': 44, - 'distance': { 'bins': [ [ 0.36825, - 1], - [ ... ] - [ 3.87216, - 1]], - 'maximum': 3.87216, - 'mean': 1.67264, - 'median': 1.63705, - 'minimum': 0.36825, - 'population': 44, - 'standard_deviation': 0.78905, - 'sum': 73.59627, - 'sum_squares': 149.87194, - 'variance': 0.6226}, - 'id': '000002', - 'name': 'Cluster 2'}], - 'fields': { '000000': { 'column_number': 0, - 'datatype': 'int8', - 'name': 'sepal length', - 'optype': 'numeric', - 'order': 0, - 'preferred': True, - 'summary': { 'bins': [ [ 43.75, - 4], - [ ... ] - [ 79, - 1]], - 'maximum': 79, - 'mean': 58.43333, - 'median': 57.7889, - 'minimum': 43, - 'missing_count': 0, - 'population': 150, - 'splits': [ 45.15258, - 46.72525, - 72.04226, - 76.47461], - 'standard_deviation': 8.28066, - 'sum': 8765, - 'sum_squares': 522385, - 'variance': 68.56935}}, - [ ... 
] - [ 25, - 3]], - 'maximum': 25, - 'mean': 11.99333, - 'median': 13.28483, - 'minimum': 1, - 'missing_count': 0, - 'population': 150, - 'standard_deviation': 7.62238, - 'sum': 1799, - 'sum_squares': 30233, - 'variance': 58.10063}}}}, - 'code': 202, - 'columns': 4, - 'created': '2014-05-14T16:36:40.993000', - 'credits': 0.017578125, - 'credits_per_prediction': 0.0, - 'dataset': 'dataset/53739b88c8db63122b000411', - 'dataset_field_types': { 'categorical': 1, - 'datetime': 0, - 'numeric': 4, - 'preferred': 5, - 'text': 0, - 'total': 5}, + >>> statistical_test = api.create_statistical_test('dataset/55b7a6749841fa2500000d41') + >>> api.ok(statistical_test) + True + >>> api.pprint(statistical_test['object']) + { 'category': 0, + 'clones': 0, + 'code': 200, + 'columns': 5, + 'created': '2015-07-28T18:16:40.582000', + 'credits': 0.017581939697265625, + 'dataset': 'dataset/55b7a6749841fa2500000d41', 'dataset_status': True, 'dataset_type': 0, 'description': '', - 'excluded_fields': ['000004'], - 'field_scales': None, - 'fields_meta': { 'count': 4, - 'limit': 1000, - 'offset': 0, - 'query_total': 4, - 'total': 4}, + 'excluded_fields': [], + 'fields_meta': { 'count': 5, + 'limit': 1000, + 'offset': 0, + 'query_total': 5, + 'total': 5}, 'input_fields': ['000000', '000001', '000002', '000003'], - 'k': 3, - 'locale': 'es-ES', + 'locale': 'en_US', 'max_columns': 5, 'max_rows': 150, - 'name': 'my iris', - 'number_of_batchcentroids': 0, - 'number_of_centroids': 0, - 'number_of_public_centroids': 0, + 'name': u"iris' dataset test", 'out_of_bag': False, 'price': 0.0, 'private': True, + 'project': None, 'range': [1, 150], 'replacement': False, - 'resource': 'cluster/53739b98d994972da7001de9', + 'resource': 'statisticaltest/55b7c7089841fa25000010ad', 'rows': 150, 'sample_rate': 1.0, - 'scales': { '000000': 0.22445382597655375, - '000001': 0.4264213814821549, - '000002': 0.10528680248949522, - '000003': 0.2438379900517961}, 'shared': False, - 'size': 4608, - 'source': 
'source/53739b24d994972da7001ddd', + 'size': 4609, + 'source': 'source/55b7a6729841fa24f100036a', 'source_status': True, 'status': { 'code': 5, - 'elapsed': 1009, - 'message': 'The cluster has been created', - 'progress': 1.0}, + 'elapsed': 302, + 'message': 'The test has been created', + 'progress': 1.0}, 'subscription': True, 'tags': [], - 'updated': '2014-05-14T16:40:26.234728', - 'white_box': False} - -(Note that we have abbreviated the output in the snippet above for -readability: the full predictive cluster you'll get is going to contain -much more details). - -You can check the cluster properties at the `API documentation -`_. - -Anomaly detector ----------------- - -For anomaly detection problems, BigML anomaly detector uses iforest as an -unsupervised kind of model that detects anomalous data in a dataset. The -information it returns encloses a `top_anomalies` block -that contains a list of the most anomalous -points. For each, we capture a `score` from 0 to 1. The closer to 1, -the more anomalous. We also capture the `row` which gives values for -each field in the order defined by `input_fields`. Similarly we give -a list of `importances` which match the `row` values. These -importances tell us which values contributed most to the anomaly -score. Thus, the structure of an anomaly detector is similar to: - -.. 
code-block:: python - - { 'category': 0, - 'code': 200, - 'columns': 14, - 'constraints': False, - 'created': '2014-09-08T18:51:11.893000', - 'credits': 0.11653518676757812, - 'credits_per_prediction': 0.0, - 'dataset': 'dataset/540dfa9d9841fa5c88000765', - 'dataset_field_types': { 'categorical': 21, - 'datetime': 0, - 'numeric': 21, - 'preferred': 14, - 'text': 0, - 'total': 42}, - 'dataset_status': True, - 'dataset_type': 0, - 'description': '', - 'excluded_fields': [], - 'fields_meta': { 'count': 14, - 'limit': 1000, - 'offset': 0, - 'query_total': 14, - 'total': 14}, - 'forest_size': 128, - 'input_fields': [ '000004', - '000005', - '000009', - '000016', - '000017', - '000018', - '000019', - '00001e', - '00001f', - '000020', - '000023', - '000024', - '000025', - '000026'], - 'locale': 'en_US', - 'max_columns': 42, - 'max_rows': 200, - 'model': { 'fields': { '000004': { 'column_number': 4, - 'datatype': 'int16', - 'name': 'src_bytes', - 'optype': 'numeric', - 'order': 0, - 'preferred': True, - 'summary': { 'bins': [ [ 143, - 2], - ... - [ 370, - 2]], - 'maximum': 370, - 'mean': 248.235, - 'median': 234.57157, - 'minimum': 141, - 'missing_count': 0, - 'population': 200, - 'splits': [ 159.92462, - 173.73312, - 188, - ... - 339.55228], - 'standard_deviation': 49.39869, - 'sum': 49647, - 'sum_squares': 12809729, - 'variance': 2440.23093}}, - '000005': { 'column_number': 5, - 'datatype': 'int32', - 'name': 'dst_bytes', - 'optype': 'numeric', - 'order': 1, - 'preferred': True, - ... - 'sum': 1030851, - 'sum_squares': 22764504759, - 'variance': 87694652.45224}}, - '000009': { 'column_number': 9, - 'datatype': 'string', - 'name': 'hot', - 'optype': 'categorical', - 'order': 2, - 'preferred': True, - 'summary': { 'categories': [ [ '0', - 199], - [ '1', - 1]], - 'missing_count': 0}, - 'term_analysis': { 'enabled': True}}, - '000016': { 'column_number': 22, - 'datatype': 'int8', - 'name': 'count', - 'optype': 'numeric', - 'order': 3, - 'preferred': True, - ... 
- 'population': 200, - 'standard_deviation': 5.42421, - 'sum': 1351, - 'sum_squares': 14981, - 'variance': 29.42209}}, - '000017': { ... }}}, - 'kind': 'iforest', - 'mean_depth': 12.314174107142858, - 'top_anomalies': [ { 'importance': [ 0.06768, - 0.01667, - 0.00081, - 0.02437, - 0.04773, - 0.22197, - 0.18208, - 0.01868, - 0.11855, - 0.01983, - 0.01898, - 0.05306, - 0.20398, - 0.00562], - 'row': [ 183.0, - 8654.0, - '0', - 4.0, - 4.0, - 0.25, - 0.25, - 0.0, - 123.0, - 255.0, - 0.01, - 0.04, - 0.01, - 0.0], - 'score': 0.68782}, - { 'importance': [ 0.05645, - 0.02285, - 0.0015, - 0.05196, - 0.04435, - 0.0005, - 0.00056, - 0.18979, - 0.12402, - 0.23671, - 0.20723, - 0.05651, - 0.00144, - 0.00612], - 'row': [ 212.0, - 1940.0, - '0', - 1.0, - 2.0, - 0.0, - 0.0, - 1.0, - 1.0, - 69.0, - 1.0, - 0.04, - 0.0, - 0.0], - 'score': 0.6239}, - ...], - 'trees': [ { 'root': { 'children': [ { 'children': [ { 'children': [ { 'children': [ { 'children': - [ { 'population': 1, - 'predicates': [ { 'field': '00001f', - 'op': '>', - 'value': 35.54357}]}, - - ... 
- { 'population': 1, - 'predicates': [ { 'field': '00001f', - 'op': '<=', - 'value': 35.54357}]}], - 'population': 2, - 'predicates': [ { 'field': '000005', - 'op': '<=', - 'value': 1385.5166}]}], - 'population': 3, - 'predicates': [ { 'field': '000020', - 'op': '<=', - 'value': 65.14308}, - { 'field': '000019', - 'op': '=', - 'value': 0}]}], - 'population': 105, - 'predicates': [ { 'field': '000017', - 'op': '<=', - 'value': 13.21754}, - { 'field': '000009', - 'op': 'in', - 'value': [ '0']}]}], - 'population': 126, - 'predicates': [ True, - { 'field': '000018', - 'op': '=', - 'value': 0}]}, - 'training_mean_depth': 11.071428571428571}]}, - 'name': "tiny_kdd's dataset anomaly detector", - 'number_of_batchscores': 0, - 'number_of_public_predictions': 0, - 'number_of_scores': 0, - 'out_of_bag': False, - 'price': 0.0, - 'private': True, - 'project': None, - 'range': [1, 200], - 'replacement': False, - 'resource': 'anomaly/540dfa9f9841fa5c8800076a', - 'rows': 200, - 'sample_rate': 1.0, - 'sample_size': 126, - 'seed': 'BigML', - 'shared': False, - 'size': 30549, - 'source': 'source/540dfa979841fa5c7f000363', - 'source_status': True, - 'status': { 'code': 5, - 'elapsed': 32397, - 'message': 'The anomaly detector has been created', - 'progress': 1.0}, - 'subscription': False, - 'tags': [], - 'updated': '2014-09-08T23:54:28.647000', - 'white_box': False} - -Note that we have abbreviated the output in the snippet above for -readability: the full anomaly detector you'll get is going to contain -much more details). - -The `trees` list contains the actual isolation forest, and it can be quite -large usually. That's why, this part of the resource should only be included -in downloads when needed. If you are only interested in other properties, such -as `top_anomalies`, you'll improve performance by excluding it, using the -`excluded=trees` query string in the API call: - -.. 
code-block:: python - - anomaly = api.get_anomaly('anomaly/540dfa9f9841fa5c8800076a', \ - query_string='excluded=trees') - -Each node in an isolation tree can have multiple predicates. -For the node to be a valid branch when evaluated with a data point, all of its -predicates must be true. - -You can check the anomaly detector properties at the `API documentation -`_. - -Samples -------- - -To provide quick access to your row data you can create a ``sample``. Samples -are in-memory objects that can be queried for subsets of data by limiting -their size, the fields or the rows returned. The structure of a sample would -be:: - -Samples are not permanent objects. Once they are created, they will be -available as long as GETs are requested within periods smaller than -a pre-established TTL (Time to Live). The expiration timer of a sample is -reset every time a new GET is received. - -If requested, a sample can also perform linear regression and compute -Pearson's and Spearman's correlations for either one numeric field -against all other numeric fields or between two specific numeric fields. - -You can check the sample properties at the `API documentation -`_. - -Correlations ------------- - -A ``correlation`` resource contains a series of computations that reflect the -degree of dependence between the field set as objective for your predictions -and the rest of fields in your dataset. The dependence degree is obtained by -comparing the distributions in every objective and non-objective field pair, -as independent fields should have probabilistic -independent distributions. Depending on the types of the fields to compare, -the metrics used to compute the correlation degree will be: - -- for numeric to numeric pairs: - `Pearson's `_ - and `Spearman's correlation `_ - coefficients. -- for numeric to categorical pairs: - `One-way Analysis of Variance `_, with the - categorical field as the predictor variable. 
-- for categorical to categorical pairs: - `contingency table (or two-way table) `_, - `Chi-square test of independence `_ - , and `Cramer's V `_ - and `Tschuprow's T `_ coefficients. - -An example of the correlation resource JSON structure is: - -.. code-block:: python - - >>> from bigml.api import BigML - >>> api = BigML() - >>> correlation = api.create_correlation('dataset/55b7a6749841fa2500000d41') - >>> api.ok(correlation) - >>> api.pprint(correlation['object']) - { u'category': 0, - u'clones': 0, - u'code': 200, - u'columns': 5, - u'correlations': { u'correlations': [ { u'name': u'one_way_anova', - u'result': { u'000000': { u'eta_square': 0.61871, - u'f_ratio': 119.2645, - u'p_value': 0, - u'significant': [ True, - True, - True]}, - u'000001': { u'eta_square': 0.40078, - u'f_ratio': 49.16004, - u'p_value': 0, - u'significant': [ True, - True, - True]}, - u'000002': { u'eta_square': 0.94137, - u'f_ratio': 1180.16118, - u'p_value': 0, - u'significant': [ True, - True, - True]}, - u'000003': { u'eta_square': 0.92888, - u'f_ratio': 960.00715, - u'p_value': 0, - u'significant': [ True, - True, - True]}}}], - u'fields': { u'000000': { u'column_number': 0, - u'datatype': u'double', - u'idx': 0, - u'name': u'sepal length', - u'optype': u'numeric', - u'order': 0, - u'preferred': True, - u'summary': { u'bins': [ [ 4.3, - 1], - [ 4.425, - 4], - ... - [ 7.9, - 1]], - u'kurtosis': -0.57357, - u'maximum': 7.9, - u'mean': 5.84333, - u'median': 5.8, - u'minimum': 4.3, - u'missing_count': 0, - u'population': 150, - u'skewness': 0.31175, - u'splits': [ 4.51526, - 4.67252, - 4.81113, - 4.89582, - 4.96139, - 5.01131, - ... - 6.92597, - 7.20423, - 7.64746], - u'standard_deviation': 0.82807, - u'sum': 876.5, - u'sum_squares': 5223.85, - u'variance': 0.68569}}, - u'000001': { u'column_number': 1, - u'datatype': u'double', - u'idx': 1, - u'name': u'sepal width', - u'optype': u'numeric', - u'order': 1, - u'preferred': True, - u'summary': { u'counts': [ [ 2, - 1], - [ 2.2, - ... 
- u'000004': { u'column_number': 4, - u'datatype': u'string', - u'idx': 4, - u'name': u'species', - u'optype': u'categorical', - u'order': 4, - u'preferred': True, - u'summary': { u'categories': [ [ u'Iris-setosa', - 50], - [ u'Iris-versicolor', - 50], - [ u'Iris-virginica', - 50]], - u'missing_count': 0}, - u'term_analysis': { u'enabled': True}}}, - u'significance_levels': [0.01, 0.05, 0.1]}, - u'created': u'2015-07-28T18:07:37.010000', - u'credits': 0.017581939697265625, - u'dataset': u'dataset/55b7a6749841fa2500000d41', - u'dataset_status': True, - u'dataset_type': 0, - u'description': u'', - u'excluded_fields': [], - u'fields_meta': { u'count': 5, - u'limit': 1000, - u'offset': 0, - u'query_total': 5, - u'total': 5}, - u'input_fields': [u'000000', u'000001', u'000002', u'000003'], - u'locale': u'en_US', - u'max_columns': 5, - u'max_rows': 150, - u'name': u"iris' dataset correlation", - u'objective_field_details': { u'column_number': 4, - u'datatype': u'string', - u'name': u'species', - u'optype': u'categorical', - u'order': 4}, - u'out_of_bag': False, - u'price': 0.0, - u'private': True, - u'project': None, - u'range': [1, 150], - u'replacement': False, - u'resource': u'correlation/55b7c4e99841fa24f20009bf', - u'rows': 150, - u'sample_rate': 1.0, - u'shared': False, - u'size': 4609, - u'source': u'source/55b7a6729841fa24f100036a', - u'source_status': True, - u'status': { u'code': 5, - u'elapsed': 274, - u'message': u'The correlation has been created', - u'progress': 1.0}, - u'subscription': True, - u'tags': [], - u'updated': u'2015-07-28T18:07:49.057000', - u'white_box': False} - -Note that the output in the snippet above has been abbreviated. As you see, the -``correlations`` attribute contains the information about each field -correlation to the objective field. - -You can check the correlations properties at the `API documentation -`_. 
- - -Statistical Tests ------------------ - -A ``statisticaltest`` resource contains a series of tests -that compare the -distribution of data in each numeric field of a dataset -to certain canonical distributions, -such as the -`normal distribution `_ -or `Benford's law `_ -distribution. Statistical test are useful in tasks such as fraud, normality, -or outlier detection. - -- Fraud Detection Tests: -Benford: This statistical test performs a comparison of the distribution of -first significant digits (FSDs) of each value of the field to the Benford's -law distribution. Benford's law applies to numerical distributions spanning -several orders of magnitude, such as the values found on financial balance -sheets. It states that the frequency distribution of leading, or first -significant digits (FSD) in such distributions is not uniform. -On the contrary, lower digits like 1 and 2 occur disproportionately -often as leading significant digits. The test compares the distribution -in the field to Bendford's distribution using a Chi-square goodness-of-fit -test, and Cho-Gaines d test. If a field has a dissimilar distribution, -it may contain anomalous or fraudulent values. - -- Normality tests: -These tests can be used to confirm the assumption that the data in each field -of a dataset is distributed according to a normal distribution. The results -are relevant because many statistical and machine learning techniques rely on -this assumption. -Anderson-Darling: The Anderson-Darling test computes a test statistic based on -the difference between the observed cumulative distribution function (CDF) to -that of a normal distribution. A significant result indicates that the -assumption of normality is rejected. -Jarque-Bera: The Jarque-Bera test computes a test statistic based on the third -and fourth central moments (skewness and kurtosis) of the data. Again, a -significant result indicates that the normality assumption is rejected. 
-Z-score: For a given sample size, the maximum deviation from the mean that -would expected in a sampling of a normal distribution can be computed based -on the 68-95-99.7 rule. This test simply reports this expected deviation and -the actual deviation observed in the data, as a sort of sanity check. - -- Outlier tests: -Grubbs: When the values of a field are normally distributed, a few values may -still deviate from the mean distribution. The outlier tests reports whether -at least one value in each numeric field differs significantly from the mean -using Grubb's test for outliers. If an outlier is found, then its value will -be returned. - -The JSON structure for ``statisticaltest`` resources is similar to this one: - -.. code-block:: python - - >>> statistical_test = api.create_statistical_test('dataset/55b7a6749841fa2500000d41') - >>> api.ok(statistical_test) - True - >>> api.pprint(statistical_test['object']) - { u'category': 0, - u'clones': 0, - u'code': 200, - u'columns': 5, - u'created': u'2015-07-28T18:16:40.582000', - u'credits': 0.017581939697265625, - u'dataset': u'dataset/55b7a6749841fa2500000d41', - u'dataset_status': True, - u'dataset_type': 0, - u'description': u'', - u'excluded_fields': [], - u'fields_meta': { u'count': 5, - u'limit': 1000, - u'offset': 0, - u'query_total': 5, - u'total': 5}, - u'input_fields': [u'000000', u'000001', u'000002', u'000003'], - u'locale': u'en_US', - u'max_columns': 5, - u'max_rows': 150, - u'name': u"iris' dataset test", - u'out_of_bag': False, - u'price': 0.0, - u'private': True, - u'project': None, - u'range': [1, 150], - u'replacement': False, - u'resource': u'statisticaltest/55b7c7089841fa25000010ad', - u'rows': 150, - u'sample_rate': 1.0, - u'shared': False, - u'size': 4609, - u'source': u'source/55b7a6729841fa24f100036a', - u'source_status': True, - u'status': { u'code': 5, - u'elapsed': 302, - u'message': u'The test has been created', - u'progress': 1.0}, - u'subscription': True, - u'tags': [], - 
u'statistical_tests': { u'ad_sample_size': 1024, - u'fields': { u'000000': { u'column_number': 0, - u'datatype': u'double', - u'idx': 0, - u'name': u'sepal length', - u'optype': u'numeric', - u'order': 0, - u'preferred': True, - u'summary': { u'bins': [ [ 4.3, - 1], - [ 4.425, - 4], + 'statistical_tests': { 'ad_sample_size': 1024, + 'fields': { '000000': { 'column_number': 0, + 'datatype': 'double', + 'idx': 0, + 'name': 'sepal length', + 'optype': 'numeric', + 'order': 0, + 'preferred': True, + 'summary': { 'bins': [ [ 4.3, + 1], + [ 4.425, + 4], ... [ 7.9, 1]], - u'kurtosis': -0.57357, - u'maximum': 7.9, - u'mean': 5.84333, - u'median': 5.8, - u'minimum': 4.3, - u'missing_count': 0, - u'population': 150, - u'skewness': 0.31175, - u'splits': [ 4.51526, + 'kurtosis': -0.57357, + 'maximum': 7.9, + 'mean': 5.84333, + 'median': 5.8, + 'minimum': 4.3, + 'missing_count': 0, + 'population': 150, + 'skewness': 0.31175, + 'splits': [ 4.51526, 4.67252, 4.81113, 4.89582, ... 7.20423, 7.64746], - u'standard_deviation': 0.82807, - u'sum': 876.5, - u'sum_squares': 5223.85, - u'variance': 0.68569}}, + 'standard_deviation': 0.82807, + 'sum': 876.5, + 'sum_squares': 5223.85, + 'variance': 0.68569}}, ... 
- u'000004': { u'column_number': 4, - u'datatype': u'string', - u'idx': 4, - u'name': u'species', - u'optype': u'categorical', - u'order': 4, - u'preferred': True, - u'summary': { u'categories': [ [ u'Iris-setosa', + '000004': { 'column_number': 4, + 'datatype': 'string', + 'idx': 4, + 'name': 'species', + 'optype': 'categorical', + 'order': 4, + 'preferred': True, + 'summary': { 'categories': [ [ 'Iris-setosa', 50], - [ u'Iris-versicolor', + [ 'Iris-versicolor', 50], - [ u'Iris-virginica', + [ 'Iris-virginica', 50]], - u'missing_count': 0}, - u'term_analysis': { u'enabled': True}}}, - u'fraud': [ { u'name': u'benford', - u'result': { u'000000': { u'chi_square': { u'chi_square_value': 506.39302, - u'p_value': 0, - u'significant': [ True, + 'missing_count': 0}, + 'term_analysis': { 'enabled': True}}}, + 'fraud': [ { 'name': 'benford', + 'result': { '000000': { 'chi_square': { 'chi_square_value': 506.39302, + 'p_value': 0, + 'significant': [ True, True, True]}, - u'cho_gaines': { u'd_statistic': 7.124311073683573, - u'significant': [ True, + 'cho_gaines': { 'd_statistic': 7.124311073683573, + 'significant': [ True, True, True]}, - u'distribution': [ 0, + 'distribution': [ 0, 0, 0, 22, @@ -1099,18 +575,18 @@ The JSON structure for ``statisticaltest`` resources is similar to this one: 13, 0, 0], - u'negatives': 0, - u'zeros': 0}, - u'000001': { u'chi_square': { u'chi_square_value': 396.76556, - u'p_value': 0, - u'significant': [ True, + 'negatives': 0, + 'zeros': 0}, + '000001': { 'chi_square': { 'chi_square_value': 396.76556, + 'p_value': 0, + 'significant': [ True, True, True]}, - u'cho_gaines': { u'd_statistic': 7.503503138331123, - u'significant': [ True, + 'cho_gaines': { 'd_statistic': 7.503503138331123, + 'significant': [ True, True, True]}, - u'distribution': [ 0, + 'distribution': [ 0, 57, 89, 4, @@ -1119,18 +595,18 @@ The JSON structure for ``statisticaltest`` resources is similar to this one: 0, 0, 0], - u'negatives': 0, - u'zeros': 0}, - u'000002': { 
u'chi_square': { u'chi_square_value': 154.20728, - u'p_value': 0, - u'significant': [ True, + 'negatives': 0, + 'zeros': 0}, + '000002': { 'chi_square': { 'chi_square_value': 154.20728, + 'p_value': 0, + 'significant': [ True, True, True]}, - u'cho_gaines': { u'd_statistic': 3.9229974017266054, - u'significant': [ True, + 'cho_gaines': { 'd_statistic': 3.9229974017266054, + 'significant': [ True, True, True]}, - u'distribution': [ 50, + 'distribution': [ 50, 0, 11, 43, @@ -1139,18 +615,18 @@ The JSON structure for ``statisticaltest`` resources is similar to this one: 0, 0, 0], - u'negatives': 0, - u'zeros': 0}, - u'000003': { u'chi_square': { u'chi_square_value': 111.4438, - u'p_value': 0, - u'significant': [ True, + 'negatives': 0, + 'zeros': 0}, + '000003': { 'chi_square': { 'chi_square_value': 111.4438, + 'p_value': 0, + 'significant': [ True, True, True]}, - u'cho_gaines': { u'd_statistic': 4.103257341299901, - u'significant': [ True, + 'cho_gaines': { 'd_statistic': 4.103257341299901, + 'significant': [ True, True, True]}, - u'distribution': [ 76, + 'distribution': [ 76, 58, 7, 7, @@ -1159,71 +635,71 @@ The JSON structure for ``statisticaltest`` resources is similar to this one: 0, 0, 0], - u'negatives': 0, - u'zeros': 0}}}], - u'normality': [ { u'name': u'anderson_darling', - u'result': { u'000000': { u'p_value': 0.02252, - u'significant': [ False, + 'negatives': 0, + 'zeros': 0}}}], + 'normality': [ { 'name': 'anderson_darling', + 'result': { '000000': { 'p_value': 0.02252, + 'significant': [ False, True, True]}, - u'000001': { u'p_value': 0.02023, - u'significant': [ False, + '000001': { 'p_value': 0.02023, + 'significant': [ False, True, True]}, - u'000002': { u'p_value': 0, - u'significant': [ True, + '000002': { 'p_value': 0, + 'significant': [ True, True, True]}, - u'000003': { u'p_value': 0, - u'significant': [ True, + '000003': { 'p_value': 0, + 'significant': [ True, True, True]}}}, - { u'name': u'jarque_bera', - u'result': { u'000000': { u'p_value': 
0.10615, - u'significant': [ False, + { 'name': 'jarque_bera', + 'result': { '000000': { 'p_value': 0.10615, + 'significant': [ False, False, False]}, - u'000001': { u'p_value': 0.25957, - u'significant': [ False, + '000001': { 'p_value': 0.25957, + 'significant': [ False, False, False]}, - u'000002': { u'p_value': 0.0009, - u'significant': [ True, + '000002': { 'p_value': 0.0009, + 'significant': [ True, True, True]}, - u'000003': { u'p_value': 0.00332, - u'significant': [ True, + '000003': { 'p_value': 0.00332, + 'significant': [ True, True, True]}}}, - { u'name': u'z_score', - u'result': { u'000000': { u'expected_max_z': 2.71305, - u'max_z': 2.48369}, - u'000001': { u'expected_max_z': 2.71305, - u'max_z': 3.08044}, - u'000002': { u'expected_max_z': 2.71305, - u'max_z': 1.77987}, - u'000003': { u'expected_max_z': 2.71305, - u'max_z': 1.70638}}}], - u'outliers': [ { u'name': u'grubbs', - u'result': { u'000000': { u'p_value': 1, - u'significant': [ False, + { 'name': 'z_score', + 'result': { '000000': { 'expected_max_z': 2.71305, + 'max_z': 2.48369}, + '000001': { 'expected_max_z': 2.71305, + 'max_z': 3.08044}, + '000002': { 'expected_max_z': 2.71305, + 'max_z': 1.77987}, + '000003': { 'expected_max_z': 2.71305, + 'max_z': 1.70638}}}], + 'outliers': [ { 'name': 'grubbs', + 'result': { '000000': { 'p_value': 1, + 'significant': [ False, False, False]}, - u'000001': { u'p_value': 0.26555, - u'significant': [ False, + '000001': { 'p_value': 0.26555, + 'significant': [ False, False, False]}, - u'000002': { u'p_value': 1, - u'significant': [ False, + '000002': { 'p_value': 1, + 'significant': [ False, False, False]}, - u'000003': { u'p_value': 1, - u'significant': [ False, + '000003': { 'p_value': 1, + 'significant': [ False, False, False]}}}], - u'significance_levels': [0.01, 0.05, 0.1]}, - u'updated': u'2015-07-28T18:17:11.829000', - u'white_box': False} + 'significance_levels': [0.01, 0.05, 0.1]}, + 'updated': '2015-07-28T18:17:11.829000', + 'white_box': False} Note 
that the output in the snippet above has been abbreviated. As you see, the ``statistical_tests`` attribute contains the ``fraud`, ``normality`` @@ -1231,10 +707,236 @@ and ``outliers`` sections where the information for each field's distribution is stored. You can check the statistical tests properties at the `API documentation -`_. +`_. + + +Supervised Models +----------------- + +Model +~~~~~ + +One of the greatest things about BigML is that the models that it +generates for you are fully white-boxed. To get the explicit tree-like +predictive model for the example above: + +.. code-block:: python + + >>> model = api.get_model(model) + >>> api.pprint(model['object']['model']['root']) + {'children': [ + {'children': [ + {'children': [{'count': 38, + 'distribution': [['Iris-virginica', 38]], + 'output': 'Iris-virginica', + 'predicate': {'field': '000002', + 'operator': '>', + 'value': 5.05}}, + 'children': [ + + [ ... ] + + {'count': 50, + 'distribution': [['Iris-setosa', 50]], + 'output': 'Iris-setosa', + 'predicate': {'field': '000002', + 'operator': '<=', + 'value': 2.45}}]}, + {'count': 150, + 'distribution': [['Iris-virginica', 50], + ['Iris-versicolor', 50], + ['Iris-setosa', 50]], + 'output': 'Iris-virginica', + 'predicate': True}]}}} + +(Note that we have abbreviated the output in the snippet above for +readability: the full predictive model yo'll get is going to contain +much more details). + +Again, filtering options are also available using a query string expression, +for instance: + +.. code-block:: python + + >>> model = api.get_model(model, "limit=5") + +limits the number of fields that will be included in ``model`` to 5. + +You can check the model properties at the `API documentation +`_. + + +Linear Regressions +~~~~~~~~~~~~~~~~~~ + +A linear regression is a supervised machine learning method for +solving regression problems by computing the objective as a linear +combination of factors. 
The implementation is a multiple linear regression +that models the output as a linear combination of the predictors. +The coefficients are estimated doing a least-squares fit on the training data. + +As a linear combination can only be done using numeric values, non-numeric +fields need to be transformed to numeric ones following some rules: + +- Categorical fields will be encoded and each class appearance in input data + will convey a different contribution to the input vector. +- Text and items fields will be expanded to several numeric predictors, + each one indicating the number of occurences for a specific term. + Text fields without term analysis are excluded from the model. + +Therefore, the initial input data is transformed into an input vector with one +or may components per field. Also, if a field in the training data contains +missing data, the components corresponding to that field will include an +additional 1 or 0 value depending on whether the field is missing in the +input data or not. + +The JSON structure for a linear regression is: + +.. 
code-block:: python + + >>> api.pprint(linear_regression["object"]) + { 'category': 0, + 'code': 200, + 'columns': 4, + 'composites': None, + 'configuration': None, + 'configuration_status': False, + 'created': '2019-02-20T21:02:40.027000', + 'creator': 'merce', + 'credits': 0.0, + 'credits_per_prediction': 0.0, + 'dataset': 'dataset/5c6dc06a983efc18e2000084', + 'dataset_field_types': { 'categorical': 0, + 'datetime': 0, + 'items': 0, + 'numeric': 6, + 'preferred': 6, + 'text': 0, + 'total': 6}, + 'dataset_status': True, + 'datasets': [], + 'default_numeric_value': None, + 'description': '', + 'excluded_fields': [], + 'execution_id': None, + 'execution_status': None, + 'fields_maps': None, + 'fields_meta': { 'count': 4, + 'limit': 1000, + 'offset': 0, + 'query_total': 4, + 'total': 4}, + 'fusions': None, + 'input_fields': ['000000', '000001', '000002'], + 'linear_regression': { 'bias': True, + 'coefficients': [ [-1.88196], + [0.475633], + [0.122468], + [30.9141]], + 'fields': { '000000': { 'column_number': 0, + 'datatype': 'int8', + 'name': 'Prefix', + 'optype': 'numeric', + 'order': 0, + 'preferred': True, + 'summary': { 'counts': [ [ 4, + 1], + + ... 
+ 'stats': { 'confidence_intervals': [ [ 5.63628], + [ 0.375062], + [ 0.348577], + [ 44.4112]], + 'mean_squared_error': 342.206, + 'number_of_parameters': 4, + 'number_of_samples': 77, + 'p_values': [ [0.512831], + [0.0129362], + [0.491069], + [0.172471]], + 'r_squared': 0.136672, + 'standard_errors': [ [ 2.87571], + [ 0.191361], + [ 0.177849], + [ 22.6592]], + 'sum_squared_errors': 24981, + 'xtx': [ [ 4242, + 48396.9, + 51273.97, + 568], + [ 48396.9, + 570177.6584, + 594274.3274, + 6550.52], + [ 51273.97, + 594274.3274, + 635452.7068, + 6894.24], + [ 568, + 6550.52, + 6894.24, + 77]], + 'z_scores': [ [-0.654436], + [2.48552], + [0.688609], + [1.36431]]}}, + 'locale': 'en_US', + 'max_columns': 6, + 'max_rows': 80, + 'name': 'grades', + 'name_options': 'bias', + 'number_of_batchpredictions': 0, + 'number_of_evaluations': 0, + 'number_of_predictions': 2, + 'number_of_public_predictions': 0, + 'objective_field': '000005', + 'objective_field_name': 'Final', + 'objective_field_type': 'numeric', + 'objective_fields': ['000005'], + 'operating_point': { }, + 'optiml': None, + 'optiml_status': False, + 'ordering': 0, + 'out_of_bag': False, + 'out_of_bags': None, + 'price': 0.0, + 'private': True, + 'project': 'project/5c6dc062983efc18d5000129', + 'range': None, + 'ranges': None, + 'replacement': False, + 'replacements': None, + 'resource': 'linearregression/5c6dc070983efc18e00001f1', + 'rows': 80, + 'sample_rate': 1.0, + 'sample_rates': None, + 'seed': None, + 'seeds': None, + 'shared': False, + 'size': 2691, + 'source': 'source/5c6dc064983efc18e00001ed', + 'source_status': True, + 'status': { 'code': 5, + 'elapsed': 62086, + 'message': 'The linear regression has been created', + 'progress': 1}, + 'subscription': True, + 'tags': [], + 'type': 0, + 'updated': '2019-02-27T18:01:18.539000', + 'user_metadata': { }, + 'webhook': None, + 'weight_field': None, + 'white_box': False} + +Note that the output in the snippet above has been abbreviated. 
As you see, +the ``linear_regression`` attribute stores the coefficients used in the +linear function as well as the configuration parameters described in +the `developers section `_ . + Logistic Regressions --------------------- +~~~~~~~~~~~~~~~~~~~~ A logistic regression is a supervised machine learning method for solving classification problems. Each of the classes in the field @@ -1257,59 +959,59 @@ The JSON structure for a logistic regression is: .. code-block:: python >>> api.pprint(logistic_regression['object']) - { u'balance_objective': False, - u'category': 0, - u'code': 200, - u'columns': 5, - u'created': u'2015-10-09T16:11:08.444000', - u'credits': 0.017581939697265625, - u'credits_per_prediction': 0.0, - u'dataset': u'dataset/561304f537203f4c930001ca', - u'dataset_field_types': { u'categorical': 1, - u'datetime': 0, - u'effective_fields': 5, - u'numeric': 4, - u'preferred': 5, - u'text': 0, - u'total': 5}, - u'dataset_status': True, - u'description': u'', - u'excluded_fields': [], - u'fields_meta': { u'count': 5, - u'limit': 1000, - u'offset': 0, - u'query_total': 5, - u'total': 5}, - u'input_fields': [u'000000', u'000001', u'000002', u'000003'], - u'locale': u'en_US', - u'logistic_regression': { u'bias': 1, - u'c': 1, - u'coefficients': [ [ u'Iris-virginica', + { 'balance_objective': False, + 'category': 0, + 'code': 200, + 'columns': 5, + 'created': '2015-10-09T16:11:08.444000', + 'credits': 0.017581939697265625, + 'credits_per_prediction': 0.0, + 'dataset': 'dataset/561304f537203f4c930001ca', + 'dataset_field_types': { 'categorical': 1, + 'datetime': 0, + 'effective_fields': 5, + 'numeric': 4, + 'preferred': 5, + 'text': 0, + 'total': 5}, + 'dataset_status': True, + 'description': '', + 'excluded_fields': [], + 'fields_meta': { 'count': 5, + 'limit': 1000, + 'offset': 0, + 'query_total': 5, + 'total': 5}, + 'input_fields': ['000000', '000001', '000002', '000003'], + 'locale': 'en_US', + 'logistic_regression': { 'bias': 1, + 'c': 1, + 'coefficients': 
[ [ 'Iris-virginica', [ -1.7074433493289376, -1.533662474502423, 2.47026986670851, 2.5567582221085563, -1.2158200612711925]], - [ u'Iris-setosa', + [ 'Iris-setosa', [ 0.41021712519841674, 1.464162165246765, -2.26003266131107, -1.0210350909174153, 0.26421852991732514]], - [ u'Iris-versicolor', + [ 'Iris-versicolor', [ 0.42702327817072505, -1.611817241669904, 0.5763832839459982, -1.4069842681625884, 1.0946877732663143]]], - u'eps': 1e-05, - u'fields': { u'000000': { u'column_number': 0, - u'datatype': u'double', - u'name': u'sepal length', - u'optype': u'numeric', - u'order': 0, - u'preferred': True, - u'summary': { u'bins': [ [ 4.3, + 'eps': 1e-05, + 'fields': { '000000': { 'column_number': 0, + 'datatype': 'double', + 'name': 'sepal length', + 'optype': 'numeric', + 'order': 0, + 'preferred': True, + 'summary': { 'bins': [ [ 4.3, 1], [ 4.425, 4], @@ -1318,32 +1020,32 @@ The JSON structure for a logistic regression is: ... [ 7.9, 1]], - u'kurtosis': -0.57357, - u'maximum': 7.9, - u'mean': 5.84333, - u'median': 5.8, - u'minimum': 4.3, - u'missing_count': 0, - u'population': 150, - u'skewness': 0.31175, - u'splits': [ 4.51526, + 'kurtosis': -0.57357, + 'maximum': 7.9, + 'mean': 5.84333, + 'median': 5.8, + 'minimum': 4.3, + 'missing_count': 0, + 'population': 150, + 'skewness': 0.31175, + 'splits': [ 4.51526, 4.67252, 4.81113, ... 
6.92597, 7.20423, 7.64746], - u'standard_deviation': 0.82807, - u'sum': 876.5, - u'sum_squares': 5223.85, - u'variance': 0.68569}}, - u'000001': { u'column_number': 1, - u'datatype': u'double', - u'name': u'sepal width', - u'optype': u'numeric', - u'order': 1, - u'preferred': True, - u'summary': { u'counts': [ [ 2, + 'standard_deviation': 0.82807, + 'sum': 876.5, + 'sum_squares': 5223.85, + 'variance': 0.68569}}, + '000001': { 'column_number': 1, + 'datatype': 'double', + 'name': 'sepal width', + 'optype': 'numeric', + 'order': 1, + 'preferred': True, + 'summary': { 'counts': [ [ 2, 1], [ 2.2, 3], @@ -1352,25 +1054,25 @@ The JSON structure for a logistic regression is: 1], [ 4.4, 1]], - u'kurtosis': 0.18098, - u'maximum': 4.4, - u'mean': 3.05733, - u'median': 3, - u'minimum': 2, - u'missing_count': 0, - u'population': 150, - u'skewness': 0.31577, - u'standard_deviation': 0.43587, - u'sum': 458.6, - u'sum_squares': 1430.4, - u'variance': 0.18998}}, - u'000002': { u'column_number': 2, - u'datatype': u'double', - u'name': u'petal length', - u'optype': u'numeric', - u'order': 2, - u'preferred': True, - u'summary': { u'bins': [ [ 1, + 'kurtosis': 0.18098, + 'maximum': 4.4, + 'mean': 3.05733, + 'median': 3, + 'minimum': 2, + 'missing_count': 0, + 'population': 150, + 'skewness': 0.31577, + 'standard_deviation': 0.43587, + 'sum': 458.6, + 'sum_squares': 1430.4, + 'variance': 0.18998}}, + '000002': { 'column_number': 2, + 'datatype': 'double', + 'name': 'petal length', + 'optype': 'numeric', + 'order': 2, + 'preferred': True, + 'summary': { 'bins': [ [ 1, 1], [ 1.16667, 3], @@ -1381,31 +1083,31 @@ The JSON structure for a logistic regression is: 2], [ 6.9, 1]], - u'kurtosis': -1.39554, - u'maximum': 6.9, - u'mean': 3.758, - u'median': 4.35, - u'minimum': 1, - u'missing_count': 0, - u'population': 150, - u'skewness': -0.27213, - u'splits': [ 1.25138, + 'kurtosis': -1.39554, + 'maximum': 6.9, + 'mean': 3.758, + 'median': 4.35, + 'minimum': 1, + 'missing_count': 0, + 
'population': 150, + 'skewness': -0.27213, + 'splits': [ 1.25138, 1.32426, 1.37171, ... 6.02913, 6.38125], - u'standard_deviation': 1.7653, - u'sum': 563.7, - u'sum_squares': 2582.71, - u'variance': 3.11628}}, - u'000003': { u'column_number': 3, - u'datatype': u'double', - u'name': u'petal width', - u'optype': u'numeric', - u'order': 3, - u'preferred': True, - u'summary': { u'counts': [ [ 0.1, + 'standard_deviation': 1.7653, + 'sum': 563.7, + 'sum_squares': 2582.71, + 'variance': 3.11628}}, + '000003': { 'column_number': 3, + 'datatype': 'double', + 'name': 'petal width', + 'optype': 'numeric', + 'order': 3, + 'preferred': True, + 'summary': { 'counts': [ [ 0.1, 5], [ 0.2, 29], @@ -1414,1135 +1116,782 @@ The JSON structure for a logistic regression is: 3], [ 2.5, 3]], - u'kurtosis': -1.33607, - u'maximum': 2.5, - u'mean': 1.19933, - u'median': 1.3, - u'minimum': 0.1, - u'missing_count': 0, - u'population': 150, - u'skewness': -0.10193, - u'standard_deviation': 0.76224, - u'sum': 179.9, - u'sum_squares': 302.33, - u'variance': 0.58101}}, - u'000004': { u'column_number': 4, - u'datatype': u'string', - u'name': u'species', - u'optype': u'categorical', - u'order': 4, - u'preferred': True, - u'summary': { u'categories': [ [ u'Iris-setosa', + 'kurtosis': -1.33607, + 'maximum': 2.5, + 'mean': 1.19933, + 'median': 1.3, + 'minimum': 0.1, + 'missing_count': 0, + 'population': 150, + 'skewness': -0.10193, + 'standard_deviation': 0.76224, + 'sum': 179.9, + 'sum_squares': 302.33, + 'variance': 0.58101}}, + '000004': { 'column_number': 4, + 'datatype': 'string', + 'name': 'species', + 'optype': 'categorical', + 'order': 4, + 'preferred': True, + 'summary': { 'categories': [ [ 'Iris-setosa', 50], - [ u'Iris-versicolor', + [ 'Iris-versicolor', 50], - [ u'Iris-virginica', + [ 'Iris-virginica', 50]], - u'missing_count': 0}, - u'term_analysis': { u'enabled': True}}}, - u'normalize': False, - u'regularization': u'l2'}, - u'max_columns': 5, - u'max_rows': 150, - u'name': u"iris' 
dataset's logistic regression", - u'number_of_batchpredictions': 0, - u'number_of_evaluations': 0, - u'number_of_predictions': 1, - u'objective_field': u'000004', - u'objective_field_name': u'species', - u'objective_field_type': u'categorical', - u'objective_fields': [u'000004'], - u'out_of_bag': False, - u'private': True, - u'project': u'project/561304c137203f4c9300016c', - u'range': [1, 150], - u'replacement': False, - u'resource': u'logisticregression/5617e71c37203f506a000001', - u'rows': 150, - u'sample_rate': 1.0, - u'shared': False, - u'size': 4609, - u'source': u'source/561304f437203f4c930001c3', - u'source_status': True, - u'status': { u'code': 5, - u'elapsed': 86, - u'message': u'The logistic regression has been created', - u'progress': 1.0}, - u'subscription': False, - u'tags': [u'species'], - u'updated': u'2015-10-09T16:14:02.336000', - u'white_box': False} + 'missing_count': 0}, + 'term_analysis': { 'enabled': True}}}, + 'normalize': False, + 'regularization': 'l2'}, + 'max_columns': 5, + 'max_rows': 150, + 'name': u"iris' dataset's logistic regression", + 'number_of_batchpredictions': 0, + 'number_of_evaluations': 0, + 'number_of_predictions': 1, + 'objective_field': '000004', + 'objective_field_name': 'species', + 'objective_field_type': 'categorical', + 'objective_fields': ['000004'], + 'out_of_bag': False, + 'private': True, + 'project': 'project/561304c137203f4c9300016c', + 'range': [1, 150], + 'replacement': False, + 'resource': 'logisticregression/5617e71c37203f506a000001', + 'rows': 150, + 'sample_rate': 1.0, + 'shared': False, + 'size': 4609, + 'source': 'source/561304f437203f4c930001c3', + 'source_status': True, + 'status': { 'code': 5, + 'elapsed': 86, + 'message': 'The logistic regression has been created', + 'progress': 1.0}, + 'subscription': False, + 'tags': ['species'], + 'updated': '2015-10-09T16:14:02.336000', + 'white_box': False} Note that the output in the snippet above has been abbreviated. 
As you see, the ``logistic_regression`` attribute stores the coefficients used in the logistic function as well as the configuration parameters described in the `developers section -`_ . +`_ . +Ensembles +~~~~~~~~~ -Linear Regressions ------------------- - -A linear regression is a supervised machine learning method for -solving regression problems by computing the objective as a linear -combination of factors. The implementation is a multiple linear regression -that models the output as a linear combination of the predictors. -The coefficients are estimated doing a least-squares fit on the training data. +Ensembles are superveised machine learning models that contain several decision +tree models. In BigML, we offer different flavors or ensembles: bagging, +boosted and random decision forests. -As a linear combination can only be done using numeric values, non-numeric -fields need to be transformed to numeric ones following some rules: +The structure of an ensemble can be obtained as follows: -- Categorical fields will be encoded and each class appearance in input data - will convey a different contribution to the input vector. -- Text and items fields will be expanded to several numeric predictors, - each one indicating the number of occurences for a specific term. - Text fields without term analysis are excluded from the model. +.. code-block:: python -Therefore, the initial input data is transformed into an input vector with one -or may components per field. Also, if a field in the training data contains -missing data, the components corresponding to that field will include an -additional 1 or 0 value depending on whether the field is missing in the -input data or not. 
+ >>> ensemble = api.get_ensemble("ensemble/5d5aea06e476842219000add") + >>> api.pprint(ensemble["object"]) + { 'boosting': None, + 'category': 0, + 'code': 200, + 'columns': 5, + 'configuration': None, + 'configuration_status': False, + 'created': '2019-08-19T18:27:18.529000', + 'creator': 'mmartin', + 'dataset': 'dataset/5d5ae9f97811dd0195009c17', + 'dataset_field_types': { 'categorical': 1, + 'datetime': 0, + 'items': 0, + 'numeric': 4, + 'preferred': 5, + 'text': 0, + 'total': 5}, + 'dataset_status': False, + 'depth_threshold': 512, + 'description': '', + 'distributions': [ { 'importance': [ ['000002', 0.72548], + ['000003', 0.24971], + ['000001', 0.02481]], + 'predictions': { 'categories': [ [ 'Iris-setosa', + 52], + [ 'Iris-versicolor', + 49], + [ 'Iris-virginica', + 49]]}, + 'training': { 'categories': [ [ 'Iris-setosa', + 52], + [ 'Iris-versicolor', + 49], + [ 'Iris-virginica', + 49]]}}, + { 'importance': [ ['000002', 0.7129], + ['000003', 0.2635], + ['000000', 0.01485], + ['000001', 0.00875]], + 'predictions': { 'categories': [ [ 'Iris-setosa', + 52], + [ 'Iris-versicolor', + 46], + [ 'Iris-virginica', + 52]]}, + 'training': { 'categories': [ [ 'Iris-setosa', + 52], + [ 'Iris-versicolor', + 46], + [ 'Iris-virginica', + 52]]}}], + 'ensemble': { 'fields': { '000000': { 'column_number': 0, + 'datatype': 'double', + 'name': 'sepal length', + 'optype': 'numeric', + 'order': 0, + 'preferred': True, + 'summary': + ... 
+ 'missing_count': 0}, + 'term_analysis': { 'enabled': True}}}}, + 'ensemble_sample': { 'rate': 1, + 'replacement': True, + 'seed': '820c4aa0a34a4fb69392476c6ffc38dc'}, + 'error_models': 0, + 'fields_meta': { 'count': 5, + 'limit': 1000, + 'offset': 0, + 'query_total': 5, + 'total': 5}, + 'finished_models': 2, + 'focus_field': None, + 'focus_field_name': None, + 'fusions': ['fusion/6488ab197411b45de19f1e19'], + 'importance': { '000000': 0.00743, + '000001': 0.01678, + '000002': 0.71919, + '000003': 0.2566}, + 'input_fields': ['000000', '000001', '000002', '000003'], + 'locale': 'en_US', + 'max_columns': 5, + 'max_rows': 150, + 'missing_splits': False, + 'models': [ 'model/5d5aea073514cd6bf200a630', + 'model/5d5aea083514cd6bf200a632'], + 'name': 'iris', + 'name_options': 'bootstrap decision forest, 512-node, 2-model, pruned, ' + 'deterministic order', + 'node_threshold': 512, + 'number_of_batchpredictions': 0, + 'number_of_evaluations': 0, + 'number_of_models': 2, + 'number_of_predictions': 0, + 'number_of_public_predictions': 0, + 'objective_field': '000004', + 'objective_field_details': { 'column_number': 4, + 'datatype': 'string', + 'name': 'species', + 'optype': 'categorical', + 'order': 4}, + 'objective_field_name': 'species', + 'objective_field_type': 'categorical', + 'objective_fields': ['000004'], + 'optiml': None, + 'optiml_status': False, + 'ordering': 0, + 'out_of_bag': False, + 'price': 0.0, + 'private': True, + 'project': None, + 'randomize': False, + 'range': None, + 'replacement': False, + 'resource': 'ensemble/5d5aea06e476842219000add', + 'rows': 150, + 'sample_rate': 1.0, + 'selective_pruning': True, + 'shared': True, + 'shared_clonable': True, + 'shared_hash': 'qfCR2ezORt5u8GNyGaTtJqwJemh', + 'sharing_key': '125380a1560a8efdc0e3eedee7bd2ccce1c4936c', + 'size': 4608, + 'source': 'source/5d5ae9f7e47684769e001337', + 'source_status': False, + 'split_candidates': 32, + 'split_field': None, + 'split_field_name': None, + 'stat_pruning': True, + 'status': 
{ 'code': 5, + 'elapsed': 804, + 'message': 'The ensemble has been created', + 'progress': 1}, + 'subscription': False, + 'support_threshold': 0.0, + 'tags': [], + 'type': 0, + 'updated': '2023-06-13T17:44:57.780000', + 'white_box': False} -The JSON structure for a linear regression is: +Note that the output in the snippet above has been abbreviated. As you see, +the ``number_of_models`` attribute stores number of decision trees used in the +ensemble and the rest of the dictionary contains the configuration parameters described in the `developers section +`_ . -.. code-block:: python +Deepnets +~~~~~~~~ - >>> api.pprint(linear_regression["object"]) - { u'category': 0, - u'code': 200, - u'columns': 4, - u'composites': None, - u'configuration': None, - u'configuration_status': False, - u'created': u'2019-02-20T21:02:40.027000', - u'creator': u'merce', - u'credits': 0.0, - u'credits_per_prediction': 0.0, - u'dataset': u'dataset/5c6dc06a983efc18e2000084', - u'dataset_field_types': { u'categorical': 0, - u'datetime': 0, - u'items': 0, - u'numeric': 6, - u'preferred': 6, - u'text': 0, - u'total': 6}, - u'dataset_status': True, - u'datasets': [], - u'default_numeric_value': None, - u'description': u'', - u'excluded_fields': [], - u'execution_id': None, - u'execution_status': None, - u'fields_maps': None, - u'fields_meta': { u'count': 4, - u'limit': 1000, - u'offset': 0, - u'query_total': 4, - u'total': 4}, - u'fusions': None, - u'input_fields': [u'000000', u'000001', u'000002'], - u'linear_regression': { u'bias': True, - u'coefficients': [ [-1.88196], - [0.475633], - [0.122468], - [30.9141]], - u'fields': { u'000000': { u'column_number': 0, - u'datatype': u'int8', - u'name': u'Prefix', - u'optype': u'numeric', - u'order': 0, - u'preferred': True, - u'summary': { u'counts': [ [ 4, - 1], +Ensembles are superveised machine learning models that contain several decision +tree models. 
In BigML, we offer different flavors or ensembles: bagging, +boosted and random decision forests. - ... - u'stats': { u'confidence_intervals': [ [ 5.63628], - [ 0.375062], - [ 0.348577], - [ 44.4112]], - u'mean_squared_error': 342.206, - u'number_of_parameters': 4, - u'number_of_samples': 77, - u'p_values': [ [0.512831], - [0.0129362], - [0.491069], - [0.172471]], - u'r_squared': 0.136672, - u'standard_errors': [ [ 2.87571], - [ 0.191361], - [ 0.177849], - [ 22.6592]], - u'sum_squared_errors': 24981, - u'xtx': [ [ 4242, - 48396.9, - 51273.97, - 568], - [ 48396.9, - 570177.6584, - 594274.3274, - 6550.52], - [ 51273.97, - 594274.3274, - 635452.7068, - 6894.24], - [ 568, - 6550.52, - 6894.24, - 77]], - u'z_scores': [ [-0.654436], - [2.48552], - [0.688609], - [1.36431]]}}, - u'locale': u'en_US', - u'max_columns': 6, - u'max_rows': 80, - u'name': u'grades', - u'name_options': u'bias', - u'number_of_batchpredictions': 0, - u'number_of_evaluations': 0, - u'number_of_predictions': 2, - u'number_of_public_predictions': 0, - u'objective_field': u'000005', - u'objective_field_name': u'Final', - u'objective_field_type': u'numeric', - u'objective_fields': [u'000005'], - u'operating_point': { }, - u'optiml': None, - u'optiml_status': False, - u'ordering': 0, - u'out_of_bag': False, - u'out_of_bags': None, - u'price': 0.0, - u'private': True, - u'project': u'project/5c6dc062983efc18d5000129', - u'range': None, - u'ranges': None, - u'replacement': False, - u'replacements': None, - u'resource': u'linearregression/5c6dc070983efc18e00001f1', - u'rows': 80, - u'sample_rate': 1.0, - u'sample_rates': None, - u'seed': None, - u'seeds': None, - u'shared': False, - u'size': 2691, - u'source': u'source/5c6dc064983efc18e00001ed', - u'source_status': True, - u'status': { u'code': 5, - u'elapsed': 62086, - u'message': u'The linear regression has been created', - u'progress': 1}, - u'subscription': True, - u'tags': [], - u'type': 0, - u'updated': u'2019-02-27T18:01:18.539000', - 
u'user_metadata': { }, - u'webhook': None, - u'weight_field': None, - u'white_box': False} +The structure of an ensemble can be obtained as follows: -Note that the output in the snippet above has been abbreviated. As you see, -the ``linear_regression`` attribute stores the coefficients used in the -linear function as well as the configuration parameters described in -the `developers section `_ . +.. code-block:: python + >>> deepnet = api.get_deepnet("deepnet/64f2193379c602359ec90197") + >>> api.pprint(deepnet["object"]) + { 'category': 0, + 'code': 200, + 'columns': 11, + 'configuration': None, + 'configuration_status': False, + 'created': '2023-09-01T17:02:43.222000', + 'creator': 'mmartin', + 'dataset': 'dataset/64f2192251595a5d90394c1e', + 'dataset_field_types': { 'categorical': 1, + 'datetime': 1, + 'image': 0, + 'items': 0, + 'numeric': 9, + 'path': 0, + 'preferred': 10, + 'regions': 0, + 'text': 0, + 'total': 11}, + 'dataset_status': True, + 'deepnet': { 'batch_normalization': False, + 'deepnet_seed': 'bigml', + 'deepnet_version': 'alpha', + 'dropout_rate': 0.0, + 'fields': { '000000': { 'column_number': 0, + 'datatype': 'string', + 'name': 'cat-0', + 'optype': 'categorical', + 'order': 0, + 'preferred': True, + 'summary': { + ... + 1954.26254, + 'variance': 0.9737}}}, + 'hidden_layers': [ { 'activation_function': 'tanh', + 'number_of_nodes': 64, + 'offset': 'zeros', + 'seed': 0, + 'type': 'dense', + 'weights': 'glorot_uniform'}], + 'holdout_metrics': { 'mean_absolute_error': 0.8178046941757202, + 'mean_squared_error': 1.0125617980957031, + 'median_absolute_error': 0.6850314736366272, + 'r_squared': -0.009405492794412496, + 'spearman_r': 0.07955370033562714}, + 'learn_residuals': False, + 'learning_rate': 0.01, + 'max_iterations': 100, + 'missing_numerics': True, + 'network': { 'image_network': None, + 'layers': [ { 'activation_function': 'tanh', + 'mean': None, + 'number_of_nodes': 64, + 'offset': [ -0.01426, + 0.06489, + 0.00609, + ... 
+ -0.06769, + 0.2289, + 0.03777]]}], + 'output_exposition': { 'mean': -0.06256, + 'stdev': 0.98676, + 'type': 'numeric'}, + 'preprocess': [ { 'index': 0, + 'type': 'categorical', + 'values': [ 'cat0', + 'cat1', + 'cat2']}, + { 'index': 1, + 'mean': 1974.3085, + 'stdev': 43.39534, + 'type': 'numeric'}, + { 'index': 2, + 'mean': 6.459, + 'stdev': 3.4764, + 'type': 'numeric'}, + { 'index': 3, + 'mean': 15.537, + 'stdev': 8.7924, + 'type': 'numeric'}, + { 'index': 4, + 'mean': 4.0015, + 'stdev': 2.02893, + 'type': 'numeric'}, + { 'index': 5, + 'mean': 11.8105, + 'stdev': 6.84646, + 'type': 'numeric'}, + { 'index': 6, + 'mean': 29.3555, + 'stdev': 17.3928, + 'type': 'numeric'}, + { 'index': 7, + 'mean': 29.715, + 'stdev': 17.14149, + 'type': 'numeric'}, + { 'index': 8, + 'mean': 501.6185, + 'stdev': 292.27451, + 'type': 'numeric'}], + 'trees': None}, + 'network_structure': { 'image_network': None, + 'layers': [ { 'activation_function': 'tanh', + 'mean': None, + 'number_of_nodes': 64, + 'offset': 'zeros', + 'residuals': False, + 'scale': None, + 'stdev': None, + 'weights': 'glorot_uniform'}, + { 'activation_function': 'linear', + 'mean': None, + 'number_of_nodes': 1, + 'offset': 'zeros', + 'residuals': False, + 'scale': None, + 'stdev': None, + 'weights': 'glorot_uniform'}], + 'output_exposition': { 'mean': -0.06256, + 'stdev': 0.98676, + 'type': 'numeric'}, + 'preprocess': [ { 'index': 0, + 'type': 'categorical', + 'values': [ 'cat0', + 'cat1', + 'cat2']}, + { 'index': 1, + 'mean': 1974.3085, + 'stdev': 43.39534, + 'type': 'numeric'}, + { 'index': 2, + 'mean': 6.459, + 'stdev': 3.4764, + 'type': 'numeric'}, + { 'index': 3, + 'mean': 15.537, + 'stdev': 8.7924, + 'type': 'numeric'}, + { 'index': 4, + 'mean': 4.0015, + 'stdev': 2.02893, + 'type': 'numeric'}, + { 'index': 5, + 'mean': 11.8105, + 'stdev': 6.84646, + 'type': 'numeric'}, + { 'index': 6, + 'mean': 29.3555, + 'stdev': 17.3928, + 'type': 'numeric'}, + { 'index': 7, + 'mean': 29.715, + 'stdev': 17.14149, + 'type': 
'numeric'}, + { 'index': 8, + 'mean': 501.6185, + 'stdev': 292.27451, + 'type': 'numeric'}], + 'trees': None}, + 'number_of_hidden_layers': 1, + 'number_of_iterations': 100, + 'optimizer': { 'adam': { 'beta1': 0.9, + 'beta2': 0.999, + 'epsilon': 1e-08}}, + 'search': False, + 'suggest_structure': False, + 'tree_embedding': False}, + 'description': '', + 'excluded_fields': [], + 'fields_meta': { 'count': 11, + 'limit': 1000, + 'offset': 0, + 'query_total': 11, + 'total': 11}, + 'importance': { '000000': 0.12331, + '000001-0': 0.25597, + '000001-1': 0.07716, + '000001-2': 0.15659, + '000001-3': 0.11564, + '000001-4': 0.0644, + '000001-5': 0.09814, + '000001-6': 0.0555, + '000001-7': 0.05329}, + 'input_fields': [ '000000', + '000001-0', + '000001-1', + '000001-2', + '000001-3', + '000001-4', + '000001-5', + '000001-6', + '000001-7'], + 'locale': 'en_US', + 'max_columns': 11, + 'max_rows': 2000, + 'name': 'dates2', + 'name_options': '1 hidden layers, adam, learning rate=0.01, 100-iteration, ' + 'beta1=0.9, beta2=0.999, epsilon=1e-08, missing values', + 'number_of_batchpredictions': 0, + 'number_of_evaluations': 0, + 'number_of_predictions': 0, + 'number_of_public_predictions': 0, + 'objective_field': '000002', + 'objective_field_name': 'target-2', + 'objective_field_type': 'numeric', + 'objective_fields': ['000002'], + 'optiml': None, + 'optiml_status': False, + 'ordering': 0, + 'out_of_bag': False, + 'price': 0.0, + 'private': True, + 'project': 'project/64f2191c4a1a2c29a1084943', + 'range': None, + 'regression_weight_ratio': None, + 'replacement': False, + 'resource': 'deepnet/64f2193379c602359ec90197', + 'rows': 2000, + 'sample_rate': 1.0, + 'shared': False, + 'size': 96976, + 'source': 'source/64f2191f51595a5d8cbf7883', + 'source_status': True, + 'status': { 'code': 5, + 'elapsed': 10013, + 'message': 'The deepnet has been created', + 'progress': 1.0}, + 'subscription': False, + 'tags': [], + 'type': 0, + 'updated': '2023-09-01T17:11:28.762000', + 'white_box': 
False} -Associations ------------- -Association Discovery is a popular method to find out relations among values -in high-dimensional datasets. +Note that the output in the snippet above has been abbreviated. As you see, +the ``network`` attribute stores the coefficients used in the +neural network structure and the rest of the dictionary shows the +configuration parameters described in the `developers section +`_ . -A common case where association discovery is often used is -market basket analysis. This analysis seeks for customer shopping -patterns across large transactional -datasets. For instance, do customers who buy hamburgers and ketchup also -consume bread? +OptiMLs +~~~~~~~ -Businesses use those insights to make decisions on promotions and product -placements. -Association Discovery can also be used for other purposes such as early -incident detection, web usage analysis, or software intrusion detection. +An OptiML is the result of an automated optimization process to find the +best model (type and configuration) to solve a particular +classification or regression problem. -In BigML, the Association resource object can be built from any dataset, and -its results are a list of association rules between the items in the dataset. -In the example case, the corresponding -association rule would have hamburguers and ketchup as the items at the -left hand side of the association rule and bread would be the item at the -right hand side. Both sides in this association rule are related, -in the sense that observing -the items in the left hand side implies observing the items in the right hand -side. There are some metrics to ponder the quality of these association rules: +The selection process automates the usual time-consuming task of trying +different models and parameters and evaluating their results to find the +best one. Using the OptiML, non-experts can build top-performing models. -- Support: the proportion of instances which contain an itemset. 
+You can create an OptiML by selecting the objective field to be predicted, the
+ 'model/5afde65a8bf7d551fd005149'], + 'models_meta': { 'count': 9, 'limit': 1000, 'offset': 0, 'total': 9}, + 'name': 'iris', + 'name_options': '9 total models (logisticregression: 1, model: 8), metric=max_phi, model candidates=18, max. training time=300', + 'objective_field': '000004', + 'objective_field_details': { 'column_number': 4, + 'datatype': 'string', + 'name': 'species', + 'optype': 'categorical', + 'order': 4}, + 'objective_field_name': 'species', + 'objective_field_type': 'categorical', + 'objective_fields': ['000004'], + 'optiml': { 'created_resources': { 'dataset': 10, + 'logisticregression': 11, + 'logisticregression_evaluation': 11, + 'model': 29, + 'model_evaluation': 29}, + 'datasets': [ { 'id': 'dataset/5afde6488bf7d551ee00081c', + 'name': 'iris', + 'name_options': '120 instances, 5 fields (1 categorical, 4 numeric), sample rate=0.8'}, + { 'id': 'dataset/5afde6488bf7d551fd00511f', + 'name': 'iris', + 'name_options': '30 instances, 5 fields (1 categorical, 4 numeric), sample rate=0.2, out of bag'}, + { 'id': 'dataset/5afde6488bf7d551fe002e0f', + 'name': 'iris', + 'name_options': '120 instances, 5 fields (1 categorical, 4 numeric), sample rate=0.8'}, + ... + { 'id': 'dataset/5afde64d8bf7d551fd00512e', + 'name': 'iris', + 'name_options': '120 instances, 5 fields (1 categorical, 4 numeric), sample rate=0.8'}], + 'fields': { '000000': { 'column_number': 0, + 'datatype': 'double', + 'name': 'sepal length', + 'optype': 'numeric', + 'order': 0, + 'preferred': True, + 'summary': { 'bins': [ [ 4.3, + 1], + ... + [ 7.9, + 1]], + ... 
+ 'sum': 179.9, + 'sum_squares': 302.33, + 'variance': 0.58101}}, + '000004': { 'column_number': 4, + 'datatype': 'string', + 'name': 'species', + 'optype': 'categorical', + 'order': 4, + 'preferred': True, + 'summary': { 'categories': [ [ 'Iris-setosa', + 50], + [ 'Iris-versicolor', + 50], + [ 'Iris-virginica', + 50]], + 'missing_count': 0}, + 'term_analysis': { 'enabled': True}}}, + 'max_training_time': 300, + 'metric': 'max_phi', + 'model_types': ['model', 'logisticregression'], + 'models': [ { 'evaluation': { 'id': 'evaluation/5afde65c8bf7d551fd00514c', + 'info': { 'accuracy': 0.96667, + 'average_area_under_pr_curve': 0.97867, + ... + 'per_class_statistics': [ { 'accuracy': 1, + 'area_under_pr_curve': 1, + ... + 'spearmans_rho': 0.82005}]}, + 'metric_value': 0.95356, + 'metric_variance': 0.00079, + 'name': 'iris vs. iris', + 'name_options': '279-node, deterministic order, operating kind=probability'}, + 'evaluation_count': 3, + 'id': 'model/5afde64e8bf7d551fd005131', + 'importance': [ [ '000002', + 0.70997], + [ '000003', + 0.27289], + [ '000000', + 0.0106], + [ '000001', + 0.00654]], + 'kind': 'model', + 'name': 'iris', + 'name_options': '279-node, deterministic order'}, + { 'evaluation': { 'id': 'evaluation/5afde65c8bf7d551fd00514f', + 'info': { 'accuracy': 0.93333, -- Confidence or (strength): The probability of seeing the rule's consequent -under the condition that the instances also contain the rule's antecedent. -Confidence is computed using the support of the association rule over the -coverage. That is, the percentage of instances which contain the consequent -and antecedent together over the number of instances which only contain -the antecedent. + ... + [ '000001', + 0.02133]], + 'kind': 'model', + 'name': 'iris', + 'name_options': '12-node, randomize, deterministic order, balanced'}], + 'number_of_model_candidates': 18, + 'recent_evaluations': [ 0.90764, + 0.94952, + ... 
+ 0.90427], + 'search_complete': True, + 'summary': { 'logisticregression': { 'best': 'logisticregression/5afde6558bf7d551fd00513d', + 'count': 1}, + 'model': { 'best': 'model/5afde64e8bf7d551fd005131', + 'count': 8}}}, + 'private': True, + 'project': None, + 'resource': 'optiml/5afde4a42a83475c1b0008a2', + 'shared': False, + 'size': 3686, + 'source': 'source/5afdb6fb9252732d930009e5', + 'source_status': True, + 'status': { 'code': 5, + 'elapsed': 448878.0, + 'message': 'The optiml has been created', + 'progress': 1}, + 'subscription': False, + 'tags': [], + 'test_dataset': None, + 'type': 0, + 'updated': '2018-05-17T20:30:29.063000'} -Confidence is directed and gives different values for the association -rules Antecedent → Consequent and Consequent → Antecedent. Association -rules also need to satisfy a minimum confidence constraint -(i.e., min_confidence). -- Leverage: the difference of the support of the association -rule (i.e., the antecedent and consequent appearing together) and what would -be expected if antecedent and consequent where statistically independent. -This is a value between -1 and 1. A positive value suggests a positive -relationship and a negative value suggests a negative relationship. -0 indicates independence. +You can check the optiml properties at the `API documentation +`_. -Lift: how many times more often antecedent and consequent occur together -than expected if they where statistically independent. -A value of 1 suggests that there is no relationship between the antecedent -and the consequent. Higher values suggest stronger positive relationships. -Lower values suggest stronger negative relationships (the presence of the -antecedent reduces the likelihood of the consequent) -As to the items used in association rules, each type of field is parsed to -extract items for the rules as follows: +Fusions +~~~~~~~ -- Categorical: each different value (class) will be considered a separate item. 
-- Text: each unique term will be considered a separate item. -- Items: each different item in the items summary will be considered. -- Numeric: Values will be converted into categorical by making a -segmentation of the values. -For example, a numeric field with values ranging from 0 to 600 split -into 3 segments: -segment 1 → [0, 200), segment 2 → [200, 400), segment 3 → [400, 600]. -You can refine the behavior of the transformation using -`discretization `_ -and `field_discretizations `_. +A Fusion is a special type of composed resource for which all +submodels satisfy the following constraints: they're all either +classifications or regressions over the same kind of data or +compatible fields, with the same objective field. Given those +properties, a fusion can be considered a supervised model, +and therefore one can predict with fusions and evaluate them. +Ensembles can be viewed as a kind of fusion subject to the additional +constraints that all its submodels are tree models that, moreover, +have been built from the same base input data, but sampled in particular ways. -The JSON structure for an association resource is: +The model types allowed to be a submodel of a fusion are: +deepnet, ensemble, fusion, model, logistic regression and linear regression. -.. code-block:: python +The JSON structure for an Fusion is: +.. 
code-block:: python - >>> api.pprint(association['object']) + >>> api.pprint(fusion["object"]) { - "associations":{ - "complement":false, - "discretization":{ - "pretty":true, - "size":5, - "trim":0, - "type":"width" - }, - "items":[ + "category": 0, + "code": 200, + "configuration": null, + "configuration_status": false, + "created": "2018-05-09T20:11:05.821000", + "credits_per_prediction": 0, + "description": "", + "fields_meta": { + "count": 5, + "limit": 1000, + "offset": 0, + "query_total": 5, + "total": 5 + }, + "fusion": { + "models": [ { - "complement":false, - "count":32, - "field_id":"000000", - "name":"Segment 1", - "bin_end":5, - "bin_start":null - }, - { - "complement":false, - "count":49, - "field_id":"000000", - "name":"Segment 3", - "bin_end":7, - "bin_start":6 - }, - { - "complement":false, - "count":12, - "field_id":"000000", - "name":"Segment 4", - "bin_end":null, - "bin_start":7 - }, - { - "complement":false, - "count":19, - "field_id":"000001", - "name":"Segment 1", - "bin_end":2.5, - "bin_start":null - }, - ... - { - "complement":false, - "count":50, - "field_id":"000004", - "name":"Iris-versicolor" - }, - { - "complement":false, - "count":50, - "field_id":"000004", - "name":"Iris-virginica" - } - ], - "max_k": 100, - "min_confidence":0, - "min_leverage":0, - "min_lift":1, - "min_support":0, - "rules":[ - { - "confidence":1, - "id":"000000", - "leverage":0.22222, - "lhs":[ - 13 - ], - "lhs_cover":[ - 0.33333, - 50 - ], - "lift":3, - "p_value":0.000000000, - "rhs":[ - 6 - ], - "rhs_cover":[ - 0.33333, - 50 - ], - "support":[ - 0.33333, - 50 - ] - }, - { - "confidence":1, - "id":"000001", - "leverage":0.22222, - "lhs":[ - 6 - ], - "lhs_cover":[ - 0.33333, - 50 - ], - "lift":3, - "p_value":0.000000000, - "rhs":[ - 13 - ], - "rhs_cover":[ - 0.33333, - 50 - ], - "support":[ - 0.33333, - 50 - ] - }, - ... 
- { - "confidence":0.26, - "id":"000029", - "leverage":0.05111, - "lhs":[ - 13 - ], - "lhs_cover":[ - 0.33333, - 50 - ], - "lift":2.4375, - "p_value":0.0000454342, - "rhs":[ - 5 - ], - "rhs_cover":[ - 0.10667, - 16 - ], - "support":[ - 0.08667, - 13 - ] + "id": "ensemble/5af272eb4e1727d378000050", + "kind": "ensemble", + "name": "Iris ensemble", + "name_options": "boosted trees, 1999-node, 16-iteration, deterministic order, balanced" }, { - "confidence":0.18, - "id":"00002a", - "leverage":0.04, - "lhs":[ - 15 - ], - "lhs_cover":[ - 0.33333, - 50 - ], - "lift":3, - "p_value":0.0000302052, - "rhs":[ - 9 - ], - "rhs_cover":[ - 0.06, - 9 - ], - "support":[ - 0.06, - 9 - ] + "id": "model/5af272fe4e1727d3780000d6", + "kind": "model", + "name": "Iris model", + "name_options": "1999-node, pruned, deterministic order, balanced" }, { - "confidence":1, - "id":"00002b", - "leverage":0.04, - "lhs":[ - 9 - ], - "lhs_cover":[ - 0.06, - 9 - ], - "lift":3, - "p_value":0.0000302052, - "rhs":[ - 15 - ], - "rhs_cover":[ - 0.33333, - 50 - ], - "support":[ - 0.06, - 9 - ] + "id": "logisticregression/5af272ff4e1727d3780000d9", + "kind": "logisticregression", + "name": "Iris LR", + "name_options": "L2 regularized (c=1), bias, auto-scaled, missing values, eps=0.001" } - ], - "rules_summary":{ - "confidence":{ - "counts":[ - [ - 0.18, - 1 - ], - [ - 0.24, - 1 - ], - [ - 0.26, - 2 - ], - ... - [ - 0.97959, - 1 - ], - [ - 1, - 9 - ] - ], - "maximum":1, - "mean":0.70986, - "median":0.72864, - "minimum":0.18, - "population":44, - "standard_deviation":0.24324, - "sum":31.23367, - "sum_squares":24.71548, - "variance":0.05916 - }, - "k":44, - "leverage":{ - "counts":[ - [ - 0.04, - 2 - ], - [ - 0.05111, - 4 - ], - [ - 0.05316, - 2 - ], - ... 
- [ - 0.22222, - 2 - ] - ], - "maximum":0.22222, - "mean":0.10603, - "median":0.10156, - "minimum":0.04, - "population":44, - "standard_deviation":0.0536, - "sum":4.6651, - "sum_squares":0.61815, - "variance":0.00287 - }, - "lhs_cover":{ - "counts":[ - [ - 0.06, - 2 - ], - [ - 0.08, - 2 - ], - [ - 0.10667, - 4 - ], - [ - 0.12667, - 1 - ], - ... - [ - 0.5, - 4 - ] - ], - "maximum":0.5, - "mean":0.29894, - "median":0.33213, - "minimum":0.06, - "population":44, - "standard_deviation":0.13386, - "sum":13.15331, - "sum_squares":4.70252, - "variance":0.01792 - }, - "lift":{ - "counts":[ - [ - 1.40625, - 2 - ], - [ - 1.5067, - 2 - ], - ... - [ - 2.63158, - 4 - ], - [ - 3, - 10 - ], - [ - 4.93421, - 2 - ], - [ - 12.5, - 2 - ] - ], - "maximum":12.5, - "mean":2.91963, - "median":2.58068, - "minimum":1.40625, - "population":44, - "standard_deviation":2.24641, - "sum":128.46352, - "sum_squares":592.05855, - "variance":5.04635 - }, - "p_value":{ - "counts":[ - [ - 0.000000000, - 2 - ], - [ - 0.000000000, - 4 - ], - [ - 0.000000000, - 2 - ], - ... - [ - 0.0000910873, - 2 - ] - ], - "maximum":0.0000910873, - "mean":0.0000106114, - "median":0.00000000, - "minimum":0.000000000, - "population":44, - "standard_deviation":0.0000227364, - "sum":0.000466903, - "sum_squares":0.0000000, - "variance":0.000000001 - }, - "rhs_cover":{ - "counts":[ - [ - 0.06, - 2 - ], - [ - 0.08, - 2 - ], - ... 
- [ - 0.42667, - 2 - ], - [ - 0.46667, - 3 - ], - [ - 0.5, - 4 - ] - ], - "maximum":0.5, - "mean":0.29894, - "median":0.33213, - "minimum":0.06, - "population":44, - "standard_deviation":0.13386, - "sum":13.15331, - "sum_squares":4.70252, - "variance":0.01792 - }, - "support":{ - "counts":[ - [ - 0.06, - 4 - ], - [ - 0.06667, - 2 - ], - [ - 0.08, - 2 - ], - [ - 0.08667, - 4 - ], - [ - 0.10667, - 4 - ], - [ - 0.15333, - 2 - ], - [ - 0.18667, - 4 - ], - [ - 0.19333, - 2 - ], - [ - 0.20667, - 2 - ], - [ - 0.27333, - 2 - ], - [ - 0.28667, - 2 - ], - [ - 0.3, - 4 - ], - [ - 0.32, - 2 - ], - [ - 0.33333, - 6 - ], - [ - 0.37333, - 2 - ] - ], - "maximum":0.37333, - "mean":0.20152, - "median":0.19057, - "minimum":0.06, - "population":44, - "standard_deviation":0.10734, - "sum":8.86668, - "sum_squares":2.28221, - "variance":0.01152 - } - }, - "search_strategy":"leverage", - "significance_level":0.05 + ] }, - "category":0, - "clones":0, - "code":200, - "columns":5, - "created":"2015-11-05T08:06:08.184000", - "credits":0.017581939697265625, - "dataset":"dataset/562fae3f4e1727141d00004e", - "dataset_status":true, - "dataset_type":0, - "description":"", - "excluded_fields":[ ], - "fields_meta":{ - "count":5, - "limit":1000, - "offset":0, - "query_total":5, - "total":5 + "importance": { + "000000": 0.05847, + "000001": 0.03028, + "000002": 0.13582, + "000003": 0.4421 }, - "input_fields":[ - "000000", - "000001", - "000002", - "000003", - "000004" + "model_count": { + "ensemble": 1, + "logisticregression": 1, + "model": 1, + "total": 3 + }, + "models": [ + "ensemble/5af272eb4e1727d378000050", + "model/5af272fe4e1727d3780000d6", + "logisticregression/5af272ff4e1727d3780000d9" ], - "locale":"en_US", - "max_columns":5, - "max_rows":150, - "name":"iris' dataset's association", - "out_of_bag":false, - "price":0, - "private":true, - "project":null, - "range":[ - 1, - 150 + "models_meta": { + "count": 3, + "limit": 1000, + "offset": 0, + "total": 3 + }, + "name": "iris", + 
"name_options": "3 total models (ensemble: 1, logisticregression: 1, model: 1)", + "number_of_batchpredictions": 0, + "number_of_evaluations": 0, + "number_of_predictions": 0, + "number_of_public_predictions": 0, + "objective_field": "000004", + "objective_field_details": { + "column_number": 4, + "datatype": "string", + "name": "species", + "optype": "categorical", + "order": 4 + }, + "objective_field_name": "species", + "objective_field_type": "categorical", + "objective_fields": [ + "000004" ], - "replacement":false, - "resource":"association/5621b70910cb86ae4c000000", - "rows":150, - "sample_rate":1, - "shared":false, - "size":4609, - "source":"source/562fae3a4e1727141d000048", - "source_status":true, - "status":{ - "code":5, - "elapsed":1072, - "message":"The association has been created", - "progress":1 + "private": true, + "project": null, + "resource":"fusion/59af8107b8aa0965d5b61138", + "shared": false, + "status": { + "code": 5, + "elapsed": 8420, + "message": "The fusion has been created", + "progress": 1 }, - "subscription":false, - "tags":[ ], - "updated":"2015-11-05T08:06:20.403000", - "white_box":false + "subscription": false, + "tags": [], + "type": 0, + "updated": "2018-05-09T20:11:14.258000" } -Note that the output in the snippet above has been abbreviated. As you see, -the ``associations`` attribute stores items, rules and metrics extracted -from the datasets as well as the configuration parameters described in -the `developers section `_ . - - -Topic Models ------------- - -A topic model is an unsupervised machine learning method -for unveiling all the different topics -underlying a collection of documents. -BigML uses Latent Dirichlet Allocation (LDA), one of the most popular -probabilistic methods for topic modeling. -In BigML, each instance (i.e. each row in your dataset) will -be considered a document and the contents of all the text fields -given as inputs will be automatically concatenated and considered the -document bag of words. 
- -Topic model is based on the assumption that any document -exhibits a mixture of topics. Each topic is composed of a set of words -which are thematically related. The words from a given topic have different -probabilities for that topic. At the same time, each word can be attributable -to one or several topics. So for example the word "sea" may be found in -a topic related with sea transport but also in a topic related to holidays. -Topic model automatically discards stop words and high -frequency words. - -Topic model's main applications include browsing, organizing and understanding -large archives of documents. It can been applied for information retrieval, -collaborative filtering, assessing document similarity among others. -The topics found in the dataset can also be very useful new features -before applying other models like classification, clustering, or -anomaly detection. - -The JSON structure for a topic model is: - -.. code-block:: python - - >>> api.pprint(topic['object']) - { u'category': 0, - u'code': 200, - u'columns': 1, - u'configuration': None, - u'configuration_status': False, - u'created': u'2016-11-23T23:47:54.703000', - u'credits': 0.0, - u'credits_per_prediction': 0.0, - u'dataset': u'dataset/58362aa0983efc45a0000005', - u'dataset_field_types': { u'categorical': 1, - u'datetime': 0, - u'effective_fields': 672, - u'items': 0, - u'numeric': 0, - u'preferred': 2, - u'text': 1, - u'total': 2}, - u'dataset_status': True, - u'dataset_type': 0, - u'description': u'', - u'excluded_fields': [], - u'fields_meta': { u'count': 1, - u'limit': 1000, - u'offset': 0, - u'query_total': 1, - u'total': 1}, - u'input_fields': [u'000001'], - u'locale': u'en_US', - u'max_columns': 2, - u'max_rows': 656, - u'name': u"spam dataset's Topic Model ", - u'number_of_batchtopicdistributions': 0, - u'number_of_public_topicdistributions': 0, - u'number_of_topicdistributions': 0, - u'ordering': 0, - u'out_of_bag': False, - u'price': 0.0, - u'private': True, - u'project': 
None, - u'range': [1, 656], - u'replacement': False, - u'resource': u'topicmodel/58362aaa983efc45a1000007', - u'rows': 656, - u'sample_rate': 1.0, - u'shared': False, - u'size': 54740, - u'source': u'source/58362a69983efc459f000001', - u'source_status': True, - u'status': { u'code': 5, - u'elapsed': 3222, - u'message': u'The topic model has been created', - u'progress': 1.0}, - u'subscription': True, - u'tags': [], - u'topic_model': { u'alpha': 4.166666666666667, - u'beta': 0.1, - u'bigrams': False, - u'case_sensitive': False, - u'fields': { u'000001': { u'column_number': 1, - u'datatype': u'string', - u'name': u'Message', - u'optype': u'text', - u'order': 0, - u'preferred': True, - u'summary': { u'average_length': 78.14787, - u'missing_count': 0, - u'tag_cloud': [ [ u'call', - 72], - [ u'ok', - 36], - [ u'gt', - 34], - ... - [ u'worse', - 2], - [ u'worth', - 2], - [ u'write', - 2], - [ u'yest', - 2], - [ u'yijue', - 2]], - u'term_forms': { }}, - u'term_analysis': { u'case_sensitive': False, - u'enabled': True, - u'language': u'en', - u'stem_words': False, - u'token_mode': u'all', - u'use_stopwords': False}}}, - u'hashed_seed': 62146850, - u'language': u'en', - u'number_of_topics': 12, - u'term_limit': 4096, - u'term_topic_assignments': [ [ 0, - 5, - 0, - 1, - 0, - 19, - 0, - 0, - 19, - 0, - 1, - 0], - [ 0, - 0, - 0, - 13, - 0, - 0, - 0, - 0, - 5, - 0, - 0, - 0], - ... - [ 0, - 7, - 27, - 0, - 112, - 0, - 0, - 0, - 0, - 0, - 14, - 2]], - u'termset': [ u'000', - u'03', - u'04', - u'06', - u'08000839402', - u'08712460324', - ... 
- - u'yes', - u'yest', - u'yesterday', - u'yijue', - u'yo', - u'yr', - u'yup', - u'\xfc'], - u'top_n_terms': 10, - u'topicmodel_seed': u'26c386d781963ca1ea5c90dab8a6b023b5e1d180', - u'topics': [ { u'id': u'000000', - u'name': u'Topic 00', - u'probability': 0.09375, - u'top_terms': [ [ u'im', - 0.04849], - [ u'hi', - 0.04717], - [ u'love', - 0.04585], - [ u'please', - 0.02867], - [ u'tomorrow', - 0.02867], - [ u'cos', - 0.02823], - [ u'sent', - 0.02647], - [ u'da', - 0.02383], - [ u'meet', - 0.02207], - [ u'dinner', - 0.01898]]}, - { u'id': u'000001', - u'name': u'Topic 01', - u'probability': 0.08215, - u'top_terms': [ [ u'lt', - 0.1015], - [ u'gt', - 0.1007], - [ u'wish', - 0.03958], - [ u'feel', - 0.0272], - [ u'shit', - 0.02361], - [ u'waiting', - 0.02281], - [ u'stuff', - 0.02001], - [ u'name', - 0.01921], - [ u'comp', - 0.01522], - [ u'forgot', - 0.01482]]}, - ... - { u'id': u'00000b', - u'name': u'Topic 11', - u'probability': 0.0826, - u'top_terms': [ [ u'call', - 0.15084], - [ u'min', - 0.05003], - [ u'msg', - 0.03185], - [ u'home', - 0.02648], - [ u'mind', - 0.02152], - [ u'lt', - 0.01987], - [ u'bring', - 0.01946], - [ u'camera', - 0.01905], - [ u'set', - 0.01905], - [ u'contact', - 0.01781]]}], - u'use_stopwords': False}, - u'updated': u'2016-11-23T23:48:03.336000', - u'white_box': False} - -Note that the output in the snippet above has been abbreviated. - - -The topic model returns a list of top terms for each topic found in the data. -Note that topics are not labeled, so you have to infer their meaning according -to the words they are composed of. - -Once you build the topic model you can calculate each topic probability -for a given document by using Topic Distribution. -This information can be useful to find documents similarities based -on their thematic. -As you see, -the ``topic_model`` attribute stores the topics and termset and term to -topic assignment, -as well as the configuration parameters described in -the `developers section `_ . 
+You can check the fusion properties at the `API documentation +`_. Time Series ------------ +~~~~~~~~~~~ A time series model is a supervised learning method to forecast the future values of a field based on its previously observed values. @@ -2568,449 +1917,1489 @@ The JSON structure for a time series is: .. code-block:: python >>> api.pprint(time_series['object']) - { u'category': 0, - u'clones': 0, - u'code': 200, - u'columns': 1, - u'configuration': None, - u'configuration_status': False, - u'created': u'2017-07-15T12:49:42.601000', - u'credits': 0.0, - u'dataset': u'dataset/5968ec42983efc21b0000016', - u'dataset_field_types': { u'categorical': 0, - u'datetime': 0, - u'effective_fields': 6, - u'items': 0, - u'numeric': 6, - u'preferred': 6, - u'text': 0, - u'total': 6}, - u'dataset_status': True, - u'dataset_type': 0, - u'description': u'', - u'fields_meta': { u'count': 1, - u'limit': 1000, - u'offset': 0, - u'query_total': 1, - u'total': 1}, - u'forecast': { u'000005': [ { u'lower_bound': [ 30.14111, + { 'category': 0, + 'clones': 0, + 'code': 200, + 'columns': 1, + 'configuration': None, + 'configuration_status': False, + 'created': '2017-07-15T12:49:42.601000', + 'credits': 0.0, + 'dataset': 'dataset/5968ec42983efc21b0000016', + 'dataset_field_types': { 'categorical': 0, + 'datetime': 0, + 'effective_fields': 6, + 'items': 0, + 'numeric': 6, + 'preferred': 6, + 'text': 0, + 'total': 6}, + 'dataset_status': True, + 'dataset_type': 0, + 'description': '', + 'fields_meta': { 'count': 1, + 'limit': 1000, + 'offset': 0, + 'query_total': 1, + 'total': 1}, + 'forecast': { '000005': [ { 'lower_bound': [ 30.14111, 30.14111, ... 30.14111], - u'model': u'A,N,N', - u'point_forecast': [ 68.53181, + 'model': 'A,N,N', + 'point_forecast': [ 68.53181, 68.53181, ... 
68.53181, 68.53181], - u'time_range': { u'end': 129, - u'interval': 1, - u'interval_unit': u'milliseconds', - u'start': 80}, - u'upper_bound': [ 106.92251, + 'time_range': { 'end': 129, + 'interval': 1, + 'interval_unit': 'milliseconds', + 'start': 80}, + 'upper_bound': [ 106.92251, 106.92251, ... 106.92251, 106.92251]}, - { u'lower_bound': [ 35.44118, + { 'lower_bound': [ 35.44118, 35.5032, ... 35.28083], - u'model': u'A,Ad,N', + 'model': 'A,Ad,N', ... 66.83537, 66.9465], - u'time_range': { u'end': 129, - u'interval': 1, - u'interval_unit': u'milliseconds', - u'start': 80}}]}, - u'horizon': 50, - u'locale': u'en_US', - u'max_columns': 6, - u'max_rows': 80, - u'name': u'my_ts_data', - u'name_options': u'period=1, range=[1, 80]', - u'number_of_evaluations': 0, - u'number_of_forecasts': 0, - u'number_of_public_forecasts': 0, - u'objective_field': u'000005', - u'objective_field_name': u'Final', - u'objective_field_type': u'numeric', - u'objective_fields': [u'000005'], - u'objective_fields_names': [u'Final'], - u'price': 0.0, - u'private': True, - u'project': None, - u'range': [1, 80], - u'resource': u'timeseries/596a0f66983efc53f3000000', - u'rows': 80, - u'shared': False, - u'short_url': u'', - u'size': 2691, - u'source': u'source/5968ec3c983efc218c000006', - u'source_status': True, - u'status': { u'code': 5, - u'elapsed': 8358, - u'message': u'The time series has been created', - u'progress': 1.0}, - u'subscription': True, - u'tags': [], - u'time_series': { u'all_numeric_objectives': False, - u'datasets': { u'000005': u'dataset/596a0f70983efc53f3000003'}, - u'ets_models': { u'000005': [ { u'aic': 831.30903, - u'aicc': 831.84236, - u'alpha': 0.00012, - u'beta': 0, - u'bic': 840.83713, - u'final_state': { u'b': 0, - u'l': 68.53181, - u's': [ 0]}, - u'gamma': 0, - u'initial_state': { u'b': 0, - u'l': 68.53217, - u's': [ 0]}, - u'name': u'A,N,N', - u'period': 1, - u'phi': 1, - u'r_squared': -0.0187, - u'sigma': 19.19535}, - { u'aic': 834.43049, + 'time_range': { 'end': 
129, + 'interval': 1, + 'interval_unit': 'milliseconds', + 'start': 80}}]}, + 'horizon': 50, + 'locale': 'en_US', + 'max_columns': 6, + 'max_rows': 80, + 'name': 'my_ts_data', + 'name_options': 'period=1, range=[1, 80]', + 'number_of_evaluations': 0, + 'number_of_forecasts': 0, + 'number_of_public_forecasts': 0, + 'objective_field': '000005', + 'objective_field_name': 'Final', + 'objective_field_type': 'numeric', + 'objective_fields': ['000005'], + 'objective_fields_names': ['Final'], + 'price': 0.0, + 'private': True, + 'project': None, + 'range': [1, 80], + 'resource': 'timeseries/596a0f66983efc53f3000000', + 'rows': 80, + 'shared': False, + 'short_url': '', + 'size': 2691, + 'source': 'source/5968ec3c983efc218c000006', + 'source_status': True, + 'status': { 'code': 5, + 'elapsed': 8358, + 'message': 'The time series has been created', + 'progress': 1.0}, + 'subscription': True, + 'tags': [], + 'time_series': { 'all_numeric_objectives': False, + 'datasets': { '000005': 'dataset/596a0f70983efc53f3000003'}, + 'ets_models': { '000005': [ { 'aic': 831.30903, + 'aicc': 831.84236, + 'alpha': 0.00012, + 'beta': 0, + 'bic': 840.83713, + 'final_state': { 'b': 0, + 'l': 68.53181, + 's': [ 0]}, + 'gamma': 0, + 'initial_state': { 'b': 0, + 'l': 68.53217, + 's': [ 0]}, + 'name': 'A,N,N', + 'period': 1, + 'phi': 1, + 'r_squared': -0.0187, + 'sigma': 19.19535}, + { 'aic': 834.43049, ... - u'slope': 0.11113, - u'value': 61.39}]}, - u'fields': { u'000005': { u'column_number': 5, - u'datatype': u'double', - u'name': u'Final', - u'optype': u'numeric', - u'order': 0, - u'preferred': True, - u'summary': { u'bins': [ [ 28.06, + 'slope': 0.11113, + 'value': 61.39}]}, + 'fields': { '000005': { 'column_number': 5, + 'datatype': 'double', + 'name': 'Final', + 'optype': 'numeric', + 'order': 0, + 'preferred': True, + 'summary': { 'bins': [ [ 28.06, 1], [ 34.44, ... [ 108.335, 2]], ... 
- u'sum_squares': 389814.3944, - u'variance': 380.73315}}}, - u'period': 1, - u'time_range': { u'end': 79, - u'interval': 1, - u'interval_unit': u'milliseconds', - u'start': 0}}, - u'type': 0, - u'updated': u'2017-07-15T12:49:52.549000', - u'white_box': False} + 'sum_squares': 389814.3944, + 'variance': 380.73315}}}, + 'period': 1, + 'time_range': { 'end': 79, + 'interval': 1, + 'interval_unit': 'milliseconds', + 'start': 0}}, + 'type': 0, + 'updated': '2017-07-15T12:49:52.549000', + 'white_box': False} You can check the time series properties at the `API documentation -`_. - - - -OptiMLs -------- +`_. -An OptiML is the result of an automated optimization process to find the -best model (type and configuration) to solve a particular -classification or regression problem. -The selection process automates the usual time-consuming task of trying -different models and parameters and evaluating their results to find the -best one. Using the OptiML, non-experts can build top-performing models. +Unsupervised Models +------------------- -You can create an OptiML selecting the ojective field to be predicted, the -evaluation metric to be used to rank the models tested in the process and -a maximum time for the task to be run. +Cluster +~~~~~~~ -The JSON structure for an OptiML is: +For unsupervised learning problems, the cluster is used to classify in a +limited number of groups your training data. The cluster structure is defined +by the centers of each group of data, named centroids, and the data enclosed +in the group. As for in the model's case, the cluster is a white-box resource +and can be retrieved as a JSON: .. 
code-block:: python - >>> api.pprint(optiml["object"]) - { u'category': 0, - u'code': 200, - u'configuration': None, - u'configuration_status': False, - u'created': u'2018-05-17T20:23:00.060000', - u'creator': u'mmartin', - u'dataset': u'dataset/5afdb7009252732d930009e8', - u'dataset_status': True, - u'datasets': [ u'dataset/5afde6488bf7d551ee00081c', - u'dataset/5afde6488bf7d551fd00511f', - u'dataset/5afde6488bf7d551fe002e0f', - ... - u'dataset/5afde64d8bf7d551fd00512e'], - u'description': u'', - u'evaluations': [ u'evaluation/5afde65c8bf7d551fd00514c', - u'evaluation/5afde65c8bf7d551fd00514f', - ... - u'evaluation/5afde6628bf7d551fd005161'], - u'excluded_fields': [], - u'fields_meta': { u'count': 5, - u'limit': 1000, - u'offset': 0, - u'query_total': 5, - u'total': 5}, - u'input_fields': [u'000000', u'000001', u'000002', u'000003'], - u'model_count': { u'logisticregression': 1, u'model': 8, u'total': 9}, - u'models': [ u'model/5afde64e8bf7d551fd005131', - u'model/5afde64f8bf7d551fd005134', - u'model/5afde6518bf7d551fd005137', - u'model/5afde6538bf7d551fd00513a', - u'logisticregression/5afde6558bf7d551fd00513d', - ... - u'model/5afde65a8bf7d551fd005149'], - u'models_meta': { u'count': 9, u'limit': 1000, u'offset': 0, u'total': 9}, - u'name': u'iris', - u'name_options': u'9 total models (logisticregression: 1, model: 8), metric=max_phi, model candidates=18, max. 
training time=300', - u'objective_field': u'000004', - u'objective_field_details': { u'column_number': 4, - u'datatype': u'string', - u'name': u'species', - u'optype': u'categorical', - u'order': 4}, - u'objective_field_name': u'species', - u'objective_field_type': u'categorical', - u'objective_fields': [u'000004'], - u'optiml': { u'created_resources': { u'dataset': 10, - u'logisticregression': 11, - u'logisticregression_evaluation': 11, - u'model': 29, - u'model_evaluation': 29}, - u'datasets': [ { u'id': u'dataset/5afde6488bf7d551ee00081c', - u'name': u'iris', - u'name_options': u'120 instances, 5 fields (1 categorical, 4 numeric), sample rate=0.8'}, - { u'id': u'dataset/5afde6488bf7d551fd00511f', - u'name': u'iris', - u'name_options': u'30 instances, 5 fields (1 categorical, 4 numeric), sample rate=0.2, out of bag'}, - { u'id': u'dataset/5afde6488bf7d551fe002e0f', - u'name': u'iris', - u'name_options': u'120 instances, 5 fields (1 categorical, 4 numeric), sample rate=0.8'}, - ... - { u'id': u'dataset/5afde64d8bf7d551fd00512e', - u'name': u'iris', - u'name_options': u'120 instances, 5 fields (1 categorical, 4 numeric), sample rate=0.8'}], - u'fields': { u'000000': { u'column_number': 0, - u'datatype': u'double', - u'name': u'sepal length', - u'optype': u'numeric', - u'order': 0, - u'preferred': True, - u'summary': { u'bins': [ [ 4.3, - 1], - ... - [ 7.9, - 1]], - ... 
- u'sum': 179.9, - u'sum_squares': 302.33, - u'variance': 0.58101}}, - u'000004': { u'column_number': 4, - u'datatype': u'string', - u'name': u'species', - u'optype': u'categorical', - u'order': 4, - u'preferred': True, - u'summary': { u'categories': [ [ u'Iris-setosa', - 50], - [ u'Iris-versicolor', - 50], - [ u'Iris-virginica', - 50]], - u'missing_count': 0}, - u'term_analysis': { u'enabled': True}}}, - u'max_training_time': 300, - u'metric': u'max_phi', - u'model_types': [u'model', u'logisticregression'], - u'models': [ { u'evaluation': { u'id': u'evaluation/5afde65c8bf7d551fd00514c', - u'info': { u'accuracy': 0.96667, - u'average_area_under_pr_curve': 0.97867, - ... - u'per_class_statistics': [ { u'accuracy': 1, - u'area_under_pr_curve': 1, - ... - u'spearmans_rho': 0.82005}]}, - u'metric_value': 0.95356, - u'metric_variance': 0.00079, - u'name': u'iris vs. iris', - u'name_options': u'279-node, deterministic order, operating kind=probability'}, - u'evaluation_count': 3, - u'id': u'model/5afde64e8bf7d551fd005131', - u'importance': [ [ u'000002', - 0.70997], - [ u'000003', - 0.27289], - [ u'000000', - 0.0106], - [ u'000001', - 0.00654]], - u'kind': u'model', - u'name': u'iris', - u'name_options': u'279-node, deterministic order'}, - { u'evaluation': { u'id': u'evaluation/5afde65c8bf7d551fd00514f', - u'info': { u'accuracy': 0.93333, - - ... - [ u'000001', - 0.02133]], - u'kind': u'model', - u'name': u'iris', - u'name_options': u'12-node, randomize, deterministic order, balanced'}], - u'number_of_model_candidates': 18, - u'recent_evaluations': [ 0.90764, - 0.94952, - ... 
- 0.90427], - u'search_complete': True, - u'summary': { u'logisticregression': { u'best': u'logisticregression/5afde6558bf7d551fd00513d', - u'count': 1}, - u'model': { u'best': u'model/5afde64e8bf7d551fd005131', - u'count': 8}}}, - u'private': True, - u'project': None, - u'resource': u'optiml/5afde4a42a83475c1b0008a2', - u'shared': False, - u'size': 3686, - u'source': u'source/5afdb6fb9252732d930009e5', - u'source_status': True, - u'status': { u'code': 5, - u'elapsed': 448878.0, - u'message': u'The optiml has been created', - u'progress': 1}, - u'subscription': False, - u'tags': [], - u'test_dataset': None, - u'type': 0, - u'updated': u'2018-05-17T20:30:29.063000'} - + >>> cluster = api.get_cluster(cluster) + >>> api.pprint(cluster['object']) + { 'balance_fields': True, + 'category': 0, + 'cluster_datasets': { '000000': '', '000001': '', '000002': ''}, + 'cluster_datasets_ids': { '000000': '53739b9ae4b0dad82b0a65e6', + '000001': '53739b9ae4b0dad82b0a65e7', + '000002': '53739b9ae4b0dad82b0a65e8'}, + 'cluster_seed': '2c249dda00fbf54ab4cdd850532a584f286af5b6', + 'clusters': { 'clusters': [ { 'center': { '000000': 58.5, + '000001': 26.8314, + '000002': 44.27907, + '000003': 14.37209}, + 'count': 56, + 'distance': { 'bins': [ [ 0.69602, + 2], + [ ... ] + [ 3.77052, + 1]], + 'maximum': 3.77052, + 'mean': 1.61711, + 'median': 1.52146, + 'minimum': 0.69237, + 'population': 56, + 'standard_deviation': 0.6161, + 'sum': 90.55805, + 'sum_squares': 167.31926, + 'variance': 0.37958}, + 'id': '000000', + 'name': 'Cluster 0'}, + { 'center': { '000000': 50.06, + '000001': 34.28, + '000002': 14.62, + '000003': 2.46}, + 'count': 50, + 'distance': { 'bins': [ [ 0.16917, + 1], + [ ... 
] + [ 4.94699, + 1]], + 'maximum': 4.94699, + 'mean': 1.50725, + 'median': 1.3393, + 'minimum': 0.16917, + 'population': 50, + 'standard_deviation': 1.00994, + 'sum': 75.36252, + 'sum_squares': 163.56918, + 'variance': 1.01998}, + 'id': '000001', + 'name': 'Cluster 1'}, + { 'center': { '000000': 68.15625, + '000001': 31.25781, + '000002': 55.48438, + '000003': 19.96875}, + 'count': 44, + 'distance': { 'bins': [ [ 0.36825, + 1], + [ ... ] + [ 3.87216, + 1]], + 'maximum': 3.87216, + 'mean': 1.67264, + 'median': 1.63705, + 'minimum': 0.36825, + 'population': 44, + 'standard_deviation': 0.78905, + 'sum': 73.59627, + 'sum_squares': 149.87194, + 'variance': 0.6226}, + 'id': '000002', + 'name': 'Cluster 2'}], + 'fields': { '000000': { 'column_number': 0, + 'datatype': 'int8', + 'name': 'sepal length', + 'optype': 'numeric', + 'order': 0, + 'preferred': True, + 'summary': { 'bins': [ [ 43.75, + 4], + [ ... ] + [ 79, + 1]], + 'maximum': 79, + 'mean': 58.43333, + 'median': 57.7889, + 'minimum': 43, + 'missing_count': 0, + 'population': 150, + 'splits': [ 45.15258, + 46.72525, + 72.04226, + 76.47461], + 'standard_deviation': 8.28066, + 'sum': 8765, + 'sum_squares': 522385, + 'variance': 68.56935}}, + [ ... 
] + [ 25, + 3]], + 'maximum': 25, + 'mean': 11.99333, + 'median': 13.28483, + 'minimum': 1, + 'missing_count': 0, + 'population': 150, + 'standard_deviation': 7.62238, + 'sum': 1799, + 'sum_squares': 30233, + 'variance': 58.10063}}}}, + 'code': 202, + 'columns': 4, + 'created': '2014-05-14T16:36:40.993000', + 'credits': 0.017578125, + 'credits_per_prediction': 0.0, + 'dataset': 'dataset/53739b88c8db63122b000411', + 'dataset_field_types': { 'categorical': 1, + 'datetime': 0, + 'numeric': 4, + 'preferred': 5, + 'text': 0, + 'total': 5}, + 'dataset_status': True, + 'dataset_type': 0, + 'description': '', + 'excluded_fields': ['000004'], + 'field_scales': None, + 'fields_meta': { 'count': 4, + 'limit': 1000, + 'offset': 0, + 'query_total': 4, + 'total': 4}, + 'input_fields': ['000000', '000001', '000002', '000003'], + 'k': 3, + 'locale': 'es-ES', + 'max_columns': 5, + 'max_rows': 150, + 'name': 'my iris', + 'number_of_batchcentroids': 0, + 'number_of_centroids': 0, + 'number_of_public_centroids': 0, + 'out_of_bag': False, + 'price': 0.0, + 'private': True, + 'range': [1, 150], + 'replacement': False, + 'resource': 'cluster/53739b98d994972da7001de9', + 'rows': 150, + 'sample_rate': 1.0, + 'scales': { '000000': 0.22445382597655375, + '000001': 0.4264213814821549, + '000002': 0.10528680248949522, + '000003': 0.2438379900517961}, + 'shared': False, + 'size': 4608, + 'source': 'source/53739b24d994972da7001ddd', + 'source_status': True, + 'status': { 'code': 5, + 'elapsed': 1009, + 'message': 'The cluster has been created', + 'progress': 1.0}, + 'subscription': True, + 'tags': [], + 'updated': '2014-05-14T16:40:26.234728', + 'white_box': False} + +(Note that we have abbreviated the output in the snippet above for +readability: the full predictive cluster yo'll get is going to contain +much more details). + +You can check the cluster properties at the `API documentation +`_. 
+ +Anomaly detector +~~~~~~~~~~~~~~~~ + +For anomaly detection problems, BigML anomaly detector uses iforest as an +unsupervised kind of model that detects anomalous data in a dataset. The +information it returns encloses a `top_anomalies` block +that contains a list of the most anomalous +points. For each, we capture a `score` from 0 to 1. The closer to 1, +the more anomalous. We also capture the `row` which gives values for +each field in the order defined by `input_fields`. Similarly we give +a list of `importances` which match the `row` values. These +importances tell us which values contributed most to the anomaly +score. Thus, the structure of an anomaly detector is similar to: + +.. code-block:: python + + { 'category': 0, + 'code': 200, + 'columns': 14, + 'constraints': False, + 'created': '2014-09-08T18:51:11.893000', + 'credits': 0.11653518676757812, + 'credits_per_prediction': 0.0, + 'dataset': 'dataset/540dfa9d9841fa5c88000765', + 'dataset_field_types': { 'categorical': 21, + 'datetime': 0, + 'numeric': 21, + 'preferred': 14, + 'text': 0, + 'total': 42}, + 'dataset_status': True, + 'dataset_type': 0, + 'description': '', + 'excluded_fields': [], + 'fields_meta': { 'count': 14, + 'limit': 1000, + 'offset': 0, + 'query_total': 14, + 'total': 14}, + 'forest_size': 128, + 'input_fields': [ '000004', + '000005', + '000009', + '000016', + '000017', + '000018', + '000019', + '00001e', + '00001f', + '000020', + '000023', + '000024', + '000025', + '000026'], + 'locale': 'en_US', + 'max_columns': 42, + 'max_rows': 200, + 'model': { 'fields': { '000004': { 'column_number': 4, + 'datatype': 'int16', + 'name': 'src_bytes', + 'optype': 'numeric', + 'order': 0, + 'preferred': True, + 'summary': { 'bins': [ [ 143, + 2], + ... + [ 370, + 2]], + 'maximum': 370, + 'mean': 248.235, + 'median': 234.57157, + 'minimum': 141, + 'missing_count': 0, + 'population': 200, + 'splits': [ 159.92462, + 173.73312, + 188, + ... 
+ 339.55228], + 'standard_deviation': 49.39869, + 'sum': 49647, + 'sum_squares': 12809729, + 'variance': 2440.23093}}, + '000005': { 'column_number': 5, + 'datatype': 'int32', + 'name': 'dst_bytes', + 'optype': 'numeric', + 'order': 1, + 'preferred': True, + ... + 'sum': 1030851, + 'sum_squares': 22764504759, + 'variance': 87694652.45224}}, + '000009': { 'column_number': 9, + 'datatype': 'string', + 'name': 'hot', + 'optype': 'categorical', + 'order': 2, + 'preferred': True, + 'summary': { 'categories': [ [ '0', + 199], + [ '1', + 1]], + 'missing_count': 0}, + 'term_analysis': { 'enabled': True}}, + '000016': { 'column_number': 22, + 'datatype': 'int8', + 'name': 'count', + 'optype': 'numeric', + 'order': 3, + 'preferred': True, + ... + 'population': 200, + 'standard_deviation': 5.42421, + 'sum': 1351, + 'sum_squares': 14981, + 'variance': 29.42209}}, + '000017': { ... }}}, + 'kind': 'iforest', + 'mean_depth': 12.314174107142858, + 'top_anomalies': [ { 'importance': [ 0.06768, + 0.01667, + 0.00081, + 0.02437, + 0.04773, + 0.22197, + 0.18208, + 0.01868, + 0.11855, + 0.01983, + 0.01898, + 0.05306, + 0.20398, + 0.00562], + 'row': [ 183.0, + 8654.0, + '0', + 4.0, + 4.0, + 0.25, + 0.25, + 0.0, + 123.0, + 255.0, + 0.01, + 0.04, + 0.01, + 0.0], + 'score': 0.68782}, + { 'importance': [ 0.05645, + 0.02285, + 0.0015, + 0.05196, + 0.04435, + 0.0005, + 0.00056, + 0.18979, + 0.12402, + 0.23671, + 0.20723, + 0.05651, + 0.00144, + 0.00612], + 'row': [ 212.0, + 1940.0, + '0', + 1.0, + 2.0, + 0.0, + 0.0, + 1.0, + 1.0, + 69.0, + 1.0, + 0.04, + 0.0, + 0.0], + 'score': 0.6239}, + ...], + 'trees': [ { 'root': { 'children': [ { 'children': [ { 'children': [ { 'children': [ { 'children': + [ { 'population': 1, + 'predicates': [ { 'field': '00001f', + 'op': '>', + 'value': 35.54357}]}, + + ... 
+ { 'population': 1, + 'predicates': [ { 'field': '00001f', + 'op': '<=', + 'value': 35.54357}]}], + 'population': 2, + 'predicates': [ { 'field': '000005', + 'op': '<=', + 'value': 1385.5166}]}], + 'population': 3, + 'predicates': [ { 'field': '000020', + 'op': '<=', + 'value': 65.14308}, + { 'field': '000019', + 'op': '=', + 'value': 0}]}], + 'population': 105, + 'predicates': [ { 'field': '000017', + 'op': '<=', + 'value': 13.21754}, + { 'field': '000009', + 'op': 'in', + 'value': [ '0']}]}], + 'population': 126, + 'predicates': [ True, + { 'field': '000018', + 'op': '=', + 'value': 0}]}, + 'training_mean_depth': 11.071428571428571}]}, + 'name': "tiny_kdd's dataset anomaly detector", + 'number_of_batchscores': 0, + 'number_of_public_predictions': 0, + 'number_of_scores': 0, + 'out_of_bag': False, + 'price': 0.0, + 'private': True, + 'project': None, + 'range': [1, 200], + 'replacement': False, + 'resource': 'anomaly/540dfa9f9841fa5c8800076a', + 'rows': 200, + 'sample_rate': 1.0, + 'sample_size': 126, + 'seed': 'BigML', + 'shared': False, + 'size': 30549, + 'source': 'source/540dfa979841fa5c7f000363', + 'source_status': True, + 'status': { 'code': 5, + 'elapsed': 32397, + 'message': 'The anomaly detector has been created', + 'progress': 1.0}, + 'subscription': False, + 'tags': [], + 'updated': '2014-09-08T23:54:28.647000', + 'white_box': False} + +Note that we have abbreviated the output in the snippet above for +readability: the full anomaly detector yo'll get is going to contain +much more details). + +The `trees` list contains the actual isolation forest, and it can be quite +large usually. That's why, this part of the resource should only be included +in downloads when needed. If you are only interested in other properties, such +as `top_anomalies`, yo'll improve performance by excluding it, using the +`excluded=trees` query string in the API call: + +.. 
code-block:: python
+
+    anomaly = api.get_anomaly('anomaly/540dfa9f9841fa5c8800076a', \
+                              query_string='excluded=trees')
+
+Each node in an isolation tree can have multiple predicates.
+For the node to be a valid branch when evaluated with a data point, all of its
+predicates must be true.
+
+You can check the anomaly detector properties at the `API documentation
+`_.
+
+Associations
+~~~~~~~~~~~~
+
+Association Discovery is a popular method to find out relations among values
+in high-dimensional datasets.
+
+A common case where association discovery is often used is
+market basket analysis. This analysis looks for customer shopping
+patterns across large transactional
+datasets. For instance, do customers who buy hamburgers and ketchup also
+consume bread?
+
+Businesses use those insights to make decisions on promotions and product
+placements.
+Association Discovery can also be used for other purposes such as early
+incident detection, web usage analysis, or software intrusion detection.
+
+In BigML, the Association resource object can be built from any dataset, and
+its results are a list of association rules between the items in the dataset.
+In the example case, the corresponding
+association rule would have hamburgers and ketchup as the items at the
+left hand side of the association rule and bread would be the item at the
+right hand side. Both sides in this association rule are related,
+in the sense that observing
+the items in the left hand side implies observing the items in the right hand
+side. There are some metrics to ponder the quality of these association rules:
+
+- Support: the proportion of instances which contain an itemset.
+
+For an association rule, it means the number of instances in the dataset which
+contain the rule's antecedent and rule's consequent together
+over the total number of instances (N) in the dataset.
+
+It gives a measure of the importance of the rule. 
Association rules have
+to satisfy a minimum support constraint (i.e., min_support).
+
+- Coverage: the support of the antecedent of an association rule.
+It measures how often a rule can be applied.
+
+- Confidence (or strength): The probability of seeing the rule's consequent
+under the condition that the instances also contain the rule's antecedent.
+Confidence is computed using the support of the association rule over the
+coverage. That is, the percentage of instances which contain the consequent
+and antecedent together over the number of instances which only contain
+the antecedent.
+
+Confidence is directed and gives different values for the association
+rules Antecedent → Consequent and Consequent → Antecedent. Association
+rules also need to satisfy a minimum confidence constraint
+(i.e., min_confidence).
+
+- Leverage: the difference of the support of the association
+rule (i.e., the antecedent and consequent appearing together) and what would
+be expected if antecedent and consequent were statistically independent.
+This is a value between -1 and 1. A positive value suggests a positive
+relationship and a negative value suggests a negative relationship.
+0 indicates independence.
+
+- Lift: how many times more often antecedent and consequent occur together
+than expected if they were statistically independent.
+A value of 1 suggests that there is no relationship between the antecedent
+and the consequent. Higher values suggest stronger positive relationships.
+Lower values suggest stronger negative relationships (the presence of the
+antecedent reduces the likelihood of the consequent).
+
+As to the items used in association rules, each type of field is parsed to
+extract items for the rules as follows:
+
+- Categorical: each different value (class) will be considered a separate item.
+- Text: each unique term will be considered a separate item.
+- Items: each different item in the items summary will be considered. 
+- Numeric: Values will be converted into categorical by making a +segmentation of the values. +For example, a numeric field with values ranging from 0 to 600 split +into 3 segments: +segment 1 → [0, 200), segment 2 → [200, 400), segment 3 → [400, 600]. +You can refine the behavior of the transformation using +`discretization `_ +and `field_discretizations `_. + +The JSON structure for an association resource is: + +.. code-block:: python + + + >>> api.pprint(association['object']) + { + "associations":{ + "complement":false, + "discretization":{ + "pretty":true, + "size":5, + "trim":0, + "type":"width" + }, + "items":[ + { + "complement":false, + "count":32, + "field_id":"000000", + "name":"Segment 1", + "bin_end":5, + "bin_start":null + }, + { + "complement":false, + "count":49, + "field_id":"000000", + "name":"Segment 3", + "bin_end":7, + "bin_start":6 + }, + { + "complement":false, + "count":12, + "field_id":"000000", + "name":"Segment 4", + "bin_end":null, + "bin_start":7 + }, + { + "complement":false, + "count":19, + "field_id":"000001", + "name":"Segment 1", + "bin_end":2.5, + "bin_start":null + }, + ... + { + "complement":false, + "count":50, + "field_id":"000004", + "name":"Iris-versicolor" + }, + { + "complement":false, + "count":50, + "field_id":"000004", + "name":"Iris-virginica" + } + ], + "max_k": 100, + "min_confidence":0, + "min_leverage":0, + "min_lift":1, + "min_support":0, + "rules":[ + { + "confidence":1, + "id":"000000", + "leverage":0.22222, + "lhs":[ + 13 + ], + "lhs_cover":[ + 0.33333, + 50 + ], + "lift":3, + "p_value":0.000000000, + "rhs":[ + 6 + ], + "rhs_cover":[ + 0.33333, + 50 + ], + "support":[ + 0.33333, + 50 + ] + }, + { + "confidence":1, + "id":"000001", + "leverage":0.22222, + "lhs":[ + 6 + ], + "lhs_cover":[ + 0.33333, + 50 + ], + "lift":3, + "p_value":0.000000000, + "rhs":[ + 13 + ], + "rhs_cover":[ + 0.33333, + 50 + ], + "support":[ + 0.33333, + 50 + ] + }, + ... 
+ { + "confidence":0.26, + "id":"000029", + "leverage":0.05111, + "lhs":[ + 13 + ], + "lhs_cover":[ + 0.33333, + 50 + ], + "lift":2.4375, + "p_value":0.0000454342, + "rhs":[ + 5 + ], + "rhs_cover":[ + 0.10667, + 16 + ], + "support":[ + 0.08667, + 13 + ] + }, + { + "confidence":0.18, + "id":"00002a", + "leverage":0.04, + "lhs":[ + 15 + ], + "lhs_cover":[ + 0.33333, + 50 + ], + "lift":3, + "p_value":0.0000302052, + "rhs":[ + 9 + ], + "rhs_cover":[ + 0.06, + 9 + ], + "support":[ + 0.06, + 9 + ] + }, + { + "confidence":1, + "id":"00002b", + "leverage":0.04, + "lhs":[ + 9 + ], + "lhs_cover":[ + 0.06, + 9 + ], + "lift":3, + "p_value":0.0000302052, + "rhs":[ + 15 + ], + "rhs_cover":[ + 0.33333, + 50 + ], + "support":[ + 0.06, + 9 + ] + } + ], + "rules_summary":{ + "confidence":{ + "counts":[ + [ + 0.18, + 1 + ], + [ + 0.24, + 1 + ], + [ + 0.26, + 2 + ], + ... + [ + 0.97959, + 1 + ], + [ + 1, + 9 + ] + ], + "maximum":1, + "mean":0.70986, + "median":0.72864, + "minimum":0.18, + "population":44, + "standard_deviation":0.24324, + "sum":31.23367, + "sum_squares":24.71548, + "variance":0.05916 + }, + "k":44, + "leverage":{ + "counts":[ + [ + 0.04, + 2 + ], + [ + 0.05111, + 4 + ], + [ + 0.05316, + 2 + ], + ... + [ + 0.22222, + 2 + ] + ], + "maximum":0.22222, + "mean":0.10603, + "median":0.10156, + "minimum":0.04, + "population":44, + "standard_deviation":0.0536, + "sum":4.6651, + "sum_squares":0.61815, + "variance":0.00287 + }, + "lhs_cover":{ + "counts":[ + [ + 0.06, + 2 + ], + [ + 0.08, + 2 + ], + [ + 0.10667, + 4 + ], + [ + 0.12667, + 1 + ], + ... + [ + 0.5, + 4 + ] + ], + "maximum":0.5, + "mean":0.29894, + "median":0.33213, + "minimum":0.06, + "population":44, + "standard_deviation":0.13386, + "sum":13.15331, + "sum_squares":4.70252, + "variance":0.01792 + }, + "lift":{ + "counts":[ + [ + 1.40625, + 2 + ], + [ + 1.5067, + 2 + ], + ... 
+ [ + 2.63158, + 4 + ], + [ + 3, + 10 + ], + [ + 4.93421, + 2 + ], + [ + 12.5, + 2 + ] + ], + "maximum":12.5, + "mean":2.91963, + "median":2.58068, + "minimum":1.40625, + "population":44, + "standard_deviation":2.24641, + "sum":128.46352, + "sum_squares":592.05855, + "variance":5.04635 + }, + "p_value":{ + "counts":[ + [ + 0.000000000, + 2 + ], + [ + 0.000000000, + 4 + ], + [ + 0.000000000, + 2 + ], + ... + [ + 0.0000910873, + 2 + ] + ], + "maximum":0.0000910873, + "mean":0.0000106114, + "median":0.00000000, + "minimum":0.000000000, + "population":44, + "standard_deviation":0.0000227364, + "sum":0.000466903, + "sum_squares":0.0000000, + "variance":0.000000001 + }, + "rhs_cover":{ + "counts":[ + [ + 0.06, + 2 + ], + [ + 0.08, + 2 + ], + ... + [ + 0.42667, + 2 + ], + [ + 0.46667, + 3 + ], + [ + 0.5, + 4 + ] + ], + "maximum":0.5, + "mean":0.29894, + "median":0.33213, + "minimum":0.06, + "population":44, + "standard_deviation":0.13386, + "sum":13.15331, + "sum_squares":4.70252, + "variance":0.01792 + }, + "support":{ + "counts":[ + [ + 0.06, + 4 + ], + [ + 0.06667, + 2 + ], + [ + 0.08, + 2 + ], + [ + 0.08667, + 4 + ], + [ + 0.10667, + 4 + ], + [ + 0.15333, + 2 + ], + [ + 0.18667, + 4 + ], + [ + 0.19333, + 2 + ], + [ + 0.20667, + 2 + ], + [ + 0.27333, + 2 + ], + [ + 0.28667, + 2 + ], + [ + 0.3, + 4 + ], + [ + 0.32, + 2 + ], + [ + 0.33333, + 6 + ], + [ + 0.37333, + 2 + ] + ], + "maximum":0.37333, + "mean":0.20152, + "median":0.19057, + "minimum":0.06, + "population":44, + "standard_deviation":0.10734, + "sum":8.86668, + "sum_squares":2.28221, + "variance":0.01152 + } + }, + "search_strategy":"leverage", + "significance_level":0.05 + }, + "category":0, + "clones":0, + "code":200, + "columns":5, + "created":"2015-11-05T08:06:08.184000", + "credits":0.017581939697265625, + "dataset":"dataset/562fae3f4e1727141d00004e", + "dataset_status":true, + "dataset_type":0, + "description":"", + "excluded_fields":[ ], + "fields_meta":{ + "count":5, + "limit":1000, + "offset":0, + 
"query_total":5, + "total":5 + }, + "input_fields":[ + "000000", + "000001", + "000002", + "000003", + "000004" + ], + "locale":"en_US", + "max_columns":5, + "max_rows":150, + "name":"iris' dataset's association", + "out_of_bag":false, + "price":0, + "private":true, + "project":null, + "range":[ + 1, + 150 + ], + "replacement":false, + "resource":"association/5621b70910cb86ae4c000000", + "rows":150, + "sample_rate":1, + "shared":false, + "size":4609, + "source":"source/562fae3a4e1727141d000048", + "source_status":true, + "status":{ + "code":5, + "elapsed":1072, + "message":"The association has been created", + "progress":1 + }, + "subscription":false, + "tags":[ ], + "updated":"2015-11-05T08:06:20.403000", + "white_box":false + } +Note that the output in the snippet above has been abbreviated. As you see, +the ``associations`` attribute stores items, rules and metrics extracted +from the datasets as well as the configuration parameters described in +the `developers section `_ . -You can check the optiml properties at the `API documentation -`_. +Topic Models +~~~~~~~~~~~~ -Fusions -------- +A topic model is an unsupervised machine learning method +for unveiling all the different topics +underlying a collection of documents. +BigML uses Latent Dirichlet Allocation (LDA), one of the most popular +probabilistic methods for topic modeling. +In BigML, each instance (i.e. each row in your dataset) will +be considered a document and the contents of all the text fields +given as inputs will be automatically concatenated and considered the +document bag of words. -A Fusion is a special type of composed resource for which all -submodels satisfy the following constraints: they're all either -classifications or regressions over the same kind of data or -compatible fields, with the same objective field. Given those -properties, a fusion can be considered a supervised model, -and therefore one can predict with fusions and evaluate them. 
-Ensembles can be viewed as a kind of fusion subject to the additional
-constraints that all its submodels are tree models that, moreover,
-have been built from the same base input data, but sampled in particular ways.
+Topic model is based on the assumption that any document
+exhibits a mixture of topics. Each topic is composed of a set of words
+which are thematically related. The words from a given topic have different
+probabilities for that topic. At the same time, each word can be attributed
+to one or several topics. So for example the word "sea" may be found in
+a topic related with sea transport but also in a topic related to holidays.
+Topic model automatically discards stop words and high
+frequency words.
 
-The model types allowed to be a submodel of a fusion are:
-deepnet, ensemble, fusion, model, logistic regression and linear regression.
+Topic model's main applications include browsing, organizing and understanding
+large archives of documents. It can be applied for information retrieval,
+collaborative filtering, assessing document similarity among others.
+The topics found in the dataset can also be very useful new features
+before applying other models like classification, clustering, or
+anomaly detection.
 
-The JSON structure for an Fusion is:
+The JSON structure for a topic model is:
 
 .. 
code-block:: python - >>> api.pprint(fusion["object"]) - { - "category": 0, - "code": 200, - "configuration": null, - "configuration_status": false, - "created": "2018-05-09T20:11:05.821000", - "credits_per_prediction": 0, - "description": "", - "fields_meta": { - "count": 5, - "limit": 1000, - "offset": 0, - "query_total": 5, - "total": 5 - }, - "fusion": { - "models": [ - { - "id": "ensemble/5af272eb4e1727d378000050", - "kind": "ensemble", - "name": "Iris ensemble", - "name_options": "boosted trees, 1999-node, 16-iteration, deterministic order, balanced" - }, - { - "id": "model/5af272fe4e1727d3780000d6", - "kind": "model", - "name": "Iris model", - "name_options": "1999-node, pruned, deterministic order, balanced" - }, - { - "id": "logisticregression/5af272ff4e1727d3780000d9", - "kind": "logisticregression", - "name": "Iris LR", - "name_options": "L2 regularized (c=1), bias, auto-scaled, missing values, eps=0.001" - } - ] - }, - "importance": { - "000000": 0.05847, - "000001": 0.03028, - "000002": 0.13582, - "000003": 0.4421 - }, - "model_count": { - "ensemble": 1, - "logisticregression": 1, - "model": 1, - "total": 3 - }, - "models": [ - "ensemble/5af272eb4e1727d378000050", - "model/5af272fe4e1727d3780000d6", - "logisticregression/5af272ff4e1727d3780000d9" - ], - "models_meta": { - "count": 3, - "limit": 1000, - "offset": 0, - "total": 3 - }, - "name": "iris", - "name_options": "3 total models (ensemble: 1, logisticregression: 1, model: 1)", - "number_of_batchpredictions": 0, - "number_of_evaluations": 0, - "number_of_predictions": 0, - "number_of_public_predictions": 0, - "objective_field": "000004", - "objective_field_details": { - "column_number": 4, - "datatype": "string", - "name": "species", - "optype": "categorical", - "order": 4 - }, - "objective_field_name": "species", - "objective_field_type": "categorical", - "objective_fields": [ - "000004" - ], - "private": true, - "project": null, - "resource":"fusion/59af8107b8aa0965d5b61138", - "shared": false, - 
"status": { - "code": 5, - "elapsed": 8420, - "message": "The fusion has been created", - "progress": 1 - }, - "subscription": false, - "tags": [], - "type": 0, - "updated": "2018-05-09T20:11:14.258000" - } + >>> api.pprint(topic['object']) + { 'category': 0, + 'code': 200, + 'columns': 1, + 'configuration': None, + 'configuration_status': False, + 'created': '2016-11-23T23:47:54.703000', + 'credits': 0.0, + 'credits_per_prediction': 0.0, + 'dataset': 'dataset/58362aa0983efc45a0000005', + 'dataset_field_types': { 'categorical': 1, + 'datetime': 0, + 'effective_fields': 672, + 'items': 0, + 'numeric': 0, + 'preferred': 2, + 'text': 1, + 'total': 2}, + 'dataset_status': True, + 'dataset_type': 0, + 'description': '', + 'excluded_fields': [], + 'fields_meta': { 'count': 1, + 'limit': 1000, + 'offset': 0, + 'query_total': 1, + 'total': 1}, + 'input_fields': ['000001'], + 'locale': 'en_US', + 'max_columns': 2, + 'max_rows': 656, + 'name': u"spam dataset's Topic Model ", + 'number_of_batchtopicdistributions': 0, + 'number_of_public_topicdistributions': 0, + 'number_of_topicdistributions': 0, + 'ordering': 0, + 'out_of_bag': False, + 'price': 0.0, + 'private': True, + 'project': None, + 'range': [1, 656], + 'replacement': False, + 'resource': 'topicmodel/58362aaa983efc45a1000007', + 'rows': 656, + 'sample_rate': 1.0, + 'shared': False, + 'size': 54740, + 'source': 'source/58362a69983efc459f000001', + 'source_status': True, + 'status': { 'code': 5, + 'elapsed': 3222, + 'message': 'The topic model has been created', + 'progress': 1.0}, + 'subscription': True, + 'tags': [], + 'topic_model': { 'alpha': 4.166666666666667, + 'beta': 0.1, + 'bigrams': False, + 'case_sensitive': False, + 'fields': { '000001': { 'column_number': 1, + 'datatype': 'string', + 'name': 'Message', + 'optype': 'text', + 'order': 0, + 'preferred': True, + 'summary': { 'average_length': 78.14787, + 'missing_count': 0, + 'tag_cloud': [ [ 'call', + 72], + [ 'ok', + 36], + [ 'gt', + 34], + ... 
+ [ 'worse', + 2], + [ 'worth', + 2], + [ 'write', + 2], + [ 'yest', + 2], + [ 'yijue', + 2]], + 'term_forms': { }}, + 'term_analysis': { 'case_sensitive': False, + 'enabled': True, + 'language': 'en', + 'stem_words': False, + 'token_mode': 'all', + 'use_stopwords': False}}}, + 'hashed_seed': 62146850, + 'language': 'en', + 'number_of_topics': 12, + 'term_limit': 4096, + 'term_topic_assignments': [ [ 0, + 5, + 0, + 1, + 0, + 19, + 0, + 0, + 19, + 0, + 1, + 0], + [ 0, + 0, + 0, + 13, + 0, + 0, + 0, + 0, + 5, + 0, + 0, + 0], + ... + [ 0, + 7, + 27, + 0, + 112, + 0, + 0, + 0, + 0, + 0, + 14, + 2]], + 'termset': [ '000', + '03', + '04', + '06', + '08000839402', + '08712460324', + ... -You can check the fusion properties at the `API documentation -`_. + 'yes', + 'yest', + 'yesterday', + 'yijue', + 'yo', + 'yr', + 'yup', + '\xfc'], + 'top_n_terms': 10, + 'topicmodel_seed': '26c386d781963ca1ea5c90dab8a6b023b5e1d180', + 'topics': [ { 'id': '000000', + 'name': 'Topic 00', + 'probability': 0.09375, + 'top_terms': [ [ 'im', + 0.04849], + [ 'hi', + 0.04717], + [ 'love', + 0.04585], + [ 'please', + 0.02867], + [ 'tomorrow', + 0.02867], + [ 'cos', + 0.02823], + [ 'sent', + 0.02647], + [ 'da', + 0.02383], + [ 'meet', + 0.02207], + [ 'dinner', + 0.01898]]}, + { 'id': '000001', + 'name': 'Topic 01', + 'probability': 0.08215, + 'top_terms': [ [ 'lt', + 0.1015], + [ 'gt', + 0.1007], + [ 'wish', + 0.03958], + [ 'feel', + 0.0272], + [ 'shit', + 0.02361], + [ 'waiting', + 0.02281], + [ 'stuff', + 0.02001], + [ 'name', + 0.01921], + [ 'comp', + 0.01522], + [ 'forgot', + 0.01482]]}, + ... 
+ { 'id': '00000b', + 'name': 'Topic 11', + 'probability': 0.0826, + 'top_terms': [ [ 'call', + 0.15084], + [ 'min', + 0.05003], + [ 'msg', + 0.03185], + [ 'home', + 0.02648], + [ 'mind', + 0.02152], + [ 'lt', + 0.01987], + [ 'bring', + 0.01946], + [ 'camera', + 0.01905], + [ 'set', + 0.01905], + [ 'contact', + 0.01781]]}], + 'use_stopwords': False}, + 'updated': '2016-11-23T23:48:03.336000', + 'white_box': False} + +Note that the output in the snippet above has been abbreviated. + + +The topic model returns a list of top terms for each topic found in the data. +Note that topics are not labeled, so you have to infer their meaning according +to the words they are composed of. + +Once you build the topic model you can calculate each topic probability +for a given document by using Topic Distribution. +This information can be useful to find documents similarities based +on their thematic. +As you see, +the ``topic_model`` attribute stores the topics and termset and term to +topic assignment, +as well as the configuration parameters described in +the `developers section `_ . PCAs ----- +~~~~ A PCA (Principal Component Analysis) resource fits a number of orthogonal projections (components) to maximally capture the variance in a dataset. 
This @@ -3026,43 +3415,43 @@ The JSON structure for an PCA is: {'code': 200, 'error': None, 'location': 'https://strato.dev.bigml.io/andromeda/pca/5c002572983efc0ac5000003', - 'object': {u'category': 0, - u'code': 200, - u'columns': 2, - u'configuration': None, - u'configuration_status': False, - u'created': u'2018-11-29T17:44:18.359000', - u'creator': u'merce', - u'credits': 0.0, - u'credits_per_prediction': 0.0, - u'dataset': u'dataset/5c00256a983efc0acf000000', - u'dataset_field_types': {u'categorical': 1, - u'datetime': 0, - u'items': 0, - u'numeric': 0, - u'preferred': 2, - u'text': 1, - u'total': 2}, - u'dataset_status': True, - u'description': u'', - u'excluded_fields': [], - u'fields_meta': {u'count': 2, - u'limit': 1000, - u'offset': 0, - u'query_total': 2, - u'total': 2}, - u'input_fields': [u'000000', u'000001'], - u'locale': u'en-us', - u'max_columns': 2, - u'max_rows': 7, - u'name': u'spam 4 words', - u'name_options': u'standardized', - u'number_of_batchprojections': 2, - u'number_of_projections': 0, - u'number_of_public_projections': 0, - u'ordering': 0, - u'out_of_bag': False, - u'pca': {u'components': [[-0.64757, + 'object': {'category': 0, + 'code': 200, + 'columns': 2, + 'configuration': None, + 'configuration_status': False, + 'created': '2018-11-29T17:44:18.359000', + 'creator': 'merce', + 'credits': 0.0, + 'credits_per_prediction': 0.0, + 'dataset': 'dataset/5c00256a983efc0acf000000', + 'dataset_field_types': {'categorical': 1, + 'datetime': 0, + 'items': 0, + 'numeric': 0, + 'preferred': 2, + 'text': 1, + 'total': 2}, + 'dataset_status': True, + 'description': '', + 'excluded_fields': [], + 'fields_meta': {'count': 2, + 'limit': 1000, + 'offset': 0, + 'query_total': 2, + 'total': 2}, + 'input_fields': ['000000', '000001'], + 'locale': 'en-us', + 'max_columns': 2, + 'max_rows': 7, + 'name': 'spam 4 words', + 'name_options': 'standardized', + 'number_of_batchprojections': 2, + 'number_of_projections': 0, + 'number_of_public_projections': 0, + 
'ordering': 0, + 'out_of_bag': False, + 'pca': {'components': [[-0.64757, 0.83392, 0.1158, 0.83481, @@ -3070,63 +3459,422 @@ The JSON structure for an PCA is: -0.09426, -0.08544, -0.03457]], - u'cumulative_variance': [0.43667, + 'cumulative_variance': [0.43667, 0.74066, 0.87902, 0.98488, 0.99561, 1], - u'eigenvectors': [[-0.3894, + 'eigenvectors': [[-0.3894, 0.50146, 0.06963, ... -0.56542, -0.5125, -0.20734]], - u'fields': {u'000000': {u'column_number': 0, - u'datatype': u'string', - u'name': u'Type', + 'fields': {'000000': {'column_number': 0, + 'datatype': 'string', + 'name': 'Type', ... - u'token_mode': u'all', - u'use_stopwords': False}}}, - u'pca_seed': u'2c249dda00fbf54ab4cdd850532a584f286af5b6', - u'standardized': True, - u'text_stats': {u'000001': {u'means': [0.71429, + 'token_mode': 'all', + 'use_stopwords': False}}}, + 'pca_seed': '2c249dda00fbf54ab4cdd850532a584f286af5b6', + 'standardized': True, + 'text_stats': {'000001': {'means': [0.71429, 0.71429, 0.42857, 0.28571], - u'standard_deviations': [0.75593, + 'standard_deviations': [0.75593, 0.75593, 0.53452, 0.48795]}}, - u'variance': [0.43667, + 'variance': [0.43667, 0.30399, 0.13837, 0.10585, 0.01073, 0.00439]}, - u'price': 0.0, - u'private': True, - u'project': None, - u'range': None, - u'replacement': False, - u'resource': u'pca/5c002572983efc0ac5000003', - u'rows': 7, - u'sample_rate': 1.0, - u'shared': False, - u'size': 127, - u'source': u'source/5c00255e983efc0acd00001b', - u'source_status': True, - u'status': {u'code': 5, - u'elapsed': 1571, - u'message': u'The pca has been created', - u'progress': 1}, - u'subscription': True, - u'tags': [], - u'type': 0, - u'updated': u'2018-11-29T18:13:19.714000', - u'white_box': False}, - 'resource': u'pca/5c002572983efc0ac5000003'} + 'price': 0.0, + 'private': True, + 'project': None, + 'range': None, + 'replacement': False, + 'resource': 'pca/5c002572983efc0ac5000003', + 'rows': 7, + 'sample_rate': 1.0, + 'shared': False, + 'size': 127, + 'source': 
'source/5c00255e983efc0acd00001b', + 'source_status': True, + 'status': {'code': 5, + 'elapsed': 1571, + 'message': 'The pca has been created', + 'progress': 1}, + 'subscription': True, + 'tags': [], + 'type': 0, + 'updated': '2018-11-29T18:13:19.714000', + 'white_box': False}, + 'resource': 'pca/5c002572983efc0ac5000003'} You can check the PCA properties at the `API documentation -`_. +`_. + +Predictions and Evaluations +--------------------------- + +Prediction +~~~~~~~~~~ + +The output of a supervised learning model for a particular input is its +prediction. In BigML, a model is ready to produce predictions immediately, so +there's no need of a special deployment in order to start using it. Here's how +you create a prediction for a model and its response: + +.. code-block:: python + + >>> input_data = {"petal length": 4} + >>> prediction = api.create_prediction(model_id, input_data) + >>> api.pprint(prediction["object"]) + { 'boosted_ensemble': False, + 'category': 12, + 'code': 201, + 'confidence': 0.40383, + 'confidence_bounds': {}, + 'confidences': [ ['Iris-setosa', 0], + ['Iris-versicolor', 0.40383], + ['Iris-virginica', 0.40383]], + 'configuration': None, + 'configuration_status': False, + 'created': '2024-09-09T15:48:58.918313', + 'creator': 'mmartin', + 'dataset': 'dataset/6668805ad7413f90007ab83e', + 'dataset_status': True, + 'description': 'Created using BigMLer', + 'expanded_input_data': {'000002': 4.0}, + 'explanation': None, + 'fields': { '000002': { 'column_number': 2, + 'datatype': 'double', + 'name': 'petal length', + 'optype': 'numeric', + 'order': 2, + 'preferred': True}, + '000003': { 'column_number': 3, + 'datatype': 'double', + 'name': 'petal width', + 'optype': 'numeric', + 'order': 3, + 'preferred': True}, + '000004': { 'column_number': 4, + 'datatype': 'string', + 'name': 'species', + 'optype': 'categorical', + 'order': 4, + 'preferred': True, + 'term_analysis': {'enabled': True}}}, + 'importance': {'000002': 1}, + 'input_data': {'petal 
length': 4}, + 'locale': 'en_US', + 'missing_strategy': 0, + 'model': 'model/6668805f002883f09483369d', + 'model_status': True, + 'model_type': 0, + 'name': 'iris.csv', + 'name_options': 'operating kind=probability, 1 inputs', + 'number_of_models': 1, + 'objective_field': '000004', + 'objective_field_name': 'species', + 'objective_field_type': 'categorical', + 'objective_fields': ['000004'], + 'operating_kind': 'probability', + 'output': 'Iris-versicolor', + 'prediction': {'000004': 'Iris-versicolor'}, + 'prediction_path': { 'confidence': 0.40383, + 'next_predicates': [ { 'count': 46, + 'field': '000003', + 'operator': '>', + 'value': 1.75}, + { 'count': 54, + 'field': '000003', + 'operator': '<=', + 'value': 1.75}], + 'node_id': 1, + 'objective_summary': { 'categories': [ [ 'Iris-versicolor', + 50], + [ 'Iris-virginica', + 50]]}, + 'path': [ { 'field': '000002', + 'operator': '>', + 'value': 2.45}]}, + 'private': True, + 'probabilities': [ ['Iris-setosa', 0.0033], + ['Iris-versicolor', 0.49835], + ['Iris-virginica', 0.49835]], + 'probability': 0.49835, + 'project': None, + 'query_string': '', + 'resource': 'prediction/66df18eac6f7849b7b3f10ec', + 'shared': False, + 'source': 'source/66688055450bc914a2c147e0', + 'source_status': True, + 'status': { 'code': 5, + 'elapsed': 227, + 'message': 'The prediction has been created', + 'progress': 1}, + 'subscription': True, + 'tags': ['BigMLer', 'BigMLer_TueJun1124_094957'], + 'task': 'classification', + 'type': 0, + 'updated': '2024-09-09T15:48:58.918335'} + +As you see, +the ``output`` attribute stores the prediction value and the ``confidence`` +and ``probability`` attributes show the respective values. The rest of the +dictionary contains the configuration parameters described in +the `developers section `_. + +Evaluation +~~~~~~~~~~ + +The predictive performance of a model can be measured using many different +measures. In BigML these measures can be obtained by creating evaluations. 
To +create an evaluation you need the id of the model you are evaluating and the id +of the dataset that contains the data to be tested with. The result is shown +as: + +.. code-block:: python + + >>> evaluation = api.get_evaluation(evaluation) + >>> api.pprint(evaluation['object']['result']) + { 'class_names': ['0', '1'], + 'mode': { 'accuracy': 0.9802, + 'average_f_measure': 0.495, + 'average_phi': 0, + 'average_precision': 0.5, + 'average_recall': 0.4901, + 'confusion_matrix': [[99, 0], [2, 0]], + 'per_class_statistics': [ { 'accuracy': 0.9801980198019802, + 'class_name': '0', + 'f_measure': 0.99, + 'phi_coefficient': 0, + 'precision': 1.0, + 'present_in_test_data': True, + 'recall': 0.9801980198019802}, + { 'accuracy': 0.9801980198019802, + 'class_name': '1', + 'f_measure': 0, + 'phi_coefficient': 0, + 'precision': 0.0, + 'present_in_test_data': True, + 'recall': 0}]}, + 'model': { 'accuracy': 0.9901, + 'average_f_measure': 0.89746, + 'average_phi': 0.81236, + 'average_precision': 0.99495, + 'average_recall': 0.83333, + 'confusion_matrix': [[98, 1], [0, 2]], + 'per_class_statistics': [ { 'accuracy': 0.9900990099009901, + 'class_name': '0', + 'f_measure': 0.9949238578680203, + 'phi_coefficient': 0.8123623944599232, + 'precision': 0.98989898989899, + 'present_in_test_data': True, + 'recall': 1.0}, + { 'accuracy': 0.9900990099009901, + 'class_name': '1', + 'f_measure': 0.8, + 'phi_coefficient': 0.8123623944599232, + 'precision': 1.0, + 'present_in_test_data': True, + 'recall': 0.6666666666666666}]}, + 'random': { 'accuracy': 0.50495, + 'average_f_measure': 0.36812, + 'average_phi': 0.13797, + 'average_precision': 0.74747, + 'average_recall': 0.51923, + 'confusion_matrix': [[49, 50], [0, 2]], + 'per_class_statistics': [ { 'accuracy': 0.504950495049505, + 'class_name': '0', + 'f_measure': 0.6621621621621622, + 'phi_coefficient': 0.1379728923974526, + 'precision': 0.494949494949495, + 'present_in_test_data': True, + 'recall': 1.0}, + { 'accuracy': 0.504950495049505, 
+ 'class_name': '1', + 'f_measure': 0.07407407407407407, + 'phi_coefficient': 0.1379728923974526, + 'precision': 1.0, + 'present_in_test_data': True, + 'recall': 0.038461538461538464}]}} + +where two levels of detail are easily identified. For classifications, +the first level shows these keys: + +- **class_names**: A list with the names of all the categories for the objective field (i.e., all the classes) +- **mode**: A detailed result object. Measures of the performance of the classifier that predicts the mode class for all the instances in the dataset +- **model**: A detailed result object. +- **random**: A detailed result object. Measures the performance of the classifier that predicts a random class for all the instances in the dataset. + +and the detailed result objects include ``accuracy``, ``average_f_measure``, ``average_phi``, +``average_precision``, ``average_recall``, ``confusion_matrix`` +and ``per_class_statistics``. + +For regressions first level will contain these keys: + +- **mean**: A detailed result object. Measures the performance of the model that predicts the mean for all the instances in the dataset. +- **model**: A detailed result object. +- **random**: A detailed result object. Measures the performance of the model that predicts a random class for all the instances in the dataset. + +where the detailed result objects include ``mean_absolute_error``, +``mean_squared_error`` and ``r_squared`` (refer to +`developers documentation `_ for +more info on the meaning of these measures. + +You can check the evaluation properties at the `API documentation +`_. + +Centroid +~~~~~~~~ + +A ``centroid`` is the value predicted by a cluster model. Here's how to create +a centroid: + + +.. code-block:: python + + >>> input_data = {"petal length": 4} + >>> centroid = api.create_centroid(cluster_id, input_data) + +Mind that you will need to provide values for all the input fields in order to +create a centroid. 
To know more details about the centroid properties and +parameters you can check the corresponding +`API documentation `_. + +Anomaly Score +~~~~~~~~~~~~~ + +An ``anomaly score`` is the value predicted by an anomaly detector. +Here's how to create an anomaly score: + + +.. code-block:: python + + >>> input_data = {"petal length": 4} + >>> anomaly_score = api.create_anomaly_score(anomaly_id, input_data) + +To know more details about the anomaly score properties and +parameters you can check the corresponding +`API documentation `_. + +Association Set +~~~~~~~~~~~~~~~ + +An ``association set`` is the value predicted by an association discovery model. +Here's how to create an association set: + + +.. code-block:: python + + >>> input_data = {"petal length": 4} + >>> association_set = api.create_association_set(association_id, input_data) + +To know more details about the association set properties and +parameters you can check the corresponding +`API documentation `_. + +Topic Distribution +~~~~~~~~~~~~~~~~~~ + +A ``topic distribution`` is the value predicted by a topic model. +Here's how to create a topic distribution: + + +.. code-block:: python + + >>> input_data = {"text": "Now is the winter of our discontent"} + >>> topic_model = api.create_topic_model(topic_model_id, input_data) + +To know more details about the topic distribution properties and +parameters you can check the corresponding +`API documentation `_. + +Batch Prediction +~~~~~~~~~~~~~~~~ + +In BigML, you can create predictions for all the inputs provided as rows of a +dataset, i.e. a batch prediction. +The result of a batch prediction can either be downloaded as a CSV or +become a new dataset. As with predictions, a model is ready to produce batch +predictions immediately, so there's no need of a special deployment in order +to start using it. Here's how you create a batch prediction for a model +and its response: + +.. 
code-block:: python + + >>> batch_prediction = api.create_batch_prediction(model_id, test_dataset) + +To know more details about the batch prediction properties and +parameters you can check the corresponding +`API documentation `_. + +Batch Centroid +~~~~~~~~~~~~~~ + +In BigML, you can create centroids for all the inputs provided as rows of a +dataset, i.e. a batch centroid. +The result of a batch centroid can either be downloaded as a CSV or +become a new dataset. As with predictions, a cluster is ready to produce batch +centroids immediately, so there's no need of a special deployment in order +to start using it. Here's how you create a batch centroid for a cluster +and its response: + +.. code-block:: python + + >>> batch_centroid = api.create_batch_centroid(cluster_id, test_dataset) + +To know more details about the batch centroid properties and +parameters you can check the corresponding +`API documentation `_. + +Batch Anomaly Score +~~~~~~~~~~~~~~~~~~~ + +In BigML, you can create anomaly scores for all the inputs provided as rows of a +dataset, i.e. a batch anomaly score. +The result of a batch anomaly score can either be downloaded as a CSV or +become a new dataset. As with predictions, an anomaly detector +is ready to produce batch anomaly scores immediately, +so there's no need of a special deployment in order +to start using it. Here's how you create a batch anomaly score for an anomaly +detector and its response: + +.. code-block:: python + + >>> batch_anomaly_score = api.create_batch_anomaly_score( + anomaly_id, test_dataset) + +To know more details about the batch anomaly score properties and +parameters you can check the corresponding +`API documentation `_. + +Batch Topic Distribution +~~~~~~~~~~~~~~~~~~~~~~~~ + +In BigML, you can create topic distributions for all the inputs +provided as rows of a dataset, i.e. a batch topic distribution. +The result of a batch topic distribution can either be downloaded as a CSV or +become a new dataset. 
As with predictions, a topic model is ready to produce +batch topic distributions immediately, so there's no need of a +special deployment in order to start using it. +Here's how you create a batch topic distribution for a topic model +and its response: + +.. code-block:: python + + >>> batch_topic_distribution = api.create_batch_topic_distribution( + topic_id, test_dataset) + +To know more details about the batch topic distribution properties and +parameters you can check the corresponding +`API documentation `_. diff --git a/docs/quick_start.rst b/docs/quick_start.rst new file mode 100644 index 00000000..2ff7b0ac --- /dev/null +++ b/docs/quick_start.rst @@ -0,0 +1,284 @@ +Quick Start +=========== + +Imagine that you want to use `this csv +file `_ containing the `Iris +flower dataset `_ to +predict the species of a flower whose ``petal length`` is ``2.45`` and +whose ``petal width`` is ``1.75``. A preview of the dataset is shown +below. It has 4 numeric fields: ``sepal length``, ``sepal width``, +``petal length``, ``petal width`` and a categorical field: ``species``. +By default, BigML considers the last field in the dataset as the +objective field (i.e., the field that you want to generate predictions +for). + +:: + + sepal length,sepal width,petal length,petal width,species + 5.1,3.5,1.4,0.2,Iris-setosa + 4.9,3.0,1.4,0.2,Iris-setosa + 4.7,3.2,1.3,0.2,Iris-setosa + ... + 5.8,2.7,3.9,1.2,Iris-versicolor + 6.0,2.7,5.1,1.6,Iris-versicolor + 5.4,3.0,4.5,1.5,Iris-versicolor + ... + 6.8,3.0,5.5,2.1,Iris-virginica + 5.7,2.5,5.0,2.0,Iris-virginica + 5.8,2.8,5.1,2.4,Iris-virginica + +You can easily generate a prediction following these steps: + +.. 
code-block:: python + + from bigml.api import BigML + + api = BigML() + + source = api.create_source('./data/iris.csv') + dataset = api.create_dataset(source) + model = api.create_model(dataset) + prediction = api.create_prediction(model, \ + {"petal width": 1.75, "petal length": 2.45}) + +You can then print the prediction using the ``pprint`` method: + +.. code-block:: python + + >>> api.pprint(prediction) + species for {"petal width": 1.75, "petal length": 2.45} is Iris-setosa + +Certainly, any of the resources created in BigML can be configured using +several arguments described in the `API documentation `_. +Any of these configuration arguments can be added to the ``create`` method +as a dictionary in the last optional argument of the calls: + +.. code-block:: python + + from bigml.api import BigML + + api = BigML() + + source_args = {"name": "my source", + "source_parser": {"missing_tokens": ["NULL"]}} + source = api.create_source('./data/iris.csv', source_args) + dataset_args = {"name": "my dataset"} + dataset = api.create_dataset(source, dataset_args) + model_args = {"objective_field": "species"} + model = api.create_model(dataset, model_args) + prediction_args = {"name": "my prediction"} + prediction = api.create_prediction(model, \ + {"petal width": 1.75, "petal length": 2.45}, + prediction_args) + +The ``iris`` dataset has a small number of instances, and usually will be +instantly created, so the ``api.create_`` calls will probably return the +finished resources outright. As BigML's API is asynchronous, +in general you will need to ensure +that objects are finished before using them by using ``api.ok``. + +.. 
code-block:: python + + from bigml.api import BigML + + api = BigML() + + source = api.create_source('./data/iris.csv') + api.ok(source) + dataset = api.create_dataset(source) + api.ok(dataset) + model = api.create_model(dataset) + api.ok(model) + prediction = api.create_prediction(model, \ + {"petal width": 1.75, "petal length": 2.45}) + +Note that the prediction +call is not followed by the ``api.ok`` method. Predictions are so quick to be +generated that, unlike the +rest of resouces, will be generated synchronously as a finished object. + +Alternatively to the ``api.ok`` method, BigML offers +`webhooks `_ that can be set +when creating a resource and will call the url of you choice when the +finished or failed event is reached. A secret can be included in the call to +verify the webhook call authenticity, and a + +.. code-block:: python + + bigml.webhooks.check_signature(request, signature) + +function is offered to that end. As an example, this snippet creates a source +and sets a webhook to call ``https://my_webhook.com/endpoint`` when finished: + +.. code-block:: python + + from bigml.api import BigML + api = BigML() + # using a webhook with a secret + api.create_source("https://static.bigml.com/csv/iris.csv", + {"webhook": {"url": "https://my_webhook.com/endpoint", + "secret": "mysecret"}}) + + +The ``iris`` prediction example assumed that your objective +field (the one you want to predict) is the last field in the dataset. +If that's not he case, you can explicitly +set the name of this field in the creation call using the ``objective_field`` +argument: + + +.. 
code-block:: python + + from bigml.api import BigML + + api = BigML() + + source = api.create_source('./data/iris.csv') + api.ok(source) + dataset = api.create_dataset(source) + api.ok(dataset) + model = api.create_model(dataset, {"objective_field": "species"}) + api.ok(model) + prediction = api.create_prediction(model, \ + {'sepal length': 5, 'sepal width': 2.5}) + + +You can also generate an evaluation for the model by using: + +.. code-block:: python + + test_source = api.create_source('./data/test_iris.csv') + api.ok(test_source) + test_dataset = api.create_dataset(test_source) + api.ok(test_dataset) + evaluation = api.create_evaluation(model, test_dataset) + api.ok(evaluation) + + +The API object also offers the ``create``, ``get``, ``update`` and ``delete`` +generic methods to manage all type of resources. The type of resource to be +created is passed as first argument to the ``create`` method; + +.. code-block:: python + + from bigml.api import BigML + + api = BigML() + + source = api.create('source', './data/iris.csv') + source = api.update(source, {"name": "my new source name"}) + +Note that these methods don't need the ``api.ok`` method to be called +to wait for the resource to be finished. +The method waits internally for it by default. +This can be avoided by using ``finished=False`` as one of the arguments. + + +.. code-block:: python + + from bigml.api import BigML + + api = BigML() + + source = api.create('source', './data/iris.csv') + dataset = api.create('dataset', source, finished=False) # unfinished + api.ok(dataset) # waiting explicitly for the dataset to finish + dataset = api.update(dataset, {"name": "my_new_dataset_name"}, + finised=False) + api.ok(dataset) + +As an example for the ``delete`` and ``get`` methods, we could +create a batch prediction, put the predictions in a +dataset object and delete the ``batch_prediction``. + +.. 
code-block:: python + + from bigml.api import BigML + + api = BigML() + + batch_prediction = api.create('batchprediction', + 'model/5f3c3d2b5299637102000882', + 'dataset/5f29a563529963736c0116e9', + args={"output_dataset": True}) + batch_prediction_dataset = api.get(batch_prediction["object"][ \ + "output_dataset_resource"]) + api.delete(batch_prediction) + +If you set the ``storage`` argument in the ``api`` instantiation: + +.. code-block:: python + + api = BigML(storage='./storage') + +all the generated, updated or retrieved resources will be automatically +saved to the chosen directory. Once they are stored locally, the +``retrieve_resource`` method will look for the resource information +first in the local storage before trying to download the information from +the API. + +.. code-block:: python + + dataset = api.retrieve_resource("dataset/5e8e5672c7736e3d830037b5", + query_string="limit=-1") + + +Alternatively, you can use the ``export`` method to explicitly +download the JSON information +that describes any of your resources in BigML to a particular file: + +.. code-block:: python + + api.export('model/5acea49a08b07e14b9001068', + filename="my_dir/my_model.json") + +This example downloads the JSON for the model and stores it in +the ``my_dir/my_model.json`` file. + +In the case of models that can be represented in a `PMML` syntax, the +export method can be used to produce the corresponding `PMML` file. + +.. code-block:: python + + api.export('model/5acea49a08b07e14b9001068', + filename="my_dir/my_model.pmml", + pmml=True) + +You can also retrieve the last resource with some previously given tag: + +.. code-block:: python + + api.export_last("foo", + resource_type="ensemble", + filename="my_dir/my_ensemble.json") + +which selects the last ensemble that has a ``foo`` tag. This mechanism can +be specially useful when retrieving retrained models that have been created +with a shared unique keyword as tag. 
+ +For a descriptive overview of the steps that you will usually need to +follow to model +your data and obtain predictions, please see the `basic Workflow sketch +`_ +document. You can also check other simple examples in the following documents: + +- `model 101 <101_model.html>`_ +- `logistic regression 101 <101_logistic_regression.html>`_ +- `linear regression 101 <101_linear_regression.html>`_ +- `ensemble 101 <101_ensemble.html>`_ +- `cluster 101 <101_cluster>`_ +- `anomaly detector 101 <101_anomaly.html>`_ +- `association 101 <101_association.html>`_ +- `topic model 101 <101_topic_model.html>`_ +- `deepnet 101 <101_deepnet.html>`_ +- `time series 101 <101_ts.html>`_ +- `fusion 101 <101_fusion.html>`_ +- `optiml 101 <101_optiml.html>`_ +- `PCA 101 <101_pca.html>`_ +- `scripting 101 <101_scripting.html>`_ + +And for examples on Image Processing: + +- `Images Classification 101 <101_images_classification.html>`_ +- `Object Detection 101<101_object_detection.html>`_ +- `Images Feature Extraction 101 <101_images_feature_extraction.html>`_ diff --git a/docs/reading_resources.rst b/docs/reading_resources.rst index e182915f..541125e4 100644 --- a/docs/reading_resources.rst +++ b/docs/reading_resources.rst @@ -30,6 +30,66 @@ that can be used to filter out or limit the attributes obtained: query_string="exclude=root") +Public and shared resources +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The previous examples use resources that were created by the same user +that asks for their retrieval or modification. If a user wants to share one +of her resources, she can make them public or share them. Declaring a resource +public means that anyone can see the resource. This can be applied to datasets +and models. To turn a dataset public, just update its ``private`` property: + +.. code-block:: python + + api.update_dataset('dataset/5143a51a37203f2cf7000972', {'private': false}) + +and any user will be able to download it using its id prepended by ``public``: + +.. 
code-block:: python + + api.get_dataset('public/dataset/5143a51a37203f2cf7000972') + +In the models' case, you can also choose if you want the model to be fully +downloadable or just accesible to make predictions. This is controlled with the +``white_box`` property. If you want to publish your model completely, just +use: + +.. code-block:: python + + api.update_model('model/5143a51a37203f2cf7000956', {'private': false, + 'white_box': true}) + +Both public models and datasets, will be openly accessible for anyone, +registered or not, from the web +gallery. + +Still, you may want to share your models with other users, but without making +them public for everyone. This can be achieved by setting the ``shared`` +property: + +.. code-block:: python + + api.update_model('model/5143a51a37203f2cf7000956', {'shared': true}) + +Shared models can be accessed using their share hash (propery ``shared_hash`` +in the original model): + +.. code-block:: python + + api.get_model('shared/model/d53iw39euTdjsgesj7382ufhwnD') + +or by using their original id with the creator user as username and a specific +sharing api_key you will find as property ``sharing_api_key`` in the updated +model: + +.. code-block:: python + + api.get_model('model/5143a51a37203f2cf7000956', shared_username='creator', + shared_api_key='c972018dc5f2789e65c74ba3170fda31d02e00c3') + +Only users with the share link or credentials information will be able to +access your shared models. + Listing Resources ----------------- @@ -178,63 +238,3 @@ Name of predictions ordered by name. [prediction['name'] for prediction in api.list_predictions("order_by=name")['objects']] - -Public and shared resources ---------------------------- - -The previous examples use resources that were created by the same user -that asks for their retrieval or modification. If a user wants to share one -of her resources, she can make them public or share them. Declaring a resource -public means that anyone can see the resource. 
This can be applied to datasets -and models. To turn a dataset public, just update its ``private`` property: - -.. code-block:: python - - api.update_dataset('dataset/5143a51a37203f2cf7000972', {'private': false}) - -and any user will be able to download it using its id prepended by ``public``: - -.. code-block:: python - - api.get_dataset('public/dataset/5143a51a37203f2cf7000972') - -In the models' case, you can also choose if you want the model to be fully -downloadable or just accesible to make predictions. This is controlled with the -``white_box`` property. If you want to publish your model completely, just -use: - -.. code-block:: python - - api.update_model('model/5143a51a37203f2cf7000956', {'private': false, - 'white_box': true}) - -Both public models and datasets, will be openly accessible for anyone, -registered or not, from the web -gallery. - -Still, you may want to share your models with other users, but without making -them public for everyone. This can be achieved by setting the ``shared`` -property: - -.. code-block:: python - - api.update_model('model/5143a51a37203f2cf7000956', {'shared': true}) - -Shared models can be accessed using their share hash (propery ``shared_hash`` -in the original model): - -.. code-block:: python - - api.get_model('shared/model/d53iw39euTdjsgesj7382ufhwnD') - -or by using their original id with the creator user as username and a specific -sharing api_key you will find as property ``sharing_api_key`` in the updated -model: - -.. code-block:: python - - api.get_model('model/5143a51a37203f2cf7000956', shared_username='creator', - shared_api_key='c972018dc5f2789e65c74ba3170fda31d02e00c3') - -Only users with the share link or credentials information will be able to -access your shared models. 
diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..6daf89af --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,2 @@ +sphinx +sphinx_rtd_theme==2.0.0 From c9bf3f693e927b812ee6a221852eb4fb2dd86392 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Merc=C3=A8=20Mart=C3=ADn=20Prats?= Date: Sat, 21 Sep 2024 11:32:07 +0200 Subject: [PATCH 02/19] Fixing test results --- bigml/ensemble.py | 1 - bigml/tests/test_14_create_evaluations.py | 2 +- bigml/tests/test_36_compare_predictions.py | 2 +- bigml/tests/test_40_local_from_file.py | 2 +- bigml/tests/test_49_local_pipeline.py | 16 ++++++++-------- 5 files changed, 11 insertions(+), 12 deletions(-) diff --git a/bigml/ensemble.py b/bigml/ensemble.py index 51e8b50e..07f8faf5 100644 --- a/bigml/ensemble.py +++ b/bigml/ensemble.py @@ -214,7 +214,6 @@ def __init__(self, ensemble, api=None, max_models=None, cache_get=None, # avoid checking fields because of old ensembles ensemble = retrieve_resource(self.api, self.resource_id, no_check_fields=True) - self.parent_id = ensemble.get('object', {}).get('dataset') self.name = ensemble.get('object', {}).get('name') self.description = ensemble.get('object', {}).get('description') diff --git a/bigml/tests/test_14_create_evaluations.py b/bigml/tests/test_14_create_evaluations.py index 316a9460..805be46d 100644 --- a/bigml/tests/test_14_create_evaluations.py +++ b/bigml/tests/test_14_create_evaluations.py @@ -194,7 +194,7 @@ def test_scenario4(self): "evaluation_wait", "metric", "value"] examples = [ ['data/iris.csv', '50', '50', '800', '80', 'average_phi', - '0.97007']] + '0.98029']] for example in examples: example = dict(zip(headers, example)) show_method(self, self.bigml["method"], example) diff --git a/bigml/tests/test_36_compare_predictions.py b/bigml/tests/test_36_compare_predictions.py index ae5c5954..4be945a4 100644 --- a/bigml/tests/test_36_compare_predictions.py +++ b/bigml/tests/test_36_compare_predictions.py @@ -77,7 +77,7 @@ def 
test_scenario1(self): 'Iris-versicolor', '{}'], ['data/iris_missing2.csv', '30', '50', '60', '{}', '000004', 'Iris-versicolor', '{}'], - ['data/grades.csv', '30', '50', '60', '{}', '000005', 55.6560, + ['data/grades.csv', '30', '50', '60', '{}', '000005', 47.04852, '{}'], ['data/spam.csv', '30', '50', '60', '{}', '000000', 'ham', '{}']] show_doc(self.test_scenario1) diff --git a/bigml/tests/test_40_local_from_file.py b/bigml/tests/test_40_local_from_file.py index 41a3b0f6..d2368812 100644 --- a/bigml/tests/test_40_local_from_file.py +++ b/bigml/tests/test_40_local_from_file.py @@ -213,7 +213,7 @@ def test_scenario4(self): ['data/iris.csv', '10', '10', '500', './tmp/deepnet.json', {}, 'Iris-versicolor', '{}'], ['data/iris.csv', '10', '10', '500', './tmp/deepnet_dft.json', {}, - 'Iris-virginica', '{"default_numeric_value": "maximum"}']] + 'Iris-versicolor', '{"default_numeric_value": "maximum"}']] for example in examples: example = dict(zip(headers, example)) show_method(self, self.bigml["method"], example) diff --git a/bigml/tests/test_49_local_pipeline.py b/bigml/tests/test_49_local_pipeline.py index 204c11f8..348bfd8f 100644 --- a/bigml/tests/test_49_local_pipeline.py +++ b/bigml/tests/test_49_local_pipeline.py @@ -210,28 +210,28 @@ def test_scenario4(self): examples = [ ['data/dates2.csv', '20', '45', '160', '{"time-1": "1910-05-08T19:10:23.106", "cat-0":"cat2"}', - '000002', -0.02616, "pipeline1"], + '000002', -0.4264, "pipeline1"], ['data/dates2.csv', '20', '45', '160', '{"time-1": "2011-04-01T00:16:45.747", "cat-0":"cat2"}', - '000002', 0.13352, "pipeline2"], + '000002', 0.11985, "pipeline2"], ['data/dates2.csv', '20', '45', '160', '{"time-1": "1969-W29-1T17:36:39Z", "cat-0":"cat1"}', - '000002', 0.10071, "pipeline3"], + '000002', -0.08211, "pipeline3"], ['data/dates2.csv', '20', '45', '160', '{"time-1": "1920-06-45T20:21:20.320", "cat-0":"cat1"}', - '000002', 0.10071, "pipeline4"], + '000002', -0.08211, "pipeline4"], ['data/dates2.csv', '20', '45', '160', 
'{"time-1": "2001-01-05T23:04:04.693", "cat-0":"cat2"}', - '000002', 0.15235, "pipeline5"], + '000002', 0.00388, "pipeline5"], ['data/dates2.csv', '20', '45', '160', '{"time-1": "1950-11-06T05:34:05.602", "cat-0":"cat1"}', - '000002', -0.07686, "pipeline6"], + '000002', -0.04976, "pipeline6"], ['data/dates2.csv', '20', '45', '160', '{"time-1": "1932-01-30T19:24:11.440", "cat-0":"cat2"}', - '000002', 0.0017, "pipeline7"], + '000002', -0.36264, "pipeline7"], ['data/dates2.csv', '20', '45', '160', '{"time-1": "Mon Jul 14 17:36 +0000 1969", "cat-0":"cat1"}', - '000002', 0.10071, "pipeline8"]] + '000002', -0.08211, "pipeline8"]] show_doc(self.test_scenario4) for example in examples: example = dict(zip(headers, example)) From f45f5a4a4a59dc62f4274b2c9edd902bc8a9fabf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Merc=C3=A8=20Mart=C3=ADn=20Prats?= Date: Wed, 2 Oct 2024 17:28:56 +0200 Subject: [PATCH 03/19] Adding pyproject.toml for compatibility with python 3.12 --- HISTORY.rst | 2 +- pyproject.toml | 8 ++++++++ setup.py | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 pyproject.toml diff --git a/HISTORY.rst b/HISTORY.rst index 84612677..64bbc488 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,7 +3,7 @@ History ------- -9.8.0 (2024-09-09) +9.8.0 (2024-10-02) ------------------ - Fixing the get_leaves function for local decision trees. 
diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..70a3fb8e --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,8 @@ +[build-system] +requires=[ + "setuptools" +] + +[tool.black] +line-length = 80 +target-version = ['py312'] diff --git a/setup.py b/setup.py index 824fdea2..d24d7879 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ open(version_py_path).read()).group(1) TOPIC_MODELING_DEPENDENCIES = ["cython", "pystemmer==2.2.0.1"] -IMAGES_DEPENDENCIES = ["bigml-sensenet==0.7.2"] +IMAGES_DEPENDENCIES = ["bigml-sensenet==0.7.4"] # Concatenate files into the long description file_contents = [] From 1d310af6fe5f1c1966b93e35ba4388655958fd58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Merc=C3=A8=20Mart=C3=ADn=20Prats?= Date: Wed, 2 Oct 2024 23:55:48 +0200 Subject: [PATCH 04/19] Upgrading sensenet and fixing issues for Python 3.12 --- .github/workflows/tests_01.yml | 2 +- .github/workflows/tests_05.yml | 2 +- .github/workflows/tests_22.yml | 2 +- .github/workflows/tests_23.yml | 2 +- .github/workflows/tests_36.yml | 2 +- HISTORY.rst | 1 + bigml/dataset.py | 10 +++++----- bigml/deepnet.py | 13 ++++--------- pyproject.toml | 2 +- setup.py | 4 ++-- 10 files changed, 18 insertions(+), 22 deletions(-) diff --git a/.github/workflows/tests_01.yml b/.github/workflows/tests_01.yml index 9f12863f..3951257b 100644 --- a/.github/workflows/tests_01.yml +++ b/.github/workflows/tests_01.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python: [3.11] + python: [3.12] env: BIGML_USERNAME: ${{ secrets.BIGML_USERNAME }} BIGML_API_KEY: ${{ secrets.BIGML_API_KEY }} diff --git a/.github/workflows/tests_05.yml b/.github/workflows/tests_05.yml index 8cc673ec..ed1cac5f 100644 --- a/.github/workflows/tests_05.yml +++ b/.github/workflows/tests_05.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python: [3.11] + python: [3.12] env: BIGML_USERNAME: ${{ secrets.BIGML_USERNAME }} BIGML_API_KEY: ${{ secrets.BIGML_API_KEY }} diff 
--git a/.github/workflows/tests_22.yml b/.github/workflows/tests_22.yml index c75adca1..46784de2 100644 --- a/.github/workflows/tests_22.yml +++ b/.github/workflows/tests_22.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python: [3.11] + python: [3.12] env: BIGML_USERNAME: ${{ secrets.BIGML_USERNAME }} BIGML_API_KEY: ${{ secrets.BIGML_API_KEY }} diff --git a/.github/workflows/tests_23.yml b/.github/workflows/tests_23.yml index 042d57cc..892a73d6 100644 --- a/.github/workflows/tests_23.yml +++ b/.github/workflows/tests_23.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python: [3.11] + python: [3.12] env: BIGML_USERNAME: ${{ secrets.BIGML_USERNAME }} BIGML_API_KEY: ${{ secrets.BIGML_API_KEY }} diff --git a/.github/workflows/tests_36.yml b/.github/workflows/tests_36.yml index 7b78c0a5..a766fa97 100644 --- a/.github/workflows/tests_36.yml +++ b/.github/workflows/tests_36.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python: [3.11] + python: [3.12] env: BIGML_USERNAME: ${{ secrets.BIGML_USERNAME }} BIGML_API_KEY: ${{ secrets.BIGML_API_KEY }} diff --git a/HISTORY.rst b/HISTORY.rst index 64bbc488..ed2e7598 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -7,6 +7,7 @@ History ------------------ - Fixing the get_leaves function for local decision trees. +- Fixing setup issues in Python3.12 - Changing documentation templates. 
9.8.0.dev1 (2024-02-28) diff --git a/bigml/dataset.py b/bigml/dataset.py index 280c285d..6b45f5e4 100644 --- a/bigml/dataset.py +++ b/bigml/dataset.py @@ -20,7 +20,6 @@ """ import os import logging -import warnings import subprocess from bigml.fields import Fields, sorted_headers, get_new_fields @@ -40,12 +39,13 @@ #pylint: disable=locally-disabled,bare-except,ungrouped-imports try: - # avoiding tensorflow info logging - warnings.filterwarnings("ignore", category=DeprecationWarning) - os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' + # bigml-sensenet should be installed for image processing + logging.disable(logging.WARNING) + os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' import tensorflow as tf - tf.get_logger().setLevel('ERROR') tf.autograph.set_verbosity(0) + logging.getLogger("tensorflow").setLevel(logging.ERROR) + import sensenet from bigml.images.featurizers import ImageFeaturizer as Featurizer except: pass diff --git a/bigml/deepnet.py b/bigml/deepnet.py index 75eed911..087f1653 100644 --- a/bigml/deepnet.py +++ b/bigml/deepnet.py @@ -62,20 +62,15 @@ import bigml.laminar.preprocess_np as pp try: - # avoiding tensorflow info logging - warnings.filterwarnings("ignore", category=DeprecationWarning) - os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' + logging.disable(logging.WARNING) + os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' + logging.getLogger("tensorflow").setLevel(logging.ERROR) import tensorflow as tf - tf.get_logger().setLevel('ERROR') tf.autograph.set_verbosity(0) - LAMINAR_VERSION = False -except Exception: - LAMINAR_VERSION = True - -try: from sensenet.models.wrappers import create_model from bigml.images.utils import to_relative_coordinates from bigml.constants import IOU_REMOTE_SETTINGS + LAMINAR_VERSION = False except Exception: LAMINAR_VERSION = True diff --git a/pyproject.toml b/pyproject.toml index 70a3fb8e..1de495d4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [build-system] requires=[ - "setuptools" + "setuptools==69.0.0" ] [tool.black] diff 
--git a/setup.py b/setup.py index d24d7879..2bcdd013 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ open(version_py_path).read()).group(1) TOPIC_MODELING_DEPENDENCIES = ["cython", "pystemmer==2.2.0.1"] -IMAGES_DEPENDENCIES = ["bigml-sensenet==0.7.4"] +IMAGES_DEPENDENCIES = ["bigml-sensenet==0.7.5"] # Concatenate files into the long description file_contents = [] @@ -50,7 +50,7 @@ download_url="https://github.com/bigmlcom/python", license="http://www.apache.org/licenses/LICENSE-2.0", setup_requires = ['pytest'], - install_requires = ["unidecode", "bigml-chronos>=0.4.3", "requests", + install_requires = ["setuptools==69.0.0", "unidecode", "bigml-chronos>=0.4.3", "requests", "requests-toolbelt", "msgpack", "numpy>=1.22", "scipy", "javascript"], extras_require={"images": IMAGES_DEPENDENCIES, From d2ecb57b22424834591f60b54fbafa65f84bf303 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Merc=C3=A8=20Mart=C3=ADn=20Prats?= Date: Thu, 3 Oct 2024 00:19:48 +0200 Subject: [PATCH 05/19] Fixing review comments --- .readthedocs.yaml | 2 +- bigml/generators/model.py | 1 - docs/index.rst | 4 ++-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 9712e405..d74e663d 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -9,7 +9,7 @@ version: 2 build: os: ubuntu-22.04 tools: - python: "3.11" + python: "3.12" # Build documentation in the docs/ directory with Sphinx sphinx: diff --git a/bigml/generators/model.py b/bigml/generators/model.py index d6130bba..b035036d 100644 --- a/bigml/generators/model.py +++ b/bigml/generators/model.py @@ -155,7 +155,6 @@ def get_tree_leaves(tree, fields, path, filter_function=None): path[:], filter_function=filter_function) else: - print("id:", node[offsets["id"]]) leaf = { 'id': node[offsets["id"]], 'confidence': node[offsets["confidence"]], diff --git a/docs/index.rst b/docs/index.rst index 62d26d86..2e9346ac 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,12 +3,12 @@ BigML Python 
Bindings `BigML `_ makes machine learning easy by taking care of the details required to add data-driven decisions and predictive -power to your company. Unlike other machine learning services, BigML +power to your applications. Unlike other machine learning services, BigML creates `beautiful predictive models `_ that can be easily understood and interacted with. -These BigML Python bindings allow you to interact with BigML.io, the API +These BigML Python bindings allow you interacting with BigML.io, the API for BigML. You can use it to easily create, retrieve, list, update, and delete BigML resources (i.e., sources, datasets, models and, predictions). From 1d54070e1d425dd0247928f7f06fe2bfb9513665 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Merc=C3=A8=20Mart=C3=ADn=20Prats?= Date: Thu, 3 Oct 2024 12:03:00 +0200 Subject: [PATCH 06/19] Fixing preparation for sensenet --- bigml/dataset.py | 9 +++------ bigml/deepnet.py | 8 ++------ bigml/util.py | 11 +++++++++++ 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/bigml/dataset.py b/bigml/dataset.py index 6b45f5e4..5115f7de 100644 --- a/bigml/dataset.py +++ b/bigml/dataset.py @@ -25,7 +25,8 @@ from bigml.fields import Fields, sorted_headers, get_new_fields from bigml.api import get_api_connection, get_dataset_id, get_status from bigml.basemodel import get_resource_dict -from bigml.util import DEFAULT_LOCALE, use_cache, cast, load, dump, dumps +from bigml.util import DEFAULT_LOCALE, use_cache, cast, load, dump, dumps, \ + sensenet_logging from bigml.constants import FINISHED from bigml.flatline import Flatline from bigml.featurizer import Featurizer @@ -40,11 +41,7 @@ #pylint: disable=locally-disabled,bare-except,ungrouped-imports try: # bigml-sensenet should be installed for image processing - logging.disable(logging.WARNING) - os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' - import tensorflow as tf - tf.autograph.set_verbosity(0) - logging.getLogger("tensorflow").setLevel(logging.ERROR) + sensenet_logging() import 
sensenet from bigml.images.featurizers import ImageFeaturizer as Featurizer except: diff --git a/bigml/deepnet.py b/bigml/deepnet.py index 087f1653..18c5b404 100644 --- a/bigml/deepnet.py +++ b/bigml/deepnet.py @@ -49,7 +49,7 @@ from bigml.api import FINISHED from bigml.api import get_status, get_api_connection, get_deepnet_id from bigml.util import cast, use_cache, load, get_data_transformations, \ - PRECISION + PRECISION, sensenet_logging from bigml.basemodel import get_resource_dict, extract_objective from bigml.modelfields import ModelFields from bigml.laminar.constants import NUMERIC @@ -62,11 +62,7 @@ import bigml.laminar.preprocess_np as pp try: - logging.disable(logging.WARNING) - os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' - logging.getLogger("tensorflow").setLevel(logging.ERROR) - import tensorflow as tf - tf.autograph.set_verbosity(0) + sensenet_logging() from sensenet.models.wrappers import create_model from bigml.images.utils import to_relative_coordinates from bigml.constants import IOU_REMOTE_SETTINGS diff --git a/bigml/util.py b/bigml/util.py index 25ccadba..c1131a0f 100644 --- a/bigml/util.py +++ b/bigml/util.py @@ -748,6 +748,7 @@ def get_formatted_data(input_data_list, out_format=None): inner_data_list = input_data_list.copy() return inner_data_list + #pylint: disable=locally-disabled,import-outside-toplevel def get_data_transformations(resource_id, parent_id): """Returns the pipeline that contains the tranformations and derived @@ -760,3 +761,13 @@ def get_data_transformations(resource_id, parent_id): "pipeline.") from bigml.pipeline.pipeline import BMLPipeline return BMLPipeline("dt-%s" % resource_id, [parent_id]) + + +def sensenet_logging(): + """Removes warnings unnecessary logging when using sensenet""" + logging.disable(logging.WARNING) + os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' + os.environ["TF_USE_LEGACY_KERAS"] = "1" + import tensorflow as tf + tf.autograph.set_verbosity(0) + logging.getLogger("tensorflow").setLevel(logging.ERROR) From 
96c14bcc96a6bc7d67768f7eca75545c5cf6129a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Merc=C3=A8=20Mart=C3=ADn=20Prats?= Date: Thu, 3 Oct 2024 12:44:15 +0200 Subject: [PATCH 07/19] Fixing unimported constant --- bigml/dataset.py | 1 - bigml/deepnet.py | 5 +---- bigml/util.py | 1 + 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/bigml/dataset.py b/bigml/dataset.py index 5115f7de..2f75aa5e 100644 --- a/bigml/dataset.py +++ b/bigml/dataset.py @@ -19,7 +19,6 @@ """ import os -import logging import subprocess from bigml.fields import Fields, sorted_headers, get_new_fields diff --git a/bigml/deepnet.py b/bigml/deepnet.py index 18c5b404..09370af2 100644 --- a/bigml/deepnet.py +++ b/bigml/deepnet.py @@ -40,7 +40,6 @@ deepnet.predict({"petal length": 3, "petal width": 1}) """ -import logging import os import warnings @@ -56,7 +55,7 @@ from bigml.model import parse_operating_point, sort_categories from bigml.constants import REGIONS, REGIONS_OPERATION_SETTINGS, \ DEFAULT_OPERATION_SETTINGS, REGION_SCORE_ALIAS, REGION_SCORE_THRESHOLD, \ - IMAGE, DECIMALS + IMAGE, DECIMALS, IOU_REMOTE_SETTINGS import bigml.laminar.numpy_ops as net import bigml.laminar.preprocess_np as pp @@ -65,12 +64,10 @@ sensenet_logging() from sensenet.models.wrappers import create_model from bigml.images.utils import to_relative_coordinates - from bigml.constants import IOU_REMOTE_SETTINGS LAMINAR_VERSION = False except Exception: LAMINAR_VERSION = True -LOGGER = logging.getLogger('BigML') MEAN = "mean" STANDARD_DEVIATION = "stdev" diff --git a/bigml/util.py b/bigml/util.py index c1131a0f..6d81a847 100644 --- a/bigml/util.py +++ b/bigml/util.py @@ -28,6 +28,7 @@ import random import ast import datetime +import logging from urllib.parse import urlparse from unidecode import unidecode From 72ca46840653e5f04f3393c625eec830978201ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Merc=C3=A8=20Mart=C3=ADn=20Prats?= Date: Mon, 13 Jan 2025 17:42:59 +0100 Subject: [PATCH 08/19] Fixing annotations update in 
images composite --- HISTORY.rst | 5 +++++ bigml/api_handlers/sourcehandler.py | 5 +++-- bigml/bigmlconnection.py | 1 - bigml/version.py | 2 +- docs/index.rst | 2 +- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index ed2e7598..e4bd9d7d 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,11 @@ History ------- +9.8.1 (2025-01-14) +------------------ + +- Fixing annotations update in images composite sources. + 9.8.0 (2024-10-02) ------------------ diff --git a/bigml/api_handlers/sourcehandler.py b/bigml/api_handlers/sourcehandler.py index ac897413..57edca45 100644 --- a/bigml/api_handlers/sourcehandler.py +++ b/bigml/api_handlers/sourcehandler.py @@ -553,8 +553,9 @@ def update_composite_annotations(self, source, images_file, pass # we need to limit the amount of changes per update - for offset in range(0, int(len(changes) / MAX_CHANGES) + 1): - new_batch = changes[offset: offset + MAX_CHANGES] + batches_number = int(len(changes) / MAX_CHANGES) + for offset in range(0, batches_number + 1): + new_batch = changes[offset * MAX_CHANGES: (offset + 1) * MAX_CHANGES] if new_batch: source = self.update_source(source, {"row_values": new_batch}) diff --git a/bigml/bigmlconnection.py b/bigml/bigmlconnection.py index f47e6b32..166dc692 100644 --- a/bigml/bigmlconnection.py +++ b/bigml/bigmlconnection.py @@ -650,7 +650,6 @@ def _update(self, url, body, organization=None, resource_id=None): location, resource, error) try: code = response.status_code - if code == HTTP_ACCEPTED: resource = json_load(response.content) resource_id = resource['resource'] diff --git a/bigml/version.py b/bigml/version.py index 3cd81c28..d33e46a3 100644 --- a/bigml/version.py +++ b/bigml/version.py @@ -1 +1 @@ -__version__ = '9.8.0' +__version__ = '9.8.1' diff --git a/docs/index.rst b/docs/index.rst index 2e9346ac..b2f20837 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -44,7 +44,7 @@ This module is licensed under the `Apache License, Version .. 
toctree:: :maxdepth: 2 :hidden: - :caption: Resouce Management + :caption: Resource Management ml_resources creating_resources From a8a97f1a1868545f0de01289afd0a9fe96a3a022 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Merc=C3=A8=20Mart=C3=ADn=20Prats?= Date: Mon, 13 Jan 2025 23:05:31 +0100 Subject: [PATCH 09/19] Updating copyright notices --- bigml/anomaly.py | 2 +- bigml/api.py | 2 +- bigml/api_handlers/anomalyhandler.py | 2 +- bigml/api_handlers/anomalyscorehandler.py | 2 +- bigml/api_handlers/associationhandler.py | 2 +- bigml/api_handlers/associationsethandler.py | 2 +- bigml/api_handlers/batchanomalyscorehandler.py | 2 +- bigml/api_handlers/batchcentroidhandler.py | 2 +- bigml/api_handlers/batchpredictionhandler.py | 2 +- bigml/api_handlers/batchprojectionhandler.py | 2 +- bigml/api_handlers/batchtopicdistributionhandler.py | 2 +- bigml/api_handlers/centroidhandler.py | 2 +- bigml/api_handlers/clusterhandler.py | 2 +- bigml/api_handlers/configurationhandler.py | 2 +- bigml/api_handlers/correlationhandler.py | 2 +- bigml/api_handlers/datasethandler.py | 2 +- bigml/api_handlers/deepnethandler.py | 2 +- bigml/api_handlers/ensemblehandler.py | 2 +- bigml/api_handlers/evaluationhandler.py | 2 +- bigml/api_handlers/executionhandler.py | 2 +- bigml/api_handlers/externalconnectorhandler.py | 2 +- bigml/api_handlers/forecasthandler.py | 2 +- bigml/api_handlers/fusionhandler.py | 2 +- bigml/api_handlers/libraryhandler.py | 2 +- bigml/api_handlers/linearhandler.py | 2 +- bigml/api_handlers/logistichandler.py | 2 +- bigml/api_handlers/modelhandler.py | 2 +- bigml/api_handlers/optimlhandler.py | 2 +- bigml/api_handlers/pcahandler.py | 2 +- bigml/api_handlers/predictionhandler.py | 2 +- bigml/api_handlers/projecthandler.py | 2 +- bigml/api_handlers/projectionhandler.py | 2 +- bigml/api_handlers/resourcehandler.py | 2 +- bigml/api_handlers/samplehandler.py | 2 +- bigml/api_handlers/scripthandler.py | 2 +- bigml/api_handlers/sourcehandler.py | 2 +- 
bigml/api_handlers/statisticaltesthandler.py | 2 +- bigml/api_handlers/timeserieshandler.py | 2 +- bigml/api_handlers/topicdistributionhandler.py | 2 +- bigml/api_handlers/topicmodelhandler.py | 2 +- bigml/association.py | 2 +- bigml/associationrule.py | 2 +- bigml/basemodel.py | 2 +- bigml/bigmlconnection.py | 2 +- bigml/centroid.py | 2 +- bigml/cluster.py | 2 +- bigml/constants.py | 2 +- bigml/dataset.py | 2 +- bigml/deepnet.py | 2 +- bigml/domain.py | 2 +- bigml/ensemble.py | 2 +- bigml/ensemblepredictor.py | 2 +- bigml/evaluation.py | 2 +- bigml/exceptions.py | 2 +- bigml/execution.py | 2 +- bigml/featurizer.py | 2 +- bigml/fields.py | 2 +- bigml/flatline.py | 2 +- bigml/flattree.py | 2 +- bigml/fusion.py | 2 +- bigml/generators/boosted_tree.py | 2 +- bigml/generators/model.py | 2 +- bigml/generators/tree.py | 2 +- bigml/generators/tree_common.py | 2 +- bigml/images/featurizers.py | 2 +- bigml/images/utils.py | 2 +- bigml/io.py | 4 ++-- bigml/item.py | 2 +- bigml/laminar/numpy_ops.py | 2 +- bigml/laminar/preprocess_np.py | 2 +- bigml/linear.py | 2 +- bigml/local_model.py | 2 +- bigml/logistic.py | 2 +- bigml/model.py | 2 +- bigml/modelfields.py | 2 +- bigml/multimodel.py | 2 +- bigml/multivote.py | 2 +- bigml/multivotelist.py | 2 +- bigml/path.py | 2 +- bigml/pca.py | 2 +- bigml/pipeline/pipeline.py | 2 +- bigml/pipeline/transformer.py | 2 +- bigml/predicate.py | 2 +- bigml/predicate_utils/utils.py | 2 +- bigml/predicates.py | 2 +- bigml/predict_utils/boosting.py | 2 +- bigml/predict_utils/classification.py | 2 +- bigml/predict_utils/common.py | 2 +- bigml/predict_utils/regression.py | 2 +- bigml/prediction.py | 2 +- bigml/shapwrapper.py | 2 +- bigml/supervised.py | 2 +- bigml/tests/compare_dataset_steps.py | 2 +- bigml/tests/compare_forecasts_steps.py | 2 +- bigml/tests/compare_pipeline_steps.py | 2 +- bigml/tests/compare_predictions_steps.py | 2 +- bigml/tests/compute_lda_prediction_steps.py | 2 +- bigml/tests/compute_multivote_prediction_steps.py | 2 +- 
bigml/tests/create_anomaly_steps.py | 2 +- bigml/tests/create_association_steps.py | 2 +- bigml/tests/create_batch_prediction_steps.py | 2 +- bigml/tests/create_batch_projection_steps.py | 2 +- bigml/tests/create_cluster_steps.py | 2 +- bigml/tests/create_configuration_steps.py | 2 +- bigml/tests/create_correlation_steps.py | 2 +- bigml/tests/create_dataset_steps.py | 2 +- bigml/tests/create_ensemble_steps.py | 2 +- bigml/tests/create_evaluation_steps.py | 2 +- bigml/tests/create_execution_steps.py | 2 +- bigml/tests/create_external_steps.py | 2 +- bigml/tests/create_forecast_steps.py | 2 +- bigml/tests/create_lda_steps.py | 2 +- bigml/tests/create_library_steps.py | 2 +- bigml/tests/create_linear_steps.py | 2 +- bigml/tests/create_model_steps.py | 2 +- bigml/tests/create_multimodel_steps.py | 2 +- bigml/tests/create_pca_steps.py | 2 +- bigml/tests/create_prediction_steps.py | 2 +- bigml/tests/create_project_steps.py | 2 +- bigml/tests/create_projection_steps.py | 2 +- bigml/tests/create_sample_steps.py | 2 +- bigml/tests/create_script_steps.py | 2 +- bigml/tests/create_source_steps.py | 2 +- bigml/tests/create_statistical_tst_steps.py | 2 +- bigml/tests/create_time_series_steps.py | 2 +- bigml/tests/delete_project_steps.py | 2 +- bigml/tests/fields_steps.py | 2 +- bigml/tests/inspect_model_steps.py | 2 +- bigml/tests/read_dataset_steps.py | 2 +- bigml/tests/read_resource_steps.py | 2 +- bigml/tests/test_01_prediction.py | 2 +- bigml/tests/test_03_local_prediction.py | 2 +- bigml/tests/test_04_multivote_prediction.py | 2 +- bigml/tests/test_05_compare_predictions.py | 2 +- bigml/tests/test_05_compare_predictions_b.py | 2 +- bigml/tests/test_06_batch_predictions.py | 2 +- bigml/tests/test_07_multimodel_batch_predictions.py | 2 +- bigml/tests/test_08_multimodel.py | 2 +- bigml/tests/test_09_ensemble_prediction.py | 2 +- bigml/tests/test_10_local_ensemble_prediction.py | 2 +- bigml/tests/test_11_multimodel_prediction.py | 2 +- 
bigml/tests/test_12_public_model_prediction.py | 2 +- bigml/tests/test_13_public_dataset.py | 2 +- bigml/tests/test_14_create_evaluations.py | 2 +- bigml/tests/test_15_download.py | 2 +- bigml/tests/test_16_sample_dataset.py | 2 +- bigml/tests/test_17_split_dataset.py | 2 +- bigml/tests/test_18_create_anomaly.py | 2 +- bigml/tests/test_19_missing_and_errors.py | 2 +- bigml/tests/test_20_rename_duplicated_names.py | 2 +- bigml/tests/test_21_projects.py | 2 +- bigml/tests/test_22_source_args.py | 2 +- bigml/tests/test_23_local_model_info.py | 2 +- bigml/tests/test_24_cluster_derived.py | 2 +- bigml/tests/test_25_correlation.py | 2 +- bigml/tests/test_26_statistical_test.py | 2 +- bigml/tests/test_27_fields.py | 2 +- bigml/tests/test_28_association.py | 2 +- bigml/tests/test_29_script.py | 2 +- bigml/tests/test_30_execution.py | 2 +- bigml/tests/test_31_library.py | 2 +- bigml/tests/test_32_topic_model_prediction.py | 2 +- bigml/tests/test_33_compare_predictions.py | 2 +- bigml/tests/test_34_time_series.py | 2 +- bigml/tests/test_35_b_compare_predictions.py | 2 +- bigml/tests/test_35_c_compare_predictions.py | 2 +- bigml/tests/test_35_compare_predictions.py | 2 +- bigml/tests/test_35_d_compare_predictions.py | 2 +- bigml/tests/test_35_e_compare_predictions.py | 2 +- bigml/tests/test_36_compare_predictions.py | 2 +- bigml/tests/test_37_configuration.py | 2 +- bigml/tests/test_38_organization.py | 2 +- bigml/tests/test_38_project_connection.py | 2 +- bigml/tests/test_39_optiml_fusion.py | 2 +- bigml/tests/test_40_local_from_file.py | 2 +- bigml/tests/test_41_multidataset.py | 2 +- bigml/tests/test_42_pca.py | 2 +- bigml/tests/test_43_linear.py | 2 +- bigml/tests/test_44_compare_predictions.py | 2 +- bigml/tests/test_45_external_connector.py | 2 +- bigml/tests/test_47_webhooks.py | 2 +- bigml/tests/test_48_local_dataset.py | 2 +- bigml/tests/test_49_local_pipeline.py | 2 +- bigml/tests/test_99_cleaning.py | 2 +- bigml/tests/world.py | 2 +- bigml/timeseries.py | 2 +- 
bigml/topicmodel.py | 2 +- bigml/tree_utils.py | 2 +- bigml/tsoutconstants.py | 2 +- bigml/tssubmodels.py | 2 +- bigml/util.py | 2 +- bigml/webhooks.py | 2 +- setup.py | 2 +- 193 files changed, 194 insertions(+), 194 deletions(-) diff --git a/bigml/anomaly.py b/bigml/anomaly.py index 07f3f6f0..4a345724 100644 --- a/bigml/anomaly.py +++ b/bigml/anomaly.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 BigML +# Copyright 2020-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api.py b/bigml/api.py index 21d80679..55b1e591 100644 --- a/bigml/api.py +++ b/bigml/api.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=too-many-ancestors,non-parent-init-called, unused-import, no-member # -# Copyright 2012-2023 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/anomalyhandler.py b/bigml/api_handlers/anomalyhandler.py index 1bb07dd0..03ece5e2 100644 --- a/bigml/api_handlers/anomalyhandler.py +++ b/bigml/api_handlers/anomalyhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/api_handlers/anomalyscorehandler.py b/bigml/api_handlers/anomalyscorehandler.py index fd0df39b..1398d539 100644 --- a/bigml/api_handlers/anomalyscorehandler.py +++ b/bigml/api_handlers/anomalyscorehandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/associationhandler.py b/bigml/api_handlers/associationhandler.py index c6957cf4..994a0050 100644 --- a/bigml/api_handlers/associationhandler.py +++ b/bigml/api_handlers/associationhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/associationsethandler.py b/bigml/api_handlers/associationsethandler.py index cd8176c8..f1c13bb1 100644 --- a/bigml/api_handlers/associationsethandler.py +++ b/bigml/api_handlers/associationsethandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/api_handlers/batchanomalyscorehandler.py b/bigml/api_handlers/batchanomalyscorehandler.py index b55f6d27..07516a27 100644 --- a/bigml/api_handlers/batchanomalyscorehandler.py +++ b/bigml/api_handlers/batchanomalyscorehandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/batchcentroidhandler.py b/bigml/api_handlers/batchcentroidhandler.py index a5859d4a..79c25f52 100644 --- a/bigml/api_handlers/batchcentroidhandler.py +++ b/bigml/api_handlers/batchcentroidhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/batchpredictionhandler.py b/bigml/api_handlers/batchpredictionhandler.py index 1f2da496..462d127a 100644 --- a/bigml/api_handlers/batchpredictionhandler.py +++ b/bigml/api_handlers/batchpredictionhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/api_handlers/batchprojectionhandler.py b/bigml/api_handlers/batchprojectionhandler.py index 5d9dcbe0..bfb05228 100644 --- a/bigml/api_handlers/batchprojectionhandler.py +++ b/bigml/api_handlers/batchprojectionhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2018-2023 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/batchtopicdistributionhandler.py b/bigml/api_handlers/batchtopicdistributionhandler.py index 0f09a94a..2a1bd204 100644 --- a/bigml/api_handlers/batchtopicdistributionhandler.py +++ b/bigml/api_handlers/batchtopicdistributionhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2016-2023 BigML +# Copyright 2016-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/centroidhandler.py b/bigml/api_handlers/centroidhandler.py index 10a836ae..d0455649 100644 --- a/bigml/api_handlers/centroidhandler.py +++ b/bigml/api_handlers/centroidhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/api_handlers/clusterhandler.py b/bigml/api_handlers/clusterhandler.py index 1511a37b..ffc833eb 100644 --- a/bigml/api_handlers/clusterhandler.py +++ b/bigml/api_handlers/clusterhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/configurationhandler.py b/bigml/api_handlers/configurationhandler.py index 12a28a96..4e2e1ae1 100644 --- a/bigml/api_handlers/configurationhandler.py +++ b/bigml/api_handlers/configurationhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/correlationhandler.py b/bigml/api_handlers/correlationhandler.py index ab923aab..29fedc23 100644 --- a/bigml/api_handlers/correlationhandler.py +++ b/bigml/api_handlers/correlationhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/datasethandler.py b/bigml/api_handlers/datasethandler.py index 656158e8..04ac3ec6 100644 --- a/bigml/api_handlers/datasethandler.py +++ b/bigml/api_handlers/datasethandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/api_handlers/deepnethandler.py b/bigml/api_handlers/deepnethandler.py index 85d91485..ff966793 100644 --- a/bigml/api_handlers/deepnethandler.py +++ b/bigml/api_handlers/deepnethandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2017-2023 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/ensemblehandler.py b/bigml/api_handlers/ensemblehandler.py index e03b96fa..6ebd035e 100644 --- a/bigml/api_handlers/ensemblehandler.py +++ b/bigml/api_handlers/ensemblehandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/evaluationhandler.py b/bigml/api_handlers/evaluationhandler.py index 37a9fe5b..82b224d4 100644 --- a/bigml/api_handlers/evaluationhandler.py +++ b/bigml/api_handlers/evaluationhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/executionhandler.py b/bigml/api_handlers/executionhandler.py index 436e64e1..2fbf6f7e 100644 --- a/bigml/api_handlers/executionhandler.py +++ b/bigml/api_handlers/executionhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/api_handlers/externalconnectorhandler.py b/bigml/api_handlers/externalconnectorhandler.py index 9a11de46..7d33a58e 100644 --- a/bigml/api_handlers/externalconnectorhandler.py +++ b/bigml/api_handlers/externalconnectorhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2020-2023 BigML +# Copyright 2020-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/forecasthandler.py b/bigml/api_handlers/forecasthandler.py index f09f2834..cfaba279 100644 --- a/bigml/api_handlers/forecasthandler.py +++ b/bigml/api_handlers/forecasthandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2017-2023 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/fusionhandler.py b/bigml/api_handlers/fusionhandler.py index 315fa907..90e22ee7 100644 --- a/bigml/api_handlers/fusionhandler.py +++ b/bigml/api_handlers/fusionhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2018-2023 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/libraryhandler.py b/bigml/api_handlers/libraryhandler.py index 18ee3a8e..36055eee 100644 --- a/bigml/api_handlers/libraryhandler.py +++ b/bigml/api_handlers/libraryhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/api_handlers/linearhandler.py b/bigml/api_handlers/linearhandler.py index 4804fd51..3f24a5f8 100644 --- a/bigml/api_handlers/linearhandler.py +++ b/bigml/api_handlers/linearhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2019-2023 BigML +# Copyright 2019-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/logistichandler.py b/bigml/api_handlers/logistichandler.py index 5d00754a..744422bf 100644 --- a/bigml/api_handlers/logistichandler.py +++ b/bigml/api_handlers/logistichandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/modelhandler.py b/bigml/api_handlers/modelhandler.py index f2aee4f0..0a94d342 100644 --- a/bigml/api_handlers/modelhandler.py +++ b/bigml/api_handlers/modelhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/optimlhandler.py b/bigml/api_handlers/optimlhandler.py index 1f1e5f99..cd5853d5 100644 --- a/bigml/api_handlers/optimlhandler.py +++ b/bigml/api_handlers/optimlhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2018-2023 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/api_handlers/pcahandler.py b/bigml/api_handlers/pcahandler.py index 207591c7..933d73da 100644 --- a/bigml/api_handlers/pcahandler.py +++ b/bigml/api_handlers/pcahandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2018-2023 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/predictionhandler.py b/bigml/api_handlers/predictionhandler.py index 96d2f6db..c2c160b2 100644 --- a/bigml/api_handlers/predictionhandler.py +++ b/bigml/api_handlers/predictionhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/projecthandler.py b/bigml/api_handlers/projecthandler.py index a4d17d2b..3c3b7a51 100644 --- a/bigml/api_handlers/projecthandler.py +++ b/bigml/api_handlers/projecthandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/projectionhandler.py b/bigml/api_handlers/projectionhandler.py index 4b227947..d463fca8 100644 --- a/bigml/api_handlers/projectionhandler.py +++ b/bigml/api_handlers/projectionhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2018-2023 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/api_handlers/resourcehandler.py b/bigml/api_handlers/resourcehandler.py index caef0e99..524f53ef 100644 --- a/bigml/api_handlers/resourcehandler.py +++ b/bigml/api_handlers/resourcehandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method,unused-import # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/samplehandler.py b/bigml/api_handlers/samplehandler.py index 53861ae2..d50baf0b 100644 --- a/bigml/api_handlers/samplehandler.py +++ b/bigml/api_handlers/samplehandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/scripthandler.py b/bigml/api_handlers/scripthandler.py index c012d985..d03ed771 100644 --- a/bigml/api_handlers/scripthandler.py +++ b/bigml/api_handlers/scripthandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/sourcehandler.py b/bigml/api_handlers/sourcehandler.py index 57edca45..8f3568ea 100644 --- a/bigml/api_handlers/sourcehandler.py +++ b/bigml/api_handlers/sourcehandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/api_handlers/statisticaltesthandler.py b/bigml/api_handlers/statisticaltesthandler.py index 6bd72330..eca91255 100644 --- a/bigml/api_handlers/statisticaltesthandler.py +++ b/bigml/api_handlers/statisticaltesthandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/timeserieshandler.py b/bigml/api_handlers/timeserieshandler.py index ae68eb2d..2d57a08c 100644 --- a/bigml/api_handlers/timeserieshandler.py +++ b/bigml/api_handlers/timeserieshandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2017-2023 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/api_handlers/topicdistributionhandler.py b/bigml/api_handlers/topicdistributionhandler.py index b7fad37c..117cefd2 100644 --- a/bigml/api_handlers/topicdistributionhandler.py +++ b/bigml/api_handlers/topicdistributionhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2016-2023 BigML +# Copyright 2016-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/api_handlers/topicmodelhandler.py b/bigml/api_handlers/topicmodelhandler.py index 78dca0a5..a34b904b 100644 --- a/bigml/api_handlers/topicmodelhandler.py +++ b/bigml/api_handlers/topicmodelhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=abstract-method # -# Copyright 2016-2023 BigML +# Copyright 2016-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/association.py b/bigml/association.py index 6cc50f22..a3b65d76 100644 --- a/bigml/association.py +++ b/bigml/association.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/associationrule.py b/bigml/associationrule.py index c7f82835..63944342 100644 --- a/bigml/associationrule.py +++ b/bigml/associationrule.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/basemodel.py b/bigml/basemodel.py index c4d380a1..0c22dc54 100644 --- a/bigml/basemodel.py +++ b/bigml/basemodel.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2013-2023 BigML +# Copyright 2013-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/bigmlconnection.py b/bigml/bigmlconnection.py index 166dc692..a5a796c5 100644 --- a/bigml/bigmlconnection.py +++ b/bigml/bigmlconnection.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/centroid.py b/bigml/centroid.py index 4e5f30c0..534cb562 100644 --- a/bigml/centroid.py +++ b/bigml/centroid.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/cluster.py b/bigml/cluster.py index 040c108b..5739554b 100644 --- a/bigml/cluster.py +++ b/bigml/cluster.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/constants.py b/bigml/constants.py index 6423faff..5171d557 100644 --- a/bigml/constants.py +++ b/bigml/constants.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/dataset.py b/bigml/dataset.py index 2f75aa5e..5c548e61 100644 --- a/bigml/dataset.py +++ b/bigml/dataset.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022-2023 BigML +# Copyright 2022-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/deepnet.py b/bigml/deepnet.py index 09370af2..dbb45dc9 100644 --- a/bigml/deepnet.py +++ b/bigml/deepnet.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=wrong-import-position,ungrouped-imports # -# Copyright 2017-2023 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/domain.py b/bigml/domain.py index d36b4194..81a26ebc 100644 --- a/bigml/domain.py +++ b/bigml/domain.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/ensemble.py b/bigml/ensemble.py index 07f8faf5..94c96a77 100644 --- a/bigml/ensemble.py +++ b/bigml/ensemble.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2012-2023 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/ensemblepredictor.py b/bigml/ensemblepredictor.py index d266805b..cab2fbdd 100644 --- a/bigml/ensemblepredictor.py +++ b/bigml/ensemblepredictor.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2023 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/evaluation.py b/bigml/evaluation.py index bd79cc9a..76726589 100644 --- a/bigml/evaluation.py +++ b/bigml/evaluation.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2023 BigML +# Copyright 2023-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/exceptions.py b/bigml/exceptions.py index 77630bab..71e965f6 100644 --- a/bigml/exceptions.py +++ b/bigml/exceptions.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 BigML +# Copyright 2021-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/execution.py b/bigml/execution.py index f4af728e..626cd06e 100644 --- a/bigml/execution.py +++ b/bigml/execution.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2023 BigML +# Copyright 2019-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/featurizer.py b/bigml/featurizer.py index e39d754f..0a6d9e33 100644 --- a/bigml/featurizer.py +++ b/bigml/featurizer.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022-2023 BigML +# Copyright 2022-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/fields.py b/bigml/fields.py index 62023258..41246b62 100644 --- a/bigml/fields.py +++ b/bigml/fields.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=unbalanced-tuple-unpacking # -# Copyright 2012-2023 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/flatline.py b/bigml/flatline.py index 7efba23a..ee18536a 100644 --- a/bigml/flatline.py +++ b/bigml/flatline.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022-2023 BigML +# Copyright 2022-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/flattree.py b/bigml/flattree.py index cc190204..021d52d6 100644 --- a/bigml/flattree.py +++ b/bigml/flattree.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2023 BigML +# Copyright 2019-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/fusion.py b/bigml/fusion.py index 3b88481c..c7ce7425 100644 --- a/bigml/fusion.py +++ b/bigml/fusion.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2012-2023 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/generators/boosted_tree.py b/bigml/generators/boosted_tree.py index 96fb4a28..14bbf2be 100644 --- a/bigml/generators/boosted_tree.py +++ b/bigml/generators/boosted_tree.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 BigML +# Copyright 2020-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/generators/model.py b/bigml/generators/model.py index b035036d..51c65e92 100644 --- a/bigml/generators/model.py +++ b/bigml/generators/model.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 BigML +# Copyright 2020-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/generators/tree.py b/bigml/generators/tree.py index 086f2108..95d7200e 100644 --- a/bigml/generators/tree.py +++ b/bigml/generators/tree.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 BigML +# Copyright 2020-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/generators/tree_common.py b/bigml/generators/tree_common.py index 64596627..4a46b8e6 100644 --- a/bigml/generators/tree_common.py +++ b/bigml/generators/tree_common.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 BigML +# Copyright 2020-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/images/featurizers.py b/bigml/images/featurizers.py index c2b5ed64..d6919ed1 100644 --- a/bigml/images/featurizers.py +++ b/bigml/images/featurizers.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=invalid-name # -# Copyright 2022-2023 BigML +# Copyright 2022-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/images/utils.py b/bigml/images/utils.py index 1e0a10ba..26378deb 100644 --- a/bigml/images/utils.py +++ b/bigml/images/utils.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022-2023 BigML +# Copyright 2022-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/io.py b/bigml/io.py index b6b878e3..c9dc0a20 100644 --- a/bigml/io.py +++ b/bigml/io.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # pylint: disable=R1732 # -# Copyright (c) 2015-2023 BigML, Inc +# Copyright (c) 2015-2025 BigML, Inc # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,7 +19,7 @@ """Python I/O functions. :author: jao -:date: Wed Apr 08, 2015-2023 17:52 +:date: Wed Apr 08, 2015-2025 17:52 """ diff --git a/bigml/item.py b/bigml/item.py index 3138a0e9..3314507a 100644 --- a/bigml/item.py +++ b/bigml/item.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/laminar/numpy_ops.py b/bigml/laminar/numpy_ops.py index 805850f3..85c21ea4 100644 --- a/bigml/laminar/numpy_ops.py +++ b/bigml/laminar/numpy_ops.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=invalid-name,missing-function-docstring # -# Copyright 2017-2023 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/laminar/preprocess_np.py b/bigml/laminar/preprocess_np.py index c2d6cb9b..95e64899 100644 --- a/bigml/laminar/preprocess_np.py +++ b/bigml/laminar/preprocess_np.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=invalid-name,missing-function-docstring # -# Copyright 2017-2023 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/linear.py b/bigml/linear.py index 80522948..c6e00407 100644 --- a/bigml/linear.py +++ b/bigml/linear.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/local_model.py b/bigml/local_model.py index 8b545ef1..c8ed68c9 100644 --- a/bigml/local_model.py +++ b/bigml/local_model.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # pylint: disable=super-init-not-called # -# Copyright 2023 BigML +# Copyright 2023-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/logistic.py b/bigml/logistic.py index 691199b9..67199512 100644 --- a/bigml/logistic.py +++ b/bigml/logistic.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/model.py b/bigml/model.py index 47b3f56c..560d5c37 100644 --- a/bigml/model.py +++ b/bigml/model.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2013-2023 BigML +# Copyright 2013-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/modelfields.py b/bigml/modelfields.py index 2eb9b38a..964015f0 100644 --- a/bigml/modelfields.py +++ b/bigml/modelfields.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2013-2023 BigML +# Copyright 2013-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/multimodel.py b/bigml/multimodel.py index bc1243eb..85e7eb9e 100644 --- a/bigml/multimodel.py +++ b/bigml/multimodel.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2012-2023 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/multivote.py b/bigml/multivote.py index 9508c835..873e79aa 100644 --- a/bigml/multivote.py +++ b/bigml/multivote.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=dangerous-default-value # -# Copyright 2012-2023 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/multivotelist.py b/bigml/multivotelist.py index ee604e39..72f2cb56 100644 --- a/bigml/multivotelist.py +++ b/bigml/multivotelist.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2023 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/path.py b/bigml/path.py index 6f533fc7..e85a2ac3 100644 --- a/bigml/path.py +++ b/bigml/path.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/pca.py b/bigml/pca.py index 10d37827..22eb37c8 100644 --- a/bigml/pca.py +++ b/bigml/pca.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2023 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/pipeline/pipeline.py b/bigml/pipeline/pipeline.py index 83c62337..20cbb8b9 100644 --- a/bigml/pipeline/pipeline.py +++ b/bigml/pipeline/pipeline.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,cyclic-import # -# Copyright 2022-2023 BigML +# Copyright 2022-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/pipeline/transformer.py b/bigml/pipeline/transformer.py index d099bfa4..3b983cd8 100644 --- a/bigml/pipeline/transformer.py +++ b/bigml/pipeline/transformer.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022-2023 BigML +# Copyright 2022-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/predicate.py b/bigml/predicate.py index 29ac5068..ed6ec690 100644 --- a/bigml/predicate.py +++ b/bigml/predicate.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2013-2023 BigML +# Copyright 2013-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/predicate_utils/utils.py b/bigml/predicate_utils/utils.py index 8f3801b0..7239d01e 100644 --- a/bigml/predicate_utils/utils.py +++ b/bigml/predicate_utils/utils.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 BigML +# Copyright 2020-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/predicates.py b/bigml/predicates.py index 36abd4a9..54537858 100644 --- a/bigml/predicates.py +++ b/bigml/predicates.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/predict_utils/boosting.py b/bigml/predict_utils/boosting.py index 89b10108..1380e96d 100644 --- a/bigml/predict_utils/boosting.py +++ b/bigml/predict_utils/boosting.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 BigML +# Copyright 2020-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/predict_utils/classification.py b/bigml/predict_utils/classification.py index 56a3a013..862b32c7 100644 --- a/bigml/predict_utils/classification.py +++ b/bigml/predict_utils/classification.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 BigML +# Copyright 2020-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/predict_utils/common.py b/bigml/predict_utils/common.py index e2837594..6b967f52 100644 --- a/bigml/predict_utils/common.py +++ b/bigml/predict_utils/common.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 BigML +# Copyright 2020-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/predict_utils/regression.py b/bigml/predict_utils/regression.py index 82371234..4c291f05 100644 --- a/bigml/predict_utils/regression.py +++ b/bigml/predict_utils/regression.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 BigML +# Copyright 2020-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/prediction.py b/bigml/prediction.py index a284575b..19327510 100644 --- a/bigml/prediction.py +++ b/bigml/prediction.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/shapwrapper.py b/bigml/shapwrapper.py index 659c45aa..65586ca2 100644 --- a/bigml/shapwrapper.py +++ b/bigml/shapwrapper.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # pylint: disable=super-init-not-called # -# Copyright 2023 BigML +# Copyright 2023-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/supervised.py b/bigml/supervised.py index d10da99e..57155fa8 100644 --- a/bigml/supervised.py +++ b/bigml/supervised.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # pylint: disable=super-init-not-called # -# Copyright 2018-2023 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/compare_dataset_steps.py b/bigml/tests/compare_dataset_steps.py index f3293f9f..04bc9110 100644 --- a/bigml/tests/compare_dataset_steps.py +++ b/bigml/tests/compare_dataset_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2022-2023 BigML +# Copyright 2022-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/compare_forecasts_steps.py b/bigml/tests/compare_forecasts_steps.py index af10b02d..0d4fe85a 100644 --- a/bigml/tests/compare_forecasts_steps.py +++ b/bigml/tests/compare_forecasts_steps.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2017-2023 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/compare_pipeline_steps.py b/bigml/tests/compare_pipeline_steps.py index 4007aef6..146ea408 100644 --- a/bigml/tests/compare_pipeline_steps.py +++ b/bigml/tests/compare_pipeline_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2022-2023 BigML +# Copyright 2022-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/compare_predictions_steps.py b/bigml/tests/compare_predictions_steps.py index 66f1bc23..b0019411 100644 --- a/bigml/tests/compare_predictions_steps.py +++ b/bigml/tests/compare_predictions_steps.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,unused-argument,no-member #pylint: disable=locally-disabled,pointless-string-statement # -# Copyright 2012-2023 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/compute_lda_prediction_steps.py b/bigml/tests/compute_lda_prediction_steps.py index 4479057b..5ec5f6e8 100644 --- a/bigml/tests/compute_lda_prediction_steps.py +++ b/bigml/tests/compute_lda_prediction_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2016-2023 BigML +# Copyright 2016-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/compute_multivote_prediction_steps.py b/bigml/tests/compute_multivote_prediction_steps.py index e18f754c..251423c1 100644 --- a/bigml/tests/compute_multivote_prediction_steps.py +++ b/bigml/tests/compute_multivote_prediction_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2012, 2015-2023 BigML +# Copyright 2012, 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/create_anomaly_steps.py b/bigml/tests/create_anomaly_steps.py index d0fecedd..f0b18d3a 100644 --- a/bigml/tests/create_anomaly_steps.py +++ b/bigml/tests/create_anomaly_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/create_association_steps.py b/bigml/tests/create_association_steps.py index 2b56fceb..b54cd9be 100644 --- a/bigml/tests/create_association_steps.py +++ b/bigml/tests/create_association_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/create_batch_prediction_steps.py b/bigml/tests/create_batch_prediction_steps.py index 98bbc4d4..7988a3f9 100644 --- a/bigml/tests/create_batch_prediction_steps.py +++ b/bigml/tests/create_batch_prediction_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2012-2023 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/create_batch_projection_steps.py b/bigml/tests/create_batch_projection_steps.py index 9dcb8d3d..d18debf7 100644 --- a/bigml/tests/create_batch_projection_steps.py +++ b/bigml/tests/create_batch_projection_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2018-2023 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/create_cluster_steps.py b/bigml/tests/create_cluster_steps.py index 1a993a40..f6c9e002 100644 --- a/bigml/tests/create_cluster_steps.py +++ b/bigml/tests/create_cluster_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2012-2023 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/create_configuration_steps.py b/bigml/tests/create_configuration_steps.py index f657be7e..5116986d 100644 --- a/bigml/tests/create_configuration_steps.py +++ b/bigml/tests/create_configuration_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2017-2023 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/create_correlation_steps.py b/bigml/tests/create_correlation_steps.py index e3c03894..c5421c6b 100644 --- a/bigml/tests/create_correlation_steps.py +++ b/bigml/tests/create_correlation_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/create_dataset_steps.py b/bigml/tests/create_dataset_steps.py index 0baf4a85..a04d854a 100644 --- a/bigml/tests/create_dataset_steps.py +++ b/bigml/tests/create_dataset_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2012-2023 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/create_ensemble_steps.py b/bigml/tests/create_ensemble_steps.py index 5d9c098a..7113dfde 100644 --- a/bigml/tests/create_ensemble_steps.py +++ b/bigml/tests/create_ensemble_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member,broad-except # -# Copyright 2012-2023 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/create_evaluation_steps.py b/bigml/tests/create_evaluation_steps.py index 0a9756b5..c7412a38 100644 --- a/bigml/tests/create_evaluation_steps.py +++ b/bigml/tests/create_evaluation_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2012, 2015-2023 BigML +# Copyright 2012, 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/create_execution_steps.py b/bigml/tests/create_execution_steps.py index de478629..6d4d69a6 100644 --- a/bigml/tests/create_execution_steps.py +++ b/bigml/tests/create_execution_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/create_external_steps.py b/bigml/tests/create_external_steps.py index 06a48425..08bb6f22 100644 --- a/bigml/tests/create_external_steps.py +++ b/bigml/tests/create_external_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2020-2023 BigML +# Copyright 2020-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/create_forecast_steps.py b/bigml/tests/create_forecast_steps.py index e6999d3a..15a922b8 100644 --- a/bigml/tests/create_forecast_steps.py +++ b/bigml/tests/create_forecast_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2017-2023 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/create_lda_steps.py b/bigml/tests/create_lda_steps.py index 597eab4f..cd06ac96 100644 --- a/bigml/tests/create_lda_steps.py +++ b/bigml/tests/create_lda_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2012-2023 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/create_library_steps.py b/bigml/tests/create_library_steps.py index 77a37aca..dd8cb5d2 100644 --- a/bigml/tests/create_library_steps.py +++ b/bigml/tests/create_library_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/create_linear_steps.py b/bigml/tests/create_linear_steps.py index b62c41d7..88fae1b9 100644 --- a/bigml/tests/create_linear_steps.py +++ b/bigml/tests/create_linear_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2019-2023 BigML +# Copyright 2019-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/create_model_steps.py b/bigml/tests/create_model_steps.py index b95893fb..811daf30 100644 --- a/bigml/tests/create_model_steps.py +++ b/bigml/tests/create_model_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2012-2023 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/create_multimodel_steps.py b/bigml/tests/create_multimodel_steps.py index ffc5fbf8..7fe82a82 100644 --- a/bigml/tests/create_multimodel_steps.py +++ b/bigml/tests/create_multimodel_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/create_pca_steps.py b/bigml/tests/create_pca_steps.py index 0772fb11..c5a8ff09 100644 --- a/bigml/tests/create_pca_steps.py +++ b/bigml/tests/create_pca_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2018-2023 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/create_prediction_steps.py b/bigml/tests/create_prediction_steps.py index 3432c512..978d577c 100644 --- a/bigml/tests/create_prediction_steps.py +++ b/bigml/tests/create_prediction_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/create_project_steps.py b/bigml/tests/create_project_steps.py index f28ee969..3d997bfe 100644 --- a/bigml/tests/create_project_steps.py +++ b/bigml/tests/create_project_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/create_projection_steps.py b/bigml/tests/create_projection_steps.py index 3d9be145..92df6cb7 100644 --- a/bigml/tests/create_projection_steps.py +++ b/bigml/tests/create_projection_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument # -# Copyright 2018-2023 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/create_sample_steps.py b/bigml/tests/create_sample_steps.py index 2a9029be..8f451f4b 100644 --- a/bigml/tests/create_sample_steps.py +++ b/bigml/tests/create_sample_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/create_script_steps.py b/bigml/tests/create_script_steps.py index 3cebabab..cb7ab4ed 100644 --- a/bigml/tests/create_script_steps.py +++ b/bigml/tests/create_script_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/create_source_steps.py b/bigml/tests/create_source_steps.py index 7ff1b3c7..3eac296a 100644 --- a/bigml/tests/create_source_steps.py +++ b/bigml/tests/create_source_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2012, 2015-2023 BigML +# Copyright 2012, 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/create_statistical_tst_steps.py b/bigml/tests/create_statistical_tst_steps.py index 88774bc0..44e76dd4 100644 --- a/bigml/tests/create_statistical_tst_steps.py +++ b/bigml/tests/create_statistical_tst_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/create_time_series_steps.py b/bigml/tests/create_time_series_steps.py index 06e9fc82..d12fc2c8 100644 --- a/bigml/tests/create_time_series_steps.py +++ b/bigml/tests/create_time_series_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2017-2023 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/delete_project_steps.py b/bigml/tests/delete_project_steps.py index 83be7f13..49d6ddb6 100644 --- a/bigml/tests/delete_project_steps.py +++ b/bigml/tests/delete_project_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member,broad-except # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/fields_steps.py b/bigml/tests/fields_steps.py index 4df3e12e..59336ea5 100644 --- a/bigml/tests/fields_steps.py +++ b/bigml/tests/fields_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/inspect_model_steps.py b/bigml/tests/inspect_model_steps.py index 8b2bd637..a13c90ac 100644 --- a/bigml/tests/inspect_model_steps.py +++ b/bigml/tests/inspect_model_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2012, 2015-2023 BigML +# Copyright 2012, 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/read_dataset_steps.py b/bigml/tests/read_dataset_steps.py index b1f37e89..026b361c 100644 --- a/bigml/tests/read_dataset_steps.py +++ b/bigml/tests/read_dataset_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=locally-disabled,no-member # -# Copyright 2012-2023 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/read_resource_steps.py b/bigml/tests/read_resource_steps.py index d5f5070d..d406b8d6 100644 --- a/bigml/tests/read_resource_steps.py +++ b/bigml/tests/read_resource_steps.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2023 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_01_prediction.py b/bigml/tests/test_01_prediction.py index 9d416e91..7a97fd6d 100644 --- a/bigml/tests/test_01_prediction.py +++ b/bigml/tests/test_01_prediction.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/test_03_local_prediction.py b/bigml/tests/test_03_local_prediction.py index 04cbb06a..e746accd 100644 --- a/bigml/tests/test_03_local_prediction.py +++ b/bigml/tests/test_03_local_prediction.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_04_multivote_prediction.py b/bigml/tests/test_04_multivote_prediction.py index 2ec448dd..b66f5abd 100644 --- a/bigml/tests/test_04_multivote_prediction.py +++ b/bigml/tests/test_04_multivote_prediction.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_05_compare_predictions.py b/bigml/tests/test_05_compare_predictions.py index c9a8f1df..7cebde55 100644 --- a/bigml/tests/test_05_compare_predictions.py +++ b/bigml/tests/test_05_compare_predictions.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/test_05_compare_predictions_b.py b/bigml/tests/test_05_compare_predictions_b.py index 379a16fc..65097657 100644 --- a/bigml/tests/test_05_compare_predictions_b.py +++ b/bigml/tests/test_05_compare_predictions_b.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_06_batch_predictions.py b/bigml/tests/test_06_batch_predictions.py index 1716589c..89266f8b 100644 --- a/bigml/tests/test_06_batch_predictions.py +++ b/bigml/tests/test_06_batch_predictions.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_07_multimodel_batch_predictions.py b/bigml/tests/test_07_multimodel_batch_predictions.py index 0a0838e6..a19ea4ca 100644 --- a/bigml/tests/test_07_multimodel_batch_predictions.py +++ b/bigml/tests/test_07_multimodel_batch_predictions.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/test_08_multimodel.py b/bigml/tests/test_08_multimodel.py index 78b3e30a..c9ac4d1b 100644 --- a/bigml/tests/test_08_multimodel.py +++ b/bigml/tests/test_08_multimodel.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_09_ensemble_prediction.py b/bigml/tests/test_09_ensemble_prediction.py index ec11cb1e..52b06872 100644 --- a/bigml/tests/test_09_ensemble_prediction.py +++ b/bigml/tests/test_09_ensemble_prediction.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_10_local_ensemble_prediction.py b/bigml/tests/test_10_local_ensemble_prediction.py index 997ff0db..2e35f1b0 100644 --- a/bigml/tests/test_10_local_ensemble_prediction.py +++ b/bigml/tests/test_10_local_ensemble_prediction.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/test_11_multimodel_prediction.py b/bigml/tests/test_11_multimodel_prediction.py index b8c2699b..23021c1d 100644 --- a/bigml/tests/test_11_multimodel_prediction.py +++ b/bigml/tests/test_11_multimodel_prediction.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_12_public_model_prediction.py b/bigml/tests/test_12_public_model_prediction.py index 8d52d974..cbfe2e36 100644 --- a/bigml/tests/test_12_public_model_prediction.py +++ b/bigml/tests/test_12_public_model_prediction.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_13_public_dataset.py b/bigml/tests/test_13_public_dataset.py index 145dcaf7..94657661 100644 --- a/bigml/tests/test_13_public_dataset.py +++ b/bigml/tests/test_13_public_dataset.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/test_14_create_evaluations.py b/bigml/tests/test_14_create_evaluations.py index 805be46d..093dc638 100644 --- a/bigml/tests/test_14_create_evaluations.py +++ b/bigml/tests/test_14_create_evaluations.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_15_download.py b/bigml/tests/test_15_download.py index bfba8f70..415257e2 100644 --- a/bigml/tests/test_15_download.py +++ b/bigml/tests/test_15_download.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_16_sample_dataset.py b/bigml/tests/test_16_sample_dataset.py index 46c6280c..186b76ef 100644 --- a/bigml/tests/test_16_sample_dataset.py +++ b/bigml/tests/test_16_sample_dataset.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/test_17_split_dataset.py b/bigml/tests/test_17_split_dataset.py index 73056e4f..c570ea12 100644 --- a/bigml/tests/test_17_split_dataset.py +++ b/bigml/tests/test_17_split_dataset.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_18_create_anomaly.py b/bigml/tests/test_18_create_anomaly.py index 33e4098e..b38adfa6 100644 --- a/bigml/tests/test_18_create_anomaly.py +++ b/bigml/tests/test_18_create_anomaly.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_19_missing_and_errors.py b/bigml/tests/test_19_missing_and_errors.py index d8c41968..22326c08 100644 --- a/bigml/tests/test_19_missing_and_errors.py +++ b/bigml/tests/test_19_missing_and_errors.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/test_20_rename_duplicated_names.py b/bigml/tests/test_20_rename_duplicated_names.py index dadf9d30..ac2def75 100644 --- a/bigml/tests/test_20_rename_duplicated_names.py +++ b/bigml/tests/test_20_rename_duplicated_names.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_21_projects.py b/bigml/tests/test_21_projects.py index 2c61acc1..b58f6d0a 100644 --- a/bigml/tests/test_21_projects.py +++ b/bigml/tests/test_21_projects.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_22_source_args.py b/bigml/tests/test_22_source_args.py index 7c2b6e3a..478a0959 100644 --- a/bigml/tests/test_22_source_args.py +++ b/bigml/tests/test_22_source_args.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import,no-member # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/test_23_local_model_info.py b/bigml/tests/test_23_local_model_info.py index 18e82a48..8ee0ac97 100644 --- a/bigml/tests/test_23_local_model_info.py +++ b/bigml/tests/test_23_local_model_info.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_24_cluster_derived.py b/bigml/tests/test_24_cluster_derived.py index e2e7d588..5e565463 100644 --- a/bigml/tests/test_24_cluster_derived.py +++ b/bigml/tests/test_24_cluster_derived.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_25_correlation.py b/bigml/tests/test_25_correlation.py index 5812bf32..27f4c029 100644 --- a/bigml/tests/test_25_correlation.py +++ b/bigml/tests/test_25_correlation.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/test_26_statistical_test.py b/bigml/tests/test_26_statistical_test.py index 332e9988..b09ebd48 100644 --- a/bigml/tests/test_26_statistical_test.py +++ b/bigml/tests/test_26_statistical_test.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_27_fields.py b/bigml/tests/test_27_fields.py index a6b0892f..bd461f04 100644 --- a/bigml/tests/test_27_fields.py +++ b/bigml/tests/test_27_fields.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_28_association.py b/bigml/tests/test_28_association.py index 60b3015f..7e5bec63 100644 --- a/bigml/tests/test_28_association.py +++ b/bigml/tests/test_28_association.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/test_29_script.py b/bigml/tests/test_29_script.py index 0bd8e7bc..eb5bc752 100644 --- a/bigml/tests/test_29_script.py +++ b/bigml/tests/test_29_script.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_30_execution.py b/bigml/tests/test_30_execution.py index 17d8d25b..e1864d5c 100644 --- a/bigml/tests/test_30_execution.py +++ b/bigml/tests/test_30_execution.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_31_library.py b/bigml/tests/test_31_library.py index be877fe0..9de406c8 100644 --- a/bigml/tests/test_31_library.py +++ b/bigml/tests/test_31_library.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/test_32_topic_model_prediction.py b/bigml/tests/test_32_topic_model_prediction.py index f1899f09..fd26e407 100644 --- a/bigml/tests/test_32_topic_model_prediction.py +++ b/bigml/tests/test_32_topic_model_prediction.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2016-2023 BigML +# Copyright 2016-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_33_compare_predictions.py b/bigml/tests/test_33_compare_predictions.py index 6dc0bf72..cf322c36 100644 --- a/bigml/tests/test_33_compare_predictions.py +++ b/bigml/tests/test_33_compare_predictions.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_34_time_series.py b/bigml/tests/test_34_time_series.py index ebf997af..4b5fb472 100644 --- a/bigml/tests/test_34_time_series.py +++ b/bigml/tests/test_34_time_series.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2017-2023 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/test_35_b_compare_predictions.py b/bigml/tests/test_35_b_compare_predictions.py index 0002b9f9..7b768ff6 100644 --- a/bigml/tests/test_35_b_compare_predictions.py +++ b/bigml/tests/test_35_b_compare_predictions.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2017-2023 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_35_c_compare_predictions.py b/bigml/tests/test_35_c_compare_predictions.py index e45e5b87..0a39e66d 100644 --- a/bigml/tests/test_35_c_compare_predictions.py +++ b/bigml/tests/test_35_c_compare_predictions.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2017-2023 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_35_compare_predictions.py b/bigml/tests/test_35_compare_predictions.py index 16c54176..248b9520 100644 --- a/bigml/tests/test_35_compare_predictions.py +++ b/bigml/tests/test_35_compare_predictions.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2017-2023 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/test_35_d_compare_predictions.py b/bigml/tests/test_35_d_compare_predictions.py index 023d3830..442ac2cf 100644 --- a/bigml/tests/test_35_d_compare_predictions.py +++ b/bigml/tests/test_35_d_compare_predictions.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2017-2023 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_35_e_compare_predictions.py b/bigml/tests/test_35_e_compare_predictions.py index bd408698..b998b1a4 100644 --- a/bigml/tests/test_35_e_compare_predictions.py +++ b/bigml/tests/test_35_e_compare_predictions.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2017-2023 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_36_compare_predictions.py b/bigml/tests/test_36_compare_predictions.py index 4be945a4..c8a76e3d 100644 --- a/bigml/tests/test_36_compare_predictions.py +++ b/bigml/tests/test_36_compare_predictions.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2017-2023 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/test_37_configuration.py b/bigml/tests/test_37_configuration.py index fecd0da3..1c4ba9ac 100644 --- a/bigml/tests/test_37_configuration.py +++ b/bigml/tests/test_37_configuration.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_38_organization.py b/bigml/tests/test_38_organization.py index 2d699fc8..4187a474 100644 --- a/bigml/tests/test_38_organization.py +++ b/bigml/tests/test_38_organization.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2018-2023 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_38_project_connection.py b/bigml/tests/test_38_project_connection.py index 38a9aeba..7175d8a6 100644 --- a/bigml/tests/test_38_project_connection.py +++ b/bigml/tests/test_38_project_connection.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import,broad-except # -# Copyright 2018-2023 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/test_39_optiml_fusion.py b/bigml/tests/test_39_optiml_fusion.py index 4273e371..0ff5992f 100644 --- a/bigml/tests/test_39_optiml_fusion.py +++ b/bigml/tests/test_39_optiml_fusion.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2018-2023 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_40_local_from_file.py b/bigml/tests/test_40_local_from_file.py index d2368812..c8311285 100644 --- a/bigml/tests/test_40_local_from_file.py +++ b/bigml/tests/test_40_local_from_file.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2018-2023 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_41_multidataset.py b/bigml/tests/test_41_multidataset.py index ad966e79..e0c8f1b3 100644 --- a/bigml/tests/test_41_multidataset.py +++ b/bigml/tests/test_41_multidataset.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2018-2023 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/test_42_pca.py b/bigml/tests/test_42_pca.py index ef67391d..706305bf 100644 --- a/bigml/tests/test_42_pca.py +++ b/bigml/tests/test_42_pca.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2018-2023 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_43_linear.py b/bigml/tests/test_43_linear.py index 6b8a3a65..a9a20ecb 100644 --- a/bigml/tests/test_43_linear.py +++ b/bigml/tests/test_43_linear.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2019-2023 BigML +# Copyright 2019-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_44_compare_predictions.py b/bigml/tests/test_44_compare_predictions.py index 44e4868e..c50a6350 100644 --- a/bigml/tests/test_44_compare_predictions.py +++ b/bigml/tests/test_44_compare_predictions.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/test_45_external_connector.py b/bigml/tests/test_45_external_connector.py index 28162ef7..deac2c94 100644 --- a/bigml/tests/test_45_external_connector.py +++ b/bigml/tests/test_45_external_connector.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2018-2023 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_47_webhooks.py b/bigml/tests/test_47_webhooks.py index 09e34fc0..3206f0ef 100644 --- a/bigml/tests/test_47_webhooks.py +++ b/bigml/tests/test_47_webhooks.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import,invalid-name # -# Copyright 2022-2023 BigML +# Copyright 2022-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_48_local_dataset.py b/bigml/tests/test_48_local_dataset.py index a57787d4..eabd52f1 100644 --- a/bigml/tests/test_48_local_dataset.py +++ b/bigml/tests/test_48_local_dataset.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2022-2023 BigML +# Copyright 2022-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/tests/test_49_local_pipeline.py b/bigml/tests/test_49_local_pipeline.py index 348bfd8f..651a87a3 100644 --- a/bigml/tests/test_49_local_pipeline.py +++ b/bigml/tests/test_49_local_pipeline.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import # -# Copyright 2022-2023 BigML +# Copyright 2022-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/test_99_cleaning.py b/bigml/tests/test_99_cleaning.py index 9339ec9a..1f80e98e 100644 --- a/bigml/tests/test_99_cleaning.py +++ b/bigml/tests/test_99_cleaning.py @@ -2,7 +2,7 @@ #pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init #pylint: disable=locally-disabled,unused-import,no-self-use # -# Copyright 2018-2023 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tests/world.py b/bigml/tests/world.py index df52b101..f3c86ba2 100644 --- a/bigml/tests/world.py +++ b/bigml/tests/world.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2015-2023 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/timeseries.py b/bigml/timeseries.py index d90b1edb..62c6b2f5 100644 --- a/bigml/timeseries.py +++ b/bigml/timeseries.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2023 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/topicmodel.py b/bigml/topicmodel.py index 8c2e56a7..abc87b5f 100644 --- a/bigml/topicmodel.py +++ b/bigml/topicmodel.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2016-2023 BigML +# Copyright 2016-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tree_utils.py b/bigml/tree_utils.py index 110957a6..ed033dbf 100644 --- a/bigml/tree_utils.py +++ b/bigml/tree_utils.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2023 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tsoutconstants.py b/bigml/tsoutconstants.py index 3f20f473..7903a6f6 100644 --- a/bigml/tsoutconstants.py +++ b/bigml/tsoutconstants.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2023 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/tssubmodels.py b/bigml/tssubmodels.py index 213f25ba..1e055af8 100644 --- a/bigml/tssubmodels.py +++ b/bigml/tssubmodels.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #pylint: disable=invalid-name # -# Copyright 2017-2023 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/bigml/util.py b/bigml/util.py index 6d81a847..df6b5d67 100644 --- a/bigml/util.py +++ b/bigml/util.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2012-2023 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain diff --git a/bigml/webhooks.py b/bigml/webhooks.py index a5db5abe..a1f762e5 100644 --- a/bigml/webhooks.py +++ b/bigml/webhooks.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022-2023 BigML +# Copyright 2022-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/setup.py b/setup.py index 2bcdd013..adcd3b08 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Copyright 2012-2023 BigML, Inc +# Copyright 2012-2025 BigML, Inc # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain From 2eba398adccd0ba5398633e1daf920c3c70d0db7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Merc=C3=A8=20Mart=C3=ADn=20Prats?= Date: Fri, 21 Mar 2025 13:43:32 +0100 Subject: [PATCH 10/19] Improving composite annotations uploads process --- HISTORY.rst | 6 ++++++ bigml/api_handlers/sourcehandler.py | 32 ++++++++++++++++++++++++++--- bigml/bigmlconnection.py | 8 ++++---- bigml/version.py | 2 +- 4 files changed, 40 insertions(+), 8 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index e4bd9d7d..63308907 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,12 @@ History ------- +9.8.2 (2025-03-21) +------------------ + +- Retrying annotations update to avoid temporary concurrency issues in + source composites updates. 
+ 9.8.1 (2025-01-14) ------------------ diff --git a/bigml/api_handlers/sourcehandler.py b/bigml/api_handlers/sourcehandler.py index 8f3568ea..6106fe82 100644 --- a/bigml/api_handlers/sourcehandler.py +++ b/bigml/api_handlers/sourcehandler.py @@ -24,6 +24,8 @@ import sys import os import numbers +import time +import logging from urllib import parse @@ -67,8 +69,13 @@ from bigml.api_handlers.resourcehandler import ResourceHandlerMixin, LOGGER from bigml.fields import Fields +LOG_FORMAT = '%(asctime)-15s: %(message)s' +LOGGER = logging.getLogger('BigML') +CONSOLE = logging.StreamHandler() +CONSOLE.setLevel(logging.WARNING) +LOGGER.addHandler(CONSOLE) -MAX_CHANGES = 500 +MAX_CHANGES = 5 def compact_regions(regions): @@ -508,6 +515,8 @@ def update_composite_annotations(self, source, images_file, try: _ = file_list.index(filename) except ValueError: + LOGGER.error("WARNING: Could not find annotated file (%s)" + " in the composite's sources list", filename) continue for key in annotation.keys(): if key == "file": @@ -550,16 +559,33 @@ def update_composite_annotations(self, source, images_file, "value": value, "components": [source_id]}) except Exception: + LOGGER.error("WARNING: Problem adding annotation to %s (%s)", + field, values) pass # we need to limit the amount of changes per update batches_number = int(len(changes) / MAX_CHANGES) for offset in range(0, batches_number + 1): - new_batch = changes[offset * MAX_CHANGES: (offset + 1) * MAX_CHANGES] + new_batch = changes[ + offset * MAX_CHANGES: (offset + 1) * MAX_CHANGES] if new_batch: source = self.update_source(source, {"row_values": new_batch}) - self.ok(source) + if source["error"] is not None: + # retrying in case update is temporarily unavailable + time.sleep(1) + source = self.get_source(source) + self.ok(source) + source = self.update_source(source, + {"row_values": new_batch}) + if source["error"] is not None: + LOGGER.error("WARNING: Some annotations were not" + " updated (%s)", + new_batch) + if not 
self.ok(source): + raise Exception( + f"Failed to update {len(new_batch)} annotations.") + time.sleep(0.1) return source diff --git a/bigml/bigmlconnection.py b/bigml/bigmlconnection.py index a5a796c5..c1b337f0 100644 --- a/bigml/bigmlconnection.py +++ b/bigml/bigmlconnection.py @@ -406,7 +406,7 @@ def _create(self, url, body, verify=None, organization=None): error = json_load(response.content) LOGGER.error(self.error_message(error, method='create')) elif code != HTTP_ACCEPTED: - LOGGER.error("Unexpected error (%s)", code) + LOGGER.error("CREATE Unexpected error (%s)", code) code = HTTP_INTERNAL_SERVER_ERROR except ValueError as exc: LOGGER.error("Malformed response: %s", str(exc)) @@ -489,7 +489,7 @@ def _get(self, url, query_string='', LOGGER.error(self.error_message(error, method='get', resource_id=resource_id)) else: - LOGGER.error("Unexpected error (%s)", code) + LOGGER.error("GET Unexpected error (%s)", code) code = HTTP_INTERNAL_SERVER_ERROR except ValueError as exc: @@ -582,7 +582,7 @@ def _list(self, url, query_string='', organization=None): HTTP_TOO_MANY_REQUESTS]: error = json_load(response.content) else: - LOGGER.error("Unexpected error (%s)", code) + LOGGER.error("LIST Unexpected error (%s)", code) code = HTTP_INTERNAL_SERVER_ERROR except ValueError as exc: LOGGER.error("Malformed response: %s", str(exc)) @@ -662,7 +662,7 @@ def _update(self, url, body, organization=None, resource_id=None): LOGGER.error(self.error_message(error, method='update', resource_id=resource_id)) else: - LOGGER.error("Unexpected error (%s)", code) + LOGGER.error("UPDATE Unexpected error (%s)", code) code = HTTP_INTERNAL_SERVER_ERROR except ValueError: LOGGER.error("Malformed response") diff --git a/bigml/version.py b/bigml/version.py index d33e46a3..977225b8 100644 --- a/bigml/version.py +++ b/bigml/version.py @@ -1 +1 @@ -__version__ = '9.8.1' +__version__ = '9.8.2' From 7763ddff816ccdb6c7820ea6da92a5f68759977a Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Merc=C3=A8=20Mart=C3=ADn=20Prats?= Date: Fri, 21 Mar 2025 18:23:31 +0100 Subject: [PATCH 11/19] Adding test for annotations upload --- bigml/api_handlers/sourcehandler.py | 5 +++- bigml/tests/create_dataset_steps.py | 13 ++++++++++ bigml/tests/test_22_source_args.py | 36 ++++++++++++++++++++++++++++ data/images/annotations_compact.json | 2 ++ data/images/metadata_compact.json | 5 ++++ 5 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 data/images/annotations_compact.json create mode 100644 data/images/metadata_compact.json diff --git a/bigml/api_handlers/sourcehandler.py b/bigml/api_handlers/sourcehandler.py index 6106fe82..e2e711ea 100644 --- a/bigml/api_handlers/sourcehandler.py +++ b/bigml/api_handlers/sourcehandler.py @@ -548,9 +548,12 @@ def update_composite_annotations(self, source, images_file, "components": source_ids}) elif optype == "regions": for value, source_id in values: + if isinstance(value, dict): + # dictionary should contain the bigml-coco format + value = compact_regions(value) changes.append( {"field": field, - "value": compact_regions(value), + "value": value, "components": [source_id]}) else: for value, source_id in values: diff --git a/bigml/tests/create_dataset_steps.py b/bigml/tests/create_dataset_steps.py index a04d854a..b341ba51 100644 --- a/bigml/tests/create_dataset_steps.py +++ b/bigml/tests/create_dataset_steps.py @@ -222,3 +222,16 @@ def clone_dataset(step, dataset): def the_cloned_dataset_is(step, dataset): """Checking the dataset is a clone""" eq_(world.dataset["origin"], dataset) + + +def check_annotations(step, annotations_field, annotations_num): + """Checking the dataset contains a number of annotations""" + annotations_num = int(annotations_num) + field = world.dataset["fields"][annotations_field] + if field["optype"] == "regions": + count = field["summary"]["regions"]["sum"] + else: + count = 0 + for _, num in field["summary"]["categories"]: + count += num + eq_(count, annotations_num) diff --git 
a/bigml/tests/test_22_source_args.py b/bigml/tests/test_22_source_args.py index 478a0959..8fe3567e 100644 --- a/bigml/tests/test_22_source_args.py +++ b/bigml/tests/test_22_source_args.py @@ -25,6 +25,7 @@ from .world import world, setup_module, teardown_module, show_doc, \ show_method from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create class TestUploadSource: @@ -125,3 +126,38 @@ def test_scenario3(self): source_create.the_source_is_finished( self, example["source_wait"]) source_create.the_cloned_source_origin_is(self, source) + + def test_scenario4(self): + """ + Scenario: Successfully adding annotatations to composite source: + Given I create an annotated images data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + Then the new dataset has annotations in the field + """ + headers = ["data", "source_wait", "dataset_wait", "annotations_num", + "annotations_field"] + examples = [ + ['data/images/metadata.json', '500', '500', '12', + '100002'], + ['data/images/metadata_compact.json', '500', '500', '3', + '100003']] + show_doc(self.test_scenario4) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_create_annotated_source( + self, + example["data"], + args={"image_analysis": {"enabled": False, + "extracted_features": []}}) + source_create.the_source_is_finished( + self, example["source_wait"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + dataset_create.check_annotations(self, + example["annotations_field"], + example["annotations_num"]) + diff --git a/data/images/annotations_compact.json b/data/images/annotations_compact.json new file mode 100644 index 00000000..294de440 --- /dev/null +++ b/data/images/annotations_compact.json @@ -0,0 
+1,2 @@ +[{"file": "f1/fruits1f.png", "my_regions": "[[\"region1\" 0.2 0.2 0.4 0.4]]"}, + {"file": "f1/fruits1.png", "my_regions": "[[\"region2\" 0.3 0.3 0.5 0.5] [\"region1\" 0.6 0.6 0.8 0.8]]"}] diff --git a/data/images/metadata_compact.json b/data/images/metadata_compact.json new file mode 100644 index 00000000..45db412f --- /dev/null +++ b/data/images/metadata_compact.json @@ -0,0 +1,5 @@ +{"description": "Fruit images to test colour distributions with regions", + "images_file": "./fruits_hist.zip", + "new_fields": [{"name": "my_regions", "optype": "regions"}], + "source_id": null, + "annotations": "./annotations_compact.json"} From 6ec7ec44622caba400ca7afb432054cda847b97f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Merc=C3=A8=20Mart=C3=ADn=20Prats?= Date: Fri, 21 Mar 2025 18:30:43 +0100 Subject: [PATCH 12/19] Bump setuptools to 70.0.0 --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index adcd3b08..c7858b6c 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,8 @@ download_url="https://github.com/bigmlcom/python", license="http://www.apache.org/licenses/LICENSE-2.0", setup_requires = ['pytest'], - install_requires = ["setuptools==69.0.0", "unidecode", "bigml-chronos>=0.4.3", "requests", + install_requires = ["setuptools==70.0.0", "unidecode", + "bigml-chronos>=0.4.3", "requests", "requests-toolbelt", "msgpack", "numpy>=1.22", "scipy", "javascript"], extras_require={"images": IMAGES_DEPENDENCIES, From fa6ed76528544802199eef360ed184d10d5b4570 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Merc=C3=A8=20Mart=C3=ADn=20Prats?= Date: Fri, 21 Mar 2025 19:19:34 +0100 Subject: [PATCH 13/19] Adding more retries to annotations update function --- bigml/api_handlers/sourcehandler.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/bigml/api_handlers/sourcehandler.py b/bigml/api_handlers/sourcehandler.py index e2e711ea..24d24733 100644 --- a/bigml/api_handlers/sourcehandler.py +++ 
b/bigml/api_handlers/sourcehandler.py @@ -76,7 +76,7 @@ LOGGER.addHandler(CONSOLE) MAX_CHANGES = 5 - +MAX_RETRIES = 5 def compact_regions(regions): """Returns the list of regions in the compact value used for updates """ @@ -573,18 +573,20 @@ def update_composite_annotations(self, source, images_file, offset * MAX_CHANGES: (offset + 1) * MAX_CHANGES] if new_batch: source = self.update_source(source, - {"row_values": new_batch}) - if source["error"] is not None: + {"row_values": new_batch})+ + counter = 0 + while source["error"] is not None and counter < MAX_RETRIES: # retrying in case update is temporarily unavailable - time.sleep(1) + counter += 1 + time.sleep(counter) source = self.get_source(source) self.ok(source) source = self.update_source(source, {"row_values": new_batch}) - if source["error"] is not None: - LOGGER.error("WARNING: Some annotations were not" - " updated (%s)", - new_batch) + if source["error"] is not None: + LOGGER.error("WARNING: Some annotations were not" + " updated (%s)", + new_batch) if not self.ok(source): raise Exception( f"Failed to update {len(new_batch)} annotations.") From db2f057e86f684a70696de4f98276b792052868f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Merc=C3=A8=20Mart=C3=ADn=20Prats?= Date: Fri, 21 Mar 2025 19:21:11 +0100 Subject: [PATCH 14/19] Removing typo --- bigml/api_handlers/sourcehandler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigml/api_handlers/sourcehandler.py b/bigml/api_handlers/sourcehandler.py index 24d24733..a0747ac3 100644 --- a/bigml/api_handlers/sourcehandler.py +++ b/bigml/api_handlers/sourcehandler.py @@ -573,7 +573,7 @@ def update_composite_annotations(self, source, images_file, offset * MAX_CHANGES: (offset + 1) * MAX_CHANGES] if new_batch: source = self.update_source(source, - {"row_values": new_batch})+ + {"row_values": new_batch}) counter = 0 while source["error"] is not None and counter < MAX_RETRIES: # retrying in case update is temporarily unavailable From 
a17f88f95321261b2fab59ebe6dab58fd505b8c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Merc=C3=A8=20Mart=C3=ADn=20Prats?= Date: Fri, 21 Mar 2025 19:40:25 +0100 Subject: [PATCH 15/19] Improving error message for annotations updates --- bigml/api_handlers/sourcehandler.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bigml/api_handlers/sourcehandler.py b/bigml/api_handlers/sourcehandler.py index a0747ac3..71863b34 100644 --- a/bigml/api_handlers/sourcehandler.py +++ b/bigml/api_handlers/sourcehandler.py @@ -584,9 +584,10 @@ def update_composite_annotations(self, source, images_file, source = self.update_source(source, {"row_values": new_batch}) if source["error"] is not None: - LOGGER.error("WARNING: Some annotations were not" - " updated (%s)", - new_batch) + err_str = json.dumps(source["error"]) + v_str = json.dumps(new_batch) + LOGGER.error("WARNING: Some annotations were not updated " + f" (error: {err_str}, values: {v_str})") if not self.ok(source): raise Exception( f"Failed to update {len(new_batch)} annotations.") From 893e412be494fe5520318cf76034ef455374b518 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Merc=C3=A8=20Mart=C3=ADn=20Prats?= Date: Sat, 22 Mar 2025 01:25:41 +0100 Subject: [PATCH 16/19] Ensuring updates are finished before testing --- bigml/tests/create_association_steps.py | 1 + bigml/tests/create_cluster_steps.py | 1 + bigml/tests/create_configuration_steps.py | 1 + bigml/tests/create_correlation_steps.py | 1 + bigml/tests/create_dataset_steps.py | 1 + bigml/tests/create_execution_steps.py | 1 + bigml/tests/create_lda_steps.py | 1 + bigml/tests/create_linear_steps.py | 1 + bigml/tests/create_model_steps.py | 1 + bigml/tests/create_pca_steps.py | 1 + bigml/tests/create_project_steps.py | 1 + bigml/tests/create_sample_steps.py | 1 + bigml/tests/create_script_steps.py | 1 + bigml/tests/create_source_steps.py | 1 + bigml/tests/create_statistical_tst_steps.py | 1 + bigml/tests/create_time_series_steps.py | 1 + 16 files changed, 16 
insertions(+) diff --git a/bigml/tests/create_association_steps.py b/bigml/tests/create_association_steps.py index b54cd9be..ab222f50 100644 --- a/bigml/tests/create_association_steps.py +++ b/bigml/tests/create_association_steps.py @@ -79,6 +79,7 @@ def i_update_association_name(step, name): {'name': name}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) + world.api.ok(resource) world.location = resource['location'] world.association = resource['object'] diff --git a/bigml/tests/create_cluster_steps.py b/bigml/tests/create_cluster_steps.py index f6c9e002..cf1f0731 100644 --- a/bigml/tests/create_cluster_steps.py +++ b/bigml/tests/create_cluster_steps.py @@ -93,6 +93,7 @@ def make_the_cluster_shared(step): {'shared': True}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) + world.api.ok(resource) world.location = resource['location'] world.cluster = resource['object'] diff --git a/bigml/tests/create_configuration_steps.py b/bigml/tests/create_configuration_steps.py index 5116986d..d3070082 100644 --- a/bigml/tests/create_configuration_steps.py +++ b/bigml/tests/create_configuration_steps.py @@ -39,6 +39,7 @@ def i_update_configuration(step, changes): world.configuration["resource"], changes) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) + world.api.ok(resource) world.location = resource['location'] world.configuration = resource['object'] diff --git a/bigml/tests/create_correlation_steps.py b/bigml/tests/create_correlation_steps.py index c5421c6b..ede2c47a 100644 --- a/bigml/tests/create_correlation_steps.py +++ b/bigml/tests/create_correlation_steps.py @@ -43,6 +43,7 @@ def i_update_correlation_name(step, name): {'name': name}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) + world.api.ok(resource) world.location = resource['location'] world.correlation = resource['object'] diff --git a/bigml/tests/create_dataset_steps.py b/bigml/tests/create_dataset_steps.py index b341ba51..e62b07a6 100644 --- 
a/bigml/tests/create_dataset_steps.py +++ b/bigml/tests/create_dataset_steps.py @@ -89,6 +89,7 @@ def make_the_dataset_public(step): {'private': False}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) + world.api.ok(resource) world.location = resource['location'] world.dataset = resource['object'] diff --git a/bigml/tests/create_execution_steps.py b/bigml/tests/create_execution_steps.py index 6d4d69a6..d8716501 100644 --- a/bigml/tests/create_execution_steps.py +++ b/bigml/tests/create_execution_steps.py @@ -78,6 +78,7 @@ def i_update_an_execution(step, param, param_value): {param: param_value}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) + world.api.ok(resource) world.location = resource['location'] world.execution = resource['object'] diff --git a/bigml/tests/create_lda_steps.py b/bigml/tests/create_lda_steps.py index cd06ac96..60bdc1d7 100644 --- a/bigml/tests/create_lda_steps.py +++ b/bigml/tests/create_lda_steps.py @@ -72,6 +72,7 @@ def i_update_topic_model_name(step, name): {'name': name}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) + world.api.ok(resource) world.location = resource['location'] world.topic_model = resource['object'] diff --git a/bigml/tests/create_linear_steps.py b/bigml/tests/create_linear_steps.py index 88fae1b9..5e4106d4 100644 --- a/bigml/tests/create_linear_steps.py +++ b/bigml/tests/create_linear_steps.py @@ -79,6 +79,7 @@ def i_update_linear_regression_name(step, name): {'name': name}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) + world.api.ok(resource) world.location = resource['location'] world.linear_regression = resource['object'] diff --git a/bigml/tests/create_model_steps.py b/bigml/tests/create_model_steps.py index 811daf30..219e891b 100644 --- a/bigml/tests/create_model_steps.py +++ b/bigml/tests/create_model_steps.py @@ -410,6 +410,7 @@ def i_update_optiml_name(step, name): {'name': name}) world.status = resource['code'] eq_(world.status, 
HTTP_ACCEPTED) + world.api.ok(resource) world.location = resource['location'] world.optiml = resource['object'] diff --git a/bigml/tests/create_pca_steps.py b/bigml/tests/create_pca_steps.py index c5a8ff09..e4b6da59 100644 --- a/bigml/tests/create_pca_steps.py +++ b/bigml/tests/create_pca_steps.py @@ -65,6 +65,7 @@ def i_update_pca_name(step, name): {'name': name}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) + world.api.ok(resource) world.location = resource['location'] world.pca = resource['object'] diff --git a/bigml/tests/create_project_steps.py b/bigml/tests/create_project_steps.py index 3d997bfe..604c187f 100644 --- a/bigml/tests/create_project_steps.py +++ b/bigml/tests/create_project_steps.py @@ -44,6 +44,7 @@ def i_update_project_name_with(step, name=""): {"name": name}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) + world.api.ok(resource) world.project = resource['object'] diff --git a/bigml/tests/create_sample_steps.py b/bigml/tests/create_sample_steps.py index 8f451f4b..0f7de276 100644 --- a/bigml/tests/create_sample_steps.py +++ b/bigml/tests/create_sample_steps.py @@ -44,6 +44,7 @@ def i_update_sample_name(step, name): {'name': name}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) + world.api.ok(resource) world.location = resource['location'] world.sample = resource['object'] diff --git a/bigml/tests/create_script_steps.py b/bigml/tests/create_script_steps.py index cb7ab4ed..d6c68b78 100644 --- a/bigml/tests/create_script_steps.py +++ b/bigml/tests/create_script_steps.py @@ -63,6 +63,7 @@ def i_update_a_script(step, param, param_value): {param: param_value}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) + world.api.ok(resource) world.location = resource['location'] world.script = resource['object'] diff --git a/bigml/tests/create_source_steps.py b/bigml/tests/create_source_steps.py index 3eac296a..2709121c 100644 --- a/bigml/tests/create_source_steps.py +++ 
b/bigml/tests/create_source_steps.py @@ -210,6 +210,7 @@ def i_update_source_with(step, data="{}"): resource = world.api.update_source(world.source.get('resource'), json.loads(data)) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) + world.api.ok(resource) def source_has_args(step, args="{}"): diff --git a/bigml/tests/create_statistical_tst_steps.py b/bigml/tests/create_statistical_tst_steps.py index 44e76dd4..29c0a132 100644 --- a/bigml/tests/create_statistical_tst_steps.py +++ b/bigml/tests/create_statistical_tst_steps.py @@ -45,6 +45,7 @@ def i_update_tst_name(step, name): world.statistical_test['resource'], {'name': name}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) + world.api.ok(resource) world.location = resource['location'] world.statistical_test = resource['object'] diff --git a/bigml/tests/create_time_series_steps.py b/bigml/tests/create_time_series_steps.py index d12fc2c8..bfeb40e7 100644 --- a/bigml/tests/create_time_series_steps.py +++ b/bigml/tests/create_time_series_steps.py @@ -66,6 +66,7 @@ def i_update_time_series_name(step, name): {'name': name}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) + world.api.ok(resource) world.location = resource['location'] world.time_series = resource['object'] From c6c36021f1c4e17f00c2de1a4951a41b4a345184 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Merc=C3=A8=20Mart=C3=ADn=20Prats?= Date: Wed, 26 Mar 2025 13:01:23 +0100 Subject: [PATCH 17/19] Reverting update changes --- bigml/tests/create_association_steps.py | 1 - bigml/tests/create_cluster_steps.py | 1 - bigml/tests/create_configuration_steps.py | 1 - bigml/tests/create_correlation_steps.py | 1 - bigml/tests/create_dataset_steps.py | 1 - bigml/tests/create_execution_steps.py | 1 - bigml/tests/create_lda_steps.py | 1 - bigml/tests/create_linear_steps.py | 1 - bigml/tests/create_model_steps.py | 1 - bigml/tests/create_pca_steps.py | 1 - bigml/tests/create_project_steps.py | 1 - bigml/tests/create_sample_steps.py 
| 1 - bigml/tests/create_script_steps.py | 1 - bigml/tests/create_source_steps.py | 1 - bigml/tests/create_statistical_tst_steps.py | 1 - bigml/tests/create_time_series_steps.py | 1 - 16 files changed, 16 deletions(-) diff --git a/bigml/tests/create_association_steps.py b/bigml/tests/create_association_steps.py index ab222f50..b54cd9be 100644 --- a/bigml/tests/create_association_steps.py +++ b/bigml/tests/create_association_steps.py @@ -79,7 +79,6 @@ def i_update_association_name(step, name): {'name': name}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) - world.api.ok(resource) world.location = resource['location'] world.association = resource['object'] diff --git a/bigml/tests/create_cluster_steps.py b/bigml/tests/create_cluster_steps.py index cf1f0731..f6c9e002 100644 --- a/bigml/tests/create_cluster_steps.py +++ b/bigml/tests/create_cluster_steps.py @@ -93,7 +93,6 @@ def make_the_cluster_shared(step): {'shared': True}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) - world.api.ok(resource) world.location = resource['location'] world.cluster = resource['object'] diff --git a/bigml/tests/create_configuration_steps.py b/bigml/tests/create_configuration_steps.py index d3070082..5116986d 100644 --- a/bigml/tests/create_configuration_steps.py +++ b/bigml/tests/create_configuration_steps.py @@ -39,7 +39,6 @@ def i_update_configuration(step, changes): world.configuration["resource"], changes) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) - world.api.ok(resource) world.location = resource['location'] world.configuration = resource['object'] diff --git a/bigml/tests/create_correlation_steps.py b/bigml/tests/create_correlation_steps.py index ede2c47a..c5421c6b 100644 --- a/bigml/tests/create_correlation_steps.py +++ b/bigml/tests/create_correlation_steps.py @@ -43,7 +43,6 @@ def i_update_correlation_name(step, name): {'name': name}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) - 
world.api.ok(resource) world.location = resource['location'] world.correlation = resource['object'] diff --git a/bigml/tests/create_dataset_steps.py b/bigml/tests/create_dataset_steps.py index e62b07a6..b341ba51 100644 --- a/bigml/tests/create_dataset_steps.py +++ b/bigml/tests/create_dataset_steps.py @@ -89,7 +89,6 @@ def make_the_dataset_public(step): {'private': False}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) - world.api.ok(resource) world.location = resource['location'] world.dataset = resource['object'] diff --git a/bigml/tests/create_execution_steps.py b/bigml/tests/create_execution_steps.py index d8716501..6d4d69a6 100644 --- a/bigml/tests/create_execution_steps.py +++ b/bigml/tests/create_execution_steps.py @@ -78,7 +78,6 @@ def i_update_an_execution(step, param, param_value): {param: param_value}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) - world.api.ok(resource) world.location = resource['location'] world.execution = resource['object'] diff --git a/bigml/tests/create_lda_steps.py b/bigml/tests/create_lda_steps.py index 60bdc1d7..cd06ac96 100644 --- a/bigml/tests/create_lda_steps.py +++ b/bigml/tests/create_lda_steps.py @@ -72,7 +72,6 @@ def i_update_topic_model_name(step, name): {'name': name}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) - world.api.ok(resource) world.location = resource['location'] world.topic_model = resource['object'] diff --git a/bigml/tests/create_linear_steps.py b/bigml/tests/create_linear_steps.py index 5e4106d4..88fae1b9 100644 --- a/bigml/tests/create_linear_steps.py +++ b/bigml/tests/create_linear_steps.py @@ -79,7 +79,6 @@ def i_update_linear_regression_name(step, name): {'name': name}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) - world.api.ok(resource) world.location = resource['location'] world.linear_regression = resource['object'] diff --git a/bigml/tests/create_model_steps.py b/bigml/tests/create_model_steps.py index 
219e891b..811daf30 100644 --- a/bigml/tests/create_model_steps.py +++ b/bigml/tests/create_model_steps.py @@ -410,7 +410,6 @@ def i_update_optiml_name(step, name): {'name': name}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) - world.api.ok(resource) world.location = resource['location'] world.optiml = resource['object'] diff --git a/bigml/tests/create_pca_steps.py b/bigml/tests/create_pca_steps.py index e4b6da59..c5a8ff09 100644 --- a/bigml/tests/create_pca_steps.py +++ b/bigml/tests/create_pca_steps.py @@ -65,7 +65,6 @@ def i_update_pca_name(step, name): {'name': name}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) - world.api.ok(resource) world.location = resource['location'] world.pca = resource['object'] diff --git a/bigml/tests/create_project_steps.py b/bigml/tests/create_project_steps.py index 604c187f..3d997bfe 100644 --- a/bigml/tests/create_project_steps.py +++ b/bigml/tests/create_project_steps.py @@ -44,7 +44,6 @@ def i_update_project_name_with(step, name=""): {"name": name}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) - world.api.ok(resource) world.project = resource['object'] diff --git a/bigml/tests/create_sample_steps.py b/bigml/tests/create_sample_steps.py index 0f7de276..8f451f4b 100644 --- a/bigml/tests/create_sample_steps.py +++ b/bigml/tests/create_sample_steps.py @@ -44,7 +44,6 @@ def i_update_sample_name(step, name): {'name': name}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) - world.api.ok(resource) world.location = resource['location'] world.sample = resource['object'] diff --git a/bigml/tests/create_script_steps.py b/bigml/tests/create_script_steps.py index d6c68b78..cb7ab4ed 100644 --- a/bigml/tests/create_script_steps.py +++ b/bigml/tests/create_script_steps.py @@ -63,7 +63,6 @@ def i_update_a_script(step, param, param_value): {param: param_value}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) - world.api.ok(resource) world.location = 
resource['location'] world.script = resource['object'] diff --git a/bigml/tests/create_source_steps.py b/bigml/tests/create_source_steps.py index 2709121c..3eac296a 100644 --- a/bigml/tests/create_source_steps.py +++ b/bigml/tests/create_source_steps.py @@ -210,7 +210,6 @@ def i_update_source_with(step, data="{}"): resource = world.api.update_source(world.source.get('resource'), json.loads(data)) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) - world.api.ok(resource) def source_has_args(step, args="{}"): diff --git a/bigml/tests/create_statistical_tst_steps.py b/bigml/tests/create_statistical_tst_steps.py index 29c0a132..44e76dd4 100644 --- a/bigml/tests/create_statistical_tst_steps.py +++ b/bigml/tests/create_statistical_tst_steps.py @@ -45,7 +45,6 @@ def i_update_tst_name(step, name): world.statistical_test['resource'], {'name': name}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) - world.api.ok(resource) world.location = resource['location'] world.statistical_test = resource['object'] diff --git a/bigml/tests/create_time_series_steps.py b/bigml/tests/create_time_series_steps.py index bfeb40e7..d12fc2c8 100644 --- a/bigml/tests/create_time_series_steps.py +++ b/bigml/tests/create_time_series_steps.py @@ -66,7 +66,6 @@ def i_update_time_series_name(step, name): {'name': name}) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) - world.api.ok(resource) world.location = resource['location'] world.time_series = resource['object'] From 06859b48ffd0b51f42a6362bf6d14f57c7a72236 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Merc=C3=A8=20Mart=C3=ADn=20Prats?= Date: Wed, 26 Mar 2025 20:37:41 +0100 Subject: [PATCH 18/19] Adding small delay in tests --- bigml/tests/read_resource_steps.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bigml/tests/read_resource_steps.py b/bigml/tests/read_resource_steps.py index d406b8d6..bf702e04 100644 --- a/bigml/tests/read_resource_steps.py +++ b/bigml/tests/read_resource_steps.py @@ -14,6 
+14,7 @@ # License for the specific language governing permissions and limitations # under the License. +import time from datetime import datetime @@ -46,6 +47,7 @@ def wait_until_status_code_is(code1, code2, secs, resource_info): if status['code'] == int(code2): world.errors.append(resource_info) eq_(status['code'], int(code1)) + time.sleep(0.1) # added to avoid synch mongo issues return i_get_the_resource(resource_info) From 73c97000235fda8ca988f20df5c57fdf4bf2f40f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Merc=C3=A8=20Mart=C3=ADn=20Prats?= Date: Thu, 27 Mar 2025 17:29:30 +0100 Subject: [PATCH 19/19] Fixing problem in regions annotations given as a list --- HISTORY.rst | 5 +++++ bigml/api_handlers/sourcehandler.py | 11 +++-------- bigml/bigmlconnection.py | 11 +++++------ bigml/tests/test_22_source_args.py | 6 ++++-- bigml/version.py | 2 +- data/images/annotations_list.json | 2 ++ data/images/metadata_list.json | 5 +++++ 7 files changed, 25 insertions(+), 17 deletions(-) create mode 100644 data/images/annotations_list.json create mode 100644 data/images/metadata_list.json diff --git a/HISTORY.rst b/HISTORY.rst index 63308907..6c85c8cd 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,11 @@ History ------- +9.8.3 (2025-03-27) +------------------ + +- Fixing annotations update for regions as lists. 
+ 9.8.2 (2025-03-21) ------------------ diff --git a/bigml/api_handlers/sourcehandler.py b/bigml/api_handlers/sourcehandler.py index 71863b34..bd4b6e6b 100644 --- a/bigml/api_handlers/sourcehandler.py +++ b/bigml/api_handlers/sourcehandler.py @@ -61,19 +61,14 @@ HTTP_CREATED, HTTP_BAD_REQUEST, HTTP_UNAUTHORIZED, HTTP_PAYMENT_REQUIRED, HTTP_NOT_FOUND, HTTP_TOO_MANY_REQUESTS, - HTTP_INTERNAL_SERVER_ERROR, GAE_ENABLED, SEND_JSON) + HTTP_INTERNAL_SERVER_ERROR, GAE_ENABLED, SEND_JSON, LOGGER) from bigml.bigmlconnection import json_load from bigml.api_handlers.resourcehandler import check_resource_type, \ resource_is_ready, get_source_id, get_id from bigml.constants import SOURCE_PATH, IMAGE_EXTENSIONS -from bigml.api_handlers.resourcehandler import ResourceHandlerMixin, LOGGER +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin from bigml.fields import Fields -LOG_FORMAT = '%(asctime)-15s: %(message)s' -LOGGER = logging.getLogger('BigML') -CONSOLE = logging.StreamHandler() -CONSOLE.setLevel(logging.WARNING) -LOGGER.addHandler(CONSOLE) MAX_CHANGES = 5 MAX_RETRIES = 5 @@ -548,7 +543,7 @@ def update_composite_annotations(self, source, images_file, "components": source_ids}) elif optype == "regions": for value, source_id in values: - if isinstance(value, dict): + if isinstance(value, list): # dictionary should contain the bigml-coco format value = compact_regions(value) changes.append( diff --git a/bigml/bigmlconnection.py b/bigml/bigmlconnection.py index c1b337f0..1e680915 100644 --- a/bigml/bigmlconnection.py +++ b/bigml/bigmlconnection.py @@ -50,7 +50,7 @@ LOG_FORMAT = '%(asctime)-15s: %(message)s' LOGGER = logging.getLogger('BigML') -CONSOLE = logging.StreamHandler() +CONSOLE = logging.StreamHandler(sys.stdout) CONSOLE.setLevel(logging.WARNING) LOGGER.addHandler(CONSOLE) @@ -138,7 +138,7 @@ def debug_request(method, url, **kwargs): """ response = original_request(method, url, **kwargs) - logging.debug("Data: %s", response.request.body) + 
LOGGER.debug("Data: %s", response.request.body) try: response_content = "Download status is %s" % response.status_code \ if "download" in url else \ @@ -147,7 +147,7 @@ def debug_request(method, url, **kwargs): response_content = response.content response_content = response_content[0: 256] if short_debug else \ response_content - logging.debug("Response: %s\n", response_content) + LOGGER.debug("Response: %s\n", response_content) return response original_request = requests.api.request @@ -213,9 +213,8 @@ def __init__(self, username=None, api_key=None, # when using GAE will fail pass - logging.basicConfig(format=LOG_FORMAT, - level=logging_level, - stream=sys.stdout) + LOGGER.forma = LOG_FORMAT, + LOGGER.level = logging_level if username is None: try: diff --git a/bigml/tests/test_22_source_args.py b/bigml/tests/test_22_source_args.py index 8fe3567e..b66edc9e 100644 --- a/bigml/tests/test_22_source_args.py +++ b/bigml/tests/test_22_source_args.py @@ -126,7 +126,7 @@ def test_scenario3(self): source_create.the_source_is_finished( self, example["source_wait"]) source_create.the_cloned_source_origin_is(self, source) - + def test_scenario4(self): """ Scenario: Successfully adding annotatations to composite source: @@ -142,6 +142,8 @@ def test_scenario4(self): ['data/images/metadata.json', '500', '500', '12', '100002'], ['data/images/metadata_compact.json', '500', '500', '3', + '100003'], + ['data/images/metadata_list.json', '500', '500', '3', '100003']] show_doc(self.test_scenario4) for example in examples: @@ -157,7 +159,7 @@ def test_scenario4(self): dataset_create.i_create_a_dataset(self) dataset_create.the_dataset_is_finished_in_less_than( self, example["dataset_wait"]) - dataset_create.check_annotations(self, + dataset_create.check_annotations(self, example["annotations_field"], example["annotations_num"]) diff --git a/bigml/version.py b/bigml/version.py index 977225b8..68512901 100644 --- a/bigml/version.py +++ b/bigml/version.py @@ -1 +1 @@ -__version__ = '9.8.2' 
+__version__ = '9.8.3' diff --git a/data/images/annotations_list.json b/data/images/annotations_list.json new file mode 100644 index 00000000..ecfee3db --- /dev/null +++ b/data/images/annotations_list.json @@ -0,0 +1,2 @@ +[{"file": "f1/fruits1f.png", "my_regions": [{"label": "region1", "xmin": 0.2, "ymin": 0.2, "xmax": 0.4, "ymax": 0.4}]}, + {"file": "f1/fruits1.png", "my_regions": [{"label": "region2", "xmin": 0.2, "ymin": 0.2, "xmax": 0.4, "ymax": 0.4}, {"label": "region1", "xmin": 0.5, "ymin": 0.5, "xmax": 0.7, "ymax": 0.7}]}] diff --git a/data/images/metadata_list.json b/data/images/metadata_list.json new file mode 100644 index 00000000..1bf61c67 --- /dev/null +++ b/data/images/metadata_list.json @@ -0,0 +1,5 @@ +{"description": "Fruit images to test colour distributions with regions", + "images_file": "./fruits_hist.zip", + "new_fields": [{"name": "my_regions", "optype": "regions"}], + "source_id": null, + "annotations": "./annotations_list.json"}