From 4506f7e1e01bd59c124afaad010ca4dcfeadd5fb Mon Sep 17 00:00:00 2001 From: Tadej Lahovnik Date: Fri, 19 Jan 2024 13:24:16 +0100 Subject: [PATCH 01/18] feat: Feature scaling --- arm_preprocessing/dataset.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/arm_preprocessing/dataset.py b/arm_preprocessing/dataset.py index 8092616..462b325 100644 --- a/arm_preprocessing/dataset.py +++ b/arm_preprocessing/dataset.py @@ -258,6 +258,35 @@ def squash(self, threshold, similarity="euclidean"): # Squash data self.data = Squash.squash(self.data, threshold, similarity) + def scale(self, method): + """ + Scale the dataset using the specified method. + + Args: + method (str): Scaling method ('normalisation', 'standardisation'). + + Raises: + ValueError: Invalid scaling method. + """ + # Validate method + if method not in ['normalisation', 'standardisation']: + raise ValueError(f'Invalid scaling method: {method}') + + # Scale data + for column in self.data.columns: + # Skip non-numerical columns + if self.data[column].dtype in ['datetime64[ns]', 'object']: + continue + + if method == 'normalisation': + self.data[column] = ( + self.data[column] - self.data[column].min() + ) / (self.data[column].max() - self.data[column].min()) + elif method == 'standardisation': + self.data[column] = ( + self.data[column] - self.data[column].mean() + ) / self.data[column].std() + def filter_between_dates( self, start_date=None, end_date=None, datetime_column=None ): From ebc42b4eb09ef30092b07d1f8bd80b0db81cbf44 Mon Sep 17 00:00:00 2001 From: Tadej Lahovnik Date: Fri, 19 Jan 2024 13:37:07 +0100 Subject: [PATCH 02/18] feat: Feature scaling examples --- examples/scaling/normalisation.py | 13 +++++++++++++ examples/scaling/standardisation.py | 13 +++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 examples/scaling/normalisation.py create mode 100644 examples/scaling/standardisation.py diff --git a/examples/scaling/normalisation.py 
b/examples/scaling/normalisation.py new file mode 100644 index 0000000..13dae04 --- /dev/null +++ b/examples/scaling/normalisation.py @@ -0,0 +1,13 @@ +""" +Example demonstrates how to scale +the using normalisation +""" + +from arm_preprocessing.dataset import Dataset + +# Initialise dataset with filename and format +dataset = Dataset('datasets/Abalone', format='csv') +dataset.load() + +# Scale dataset using normalisation +dataset.scale(method='normalisation') diff --git a/examples/scaling/standardisation.py b/examples/scaling/standardisation.py new file mode 100644 index 0000000..7a694f9 --- /dev/null +++ b/examples/scaling/standardisation.py @@ -0,0 +1,13 @@ +""" +Example demonstrates how to scale +the using standardisation +""" + +from arm_preprocessing.dataset import Dataset + +# Initialise dataset with filename and format +dataset = Dataset('datasets/Abalone', format='csv') +dataset.load() + +# Scale dataset using standardisation +dataset.scale(method='standardisation') From 71e24ba0d06893be6a9c327876a3eedee4d49c44 Mon Sep 17 00:00:00 2001 From: Tadej Lahovnik Date: Fri, 19 Jan 2024 13:44:18 +0100 Subject: [PATCH 03/18] test: Feature scaling --- tests/test_dataset.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/test_dataset.py b/tests/test_dataset.py index 7f4d6a4..c93424b 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -134,6 +134,40 @@ def test_missing_values_invalid_method(): dataset.missing_values(method='invalid_method') +def test_feature_scaling_normalisation(): + # Test feature scaling using normalisation + dataset = Dataset('datasets/Abalone', format='csv') + dataset.load() + dataset.scale(method='normalisation') + for column in dataset.data.columns: + # Skip non-numerical columns + if dataset.data[column].dtype in ['datetime64[ns]', 'object']: + continue + assert dataset.data[column].min() >= 0 + assert dataset.data[column].max() <= 1 + + +def test_feature_scaling_standardisation(): + # 
Test feature scaling using standardisation + dataset = Dataset('datasets/Abalone', format='csv') + dataset.load() + dataset.scale(method='standardisation') + for column in dataset.data.columns: + # Skip non-numerical columns + if dataset.data[column].dtype in ['datetime64[ns]', 'object']: + continue + assert dataset.data[column].mean() == pytest.approx(0, abs=0.01) + assert dataset.data[column].std() == pytest.approx(1, abs=0.01) + + +def test_feature_scaling_invalid_method(): + # Test invalid method handling + dataset = Dataset('datasets/Abalone', format='csv') + dataset.load() + with pytest.raises(ValueError, match='Invalid scaling method'): + dataset.scale(method='invalid_method') + + def test_filter_between_dates(): # Test filtering between dates dataset = Dataset( From 1adabc27964b82f7c24009b50d3df783926f6203 Mon Sep 17 00:00:00 2001 From: Tadej Lahovnik Date: Fri, 19 Jan 2024 13:44:37 +0100 Subject: [PATCH 04/18] doc: Feature scaling --- README.md | 35 ++++++++++++----- docs/user/usage.rst | 91 ++++++++++++++++++++++++++++++--------------- 2 files changed, 86 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index 29c3dc1..1709b5a 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ arm-preprocessing is a lightweight Python library supporting several key steps involving data preparation, manipulation, and discretisation for Association Rule Mining (ARM). 🧠 Embrace its minimalistic design that prioritises simplicity. 💡 The framework is intended to be fully extensible and offers seamless integration with related ARM libraries (e.g., [NiaARM](https://github.com/firefly-cpp/NiaARM)). 🔗 ## Why arm-preprocessing? -While numerous libraries facilitate data mining preprocessing tasks, this library is designed to integrate seamlessly with association rule mining. It harmonizes well with the NiaARM library, a robust numerical association rule mining framework. 
The primary aim is to bridge the gap between preprocessing and rule mining, simplifying the workflow/pipeline. Additionally, its design allows for the effortless incorporation of new preprocessing methods and fast benchmarking. +While numerous libraries facilitate data mining preprocessing tasks, this library is designed to integrate seamlessly with association rule mining. It harmonises well with the NiaARM library, a robust numerical association rule mining framework. The primary aim is to bridge the gap between preprocessing and rule mining, simplifying the workflow/pipeline. Additionally, its design allows for the effortless incorporation of new preprocessing methods and fast benchmarking. ## Key features ✨ - Loading various formats of datasets (CSV, JSON, TXT) 📊 @@ -54,6 +54,23 @@ dataset.load_data() df = dataset.data ``` +### Missing values +The following example demonstrates how to handle missing values in a dataset using imputation. More examples can be found in the [examples/missing_values](./examples/missing_values) directory: +- [Handling missing values in a dataset using row deletion](./examples/missing_values/missing_values_rows.py) +- [Handling missing values in a dataset using column deletion](./examples/missing_values/missing_values_columns.py) +- [Handling missing values in a dataset using imputation](./examples/missing_values/missing_values_impute.py) + +```python +from arm_preprocessing.dataset import Dataset + +# Initialise dataset with filename and format +dataset = Dataset('examples/missing_values/data', format='csv') +dataset.load() + +# Impute missing data +dataset.missing_values(method='impute') +``` + ### Data discretisation The following example demonstrates how to discretise a dataset using the equal width method. 
More examples can be found in the [examples/discretisation](./examples/discretisation) directory: - [Discretising a dataset using the equal width method](./examples/discretisation/equal_width_discretisation.py) @@ -87,23 +104,23 @@ dataset.load() dataset.squash(threshold=0.75, similarity='euclidean') ``` -### Missing values -The following example demonstrates how to handle missing values in a dataset using imputation. More examples can be found in the [examples/missing_values](./examples/missing_values) directory: -- [Handling missing values in a dataset using row deletion](./examples/missing_values/missing_values_rows.py) -- [Handling missing values in a dataset using column deletion](./examples/missing_values/missing_values_columns.py) -- [Handling missing values in a dataset using imputation](./examples/missing_values/missing_values_impute.py) +### Feature scaling +The following example demonstrates how to scale the dataset's features. More examples can be found in the [examples/scaling](./examples/scaling) directory: +- [Scale features using normalisation](./examples/scaling/normalisation.py) +- [Scale features using standardisation](./examples/scaling/standardisation.py) ```python from arm_preprocessing.dataset import Dataset # Initialise dataset with filename and format -dataset = Dataset('examples/missing_values/data', format='csv') +dataset = Dataset('datasets/Abalone', format='csv') dataset.load() -# Impute missing data -dataset.missing_values(method='impute') +# Scale dataset using normalisation +dataset.scale(method='normalisation') ``` + ## Related frameworks 🔗 [1] [NiaARM: A minimalistic framework for Numerical Association Rule Mining](https://github.com/firefly-cpp/NiaARM) diff --git a/docs/user/usage.rst b/docs/user/usage.rst index 1d93715..090af68 100644 --- a/docs/user/usage.rst +++ b/docs/user/usage.rst @@ -1,6 +1,3 @@ -User documentation ------------------- - Installation ============ @@ -16,9 +13,10 @@ Usage This section demonstrates the usage 
of the arm-preprocessing framework. * :ref:`data loading` +* :ref:`missing values` * :ref:`data discretisation` * :ref:`data squashing` -* :ref:`missing values` +* :ref:`feature scaling` .. _data loading: @@ -67,6 +65,54 @@ The following examples demonstrate how to load a dataset from a file (csv, json, # Print dataset information (columns, categories, min/max values, etc.) dataset.dataset_statistics() + .. _missing values: + +.. _missing values: + +Missing values +~~~~~~~~~~~~~~ + +The following examples demonstrate how to handle missing values in a dataset. + +.. code:: python + + from arm_preprocessing.dataset import Dataset + + # Initialise dataset with filename and format + dataset = Dataset('examples/missing_values/data', format='csv') + + # Load dataset + dataset.load() + + # Remove columns with missing data + dataset.missing_values(method='column') + +.. code:: python + + from arm_preprocessing.dataset import Dataset + + # Initialise dataset with filename and format + dataset = Dataset('examples/missing_values/data', format='csv') + + # Load dataset + dataset.load() + + # Remove rows with missing data + dataset.missing_values(method='row') + +.. code:: python + + from arm_preprocessing.dataset import Dataset + + # Initialise dataset with filename and format + dataset = Dataset('examples/missing_values/data', format='csv') + + # Load dataset + dataset.load() + + # Impute missing data + dataset.missing_values(method='impute') + .. _data discretisation: Data discretisation @@ -153,48 +199,31 @@ The following examples demonstrate how to squash a dataset. # Squash dataset dataset.squash(threshold=0.99, similarity='cosine') -.. _missing values: +.. _feature scaling: -Missing values -~~~~~~~~~~~~~~ +Feature scaling +~~~~~~~~~~~~~~~ -The following examples demonstrate how to handle missing values in a dataset. +The following examples demonstrate how to scale a dataset. .. 
code:: python from arm_preprocessing.dataset import Dataset # Initialise dataset with filename and format - dataset = Dataset('examples/missing_values/data', format='csv') - - # Load dataset - dataset.load() - - # Remove columns with missing data - dataset.missing_values(method='column') - -.. code:: python - - from arm_preprocessing.dataset import Dataset - - # Initialise dataset with filename and format - dataset = Dataset('examples/missing_values/data', format='csv') - - # Load dataset + dataset = Dataset('datasets/Abalone', format='csv') dataset.load() - # Remove rows with missing data - dataset.missing_values(method='row') + # Scale dataset using normalisation + dataset.scale(method='normalisation') .. code:: python from arm_preprocessing.dataset import Dataset # Initialise dataset with filename and format - dataset = Dataset('examples/missing_values/data', format='csv') - - # Load dataset + dataset = Dataset('datasets/Abalone', format='csv') dataset.load() - # Impute missing data - dataset.missing_values(method='impute') \ No newline at end of file + # Scale dataset using standardisation + dataset.scale(method='standardisation') \ No newline at end of file From 1ca9dc8c6ae64a9d47c72c2d8b4dabfef83c412b Mon Sep 17 00:00:00 2001 From: Tadej Lahovnik Date: Mon, 22 Jan 2024 11:02:20 +0100 Subject: [PATCH 05/18] feat: Feature selection --- arm_preprocessing/dataset.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arm_preprocessing/dataset.py b/arm_preprocessing/dataset.py index 462b325..76c2d9b 100644 --- a/arm_preprocessing/dataset.py +++ b/arm_preprocessing/dataset.py @@ -287,6 +287,36 @@ def scale(self, method): self.data[column] - self.data[column].mean() ) / self.data[column].std() + def feature_selection(self, method, threshold, class_column): + """ + Select features based on the specified threshold. + + Args: + method (str): Feature selection method ('pearson', 'spearman', 'kendall'). + threshold (float): Threshold. 
+ class_column (str): Name of the column containing class labels. + + Raises: + ValueError: Invalid feature selection method. + + Returns: + None + """ + # Validate method + if method not in ['pearson', 'spearman', 'kendall']: + raise ValueError(f'Invalid feature selection method: {method}') + + # Raise ValueError if column in self.data is not numerical + for column in self.data.columns: + if self.data[column].dtype not in ['int64', 'float64']: + raise ValueError(f'Column {column} is not numerical') + + # Calculate feature importance + feature_importance = self.data.corr(method=method)[class_column] + + # Select features + self.data = self.data[feature_importance[feature_importance >= threshold].index] + def filter_between_dates( self, start_date=None, end_date=None, datetime_column=None ): From 1e3df15a52a10eee3fb9e6cd29cbd140d4904e4c Mon Sep 17 00:00:00 2001 From: Tadej Lahovnik Date: Mon, 22 Jan 2024 11:02:27 +0100 Subject: [PATCH 06/18] feat: Feature selection examples --- examples/feature_selection/feature_selection.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 examples/feature_selection/feature_selection.py diff --git a/examples/feature_selection/feature_selection.py b/examples/feature_selection/feature_selection.py new file mode 100644 index 0000000..8b5f7a9 --- /dev/null +++ b/examples/feature_selection/feature_selection.py @@ -0,0 +1,14 @@ +""" +Example demonstrates how to keep only +the most important features in a dataset. 
+""" + +from arm_preprocessing.dataset import Dataset + +# Initialise dataset with filename and format +dataset = Dataset('datasets/sportydatagen', format='csv') +dataset.load() + +# Feature selection +dataset.feature_selection( + method='kendall', threshold=0.15, class_column='calories') From 55a899cdccc23628cef7de093ab2699bdf1ea46f Mon Sep 17 00:00:00 2001 From: Tadej Lahovnik Date: Mon, 22 Jan 2024 12:17:02 +0100 Subject: [PATCH 07/18] test: Feature selection --- tests/test_dataset.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/test_dataset.py b/tests/test_dataset.py index c93424b..fc3556e 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -168,6 +168,35 @@ def test_feature_scaling_invalid_method(): dataset.scale(method='invalid_method') +def test_feature_selection_numerical(): + # Test feature selection for numerical dataset + dataset = Dataset('datasets/sportydatagen', format='csv') + dataset.load() + no_columns_before = len(dataset.data.columns) + dataset.feature_selection( + method='pearson', threshold=0.15, class_column='calories') + no_columns_after = len(dataset.data.columns) + assert no_columns_before > no_columns_after + + +def test_feature_selection_categorical(): + # Test feature selection for categorical dataset + dataset = Dataset('datasets/Abalone', format='csv') + dataset.load() + with pytest.raises(ValueError, match='Column .* is not numerical'): + dataset.feature_selection( + method='pearson', threshold=0.15, class_column='Rings') + + +def test_feature_selection_invalid_method(): + # Test invalid method handling + dataset = Dataset('datasets/sportydatagen', format='csv') + dataset.load() + with pytest.raises(ValueError, match='Invalid feature selection method'): + dataset.feature_selection( + method='invalid_method', threshold=0.15, class_column='calories') + + def test_filter_between_dates(): # Test filtering between dates dataset = Dataset( From d067ce1cd99b3548d11bfbc3a10e3e16e0b26ee3 
Mon Sep 17 00:00:00 2001 From: Tadej Lahovnik Date: Mon, 22 Jan 2024 12:32:21 +0100 Subject: [PATCH 08/18] doc: Feature selection --- README.md | 15 +++++++++++++++ arm_preprocessing/dataset.py | 3 ++- docs/user/usage.rst | 22 +++++++++++++++++++++- 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1709b5a..6fa1a1d 100644 --- a/README.md +++ b/README.md @@ -120,6 +120,21 @@ dataset.load() dataset.scale(method='normalisation') ``` +### Feature selection +The following example demonstrates how to select features from a dataset. More examples can be found in the [examples/feature_selection](./examples/feature_selection) directory: +- [Select features using the Kendall Tau correlation coefficient](./examples/feature_selection/feature_selection.py) + +```python +from arm_preprocessing.dataset import Dataset + +# Initialise dataset with filename and format +dataset = Dataset('datasets/sportydatagen', format='csv') +dataset.load() + +# Feature selection +dataset.feature_selection( + method='kendall', threshold=0.15, class_column='calories') +``` ## Related frameworks 🔗 diff --git a/arm_preprocessing/dataset.py b/arm_preprocessing/dataset.py index 76c2d9b..93db4a4 100644 --- a/arm_preprocessing/dataset.py +++ b/arm_preprocessing/dataset.py @@ -297,7 +297,8 @@ def feature_selection(self, method, threshold, class_column): class_column (str): Name of the column containing class labels. Raises: - ValueError: Invalid feature selection method. + ValueError: Invalid feature selection method. + ValueError: Column is not numerical. Returns: None diff --git a/docs/user/usage.rst b/docs/user/usage.rst index 090af68..bcda4b9 100644 --- a/docs/user/usage.rst +++ b/docs/user/usage.rst @@ -17,6 +17,7 @@ This section demonstrates the usage of the arm-preprocessing framework. * :ref:`data discretisation` * :ref:`data squashing` * :ref:`feature scaling` +* :ref:`feature selection` .. 
_data loading: @@ -226,4 +227,23 @@ The following examples demonstrate how to scale a dataset. dataset.load() # Scale dataset using standardisation - dataset.scale(method='standardisation') \ No newline at end of file + dataset.scale(method='standardisation') + +.. _feature selection: + +Feature selection +~~~~~~~~~~~~~~~~~ + +The following examples demonstrate how to select features from a dataset. + +.. code:: python + + from arm_preprocessing.dataset import Dataset + + # Initialise dataset with filename and format + dataset = Dataset('datasets/sportydatagen', format='csv') + dataset.load() + + # Feature selection + dataset.feature_selection( + method='kendall', threshold=0.15, class_column='calories') \ No newline at end of file From ba164ec33f00a76ae0adba7af859dc54e170be95 Mon Sep 17 00:00:00 2001 From: Iztok Fister Jr Date: Mon, 22 Jan 2024 16:22:55 +0100 Subject: [PATCH 09/18] More install options --- README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/README.md b/README.md index 6fa1a1d..84ce20a 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,17 @@ To install ``arm-preprocessing`` with pip, use: ```bash pip install arm-preprocessing ``` +To install ``arm-preprocessing`` on Alpine Linux, please use: + +```sh +$ apk add py3-arm-preprocessing +``` + +To install ``arm-preprocessing`` on Arch Linux, please use an [AUR helper](https://wiki.archlinux.org/title/AUR_helpers): + +```sh +$ yay -Syyu python-arm-preprocessing +``` ## Usage 🚀 ### Data loading From 0319b045f8b955e51d5826020553a22c8c82e2a8 Mon Sep 17 00:00:00 2001 From: Tadej Lahovnik Date: Tue, 23 Jan 2024 09:08:57 +0100 Subject: [PATCH 10/18] doc: Install options --- docs/user/usage.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/user/usage.rst b/docs/user/usage.rst index bcda4b9..982ec98 100644 --- a/docs/user/usage.rst +++ b/docs/user/usage.rst @@ -7,6 +7,18 @@ To install ``arm-preprocessing`` with pip, use: pip install arm-preprocessing +To install 
``arm-preprocessing`` on Alpine Linux, use: + +.. code:: bash + + $ apk add py3-arm-preprocessing + +To install ``arm-preprocessing`` on Arch Linux, use an `AUR helper <https://wiki.archlinux.org/title/AUR_helpers>`_: + +.. code:: bash + + $ yay -Syyu python-arm-preprocessing + Usage ===== From 644c176977c472ff0d4966bab57372df6a234b1e Mon Sep 17 00:00:00 2001 From: Tadej Lahovnik Date: Tue, 23 Jan 2024 11:49:13 +0100 Subject: [PATCH 11/18] fix: Optional parameter --- arm_preprocessing/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arm_preprocessing/dataset.py b/arm_preprocessing/dataset.py index 93db4a4..61fa682 100644 --- a/arm_preprocessing/dataset.py +++ b/arm_preprocessing/dataset.py @@ -22,7 +22,7 @@ class Dataset: data (pd.DataFrame): Dataset. """ - def __init__(self, filename, format="csv", target_format=None, datetime_columns=[]): + def __init__(self, filename=None, format="csv", target_format=None, datetime_columns=[]): """ Initialise a Dataset instance. From f76b46ad9773ec48d2f70edaf7e12edc60e4cbc9 Mon Sep 17 00:00:00 2001 From: Tadej Lahovnik Date: Tue, 23 Jan 2024 11:50:31 +0100 Subject: [PATCH 12/18] fix: Missing categorical data --- arm_preprocessing/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arm_preprocessing/dataset.py b/arm_preprocessing/dataset.py index 61fa682..0e28cba 100644 --- a/arm_preprocessing/dataset.py +++ b/arm_preprocessing/dataset.py @@ -203,7 +203,7 @@ def missing_values(self, method): if self.data[column].dtype == 'object': self.data[column].fillna( self.data[column].mode()[0], inplace=True) - elif self.data[column].dtype == 'datetime64[ns]': + elif self.data[column].dtype == 'datetime64[ns]' or self.data[column].dtype == 'category': self.data[column].fillna( self.data[column].mode()[0], inplace=True) else: From 115ac6c5e6144ed117ca4daac5f5d8e49756f797 Mon Sep 17 00:00:00 2001 From: Iztok Fister Jr Date: Wed, 24 Jan 2024 19:19:33 +0100 Subject: [PATCH 13/18] add uARMSolver in related frameworks --- README.md | 4 
+++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 84ce20a..0ebd8e2 100644 --- a/README.md +++ b/README.md @@ -149,7 +149,9 @@ dataset.feature_selection( ## Related frameworks 🔗 -[1] [NiaARM: A minimalistic framework for Numerical Association Rule Mining](https://github.com/firefly-cpp/NiaARM) +[1] [NiaARM: A minimalistic framework for Numerical Association Rule Mining](https://github.com/firefly-cpp/NiaARM) + +[2] [uARMSolver: universal Association Rule Mining Solver](https://github.com/firefly-cpp/uARMSolver) ## References 📚 From 8a657df7ec813c8f42c5b25c6487f693cd1e1bac Mon Sep 17 00:00:00 2001 From: Tadej Lahovnik Date: Mon, 29 Jan 2024 10:50:13 +0100 Subject: [PATCH 14/18] feat: Integration with NiaARM --- examples/niaarm/niaarm_integration.py | 45 +++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 examples/niaarm/niaarm_integration.py diff --git a/examples/niaarm/niaarm_integration.py b/examples/niaarm/niaarm_integration.py new file mode 100644 index 0000000..9bdfb9a --- /dev/null +++ b/examples/niaarm/niaarm_integration.py @@ -0,0 +1,45 @@ +from arm_preprocessing.dataset import Dataset +import niaarm +from niapy.algorithms.basic import DifferentialEvolution + +# Load dataset +dataset = Dataset('datasets/Abalone', format='csv') +dataset.load() + +# Squash dataset +dataset.squash(threshold=0.85, similarity='euclidean') + +# Impute missing values +dataset.missing_values(method='impute') + +# Drop 'Sex' column +dataset.data.drop('Sex', axis=1, inplace=True) + +# Scale dataset +dataset.scale(method='normalisation') + +# Feature selection +dataset.feature_selection( + method='kendall', threshold=0.25, class_column='Rings') + +# Discretise dataset using equal width, equal frequency, and k-means +dataset.discretise(method='equal_width', num_bins=10, columns=['Height']) +dataset.discretise(method='equal_frequency', num_bins=5, columns=['Diameter']) +dataset.discretise(method='kmeans', num_bins=5, 
columns=[ + 'Whole weight', 'Shell weight']) + +# Identify dataset and output dataset statistics +dataset.identify_dataset() +dataset.dataset_statistics() + +# Association rule mining +algo = DifferentialEvolution( + population_size=50, differential_weight=0.5, crossover_probability=0.9) +metrics = ('support', 'confidence') +rules, run_time = niaarm.get_rules( + niaarm.Dataset(dataset.data), algo, metrics, max_iters=30, logging=True) + +# Results +print(rules) +print(f'Run Time: {run_time}') +rules.to_csv('output.csv') From b0b32a87a75491d3b7a2c3c8eb34e3c798ba3589 Mon Sep 17 00:00:00 2001 From: Tadej Lahovnik Date: Mon, 29 Jan 2024 11:26:43 +0100 Subject: [PATCH 15/18] doc: Simple logotype --- .github/images/logo_black.png | Bin 0 -> 8379 bytes .github/images/logo_white.png | Bin 0 -> 8379 bytes README.md | 4 ++++ docs/conf.py | 3 +-- 4 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 .github/images/logo_black.png create mode 100644 .github/images/logo_white.png diff --git a/.github/images/logo_black.png b/.github/images/logo_black.png new file mode 100644 index 0000000000000000000000000000000000000000..59cdef03a31dfa3370c59008e7b98fac1bea5ad8 GIT binary patch literal 8379 zcmeI1cPGFxvkDth4e@ zG1%o1CjWm2WCtlNXBZevqyIQ;u}G063=BMslANl9W_)Twm) zvu%x)g}tM*M_5LbaKbpr&0j6)hOxWzl}b~51Y!nOI&y3z48+()ijQ1wMO6`YM|Gs> zlgK9ida&*0Q%@V3`XM<=%Uj3AlB2H=00mk;o-QMzfz){FK<{6jU;OeOQ7_h@&%tLh z1%?hRJ8GN)Od0{8=ukGe@XN2Rea-MH?r{WiBJ8srgFn_)v&2_1c|YI{A86YM?kU%) zGJ3(l5J^e@5LNZaIL&l5#9D+Os>=Pe`r6|#dO-nfu5T6?a68_0y&yq!cMt67S{tn%$D#(Zq8)X3h z#dP_l?jYu1JW)V#-RBV%n@P1<3SyiUmGFaJA8cBwT=4r7aMVw=vb|`B_8Wt0IaP51 za$=b(`h^Q8wRl7HM17flo>@h019T3(T%$`=TqQ6^GTVc{G%m-HM876YK8>4kr9CLs za!kGA6?NaPPY3x5MTo& z6iMtzOo$k?n_4c_7T-u}0U*W$Nk0XQ4ZcIrhOIzudNsBDFY04*Q#|y48J<_0Wgzp` zwH2pb3d^?rc+ftgkKJt+=KAUVHgrvpMoJAjZKIRIiNbIApP}DB>GvnYxw>p7adGSV zBBZI;34}IFckh?0Knl~D!GM9YPwk)z-MOFU*&8;0n>Rg6PlM-*+v=x==lx3qd$L#X zd!sVj3Zo~6#p=yMEyuWhyb3C2O*X5(xfS)M(;;-*?HK4ap)dCp+1OtgmsK`?aKybk 
zZ909uB1VD<72%z0jTFB|JFl@nI|+%7A-JNWy`mGC-P9Z3QwD$Yc4B(A5e{3~QP<6U zAdq#aU5-V25$3MsxI0MTB77Ip3&ey%?MKtoOm|4Amr@a{0xHl{;)Uy&Z{H){pJ+cW zVgjIO_|WkYQ(5Rnq4wgMJAFKPDsxe)^0>qNSqt-o}l`k|xCJY?+#v12shbi;At4vR?iQ zocx~az+#&vT4IRWZHg07$EiaUA#h5}h|)&v=~EA98sRm3uMW4asIQsR)s3oyDql$z zxGPsQW>Sqp-FFgjdG4&)?O`NCCF5p_WKE)rZtJq27L#=8%l*N?z;t3cQg=yxG94~h z_DU>o5)E}XRXW~WTD0NI=odpLXL7=cWAZ*r0yonW ziXX1pV`nv|EGzz?Ioi0avbF>?a1gHt;tujf9jzl3Y)2U`77`JJWpfzRe@EI%p7Rfm zF;c7lLPmmfLU81J=aqoNt9+jO!iA$A`+nG`1~EpGBWGmkV@%K>oG7-;WzX(-|^0ptuG%*Kt&>FqCM#%0VbDw zOgkxVolQ0r-mYN`mMty=bVrIClg{W{W3krW9Oyd*>%I&7k9%eG!X3+}qB3#PuPaBu zBI2&JQ9>8QO&wbC2pGen%PL@?R~P4pTL|kB^TwY*<%OZH_Bb3idTZwF=g1La)04M$ z{#nc6sdN(kPvp^eFgfeK6W3fFrHO6v_Lby+egKWe3LVr^+}n8ZbN()Wn2k5EkSTho zs(KE%p;~G380UXxYX7jgB0U!SB`R}zeA&Ji{?M?UQ3u~R+pB33I9orYg8|@^x+!yU zTHyCpo!+)DUM3&eYVV~pGG{{hmPL(qb;HNzai6s@wHu+tPr6Dm9cz)AZ&lmaPiUkQ ziAuq!lYu#fwaOc4oIR!8N5{fob3*<=Nxi1&zaPU0r+w;wr*03N#=f#6TOoHn)%YB6 zjYjVQHviKw=)%x;tWJcl=13rTnA$U}dim@@z#5XkBW5cYCemLaP_Z2PN;%0^x=qBy zmOy;|A)=4vH8kC}uK(kBV8T#%l4vAp`I!dWzXB+H_^+Qv3!krdEKclbWR$WB4sn!C zcGqbHuPT+puwu~iyWf?W(HP{)qfOY{GkD28Gt`S7!mdT8)~ea&!?_pB)KLXFE9oDv z8Do1d5w#zXu1@EE{4kfUd)Bmn z2#w&s4AZuL%rwKm)ornRpe!H8gh#WM(O-QR&u42#jpw3{>DY<|vfML$Aw;@dy762L<-9kR}9_jSaj<2?hWpW zuzZ77dgwQu8POb=ZN+}sFztcdZ^&sw4ktq>z+Qiye3}{)4|MHvlojw@RkX3Bu~CA= za+Ur)#Q$5U<#aqZ)~Pnv!DVbL-y7b81sKbVn}1X1v8#MkzukSCjdJB)M8Oodknslm=kxfe4@20npbFuY0laG(d#J5dWFk&!YlpAF%K9eBS z$?b0xyKOu~B-WD|1LI{EtCaO-KR4VgH|zaA{)jZcdktNGS_>c*A(cjj%~IR3UX-e5 z)*bxt%KHF{n4IE}JE_a83$%d(X_K9!VwZwVW3}FRiyAOHTq0{d_Y7>bvVA(d!_uv9 z!o|g&nEm`*j&sD-#Ix(6U3Sp@lyG(3ByAnxGQYR7!M&)OX%p{_r^GZlZT~}i-yj5= z`U{$%pRi+QL|T-8FHL8EO;mYl?q1B?J}1+$98Nb``j|%tWf1?|$X+R}ftT&tULS7; zn6B&xXY*@joyiCt3H``Giv$1fQbbvn!QvH z&N^cPsn?Rel8p(~iUj7edSy~#_R78e(gP9JTWu-@wJ%DY$L(u0IO^%LGeRx%87*Fon5V|Mt-fvQuaPMU8rX;5%I4!f~1v$8o~x?mF%mSxK*u3 zrA{n{pa50?CCanM4riqO5mnM!Y!8Nsx%4Y%ipZDTC*F<8f|iFt`Qq^N7awfpqz_y} z3oQ_3np4TIH7EIv5P%xWpGlv{c04BK$^cKsH zH=4xRJjLzF{|tAVmX*x+Q;gan+Nyi?GP@NL 
z{Ipta!@7ucS4hlvXt}phi8*c(;UgM+tuN&4bv27kp?xf<`MQIBr*5C2O{Wi`p=$&L1Aj(PW+_2&SkMo zse8FDkF-6ViDW`rKNXF*+QrG9eAD{Yz2TBATV8B^E59fCv&!@a+yE4Q-> z6csP={a9gYgE{GNQ1||{#0yJo?g$H0n(6lRj}{J*tzZ<>M)}%Mk}QC4`~2}3CJctK z3o8!hThV?n3r-bYg<)Ql0`uKtPOiLxh=qyo=l4I?XVMYwF_8w@?phXnuRR@Im~UP; zg#y&C@7Fg?`a`Wr2CL`lQe9+!OpmrcuwgD4LL~-vnPt09?kR_oSh|` zc!tEu@6_JQ9QHPga}v9nm)4y=Z36uVNS>!l5*l7PxsjCx(H5A3-k7MH zgcrUhI35a*D0-xQxfv)MRsPwyA|k49>Hc$wJ2(l0^dbTCo71%O!A*3#O@4md*S0D8y!+EO4}(V|l9QW1Zz0~;O*_dlG%Y?74f&u>lew$a zv+8f20*}=dTlHyH$|zD;x9Hykmlt!pj;zeRR@&<_A^TK63t>CK%7Z{j^8s|@P^)}> zzhTh!nn`Vuc~-(Xic{2lkON`qjlwrc8T&YE?oL0(0CQ#P2Gqwa5#DnRm}OF20C_kr zIOTfxZ1XrIGm+eu*pu&EA}qy)PmJ3o(b1`w-3ssSAg3F>Kr158bUMF$9(C}9{0Y8t zAFHh&g!66Wi#u9&8kS3UYA)Coc;UUAz=e2ftCYw&R%PnFXa4%tlfNxRZZMYy z*oc0cq-%U%gn6#(6bj^XD6)w1-^Zn!DE@qoidK4MI z#0*x3rI<LU3+c%SN*5hlT-K1FxQQqt%p#jw{zXA+vIG;Nl=J&^i%={0k_ z$g|wNu5@}m^QP`?NDQTy&tzt-3wCEY7N??D@6lMYL?{2jmss$2uX!XcVi`QZ97xvQ zj0%-bI<$8@t@jP7Sg};&DSu_;vExXjs>!TYDv0N3Y>FtCqNj9LY^NGGt_`!fn2)f* zc}K*fru#8OOj6%+R+H?_8WwX%y& zaX}`-Qba-akEMWL;{&z-t8Zr&;4hpW~9clMxZB7CwJxjDoiV7b!gFPpUU;pyy>m*tNL_yKh% zuVB|InLxyoNnP2@RCtw1zI8(5c)~gEvapmS@?Yyc|5AmjTy6lM7fu=dr7nC5G?1ym zxU8OziTivWgBjnU{zoFz-q>J&98v$+Z~N;cQ3>_Szd2XK6^Ew5B8Azv4jK9K83o`@ zKZ>NX8dc7oy3b#56m^M?WlXSCyb43wG`-Lg!cLCuh5loViay2Vk3H|p1Zp11Wr?aAUL-8md zqr?Z+e0~jR_XhtJVJ=V2hgS%E_iGuvQvt+U2t#smV2R6A2B41KEJrpAhX9X*fRZVP zm{%23!luxIXQc*Mc!bk01!F)5q`~t0(mE}o8nvxg*w>ta|zi1?cxpdLcRh+?DNG@30JR&X?tR0uxAqNnt%m@S* z+tQDk#Ql6l10DZT7H1pFlaz3M36}~kJi^{v+kR3^%7JTtvq8D9HH6A30d!lvQ^E99 z1sIH=lM4_{N@JDWC_p3-Ot6l~Al5*mMjU?Z>KrGpji+l(C8`7O4yE!$_LJVf;0Im9p9XD<1GBTrK$wxb84A!9bw z;UCbKh=pl>9ESD>6#*f$*mER7+|7;rXxVh5IQbVlK-UM6Rnh6ZIPO%=FvE>8%=f_w zUM_`S3-!0AU^;V_p)8L?u&(SEIyDponNDAjQ>06#dJkhDPLMBSLDV%;VjfD)p6yp` zU`sEcQU!L7JSdyXJQF)v`A^wPvnCIsOOZy_O$^o>1Hz8oMwxZy86ldLyZV`Cv2#;3r zrD^+Cwt(=hK1(-}bn?8=Ag0-rPeKg^-=+pGsH_nO$R}ZBA70jvja-S+Ed(kow)Hg` zvC>Hk57I9}umaLVb=P3s)1@WmfI#SKsy4-7V{sfD!qVa;C5eGx1_o|bKuzv{PfZWD 
zp*BJ6>+xw1GeS(LT&It~VNwIzhx|{0J3_?NVH|M2N6B~`;{rZMnh$=8Kc%-%x&`oM zGyP_evtcxnvx1;RmTkZJump)IN9q<{f*TWY?;`<3yDuIpqLe~4@WYFm`kQ1`=I#89 z`r7oufRN$k7QD7;_yGxuyQS%WW`GB=z5PG7xc|A+34Hb3J@;=yZ!a4IkokAEdpaGA z(%aW>E6m<<__sZ&?6b5^)iR+$R!PCOb;PFMQv)_wvqdvfAVHPT)0WvJOJVC*=o6xQfz4`2 zYJtZXbh!v{^#y4S*Lx+3l>w2*S^9)(7>P-0!VPy(XIXkBMQ|yc5j-ec`|_)Q_FP ziqH)k`x3`e1zXILW*!Se6}F$MNMwL1l^EA7WQ>Q$%ntAdbh!WC z$EFGh)<`f)nwYZX7hcI*=-dnwZ|l;Vr^tLgWP5$X*kP4FTkf7~wL)#>PGK1RinGV- z{W9v_9q_;h>h#|4Oc8F5ZWSF04Nt<2Be^^K>5~IuHv@?&ygW%1P|41hf9g{1oVZc2 zE~PWcr8DlCdyL=j2t}@C0VeNaKI0#sRc0k2A(m zN;ky(?O>?{OcefGbM_X^`Hmlk@yC^Ht6<2Qi=)g;-~0}2F~vXFh*KPDPm7+>`jpv+ z8dFoNLd`pKc0o-Dp(3gP1&F7|*OsS5C?xfIXFsz=b27RZ+$_|(NrI0eltTN=gtW*= zqLY1%Gm_;AtgkeG!1dF1u%@2nV3|SwN9{?o-!a(?oh* z65iCZ7ULPtQs!L;)8}cfCm_sk&0HsUA*4?S3~DQLjc%)w7IHYiaj)Exkk&akF)H=W z>HW5gl>C{1YbvfgU(Fx?VU7Fx^lPJYS*dU3Bn|=8O}}iG(p(5)@1>+Jdlt&l%sCs4 zQcsLfLgKbuDYvKz@{`EAV~7&R5Vt7o&lWIadqPf?!zP$v5X1NJ)e?F*+_!B$nWPLh!P3#i8O-c4 zoD?Zy4U}PsUc#JU{gXrBL8?BH`$}GVON`=8*@5|)zR&UuzI~}0`J0jipTuIUQ5$S- zYCBH#Ya!}V$eSu)m4ta+N7QNQclJh=(=eZott`*_Ws{foB8Ly^`^)#8;41EG8ecxw z-|``_d-nHl7S)c}{QjORUSeOL4S~(4lJ59F}kL9BO>&}1DaBEe-+_H>$;6uqhbqk%~E4>PQQUwgw8oc|+6-}s{ zHtecp{;rVciBgupXQZU24Gvl-e7(9St@!A9v9cc$N#kO0b@}HZsT{!q3&v&!64@)5ZNlXmFd9fOZDFxr<(rz#o?~Gbg`HNVn(}p08ik4*{^B zjj22_^7Kfza2R>FexAtX(>7CSms3sX)DA%R;we*|NR=8;@FU%Xi=I*P=1g}vu5{iB zFo&7rs*g$vSU_1+S{n;&S-N`OwZ^gP=Qb^xs>V7F>kffs`Ib@gU^Cd(EI%V| zCDLUMgQ%0YWz#8f1}3W)Ggn=gaG+6{_E9u$s)2Iy`wpKO;SEcd@X67kH|-Ph<9H$O zDflT>_Ed=2vC3@z8Rs3fse)3>k?cKWd_6gSfVZmaZK86@Y_XVy&h*8gpt15VU;QF! 
zR#YLjWzF%D{RZiiVqGsOO$4F$%j-^2MNy;n{*e@b4x8f*le>Ehb!}Xp@jsu-(h>?k JD#Z)~{u}O4;%5K= literal 0 HcmV?d00001 diff --git a/.github/images/logo_white.png b/.github/images/logo_white.png new file mode 100644 index 0000000000000000000000000000000000000000..073885a79d88e84b90b63cad80289c1034ccb35a GIT binary patch literal 8379 zcmeI2*>`p8-$~hp741X1+fmX~x{22t}Pr zw?Es~SXtORI(vj=LToxA{t1Irw;V~)%nFQ?-BK44f-5> zCR1SOz_O#pDZr!=0E!M}gA2d>>e|-~ui_p@ASc2;+cEfKT{TO56_fV^-td98jo_Yg zohqXj3=EN!^bb*0kBrkyS3|5t_@S!YPphvz4x<+oz~=g9fdMxJP+L9QNB(^5DY}^& zv0==Q+MH|;HE@i??Aktm643QxNlk3JxtH$RXz?sr2XT0A!+^2$!=&(@+ z@Lx=qU+NBG4#pD&6xV$oQL&j+o24MeSy2f;==H&-mC6ObKLJPmR4dzyc4)sbsFqU| z7a%8=siI%Fa8ipmL{HS0>F1eM)HXop(91QtM8#DCb0o7p_)Ft*97*(R(&W>)8CTka zQZ2{SD_&9e?fP_(@06$ten{z1sr_|3)iEG8m=B@}&VK5jNU{hUrHg=4|oBVD8@_ zr-Z}IHaSEq$shY^$Q1wyTXF0l)f6tf74tgl@oxa-Z%s4p($_^SnoBcPYGwGassI5t zKthqkj>LqBLA$BtQf={#q!s{TJdpHLz}VnB1Z~&~YH0pZ#o@9x806`UK9FqUy+Udg>hMB;|E9F zyVIuA*DGQqm{1Ykxzv=?FSN{+jO1TMmNA-zCMDAay5Jx%lCIbGeTI;irM zRDrv4MPnw_DAau?0hi~_n%y2oGE_2drbyN#y6Cnp`)M&rm%iK|3=B*smLqkS)F;#7 zf@QD7@+Q$xcT=U~&80;fzKnh`baEyqoH!=$^Tp?ShPO*>7CH_QU?dp1#v9IU`zvrW zJ)!vFsy%jAbIP*f51ONm%PMP2Km!NydLZr~U)0e$Qo(kV;bI{XL0C42LH&26t>iiX z;20yd`Y&W8I41;0zIR>;IK0Z|xj)`Ko@ilpN)BSSuw&smqJ6Bvc^D5?y2!8JUak`YPp+k_rX8Vg!r6_OvK}-{KPzSBii))1J9#R*b zGS@s8M29{UR_MK`rT(IL?}SL3iQ&Y4BQkf@;;%R($(v~A^L3~=K8ueypD+wc1bdb) zvaPd5CYjnv-h#;8nk|5D>a98SSmgb+gYsOT$V#~l!Hi}|@<#B~H1rH%^qzOE4jfsE z1;DhE;?~(@L*eZj#$egvGC+5vxH0LBzBLwW?ahI{Q?Ty4u>ZJMMlam4d@3pvC;hr| z1S}%%N*g6~LEO}#6_0>1EV`@$26}aIez=9O9x-qH2~=Jf>S~X}VWYQZ&VG&@AvQgE zYv-S}9G*%i(f>pqeFu}X?mKbK)lr(*7H?lk{^tkKXspmdEycZ!7eD9k@`u@Y0}Gj= zhpMXQfE%inCXaFcXQuWKn=8^|v0tJxr^lD=d*Kfa+ZlE6jkCR)CV{i{Lpm4$KB=2B z7pDb&U)AYt`{HHtk*)S#IwNx?ly6zoSXVcEY##Sn8&kUxO8lg&6w|R5srgp5js1j1 zI+3Upj5-;ZQ&_9KfyUWW+I@5^3^pg^50unvn*RGSjBwhg{&(v3z-jC&JF*pW*Hewp z0oQ2s9$@o74TCNWZO7_F_-c*>f`_R+!>X6h9t5l*2|Qx9f?*>46#^B@p|6yaY^B>o zOl%3n=N}^aSYAWZZR`3!jt3?Tg(rzdl9r!ou>C85!iWF*X|(Y9ddK3#jz&f)tKbkv 
z*<^Q}M)0aqISeZXEx-F+nHh~it~}a=%{_yc+%rSH=ppP{WNNLNZ9bfPu}mFRkh7Bh z@tQHV_YzV20qN?rUt_kD)z0|KknfE#y4vwCgN$XH%%ik8S7lZaX5N*J>cS6m>AGi4 z`-jj7{>w0J>&Hwp3|!q7y9dhhVN7^5YZ?93ckz6-cGP$->X?qLSRl(i(?@P#J^UQ` z3YDABC1CdVX~56%PD8Wb7P%qa~)j9#`3-4Jy?LTytw%{bsoFQ_qONcdv~{%+O6@x?xRv= z)|JMJNAUXoGOM@nxSFBNIiN|W3K`j?r2TGs`#2X{uQU1hm`r@zWCbGz^F_H)#^N&x zLY>_HMzPz*LquXdnK3Y4cCkuXZ}xM;&2qEe@8ge1^Sjs3^{2G}QV~*VRM;%F9qUD@ zdS>0h53jrrpoqyS4!M)M%(_4uD3CVUIVyH3*fdt_jkl-)v%@8_)^pFmMl0K=!#gb9 z`X*dl?1|aW&*eBrTunT?9@=FG-A@Tu*G?mIjjYWYlA~m9S%!tA|42ud$~t3oCLeSY9^e55H5b-3&ALc>Z#K5JY8L zo-9Mm02+Pv@lX%cIYXX$bs0D?t903bMce)U=52>MJ(1AgH@dclx1G04*B{lnLOftD zYU5@eGtyl1Zr~n#B>DEEMCW{xjHgs7MPd!VkdQt|`C!2W9t;Ju$u@Jy@dRL!yFhQT z{CJ~DoXu10K9HJS7YPWg2wczu0}f1|VfrAxu9pjE2oA6iS4ilHNT>GV#tQ~8rS(ie z+)zY6m%ZN>wl%d`vbl%(eMBBsxUcAEQtZ-51;#~^Oa0Gqw`p0)d_Tpg9ipweS1+?$ zA;C|p)i$h)NOy(Ae212M8Z&V0-W)Vy)`EIG;JYe!snHji9p{x>W7ClG`w{;NJiZ`QbbZB(=ltrshJbYpu|}C^ zFVD1Dl75I|H`s=JjaQSwLlVUG+(q3)0s&2Q_t>%Hg^`(l~(H4ckRAgrLfYL`K&bxWRo#<4cs9}lrY@;Te@;P zt3Xll65o#%rZ$+94hMDbPfNV8#O98$Fr}GpPycA)5ZMYwF>REu4JF9}__ogbA2Wq;T{udknOHz!S~wJ(S`Zu zbyFxn{rY}=)1*Jtnq;tgt}fM8_H9lA?NI#eJRH5Eer9t4 z*e)dMKeC%6KX-%Gle6G0a+=wT!n;dqDS!Az0U7L!Yl-KSe$bGU)G&LZsOiy6pTXH# zvWaI%ocvDhz06^6vp6TQt9fbN=`&u%f zx=DE9Yl7pU@Q9*E+LxPwvQg!qjVmIe`j+lLhq!~2Fi2lg&t4%|4@sBvPzN)SCDBbW z$&I1t2(vj&J0ILcx7+0B$9-*^qR+cOZSydAG$J{<`STXyjoq}9EJM@cBhio#>NJ_V zT0N`&<|*)4U9nZ4W~GcGg>{SmJ#cw3x9iBt+-s%1E)%j(^|KJR6RbQ4lr$eeHx9MR z*Y_I+ZLgWs7MW)yoTE5J%?CLUmfk3Qla#TKv*zyfV+=4?rfxue%o5=}*MM0j#RZUu zMeT@oFgdfBb;?hbOg(F?R9@=T}m%jZ!CPspF( zEBCS5`aw9~M!vYCWv5}ebf@NmZGjiw%L!bFr?yIooMTm{eqXx;EqUgzUp@KTQsf46 zX@HIBw@JFj_eGfJx=x`$K8GTUDF1z2x{2aH2rT7uCJ7H5eHi7t&r6SD@(;{KYp9s$u7u4I>linObphWi0Y5C~%ww_#DnPOnPi+(B?N~+flA4ae8vH)?ZlUF7 zw|pn;ZFPL;6-#(xf~78!|AY6bei>mBJnB<)mn`!qiAvLE$=d^|kC$FE z$BR76-RnxH*E4VG-iE|bdihLd#=2m4mSb@$di5TSB};VjAAE@gZ}*x<;v$y81I&SB z?aioA>7+w@$J2V>kct&cHJz1YNdjBj>e{law&RBXT^4^apT%Bn~V7f z8=QAUJWS^wsmXVa!sKz1W@JZg0Ukk$eOoTm>lj&aTwd8)Yg}3UI0w@?NSq-n65j0f 
zixVr;`bg}G$+H6?=Z|OrcSjZ>O=S-W&5v5H0zA-))(NID|3NIpL{~9!Z*o&>%U3JA z=oA-ZGAu#{n<1PzJNh1HX-t#Y2sLJIA0D9q+(O>Gqr$7Ul z8jQ>8>6p0B=P{V^9qNB1LhX$W_Qw(RkNvj4P7;++zxT--*4A`j;!q;bl70!wvrMdU) znKJYT9ose+xlRoV0Wt&W--~33Yp8daMq-QDWDu(oCEFOz!ZMmzxKmYBtM;ENGxVAT z#SpP&VCEu7=ynft7cfaT3piI4VYFYEf_@heZW6&H87Wt@vX#((FW^S3f^j-4S~nDr z@-a$$V9n>(fOc>2UlHc=)O>h_z<0lv!8;W|tc5TnCkK|eOl1J-=*@Cuvv3ITI0z`2 za)^0VF*RNa@Hq-bxwwPc$MNAMzM_)3f(r2rR!kRI@|g@eF=&KSS*(eig1DlM!>GMK#q z_iYHwJNh6e{-&`bH}slq><|Mp$Mj-=>`W-@gd4uMNfjq9;YrAV$N=h+HV%|nRunHM zi%xvNiV?-R-QA3JPSj7I~%ah#}uar%JRL8LYPYz4PC_K39hzw#4G-|}*$F9zC^4fU1)>NWO0AlIvWct#6M^j>T zPS~{6IXO+(B+(#-8dzUS&=t4icYb4Pgwe8nM#@7(|D8kpvVG=~4>*8{5Lp$S&WqztBo@Hp z{p#zo5MTPBIG`MMkY|kdM**>FB6&&w?}UgFv6|He)?dm&H&ZIji=|3iV4V3|(1%^z z`P*W5>SDSVrRGqNWsRvVS`*@r2#I-qUa+(;;;I}#3wRR(CTlASjz@9U-RKs9w~p{= z6j-^#J(P%_An#FgvxdL2plFguzkq?6u2WqOdZAn=X;cl$1yJ8bENs;r}$HP`=na{ zUpCWk200r>BRMMwN@Usgn-5Enm~y0U;U%~+5%)e4K(zbfp(08tR0BV}sHwk6R%PDK z WFANA7UT(o_n}#2dptxI_{$~bw5Zl}TV~hKrJDtE+&)swXCiM2QF#wr=XS=7< z!6?0b{kFpFEr);Glgd6z>r^ch8f29eY+FZc`aN}-uTDcqGJ32*zN;6($}HcM|4msT zq2kUZqa#_Z4@tLin(0z3U4s#duu!4WcCz0XyY(%R8HkQ{ws%zQO^L&x&aOu9WBtDV z7vt5p|LQ4bn{d%*O7j>4US5gF*!WqGXvb8V2`iI}w z=@s92Ls;xe#s8|HIU@rrD;1ux957fIt7yd{s1%&(4(R~xt*{yrm3RY;^c+ztMsZSp zL9Yniu(2<3ELE_@ENSMkFjQgtsjBoNdyqGZ$A&*;nQq~`$u9VaV-T@z4+Q(3zhoc{ zWttBSnNG3+(2b z9Hn$a%-;@{O29dgN-=Fq4u=s8Ldy5 zeW)=twJOxSGiMjngb*sC3Q&M}dVFnpN`yjEuXpw{TQn!5i^0u8t(zqHC_*W;&rC>* zj3heQ*El0tp1}G_^9Ni%Z3oLfi>vSvMT|92?deR_ZAw4``hjo&dY=Upx#vE0{WVRb z*CpXiEo(8J;Vfm|bufLN=6V9c{MO8Mau-7SbikmtGS}#~Drq5y1046tEeUCzgA=1t z@0{LmyGY5O3Am=>y7Sfi@gLT>uTQ@=I+vCDR!-s&K;87qW+}~uAogBL>au5{EX|y= z(J1xA2qh$L%awAAnjk-ktULDD1cp&l!Ml1k+e_ASRR0tux@SGTsO)nt;%2wir5!~P zJp#jeIP)_FCPQhRB!YnIBGy0|hUg{C3D!S31RkX76S=SCrMJW=-jp4fpXvK7&*0mas*%4bN$^Q5#u~N3 z=BBpeRKFIYE`_|Q0#-?w*L6glmVRe%R5=av>DbEhtY0>HX)kj4puWF+-wCebzNYcz zbNwwJ0=s8_|7KC`h|TZsx#A`E^+~QPv(=1WE}OIMI>v2hy-EKhd4b)Kyf&6#?CqP$ z$lTE0WTutV;JDLtJMma9`oHe{Ck?k&1_9r8T2;i%($-BlhW@!-hPABb4=>pBp 
z^h5sxIBz3^3?lqo4gLJt{XbpYFN6lSNeO6Yz?-}16$$)tDKm5O%Y<}`Ug7!rRs9eE z``MVv6C+QLbPI=(ckAbgTt00xm3BGRgih@MbT6JV)rnN80R=zOO}OY8C2!7jm*Yz3 zoe-l%m%nZ^`=$D*q<{sKMWwZ|z?P+}=Ur + logo +

+ # arm-preprocessing ![PyPI Version](https://img.shields.io/pypi/v/arm-preprocessing.svg) [![arm-preprocessing](https://github.com/firefly-cpp/arm-preprocessing/actions/workflows/test.yml/badge.svg)](https://github.com/firefly-cpp/arm-preprocessing/actions/workflows/test.yml) diff --git a/docs/conf.py b/docs/conf.py index 56d0214..c4ddbf7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -53,9 +53,8 @@ # html_static_path = ['_static'] # Add logo for project -""" html_logo = '../.github/images/logo.png' +html_logo = '../.github/images/logo_white.png' html_theme_options = { 'logo_only': True, 'display_version': False, } - """ From 1338aedb16a953662246708c7341fa892c60f8fc Mon Sep 17 00:00:00 2001 From: Iztok Fister Jr Date: Mon, 29 Jan 2024 12:28:56 +0100 Subject: [PATCH 16/18] Add NiaARM link to references for artm and sportydatagen datasets --- datasets/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/datasets/README.md b/datasets/README.md index cf24572..be201f5 100644 --- a/datasets/README.md +++ b/datasets/README.md @@ -7,3 +7,5 @@ [3] The Breast Cancer dataset is downloaded from https://archive.ics.uci.edu/ml/index.php [4] The Nursery dataset is downloaded from https://archive.ics.uci.edu/ml/index.php + +[5] https://github.com/firefly-cpp/NiaARM From 6c26b121a446a6f0916d89183cf579438d7a16ad Mon Sep 17 00:00:00 2001 From: Tadej Lahovnik Date: Wed, 31 Jan 2024 14:05:24 +0100 Subject: [PATCH 17/18] doc: Key features --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3c2b5ad..b23e046 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,8 @@ While numerous libraries facilitate data mining preprocessing tasks, this librar - Dataset statistics 📈 - Discretisation methods 📏 - Data squashing methods 🤏 +- Feature scaling methods ⚖️ +- Feature selection methods 🎯 ## Installation 📦 ### pip @@ -40,13 +42,11 @@ To install ``arm-preprocessing`` with pip, use: pip install arm-preprocessing ``` To install 
``arm-preprocessing`` on Alpine Linux, please use: - ```sh $ apk add py3-arm-preprocessing ``` To install ``arm-preprocessing`` on Arch Linux, please use an [AUR helper](https://wiki.archlinux.org/title/AUR_helpers): - ```sh $ yay -Syyu python-arm-preprocessing ``` From ab2de14dcade082bd8b855b55258bc298281f847 Mon Sep 17 00:00:00 2001 From: Tadej Lahovnik Date: Wed, 31 Jan 2024 14:07:41 +0100 Subject: [PATCH 18/18] doc: Version --- arm_preprocessing/__init__.py | 2 +- docs/conf.py | 2 +- pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arm_preprocessing/__init__.py b/arm_preprocessing/__init__.py index df9144c..7fd229a 100644 --- a/arm_preprocessing/__init__.py +++ b/arm_preprocessing/__init__.py @@ -1 +1 @@ -__version__ = '0.1.1' +__version__ = '0.2.0' diff --git a/docs/conf.py b/docs/conf.py index c4ddbf7..4e365d7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,7 +22,7 @@ author = 'Tadej Lahovnik, Iztok Fister Jr.' # The full version, including alpha/beta/rc tags -release = '0.1.1' +release = '0.2.0' # -- General configuration --------------------------------------------------- diff --git a/pyproject.toml b/pyproject.toml index 98b9cae..22c4fe2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "arm-preprocessing" -version = "0.1.1" +version = "0.2.0" description = "Implementation of several preprocessing techniques for Association Rule Mining (ARM)" authors = ["Tadej Lahovnik ", "Iztok Fister Jr. "] keywords = ['association rule mining', 'data science', 'preprocessing']