Skip to content

Commit

Permalink
add new files
Browse files Browse the repository at this point in the history
  • Loading branch information
chenxinye committed Dec 20, 2023
1 parent 43e2b07 commit 5f3d5c6
Show file tree
Hide file tree
Showing 10 changed files with 1,895 additions and 1,898 deletions.
2 changes: 1 addition & 1 deletion build/lib.linux-x86_64-3.11/classix/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

__version__ = '1.2.2'
__version__ = '1.2.3'
__enable_cython__ = True

from .clustering import CLASSIX
Expand Down
53 changes: 25 additions & 28 deletions build/lib.linux-x86_64-3.11/classix/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from scipy.spatial import distance



def cython_is_available(verbose=0):
"""Check if CLASSIX is using Cython."""

Expand Down Expand Up @@ -61,14 +60,15 @@ def cython_is_available(verbose=0):
return False



def loadData(name='vdu_signals'):
"""Load built-in sample data.
Parameters
----------
name: str, {'vdu_signals', 'Iris', 'Dermatology', 'Ecoli', 'Glass', 'Banknote', 'Seeds', 'Phoneme', 'Wine'}, default='vdu_signals'
The supported built-in datasets.
name: str, {'vdu_signals', 'Iris', 'Dermatology', 'Ecoli', 'Glass',
'Banknote', 'Seeds', 'Phoneme', 'Wine', 'Covid3MC', 'CovidENV'},
default='vdu_signals'
Identifier of the built-in dataset.
Returns
-------
Expand Down Expand Up @@ -161,13 +161,13 @@ def loadData(name='vdu_signals'):
if name not in ['vdu_signals', 'Iris', 'Dermatology', 'Ecoli', 'Glass',
'Banknote', 'Seeds', 'Phoneme', 'Wine', 'CovidENV', 'Covid3MC']:

warnings.warn("Currently not support this data.")
warnings.warn("Invalid dataset identifier.")




def get_data(current_dir='', name='vdu_signals'):
"""Download the built-in sample data."""
"""Download the built-in sample data from the web."""
import requests

if name == 'vdu_signals':
Expand Down Expand Up @@ -314,16 +314,15 @@ class CLASSIX:
group_merging : str, {'density', 'distance', None}, default='distance'
The method for the merging of groups.
- 'density': two groups are merged if the density of data points in their intersection
is at least as high the smaller density of both groups. This option uses the disjoint
set structure to speedup merging.
- 'distance': two groups are merged if the distance of their group centers is at
most mergeScale*radius (the parameter above). This option uses the disjoint
set structure to speedup merging.
most mergeScale*radius (the parameter above).
- 'density': two groups are merged if the density of data points in their intersection
is at least as high as the smaller density of both groups. This option uses a disjoint
set structure for the merging.
For more details, we refer to [1].
If group_merging is set to None, the method will return the labels formed by aggregation as cluster labels.
If group_merging is set to None, the method will return the labels formed by aggregation
as the cluster labels.
minPts : int, default=1
Expand All @@ -333,14 +332,15 @@ class CLASSIX:
norm : boolean, default=True
If normalize the data associated with the sorting, default as True.
Whether to normalize the data associated with the sorting; defaults to True.
mergeScale : float
Design for distance-clustering, when distance between the two group centers
associated with two distinct groups smaller than mergeScale*radius, then the two groups merge.
Used with distance-clustering; when the distance between the group centers
of two distinct groups is smaller than mergeScale*radius,
the two groups are merged.
post_alloc : boolean, default=True
If allocate the outliers to the closest groups, hence the corresponding clusters.
Whether to allocate outliers to the closest groups, hence the corresponding clusters.
If False, all outliers will be labeled as -1.
mergeTinyGroups : boolean, default=True
Expand All @@ -353,8 +353,8 @@ class CLASSIX:
- 'set': Use disjoint set structure to merge connected groups.
memory : boolean, default=True
If Cython memoryviews is disable, a fast algorithm with less efficient memory
memory : boolean, default=False
If Cython memoryviews are disabled, a fast algorithm with less efficient memory
consumption is triggered, since precomputation for aggregation is used.
Setting it to True will use a memory-efficient computation instead.
If Cython memoryviews are enabled, this parameter can be ignored.
Expand Down Expand Up @@ -429,12 +429,12 @@ class CLASSIX:
References
----------
[1] X. Chen and S. Güttel. Fast and explainable sorted based clustering, 2022
[1] X. Chen and S. Güttel. Fast and explainable clustering based on sorting,
https://arxiv.org/abs/2202.01456, 2022.
"""

def __init__(self, sorting="pca", radius=0.5, minPts=1, group_merging="distance", norm=True, mergeScale=1.5, post_alloc=True, mergeTinyGroups=True,
memory=True, verbose=1, short_log_form=True):

def __init__(self, sorting="pca", radius=0.5, minPts=1, group_merging="distance", norm=True, mergeScale=1.5,
post_alloc=True, mergeTinyGroups=True, memory=False, verbose=1, short_log_form=True):

self.__verbose = verbose
self.minPts = int(minPts)
Expand All @@ -449,7 +449,6 @@ def __init__(self, sorting="pca", radius=0.5, minPts=1, group_merging="distance"
self.__truncate = short_log_form
self.labels_ = None


self._gcIndices = np.frompyfunc(self.gc2ind, 1, 1)

if self.__verbose:
Expand Down Expand Up @@ -517,8 +516,7 @@ def fit(self, data):
----------
data : numpy.ndarray
The ndarray-like input of shape (n_samples,)
"""
if isinstance(data, pd.core.frame.DataFrame):
self._index_data = data.index
Expand Down Expand Up @@ -620,7 +618,6 @@ def fit_transform(self, data):
return self.fit(data).labels_



def predict(self, data):
"""
Allocate the data to their nearest clusters.
Expand Down
2 changes: 1 addition & 1 deletion classix/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

__version__ = '1.2.2'
__version__ = '1.2.3'
__enable_cython__ = True

from .clustering import CLASSIX
Expand Down
2 changes: 1 addition & 1 deletion classixclustering.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: classixclustering
Version: 1.2.2
Version: 1.2.3
Summary: Fast and explainable clustering based on sorting
Home-page: https://github.com/nla-group/CLASSIX.git
Author: Xinye Chen, Stefan Güttel
Expand Down
Binary file removed dist/classixclustering-1.2.2.tar.gz
Binary file not shown.
Binary file not shown.
Binary file added dist/classixclustering-1.2.3.tar.gz
Binary file not shown.
Loading

0 comments on commit 5f3d5c6

Please sign in to comment.