Source code for dyconnmap.cluster.cluster

# -*- coding: utf-8 -*-
""" Base class for clustering algorithms

"""
# Author: Avraam Marimpis <avraam.marimpis@gmail.com>

import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.manifold import MDS


class BaseCluster(object):
    """ Base class for clustering algorithms.

    The constructor only declares the attributes and sets them to ``None``;
    nothing in this class assigns the codebook (``protos``), so it is
    presumably populated by subclasses or by the caller before
    :meth:`encode` is used.
    """

    def __init__(self):
        # Forwarded as ``random_state`` to MDS inside ``encode``.
        self.rng = None
        # Codebook (prototypes); ``encode`` indexes it with integer arrays.
        self.protos = None
        # Fallback metric used by ``encode`` when it is called with
        # ``metric=None``.
        self.metric = None
        # Results of the most recent ``encode`` call.
        self.__encoding = None
        self.__symbols = None

    def encode(self, data, metric="euclidean", sort=True):
        """ Employ a nearest-neighbor rule to encode the given ``data`` using the codebook.

        Parameters
        ----------
        data : real array-like, shape(n_samples, n_features)
            Data matrix, each row represents a sample.

        metric : string or None
            One of the following valid options as defined for function
            http://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.pairwise_distances.html.

            Valid options include:

             - euclidean
             - cityblock
             - l1
             - cosine

            If `None` is passed, the metric stored in ``self.metric`` will
            be used.

        sort : boolean
            Whether or not to sort the symbols using MDS first. Default `True`


        Returns
        -------
        encoded_data : real array-like, shape(n_samples, 1, n_features)
            ``data``, as represented by the prototypes in codebook. The
            singleton axis is the "neighbor" axis of ``ts_symbols``, which is
            kept when the codebook is indexed with it.

        ts_symbols : integer array, shape(n_samples, 1)
            A discrete symbolic time series; each entry is the row index of
            the nearest prototype in the (optionally sorted) codebook.

        Raises
        ------
        ValueError
            If the codebook (``self.protos``) has not been set.
        """
        # Fail early with a clear message instead of an obscure error raised
        # from inside scikit-learn / NumPy indexing.
        if self.protos is None:
            raise ValueError(
                "No codebook available: `protos` is None; "
                "the prototypes must be learned before calling encode()."
            )

        sprotos = self.protos

        if sort:
            # Project the prototypes onto a single MDS dimension and reorder
            # the codebook along it, so the symbol indices follow that order.
            mds = MDS(n_components=1, random_state=self.rng)
            protos_1d = mds.fit_transform(self.protos).ravel()
            sorted_protos_1d = np.argsort(protos_1d)

            sprotos = self.protos[sorted_protos_1d]

        if metric is None:
            metric = self.metric

        nbrs = NearestNeighbors(
            n_neighbors=1, algorithm="auto", metric=metric
        ).fit(sprotos)

        # Only the neighbor indices are needed; the distances are discarded.
        _, self.__symbols = nbrs.kneighbors(data)
        self.__encoding = sprotos[self.__symbols]

        return (self.__encoding, self.__symbols)