# Source code for dyconnmap.cluster.rng

# -*- coding: utf-8 -*-
""" Relational Neural Gas

Relational Neural Gas (RNG) [Hammer2007]_ is a variant of Neural Gas that allows clustering and mining
data from a pairwise similarity or dissimilarity matrix.


|

-----

.. [Hammer2007] Hammer, B., & Hasenfuss, A. (2007, September). Relational neural gas. In Annual Conference on Artificial Intelligence (pp. 190-204). Springer, Berlin, Heidelberg.
.. [Hasenfuss2008] Hasenfuss, A., Hammer, B., & Rossi, F. (2008, July). Patch Relational Neural Gas–Clustering of Huge Dissimilarity Datasets. In IAPR Workshop on Artificial Neural Networks in Pattern Recognition (pp. 1-12). Springer, Berlin, Heidelberg.

"""
# Author: Avraam Marimpis <avraam.marimpis@gmail.com>

import numpy as np
from sklearn.metrics.pairwise import pairwise_distances

from .cluster import BaseCluster


class RelationalNeuralGas(BaseCluster):
    """ Relational Neural Gas

    Each prototype is held implicitly as a set of non-negative coefficients
    over the training samples (one row of ``coeff``, normalised to sum to
    one); the explicit prototype vectors are produced at the end of
    :py:meth:`fit` as ``coeff @ data``.


    Parameters
    ----------
    n_protos : int
        The number of prototypes

    iterations : int
        The number of training iterations

    lrate : list of length 2 or None
        The initial and final learning rates. The initial rate is multiplied
        by ``n_protos`` before it is used. If `None`, ``[0.3, 0.01]`` is used.

    metric : string
        Passed to scikit-learn's ``pairwise_distances``; one of the valid
        options as defined for function
        http://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.pairwise_distances.html.

        Valid options include:

         - euclidean
         - cityblock
         - l1
         - cosine

    rng : object or None
        An object of type numpy.random.RandomState. If `None`, a new
        ``RandomState`` is created. It is stored on the instance; the code
        in this class does not draw from it.


    Attributes
    ----------
    protos : array-like, shape(n_protos, n_features)
        The prototypical vectors (set by :py:meth:`fit`)

    coeff : array-like, shape(n_protos, n_samples)
        The per-sample coefficients of each prototype (set by :py:meth:`fit`)

    lrate : float
        The current neighbourhood range; annealed from ``lrate_i`` towards
        ``lrate_f`` while fitting
    """

    def __init__(
        self,
        n_protos=10,
        iterations=100,
        lrate=None,
        metric="euclidean",
        rng=None,
    ):
        self.n_protos = n_protos
        self.iterations = iterations

        # ``None`` stands in for the default so that no mutable list is
        # shared between instances.
        if lrate is None:
            lrate = [0.3, 0.01]

        self.lrate_i = lrate[0] * n_protos
        self.lrate_f = lrate[1]
        self.lrate = self.lrate_i

        self.protos = None
        self.coeff = None
        self.__multipl = None

        self.rng = np.random.RandomState() if rng is None else rng
        self.metric = metric

        self.__symbols = None
        self.__encoding = None

    def fit(self, data):
        """ Fit

        Learn the prototype coefficients from the pairwise dissimilarities
        of the rows of ``data`` (computed with ``self.metric``), then store
        the explicit prototypes in ``self.protos``.


        Parameters
        ----------
        data : real array-like, shape(n_samples, n_features)
            Data matrix, each row represents a sample.


        Returns
        -------
        self : RelationalNeuralGas
            The fitted instance.
        """
        diss_mtx = pairwise_distances(data, metric=self.metric)
        np.fill_diagonal(diss_mtx, 0.0)

        n_samples = np.shape(diss_mtx)[0]

        # Sample multiplicities; every sample counts once here.
        self.__multipl = np.ones((1, n_samples))

        # All prototypes start as the uniform combination of the samples.
        self.protos = np.full((self.n_protos, n_samples), 1.0 / n_samples)

        # Restart the annealing schedule. Without this, a second call to
        # `fit` would begin from the rate left over by the previous call
        # (approximately `lrate_f`) instead of `lrate_i`.
        self.lrate = self.lrate_i
        decay = self.lrate_f / float(self.lrate_i)

        for iteration in range(1, self.iterations + 1):
            t = iteration / float(self.iterations)

            r_dist = np.float32(self.__rdist(diss_mtx, self.protos))

            # Double argsort: for each sample (column), the rank of every
            # prototype by relational distance (0 = closest).
            order = np.argsort(r_dist, axis=0, kind="heapsort")
            ranks = np.argsort(order, axis=0, kind="heapsort")

            # Neighbourhood weights, scaled by the sample multiplicities.
            weighted = self.__multipl * np.exp(-ranks / self.lrate)

            # Normalise every prototype's coefficients to sum to one.
            row_sums = np.float32(np.sum(weighted, axis=1, keepdims=True))
            self.protos = np.float32(weighted) / row_sums

            # Exponential decay from `lrate_i` (t -> 0) to `lrate_f` (t = 1).
            self.lrate = np.float32(self.lrate_i * decay ** t)

        self.coeff = self.protos
        self.protos = np.matmul(self.coeff, data)

        return self

    @staticmethod
    def __rdist(mtx, coeff):
        """ Relational Distance

        For every prototype ``i`` with coefficient row ``a = coeff[i]``,
        computes ``a @ mtx - 0.5 * (a @ mtx @ a)``.


        Parameters
        ----------
        mtx : array-like, shape(n_samples, n_samples)
            Dissimilarity matrix.

        coeff : array-like, shape(n_protos, n_samples)
            The coefficients of each prototype, one prototype per row.


        Returns
        -------
        distances : array-like, shape(n_protos, n_samples)
            Entry ``[i, j]`` is the relational distance between prototype
            ``i`` and sample ``j``.
        """
        num_protos, num_samples = np.shape(coeff)
        distances = np.zeros((num_protos, num_samples))

        # Deliberately evaluated one prototype at a time so that identical
        # coefficient rows yield bit-identical distances (the caller ranks
        # prototypes on these values).
        for i in range(num_protos):
            alpha = coeff[i, :]
            projected = np.matmul(alpha, mtx)
            distances[i, :] = projected - 0.5 * np.matmul(projected, alpha.T)

        return distances