# Source code for dyconnmap.cluster.som

# -*- coding: utf-8 -*-
""" Self Organizing Map


Here, :math:`T` is the number of reference prototypes; the input patterns are stored in :math:`X`, and
:math:`X^\\ast` contains the approximated patterns as produced by the Nearest Neighbor rule.

Notes
-----
For faster convergence, we can also draw random weights from the given probability distribution :math:`P(t)`

|

-----

.. [Martinetz1991] Martinetz, T., Schulten, K., et al. A "neural-gas" network learns topologies. University of Illinois at Urbana-Champaign, 1991.
"""
# Author: Avraam Marimpis <avraam.marimpis@gmail.com>

import numpy as np

from .cluster import BaseCluster


class SOM(BaseCluster):
    """ Self Organizing Map

    A rectangular grid of nodes, each holding a 2-feature weight vector.
    Training repeatedly draws a random input sample, finds the node whose
    weights are closest to it (the best matching unit, BMU) and pulls the BMU
    and its grid neighbours towards the sample. The neighbourhood radius
    shrinks exponentially with the iteration number.

    Parameters
    ----------
    grid : sequence of length 2
        The sizes of the grid, unpacked as ``(grid_y, grid_x)`` (rows first).
        Both sizes must be at least 1.

    iterations : int
        The number of training iterations performed by :meth:`fit`.

    lrate : float
        The learning rate applied to every weight update.

    n_jobs : int
        Number of parallel jobs. It is only stored on the instance; this
        class does not use it itself.

    rng : object or None
        An object of type numpy.random.RandomState. When ``None``, a new
        unseeded ``RandomState`` is created.


    Attributes
    ----------
    weights : ndarray, shape(grid_y, grid_x, 2)
        The weight vector of every grid node.

    protos : ndarray, shape(grid_y * grid_x, 2)
        The prototypical vectors, i.e. ``weights`` flattened to one row per
        node. Available after :meth:`fit` has been called.

    """

    def __init__(self, grid=(10, 10), iterations=1024, lrate=0.1, n_jobs=1, rng=None):
        self.rng = np.random.RandomState() if rng is None else rng

        self.grid_y, self.grid_x = grid
        if self.grid_x < 1 or self.grid_y < 1:
            raise ValueError(
                "grid sizes must be positive, got {!r}".format(tuple(grid))
            )

        self.iterations = iterations
        self.n_jobs = n_jobs

        # Draw through ``self.rng``: the ``rng`` argument itself may be None.
        self.weights = self.rng.rand(self.grid_x * self.grid_y, 2)
        self.weights = np.reshape(self.weights, (self.grid_y, self.grid_x, 2))

        self.nodes = np.arange(self.grid_y * self.grid_x)

        self.lrate_0 = lrate
        self.lrate = self.lrate_0

        # The training schedule follows the constructor arguments.
        self.numIterations = iterations
        self.currentIteration = 0
        self.startLearningRate = lrate
        self.learningRate = self.startLearningRate

        self.mapRadius = np.max([self.grid_x, self.grid_y]) / 2.0

        # log(mapRadius) is zero or negative when the largest grid side is
        # 2 or less; dividing by it would either fail or make the radius grow.
        # An infinite time constant keeps the radius fixed at mapRadius.
        log_radius = float(np.log(self.mapRadius))
        if log_radius > 0.0:
            self.timeConstant = float(self.numIterations) / log_radius
        else:
            self.timeConstant = np.inf

    @classmethod
    def findBMU(cls, x, y):
        """ Squared Euclidean distance between two 2-component points.

        Despite its name, this helper performs no search; it only measures
        the distance between ``x`` and ``y`` using their first two components.

        Parameters
        ----------
        x, y : indexable
            Points whose items ``[0]`` and ``[1]`` are numeric.

        Returns
        -------
        distance : float
            ``(x[0] - y[0]) ** 2 + (x[1] - y[1]) ** 2``
        """
        distance = 0.0
        for axis in (0, 1):
            delta = x[axis] - y[axis]
            distance += delta * delta

        return distance

    def fit(self, data):
        """ Train the map on the given samples.

        Parameters
        ----------
        data : array-like, shape(n_samples, 2)
            The input samples. The number of columns must match the size of
            the node weight vectors.

        Returns
        -------
        self : SOM
            The fitted instance; ``weights`` is updated in place and
            ``protos`` is set.

        Raises
        ------
        ValueError
            If ``data`` is not two-dimensional, has the wrong number of
            columns, or contains no samples.
        """
        data = np.asarray(data)
        n_features = self.weights.shape[-1]

        if data.ndim != 2 or data.shape[1] != n_features:
            raise ValueError(
                "data must have shape (n_samples, {}), got {!r}".format(
                    n_features, data.shape
                )
            )

        n_samples = data.shape[0]
        if n_samples == 0:
            raise ValueError("data must contain at least one sample")

        # Grid coordinates of every node; they do not change while training.
        rows, cols = np.indices((self.grid_y, self.grid_x))

        for iteration in range(self.numIterations):
            self.currentIteration = iteration

            learn_sample = data[self.rng.choice(n_samples, 1)[0]]

            # Best matching unit: the node whose weights are closest to the
            # sample. argmin returns the first minimum in row-major order.
            sq_dist = np.sum((self.weights - learn_sample) ** 2, axis=-1)
            bmu_row, bmu_col = np.unravel_index(np.argmin(sq_dist), sq_dist.shape)

            self.neighborhoodRadius = self.mapRadius * np.exp(
                float(-iteration) / self.timeConstant
            )
            widthSquared = self.neighborhoodRadius * self.neighborhoodRadius

            # Only nodes strictly inside the current radius (measured on the
            # grid, not in feature space) are moved towards the sample.
            grid_sq_dist = (rows - bmu_row) ** 2 + (cols - bmu_col) ** 2
            inside = grid_sq_dist < widthSquared

            infl = np.exp(-grid_sq_dist[inside] / (2.0 * widthSquared))
            neighbours = self.weights[inside]
            self.weights[inside] = neighbours + (
                self.learningRate * infl[:, np.newaxis] * (learn_sample - neighbours)
            )

            # NOTE: the learning rate is kept constant. The original code
            # carried a disabled exponential decay of the form
            # startLearningRate * exp(-currentIteration / numIterations)
            # that was never applied.

        # One row per node; this is a reshaped view of ``weights``.
        self.protos = self.weights.reshape(-1, n_features)

        return self