Module src.app.Clustering.ClusterSizeReduction
Source code
from src.app.Module import Module
import numpy as np
class ClusterSizeReduction(Module):
    """Reduces number of images in a cluster.

    For every cluster label present in the previous module's results, keeps at
    most ``num_elements_per_cluster`` samples: the ones whose feature vectors
    have the smallest Euclidean distance to the center of their cluster.

    Attributes:
        _num_elements_per_cluster: Number of elements per cluster to reduce to (int)
    """

    def __init__(self, prev_module, num_elements_per_cluster):
        super().__init__('ClusterSizeReduction', prev_module)
        self._num_elements_per_cluster = num_elements_per_cluster

    def run(self):
        """Select the samples nearest to each cluster center.

        Reads the 'images', 'features', 'labels', 'centers' and 'kmeans'
        entries of the previous module's results and stores the reduced
        selection in ``self._result``. 'centers' and 'kmeans' are passed
        through unchanged; 'images', 'features' and 'labels' become lists
        grouped by label (ascending) and ordered by increasing distance to
        the center within each label.
        """
        super().run()
        # NOTE(review): the constructor receives `prev_module` but this reads
        # `_prev_model`; presumably the base Module stores it under that
        # name -- confirm against Module.
        self._data = self._prev_model.get_module_results()
        images = self._data['images']
        features = self._data['features']
        labels = self._data['labels']
        centers = self._data['centers']

        self._result = {
            'images': [],
            'features': [],
            'labels': [],
            'centers': centers,
            'kmeans': self._data['kmeans'],
        }

        # Euclidean distance from each sample to the center of its own cluster
        distances = np.array(
            [
                np.linalg.norm(centers[labels[i]] - np.array(feature))
                for i, feature in enumerate(features)
            ],
            dtype=float,
        )

        # Sample indices ordered by increasing distance; a stable sort keeps
        # the original order among equidistant samples, so the selection is
        # deterministic.
        order = np.argsort(distances, kind='stable')
        labels_by_distance = np.asarray(labels)[order]

        # Iterate over the label values that actually occur rather than
        # range(number of distinct labels): when a cluster id is missing
        # (e.g. labels {0, 2}) the range-based loop would skip label 2.
        for label in np.unique(labels):
            members = order[labels_by_distance == label]
            # Slicing already copes with clusters smaller than the limit
            nearest = members[:self._num_elements_per_cluster]
            for index in nearest:
                index = int(index)
                self._result['images'].append(images[index])
                self._result['features'].append(features[index])
                self._result['labels'].append(labels[index])
Classes
class ClusterSizeReduction (prev_module, num_elements_per_cluster)- 
Reduces number of images in a cluster.
This class reduces the number of images in a cluster to the specified amount.
Attributes
_num_elements_per_cluster- Number of elements per cluster to reduce to (int)
 
Source code
class ClusterSizeReduction(Module):
    """Reduces number of images in a cluster.
    This class reduces the number of images in a cluster to the specified amount.
    Attributes:
        _num_elements_per_cluster: Number of elements per cluster to reduce to (int)
    """
    def __init__(self, prev_module, num_elements_per_cluster):
        super().__init__('ClusterSizeReduction', prev_module)
        self._num_elements_per_cluster = num_elements_per_cluster
    def run(self):
        super().run()
        self._data = self._prev_model.get_module_results()
        num_unique_labels = len(np.unique(self._data['labels']))
        self._result = {
            'images': [],
            'features': [],
            'labels': [],
            'centers': self._data['centers'],
            'kmeans': self._data['kmeans'],
        }
        # Map array indices to corresponding label/euclidian distance
        distances = np.empty((len(self._data['features']), 3))
        for i, feature in enumerate(self._data['features']):
            label = self._data['labels'][i]
            center = self._data['centers'][label]
            feature = np.array(feature)
            distance = np.linalg.norm(center-feature)
            distances[i, 0] = label
            distances[i, 1] = i
            distances[i, 2] = distance
        # Convert to np array and sort by euclidian distance to each centroid
        distances = distances[distances[:, 2].argsort()]
        # Choose num_elements_per_cluster images/labels/features based on min distance
        for i in range(num_unique_labels):
            nearest = distances[distances[:, 0] == i]
            if len(nearest) >= self._num_elements_per_cluster:
                nearest = nearest[:self._num_elements_per_cluster, :]
            for n in nearest:
                index = int(n[1])
                self._result['images'].append(self._data['images'][index])
                self._result['features'].append(self._data['features'][index])
                self._result['labels'].append(self._data['labels'][index])
Ancestors
Inherited members