## @package clustering
#  
#  Handles the clustering of nodes

from sklearn.cluster import KMeans
import time
import numpy as np
import graphLayouting

## Clusters the nodes and derives the supernodes and edge matrices from the result.
#
# The feature matrix is always built first; the cluster method is only checked afterwards.
#
# @param data the data loaded from disk; forwarded unchanged to buildClusterResults
# @param normalizedNodes the normalized values of the nodes, one entry per node
# @param clusterFeatures the features (keys into each node) that are used in the clustering
# @param numberOfClusters the wanted number of clusters
# @param clusterMethod the wanted cluster method; only "kmeans" is supported
# @param edgeMatrices set of matrices that will be filled with data (in place)
# @param lowDetailSuperNodeEdgeMatrices forwarded unchanged to buildClusterResults
# @param edgeCollections collections of all edges
# @return the dictionary produced by buildClusterResults, or a plain message string
#         (not an exception) when clusterMethod is not supported
def clusterData(data, normalizedNodes, clusterFeatures, numberOfClusters, clusterMethod,
                edgeMatrices, lowDetailSuperNodeEdgeMatrices, edgeCollections):

    featureMatrix = buildMatrixForClustering(normalizedNodes, clusterFeatures)

    # Guard clause: unsupported methods are reported through the return value.
    if clusterMethod != "kmeans":
        return "This cluster Method is not implmented"

    labels = kMeansClustering(featureMatrix, numberOfClusters)

    return buildClusterResults(
        data, labels, edgeMatrices, lowDetailSuperNodeEdgeMatrices, edgeCollections)


##################################################################################################################
# CLUSTER METHODS
##################################################################################################################

## Runs a k-means clustering on the input matrix.
#
# The estimator is created with a fixed random_state (42), so the random
# initialisation is seeded identically on every call.
#
# @param input a matrix of the input data (one row per sample)
# @param numberOfClusters the wanted number of clusters
# @return the cluster index assigned to each row of the input data
def kMeansClustering(input, numberOfClusters):

    model = KMeans(n_clusters=numberOfClusters, random_state=42)
    return model.fit_predict(input)


##################################################################################################################
# CLUSTER PROCESSING
##################################################################################################################

## Builds the matrix that is used as input for the clustering algorithm.
#
# Each node contributes one row; the row holds the node's values for the
# requested features, in the order given by clusterFeatures.
#
# @param nodes the normalized values of the nodes (each indexable by feature)
# @param clusterFeatures the features that are used for clustering
# @return the cluster input matrix as a list of rows
def buildMatrixForClustering(nodes, clusterFeatures):
    return [
        [entry[feature] for feature in clusterFeatures]
        for entry in nodes
    ]

## Builds supernodes and edge matrices according to the clustering result.
#
# One supernode is created for every cluster index from 0 up to the largest
# index found in clusterIndices; every node is then assigned to its supernode.
# For each edge collection a full edge matrix is stored in edgeMatrices (in
# place) and a low detail matrix is collected for the returned result.
#
# @param data the data from the disk; only data["nodes"] is read here
# @param clusterIndices the cluster index of each node according to the clustering;
#        must offer a max() method and be indexable by node position
# @param edgeMatrices set of matrices that will be filled with data (in place, per key)
# @param lowDetailSuperNodeEdgeMatrices ignored: a fresh dictionary is built instead and
#        is only available through the return value
# @param edgeCollections collections of all edges, indexed by key
# @return a dictionary with the keys "supernodes", "lowDetailSuperNodeEdgeMatrices"
#         and "lowDetailSuperNodePositions"
def buildClusterResults(data, clusterIndices, edgeMatrices, lowDetailSuperNodeEdgeMatrices, edgeCollections):

    nodeToSuperNodeMap = [None] * len(data["nodes"])

    # one (still empty) supernode per cluster index
    supernodes = [
        {"id": clusterId, "nodeIds": []}
        for clusterId in range(0, clusterIndices.max() + 1)
    ]

    # assign every node to the supernode of its cluster and remember the mapping
    for nodeIndex, _ in enumerate(data["nodes"]):
        clusterId = clusterIndices[nodeIndex]
        supernodes[clusterId]["nodeIds"].append(nodeIndex)
        nodeToSuperNodeMap[nodeIndex] = clusterId

    # buildEdgeMatrix fills the low detail matrix it is handed as a side effect
    lowDetailMatrices = {}
    for key in edgeCollections:
        edges = edgeCollections[key]
        lowDetailMatrix = []
        lowDetailMatrices[key] = lowDetailMatrix
        edgeMatrices[key] = buildEdgeMatrix(
            edges, nodeToSuperNodeMap, supernodes, lowDetailMatrix)

    positions = graphLayouting.generateSupernodePositions(supernodes, lowDetailMatrices)

    return {
        "supernodes": supernodes,
        "lowDetailSuperNodeEdgeMatrices": lowDetailMatrices,
        "lowDetailSuperNodePositions": positions
    }

##################################################################################################################
# EDGE PROCESSING
##################################################################################################################

## Builds an edge matrix and fills a lowDetailSuperNodeEdgeMatrix.
#
# Both matrices are square with one row/column per supernode. Every edge is
# stored in the cell [smaller supernode id][larger supernode id], so only the
# upper triangle (including the diagonal) is ever populated. The low detail
# matrix counts the edges per cell (the edge weight is not summed).
#
# @param edgeColl collection of all edges; each edge provides "id1", "id2" and "weight"
# @param nodeToSuperNodeMap A list that contains the supernode ID for each node. The node is specified by the index of the list
# @param supernodes the supernodes that resulted from the last clustering
# @param lowDetailSuperNodeEdgeMatrix list that is extended in place with one row of
#        edge counts per supernode; it is expected to be passed in empty
# @return a dictionary with "minEdgeWeight", "maxEdgeWeight" (both truncated to int,
#         both 0 when no weight was recorded) and "edgeMatrix"
def buildEdgeMatrix(edgeColl, nodeToSuperNodeMap, supernodes, lowDetailSuperNodeEdgeMatrix):

    superNodeCount = len(supernodes)

    edgeMatrix = []
    for _ in range(superNodeCount):
        edgeMatrix.append([[] for _ in range(superNodeCount)])
        lowDetailSuperNodeEdgeMatrix.append([0] * superNodeCount)

    # Infinite sentinels instead of -1: a finite start value for the maximum
    # would be mistaken for "no edges" when all weights are <= -1.
    minEdgeWeight = float('inf')
    maxEdgeWeight = float('-inf')

    for e in edgeColl:
        n1 = nodeToSuperNodeMap[e["id1"]]
        n2 = nodeToSuperNodeMap[e["id2"]]
        sNodeID1 = min(n1, n2)
        sNodeID2 = max(n1, n2)

        edgeMatrix[sNodeID1][sNodeID2].append(e)
        lowDetailSuperNodeEdgeMatrix[sNodeID1][sNodeID2] += 1

        # TODO: delete, is only needed for selectedEdgeMatrix and not for the complete one
        weight = e["weight"]
        if weight > maxEdgeWeight:
            maxEdgeWeight = int(weight)
        if weight < minEdgeWeight:
            minEdgeWeight = int(weight)

    # the sentinel is still in place -> no weight was recorded at all
    if maxEdgeWeight == float('-inf'):
        minEdgeWeight = 0
        maxEdgeWeight = 0

    return {
        "minEdgeWeight": minEdgeWeight,
        "maxEdgeWeight": maxEdgeWeight,
        "edgeMatrix": edgeMatrix
    }


## Gets submatrices of the edgeMatrices for the selected supernodes.
#
# For every key of edgeCollections a square matrix over the selected supernodes
# is built. Cell [i][j] refers to the full matrix cell
# [min(id_i, id_j)][max(id_i, id_j)], i.e. the result is symmetric and its cells
# are the very same list objects as in the full matrix (no copies are made).
#
# @param edgeMatrices the edge matrices of all supernodes; edgeMatrices[key]["edgeMatrix"] is read
# @param edgeCollections collections of all edges; only its keys are used here
# @param selectedSuperNodeIDs the ids of the selected supernodes (anything int() accepts)
# @return a dictionary that maps each key to a dictionary with "minEdgeWeight",
#         "maxEdgeWeight" (both truncated to int, both 0 when the selection holds
#         no edges) and "edgeMatrix"
def getEdgeMatrixSubSets(edgeMatrices, edgeCollections, selectedSuperNodeIDs):

    # convert the ids once instead of once per key, row and column
    selectedIDs = [int(sNode) for sNode in selectedSuperNodeIDs]

    selectedEdgeMatrices = {}
    for key in edgeCollections:
        fullEdgeMatrix = edgeMatrices[key]["edgeMatrix"]

        # Infinite sentinels instead of -1: a finite start value for the maximum
        # would be mistaken for "no edges" when all weights are <= -1.
        minEdgeWeight = float('inf')
        maxEdgeWeight = float('-inf')

        selectedEdgeMatrix = []
        for sNodeID1 in selectedIDs:
            row = []
            for sNodeID2 in selectedIDs:
                # the full matrix is only populated for [smaller id][larger id]
                minNodeID = min(sNodeID1, sNodeID2)
                maxNodeID = max(sNodeID1, sNodeID2)
                edges = fullEdgeMatrix[minNodeID][maxNodeID]
                row.append(edges)
                for e in edges:
                    weight = e["weight"]
                    if weight > maxEdgeWeight:
                        maxEdgeWeight = int(weight)
                    if weight < minEdgeWeight:
                        minEdgeWeight = int(weight)
            selectedEdgeMatrix.append(row)

        # the sentinel is still in place -> the selection contains no weighted edge
        if maxEdgeWeight == float('-inf'):
            minEdgeWeight = 0
            maxEdgeWeight = 0

        selectedEdgeMatrices[key] = {
            "minEdgeWeight": minEdgeWeight,
            "maxEdgeWeight": maxEdgeWeight,
            "edgeMatrix": selectedEdgeMatrix
        }

    return selectedEdgeMatrices
