# Source code for python.networkgenerator

import random
import numpy as np
import math
import os
import re

class NetworkGenerator:
    """A generator for new random networks and helpers for on-disk datasets."""

    def genClusterNodes(self, numCluster, numPoints, windowSize=100,
                        bounds=(0, 900, 0, 900)):
        """Generates normally distributed clusters of nodes

        Every cluster receives a random share of ``numPoints``. Because each
        share is truncated to an integer, the total number of generated
        points can be slightly smaller than ``numPoints``. Points are not
        clipped to ``bounds``; only the cluster centers lie inside them.

        :param numCluster: (int) Amount of cluster centers to be generated
        :param numPoints: (int) Amount of nodes to be distributed over the clusters
        :param windowSize: (int) Spread of a cluster; windowSize/2 is used as the
            standard deviation of the node distance from the cluster center
        :param bounds: (int[4]) [startx,endx,starty,endy] region in which the
            cluster centers are placed
        :return: allpoints, allLabels
            np.array((n,2)) with the node positions and np.array((n,),dtype=int)
            with the cluster id of every node
        """
        if numCluster <= 0:
            # Nothing to generate; keep the documented array shapes.
            return np.zeros((0, 2)), np.zeros(0, dtype=int)

        ranges = np.array([bounds[1] - bounds[0], bounds[3] - bounds[2]])
        starts = np.array([bounds[0], bounds[2]])

        # Random share of nodes per cluster. The 0.1 offset keeps every share
        # strictly positive. Flattened to 1-D so each share is a scalar
        # (int() on a 1-element array is deprecated in recent NumPy).
        partition = ((0.1 + np.random.rand(numCluster, 1)) / numCluster)[:, 0]
        partition = partition * (1 / np.sum(partition))

        # Random cluster centers inside the requested value range
        clusterCenters = starts + np.random.rand(numCluster, 2) * ranges

        pointChunks = []
        labelChunks = []
        for clusterId, (share, center) in enumerate(zip(partition, clusterCenters)):
            pointsPerPartition = int(share * numPoints)
            # Random direction (full circle) for every point of the cluster
            angle = np.random.rand(pointsPerPartition, 1) * 2 * math.pi
            # Signed displacement length, scaled by the window size
            length = np.random.normal(size=(pointsPerPartition, 1)) * (windowSize / 2)
            direction = np.hstack((np.cos(angle), np.sin(angle)))
            pointChunks.append(direction * length + center)
            labelChunks.append(np.full(pointsPerPartition, clusterId, dtype=int))

        allpoints = np.concatenate(pointChunks, axis=0)
        allLabels = np.concatenate(labelChunks, axis=0)
        return allpoints, allLabels

    def generateNetwork(self, nodeCnt, edgeCnt, xStart, xEnd, yStart, yEnd,
                        closerSamplingIterations=30, clusterCenters=4):
        """Generate a network with clustered random nodes and one edge per node

        For every node ``closerSamplingIterations`` random other nodes are
        sampled and the closest one becomes the end of the node's edge.

        :param nodeCnt: (int) Amount of nodes requested (the generated amount
            can be slightly lower, see genClusterNodes)
        :param edgeCnt: (int) Currently unused; exactly one edge is generated
            per node. Kept for interface compatibility.
        :param xStart,xEnd,yStart,yEnd: (int) Bounds of the data space for generation
        :param closerSamplingIterations: (int) Amount of candidate end nodes
            sampled per node when selecting its edge
        :param clusterCenters: (int) Amount of cluster centers to be generated
        :return: network ({"nodes":[],"edges":[],"nodeclusters":[]})
            lists of node positions, edge index pairs and node cluster ids
        """
        network = {
            "nodes": [],
            "edges": [],
            "nodeclusters": []
        }
        nodePositions, clusterIds = self.genClusterNodes(
            clusterCenters, nodeCnt, windowSize=500,
            bounds=[xStart, xEnd, yStart, yEnd])
        network["nodeclusters"] = clusterIds.tolist()
        network["nodes"] = [[int(pos[0]), int(pos[1])] for pos in nodePositions]

        # The amount of generated nodes can differ from the requested amount.
        nodeCnt = nodePositions.shape[0]
        if nodeCnt < 2:
            # No partner node exists, so no edge can be created.
            return network
        if closerSamplingIterations < 1:
            raise ValueError("closerSamplingIterations must be at least 1")

        lastIndex = nodeCnt - 1
        # Last known partner of every node (-1: none yet); used to avoid
        # creating the same connection twice in a row.
        connectedNodesIndexes = np.full(nodeCnt, -1, dtype=int)

        for i in range(nodeCnt):
            # Share of the other nodes that have a lower index than i
            leftprob = i / lastIndex
            rolls = np.random.rand(closerSamplingIterations) * 0.9999999
            # Candidates with lower index: uniformly from [0, i-1]
            leftvalues = (np.random.rand(closerSamplingIterations) * i).astype(int)
            # Candidates with higher index: uniformly from [i+1, lastIndex];
            # clipped to guard against floating point round-up.
            rightvalues = np.minimum(
                ((i + 1) + np.random.rand(closerSamplingIterations) * (lastIndex - i)).astype(int),
                lastIndex)
            # For i == 0 leftprob is 0 and for i == lastIndex it is 1, so the
            # invalid side is never selected.
            selEnds = np.where(rolls < leftprob, leftvalues, rightvalues)

            offsets = nodePositions[selEnds] - nodePositions[i]
            lengths = np.sqrt(np.sum(np.power(offsets, 2), axis=1))
            closestFirst = np.argsort(lengths)

            selectedIndex = selEnds[closestFirst[0]]
            # Fall back to the next closest candidate while the selected one
            # is already connected to i. Multi-edges remain possible in the
            # rare case that every candidate is already connected.
            for candidate in closestFirst[1:]:
                if connectedNodesIndexes[selectedIndex] != i:
                    break
                selectedIndex = selEnds[candidate]

            connectedNodesIndexes[selectedIndex] = i
            connectedNodesIndexes[i] = selectedIndex
            network["edges"].append([i, int(selectedIndex)])
        return network

    def readDatasetToNP(self, name):
        """Read ``data/<name>_edges.txt`` and ``data/<name>_nodes.txt`` next to this file

        Fields are separated by arbitrary whitespace; blank lines are skipped.
        Edge lines hold the two node ids in their first two fields. Node lines
        with more than three fields are read as (id, <ignored>, v1, v2), node
        lines with exactly three fields as (id, v1, v2). Shorter node lines are
        reported on stdout and skipped.

        :param name: (str) Dataset name (file name prefix)
        :return: edgesNp, nodesNp
            arrays built from the parsed [start,end] and [id,v1,v2] rows
        """
        dataDir = os.path.join(os.path.dirname(__file__), "data")
        edgesPath = os.path.join(dataDir, name + "_edges.txt")
        nodesPath = os.path.join(dataDir, name + "_nodes.txt")

        edgeList = []
        with open(edgesPath, 'r') as edgesFile:
            for line in edgesFile:
                fields = line.split()
                if not fields:
                    continue
                edgeList.append([int(fields[0]), int(fields[1])])
        edgesNp = np.asarray(edgeList)

        nodesList = []
        with open(nodesPath, 'r') as nodesFile:
            for nodecnt, line in enumerate(nodesFile, 1):
                fields = line.split()
                if not fields:
                    continue
                if len(fields) > 3:
                    nodesList.append([int(fields[0]), float(fields[2]), float(fields[3])])
                elif len(fields) > 2:
                    nodesList.append([int(fields[0]), float(fields[1]), float(fields[2])])
                else:
                    print("Fail parsing Line: ")
                    print(line)
                    print("LineCnt: " + str(nodecnt))
        nodesNp = np.asarray(nodesList)
        return edgesNp, nodesNp

    def firstLoginAsNode(self, nodesNp):
        """Keep one row per run of equal node ids

        Rows are scanned in order. A row is kept when its id (column 0)
        differs from the id of the last kept row and neither of its two value
        columns is 0.0. Rows of the same id that follow a kept row are dropped.

        :param nodesNp: rows of [id, v1, v2]
        :return: np.array with the kept rows
        """
        # None instead of 0 so that a leading run of id 0 is not dropped.
        lastIndex = None
        newNodesList = []
        for row in nodesNp:
            nodeId = int(row[0])
            if nodeId != lastIndex and row[1] != 0.0 and row[2] != 0.0:
                lastIndex = nodeId
                newNodesList.append(row)
        return np.asarray(newNodesList)

    def selectNetRecursive(self, startEdges, newEdges, origEdges, connsCnt, maxNodes):
        """Collect connected edges depth-first, starting at ``startEdges``

        Every collected edge is appended to ``newEdges``. The search then
        continues with up to ``connsCnt`` edges of ``origEdges`` that start at
        the end node of the collected edge. An edge is collected only once and
        collection stops as soon as ``newEdges`` holds ``maxNodes`` entries.

        Implemented iteratively (explicit stack) so that the limit is honoured
        exactly and large limits cannot exhaust the recursion depth.

        :param startEdges: rows of [start,end] to start the search from
        :param newEdges: (list) Output list, extended in place
        :param origEdges: np.array((n,2)) of all edges available for expansion
        :param connsCnt: (int) Maximum amount of follow-up edges per collected edge
        :param maxNodes: (int) Maximum length of ``newEdges``
        """
        if len(newEdges) >= maxNodes:
            return
        seen = set((int(edge[0]), int(edge[1])) for edge in newEdges)
        pending = [iter(startEdges)]
        while pending:
            edge = next(pending[-1], None)
            if edge is None:
                pending.pop()
                continue
            key = (int(edge[0]), int(edge[1]))
            if key in seen:
                continue
            seen.add(key)
            newEdges.append(edge)
            if len(newEdges) >= maxNodes:
                return
            # Continue at the end node of the edge that was just collected.
            followUps = origEdges[origEdges[:, 0] == edge[1]]
            pending.append(iter(followUps[0:connsCnt]))

    def extractSubnet(self, edgesNp, nodesNp, startPointCnt=4,
                      maxConnectionsPerNode=4, maxNodes=40):
        """Extract a random connected set of edges and print it

        Works on a shuffled copy, the passed arrays are left untouched.

        :param edgesNp: np.array((n,2)) of [start,end] edges
        :param nodesNp: Unused; kept for interface compatibility
        :param startPointCnt: (int) Amount of random edges the search starts from
        :param maxConnectionsPerNode: (int) Maximum follow-up edges per collected edge
        :param maxNodes: (int) Maximum amount of edges to collect
        :return: (list) The collected edges
        """
        shuffledEdges = np.random.permutation(edgesNp)
        startEdges = shuffledEdges[0:startPointCnt]
        remainingEdges = shuffledEdges[startPointCnt:]
        newEdges = []
        print(len(startEdges))
        self.selectNetRecursive(startEdges, newEdges, remainingEdges,
                                maxConnectionsPerNode, maxNodes)
        print(newEdges)
        print(len(newEdges))
        return newEdges

    def extractSubnet2(self, edgesNp, nodesNp, startNodeCnt=5):
        """Extract random start nodes with their outgoing edges and print them

        For every start node the node itself, all edges starting at it and all
        node rows matching the end of those edges are collected. Nodes can
        occur more than once. Works on a shuffled copy, the passed arrays are
        left untouched.

        :param edgesNp: np.array((n,2)) of [start,end] edges
        :param nodesNp: np.array((m,k)) of node rows with the id in column 0
        :param startNodeCnt: (int) Amount of random start nodes
        :return: newNodes, newEdges
            lists of the collected node rows and edges (as plain lists)
        """
        startNodes = np.random.permutation(nodesNp)[0:startNodeCnt]
        newNodes = []
        newEdges = []
        for node in startNodes:
            newNodes.append(node.tolist())
            connections = edgesNp[edgesNp[:, 0] == node[0]]
            for edge in connections:
                newEdges.append(edge.tolist())
                # Append every matching row as a flat list, like the start nodes
                for endNode in nodesNp[nodesNp[:, 0] == edge[1]]:
                    newNodes.append(endNode.tolist())
        print("newNodes Length: " + str(len(newNodes)))
        print(newNodes)
        print("newEdges Length: " + str(len(newEdges)))
        print(newEdges)
        return newNodes, newEdges

    def createNewDataset(self, name, edgesNp, nodesNp):
        """Write edges and nodes to ``data/<name>_edges.txt`` / ``data/<name>_nodes.txt``

        A sample subnet is printed first (see extractSubnet2). Rows are written
        in the order in which they are passed; the data directory is created
        if it does not exist.

        :param name: (str) Dataset name (file name prefix)
        :param edgesNp: rows of [start,end]; both values are written as int
        :param nodesNp: rows of [id,v1,v2]; the id is written as int
        """
        dataDir = os.path.join(os.path.dirname(__file__), "data")
        edgesPath = os.path.join(dataDir, name + "_edges.txt")
        nodesPath = os.path.join(dataDir, name + "_nodes.txt")

        self.extractSubnet2(edgesNp, nodesNp)

        if not os.path.isdir(dataDir):
            os.makedirs(dataDir)
        with open(edgesPath, 'w') as edgesFile:
            edgesFile.writelines(
                str(int(row[0])) + " " + str(int(row[1])) + "\n" for row in edgesNp)
        with open(nodesPath, 'w') as nodesFile:
            nodesFile.writelines(
                str(int(row[0])) + " " + str(row[1]) + " " + str(row[2]) + "\n"
                for row in nodesNp)

    def readDataset(self):
        """Read the "clean" dataset and write it back to disk

        NOTE: the returned network is not filled from the dataset yet; its
        "nodes" and "edges" lists are always empty.

        :return: network ({"nodes":[],"edges":[]})
        """
        network = {
            "nodes": [],
            "edges": []
        }
        edgesNp, nodesNP = self.readDatasetToNP("clean")
        self.createNewDataset("clean", edgesNp, nodesNP)
        return network