## @package readData
#  Reads data from files and does preprocessing

import json
import numpy as np
import pandas as pd
from datetime import datetime


## Loads every JSON input file from the `data/` directory (resolved against
#  the current working directory) and bundles the parsed contents.
#
#  The files are read in the order nodes, castEdges, keywordsEdges,
#  genresEdges; the returned dict uses those same names as keys.
# @return dict with the keys "nodes", "castEdges", "keywordsEdges" and
#         "genresEdges", each holding the parsed JSON of the matching file
def readAllData():
    # result key -> file to parse for it (insertion order == read order)
    sources = {
        "nodes": 'data/nodes.json',
        "castEdges": 'data/castEdges.json',
        "keywordsEdges": 'data/keywordsEdges.json',
        "genresEdges": 'data/genresEdges.json',
    }

    loaded = {}
    for key, path in sources.items():
        with open(path, encoding="utf8") as handle:
            loaded[key] = json.load(handle)

    return loaded


## Normalizes the numeric features of the nodes.
#
#  Steps:
#   1. build a numpy matrix with one row per node and the columns
#      [budget, release_date, popularity, runtime, vote_average, vote_count]
#      (release_date "YYYY-MM-DD" is encoded as the integer YYYYMMDD),
#   2. divide every column by its maximum,
#   3. emit one dict per node holding the id and the normalized values.
#
#  Each row is paired with its node by position in data["nodes"], so node
#  ids do not have to be 0..n-1 or appear in order.
#  A column whose maximum is 0 is left unscaled instead of producing NaN/inf
#  through a division by zero.
#
#  Side effect: every original node is stored in nodeList under its id.
# @param data The original data from the disk; only data["nodes"] is read
# @param nodeList Container supporting `nodeList[id] = node`; receives the
#                 original (un-normalized) nodes keyed by their id
# @return list of dicts (same order as data["nodes"]) with the keys "id",
#         "budget", "release_date", "popularity", "runtime", "vote_average"
#         and "vote_count"; empty list when there are no nodes
def normalizeNodeValues(data, nodeList):
    nodes = data["nodes"]

    # max() over an empty matrix raises, and there is nothing to normalize.
    if not nodes:
        return []

    ## [budget,release_date,popularity,runtime,vote_average, vote_count]
    featureRows = []
    for node in nodes:
        year, month, day = node['release_date'].split('-')
        # Encode the date as YYYYMMDD so later dates compare as larger numbers.
        dateValue = int(year) * 10000 + int(month) * 100 + int(day)

        featureRows.append([
            float(node["budget"]),
            dateValue,
            float(node["popularity"]),
            float(node["runtime"]),
            float(node["vote_average"]),
            float(node["vote_count"]),
        ])

    featureMatrix = np.array(featureRows, dtype=float)

    columnMax = featureMatrix.max(axis=0)
    # Avoid 0/0 (NaN) and x/0 (inf): a zero maximum leaves the column unscaled.
    columnMax[columnMax == 0] = 1.0
    featureMatrix = featureMatrix / columnMax

    normalizedData = []
    # Pair rows with nodes by position; the node id is NOT a valid row index.
    for row, node in zip(featureMatrix, nodes):
        nodeId = node["id"]

        nodeList[nodeId] = node

        normalizedData.append({
            "id": nodeId,
            "budget": row[0],
            "release_date": row[1],
            "popularity": row[2],
            "runtime": row[3],
            "vote_average": row[4],
            "vote_count": row[5]
        })
    return normalizedData


## Gathers the three edge lists into a single collection.
#
#  The edge lists are referenced, not copied, so the returned dict shares
#  its values with `data`.
# @param data The original data from the disk; must contain the keys
#             "castEdges", "keywordsEdges" and "genresEdges"
# @return dict mapping each of those three keys to its edge list
def buildEdgeCollections(data):
    edgeKinds = ("castEdges", "keywordsEdges", "genresEdges")
    return {kind: data[kind] for kind in edgeKinds}
