src.datapreparation.api_reader

Script file to read motions from the Open Data API of the Austrian Parliament

View Source

  1"""
  2Script file to read motions from the Open Data API of the Austrian Parliament
  3"""
  4
  5# Imports
  6from enum import Enum
  7from collections.abc import Sequence
  8import requests as rq
  9import json
 10import pandas as pd
 11import os
 12from tqdm import tqdm
 13
# Definitions
# Lower and upper bound for legislative period numbers (plain integers).
# NOTE(review): neither constant is referenced in the visible code; presumably
# intended for validating requested periods -- confirm or remove.
MIN_LEGISLATIVE_PERIOD = 5
MAX_LEGISLATIVE_PERIOD = 27
 17
 18
 19# Classes
class LegislativeBody(Enum):
    """Possible codes for legislative bodies to request.

    The member values are the codes sent under the "NRBR" filter key of the
    request body built in ``get_motions``.
    """
    # NOTE(review): "NR" presumably abbreviates "Nationalrat" -- confirm.
    NATIONAL_ASSEMBLY = "NR"
    # NOTE(review): "BR" presumably abbreviates "Bundesrat", which is usually
    # translated as "Federal Council" rather than "Federal Assembly" -- confirm
    # whether the member name should be revisited.
    FEDERAL_ASSEMBLY = "BR"
 24
 25
 26# Functions
 27def _int_to_roman(n: int) -> str:
 28    result = ''
 29    allSymbol = ['M', 'CM', 'D', "CD", 'C', 'XC', 'L', 'XL', 'X', 'IX', 'V', 'IV', 'I']
 30    value = [1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1]
 31    for index in range(len(value)):
 32        quotient = n // value[index]  # to know the number of times the symbol should be written
 33        symbol = quotient * allSymbol[index]  # to write the symbol in specific times
 34        result += symbol  # this is to add the symbol to the result.
 35        n = n % value[index]  # this is to get the remainder which will be use again
 36    return result
 37
 38def _add_value_to_request_body(req_body: dict, body_key: str, body_value) -> dict:
 39    to_add = []
 40    if isinstance(body_value, str):
 41        to_add.append(body_value)
 42    else:
 43        to_add.extend(body_value)
 44
 45    if body_key not in req_body.keys():
 46        req_body[body_key] = []
 47    req_body[body_key].extend(to_add)
 48
 49    return req_body
 50
 51
 52
 53def get_motions(periods: list[str], legis_bodies: Sequence[LegislativeBody]):
 54    """Requests the motions from parlament.gv.at.
 55
 56    Keyword arguments:
 57    periods -- list of legislative periods as roman numbers
 58    legis_bodies -- abbreviation of legislative body
 59    """
 60    request_body = {}
 61
 62    request_body = _add_value_to_request_body(request_body, "NRBR", legis_bodies)
 63    request_body = _add_value_to_request_body(request_body, "GP_CODE", periods)
 64    request_body = _add_value_to_request_body(request_body, "VHG", "ANTR")
 65
 66    res = rq.post(
 67        "https://www.parlament.gv.at/Filter/api/filter/data/101?js=eval&showAll=true&export=true",
 68        data=json.dumps(request_body))
 69
 70    return res
 71
 72
 73
 74def parse_gegenstand_document_links(detail_json: dict) -> list[dict]:
 75    """Parses json of 'Gegenstand' to extract the title, link and type of documents attached to it"""
 76    if 'documents' not in detail_json['content'].keys():
 77        return []
 78
 79    docs = detail_json['content']['documents']
 80    links = []
 81    for document_group in docs:
 82        if "title" in document_group.keys() and "documents" in document_group.keys():
 83            for document in document_group["documents"]:
 84                links.append({'title': document_group["title"],
 85                              'link': document["link"],
 86                              'type': document["type"]})
 87
 88    return links
 89
 90
 91def parse_document_links(df: pd.DataFrame) -> pd.Series:
 92    """
 93    Requests 'Gegenstand' detail-site and parses document links for DataFrame of 'Gegenstand'
 94    """
 95    motion_index = df.index
 96
 97    document_links = pd.Series(index=motion_index, dtype=str)
 98
 99    for leg_period, ityp, inr in tqdm(motion_index):
100        res = rq.get(f"https://www.parlament.gv.at/gegenstand/{leg_period}/{ityp}/{inr}?json=true")
101        document_links.at[(leg_period,ityp,inr)] = str(parse_gegenstand_document_links(res.json()))
102    return document_links
103
104
def export_df_to_csv(path: str, df: pd.DataFrame) -> None:
    """Export a DataFrame of motions to the file "antraege.csv" in a directory.

    Args:
        path: Target directory, with or without a trailing separator. It is
            created (including parents) when it does not exist yet.
        df: DataFrame to write; its index is written as well.
    """
    filename = "antraege.csv"
    os.makedirs(path, exist_ok=True)

    # os.path.join avoids a doubled separator when path already ends in one.
    df.to_csv(path_or_buf=os.path.join(path, filename))
111
112
113# Main
if __name__ == '__main__':
    # Script to request motions and save them to csv, one file per
    # legislative period.
    legis_periods = [22, 23, 24, 25, 26, 27]
    roman_periods = [_int_to_roman(n) for n in legis_periods]

    # Columns of the API result that are not kept in the export. The labels
    # have to match the labels of the API header exactly.
    cols_to_drop = ["DATUMSORT", "NR_GP_CODE", "LZ-Buttons",
                    "DATUM_VON", "Nummer", "PHASEN_BIS", "Personen", "RSS_DESC", "ZUKZ",
                    "Status", "WENTRY_ID", "Zust?", "sysdate???", "INRNUM", "NRBR"]

    for period in roman_periods:
        print(f"Start requesting Data for Legis Period {period}.")
        response = get_motions(period, "NR")
        # Surface HTTP errors directly instead of failing later with a less
        # helpful JSON decoding error or KeyError.
        response.raise_for_status()

        res_json = response.json()

        col_labels = [header_entry["label"] for header_entry in res_json["header"]]

        motion_df = pd.DataFrame(data=res_json["rows"], columns=col_labels)
        motion_df = motion_df.set_index(["GP_CODE", "ITYP", "INR"])
        motion_df = motion_df.drop(cols_to_drop, axis=1)

        print(f"Start retrieving document links for Legis Period {period}")
        motion_df['DocumentLinks'] = parse_document_links(motion_df)

        print("Retrieving document links done! Start exporting to CSV")
        save_path = os.path.join("..", "data", period)
        # Selecting the period drops the (constant) period level of the index.
        export_df_to_csv(save_path, motion_df.loc[period])
        print(f"Export for period {period} done.")

class LegislativeBody(enum.Enum): View Source

class LegislativeBody(Enum):
    """Possible codes for legislative bodies to request.

    The member values are the codes sent under the "NRBR" filter key of the
    request body built in ``get_motions``.
    """
    # NOTE(review): "NR" presumably abbreviates "Nationalrat" -- confirm.
    NATIONAL_ASSEMBLY = "NR"
    # NOTE(review): "BR" presumably abbreviates "Bundesrat", which is usually
    # translated as "Federal Council" rather than "Federal Assembly" -- confirm
    # whether the member name should be revisited.
    FEDERAL_ASSEMBLY = "BR"

Possible Codes for legislative bodies to request.

NATIONAL_ASSEMBLY = <LegislativeBody.NATIONAL_ASSEMBLY: 'NR'>

FEDERAL_ASSEMBLY = <LegislativeBody.FEDERAL_ASSEMBLY: 'BR'>

Inherited Members

enum.Enum: name; value

def get_motions( periods: list[str], legis_bodies: collections.abc.Sequence[src.datapreparation.api_reader.LegislativeBody]): View Source

def get_motions(periods: list[str], legis_bodies: Sequence[LegislativeBody], timeout: float = 60):
    """Request the motions from parlament.gv.at.

    Builds a filter body with the keys "NRBR" (legislative bodies),
    "GP_CODE" (legislative periods) and a fixed "VHG" filter of "ANTR",
    and posts it JSON-encoded to the filter endpoint.

    Args:
        periods: Legislative periods as Roman numerals. A single string is
            accepted as well and treated as one period.
        legis_bodies: Code(s) of the legislative bodies to query. A single
            string such as "NR" is accepted as well.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script forever.

    Returns:
        The ``requests`` response object. The HTTP status is not checked
        here; the caller is responsible for validating and decoding it.
    """
    request_body = {}
    request_body = _add_value_to_request_body(request_body, "NRBR", legis_bodies)
    request_body = _add_value_to_request_body(request_body, "GP_CODE", periods)
    request_body = _add_value_to_request_body(request_body, "VHG", "ANTR")

    res = rq.post(
        "https://www.parlament.gv.at/Filter/api/filter/data/101?js=eval&showAll=true&export=true",
        data=json.dumps(request_body),
        timeout=timeout)

    return res

Requests the motions from parlament.gv.at.

Keyword arguments: periods -- list of legislative periods as Roman numerals; legis_bodies -- abbreviation(s) of the legislative bodies to query

def parse_gegenstand_document_links(detail_json: dict) -> list[dict]: View Source

def parse_gegenstand_document_links(detail_json: dict) -> list[dict]:
    """Extract title, link and type of every document attached to a 'Gegenstand'.

    Args:
        detail_json: Decoded detail JSON; its 'content' entry may hold a
            'documents' list of document groups.

    Returns:
        One dict with the keys 'title', 'link' and 'type' per document, where
        'title' is the title of the group the document belongs to. Groups
        lacking a "title" or a "documents" entry are skipped. An empty list
        is returned when 'content' has no 'documents' entry.
    """
    content = detail_json['content']
    if 'documents' not in content:
        return []

    return [
        {'title': group["title"], 'link': entry["link"], 'type': entry["type"]}
        for group in content['documents']
        if "title" in group and "documents" in group
        for entry in group["documents"]
    ]

Parses json of 'Gegenstand' to extract the title, link and type of documents attached to it

def parse_document_links(df: pandas.core.frame.DataFrame) -> pandas.core.series.Series: View Source

 92def parse_document_links(df: pd.DataFrame) -> pd.Series:
 93    """
 94    Requests 'Gegenstand' detail-site and parses document links for DataFrame of 'Gegenstand'
 95    """
 96    motion_index = df.index
 97
 98    document_links = pd.Series(index=motion_index, dtype=str)
 99
100    for leg_period, ityp, inr in tqdm(motion_index):
101        res = rq.get(f"https://www.parlament.gv.at/gegenstand/{leg_period}/{ityp}/{inr}?json=true")
102        document_links.at[(leg_period,ityp,inr)] = str(parse_gegenstand_document_links(res.json()))
103    return document_links

Requests the detail page of each 'Gegenstand' in the DataFrame and parses its document links.

def export_df_to_csv(path: str, df: pandas.core.frame.DataFrame) -> None: View Source

def export_df_to_csv(path: str, df: pd.DataFrame) -> None:
    """Export a DataFrame of motions to the file "antraege.csv" in a directory.

    Args:
        path: Target directory, with or without a trailing separator. It is
            created (including parents) when it does not exist yet.
        df: DataFrame to write; its index is written as well.
    """
    filename = "antraege.csv"
    os.makedirs(path, exist_ok=True)

    # os.path.join avoids a doubled separator when path already ends in one.
    df.to_csv(path_or_buf=os.path.join(path, filename))

Exports DataFrame of Motions to CSV file in given directory