src.datapreparation.api_reader
Script file to read motions from the Open Data API of the Austrian Parliament
"""
Script file to read motions from the Open Data API of the Austrian Parliament
"""

# Imports
import json
import os
from collections.abc import Sequence
from enum import Enum

import pandas as pd
import requests as rq
from tqdm import tqdm

# Definitions
MIN_LEGISLATIVE_PERIOD = 5
MAX_LEGISLATIVE_PERIOD = 27

# Seconds to wait for the server before a request is aborted; without a
# timeout a stalled connection would block the script forever.
REQUEST_TIMEOUT_SECONDS = 60.0

# Value/symbol pairs in descending order, including the subtractive forms.
_ROMAN_NUMERALS = (
    (1000, 'M'), (900, 'CM'), (500, 'D'), (400, 'CD'),
    (100, 'C'), (90, 'XC'), (50, 'L'), (40, 'XL'),
    (10, 'X'), (9, 'IX'), (5, 'V'), (4, 'IV'), (1, 'I'),
)


# Classes
class LegislativeBody(Enum):
    """Possible Codes for legislative bodies to request."""
    NATIONAL_ASSEMBLY = "NR"
    FEDERAL_ASSEMBLY = "BR"


# Functions
def _int_to_roman(n: int) -> str:
    """Converts a non-negative integer to its roman numeral (0 gives '').

    Raises ValueError for negative numbers, which have no roman numeral.
    """
    if n < 0:
        raise ValueError(f"Cannot convert negative number {n} to a roman numeral.")

    parts = []
    for value, symbol in _ROMAN_NUMERALS:
        # count = how often the symbol is written, n = what is left to convert
        count, n = divmod(n, value)
        parts.append(symbol * count)
    return ''.join(parts)


def _add_value_to_request_body(req_body: dict, body_key: str, body_value) -> dict:
    """Appends one value or several values to the list stored under body_key.

    Keyword arguments:
    req_body -- request body to extend (modified in place and returned)
    body_key -- key of the filter field
    body_value -- a single string / Enum member or an iterable of them;
                  Enum members are replaced by their value so that the
                  request body stays JSON serialisable
    """
    if isinstance(body_value, (str, Enum)):
        body_value = [body_value]
    to_add = [entry.value if isinstance(entry, Enum) else entry for entry in body_value]

    req_body.setdefault(body_key, []).extend(to_add)
    return req_body


def get_motions(periods: list[str], legis_bodies: Sequence[LegislativeBody],
                timeout: float = REQUEST_TIMEOUT_SECONDS):
    """Requests the motions from parlament.gv.at.

    Keyword arguments:
    periods -- list of legislative periods as roman numerals (a single string is accepted too)
    legis_bodies -- legislative bodies to request, as LegislativeBody members or their string codes
    timeout -- seconds to wait for the server before giving up

    Returns the unprocessed response object of the POST request.
    """
    request_body = {}

    request_body = _add_value_to_request_body(request_body, "NRBR", legis_bodies)
    request_body = _add_value_to_request_body(request_body, "GP_CODE", periods)
    request_body = _add_value_to_request_body(request_body, "VHG", "ANTR")

    res = rq.post(
        "https://www.parlament.gv.at/Filter/api/filter/data/101?js=eval&showAll=true&export=true",
        data=json.dumps(request_body),
        timeout=timeout)

    return res


def parse_gegenstand_document_links(detail_json: dict) -> list[dict]:
    """Parses json of 'Gegenstand' to extract the title, link and type of documents attached to it.

    Document groups are read from detail_json['content']['documents']; groups
    without a 'title' or without 'documents' are skipped. Returns an empty list
    when the json has no content or no documents.
    """
    content = detail_json.get('content') or {}
    document_groups = content.get('documents') or []

    links = []
    for document_group in document_groups:
        if "title" not in document_group or "documents" not in document_group:
            continue
        for document in document_group["documents"]:
            links.append({'title': document_group["title"],
                          'link': document["link"],
                          'type': document["type"]})

    return links


def parse_document_links(df: pd.DataFrame) -> pd.Series:
    """
    Requests the detail page of every 'Gegenstand' in the DataFrame and parses its document links.

    The index of df must consist of (legislative period, ityp, inr) tuples.
    Returns a Series with the same index holding the stringified list of
    document links of each entry.
    """
    motion_index = df.index

    document_links = pd.Series(index=motion_index, dtype=str)

    # A single session reuses the connection for all detail requests.
    with rq.Session() as session:
        for leg_period, ityp, inr in tqdm(motion_index):
            res = session.get(
                f"https://www.parlament.gv.at/gegenstand/{leg_period}/{ityp}/{inr}?json=true",
                timeout=REQUEST_TIMEOUT_SECONDS)
            # Fail with the HTTP error instead of an obscure JSON decoding error.
            res.raise_for_status()
            document_links.at[(leg_period, ityp, inr)] = str(parse_gegenstand_document_links(res.json()))
    return document_links


def export_df_to_csv(path: str, df: pd.DataFrame) -> None:
    """Exports DataFrame of Motions to the file 'antraege.csv' in the given directory.

    The directory is created if it does not exist yet.
    """
    filename = "antraege.csv"
    os.makedirs(path, exist_ok=True)

    df.to_csv(path_or_buf=os.path.join(path, filename))


def main() -> None:
    """Script to request motions and save them to csv"""
    legis_periods = [22, 23, 24, 25, 26, 27]
    roman_periods = [_int_to_roman(n) for n in legis_periods]

    for period in roman_periods:
        print(f"Start requesting Data for Legis Period {period}.")
        response = get_motions([period], [LegislativeBody.NATIONAL_ASSEMBLY])
        response.raise_for_status()

        res_json = response.json()

        col_labels = [header_entry["label"] for header_entry in res_json["header"]]

        motion_df = pd.DataFrame(data=res_json["rows"], columns=col_labels)
        motion_df.set_index(["GP_CODE", "ITYP", "INR"], inplace=True)
        cols_to_drop = ["DATUMSORT", "NR_GP_CODE", "LZ-Buttons",
                        "DATUM_VON", "Nummer", "PHASEN_BIS", "Personen", "RSS_DESC", "ZUKZ",
                        "Status", "WENTRY_ID", "Zust?", "sysdate???", "INRNUM", "NRBR"]
        motion_df = motion_df.drop(cols_to_drop, axis=1)

        print(f"Start retrieving document links for Legis Period {period}")
        motion_df['DocumentLinks'] = parse_document_links(motion_df)

        print("Retrieving document links done! Start exporting to CSV")
        save_path = os.path.join("..", "data", period)
        export_df_to_csv(save_path, motion_df.loc[period])
        print(f"Export for period {period} done.")


# Main
if __name__ == '__main__':
    main()
class
LegislativeBody(enum.Enum):
class LegislativeBody(Enum):
    """Codes of the legislative bodies that motions can be requested for."""

    # The member values are the abbreviations used for the "NRBR" request filter.
    NATIONAL_ASSEMBLY = "NR"
    FEDERAL_ASSEMBLY = "BR"
Possible Codes for legislative bodies to request.
NATIONAL_ASSEMBLY =
<LegislativeBody.NATIONAL_ASSEMBLY: 'NR'>
FEDERAL_ASSEMBLY =
<LegislativeBody.FEDERAL_ASSEMBLY: 'BR'>
Inherited Members
- enum.Enum
- name
- value
def
get_motions( periods: list[str], legis_bodies: collections.abc.Sequence[src.datapreparation.api_reader.LegislativeBody]):
def _legis_body_codes(legis_bodies) -> list:
    """Returns the request codes for one or several legislative bodies as a list."""
    if isinstance(legis_bodies, (str, LegislativeBody)):
        legis_bodies = [legis_bodies]
    # Enum members are not JSON serialisable, so they are replaced by their value.
    return [body.value if isinstance(body, LegislativeBody) else body for body in legis_bodies]


def get_motions(periods: list[str], legis_bodies: Sequence[LegislativeBody], timeout: float = 60.0):
    """Requests the motions from parlament.gv.at.

    Keyword arguments:
    periods -- list of legislative periods as roman numerals
    legis_bodies -- legislative bodies to request, as LegislativeBody members or their string codes
    timeout -- seconds to wait for the server before giving up

    Returns the unprocessed response object of the POST request.
    """
    request_body = {}

    request_body = _add_value_to_request_body(request_body, "NRBR", _legis_body_codes(legis_bodies))
    request_body = _add_value_to_request_body(request_body, "GP_CODE", periods)
    request_body = _add_value_to_request_body(request_body, "VHG", "ANTR")

    # Without a timeout a stalled connection would block the caller forever.
    res = rq.post(
        "https://www.parlament.gv.at/Filter/api/filter/data/101?js=eval&showAll=true&export=true",
        data=json.dumps(request_body),
        timeout=timeout)

    return res
Requests the motions from parlament.gv.at.
Keyword arguments: periods -- list of legislative periods as Roman numerals (e.g. "XXVII"); legis_bodies -- codes of the legislative bodies to request (e.g. "NR")
def
parse_gegenstand_document_links(detail_json: dict) -> list[dict]:
def parse_gegenstand_document_links(detail_json: dict) -> list[dict]:
    """Parses json of 'Gegenstand' to extract the title, link and type of documents attached to it.

    Keyword arguments:
    detail_json -- parsed json of a 'Gegenstand'; the document groups are read
                   from detail_json['content']['documents']

    Returns one dict with the keys 'title', 'link' and 'type' per document, in
    the order in which the documents appear. Groups without a 'title' or
    without 'documents' are skipped. An empty list is returned when the json
    has no content or no documents.

    Raises KeyError if a document of a valid group lacks 'link' or 'type'.
    """
    # "or" also covers keys that are present but null in the json.
    content = detail_json.get('content') or {}
    document_groups = content.get('documents') or []

    links = []
    for document_group in document_groups:
        if "title" not in document_group or "documents" not in document_group:
            continue
        for document in document_group["documents"]:
            links.append({'title': document_group["title"],
                          'link': document["link"],
                          'type': document["type"]})

    return links
Parses json of 'Gegenstand' to extract the title, link and type of documents attached to it
def
parse_document_links(df: pandas.core.frame.DataFrame) -> pandas.core.series.Series:
def parse_document_links(df: pd.DataFrame) -> pd.Series:
    """
    Requests the detail page of every 'Gegenstand' in the DataFrame and parses its document links.

    Keyword arguments:
    df -- DataFrame whose index consists of (legislative period, ityp, inr) tuples

    Returns a Series with the same index as df, holding the stringified list of
    document links of each entry.

    Raises an exception of the requests library if a detail page cannot be
    retrieved (timeout, connection error or HTTP error status).
    """
    motion_index = df.index

    document_links = pd.Series(index=motion_index, dtype=str)

    # A single session reuses the connection for all detail requests.
    with rq.Session() as session:
        for leg_period, ityp, inr in tqdm(motion_index):
            res = session.get(
                f"https://www.parlament.gv.at/gegenstand/{leg_period}/{ityp}/{inr}?json=true",
                timeout=60)
            # Fail with the HTTP error instead of an obscure JSON decoding error.
            res.raise_for_status()
            document_links.at[(leg_period, ityp, inr)] = str(parse_gegenstand_document_links(res.json()))
    return document_links
Requests the detail page of each 'Gegenstand' in the DataFrame and parses the document links attached to it.
def
export_df_to_csv(path: str, df: pandas.core.frame.DataFrame) -> None:
def export_df_to_csv(path: str, df: pd.DataFrame) -> None:
    """Exports DataFrame of Motions to the file 'antraege.csv' in the given directory.

    Keyword arguments:
    path -- directory to write to; it is created if it does not exist yet
    df -- DataFrame to export
    """
    filename = "antraege.csv"
    os.makedirs(path, exist_ok=True)

    # os.path.join avoids doubled separators when path already ends with one.
    df.to_csv(path_or_buf=os.path.join(path, filename))
Exports DataFrame of Motions to CSV file in given directory