src.draw
"""Word placement for the WordStream visualisation.

Builds one box layout per topic and places every word inside the polygon of
its topic by walking a spiral from a random start point until a free,
collision-less position is found.
"""
import random
import math
from dataclasses import dataclass
from itertools import cycle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.patches import Rectangle, PathPatch
from matplotlib.path import Path

from src.boxes import build_boxes, Box, box_from_row, topic_boxes_to_path
from src.util import WordStreamData, Word, get_max_sudden, load_parlament_data, font_path
from src.placement import Placement, WordPlacement


@dataclass
class DrawOptions:
    """Configuration for a WordStream visualisation.

    Attributes:
        width: Width of the visualisation in inches.
        height: Height of the visualisation in inches.
        min_font_size: Minimal font size (in pt) of words placed in the visualisation.
        max_font_size: Maximal font size (in pt) of words placed in the visualisation.
    """

    width: int
    height: int
    min_font_size: float
    max_font_size: float


def init_word_placement(placement: Placement, word: Word) -> WordPlacement:
    """Create a not-yet-positioned WordPlacement for ``word``.

    The new object is passed to ``placement.get_size`` and
    ``placement.get_sprite`` (in that order), which set the size of its
    bounding box and its sprite.

    Keyword arguments:
    placement -- Placement object where to place the word in.
    word -- 'Word' object with text, frequency and sudden attention measure.
    """
    word_placement = WordPlacement(0, 0, 0, 0, 0, word=word)
    placement.get_size(word_placement)
    placement.get_sprite(word_placement)
    return word_placement


def place_topic(placement: Placement, words: pd.Series, topic_boxes: pd.DataFrame, topic_polygon: Path) -> list[dict]:
    """Place the words of one topic in the boxes of that topic.

    Does not guarantee that all words can be placed.

    Keyword arguments:
    placement -- Figure where to place the words in
    words -- Series with one collection of words per box, in box order
    topic_boxes -- the boxes where to place the words in (row i belongs to words[i])
    topic_polygon -- the polygon one gets by stitching the boxes together

    Returns the ``to_dict()`` form of every word that could be placed.
    """
    # One list of WordPlacements per box, in the same order as the rows of topic_boxes.
    word_placements = [
        [init_word_placement(placement, word) for word in words_in_box]
        for words_in_box in words
    ]
    n_words = sum(len(words_in_box) for words_in_box in word_placements)

    words_placed = 0
    for i, words_in_box in enumerate(word_placements):
        if not words_in_box:
            continue
        # The box is the same for every word of this row, so build it once.
        box = box_from_row(topic_boxes.iloc[i])
        for word_placement in words_in_box:
            words_placed += place(word_placement, placement, box=box,
                                  topic_boxes=topic_boxes, topic_polygon=topic_polygon)

    print(f"Placed {words_placed}/{n_words} in topic!")
    return [wp.to_dict() for words_in_box in word_placements for wp in words_in_box if wp.placed]


def place_words(data: WordStreamData, width: int, height: int, font_size: tuple[float, float]) -> dict:
    """Calculate where the words of a WordStreamData are placed.

    Keyword arguments:
    data -- Fully initialized WordStreamData where sudden and frequency is set.
    width -- width in inches of area where words should be placed.
    height -- height in inches of area where words should be placed.
    font_size -- tuple of minimum and maximum font size (in pt) which should be used.

    Returns a dict mapping each topic to the list returned by place_topic.
    """
    min_font, max_font = font_size
    ppi = 200
    boxes = build_boxes(data, width, height)
    max_sudden = get_max_sudden(data)
    placement = Placement(width, height, ppi, max_sudden, min_font, max_font, font_path + "/Rubik-Medium.ttf")
    word_placements = dict()
    for topic in data.topics:
        topic_polygon = topic_boxes_to_path(boxes[topic])
        word_placements[topic] = place_topic(placement, data.df[topic], boxes[topic], topic_polygon)

    # fig, ax = plt.subplots(1, 1, figsize=(width, height))
    # ax.imshow(np.asarray(placement.img))
    # _debug_draw_boxes(ax, boxes, placement, word_placements)
    # plt.show(dpi=ppi)

    return word_placements


def placed_in_polygon(topic_polygon: Path, wp: WordPlacement):
    """Check whether the bounding box of ``wp`` lies within ``topic_polygon``."""
    x0, y0 = wp.x, wp.y
    x1, y1 = x0 + wp.width, y0 + wp.height
    corners = [(x0, y0), (x1, y0), (x1, y1), (x0, y1)]
    return topic_polygon.contains_path(Path(corners))


def place(word: WordPlacement, placement: Placement, box: Box, topic_boxes: pd.DataFrame, topic_polygon: Path) -> bool:
    """Try to place ``word`` by searching for unoccupied space within the given box and polygon.

    Starting from a random point in the box, candidate positions are generated
    along a spiral until one is inside the canvas, inside the topic polygon and
    accepted by ``placement.check_placement``.

    Keyword arguments:
    word -- word which should be placed; its coordinates are updated while searching
    placement -- image where the word should be placed in; used to check collisions with other words
    box -- box where the word should be placed
    topic_boxes -- unused, kept so existing callers keep working
    topic_polygon -- Path of the topic, used to see if the word fits fully in the area of the topic

    Returns True if the word could be placed, False if not.
    """
    max_delta = (box.width * box.width + box.height * box.height) ** 0.5
    start_x = box.x + (box.width * (random.random() + .5) / 2)
    start_y = box.y + (box.height * (random.random() + .5) / 2)
    spiral = achemedeanSpiral([box.width, box.height])
    direction = 1 if random.random() < .5 else -1
    step = direction * 0.5 * word.height
    t = -step  # the first loop iteration then evaluates the spiral at t == 0
    word.x = start_x
    word.y = start_y
    word.placed = False

    # The canvas check below treats y as ranging over [-height / 2, height / 2].
    half_height = placement.height / 2
    while True:
        t += step
        offset = spiral(t)
        if not offset:
            break
        dx, dy = offset

        # Give up once the spiral has left the area spanned by the box diagonal.
        if max(abs(dx), abs(dy)) >= max_delta:
            break

        word.x = start_x + dx
        word.y = start_y + dy

        outside_canvas = (
            word.x < 0
            or word.y < -half_height
            or word.x + word.width > placement.width
            or word.y + word.height > half_height
        )
        if (not outside_canvas
                and placed_in_polygon(topic_polygon, word)
                and placement.check_placement(word)):
            placement.place(word)
            return True

        if step == 0:
            # A zero-height word yields a zero step: the spiral would never
            # advance, so stop after this single attempt instead of looping forever.
            break

    return False


def achemedeanSpiral(size):
    """Build an Archimedean-style spiral for an area of the given size.

    Keyword arguments:
    size -- sequence (width, height); only the ratio width / height is used

    Returns a function mapping the spiral parameter t to the offset [dx, dy].
    """
    aspect = size[0] / size[1]

    def spiral(t):
        # NOTE(review): this looks like a port of d3-cloud, whose spiral scales t
        # by 0.1 before using it for BOTH coordinates and angles. Here only the
        # x radius is scaled -- confirm this is intended.
        dx = aspect * (t * 0.1) * math.cos(t)
        dy = t * math.sin(t)
        return [dx, dy]

    return spiral


def rectangularSpiral(size):
    """Build a rectangular spiral for an area of the given size.

    Keyword arguments:
    size -- sequence (width, height); the horizontal step is scaled by width / height

    Returns a stateful function: every call moves the current position one step
    (direction chosen from t) and returns the new position [x, y].
    """
    dy = 4
    dx = dy * size[0] / size[1]
    x = 0
    y = 0

    def spiral(t):
        nonlocal x, y
        sign = -1 if t < 0 else 1
        # Selects one of four directions; the direction changes at growing intervals of t.
        switch = (int(math.sqrt(1 + 4 * sign * t)) - sign) & 3
        if switch == 0:
            x += dx
        elif switch == 1:
            y += dy
        elif switch == 2:
            x -= dx
        else:
            y -= dy
        return [x, y]

    return spiral


spirals = {
    'achemedean': achemedeanSpiral,
    'rectangular': rectangularSpiral,
}


def _debug_draw_boxes(ax, boxes: dict[str, pd.DataFrame], placement: Placement, placements: dict[str, list]):
    """Draw the topic boxes and word anchors - used for debugging the box layout.

    Keyword arguments:
    ax -- matplotlib axes to draw on
    boxes -- boxes of every topic, keyed by topic
    placement -- Placement providing the mapping to pixel coordinates
    placements -- placed words of every topic (dicts with "x" and "y")
    """
    # Cycle through the colours so that topics beyond the fourth are drawn as well.
    for (topic, topic_boxes), col in zip(boxes.items(), cycle(["red", "green", "blue", "purple"])):
        for idx in topic_boxes.index:
            box = box_from_row(topic_boxes.loc[idx])
            x_px = placement.width_map(box.x)
            y_px = placement.height_map(box.y)
            height_px = placement.box_height_map(box.height)
            width_px = placement.box_width_map(box.width)
            ax.add_patch(Rectangle((x_px, y_px), width_px, height_px, edgecolor=col, facecolor="none", lw=2))
    for words in placements.values():
        for word in words:
            x_px = placement.width_map(word["x"])
            y_px = placement.height_map(word["y"])
            ax.plot(x_px, y_px, 'ro')


def draw_parlament(options: DrawOptions, legislative_periods: list[str], fulltext: bool = False) -> tuple[dict, WordStreamData]:
    """Load the data for the given legislative periods and place its words.

    Keyword arguments:
    options -- Configuration for size of figure and font
    legislative_periods -- List of roman numerals to indicate what data should be loaded
    fulltext -- True: fulltext of motions is drawn; otherwise eurovoc keywords are used.

    Returns the placements and the loaded WordStreamData.
    """
    data = load_parlament_data(legislative_periods, fulltext=fulltext)
    font_range = (options.min_font_size, options.max_font_size)
    word_placements = place_words(data, options.width, options.height, font_size=font_range)
    return word_placements, data


if __name__ == '__main__':
    import sys

    # draw_parlament needs the legislative periods; read them from the command line.
    periods = sys.argv[1:]
    if not periods:
        raise SystemExit("usage: python -m src.draw PERIOD [PERIOD ...]")
    options = DrawOptions(width=24, height=12, min_font_size=15, max_font_size=35)
    draw_parlament(options, periods)
@dataclass
class DrawOptions:
    """Configuration for a WordStream visualisation.

    Attributes:
        width: Width of the visualisation in inches.
        height: Height of the visualisation in inches.
        min_font_size: Minimal font size (in pt) of words placed in the visualisation.
        max_font_size: Maximal font size (in pt) of words placed in the visualisation.
    """

    width: int
    height: int
    min_font_size: float
    max_font_size: float
Class to set configuration for WordStream Visualisation.
def init_word_placement(placement: Placement, word: Word) -> WordPlacement:
    """Create a not-yet-positioned WordPlacement for ``word``.

    The new object is passed to ``placement.get_size`` and
    ``placement.get_sprite`` (in that order), which set the size of its
    bounding box and its sprite.

    Keyword arguments:
    placement -- Placement object where to place the word in.
    word -- 'Word' object with text, frequency and sudden attention measure.
    """
    word_placement = WordPlacement(0, 0, 0, 0, 0, word=word)
    placement.get_size(word_placement)
    placement.get_sprite(word_placement)
    return word_placement
Initializes WordPlacement by setting the word, the size of the bounding box and the sprite
Keyword arguments: placement: Placement object where to place the Word in. word: 'Word' object with text frequency and sudden attention measure.
def place_topic(placement: Placement, words: pd.Series, topic_boxes: pd.DataFrame, topic_polygon: Path) -> list[dict]:
    """Place the words of one topic in the boxes of that topic.

    Does not guarantee that all words can be placed.

    Keyword arguments:
    placement -- Figure where to place the words in
    words -- Series with one collection of words per box, in box order
    topic_boxes -- the boxes where to place the words in (row i belongs to words[i])
    topic_polygon -- the polygon one gets by stitching the boxes together

    Returns the ``to_dict()`` form of every word that could be placed.
    """
    # One list of WordPlacements per box, in the same order as the rows of topic_boxes.
    word_placements = [
        [init_word_placement(placement, word) for word in words_in_box]
        for words_in_box in words
    ]
    n_words = sum(len(words_in_box) for words_in_box in word_placements)

    words_placed = 0
    for i, words_in_box in enumerate(word_placements):
        if not words_in_box:
            continue
        # The box is the same for every word of this row, so build it once.
        box = box_from_row(topic_boxes.iloc[i])
        for word_placement in words_in_box:
            words_placed += place(word_placement, placement, box=box,
                                  topic_boxes=topic_boxes, topic_polygon=topic_polygon)

    print(f"Placed {words_placed}/{n_words} in topic!")
    return [wp.to_dict() for words_in_box in word_placements for wp in words_in_box if wp.placed]
Places words in the boxes of a topic. Does not guarantee that all words can be placed
Keyword Arguments: placement -- Figure where to place the words in words -- Series of words which should be placed topic_boxes -- the boxes where to place the words in topic_polygon -- the polygon one gets by stitching the boxes together
def place_words(data: WordStreamData, width: int, height: int, font_size: tuple[float, float]) -> dict:
    """Calculate where the words of a WordStreamData are placed.

    Keyword arguments:
    data -- Fully initialized WordStreamData where sudden and frequency is set.
    width -- width in inches of area where words should be placed.
    height -- height in inches of area where words should be placed.
    font_size -- tuple of minimum and maximum font size (in pt) which should be used.

    Returns a dict mapping each topic to the list returned by place_topic.
    """
    # font_size used to be declared as ``font_size=tuple[float, float]``, which made
    # the type object the default VALUE; omitting the argument then failed on the
    # unpacking below. It is a required, annotated parameter now.
    min_font, max_font = font_size
    ppi = 200
    boxes = build_boxes(data, width, height)
    max_sudden = get_max_sudden(data)
    placement = Placement(width, height, ppi, max_sudden, min_font, max_font, font_path + "/Rubik-Medium.ttf")
    word_placements = dict()
    for topic in data.topics:
        topic_polygon = topic_boxes_to_path(boxes[topic])
        word_placements[topic] = place_topic(placement, data.df[topic], boxes[topic], topic_polygon)

    # fig, ax = plt.subplots(1, 1, figsize=(width, height))
    # ax.imshow(np.asarray(placement.img))
    # _debug_draw_boxes(ax, boxes, placement, word_placements)
    # plt.show(dpi=ppi)

    return word_placements
Calculates where the words of WordStreamData are placed and returns result
Keyword arguments: data -- Fully initialized WordStreamData where sudden and frequency is set. width -- width in inches of area where words should be placed. height -- height in inches of area where words should be placed. font_size -- tuple of minimum and maximum font size (in pt) which should be used.
def placed_in_polygon(topic_polygon: Path, wp: WordPlacement):
    """Check whether the bounding box of ``wp`` lies within ``topic_polygon``.

    The box is spanned by (wp.x, wp.y) and (wp.x + wp.width, wp.y + wp.height);
    the result is whatever ``topic_polygon.contains_path`` reports for it.
    """
    x0, y0 = wp.x, wp.y
    x1, y1 = x0 + wp.width, y0 + wp.height
    corners = [(x0, y0), (x1, y0), (x1, y1), (x0, y1)]
    return topic_polygon.contains_path(Path(corners))
Checks if WordPlacement is within this polygon
def place(word: WordPlacement, placement: Placement, box: Box, topic_boxes: pd.DataFrame, topic_polygon: Path) -> bool:
    """Try to place ``word`` by searching for unoccupied space within the given box and polygon.

    Starting from a random point in the box, candidate positions are generated
    along a spiral until one is inside the canvas, inside the topic polygon and
    accepted by ``placement.check_placement``.

    Keyword arguments:
    word -- word which should be placed; its coordinates are updated while searching
    placement -- image where the word should be placed in; used to check collisions with other words
    box -- box where the word should be placed
    topic_boxes -- unused, kept so existing callers keep working
    topic_polygon -- Path of the topic, used to see if the word fits fully in the area of the topic

    Returns True if the word could be placed, False if not.
    """
    max_delta = (box.width * box.width + box.height * box.height) ** 0.5
    start_x = box.x + (box.width * (random.random() + .5) / 2)
    start_y = box.y + (box.height * (random.random() + .5) / 2)
    spiral = achemedeanSpiral([box.width, box.height])
    direction = 1 if random.random() < .5 else -1
    step = direction * 0.5 * word.height
    t = -step  # the first loop iteration then evaluates the spiral at t == 0
    word.x = start_x
    word.y = start_y
    word.placed = False

    # The canvas check below treats y as ranging over [-height / 2, height / 2].
    half_height = placement.height / 2
    while True:
        t += step
        offset = spiral(t)
        if not offset:
            break
        dx, dy = offset

        # Give up once the spiral has left the area spanned by the box diagonal.
        if max(abs(dx), abs(dy)) >= max_delta:
            break

        word.x = start_x + dx
        word.y = start_y + dy

        outside_canvas = (
            word.x < 0
            or word.y < -half_height
            or word.x + word.width > placement.width
            or word.y + word.height > half_height
        )
        if (not outside_canvas
                and placed_in_polygon(topic_polygon, word)
                and placement.check_placement(word)):
            placement.place(word)
            return True

        if step == 0:
            # A zero-height word yields a zero step: the spiral would never
            # advance, so stop after this single attempt instead of looping forever.
            break

    return False
Tries to place word in figure by searching for unoccupied space within the given box and polygon. Returns True if word could be placed, False if not
Keyword arguments:
word -- Word which should be placed; coordinates and placed attribute are set if placed successfully placement -- Image where word should be placed in. Is used to check if word collides with any other words box -- Box where word should be placed topic_polygon -- Path for a topic, used to see if word can be placed fully in area of the given topic
def achemedeanSpiral(size):
    """Build an Archimedean-style spiral for an area of the given size.

    Keyword arguments:
    size -- sequence (width, height); only the ratio width / height is used

    Returns a function mapping the spiral parameter t to the offset [dx, dy].
    """
    aspect = size[0] / size[1]

    def spiral(t):
        # NOTE(review): this looks like a port of d3-cloud, whose spiral scales t
        # by 0.1 before using it for BOTH coordinates and angles. Here only the
        # x radius is scaled -- confirm this is intended.
        dx = aspect * (t * 0.1) * math.cos(t)
        dy = t * math.sin(t)
        return [dx, dy]

    return spiral
Function to calculate an Archimedean Spiral initialized with a maximum size.
returns function where next position can be calculated
def rectangularSpiral(size):
    """Build a rectangular spiral for an area of the given size.

    Keyword arguments:
    size -- sequence (width, height); the horizontal step is scaled by width / height

    Returns a stateful function: every call moves the current position one step
    (direction chosen from t) and returns the new position [x, y].
    """
    dy = 4
    dx = dy * size[0] / size[1]
    x = 0
    y = 0

    def spiral(t):
        nonlocal x, y
        sign = -1 if t < 0 else 1
        # Selects one of four directions; the direction changes at growing intervals of t.
        switch = (int(math.sqrt(1 + 4 * sign * t)) - sign) & 3
        if switch == 0:
            x += dx
        elif switch == 1:
            y += dy
        elif switch == 2:
            x -= dx
        else:
            y -= dy
        # The position was previously never returned, so callers always got None.
        return [x, y]

    return spiral
Function to calculate a rectangular Spiral initialized with a maximum size.
returns function where next position can be calculated
def draw_parlament(options: DrawOptions, legislative_periods: list[str], fulltext: bool = False) -> tuple[dict, WordStreamData]:
    """Load the data for the given legislative periods and place its words.

    Keyword arguments:
    options -- Configuration for size of figure and font
    legislative_periods -- List of roman numerals to indicate what data should be loaded
    fulltext -- True: fulltext of motions is drawn; otherwise eurovoc keywords are used.

    Returns the placements and the loaded WordStreamData.
    """
    data = load_parlament_data(legislative_periods, fulltext=fulltext)
    font_range = (options.min_font_size, options.max_font_size)
    word_placements = place_words(data, options.width, options.height, font_size=font_range)
    return word_placements, data
Loads the data for given legislative periods, places them in the figure configured by DrawOptions, and returns the placements and WordStreamData
Keyword arguments: options -- Configuration for size of figure and font legislative_periods -- List of roman numerals to indicate what data should be loaded fulltext -- True: fulltext of motions is drawn; otherwise eurovoc keywords are used.