src.draw

View Source

  1import random
  2import math
  3from dataclasses import dataclass
  4
  5import matplotlib.pyplot as plt
  6import numpy as np
  7import pandas as pd
  8from matplotlib.patches import Rectangle, PathPatch
  9from matplotlib.path import Path
 10
 11from src.boxes import build_boxes, Box, box_from_row, topic_boxes_to_path
 12from src.util import WordStreamData, Word, get_max_sudden, load_parlament_data, font_path
 13from src.placement import Placement, WordPlacement
 14
 15
 16@dataclass
 17class DrawOptions:
 18    """Class to set configuration for WordStream Visualisation."""
 19    width: int
 20    """Width of visualisation in inches"""
 21    height: int
 22    """Height of visualisation in inches"""
 23    min_font_size: float
 24    "minimal font size of words placed in visualisation in pt"
 25    max_font_size: float
 26    "maximal font size of words placed in visualisation in pt"
 27
 28
 29def init_word_placement(placement: Placement, word: Word) -> WordPlacement:
 30    """Initializes WordPlacement by setting the word, the size of the bounding box and the sprite
 31
 32    Keyword arguments:
 33    placement: Placement object where to place the Word in.
 34    word: 'Word' object with text frequency and sudden attention measure.
 35    """
 36    wp = WordPlacement(0, 0, 0, 0, 0, word=word)
 37    placement.get_size(wp)
 38    placement.get_sprite(wp)
 39    return wp
 40
 41
 42def place_topic(placement: Placement, words: pd.Series, topic_boxes: pd.DataFrame, topic_polygon: Path) -> list[dict]:
 43    """Places words in the boxes of a topic.
 44    Does not guarantee that all words can be placed
 45
 46    Keyword Arguments:
 47    placement -- Figure where to place the words in
 48    words -- Series of words which should be placed
 49    topic_boxes -- the boxes where to place the words in
 50    topic_polygon -- the polygon one get's by stitching the boxes together
 51    """
 52
 53    # place all words in the first box then second and so on
 54    word_placements = words.apply(lambda ws: list(map(lambda w: init_word_placement(placement, w), ws))).tolist()
 55    n_words = words.apply(lambda ws: len(ws)).sum()
 56
 57    words_tried = 0
 58    words_placed = 0
 59    while words_tried < n_words:
 60        # perform run over next most frequent words in each box
 61        for i, words_in_box in enumerate(word_placements):
 62            for word_placement in words_in_box:
 63                words_tried += 1
 64                placed = place(word_placement, placement, box=box_from_row(topic_boxes.iloc[i]), topic_boxes=topic_boxes, topic_polygon=topic_polygon)
 65                words_placed += placed
 66
 67    print(f"Placed {words_placed}/{n_words} in topic!")
 68    placements_flat = [w for ws in word_placements for w in ws]
 69    return list(map(lambda w: w.to_dict(), filter(lambda w: w.placed, placements_flat)))
 70
 71
 72def place_words(data: WordStreamData, width: int, height: int, font_size=tuple[float, float]) -> dict:
 73    """Calculates where the words of WordStreamData are placed and returns result
 74
 75    Keyword arguments:
 76    data -- Fully initialized WordStreamData where sudden and frequency is set.
 77    width -- width in inches of area where words should be placed.
 78    height -- height in inches of area where words should be placed.
 79    font_size -- tupel of minimum and maximum font size (in pt) which should be used.
 80    """
 81
 82    min_font, max_font = font_size
 83    ppi = 200
 84    boxes = build_boxes(data, width, height)
 85    max_sudden = get_max_sudden(data)
 86    placement = Placement(width, height, ppi, max_sudden, min_font, max_font, font_path + "/Rubik-Medium.ttf")
 87    word_placements = dict()
 88    for topic in data.topics:
 89        topic_polygon = topic_boxes_to_path(boxes[topic])
 90        word_placements[topic] = place_topic(placement, data.df[topic], boxes[topic], topic_polygon)
 91
 92    # fig, ax = plt.subplots(1, 1, figsize=(width, height))
 93    # ax.imshow(np.asarray(placement.img))
 94    # _debug_draw_boxes(ax, boxes, placement, word_placements)
 95    # plt.show(dpi=ppi)
 96
 97    return word_placements
 98
 99
def placed_in_polygon(topic_polygon: Path, wp: WordPlacement):
    """Return whether the bounding box of ``wp`` is contained in ``topic_polygon``.

    The box is spanned by ``(wp.x, wp.y)`` and ``(wp.x + wp.width, wp.y + wp.height)``
    and tested with ``Path.contains_path``.
    """
    x0, y0 = wp.x, wp.y
    x1 = wp.x + wp.width
    y1 = wp.y + wp.height
    corners = [(x0, y0), (x1, y0), (x1, y1), (x0, y1)]
    return topic_polygon.contains_path(Path(corners))
105
106
def place(word: WordPlacement, placement: Placement, box: Box, topic_boxes: pd.DataFrame, topic_polygon: Path) -> bool:
    """Tries to place word in figure by searching for unoccupied space within the given box and polygon.
    Returns True if the word could be placed, False if not.

    Keyword arguments:
    word -- Word which should be placed. Its coordinates are overwritten while searching and
            ``placed`` is reset to False up front (``placement.place`` presumably marks it as
            placed on success -- confirm in Placement).
    placement -- Image where word should be placed in. Is used to check if word collides with any other words
    box -- Box where word should be placed; the search starts at a random point in its middle half
    topic_boxes -- currently unused, kept for interface compatibility
    topic_polygon -- Path for a topic, used to see if word can be placed fully in area of the given topic
    """
    # Offsets at or beyond the box diagonal end the search.
    max_delta = (box.width * box.width + box.height * box.height) ** 0.5
    start_x = box.x + (box.width * (random.random() + .5) / 2)
    start_y = box.y + (box.height * (random.random() + .5) / 2)
    spiral = achemedeanSpiral([box.width, box.height])
    direction = 1 if random.random() < .5 else -1
    step = direction * 0.5 * word.height  # spiral parameter increment per probe

    word.x = start_x
    word.y = start_y
    word.placed = False
    half_canvas_height = placement.height / 2

    t = 0.0
    while True:
        offset = spiral(t)
        if not offset:
            break
        dx, dy = offset[0], offset[1]
        if max(abs(dx), abs(dy)) >= max_delta:
            break

        word.x = start_x + dx
        word.y = start_y + dy

        # The canvas is checked against [0, width] x [-height/2, height/2].
        outside_canvas = (
            word.x < 0
            or word.y < -half_canvas_height
            or word.x + word.width > placement.width
            or word.y + word.height > half_canvas_height
        )
        # Cheap geometric checks first, the collision check last.
        if (
            not outside_canvas
            and placed_in_polygon(topic_polygon, word)
            and placement.check_placement(word)
        ):
            placement.place(word)
            return True

        if step == 0:
            # A zero-height word yields a zero step: the spiral would never
            # advance and the same rejected position would be retried forever.
            break
        t += step

    return False
158
159
def achemedeanSpiral(size):
    """Build an Archimedean-style spiral function for an area of the given size.

    Keyword arguments:
    size -- sequence ``[width, height]``; only the ratio width / height is used.

    Returns a function ``spiral(t)`` yielding the offset ``[dx, dy]`` for parameter ``t``.
    """
    aspect = size[0] / size[1]

    def spiral(t):
        # NOTE(review): only the x radius is scaled by 0.1; the angle and the y
        # radius use the unscaled t. d3-cloud's archimedeanSpiral scales t once
        # (``t *= .1``) and uses it everywhere -- confirm this asymmetry is intended.
        x_offset = aspect * (t * 0.1) * math.cos(t)
        y_offset = t * math.sin(t)
        return [x_offset, y_offset]

    return spiral
171
172
def rectangularSpiral(size):
    """Build a rectangular spiral function for an area of the given size.

    Keyword arguments:
    size -- sequence ``[width, height]``; the horizontal step is the vertical
            step (4) scaled by width / height.

    Returns a stateful function ``spiral(t)``: every call moves the current
    position one step in the direction selected by ``t`` and returns the new
    offset ``[x, y]``. (Previously the position was updated but never returned,
    so callers always received ``None``.)
    """
    dy = 4
    dx = dy * size[0] / size[1]
    x = 0
    y = 0

    def spiral(t):
        nonlocal x, y
        sign = -1 if t < 0 else 1
        # The two low bits select the leg of the spiral: right, up, left, down.
        leg = (int(math.sqrt(1 + 4 * sign * t)) - sign) & 3
        if leg == 0:
            x += dx
        elif leg == 1:
            y += dy
        elif leg == 2:
            x -= dx
        else:
            y -= dy
        return [x, y]

    return spiral
198
199
# Lookup table of the available spiral factories, keyed by name.
spirals = dict(
    achemedean=achemedeanSpiral,
    rectangular=rectangularSpiral,
)
204
205
def _debug_draw_boxes(ax, boxes: dict[str, pd.DataFrame], placement: Placement, placements: dict[str, list]):
    """Draws boxes and word anchor points - used for debugging to see if boxes are drawn properly.

    Keyword arguments:
    ax -- matplotlib axes to draw on
    boxes -- boxes per topic, one row per box
    placement -- Placement whose mapping functions convert box/word coordinates to pixels
    placements -- placed words per topic; each word is a dict with at least "x" and "y"
    """
    colors = ["red", "green", "blue", "purple"]
    for n, topic_boxes in enumerate(boxes.values()):
        # Cycle through the palette so topics beyond the fourth are still drawn
        # (zipping against the palette silently dropped them).
        col = colors[n % len(colors)]
        for idx in topic_boxes.index:
            box = box_from_row(topic_boxes.loc[idx])
            x_px = placement.width_map(box.x)
            y_px = placement.height_map(box.y)
            height_px = placement.box_height_map(box.height)
            width_px = placement.box_width_map(box.width)
            ax.add_patch(Rectangle((x_px, y_px), width_px, height_px, edgecolor=col, facecolor="none", lw=2))

    # Mark the stored (x, y) position of every placed word with a red dot.
    for words in placements.values():
        for word in words:
            x_px = placement.width_map(word["x"])
            y_px = placement.height_map(word["y"])
            ax.plot(x_px, y_px, 'ro')
222
223
224
def draw_parlament(options: DrawOptions, legislative_periods: list[str], fulltext: bool = False) -> tuple[dict, WordStreamData]:
    """Load the data for the given legislative periods and place its words in a figure sized by ``options``.

    Keyword arguments:
    options -- Configuration for size of figure and font
    legislative_periods -- List of roman numerals to indicate what data should be loaded
    fulltext -- True: fulltext of motions is drawn; otherwise eurovoc keywords are used.

    Returns a tuple of the word placements and the loaded WordStreamData.
    """
    data = load_parlament_data(legislative_periods, fulltext=fulltext)
    font_range = (options.min_font_size, options.max_font_size)
    word_placements = place_words(data, options.width, options.height, font_size=font_range)
    return word_placements, data
237
238
if __name__ == '__main__':
    import sys

    # draw_parlament requires the legislative periods; the previous call omitted
    # them and failed with a TypeError. Take them from the command line instead.
    periods = sys.argv[1:]
    if not periods:
        sys.exit("usage: python -m src.draw <legislative period> [<legislative period> ...]")
    options = DrawOptions(width=24, height=12, min_font_size=15, max_font_size=35)
    draw_parlament(options, periods)

@dataclass

class DrawOptions: View Source

17@dataclass
18class DrawOptions:
19    """Class to set configuration for WordStream Visualisation."""
20    width: int
21    """Width of visualisation in inches"""
22    height: int
23    """Height of visualisation in inches"""
24    min_font_size: float
25    "minimal font size of words placed in visualisation in pt"
26    max_font_size: float
27    "maximal font size of words placed in visualisation in pt"

Class to set configuration for WordStream Visualisation.

DrawOptions(width: int, height: int, min_font_size: float, max_font_size: float)

width: int

Width of visualisation in inches

height: int

Height of visualisation in inches

min_font_size: float

minimal font size of words placed in visualisation in pt

max_font_size: float

maximal font size of words placed in visualisation in pt

def init_word_placement( placement: src.placement.Placement, word: src.util.Word) -> src.placement.WordPlacement: View Source

30def init_word_placement(placement: Placement, word: Word) -> WordPlacement:
31    """Initializes WordPlacement by setting the word, the size of the bounding box and the sprite
32
33    Keyword arguments:
34    placement: Placement object where to place the Word in.
35    word: 'Word' object with text frequency and sudden attention measure.
36    """
37    wp = WordPlacement(0, 0, 0, 0, 0, word=word)
38    placement.get_size(wp)
39    placement.get_sprite(wp)
40    return wp

Initializes WordPlacement by setting the word, the size of the bounding box and the sprite

Keyword arguments: placement: Placement object where to place the Word in. word: 'Word' object with text frequency and sudden attention measure.

def place_topic( placement: src.placement.Placement, words: pandas.core.series.Series, topic_boxes: pandas.core.frame.DataFrame, topic_polygon: matplotlib.path.Path) -> list[dict]: View Source

43def place_topic(placement: Placement, words: pd.Series, topic_boxes: pd.DataFrame, topic_polygon: Path) -> list[dict]:
44    """Places words in the boxes of a topic.
45    Does not guarantee that all words can be placed
46
47    Keyword Arguments:
48    placement -- Figure where to place the words in
49    words -- Series of words which should be placed
50    topic_boxes -- the boxes where to place the words in
51    topic_polygon -- the polygon one get's by stitching the boxes together
52    """
53
54    # place all words in the first box then second and so on
55    word_placements = words.apply(lambda ws: list(map(lambda w: init_word_placement(placement, w), ws))).tolist()
56    n_words = words.apply(lambda ws: len(ws)).sum()
57
58    words_tried = 0
59    words_placed = 0
60    while words_tried < n_words:
61        # perform run over next most frequent words in each box
62        for i, words_in_box in enumerate(word_placements):
63            for word_placement in words_in_box:
64                words_tried += 1
65                placed = place(word_placement, placement, box=box_from_row(topic_boxes.iloc[i]), topic_boxes=topic_boxes, topic_polygon=topic_polygon)
66                words_placed += placed
67
68    print(f"Placed {words_placed}/{n_words} in topic!")
69    placements_flat = [w for ws in word_placements for w in ws]
70    return list(map(lambda w: w.to_dict(), filter(lambda w: w.placed, placements_flat)))

Places words in the boxes of a topic. Does not guarantee that all words can be placed

Keyword Arguments: placement -- Figure where to place the words in; words -- Series of words which should be placed; topic_boxes -- the boxes where to place the words in; topic_polygon -- the polygon one gets by stitching the boxes together

def place_words( data: src.util.WordStreamData, width: int, height: int, font_size=tuple[float, float]) -> dict: View Source

73def place_words(data: WordStreamData, width: int, height: int, font_size=tuple[float, float]) -> dict:
74    """Calculates where the words of WordStreamData are placed and returns result
75
76    Keyword arguments:
77    data -- Fully initialized WordStreamData where sudden and frequency is set.
78    width -- width in inches of area where words should be placed.
79    height -- height in inches of area where words should be placed.
80    font_size -- tupel of minimum and maximum font size (in pt) which should be used.
81    """
82
83    min_font, max_font = font_size
84    ppi = 200
85    boxes = build_boxes(data, width, height)
86    max_sudden = get_max_sudden(data)
87    placement = Placement(width, height, ppi, max_sudden, min_font, max_font, font_path + "/Rubik-Medium.ttf")
88    word_placements = dict()
89    for topic in data.topics:
90        topic_polygon = topic_boxes_to_path(boxes[topic])
91        word_placements[topic] = place_topic(placement, data.df[topic], boxes[topic], topic_polygon)
92
93    # fig, ax = plt.subplots(1, 1, figsize=(width, height))
94    # ax.imshow(np.asarray(placement.img))
95    # _debug_draw_boxes(ax, boxes, placement, word_placements)
96    # plt.show(dpi=ppi)
97
98    return word_placements

Calculates where the words of WordStreamData are placed and returns result

Keyword arguments: data -- Fully initialized WordStreamData where sudden and frequency is set. width -- width in inches of area where words should be placed. height -- height in inches of area where words should be placed. font_size -- tuple of minimum and maximum font size (in pt) which should be used.

def placed_in_polygon(topic_polygon: matplotlib.path.Path, wp: src.placement.WordPlacement): View Source

101def placed_in_polygon(topic_polygon: Path, wp: WordPlacement):
102    """Checks if WordPlacement is within this polygon"""
103
104    word_box = [(wp.x, wp.y), (wp.x + wp.width, wp.y), (wp.x + wp.width, wp.y + wp.height), (wp.x, wp.y + wp.height)]
105    return topic_polygon.contains_path(Path(word_box))

Checks if WordPlacement is within this polygon

def place( word: src.placement.WordPlacement, placement: src.placement.Placement, box: src.boxes.Box, topic_boxes: pandas.core.frame.DataFrame, topic_polygon: matplotlib.path.Path) -> bool: View Source

108def place(word: WordPlacement, placement: Placement, box: Box, topic_boxes: pd.DataFrame, topic_polygon: Path) -> bool:
109    """Tries to place word in figure by searching for unoccupied space in within the given box and polygon.
110    Returns True of word could be placed, False if not
111
112    Keyword arguments:
113
114    word -- Word which should be placed, Coordinates and placed attribute are set if placed sucessfully
115    placement -- Image where word should be placed in. Is used to check if word collides with any other words
116    box -- Box where word should be placed
117    topic_polygon -- Path for a topic, used to see if word can be placed fully in area of the given topic
118    """
119    maxDelta = (box.width * box.width + box.height * box.height) ** 0.5
120    startX = box.x + (box.width * (random.random() + .5) / 2)
121    startY = box.y + (box.height * (random.random() + .5) / 2)
122    s = achemedeanSpiral([box.width, box.height])
123    dt = 1 if random.random() < .5 else -1
124    dt *= 0.5 * word.height
125    t = -dt
126    dxdy, dx, dy = None, None, None
127    word.x = startX
128    word.y = startY
129    word.placed = False
130
131    while True:
132        t += dt
133        dxdy = s(t)
134        if not dxdy:
135            break
136
137        dx = dxdy[0]
138        dy = dxdy[1]
139
140        if max(abs(dx), abs(dy)) >= maxDelta:
141            break
142
143        word.x = startX + dx
144        word.y = startY + dy
145
146        # check if word is placed inside the canvas first
147        if word.x < 0 or word.y < -placement.height / 2 or word.x + word.width > placement.width or word.y + word.height > placement.height / 2:
148            continue
149        # also check if word is placed inside the current box first
150        if not placed_in_polygon(topic_polygon, word):
151            continue
152
153        if placement.check_placement(word):
154            placement.place(word)
155            # print(f"Success placing {word.word.text} with {(word.sprite > 0).sum()} pixels ")
156            return True
157
158    return False

Tries to place the word in the figure by searching for unoccupied space within the given box and polygon. Returns True if the word could be placed, False if not.

Keyword arguments:

word -- Word which should be placed; coordinates and the placed attribute are set if placed successfully. placement -- Image where the word should be placed in; used to check if the word collides with any other words. box -- Box where the word should be placed. topic_polygon -- Path for a topic, used to see if the word can be placed fully in the area of the given topic.

def achemedeanSpiral(size): View Source

161def achemedeanSpiral(size):
162    """Function to calculate an Archemedean Spiral
163    initialized with a maximum size.
164
165    returns function where next position can be calculated"""
166    e = size[0] / size[1]
167
168    def spiral(t):
169        return [e * (t * 0.1) * math.cos(t), t * math.sin(t)]
170
171    return spiral

Function to calculate an Archimedean spiral, initialized with a maximum size.

returns function where next position can be calculated

def rectangularSpiral(size): View Source

174def rectangularSpiral(size):
175    """Function to calculate a rectangular Spiral
176    initialized with a maximum size.
177
178    returns function where next position can be calculated
179    """
180    dy = 4
181    dx = dy * size[0] / size[1]
182    x = 0
183    y = 0
184
185    def spiral(t):
186        sign = -1 if t < 0 else 1
187        switch = (int(math.sqrt(1 + 4 * sign * t)) - sign) & 3
188        nonlocal x, y
189        if switch == 0:
190            x += dx
191        elif switch == 1:
192            y += dy
193        elif switch == 2:
194            x -= dx
195        else:
196            y -= dy
197
198    return spiral

Function to calculate a rectangular Spiral initialized with a maximum size.

returns function where next position can be calculated

def draw_parlament( options: src.draw.DrawOptions, legislative_periods: list[str], fulltext: bool = False) -> tuple[dict, src.util.WordStreamData]: View Source

226def draw_parlament(options: DrawOptions, legislative_periods: list[str], fulltext: bool = False) -> tuple[dict, WordStreamData]:
227    """ Loads the data for given legislative periods, places them in figure generated by Drawoptions
228    returns the placements and WordStreamData
229
230    Keyword arguments:
231    options -- Configuration for size of figure and font
232    legislative_periods -- List of roman numerals to indicate what data should be loaded
233    fulltext -- True: fulltext of motions is drawn; otherwise eurovoc keywords are used.
234    """
235    data = load_parlament_data(legislative_periods, fulltext=fulltext)
236    placement = place_words(data, options.width, options.height, font_size=(options.min_font_size, options.max_font_size))
237    return placement, data

Loads the data for the given legislative periods, places the words in a figure sized by DrawOptions, and returns the placements and the WordStreamData.

Keyword arguments: options -- Configuration for size of figure and font legislative_periods -- List of roman numerals to indicate what data should be loaded fulltext -- True: fulltext of motions is drawn; otherwise eurovoc keywords are used.