src.subpages.probing

A very direct and interactive way to test your model is by providing it with a list of text inputs and then inspecting the model outputs. The application features a multiline text field so the user can input multiple texts separated by newlines. For each text, the app will show a data frame containing the tokenized string, token predictions, probabilities and a visual indicator for low probability predictions -- these are the ones you should inspect first for prediction errors.

 1"""
 2A very direct and interactive way to test your model is by providing it with a list of text inputs and then inspecting the model outputs. The application features a multiline text field so the user can input multiple texts separated by newlines. For each text, the app will show a data frame containing the tokenized string, token predictions, probabilities and a visual indicator for low probability predictions -- these are the ones you should inspect first for prediction errors.
 3"""
 4import streamlit as st
 5
 6from src.subpages.page import Context, Page
 7from src.utils import device, tag_text
 8
 9_DEFAULT_SENTENCES = """
10Damit hatte er auf ihr letztes , völlig schiefgelaufenes Geschäftsessen angespielt .
11Damit einher geht übrigens auch , dass Marcella , Collocinis Tochter , keine wie auch immer geartete strafrechtliche Verfolgung zu befürchten hat .
12Nach dem Bell ’ schen Theorem , einer Physik jenseits der Quanten , ist die Welt , die wir für real halten , nicht objektivierbar .
13Dazu muss man wiederum wissen , dass die Aussagekraft von Tests , neben der Sensitivität und Spezifität , ganz entscheidend von der Vortestwahrscheinlichkeit abhängt .
14Haben Sie sich schon eingelebt ? « erkundigte er sich .
15Das Auto ein Totalschaden , mein Beifahrer ein weinender Jammerlappen .
16Seltsam , wunderte sie sich , dass das Stück nach mehr als eineinhalb Jahrhunderten noch so gut in Schuss ist .
17Oder auf den Strich gehen , Strümpfe stricken , Geld hamstern .
18Und Allah ist Allumfassend Allwissend .
19Und Pedro Moacir redete weiter : » Verzicht , Pater Antonio , Verzicht , zu großer Schmerz über Verzicht , Sehnsucht , die sich nicht erfüllt , die sich nicht erfüllen kann , das sind Qualen , die ein Verstummen nach sich ziehen können , oder Härte .
20Mama-San ging mittlerweile fast ausnahmslos nur mit Wei an ihrer Seite aus dem Haus , kaum je mit einem der Mädchen und niemals allein.
21""".strip()
22_DEFAULT_SENTENCES = """
23Elon Musk’s Berghain humiliation — I know the feeling
24Musk was also seen at a local spot called Sisyphos celebrating entrepreneur Adeo Ressi's birthday, according to The Times.
25""".strip()
26
27
class ProbingPage(Page):
    """Page on which the user types sentences and inspects the model's per-token output."""

    name = "Probing"
    icon = "fonts"

    def get_widget_defaults(self):
        """Return the default value for each widget on this page, keyed by widget key."""
        return {"probing_textarea": _DEFAULT_SENTENCES}

    def render(self, context: Context):
        """Render the interactive probing page.

        Reads the content of the "Sentences" text area, treats every line as
        one input, tags it with ``tag_text`` and shows the transposed result
        table inside an expander titled with the sentence.

        Args:
            context: Supplies the ``tokenizer`` and ``model`` handed to ``tag_text``.
        """
        st.title("🔠 Interactive Probing")

        with st.expander("💡", expanded=True):
            st.write(
                "A very direct and interactive way to test your model is by providing it with a list of text inputs and then inspecting the model outputs. The application features a multiline text field so the user can input multiple texts separated by newlines. For each text, the app will show a data frame containing the tokenized string, token predictions, probabilities and a visual indicator for low probability predictions -- these are the ones you should inspect first for prediction errors."
            )

        raw_text = st.text_area("Sentences", height=200, key="probing_textarea")
        if not raw_text.strip():
            return

        for line in raw_text.splitlines():
            # Commas are removed and the doubled space they leave behind is collapsed.
            sent = line.strip().replace(",", "").replace("  ", " ")
            # Skip blank (or comma-only) lines: they would otherwise create an
            # untitled expander and send an empty string to the tagger.
            if not sent.strip():
                continue
            with st.expander(sent):
                tagged = tag_text(sent, context.tokenizer, context.model, device)
                tagged = tagged.astype(str)
                # Drop the last two characters so the value parses as an int
                # (presumably a trailing ".0" -- confirm against tag_text).
                tagged["probs"] = tagged["probs"].apply(lambda x: x[:-2])
                # Two marks below 100, one mark below 1000, none otherwise.
                tagged["check"] = tagged["probs"].apply(
                    lambda x: "✅ ✅" if int(x) < 100 else "✅" if int(x) < 1000 else ""
                )
                st.dataframe(tagged.drop("hidden_states", axis=1).T)
class ProbingPage(src.subpages.page.Page):
class ProbingPage(Page):
    """Page on which the user types sentences and inspects the model's per-token output."""

    name = "Probing"
    icon = "fonts"

    def get_widget_defaults(self):
        """Return the default value for each widget on this page, keyed by widget key."""
        return {"probing_textarea": _DEFAULT_SENTENCES}

    def render(self, context: Context):
        """Render the interactive probing page.

        Reads the content of the "Sentences" text area, treats every line as
        one input, tags it with ``tag_text`` and shows the transposed result
        table inside an expander titled with the sentence.

        Args:
            context: Supplies the ``tokenizer`` and ``model`` handed to ``tag_text``.
        """
        st.title("🔠 Interactive Probing")

        with st.expander("💡", expanded=True):
            st.write(
                "A very direct and interactive way to test your model is by providing it with a list of text inputs and then inspecting the model outputs. The application features a multiline text field so the user can input multiple texts separated by newlines. For each text, the app will show a data frame containing the tokenized string, token predictions, probabilities and a visual indicator for low probability predictions -- these are the ones you should inspect first for prediction errors."
            )

        raw_text = st.text_area("Sentences", height=200, key="probing_textarea")
        if not raw_text.strip():
            return

        for line in raw_text.splitlines():
            # Commas are removed and the doubled space they leave behind is collapsed.
            sent = line.strip().replace(",", "").replace("  ", " ")
            # Skip blank (or comma-only) lines: they would otherwise create an
            # untitled expander and send an empty string to the tagger.
            if not sent.strip():
                continue
            with st.expander(sent):
                tagged = tag_text(sent, context.tokenizer, context.model, device)
                tagged = tagged.astype(str)
                # Drop the last two characters so the value parses as an int
                # (presumably a trailing ".0" -- confirm against tag_text).
                tagged["probs"] = tagged["probs"].apply(lambda x: x[:-2])
                # Two marks below 100, one mark below 1000, none otherwise.
                tagged["check"] = tagged["probs"].apply(
                    lambda x: "✅ ✅" if int(x) < 100 else "✅" if int(x) < 1000 else ""
                )
                st.dataframe(tagged.drop("hidden_states", axis=1).T)

Base class for all pages.

ProbingPage()
name: str = 'Probing'
icon: str = 'fonts'
def get_widget_defaults(self)
33    def get_widget_defaults(self):
34        return {"probing_textarea": _DEFAULT_SENTENCES}

This function holds the default settings for all the page's widgets.

Returns

dict: A dictionary of widget defaults, where the keys are the widget names and the values are their default values.

def render(self, context: src.subpages.page.Context)
36    def render(self, context: Context):
37        st.title("🔠 Interactive Probing")
38
39        with st.expander("💡", expanded=True):
40            st.write(
41                "A very direct and interactive way to test your model is by providing it with a list of text inputs and then inspecting the model outputs. The application features a multiline text field so the user can input multiple texts separated by newlines. For each text, the app will show a data frame containing the tokenized string, token predictions, probabilities and a visual indicator for low probability predictions -- these are the ones you should inspect first for prediction errors."
42            )
43
44        sentences = st.text_area("Sentences", height=200, key="probing_textarea")
45        if not sentences.strip():
46            return
47        sentences = [sentence.strip() for sentence in sentences.splitlines()]
48
49        for sent in sentences:
50            sent = sent.replace(",", "").replace("  ", " ")
51            with st.expander(sent):
52                tagged = tag_text(sent, context.tokenizer, context.model, device)
53                tagged = tagged.astype(str)
54                tagged["probs"] = tagged["probs"].apply(lambda x: x[:-2])
55                tagged["check"] = tagged["probs"].apply(
56                    lambda x: "✅ ✅" if int(x) < 100 else "✅" if int(x) < 1000 else ""
57                )
58                st.dataframe(tagged.drop("hidden_states", axis=1).T)

This function renders the page.