# src.model

 1import streamlit as st
 2from sentence_transformers import SentenceTransformer
 3from transformers import AutoModelForTokenClassification  # type: ignore
 4from transformers import AutoTokenizer  # type: ignore
 5
 6
 7@st.experimental_singleton()
 8def get_model(model_name: str, labels=None):
 9    if labels is None:
10        return AutoModelForTokenClassification.from_pretrained(
11            model_name,
12            output_attentions=True,
13        )  # type: ignore
14    else:
15        id2label = {idx: tag for idx, tag in enumerate(labels)}
16        label2id = {tag: idx for idx, tag in enumerate(labels)}
17        return AutoModelForTokenClassification.from_pretrained(
18            model_name,
19            output_attentions=True,
20            num_labels=len(labels),
21            id2label=id2label,
22            label2id=label2id,
23        )  # type: ignore
24
25
@st.experimental_singleton()
def get_encoder(model_name: str, device: str = "cpu"):
    """Load and cache a sentence-embedding encoder.

    Args:
        model_name: SentenceTransformer model id or local path.
        device: Torch device string to place the encoder on (default "cpu").

    Returns:
        The cached ``SentenceTransformer`` instance.
    """
    return SentenceTransformer(model_name, device=device)
@st.experimental_singleton()
def get_tokenizer(tokenizer_name: str):
    """Load and cache a Hugging Face tokenizer.

    Args:
        tokenizer_name: Tokenizer/model id or local path.

    Returns:
        The cached tokenizer produced by ``AutoTokenizer.from_pretrained``.
    """
    return AutoTokenizer.from_pretrained(tokenizer_name)
@st.experimental_singleton()
def get_model(model_name: str, labels=None):
    """Load and cache a token-classification model.

    Args:
        model_name: Hugging Face model id or local path passed to
            ``from_pretrained``.
        labels: Optional sequence of label names. When given, the
            classification head is sized to ``len(labels)`` and the
            ``id2label``/``label2id`` mappings are attached to the config.

    Returns:
        A ``transformers`` token-classification model loaded with
        ``output_attentions=True``.
    """
    if labels is None:
        return AutoModelForTokenClassification.from_pretrained(
            model_name,
            output_attentions=True,
        )  # type: ignore
    # Build both directions of the label mapping from one enumeration.
    id2label = dict(enumerate(labels))
    label2id = {tag: idx for idx, tag in id2label.items()}
    return AutoModelForTokenClassification.from_pretrained(
        model_name,
        output_attentions=True,
        num_labels=len(labels),
        id2label=id2label,
        label2id=label2id,
    )  # type: ignore
@st.experimental_singleton()
def get_encoder(model_name: str, device: str = "cpu"):
    """Load and cache a sentence-embedding encoder.

    Args:
        model_name: SentenceTransformer model id or local path.
        device: Torch device string to place the encoder on (default "cpu").

    Returns:
        The cached ``SentenceTransformer`` instance.
    """
    return SentenceTransformer(model_name, device=device)
@st.experimental_singleton()
def get_tokenizer(tokenizer_name: str):
    """Load and cache a Hugging Face tokenizer.

    Args:
        tokenizer_name: Tokenizer/model id or local path.

    Returns:
        The cached tokenizer produced by ``AutoTokenizer.from_pretrained``.
    """
    return AutoTokenizer.from_pretrained(tokenizer_name)