# RUPunct_small
RUPunct_small is the smallest model in the RUPunct family. It is ideal for simple texts and scenarios where high-speed operation on CPU is required.
## Quick Start
RUPunct_small adds punctuation to text efficiently and is especially well suited to fast inference on CPU.
## Usage Examples
Basic Usage
from transformers import pipeline
from transformers import AutoTokenizer
# Model identifier handed to both the tokenizer loader and the pipeline.
pt = "RUPunct/RUPunct_small"

# Tokenizer loaded with accent stripping turned off and prefix-space
# handling turned on.
tk = AutoTokenizer.from_pretrained(
    pt,
    strip_accents=False,
    add_prefix_space=True,
)

# Token-classification ("ner") pipeline built on the same model and the
# tokenizer above. The code below reads each prediction's 'word' and
# 'entity_group' fields.
classifier = pipeline(
    "ner",
    model=pt,
    tokenizer=tk,
    aggregation_strategy="first",
)
# Punctuation appended to a token, keyed by the last component of the label.
_PUNCTUATION_BY_SUFFIX = {
    "O": "",
    "PERIOD": ".",
    "COMMA": ",",
    "QUESTION": "?",
    # Em dash, written as an escape so it survives re-encoding of this file,
    # and always set off from the word by a space.
    "TIRE": " \u2014",
    "DVOETOCHIE": ":",
    "VOSKL": "!",
    "PERIODCOMMA": ";",
    "DEFIS": "-",
    "MNOGOTOCHIE": "...",
    "QUESTIONVOSKL": "?!",
}

# Casing applied to a token, keyed by everything before the last underscore
# of the label.
_CASING_BY_PREFIX = {
    "LOWER": lambda word: word,
    "UPPER": str.capitalize,
    "UPPER_TOTAL": str.upper,
}


def process_token(token: str, label: str) -> str:
    """Apply the casing and punctuation encoded in *label* to *token*.

    A label has the form ``<CASING>_<PUNCTUATION>``, where the casing part is
    ``LOWER`` (token left as is), ``UPPER`` (``str.capitalize``) or
    ``UPPER_TOTAL`` (``str.upper``), and the punctuation part names the mark
    to append (``O`` appends nothing).

    Args:
        token: The word to restore.
        label: The predicted class for that word, e.g. ``"UPPER_PERIOD"``.

    Returns:
        The re-cased token with its punctuation appended. A label that is not
        recognised yields the token unchanged, so the caller can always
        concatenate the result.
    """
    # No punctuation name contains an underscore, so the last underscore
    # separates the casing part (which may itself be "UPPER_TOTAL") from it.
    prefix, _, suffix = label.rpartition("_")
    recase = _CASING_BY_PREFIX.get(prefix)
    mark = _PUNCTUATION_BY_SUFFIX.get(suffix)
    if recase is None or mark is None:
        return token
    return recase(token) + mark
# Interactive demo: read a line, classify it, print the restored text.
while True:
    try:
        input_text = input(":> ")
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or Ctrl-C ends the demo quietly instead of with a
        # traceback; the bare print() finishes the prompt line.
        print()
        break
    preds = classifier(input_text)
    # Joining avoids the stray leading space that repeated `" " + word`
    # concatenation put in front of the first word.
    output = " ".join(
        process_token(item['word'].strip(), item['entity_group'])
        for item in preds
    )
    print(">>>", output)
## License
This project is licensed under the MIT license.