-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
72 lines (53 loc) · 2.3 KB
/
app.py
File metadata and controls
72 lines (53 loc) · 2.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import streamlit as st
from tokenizer import Tokenizer
# Page heading for the tokenizer demo app.
st.title("🤖 Custom BPE Tokenizer")
@st.cache_resource
def load_tokenizer():
    """Create a ``Tokenizer`` and load its model file.

    Returns:
        The ``Tokenizer`` instance after ``load("Tokenizer/tokenizer.model")``
        has been called on it.
    """
    # Wrapped in st.cache_resource -- presumably so the model file is read
    # once rather than on every script rerun; see Streamlit's caching docs.
    bpe = Tokenizer()
    bpe.load("Tokenizer/tokenizer.model")
    return bpe
# Obtain the tokenizer while a spinner (show_time=True) is displayed.
with st.spinner(show_time=True):
    tokenizer = load_tokenizer()

# Two tabs: one for encoding text, one for managing special tokens.
tab_labels = ['Tokenizer', 'Add/Remove Special Tokens']
tokenization_tab, spcl_tokens_tab = st.tabs(tab_labels)
# --- "Tokenizer" tab: text input on the left, encoded output on the right ---

# Seed the session-state entries before any widget or callback reads them.
if "input_text" not in st.session_state:
    st.session_state.input_text = ""
if "tokenized_text" not in st.session_state:
    st.session_state.tokenized_text = ""


def tokenize():
    """Encode the current input text and store the result for display.

    Reads ``st.session_state.input_text``, passes it to ``tokenizer.encode``
    and writes the token IDs to ``st.session_state.tokenized_text`` as a
    single ``", "``-separated string.
    """
    # assumes encode() returns an iterable of token IDs -- TODO confirm
    token_ids = tokenizer.encode(st.session_state.input_text)
    # Join the IDs directly instead of editing the list's repr: the repr
    # already contains ", ", so replacing "," with ", " doubled the spacing.
    st.session_state.tokenized_text = ", ".join(
        str(token_id) for token_id in token_ids
    )


def clear_board():
    """Reset both the input text and the tokenized output to empty strings."""
    st.session_state.input_text = ""
    st.session_state.tokenized_text = ""


with tokenization_tab:
    input_col, output_col = st.columns((0.5, 0.5))

    with input_col:
        # The text area is bound to st.session_state.input_text via its key.
        st.text_area("Type your text below:", height=400, key='input_text')
        st.button("⚔ Tokenize", on_click=tokenize, use_container_width=True)

    with output_col:
        st.write("Tokenized text:")
        # NOTE(review): the container key has the same name as the
        # session-state entry shown inside it -- confirm Streamlit keeps
        # container keys separate from widget/session state.
        with st.container(height=200, border=True, key="tokenized_text"):
            st.write(st.session_state.tokenized_text)
        # NOTE(review): assumed to belong to the output column, mirroring the
        # "Tokenize" button in the input column -- confirm intended placement.
        st.button("🗑 Clear Board", on_click=clear_board, use_container_width=True)
with spcl_tokens_tab:
    # Seed the text-input state before the widget bound to it is created.
    if "spcl_token" not in st.session_state:
        st.session_state["spcl_token"] = ""

    st.text("Add or Remove the special tokens for your tokenizer")
    st.text_input("Enter special token to add or delete", width=350, key='spcl_token')
    add_col, del_col, clear_col = st.columns((0.3, 0.3, 0.3))

    # NOTE(review): `tokenizer` is the object returned by the
    # st.cache_resource-decorated loader, so the mutations below presumably
    # persist across reruns and sessions -- confirm that is intended.

    def add_token():
        """Register the entered text as a special token; ignore empty input."""
        entered = st.session_state.spcl_token
        if entered == '':
            return
        tokenizer.add_special_tokens([entered])

    def remove_token():
        """Remove the entered text from the special tokens; ignore empty input."""
        entered = st.session_state.spcl_token
        if entered == '':
            return
        tokenizer.remove_special_tokens([entered])

    def clear_text():
        """Empty the special-token input field."""
        st.session_state.spcl_token = ""

    # One button per column, created left to right.
    button_specs = (
        (add_col, "➕ Add", add_token),
        (del_col, "➖ Del", remove_token),
        (clear_col, "🗑 Clear", clear_text),
    )
    for column, label, handler in button_specs:
        with column:
            st.button(label, on_click=handler)