class VocabFromText
Methods
- def get_size(self)
- def get_unk_index(self)
- def get_unk_token(self)
- def idx2word(self, n_w)
- def token_idx_2_string(self, tokens: typing.Iterable[int]) -> str
- def tokenize_and_index(self, sentence, regex = re.compile('([^\\w-]+)'), keep = "'s", remove = (',', '?')) -> typing.List[int]
- def word2idx(self, w)
Special methods
Data
- DEFAULT_TOKENS = ['<pad>', '<unk>', '<s>', '</s>']
- END_TOKEN = '</s>'
- PAD_TOKEN = '<pad>'
- START_TOKEN = '<s>'
- UNK_TOKEN = '<unk>'