Class: Transformers::Distilbert::DistilBertTokenizerFast

Inherits:
PreTrainedTokenizerFast show all
Defined in:
lib/transformers/models/distilbert/tokenization_distilbert_fast.rb

Constant Summary collapse

VOCAB_FILES_NAMES =
{vocab_file: "vocab.txt", tokenizer_file: "tokenizer.json"}

Constants included from SpecialTokensMixin

SpecialTokensMixin::SPECIAL_TOKENS_ATTRIBUTES

Instance Attribute Summary

Attributes inherited from PreTrainedTokenizerBase

#init_kwargs, #model_max_length

Instance Method Summary collapse

Methods inherited from PreTrainedTokenizerFast

#_convert_token_to_id_with_added_voc, #backend_tokenizer, #convert_ids_to_tokens, #convert_tokens_to_ids, #convert_tokens_to_string, #get_vocab, #is_fast, #vocab

Methods inherited from PreTrainedTokenizerBase

#_eventual_warn_about_too_long_sequence, _from_pretrained, #call, from_pretrained

Methods included from ClassAttribute

#class_attribute

Methods included from SpecialTokensMixin

#bos_token_id, #cls_token_id, #eos_token_id, #pad_token_id, #sep_token_id, #special_tokens_map, #unk_token_id

Constructor Details

#initialize(vocab_file: nil, tokenizer_file: nil, do_lower_case: true, unk_token: "[UNK]", sep_token: "[SEP]", pad_token: "[PAD]", cls_token: "[CLS]", mask_token: "[MASK]", tokenize_chinese_chars: true, strip_accents: nil, **kwargs) ⇒ DistilBertTokenizerFast

Returns a new instance of DistilBertTokenizerFast.



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/transformers/models/distilbert/tokenization_distilbert_fast.rb', line 24

# Builds a new DistilBertTokenizerFast.
#
# All arguments are handed to the superclass initializer: +vocab_file+ is
# passed positionally, every other named option (and anything captured in
# +kwargs+) is passed as a keyword. After +super+ returns, +do_lower_case+
# is additionally stored in +@do_lower_case+.
#
# @param vocab_file [Object, nil] forwarded positionally to +super+
# @param tokenizer_file [Object, nil] forwarded to +super+
# @param do_lower_case [Boolean] forwarded to +super+ and kept in +@do_lower_case+
# @param unk_token [String] forwarded to +super+
# @param sep_token [String] forwarded to +super+
# @param pad_token [String] forwarded to +super+
# @param cls_token [String] forwarded to +super+
# @param mask_token [String] forwarded to +super+
# @param tokenize_chinese_chars [Boolean] forwarded to +super+
# @param strip_accents [Object, nil] forwarded to +super+
# @param kwargs [Hash] any further keyword options, forwarded to +super+ unchanged
# @raise [Todo] if +@backend_tokenizer+ is truthy once +super+ has returned
def initialize(
  vocab_file: nil,
  tokenizer_file: nil,
  do_lower_case: true,
  unk_token: "[UNK]",
  sep_token: "[SEP]",
  pad_token: "[PAD]",
  cls_token: "[CLS]",
  mask_token: "[MASK]",
  tokenize_chinese_chars: true,
  strip_accents: nil,
  **kwargs
)
  # Named options this class declares explicitly; they can never collide
  # with +kwargs+ because the signature captures them first.
  tokenizer_options = {
    tokenizer_file: tokenizer_file,
    do_lower_case: do_lower_case,
    unk_token: unk_token,
    sep_token: sep_token,
    pad_token: pad_token,
    cls_token: cls_token,
    mask_token: mask_token,
    tokenize_chinese_chars: tokenize_chinese_chars,
    strip_accents: strip_accents
  }
  super(vocab_file, **tokenizer_options, **kwargs)

  # NOTE(review): presumably a placeholder for backend-tokenizer-specific
  # setup that has not been written yet - confirm against the superclass.
  raise Todo if @backend_tokenizer

  @do_lower_case = do_lower_case
end

Instance Method Details

#build_inputs_with_special_tokens(token_ids_0, token_ids_1 = nil) ⇒ Object

Raises: Todo (always; this method is not implemented yet)



58
59
60
# File 'lib/transformers/models/distilbert/tokenization_distilbert_fast.rb', line 58

# Placeholder: not implemented yet, so every call raises +Todo+.
#
# @param token_ids_0 [Object] currently unused
# @param token_ids_1 [Object, nil] currently unused; defaults to +nil+
# @raise [Todo] always
def build_inputs_with_special_tokens(token_ids_0, token_ids_1 = nil)
  raise Todo
end

#create_token_type_ids_from_sequences(token_ids_0, token_ids_1 = nil) ⇒ Object

Raises: Todo (always; this method is not implemented yet)



62
63
64
# File 'lib/transformers/models/distilbert/tokenization_distilbert_fast.rb', line 62

# Placeholder: not implemented yet, so every call raises +Todo+.
#
# @param token_ids_0 [Object] currently unused
# @param token_ids_1 [Object, nil] currently unused; defaults to +nil+
# @raise [Todo] always
def create_token_type_ids_from_sequences(token_ids_0, token_ids_1 = nil)
  raise Todo
end

#save_vocabulary(save_directory, filename_prefix: nil) ⇒ Object

Raises: Todo (always; this method is not implemented yet)



66
67
68
# File 'lib/transformers/models/distilbert/tokenization_distilbert_fast.rb', line 66

# Placeholder: not implemented yet, so every call raises +Todo+.
#
# @param save_directory [Object] currently unused
# @param filename_prefix [Object, nil] currently unused; defaults to +nil+
# @raise [Todo] always
def save_vocabulary(save_directory, filename_prefix: nil)
  raise Todo
end