"""Token counting helper built on tiktoken.

Reconstructed from a garbled, fully commented-out paste: listing artifacts
(filename/size header, ruler line, per-line "N." prefixes) removed, body
indentation restored, and the return annotation corrected — the function
returns a (count, token_ids) tuple, not ``Union[int, str]``.
"""

import tiktoken


def tokenize(text: str, model: str = 'gpt-3.5-turbo') -> tuple[int, list[int]]:
    """Encode *text* with the tokenizer associated with *model*.

    Parameters
    ----------
    text : str
        The input string to tokenize.
    model : str
        Model name passed to ``tiktoken.encoding_for_model``; defaults to
        ``'gpt-3.5-turbo'``.

    Returns
    -------
    tuple[int, list[int]]
        ``(num_tokens, encoded)`` — the token count and the token-id list.

    Raises
    ------
    KeyError
        Propagated from tiktoken when *model* has no known encoding.
    """
    encoding = tiktoken.encoding_for_model(model)
    encoded = encoding.encode(text)
    # len() of the id list is the token count; return both so callers can
    # reuse the encoding without paying for a second encode pass.
    return len(encoded), encoded