tokens

token_counter

token_counter(
    *args,
    cache_enabled: bool,
    cache_path: typing.Optional[typing.Union[str, pathlib.Path]],
    cache_key_prefix: typing.Optional[str],
    include_model_in_cache_key: bool,
    return_cache_key: bool,
    enable_retries: bool,
    retry_on_exceptions: typing.Optional[list[Exception]],
    retry_on_all_exceptions: bool,
    max_retries: typing.Optional[int],
    retry_delay: typing.Optional[int],
    **kwargs
)
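
The keyword arguments fall into two groups: the cache_* flags and return_cache_key control result caching, while the retry-related flags control automatic retries. A minimal sketch of opting out of caching for a single call, assuming cache_enabled=False simply bypasses the cache (an assumption; the exact semantics are not spelled out in this reference):

# Count tokens without touching the cache (assumed behavior of
# cache_enabled=False; the other caching arguments are then irrelevant).
token_counter(
    model="gpt-4o",
    text="Hello, how are you?",
    cache_enabled=False
)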

token_counter(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "user", "content": "Hello, how are you?"}
    ]
)
# Output: 13
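
Calls can also be wrapped in automatic retries. A minimal sketch of combining the retry parameters; the exception type chosen here and the unit of retry_delay are illustrative assumptions, not documented behavior:

# Retry up to 3 times if the call raises ConnectionError (an illustrative
# choice of exception), waiting retry_delay between attempts (the unit is
# assumed to be seconds).
token_counter(
    model="gpt-4o",
    text="Hello, how are you?",
    enable_retries=True,
    retry_on_exceptions=[ConnectionError],
    max_retries=3,
    retry_delay=2
)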

If we set return_cache_key=True, the function is not executed; instead, only the cache key that would be used is returned.

# This will not execute the function, but only return the cache key.
cache_key = token_counter(
    model="gpt-4o",
    text="Hello, how are you?",
    return_cache_key=True
)

assert not is_in_cache(cache_key)

# This will cache the result.
token_counter(
    model="gpt-4o",
    text="Hello, how are you?"
)

assert is_in_cache(cache_key)
clear_cache_key(cache_key)
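
The shape of the key itself can presumably be tuned with cache_key_prefix and include_model_in_cache_key. A sketch under the assumption that include_model_in_cache_key folds the model name into the key, so the same text counted under two models yields two distinct keys:

# Assumed semantics of include_model_in_cache_key: the model name becomes
# part of the cache key, keeping per-model counts from colliding.
key_a = token_counter(
    model="gpt-4o",
    text="Hello, how are you?",
    include_model_in_cache_key=True,
    return_cache_key=True
)
key_b = token_counter(
    model="gpt-3.5-turbo",
    text="Hello, how are you?",
    include_model_in_cache_key=True,
    return_cache_key=True
)
assert key_a != key_b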