Hosted Cache - api.litellm.ai
Use api.litellm.ai for caching completion() and embedding() responses
Quick Start Usage - Completion
import litellm
from litellm import completion
from litellm.caching.caching import Cache
# Initialise litellm's cache to use the hosted api.litellm.ai backend.
litellm.cache = Cache(type="hosted")

# Make two identical completion calls with caching enabled.
response1 = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Tell me a joke."}],
    caching=True,
)
response2 = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Tell me a joke."}],
    caching=True,
)
# response1 == response2: response1 is cached, so the identical second
# request yields the same response.