-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathconfig-example.yaml
More file actions
74 lines (69 loc) · 2.49 KB
/
config-example.yaml
File metadata and controls
74 lines (69 loc) · 2.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# Example request:
# curl https://127.0.0.1:8080/chat/completions \
#   -H "Content-Type: application/json" \
#   -d '{
#     "model": "groq/llama-3.2-90b-vision-preview",
#     "messages": [
#       {"role": "user", "content": "What is the meaning of life?"}
#     ]
#   }'
models:
# GigaChat (corporate access).
# Model list: https://developers.sber.ru/docs/ru/gigachat/models
# `token` holds clientID and clientSecret joined by a colon; the access token
# itself lives for 30 minutes and is refreshed automatically.
# `url` is not used for this provider (requests go through the library).
- name: gigachat/GigaChat
  provider: gigachat
  priority: 1
  # Request quotas per time window.
  requests_per_minute: 60
  requests_per_hour: 50000
  requests_per_day: 1000000
  url: "https://gigachat.devices.sberbank.ru/api/v1"
  token: "clientID:clientSecret"
  max_request_length: 32768
  model_size: SMALL
# Hugging Face Inference API.
# Supported models: https://huggingface.co/docs/api-inference/supported-models
# Model list: https://huggingface.co/models?inference=warm&sort=trending
# Only 1,000 requests per day are available, shared across all models.
- name: huggingface/Mistral-Nemo-Instruct-2407
  provider: huggingface
  priority: 2
  # Request quotas per time window.
  requests_per_minute: 50
  requests_per_hour: 1000
  requests_per_day: 1000
  url: "https://api-inference.huggingface.co/models/mistralai/Mistral-Nemo-Instruct-2407/v1/chat/completions"
  # Placeholder; replace with a real Hugging Face token.
  token: "HF_TOKEN"
  max_request_length: 32768
  model_size: SMALL
# Groq.
# Models: https://console.groq.com/docs/models
# Limits: https://console.groq.com/settings/limits
- name: groq/llama-3.2-90b-vision-preview
  provider: groq
  priority: 1
  # Request quotas per time window.
  requests_per_minute: 10
  requests_per_hour: 100
  requests_per_day: 3500
  url: "https://api.groq.com/openai/v1/chat/completions"
  # Placeholder; replace with a real Groq token.
  token: "groq_token"
  max_request_length: 128000
  model_size: BIG
# OpenRouter.
# Models: https://openrouter.ai/models
- name: deepseek/deepseek-chat:free
  provider: openrouter
  priority: 1
  # Request quotas per time window.
  requests_per_minute: 20
  requests_per_hour: 100
  requests_per_day: 200
  url: "https://openrouter.ai/api/v1/chat/completions"
  # Placeholder; replace with a real OpenRouter token.
  token: "openrouter_token"
  max_request_length: 131072
  model_size: BIG
# Glama: a router that fronts a large number of models.
# Models: https://glama.ai/models
- name: glama/gemini-2.0-flash-thinking-exp-01-21
  provider: glama
  priority: 1
  # Request quotas per time window.
  requests_per_minute: 100
  requests_per_hour: 3000
  requests_per_day: 10000
  url: "https://glama.ai/api/gateway/openai/v1/chat/completions"
  # Placeholder; replace with a real Glama token.
  token: "glama-token"
  max_request_length: 32000
  model_size: SMALL