Skip to content

Commit

Permalink
Apply rate limits in LLM service (#15997)
Browse files Browse the repository at this point in the history
Release Notes:

- N/A

---------

Co-authored-by: Marshall <[email protected]>
Co-authored-by: Marshall Bowers <[email protected]>
  • Loading branch information
3 people authored Aug 8, 2024
1 parent 2bc5037 commit 06625bf
Show file tree
Hide file tree
Showing 21 changed files with 976 additions and 220 deletions.
5 changes: 5 additions & 0 deletions crates/collab/k8s/collab.template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,11 @@ spec:
secretKeyRef:
name: database
key: url
- name: LLM_DATABASE_URL
valueFrom:
secretKeyRef:
name: llm-database
key: url
- name: DATABASE_MAX_CONNECTIONS
value: "${DATABASE_MAX_CONNECTIONS}"
- name: API_TOKEN
Expand Down
126 changes: 122 additions & 4 deletions crates/collab/k8s/postgrest.template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ metadata:
spec:
type: LoadBalancer
selector:
app: postgrest
app: nginx
ports:
- name: web
protocol: TCP
Expand All @@ -24,17 +24,99 @@ apiVersion: apps/v1
# Deployment that runs a single nginx container listening on port 8080.
# The container's /etc/nginx/nginx.conf is supplied by the `nginx-config`
# ConfigMap (mounted via subPath so only that one file is overlaid).
# NOTE(review): two `name:` lines follow under metadata (`postgrest` and
# `nginx`); this looks like the before/after of a rename captured from a
# diff view — confirm that only `name: nginx` belongs in the real manifest.
kind: Deployment
metadata:
namespace: ${ZED_KUBE_NAMESPACE}
name: postgrest
name: nginx
spec:
replicas: 1
selector:
matchLabels:
app: nginx
template:
metadata:
labels:
app: nginx
spec:
containers:
- name: nginx
# NOTE(review): `latest` is a mutable tag; consider pinning a specific version.
image: nginx:latest
ports:
- containerPort: 8080
protocol: TCP
volumeMounts:
# Mount only the nginx.conf key of the ConfigMap over the default config file.
- name: nginx-config
mountPath: /etc/nginx/nginx.conf
subPath: nginx.conf
volumes:
- name: nginx-config
configMap:
name: nginx-config

---
# ConfigMap holding the nginx.conf used by the nginx Deployment.
# The config defines one HTTP server on port 8080 with two routes:
#   /app/ -> http://postgrest-app:8080/
#   /llm/ -> http://postgrest-llm:8080/
# NOTE(review): both proxy_pass targets end in `/`; per the nginx proxy_pass
# documentation this should replace the matched `/app/` or `/llm/` prefix
# with `/` before forwarding — confirm this is the intended path rewriting.
apiVersion: v1
kind: ConfigMap
metadata:
namespace: ${ZED_KUBE_NAMESPACE}
name: nginx-config
data:
# Everything below `nginx.conf: |` is the literal file content handed to nginx.
nginx.conf: |
events {}
http {
server {
listen 8080;
location /app/ {
proxy_pass http://postgrest-app:8080/;
}
location /llm/ {
proxy_pass http://postgrest-llm:8080/;
}
}
}
---
# Service `postgrest-app`: forwards TCP port 8080 to port 8080 of pods
# labelled `app: postgrest-app`. This is the hostname the nginx config
# proxies `/app/` requests to. No `type` is set on this Service.
apiVersion: v1
kind: Service
metadata:
namespace: ${ZED_KUBE_NAMESPACE}
name: postgrest-app
spec:
selector:
app: postgrest-app
ports:
- protocol: TCP
port: 8080
targetPort: 8080

---
# Service `postgrest-llm`: forwards TCP port 8080 to port 8080 of pods
# labelled `app: postgrest-llm`. This is the hostname the nginx config
# proxies `/llm/` requests to. No `type` is set on this Service.
apiVersion: v1
kind: Service
metadata:
namespace: ${ZED_KUBE_NAMESPACE}
name: postgrest-llm
spec:
selector:
app: postgrest-llm
ports:
- protocol: TCP
port: 8080
targetPort: 8080

---
apiVersion: apps/v1
kind: Deployment
metadata:
namespace: ${ZED_KUBE_NAMESPACE}
name: postgrest-app
spec:
replicas: 1
selector:
matchLabels:
app: postgrest
app: postgrest-app
template:
metadata:
labels:
app: postgrest
app: postgrest-app
spec:
containers:
- name: postgrest
Expand All @@ -55,3 +137,39 @@ spec:
secretKeyRef:
name: postgrest
key: jwt_secret

---
# Deployment `postgrest-llm`: one replica of the PostgREST container, labelled
# `app: postgrest-llm` so the Service of the same name selects it.
# Its database URI comes from the `llm-database` secret, while the JWT secret
# is read from the `postgrest` secret.
apiVersion: apps/v1
kind: Deployment
metadata:
namespace: ${ZED_KUBE_NAMESPACE}
name: postgrest-llm
spec:
replicas: 1
selector:
matchLabels:
app: postgrest-llm
template:
metadata:
labels:
app: postgrest-llm
spec:
containers:
- name: postgrest
# NOTE(review): no image tag is given, so the registry's default tag is
# pulled — consider pinning a specific PostgREST version.
image: "postgrest/postgrest"
ports:
- containerPort: 8080
protocol: TCP
env:
# Port PostgREST listens on; matches containerPort above.
- name: PGRST_SERVER_PORT
value: "8080"
# Connection string taken from key `url` of the `llm-database` secret.
- name: PGRST_DB_URI
valueFrom:
secretKeyRef:
name: llm-database
key: url
# JWT secret taken from key `jwt_secret` of the `postgrest` secret.
- name: PGRST_JWT_SECRET
valueFrom:
secretKeyRef:
name: postgrest
key: jwt_secret
32 changes: 0 additions & 32 deletions crates/collab/migrations_llm.sqlite/20240806182921_test_schema.sql

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@ create unique index uix_providers_on_name on providers (name);
-- Models offered by a provider. Each row belongs to one provider and is
-- removed automatically when that provider row is deleted.
-- NOTE(review): the `name` column appears twice below (once without and once
-- with a trailing comma); this looks like the removed/added sides of a diff
-- view — confirm that only the comma-terminated line belongs in the migration.
create table if not exists models (
id serial primary key,
provider_id integer not null references providers (id) on delete cascade,
name text not null
name text not null,
-- Presumably the per-model rate limits enforced by the LLM service — confirm units with the caller.
max_requests_per_minute integer not null,
max_tokens_per_minute integer not null,
max_tokens_per_day integer not null
);

-- A model name is unique within its provider.
create unique index uix_models_on_provider_id_name on models (provider_id, name);
Expand Down
18 changes: 11 additions & 7 deletions crates/collab/migrations_llm/20240806213401_create_usages.sql
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
-- Lookup table of usage measure names; rows in `usages` point at it through
-- their `measure_id` foreign key.
-- `if not exists` matches the other `create table` statements in these
-- migrations, and the same guard on the index keeps this table/index pair
-- safe to re-run as a unit.
create table if not exists usage_measures (
    id serial primary key,
    name text not null
);

-- Each measure name may be registered only once.
create unique index if not exists uix_usage_measures_on_name on usage_measures (name);

-- Usage records per user and model. Rows are deleted automatically when the
-- referenced model (or usage measure) row is deleted.
-- NOTE(review): this listing interleaves two column sets — the fixed
-- `*_this_minute` / `*_this_day` / `*_this_month` counters and the
-- `measure_id` / `timestamp` / `buckets` columns — with no comma between
-- them. It looks like the removed and added sides of a diff view; confirm
-- which set the real migration contains before running this.
create table if not exists usages (
id serial primary key,
user_id integer not null,
model_id integer not null references models (id) on delete cascade,
requests_this_minute integer not null default 0,
tokens_this_minute bigint not null default 0,
requests_this_day integer not null default 0,
tokens_this_day bigint not null default 0,
requests_this_month integer not null default 0,
tokens_this_month bigint not null default 0
-- Which measure this row tracks; references the usage_measures lookup table.
measure_id integer not null references usage_measures (id) on delete cascade,
-- NOTE(review): stored without a time zone — presumably UTC; confirm with the writer.
timestamp timestamp without time zone not null,
-- Presumably rolling-window counters anchored at `timestamp` — confirm layout with the caller.
buckets bigint[] not null
);

-- Lookup indexes for per-user and per-model queries.
create index ix_usages_on_user_id on usages (user_id);
create index ix_usages_on_model_id on usages (model_id);
-- NOTE(review): two unique indexes follow; the narrower (user_id, model_id)
-- one would forbid multiple measures per user/model, so it is presumably the
-- removed side of the diff — confirm.
create unique index uix_usages_on_user_id_model_id on usages (user_id, model_id);
create unique index uix_usages_on_user_id_model_id_measure_id on usages (user_id, model_id, measure_id);
Loading

0 comments on commit 06625bf

Please sign in to comment.