Merge branch 'main' into next

jgoux · jgoux · commit 016d0dbea9e4 · 2024-08-19T09:37:57.000+02:00
diff --git a/apps/web/README.md b/apps/web/README.md
@@ -46,7 +46,16 @@ From this directory (`./apps/web`):
    ```shell
    echo 'OPENAI_API_KEY="<openai-api-key>"' >> .env.local
    ```
-5. Start Next.js development server:
+5. Start the local Redis containers (used for rate limiting). This serves a Redis REST API on port 8080:
+   ```shell
+   docker compose up -d
+   ```
+6. Store the local KV (Redis) environment variables. Use these exact values (they match the port and token set in `docker-compose.yml`):
+   ```shell
+   echo 'KV_REST_API_URL="http://localhost:8080"' >> .env.local
+   echo 'KV_REST_API_TOKEN="local_token"' >> .env.local
+   ```
+7. Start Next.js development server:
    ```shell
    npm run dev
    ```
diff --git a/apps/web/app/api/chat/route.ts b/apps/web/app/api/chat/route.ts
@@ -1,18 +1,51 @@
 import { openai } from '@ai-sdk/openai'
+import { Ratelimit } from '@upstash/ratelimit'
+import { kv } from '@vercel/kv'
 import { ToolInvocation, convertToCoreMessages, streamText } from 'ai'
 import { codeBlock } from 'common-tags'
 import { convertToCoreTools, maxMessageContext, maxRowLimit, tools } from '~/lib/tools'
+import { createClient } from '~/utils/supabase/server'
 
 // Allow streaming responses up to 30 seconds
 export const maxDuration = 30
 
+const inputTokenRateLimit = new Ratelimit({
+  redis: kv, // counters are stored in the Vercel KV (Redis) client
+  limiter: Ratelimit.fixedWindow(1000000, '30m'), // 1,000,000 prompt tokens per 30-minute window
+  prefix: 'ratelimit:tokens:input', // keeps these keys separate from the output-token limiter
+})
+
+const outputTokenRateLimit = new Ratelimit({
+  redis: kv, // counters are stored in the Vercel KV (Redis) client
+  limiter: Ratelimit.fixedWindow(10000, '30m'), // 10,000 completion tokens per 30-minute window
+  prefix: 'ratelimit:tokens:output', // keeps these keys separate from the input-token limiter
+})
+
 type Message = {
   role: 'user' | 'assistant'
   content: string
   toolInvocations?: (ToolInvocation & { result: any })[] // typed as always carrying a `result`
 }
 
 export async function POST(req: Request) {
+  const supabase = createClient()
+
+  const { data, error } = await supabase.auth.getUser()
+
+  // Middleware already enforces auth, so this should never happen (the check narrows `data.user`)
+  if (error) {
+    return new Response('Unauthorized', { status: 401 })
+  }
+
+  const { user } = data
+
+  const { remaining: inputRemaining } = await inputTokenRateLimit.getRemaining(user.id)
+  const { remaining: outputRemaining } = await outputTokenRateLimit.getRemaining(user.id)
+
+  if (inputRemaining <= 0 || outputRemaining <= 0) {
+    return new Response('Rate limited', { status: 429 })
+  }
+
   const { messages }: { messages: Message[] } = await req.json()
 
   // Trim the message context sent to the LLM to mitigate token abuse
@@ -64,6 +97,14 @@ export async function POST(req: Request) {
     model: openai('gpt-4o-2024-08-06'),
     messages: convertToCoreMessages(trimmedMessageContext),
     tools: convertToCoreTools(tools),
+    async onFinish({ usage }) {
+      await inputTokenRateLimit.limit(user.id, {
+        rate: usage.promptTokens,
+      })
+      await outputTokenRateLimit.limit(user.id, {
+        rate: usage.completionTokens,
+      })
+    },
   })
 
   return result.toAIStreamResponse()
diff --git a/apps/web/components/app-provider.tsx b/apps/web/components/app-provider.tsx
@@ -27,6 +27,7 @@ const dbManager = typeof window !== 'undefined' ? new DbManager() : undefined
 export default function AppProvider({ children }: AppProps) {
   const [isLoadingUser, setIsLoadingUser] = useState(true)
   const [user, setUser] = useState<User>()
+  const [isRateLimited, setIsRateLimited] = useState(false)
 
   const focusRef = useRef<FocusHandle>(null)
 
@@ -110,6 +111,8 @@ export default function AppProvider({ children }: AppProps) {
         isLoadingUser,
         signIn,
         signOut,
+        isRateLimited,
+        setIsRateLimited,
         focusRef,
         isPreview,
         dbManager,
@@ -131,6 +134,8 @@ export type AppContextValues = {
   isLoadingUser: boolean
   signIn: () => Promise<User | undefined>
   signOut: () => Promise<void>
+  isRateLimited: boolean
+  setIsRateLimited: (limited: boolean) => void
   focusRef: RefObject<FocusHandle>
   isPreview: boolean
   dbManager?: DbManager
diff --git a/apps/web/components/chat.tsx b/apps/web/components/chat.tsx
@@ -3,7 +3,7 @@
 import { Message, generateId } from 'ai'
 import { useChat } from 'ai/react'
 import { AnimatePresence, m } from 'framer-motion'
-import { ArrowDown, ArrowUp, Paperclip, Square } from 'lucide-react'
+import { ArrowDown, ArrowUp, Flame, Paperclip, Square } from 'lucide-react'
 import {
   ChangeEvent,
   FormEventHandler,
@@ -49,7 +49,7 @@ export function getInitialMessages(tables: TablesData): Message[] {
 }
 
 export default function Chat() {
-  const { user, isLoadingUser, focusRef } = useApp()
+  const { user, isLoadingUser, focusRef, isRateLimited } = useApp()
   const [inputFocusState, setInputFocusState] = useState(false)
 
   const {
@@ -262,6 +262,32 @@ export default function Chat() {
                   isLast={i === messages.length - 1}
                 />
               ))}
+              <AnimatePresence initial={false}>
+                {isRateLimited && !isLoading && (
+                  <m.div
+                    layout="position"
+                    className="flex flex-col gap-4 justify-start items-center max-w-96 p-4 bg-destructive rounded-md text-sm"
+                    variants={{
+                      hidden: { scale: 0 },
+                      show: { scale: 1, transition: { delay: 0.5 } },
+                    }}
+                    initial="hidden"
+                    animate="show"
+                    exit="hidden"
+                  >
+                    <Flame size={64} strokeWidth={1} />
+                    <div className="flex flex-col items-center text-start gap-4">
+                      <h3 className="font-bold">Hang tight!</h3>
+                      <p>
+                        We&apos;re seeing a lot of AI traffic from your end and need to temporarily
+                        pause your chats to make sure our servers don&apos;t melt.
+                      </p>
+
+                      <p>Have a quick coffee break and try again in a few minutes!</p>
+                    </div>
+                  </m.div>
+                )}
+              </AnimatePresence>
               <AnimatePresence>
                 {isLoading && (
                   <m.div
diff --git a/apps/web/components/workspace.tsx b/apps/web/components/workspace.tsx
@@ -8,6 +8,7 @@ import { useTablesQuery } from '~/data/tables/tables-query'
 import { useOnToolCall } from '~/lib/hooks'
 import { useBreakpoint } from '~/lib/use-breakpoint'
 import { ensureMessageId, ensureToolResult } from '~/lib/util'
+import { useApp } from './app-provider'
 import Chat, { getInitialMessages } from './chat'
 import IDE from './ide'
 
@@ -51,6 +52,7 @@ export default function Workspace({
   onReply,
   onCancelReply,
 }: WorkspaceProps) {
+  const { setIsRateLimited } = useApp()
   const isSmallBreakpoint = useBreakpoint('lg')
   const onToolCall = useOnToolCall(databaseId)
   const { mutateAsync: saveMessage } = useMessageCreateMutation(databaseId)
@@ -76,6 +78,9 @@ export default function Workspace({
       await onReply?.(message, append)
       await saveMessage({ message })
     },
+    async onResponse(response) {
+      setIsRateLimited(response.status === 429)
+    },
   })
 
   const appendMessage = useCallback(
diff --git a/apps/web/docker-compose.yml b/apps/web/docker-compose.yml
@@ -0,0 +1,11 @@
+services:
+  redis:
+    image: redis
+  local-vercel-kv:
+    image: hiett/serverless-redis-http:latest
+    ports:
+      - 8080:80 # host port 8080 -> container port 80
+    environment:
+      SRH_MODE: env
+      SRH_TOKEN: local_token # same value the README stores as KV_REST_API_TOKEN
+      SRH_CONNECTION_STRING: redis://redis:6379 # points at the `redis` service above
diff --git a/apps/web/package.json b/apps/web/package.json
@@ -32,6 +32,8 @@
     "@supabase/ssr": "^0.4.0",
     "@supabase/supabase-js": "^2.45.0",
     "@tanstack/react-query": "^5.45.0",
+    "@upstash/ratelimit": "^2.0.1",
+    "@vercel/kv": "^2.0.0",
     "@xenova/transformers": "^2.17.2",
     "ai": "^3.2.8",
     "chart.js": "^4.4.3",
diff --git a/package-lock.json b/package-lock.json