Commit c39c7d6

Pad first system message with newline
1 parent 3f6f75a commit c39c7d6

File tree

2 files changed: +76 -7 lines changed

src/index.ts

Lines changed: 19 additions & 4 deletions
@@ -15,15 +15,30 @@ let encoder: Tiktoken | undefined;
  * @returns An estimate for the number of tokens the prompt will use
  */
 export function promptTokensEstimate({ messages, functions }: { messages: Message[], functions?: Function[] }): number {
-  let tokens = messages.map(messageTokensEstimate).reduce((a, b) => a + b, 0);
-  tokens += 3; // Add three per completion
+  // It appears that if functions are present, the first system message is padded with a trailing newline. This
+  // was inferred by trying lots of combinations of messages and functions and seeing what the token counts were.
+  let paddedSystem = false;
+  let tokens = messages.map(m => {
+    if (m.role === "system" && functions && !paddedSystem) {
+      m = { ...m, content: m.content + "\n" }
+      paddedSystem = true;
+    }
+    return messageTokensEstimate(m);
+  }).reduce((a, b) => a + b, 0);
+
+  // Each completion (vs message) seems to carry a 3-token overhead
+  tokens += 3;
+
+  // If there are functions, add the function definitions as they count towards token usage
   if (functions) {
     tokens += functionsTokensEstimate(functions as any as FunctionDef[]);
   }
 
-  // If there's a system message _and_ functions are present, subtract three tokens
+  // If there's a system message _and_ functions are present, subtract four tokens. I assume this is because
+  // functions typically add a system message, but reuse the first one if it's already there. This offsets
+  // the extra 9 tokens added by the function definitions.
   if (functions && messages.find(m => m.role === "system")) {
-    tokens -= 3;
+    tokens -= 4;
   }
 
   return tokens;
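
For reference, a minimal usage sketch of the updated estimator against the new system-plus-user-plus-function test case added below. The "openai-chat-tokens" import path is an assumption about how this package is consumed and is not shown in this commit:

// Usage sketch only; the import path is assumed, not taken from this commit.
import { promptTokensEstimate } from "openai-chat-tokens";

const estimate = promptTokensEstimate({
  messages: [
    { role: "system", content: "Hello:" },
    { role: "user", content: "Hi there" },
  ],
  functions: [
    { name: "do_stuff", parameters: { type: "object", properties: {} } },
  ],
});

// Per the matching test case added below, this should log 35: the first system
// message gets a trailing "\n" because functions are present, 3 tokens are added
// for the completion, the function definitions are counted, and 4 tokens are
// subtracted because a system message and functions are both present.
console.log(estimate);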

tests/token-counts.test.ts

Lines changed: 57 additions & 3 deletions
@@ -10,6 +10,26 @@ type Example = {
   validate?: boolean
 };
 
+const r: OpenAI.Chat.CompletionCreateParams.CreateChatCompletionRequestNonStreaming = {
+  "model": "gpt-3.5-turbo",
+  "temperature": 0,
+  "functions": [
+    {
+      "name": "do_stuff",
+      "parameters": {
+        "type": "object",
+        "properties": {}
+      }
+    }
+  ],
+  "messages": [
+    {
+      "role": "system",
+      "content": "hello:"
+    },
+  ]
+};
+
 const TEST_CASES: Example[] = [
   {
     messages: [
@@ -23,6 +43,18 @@ const TEST_CASES: Example[] = [
     ],
     tokens: 9
   },
+  {
+    messages: [
+      { role: "system", content: "hello" }
+    ],
+    tokens: 8,
+  },
+  {
+    messages: [
+      { role: "system", content: "hello:" }
+    ],
+    tokens: 9,
+  },
   {
     messages: [
       { role: "system", content: "# Important: you're the best robot" },
@@ -161,10 +193,32 @@ const TEST_CASES: Example[] = [
       }
     ],
     tokens: 35,
-  }
+  },
+  {
+    messages: [
+      { "role": "system", "content": "Hello:" },
+      { "role": "user", "content": "Hi there" },
+    ],
+    functions: [
+      { "name": "do_stuff", "parameters": { "type": "object", "properties": {} } }
+    ],
+    tokens: 35,
+  },
+  {
+    messages: [
+      { "role": "system", "content": "Hello:" },
+      { "role": "system", "content": "Hello" },
+      { "role": "user", "content": "Hi there" },
+    ],
+    functions: [
+      { "name": "do_stuff", "parameters": { "type": "object", "properties": {} } }
+    ],
+    tokens: 40,
+  },
 ];
 
 const validateAll = false;
+const openAITimeout = 10000;
 
 describe.each(TEST_CASES)("token counts (%j)", (example) => {
   const validateTest = ((validateAll || example.validate) ? test : test.skip)
@@ -174,10 +228,10 @@ describe.each(TEST_CASES)("token counts (%j)", (example) => {
       model: "gpt-3.5-turbo",
       messages: example.messages,
       functions: example.functions as any,
-      max_tokens: 1,
+      max_tokens: 10,
     });
     expect(response.usage?.prompt_tokens).toBe(example.tokens);
-  });
+  }, openAITimeout);
 
   test("estimate is correct", async () => {
     expect(promptTokensEstimate({ messages: example.messages, functions: example.functions })).toBe(example.tokens);
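
As a sanity check on the new single-system-message cases above (8 tokens for "hello", 9 for "hello:"), the counts can be reproduced by hand with js-tiktoken. This sketch assumes the getEncoding API and the roughly 3-tokens-per-message framing plus roughly 3-token completion overhead described in the comments in src/index.ts; it is an illustration, not code from this repository:

// Illustration only, not code from this commit.
import { getEncoding } from "js-tiktoken";

const enc = getEncoding("cl100k_base");

// ~3 tokens of per-message framing + role + content + ~3 tokens of completion overhead.
const count = (role: string, content: string) =>
  3 + enc.encode(role).length + enc.encode(content).length + 3;

console.log(count("system", "hello"));  // 8, matching the test case above
console.log(count("system", "hello:")); // 9; the ":" costs one extra token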
