From 92a2c0b6c082d93dbfb14e6d243505946ed6a56c Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Mon, 21 Oct 2024 15:55:33 +0000
Subject: [PATCH 1/3] Update changelog (#856)

Co-authored-by: alenakhineika <16307679+alenakhineika@users.noreply.github.com>
---
 CHANGELOG.md | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4a2ee7379..b778c4f6c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,18 @@
 # Change Log
 
+## [v1.9.2](https://github.com/mongodb-js/vscode/releases/tag/v1.9.2) - 2024-10-21
+
+## What's Changed
+* fix(chat): show empty docs msg, schema set msg content correctly VSCODE-628 by @Anemy in https://github.com/mongodb-js/vscode/pull/851
+* chore(participant): move docs references after content VSCODE-629 by @Anemy in https://github.com/mongodb-js/vscode/pull/852
+* fix: use new connection form by @paula-stacho in https://github.com/mongodb-js/vscode/pull/815
+* chore(chat): update docs chatbot request headers VSCODE-634 by @Anemy in https://github.com/mongodb-js/vscode/pull/853
+* chore: use the latest vsce that adds the chat-participant tag VSCODE-638 by @alenakhineika in https://github.com/mongodb-js/vscode/pull/855
+
+
+**Full Changelog**: https://github.com/mongodb-js/vscode/compare/v1.9.1...v1.9.2
+
+
 ## [v1.9.1](https://github.com/mongodb-js/vscode/releases/tag/v1.9.1) - 2024-09-30
 
 ## What's Changed
@@ -401,14 +414,3 @@ To dig deeper please feel free to follow the links mentioned below:
 - Fix launching mongodb shell with ssl in bash (VSCODE-227, #270)
 
 
-## [v0.4.2](https://github.com/mongodb-js/vscode/releases/tag/v0.4.2) - 2021-02-17
-
-### Added
-
-- Add icon to refresh collection documents list (#264)
-
-### Changed
-
-- Updated the Atlas link to have https (#259)
-
-

From cace4df75f213c427109f70bab4d0e3280422631 Mon Sep 17 00:00:00 2001
From: Nikola Irinchev <irinchev@me.com>
Date: Thu, 24 Oct 2024 11:39:28 +0200
Subject: [PATCH 2/3] VSCODE-640: Adapt message content access to latest vscode
 API (#857)

* Adapt message content access to latest vscode API

* Address CR comments
---
 .vscode/launch.json                           |  14 ++
 src/participant/participant.ts                |  21 +--
 src/participant/prompts/index.ts              |   5 +-
 src/participant/prompts/promptBase.ts         |  48 +++++-
 .../suite/participant/participant.test.ts     | 138 ++++++++++++------
 5 files changed, 169 insertions(+), 57 deletions(-)

diff --git a/.vscode/launch.json b/.vscode/launch.json
index e7f6ca212..9d596e5fd 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -40,6 +40,20 @@
       "outFiles": [
         "${workspaceFolder}/dist/**/*.js"
       ]
+    },
+    {
+      "name": "Run Tests",
+      "type": "extensionHost",
+      "request": "launch",
+      "runtimeExecutable": "${execPath}",
+      "args": [
+        "${workspaceFolder}/out/test/suite", // TODO: VSCODE-641 - remove suite
+        "--disable-extensions",
+        "--extensionDevelopmentPath=${workspaceFolder}",
+        "--extensionTestsPath=${workspaceFolder}/out/test/suite"
+      ],
+      "outFiles": ["${workspaceFolder}/out/**/*.js"],
+      "preLaunchTask": "npm: compile:extension",
     }
   ],
   "compounds": [
diff --git a/src/participant/participant.ts b/src/participant/participant.ts
index 07c725ca0..5dbf97b31 100644
--- a/src/participant/participant.ts
+++ b/src/participant/participant.ts
@@ -10,7 +10,7 @@ import type { LoadedConnection } from '../storage/connectionStorage';
 import EXTENSION_COMMANDS from '../commands';
 import type { StorageController } from '../storage';
 import { StorageVariables } from '../storage';
-import { Prompts } from './prompts';
+import { getContentLength, Prompts } from './prompts';
 import type { ChatResult } from './constants';
 import {
   askToConnectChatResult,
@@ -189,7 +189,7 @@ export default class ParticipantController {
         (message: vscode.LanguageModelChatMessage) =>
           util.inspect({
             role: message.role,
-            contentLength: message.content.length,
+            contentLength: getContentLength(message),
           })
       ),
     });
@@ -790,15 +790,18 @@ export default class ParticipantController {
       // it currently errors (not on insiders, only main VSCode).
       // Here we're defaulting to have some content as a workaround.
       // TODO: Remove this when the issue is fixed.
-      messagesWithNamespace.messages[
-        messagesWithNamespace.messages.length - 1
-        // eslint-disable-next-line new-cap
-      ] = vscode.LanguageModelChatMessage.User(
+      if (
+        !Prompts.doMessagesContainUserInput([
+          messagesWithNamespace.messages[
+            messagesWithNamespace.messages.length - 1
+          ],
+        ])
+      ) {
         messagesWithNamespace.messages[
           messagesWithNamespace.messages.length - 1
-        ].content.trim() || 'see previous messages'
-      );
-
+          // eslint-disable-next-line new-cap
+        ] = vscode.LanguageModelChatMessage.User('see previous messages');
+      }
       const responseContentWithNamespace = await this.getChatResponseContent({
         modelInput: messagesWithNamespace,
         token,
diff --git a/src/participant/prompts/index.ts b/src/participant/prompts/index.ts
index 18e4150af..5324f2847 100644
--- a/src/participant/prompts/index.ts
+++ b/src/participant/prompts/index.ts
@@ -5,6 +5,9 @@ import { IntentPrompt } from './intent';
 import { NamespacePrompt } from './namespace';
 import { QueryPrompt } from './query';
 import { SchemaPrompt } from './schema';
+import { isContentEmpty } from './promptBase';
+
+export { getContentLength } from './promptBase';
 
 export class Prompts {
   public static generic = new GenericPrompt();
@@ -26,7 +29,7 @@ export class Prompts {
     for (const message of messages) {
       if (
         message.role === vscode.LanguageModelChatMessageRole.User &&
-        message.content.trim().length > 0
+        !isContentEmpty(message)
       ) {
         return true;
       }
diff --git a/src/participant/prompts/promptBase.ts b/src/participant/prompts/promptBase.ts
index 949b4f3d0..38a8ae672 100644
--- a/src/participant/prompts/promptBase.ts
+++ b/src/participant/prompts/promptBase.ts
@@ -24,6 +24,52 @@ export interface ModelInput {
   stats: ParticipantPromptProperties;
 }
 
+export function getContentLength(
+  message: vscode.LanguageModelChatMessage
+): number {
+  const content = message.content as any;
+  if (typeof content === 'string') {
+    return content.trim().length;
+  }
+
+  // TODO: https://github.com/microsoft/vscode/pull/231788 made it so message.content is no longer a string,
+  // but an array of things that a message can contain. This will eventually be reflected in the type definitions
+  // but until then, we're manually checking the array contents to ensure we don't break when this PR gets released
+  // in the stable channel.
+  if (Array.isArray(content)) {
+    return content.reduce((acc: number, element) => {
+      const value = element?.value ?? element?.content?.value;
+      if (typeof value === 'string') {
+        return acc + value.length;
+      }
+
+      return acc;
+    }, 0);
+  }
+
+  return 0;
+}
+
+export function isContentEmpty(
+  message: vscode.LanguageModelChatMessage
+): boolean {
+  const content = message.content as any;
+  if (typeof content === 'string') {
+    return content.trim().length === 0;
+  }
+
+  if (Array.isArray(content)) {
+    for (const element of content) {
+      const value = element?.value ?? element?.content?.value;
+      if (typeof value === 'string' && value.trim().length > 0) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
 export abstract class PromptBase<TArgs extends PromptArgsBase> {
   protected abstract getAssistantPrompt(args: TArgs): string;
 
@@ -92,7 +138,7 @@ export abstract class PromptBase<TArgs extends PromptArgsBase> {
   ): ParticipantPromptProperties {
     return {
       total_message_length: messages.reduce(
-        (acc, message) => acc + message.content.length,
+        (acc, message) => acc + getContentLength(message),
         0
       ),
       user_input_length: request.prompt.length,
diff --git a/src/test/suite/participant/participant.test.ts b/src/test/suite/participant/participant.test.ts
index 38962a84b..094ff3681 100644
--- a/src/test/suite/participant/participant.test.ts
+++ b/src/test/suite/participant/participant.test.ts
@@ -31,6 +31,7 @@ import { ChatMetadataStore } from '../../../participant/chatMetadata';
 import { Prompts } from '../../../participant/prompts';
 import { createMarkdownLink } from '../../../participant/markdown';
 import EXTENSION_COMMANDS from '../../../commands';
+import { getContentLength } from '../../../participant/prompts/promptBase';
 
 // The Copilot's model in not available in tests,
 // therefore we need to mock its methods and returning values.
@@ -50,6 +51,28 @@ const encodeStringify = (obj: Record<string, any>): string => {
   return encodeURIComponent(JSON.stringify(obj));
 };
 
+const getMessageContent = (
+  message: vscode.LanguageModelChatMessage
+): string => {
+  const content = message.content as any;
+  if (typeof content === 'string') {
+    return content;
+  }
+
+  if (Array.isArray(content)) {
+    return content.reduce((agg: string, element) => {
+      const value = element?.value ?? element?.content?.value;
+      if (typeof value === 'string') {
+        return agg + value;
+      }
+
+      return agg;
+    }, '');
+  }
+
+  return '';
+};
+
 suite('Participant Controller Test Suite', function () {
   const extensionContextStub = new ExtensionContextStub();
 
@@ -514,20 +537,22 @@ suite('Participant Controller Test Suite', function () {
             const res = await invokeChatHandler(chatRequestMock);
 
             expect(sendRequestStub).to.have.been.calledTwice;
-            const intentRequest = sendRequestStub.firstCall.args[0];
+            const intentRequest = sendRequestStub.firstCall
+              .args[0] as vscode.LanguageModelChatMessage[];
             expect(intentRequest).to.have.length(2);
-            expect(intentRequest[0].content).to.include(
+            expect(getMessageContent(intentRequest[0])).to.include(
               'Your task is to help guide a conversation with a user to the correct handler.'
             );
-            expect(intentRequest[1].content).to.equal(
+            expect(getMessageContent(intentRequest[1])).to.equal(
               'what is the shape of the documents in the pineapple collection?'
             );
-            const genericRequest = sendRequestStub.secondCall.args[0];
+            const genericRequest = sendRequestStub.secondCall
+              .args[0] as vscode.LanguageModelChatMessage[];
             expect(genericRequest).to.have.length(2);
-            expect(genericRequest[0].content).to.include(
+            expect(getMessageContent(genericRequest[0])).to.include(
               'Parse all user messages to find a database name and a collection name.'
             );
-            expect(genericRequest[1].content).to.equal(
+            expect(getMessageContent(genericRequest[1])).to.equal(
               'what is the shape of the documents in the pineapple collection?'
             );
 
@@ -544,20 +569,22 @@ suite('Participant Controller Test Suite', function () {
           const res = await invokeChatHandler(chatRequestMock);
 
           expect(sendRequestStub).to.have.been.calledTwice;
-          const intentRequest = sendRequestStub.firstCall.args[0];
+          const intentRequest = sendRequestStub.firstCall
+            .args[0] as vscode.LanguageModelChatMessage[];
           expect(intentRequest).to.have.length(2);
-          expect(intentRequest[0].content).to.include(
+          expect(getMessageContent(intentRequest[0])).to.include(
             'Your task is to help guide a conversation with a user to the correct handler.'
           );
-          expect(intentRequest[1].content).to.equal(
+          expect(getMessageContent(intentRequest[1])).to.equal(
             'how to find documents in my collection?'
           );
-          const genericRequest = sendRequestStub.secondCall.args[0];
+          const genericRequest = sendRequestStub.secondCall
+            .args[0] as vscode.LanguageModelChatMessage[];
           expect(genericRequest).to.have.length(2);
-          expect(genericRequest[0].content).to.include(
+          expect(getMessageContent(genericRequest[0])).to.include(
             'Your task is to help the user with MongoDB related questions.'
           );
-          expect(genericRequest[1].content).to.equal(
+          expect(getMessageContent(genericRequest[1])).to.equal(
             'how to find documents in my collection?'
           );
 
@@ -648,8 +675,9 @@ suite('Participant Controller Test Suite', function () {
               references: [],
             };
             await invokeChatHandler(chatRequestMock);
-            const messages = sendRequestStub.secondCall.args[0];
-            expect(messages[1].content).to.include(
+            const messages = sendRequestStub.secondCall
+              .args[0] as vscode.LanguageModelChatMessage[];
+            expect(getMessageContent(messages[1])).to.include(
               'Collection schema: _id: ObjectId\n' +
                 'field.stringField: String\n' +
                 'field.arrayField: Array<Int32>\n'
@@ -712,8 +740,9 @@ suite('Participant Controller Test Suite', function () {
                 references: [],
               };
               await invokeChatHandler(chatRequestMock);
-              const messages = sendRequestStub.secondCall.args[0];
-              expect(messages[1].content).to.include(
+              const messages = sendRequestStub.secondCall
+                .args[0] as vscode.LanguageModelChatMessage[];
+              expect(getMessageContent(messages[1])).to.include(
                 'Sample documents: [\n' +
                   '  {\n' +
                   "    _id: ObjectId('63ed1d522d8573fa5c203661'),\n" +
@@ -781,8 +810,9 @@ suite('Participant Controller Test Suite', function () {
                 references: [],
               };
               await invokeChatHandler(chatRequestMock);
-              const messages = sendRequestStub.secondCall.args[0];
-              expect(messages[1].content).to.include(
+              const messages = sendRequestStub.secondCall
+                .args[0] as vscode.LanguageModelChatMessage[];
+              expect(getMessageContent(messages[1])).to.include(
                 'Sample document: {\n' +
                   "  _id: ObjectId('63ed1d522d8573fa5c203660'),\n" +
                   '  field: {\n' +
@@ -844,8 +874,9 @@ suite('Participant Controller Test Suite', function () {
                 references: [],
               };
               await invokeChatHandler(chatRequestMock);
-              const messages = sendRequestStub.secondCall.args[0];
-              expect(messages[1].content).to.include(
+              const messages = sendRequestStub.secondCall
+                .args[0] as vscode.LanguageModelChatMessage[];
+              expect(getMessageContent(messages[1])).to.include(
                 'Sample document: {\n' +
                   "  _id: ObjectId('63ed1d522d8573fa5c203661'),\n" +
                   '  field: {\n' +
@@ -904,8 +935,11 @@ suite('Participant Controller Test Suite', function () {
                 references: [],
               };
               await invokeChatHandler(chatRequestMock);
-              const messages = sendRequestStub.secondCall.args[0];
-              expect(messages[1].content).to.not.include('Sample documents');
+              const messages = sendRequestStub.secondCall
+                .args[0] as vscode.LanguageModelChatMessage[];
+              expect(getMessageContent(messages[1])).to.not.include(
+                'Sample documents'
+              );
 
               assertCommandTelemetry('query', chatRequestMock, {
                 callIndex: 0,
@@ -932,8 +966,11 @@ suite('Participant Controller Test Suite', function () {
                 references: [],
               };
               await invokeChatHandler(chatRequestMock);
-              const messages = sendRequestStub.secondCall.args[0];
-              expect(messages[1].content).to.not.include('Sample documents');
+              const messages = sendRequestStub.secondCall
+                .args[0] as vscode.LanguageModelChatMessage[];
+              expect(getMessageContent(messages[1])).to.not.include(
+                'Sample documents'
+              );
 
               assertCommandTelemetry('query', chatRequestMock, {
                 callIndex: 0,
@@ -1374,7 +1411,9 @@ suite('Participant Controller Test Suite', function () {
             await invokeChatHandler(chatRequestMock);
 
             expect(sendRequestStub.calledOnce).to.be.true;
-            expect(sendRequestStub.firstCall.args[0][0].content).to.include(
+            const messages = sendRequestStub.firstCall
+              .args[0] as vscode.LanguageModelChatMessage[];
+            expect(getMessageContent(messages[0])).to.include(
               'Parse all user messages to find a database name and a collection name.'
             );
 
@@ -1422,10 +1461,13 @@ suite('Participant Controller Test Suite', function () {
             await invokeChatHandler(chatRequestMock);
 
             expect(sendRequestStub.calledOnce).to.be.true;
-            expect(sendRequestStub.firstCall.args[0][0].content).to.include(
+
+            const messages = sendRequestStub.firstCall
+              .args[0] as vscode.LanguageModelChatMessage[];
+            expect(getMessageContent(messages[0])).to.include(
               'Parse all user messages to find a database name and a collection name.'
             );
-            expect(sendRequestStub.firstCall.args[0][3].content).to.include(
+            expect(getMessageContent(messages[3])).to.include(
               'see previous messages'
             );
           });
@@ -1528,11 +1570,12 @@ suite('Participant Controller Test Suite', function () {
                 references: [],
               };
               await invokeChatHandler(chatRequestMock);
-              const messages = sendRequestStub.secondCall.args[0];
-              expect(messages[0].content).to.include(
+              const messages = sendRequestStub.secondCall
+                .args[0] as vscode.LanguageModelChatMessage[];
+              expect(getMessageContent(messages[0])).to.include(
                 'Amount of documents sampled: 2'
               );
-              expect(messages[1].content).to.include(
+              expect(getMessageContent(messages[1])).to.include(
                 `Database name: dbOne
 Collection name: collOne
 Schema:
@@ -1540,7 +1583,8 @@ Schema:
   "count": 2,
   "fields": [`
               );
-              expect(messages[1].content).to.include(`"name": "arrayField",
+              expect(getMessageContent(messages[1])).to
+                .include(`"name": "arrayField",
               "path": [
                 "field",
                 "arrayField"
@@ -1703,7 +1747,7 @@ Schema:
       expect(stats.has_sample_documents).to.be.false;
       expect(stats.user_input_length).to.equal(chatRequestMock.prompt.length);
       expect(stats.total_message_length).to.equal(
-        messages[0].content.length + messages[1].content.length
+        getContentLength(messages[0]) + getContentLength(messages[1])
       );
     });
 
@@ -1759,7 +1803,7 @@ Schema:
       expect(messages[1].role).to.equal(
         vscode.LanguageModelChatMessageRole.User
       );
-      expect(messages[1].content).to.equal(
+      expect(getMessageContent(messages[1])).to.equal(
         'give me the count of all people in the prod database'
       );
 
@@ -1772,15 +1816,15 @@ Schema:
       expect(stats.has_sample_documents).to.be.true;
       expect(stats.user_input_length).to.equal(chatRequestMock.prompt.length);
       expect(stats.total_message_length).to.equal(
-        messages[0].content.length +
-          messages[1].content.length +
-          messages[2].content.length
+        getContentLength(messages[0]) +
+          getContentLength(messages[1]) +
+          getContentLength(messages[2])
       );
 
       // The length of the user prompt length should be taken from the prompt supplied
       // by the user, even if we enhance it with sample docs and schema.
       expect(stats.user_input_length).to.be.lessThan(
-        messages[2].content.length
+        getContentLength(messages[2])
       );
     });
 
@@ -1812,20 +1856,22 @@ Schema:
       expect(messages[0].role).to.equal(
         vscode.LanguageModelChatMessageRole.Assistant
       );
-      expect(messages[0].content).to.include('Amount of documents sampled: 3');
+      expect(getMessageContent(messages[0])).to.include(
+        'Amount of documents sampled: 3'
+      );
 
       expect(messages[1].role).to.equal(
         vscode.LanguageModelChatMessageRole.User
       );
-      expect(messages[1].content).to.include(databaseName);
-      expect(messages[1].content).to.include(collectionName);
-      expect(messages[1].content).to.include(schema);
+      expect(getMessageContent(messages[1])).to.include(databaseName);
+      expect(getMessageContent(messages[1])).to.include(collectionName);
+      expect(getMessageContent(messages[1])).to.include(schema);
 
       expect(stats.command).to.equal('schema');
       expect(stats.has_sample_documents).to.be.false;
       expect(stats.user_input_length).to.equal(chatRequestMock.prompt.length);
       expect(stats.total_message_length).to.equal(
-        messages[0].content.length + messages[1].content.length
+        getContentLength(messages[0]) + getContentLength(messages[1])
       );
     });
 
@@ -1852,7 +1898,7 @@ Schema:
       expect(stats.has_sample_documents).to.be.false;
       expect(stats.user_input_length).to.equal(chatRequestMock.prompt.length);
       expect(stats.total_message_length).to.equal(
-        messages[0].content.length + messages[1].content.length
+        getContentLength(messages[0]) + getContentLength(messages[1])
       );
     });
 
@@ -1927,18 +1973,18 @@ Schema:
       expect(messages[1].role).to.equal(
         vscode.LanguageModelChatMessageRole.User
       );
-      expect(messages[1].content).to.contain(expectedPrompt);
+      expect(getMessageContent(messages[1])).to.contain(expectedPrompt);
 
       expect(stats.command).to.equal('query');
       expect(stats.has_sample_documents).to.be.false;
       expect(stats.user_input_length).to.equal(expectedPrompt.length);
       expect(stats.total_message_length).to.equal(
-        messages[0].content.length + messages[1].content.length
+        getContentLength(messages[0]) + getContentLength(messages[1])
       );
 
       // The prompt builder may add extra info, but we're only reporting the actual user input
       expect(stats.user_input_length).to.be.lessThan(
-        messages[1].content.length
+        getContentLength(messages[1])
       );
     });
   });

From 10be1c6049a4fe7644905fa2f83fb00c8ce7fcaf Mon Sep 17 00:00:00 2001
From: Nikola Irinchev <irinchev@me.com>
Date: Thu, 24 Oct 2024 11:52:30 +0200
Subject: [PATCH 3/3] VSCODE-639: Replace regex fragment matching with
 streaming KMP (#837)

* Replace regex fragment matching with streaming KMP

* Simplify content capturing

* Add an explanation for FragmentMatcher

* Handle multiple code blocks per fragment
---
 src/participant/streamParsing.ts              | 240 +++++++++++++-----
 .../suite/participant/streamParsing.test.ts   |  76 ++++++
 2 files changed, 246 insertions(+), 70 deletions(-)

diff --git a/src/participant/streamParsing.ts b/src/participant/streamParsing.ts
index 93bb5dad9..d8131a759 100644
--- a/src/participant/streamParsing.ts
+++ b/src/participant/streamParsing.ts
@@ -1,5 +1,166 @@
-function escapeRegex(str: string): string {
-  return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+// This is a stateful streaming implementation of the Knuth-Morris-Pratt algorithm
+// for substring search. It supports being invoked with multiple fragments of the
+// haystack and is capable of finding matches spanning multiple fragments.
+class StreamingKMP {
+  public needle: string;
+  private _lookupVector: number[];
+
+  // In cases where we are fed a string that has a suffix that matches a prefix
+  // of the needle, we're storing the index in the needle which we last matched.
+  // Then when we get a new haystack, we start matching from that needle.
+  private _lastMatchingIndex = 0;
+
+  constructor(needle: string) {
+    this.needle = needle;
+    this._lookupVector = this._createLookupVector();
+  }
+
+  private _createLookupVector(): number[] {
+    const vector = new Array<number>(this.needle.length);
+    let j = 0;
+    vector[0] = 0;
+
+    for (let i = 1; i < this.needle.length; i++) {
+      while (j > 0 && this.needle[i] !== this.needle[j]) {
+        j = vector[j - 1];
+      }
+
+      if (this.needle[i] === this.needle[j]) {
+        j++;
+      }
+
+      vector[i] = j;
+    }
+
+    return vector;
+  }
+
+  // Returns the index in the haystackFragment **after** the needle.
+  // This is done because the match may have occurred over multiple fragments,
+  // so the index of the needle start would be negative.
+  public match(haystackFragment: string): number {
+    let j = this._lastMatchingIndex; // index in needle
+    let i = 0; // index in haystack
+
+    while (i < haystackFragment.length) {
+      if (haystackFragment[i] === this.needle[j]) {
+        i++;
+        j++;
+      }
+
+      if (j === this.needle.length) {
+        this._lastMatchingIndex = 0;
+        return i;
+      }
+
+      if (
+        i < haystackFragment.length &&
+        haystackFragment[i] !== this.needle[j]
+      ) {
+        if (j !== 0) {
+          j = this._lookupVector[j - 1];
+        } else {
+          i++;
+        }
+      }
+    }
+
+    this._lastMatchingIndex = j;
+    return -1;
+  }
+
+  public reset(): void {
+    this._lastMatchingIndex = 0;
+  }
+}
+
+// This class is essentially a state machine that processes a stream of text fragments
+// and emitting a callback with the content between each start and end identifier. The
+// two states we have are:
+// 1. "waiting for start identifier" - `_matchedContent === undefined`
+// 2. "waiting for end identifier" - `_matchedContent !== undefined`
+// with the state transitioning from one to the other when the corresponding identifier
+// is matched in the fragment stream.
+class FragmentMatcher {
+  private _startMatcher: StreamingKMP;
+  private _endMatcher: StreamingKMP;
+  private _matchedContent?: string;
+  private _onContentMatched: (content: string) => void;
+  private _onFragmentProcessed: (content: string) => void;
+
+  constructor({
+    identifier,
+    onContentMatched,
+    onFragmentProcessed,
+  }: {
+    identifier: {
+      start: string;
+      end: string;
+    };
+    onContentMatched: (content: string) => void;
+    onFragmentProcessed: (content: string) => void;
+  }) {
+    this._startMatcher = new StreamingKMP(identifier.start);
+    this._endMatcher = new StreamingKMP(identifier.end);
+    this._onContentMatched = onContentMatched;
+    this._onFragmentProcessed = onFragmentProcessed;
+  }
+
+  private _contentMatched(): void {
+    const content = this._matchedContent;
+    if (content !== undefined) {
+      // Strip the trailing end identifier from the matched content
+      this._onContentMatched(
+        content.slice(0, content.length - this._endMatcher.needle.length)
+      );
+    }
+
+    this._matchedContent = undefined;
+    this._startMatcher.reset();
+    this._endMatcher.reset();
+  }
+
+  // This needs to be invoked every time before we call `process` recursively or when `process`
+  // completes processing the fragment. It will emit a notification to subscribers with the partial
+  // fragment we've processed, regardless of whether there's a match or not.
+  private _partialFragmentProcessed(
+    fragment: string,
+    index: number | undefined = undefined
+  ): void {
+    this._onFragmentProcessed(
+      index === undefined ? fragment : fragment.slice(0, index)
+    );
+  }
+
+  public process(fragment: string): void {
+    if (this._matchedContent === undefined) {
+      // We haven't matched the start identifier yet, so try and do that
+      const startIndex = this._startMatcher.match(fragment);
+      if (startIndex !== -1) {
+        // We found a match for the start identifier - update `_matchedContent` to an empty string
+        // and recursively call `process` with the remainder of the fragment.
+        this._matchedContent = '';
+        this._partialFragmentProcessed(fragment, startIndex);
+        this.process(fragment.slice(startIndex));
+      } else {
+        this._partialFragmentProcessed(fragment);
+      }
+    } else {
+      const endIndex = this._endMatcher.match(fragment);
+      if (endIndex !== -1) {
+        // We've matched the end - emit the matched content and continue processing the partial fragment
+        this._matchedContent += fragment.slice(0, endIndex);
+        this._partialFragmentProcessed(fragment, endIndex);
+        this._contentMatched();
+        this.process(fragment.slice(endIndex));
+      } else {
+        // We haven't matched the end yet - append the fragment to the matched content and wait
+        // for a future fragment to contain the end identifier.
+        this._matchedContent += fragment;
+        this._partialFragmentProcessed(fragment);
+      }
+    }
+  }
 }
 
 /**
@@ -22,74 +183,13 @@ export async function processStreamWithIdentifiers({
     end: string;
   };
 }): Promise<void> {
-  const escapedIdentifierStart = escapeRegex(identifier.start);
-  const escapedIdentifierEnd = escapeRegex(identifier.end);
-  const regex = new RegExp(
-    `${escapedIdentifierStart}([\\s\\S]*?)${escapedIdentifierEnd}`,
-    'g'
-  );
-
-  let contentSinceLastIdentifier = '';
-  for await (const fragment of inputIterable) {
-    contentSinceLastIdentifier += fragment;
+  const fragmentMatcher = new FragmentMatcher({
+    identifier,
+    onContentMatched: onStreamIdentifier,
+    onFragmentProcessed: processStreamFragment,
+  });
 
-    let lastIndex = 0;
-    let match: RegExpExecArray | null;
-    while ((match = regex.exec(contentSinceLastIdentifier)) !== null) {
-      const endIndex = regex.lastIndex;
-
-      // Stream content up to the end of the identifier.
-      const contentToStream = contentSinceLastIdentifier.slice(
-        lastIndex,
-        endIndex
-      );
-      processStreamFragment(contentToStream);
-
-      const identifierContent = match[1];
-      onStreamIdentifier(identifierContent);
-
-      lastIndex = endIndex;
-    }
-
-    if (lastIndex > 0) {
-      // Remove all of the processed content.
-      contentSinceLastIdentifier = contentSinceLastIdentifier.slice(lastIndex);
-      // Reset the regex.
-      regex.lastIndex = 0;
-    } else {
-      // Clear as much of the content as we can safely.
-      const maxUnprocessedLength = identifier.start.length - 1;
-      if (contentSinceLastIdentifier.length > maxUnprocessedLength) {
-        const identifierIndex = contentSinceLastIdentifier.indexOf(
-          identifier.start
-        );
-        if (identifierIndex > -1) {
-          // We have an identifier, so clear up until the identifier.
-          const contentToStream = contentSinceLastIdentifier.slice(
-            0,
-            identifierIndex
-          );
-          processStreamFragment(contentToStream);
-          contentSinceLastIdentifier =
-            contentSinceLastIdentifier.slice(identifierIndex);
-        } else {
-          // No identifier, so clear up until the last maxUnprocessedLength.
-          const processUpTo =
-            contentSinceLastIdentifier.length - maxUnprocessedLength;
-          const contentToStream = contentSinceLastIdentifier.slice(
-            0,
-            processUpTo
-          );
-          processStreamFragment(contentToStream);
-          contentSinceLastIdentifier =
-            contentSinceLastIdentifier.slice(processUpTo);
-        }
-      }
-    }
-  }
-
-  // Finish up anything not streamed yet.
-  if (contentSinceLastIdentifier.length > 0) {
-    processStreamFragment(contentSinceLastIdentifier);
+  for await (const fragment of inputIterable) {
+    fragmentMatcher.process(fragment);
   }
 }
diff --git a/src/test/suite/participant/streamParsing.test.ts b/src/test/suite/participant/streamParsing.test.ts
index 66208ecdd..96d274d00 100644
--- a/src/test/suite/participant/streamParsing.test.ts
+++ b/src/test/suite/participant/streamParsing.test.ts
@@ -216,4 +216,80 @@ suite('processStreamWithIdentifiers', () => {
     expect(fragmentsProcessed.join('')).to.deep.equal(inputFragments.join(''));
     expect(identifiersStreamed).to.deep.equal(['\ncode1\n', '\ncode2\n']);
   });
+
+  test('one fragment containing multiple code blocks emits event in correct order', async () => {
+    // In case we have one fragment containing multiple code blocks, we want to make sure that
+    // fragment notifications and identifier notifications arrive in the right order so that we're
+    // adding code actions after the correct subfragment.
+    // For example:
+    // 'Text before code.\n```js\ncode1\n```\nText between code.\n```js\ncode2\n```\nText after code.'
+    //
+    // should emit:
+    //
+    // processStreamFragment: 'Text before code.\n```js\ncode1\n```'
+    // onStreamIdentifier: '\ncode1\n'
+    // processStreamFragment: '\nText between code.\n```js\ncode2\n```'
+    // onStreamIdentifier: '\ncode2\n'
+    // processStreamFragment: '\nText after code.'
+    //
+    // in that order to ensure we add each code action immediately after the code block
+    // rather than add both at the end.
+
+    const inputFragments = [
+      'Text before code.\n```js\ncode1\n```\nText between code.\n```js\ncode2\n```\nText after code.',
+    ];
+
+    const inputIterable = asyncIterableFromArray<string>(inputFragments);
+    const identifier = { start: '```js', end: '```' };
+
+    const fragmentsEmitted: {
+      source: 'processStreamFragment' | 'onStreamIdentifier';
+      content: string;
+    }[] = [];
+
+    const getFragmentHandler = (
+      source: 'processStreamFragment' | 'onStreamIdentifier'
+    ): ((fragment: string) => void) => {
+      return (fragment: string): void => {
+        // It's an implementation detail, but the way the code is structured today, we're splitting the emitted fragments
+        // whenever we find either a start or end identifier. This is irrelevant as long as we're emitting the entirety of
+        // the text until the end of the code block in `processStreamFragment` and then the code block itself in `onStreamIdentifier`.
+        // With the code below, we're combining all subfragments with the same source to make the test verify the desired
+        // behavior rather than the actual implementation.
+        const lastFragment = fragmentsEmitted[fragmentsEmitted.length - 1];
+        if (lastFragment?.source === source) {
+          lastFragment.content += fragment;
+        } else {
+          fragmentsEmitted.push({ source, content: fragment });
+        }
+      };
+    };
+
+    await processStreamWithIdentifiers({
+      processStreamFragment: getFragmentHandler('processStreamFragment'),
+      onStreamIdentifier: getFragmentHandler('onStreamIdentifier'),
+      inputIterable,
+      identifier,
+    });
+
+    expect(fragmentsEmitted).to.have.length(5);
+    expect(fragmentsEmitted[0].source).to.equal('processStreamFragment');
+    expect(fragmentsEmitted[0].content).to.equal(
+      'Text before code.\n```js\ncode1\n```'
+    );
+
+    expect(fragmentsEmitted[1].source).to.equal('onStreamIdentifier');
+    expect(fragmentsEmitted[1].content).to.equal('\ncode1\n');
+
+    expect(fragmentsEmitted[2].source).to.equal('processStreamFragment');
+    expect(fragmentsEmitted[2].content).to.equal(
+      '\nText between code.\n```js\ncode2\n```'
+    );
+
+    expect(fragmentsEmitted[3].source).to.equal('onStreamIdentifier');
+    expect(fragmentsEmitted[3].content).to.equal('\ncode2\n');
+
+    expect(fragmentsEmitted[4].source).to.equal('processStreamFragment');
+    expect(fragmentsEmitted[4].content).to.equal('\nText after code.');
+  });
 });