Skip to content

Commit

Permalink
[PY] fix: streaming - citations & pyproject.toml updates (#2205)
Browse files Browse the repository at this point in the history
## Linked issues

closes: #minor

## Details

match with #2185 and #2202,  


![image](https://github.com/user-attachments/assets/06846624-0b4b-4913-9177-10016ec9b108)


- citations are now enabled during streaming, and for the final message
- updated colour/outline pngs for sample to adhere to manifest
validation
- fixed indexing for citations `position`
- added `StreamingEntity` class to fix serialization on `streaminfo`
object
- version 2 of poetry was released which led to a few scripts failing

## Attestation Checklist

- [x] My code follows the style guidelines of this project

- I have checked for/fixed spelling, linting, and other errors
- I have commented my code for clarity
- I have made corresponding changes to the documentation (updating the
doc strings in the code is sufficient)
- My changes generate no new warnings
- I have added tests that validate my changes, and provide sufficient
test coverage. I have tested with:
  - Local testing
  - E2E testing in Teams
- New and existing unit tests pass locally with my changes

---------

Co-authored-by: lilydu <lilydu+odspmdb@microsoft.com>
  • Loading branch information
lilyydu and lilydu authored Jan 7, 2025
1 parent 510b2fd commit 850c77b
Show file tree
Hide file tree
Showing 10 changed files with 780 additions and 691 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-build-test-lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
- name: Install Dependencies
run: |
python -m pip install --upgrade pip
pip install poetry
pip install poetry==1.8.5
python scripts/install.py
- name: Check
run: |
Expand Down
1,334 changes: 693 additions & 641 deletions python/packages/ai/poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion python/packages/ai/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,4 @@ warn_unused_ignores = true

[[tool.mypy.overrides]]
module = "tests.*"
disable_error_code = ["var-annotated", "method-assign"]
disable_error_code = ["var-annotated", "method-assign"]
5 changes: 4 additions & 1 deletion python/packages/ai/teams/ai/clients/llm_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,11 @@ def chunk_received(
chunk.delta.context.citations if (chunk.delta and chunk.delta.context) else None
)

if citations:
streamer.set_citations(citations)

if len(text) > 0:
streamer.queue_text_chunk(text, citations)
streamer.queue_text_chunk(text)

# Subscribe to model events
if self._options.model.events is not None:
Expand Down
2 changes: 2 additions & 0 deletions python/packages/ai/teams/streaming/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@
from .streaming_channel_data import StreamingChannelData
from .streaming_handlers import *
from .streaming_response import StreamingResponse
from .streaming_entity import StreamingEntity

__all__ = [
"StreamingResponse",
"StreamingChannelData",
"PromptChunk",
"StreamHandlerTypes",
"StreamingEntity",
]
31 changes: 31 additions & 0 deletions python/packages/ai/teams/streaming/streaming_entity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""
Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Optional

from botbuilder.schema import Entity


@dataclass
class StreamingEntity(Entity):
    """
    Child class of BotBuilder's Entity class. Temporarily needed for mapping until the official
    version of their SDK releases a StreamingEntity class.

    NOTE(review): ``_attribute_map`` follows the botbuilder/msrest serialization
    convention, mapping the snake_case attributes below to the camelCase keys
    used on the wire — confirm against the botbuilder SDK's ``Entity`` base.
    """

    # Serialization map: python attribute name -> JSON key (and type) on the wire.
    _attribute_map = {
        "type": {"key": "type", "type": "str"},
        "stream_id": {"key": "streamId", "type": "str"},
        "stream_type": {"key": "streamType", "type": "str"},
        "stream_sequence": {"key": "streamSequence", "type": "int"},
    }

    # Kind of streaming update this entity describes — TODO confirm allowed values.
    stream_type: str
    # Ordering of this chunk within the stream; presumably monotonically increasing.
    stream_sequence: Optional[int]
    # Identifier tying all chunks of one streamed message together.
    stream_id: Optional[str]
    # Entity discriminator; "streaminfo" marks streaming metadata for Teams.
    type: str = "streaminfo"
75 changes: 40 additions & 35 deletions python/packages/ai/teams/streaming/streaming_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from ..ai.prompts.message import Citation
from ..app_error import ApplicationError
from .streaming_channel_data import StreamingChannelData
from .streaming_entity import StreamingEntity


class StreamingResponse:
Expand Down Expand Up @@ -111,6 +112,24 @@ def updates_sent(self) -> int:
"""
return self._next_sequence - 1

def set_citations(self, citations: List[Citation]) -> None:
    """
    Convert prompt citations into client citations and append them to the
    response's accumulated citation list, numbering positions 1-based and
    continuing from any citations already collected.
    """
    if not citations:
        return

    if not self._citations:
        self._citations = []

    # Positions continue 1-based from what has already been accumulated.
    start = len(self._citations) + 1
    for position, citation in enumerate(citations, start=start):
        appearance = Appearance(
            name=citation.title or f"Document {position}",
            abstract=snippet(citation.content, 477),
        )
        self._citations.append(
            ClientCitation(position=f"{position}", appearance=appearance)
        )

def queue_informative_update(self, text: str) -> None:
"""
Queue an informative update to be sent to the client.
Expand All @@ -131,6 +150,7 @@ def queue_informative_update(self, text: str) -> None:
self._next_sequence += 1

def queue_text_chunk(self, text: str, citations: Optional[List[Citation]] = None) -> None:
# pylint: disable=unused-argument
"""
Queues a chunk of partial message text to be sent to the client.
The text we be sent as quickly as possible to the client. Chunks may be combined before
Expand All @@ -142,33 +162,9 @@ def queue_text_chunk(self, text: str, citations: Optional[List[Citation]] = None

self._message += text

if citations and len(citations) > 0:
if not self._citations:
self._citations = []
curr_pos = len(self._citations)

for citation in citations:
self._citations.append(
ClientCitation(
position=f"{curr_pos}",
appearance=Appearance(
name=citation.title or f"Document {curr_pos}",
abstract=snippet(citation.content, 477),
),
)
)
curr_pos += 1

# If there are citations, modify the content so that the sources are numbers
# instead of [doc1], [doc2], etc.
self._message = (
self._message
if len(self._citations) == 0
else format_citations_response(self._message)
)

# If there are citations, filter out the citations unused in content.
self._citations = get_used_citations(self._message, self._citations)
# If there are citations, modify the content so that the sources are numbers
# instead of [doc1], [doc2], etc.
self._message = format_citations_response(self._message)

# Queue the next chunk
self.queue_next_chunk()
Expand Down Expand Up @@ -278,14 +274,23 @@ async def send_activity(self, activity: Activity) -> None:
channel_data.stream_id = self._stream_id
activity.channel_data = StreamingChannelData.to_dict(channel_data)

entity_args = {
"stream_id": channel_data.stream_id,
"stream_sequence": channel_data.stream_sequence,
"stream_type": channel_data.stream_type,
}
activity.entities = [
Entity(type="streaminfo", **entity_args),
]
entity = StreamingEntity(
stream_id=channel_data.stream_id,
stream_sequence=channel_data.stream_sequence,
stream_type=channel_data.stream_type
)
entities: List[Entity] = [entity]
activity.entities = entities

# If there are citations, filter out the citations unused in content.
if self._citations and len(self._citations) > 0 and self._ended is False:
curr_citations = get_used_citations(self._message, self._citations)
activity.entities.append(
AIEntity(
additional_type=[],
citation=curr_citations if curr_citations else [],
)
)

# Add in Powered by AI feature flags
if self._ended:
Expand Down
12 changes: 4 additions & 8 deletions python/packages/ai/tests/streaming/test_streaming_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,20 +73,16 @@ async def test_send_text_chunk(self):
async def test_send_text_chunk_with_irrelevant_citations(self):
context = self.create_mock_context()
streamer = StreamingResponse(context)
streamer.queue_text_chunk(
"first",
[
Citation(
streamer.set_citations([ Citation(
content="test-content", url="https://example.com", title="test", filepath="test"
)
],
)
)])
streamer.queue_text_chunk("first")
await streamer.wait_for_queue()
streamer.queue_text_chunk("second")
await streamer.wait_for_queue()
self.assertEqual(streamer.updates_sent(), 2)
self.assertEqual(streamer.message, "firstsecond")
self.assertEqual(streamer.citations, None)
self.assertIsNotNone(streamer.citations)

@pytest.mark.asyncio
async def test_send_text_chunk_assert_throws(self):
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 850c77b

Please sign in to comment.