Skip to content

Commit

Permalink
[PY] fix: streaming - citations & pyproject.toml updates (#2205)
Browse files Browse the repository at this point in the history
## Linked issues

closes: #minor

## Details

match with #2185 and #2202,  


![image](https://github.com/user-attachments/assets/06846624-0b4b-4913-9177-10016ec9b108)


- citations are now enabled during streaming, and for the final message
- updated colour/outline pngs for sample to adhere to manifest
validation
- fixed indexing for citations `position`
- added `StreamingEntity` class to fix serialization on `streaminfo`
object
- version 2 of poetry was released which led to a few scripts failing

## Attestation Checklist

- [x] My code follows the style guidelines of this project

- I have checked for/fixed spelling, linting, and other errors
- I have commented my code for clarity
- I have made corresponding changes to the documentation (updating the
doc strings in the code is sufficient)
- My changes generate no new warnings
- I have added tests that validate my changes, and provide sufficient
test coverage. I have tested with:
  - Local testing
  - E2E testing in Teams
- New and existing unit tests pass locally with my changes

---------

Co-authored-by: lilydu <lilydu+odspmdb@microsoft.com>
  • Loading branch information
lilyydu and lilydu authored Jan 7, 2025
1 parent 510b2fd commit 850c77b
Show file tree
Hide file tree
Showing 10 changed files with 780 additions and 691 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-build-test-lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
- name: Install Dependencies
run: |
python -m pip install --upgrade pip
pip install poetry
pip install poetry==1.8.5
python scripts/install.py
- name: Check
run: |
Expand Down
1,334 changes: 693 additions & 641 deletions python/packages/ai/poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion python/packages/ai/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,4 @@ warn_unused_ignores = true

[[tool.mypy.overrides]]
module = "tests.*"
disable_error_code = ["var-annotated", "method-assign"]
disable_error_code = ["var-annotated", "method-assign"]
5 changes: 4 additions & 1 deletion python/packages/ai/teams/ai/clients/llm_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,11 @@ def chunk_received(
chunk.delta.context.citations if (chunk.delta and chunk.delta.context) else None
)

if citations:
streamer.set_citations(citations)

if len(text) > 0:
streamer.queue_text_chunk(text, citations)
streamer.queue_text_chunk(text)

# Subscribe to model events
if self._options.model.events is not None:
Expand Down
2 changes: 2 additions & 0 deletions python/packages/ai/teams/streaming/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@
from .streaming_channel_data import StreamingChannelData
from .streaming_handlers import *
from .streaming_response import StreamingResponse
from .streaming_entity import StreamingEntity

__all__ = [
"StreamingResponse",
"StreamingChannelData",
"PromptChunk",
"StreamHandlerTypes",
"StreamingEntity",
]
31 changes: 31 additions & 0 deletions python/packages/ai/teams/streaming/streaming_entity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""
Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Optional

from botbuilder.schema import Entity


@dataclass
class StreamingEntity(Entity):
    """
    Child class of BotBuilder's Entity class. Temporarily needed for mapping until the official
    version of their SDK releases a StreamingEntity class.

    NOTE(review): ``_attribute_map`` follows the botbuilder/msrest serialization
    convention, mapping the snake_case attributes below to the camelCase keys
    used on the wire — confirm against the botbuilder SDK's ``Entity`` base.
    """

    # Serialization map: python attribute name -> JSON key (and type) on the wire.
    _attribute_map = {
        "type": {"key": "type", "type": "str"},
        "stream_id": {"key": "streamId", "type": "str"},
        "stream_type": {"key": "streamType", "type": "str"},
        "stream_sequence": {"key": "streamSequence", "type": "int"},
    }

    # Kind of streaming update this entity describes — TODO confirm allowed values.
    stream_type: str
    # Ordering of this chunk within the stream; presumably monotonically increasing.
    stream_sequence: Optional[int]
    # Identifier tying all chunks of one streamed message together.
    stream_id: Optional[str]
    # Entity discriminator; "streaminfo" marks streaming metadata for Teams.
    type: str = "streaminfo"
75 changes: 40 additions & 35 deletions python/packages/ai/teams/streaming/streaming_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from ..ai.prompts.message import Citation
from ..app_error import ApplicationError
from .streaming_channel_data import StreamingChannelData
from .streaming_entity import StreamingEntity


class StreamingResponse:
Expand Down Expand Up @@ -111,6 +112,24 @@ def updates_sent(self) -> int:
"""
return self._next_sequence - 1

def set_citations(self, citations: List[Citation]) -> None:
    """
    Convert prompt citations into client citations and append them to the
    response's accumulated citation list, numbering positions 1-based and
    continuing from any citations already collected.
    """
    if not citations:
        return

    if not self._citations:
        self._citations = []

    # Positions continue 1-based from what has already been accumulated.
    start = len(self._citations) + 1
    for position, citation in enumerate(citations, start=start):
        appearance = Appearance(
            name=citation.title or f"Document {position}",
            abstract=snippet(citation.content, 477),
        )
        self._citations.append(
            ClientCitation(position=f"{position}", appearance=appearance)
        )

def queue_informative_update(self, text: str) -> None:
"""
Queue an informative update to be sent to the client.
Expand All @@ -131,6 +150,7 @@ def queue_informative_update(self, text: str) -> None:
self._next_sequence += 1

def queue_text_chunk(self, text: str, citations: Optional[List[Citation]] = None) -> None:
# pylint: disable=unused-argument
"""
Queues a chunk of partial message text to be sent to the client.
The text we be sent as quickly as possible to the client. Chunks may be combined before
Expand All @@ -142,33 +162,9 @@ def queue_text_chunk(self, text: str, citations: Optional[List[Citation]] = None

self._message += text

if citations and len(citations) > 0:
if not self._citations:
self._citations = []
curr_pos = len(self._citations)

for citation in citations:
self._citations.append(
ClientCitation(
position=f"{curr_pos}",
appearance=Appearance(
name=citation.title or f"Document {curr_pos}",
abstract=snippet(citation.content, 477),
),
)
)
curr_pos += 1

# If there are citations, modify the content so that the sources are numbers
# instead of [doc1], [doc2], etc.
self._message = (
self._message
if len(self._citations) == 0
else format_citations_response(self._message)
)

# If there are citations, filter out the citations unused in content.
self._citations = get_used_citations(self._message, self._citations)
# If there are citations, modify the content so that the sources are numbers
# instead of [doc1], [doc2], etc.
self._message = format_citations_response(self._message)

# Queue the next chunk
self.queue_next_chunk()
Expand Down Expand Up @@ -278,14 +274,23 @@ async def send_activity(self, activity: Activity) -> None:
channel_data.stream_id = self._stream_id
activity.channel_data = StreamingChannelData.to_dict(channel_data)

entity_args = {
"stream_id": channel_data.stream_id,
"stream_sequence": channel_data.stream_sequence,
"stream_type": channel_data.stream_type,
}
activity.entities = [
Entity(type="streaminfo", **entity_args),
]
entity = StreamingEntity(
stream_id=channel_data.stream_id,
stream_sequence=channel_data.stream_sequence,
stream_type=channel_data.stream_type
)
entities: List[Entity] = [entity]
activity.entities = entities

# If there are citations, filter out the citations unused in content.
if self._citations and len(self._citations) > 0 and self._ended is False:
curr_citations = get_used_citations(self._message, self._citations)
activity.entities.append(
AIEntity(
additional_type=[],
citation=curr_citations if curr_citations else [],
)
)

# Add in Powered by AI feature flags
if self._ended:
Expand Down
12 changes: 4 additions & 8 deletions python/packages/ai/tests/streaming/test_streaming_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,20 +73,16 @@ async def test_send_text_chunk(self):
async def test_send_text_chunk_with_irrelevant_citations(self):
context = self.create_mock_context()
streamer = StreamingResponse(context)
streamer.queue_text_chunk(
"first",
[
Citation(
streamer.set_citations([ Citation(
content="test-content", url="https://example.com", title="test", filepath="test"
)
],
)
)])
streamer.queue_text_chunk("first")
await streamer.wait_for_queue()
streamer.queue_text_chunk("second")
await streamer.wait_for_queue()
self.assertEqual(streamer.updates_sent(), 2)
self.assertEqual(streamer.message, "firstsecond")
self.assertEqual(streamer.citations, None)
self.assertIsNotNone(streamer.citations)

@pytest.mark.asyncio
async def test_send_text_chunk_assert_throws(self):
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 850c77b

Please sign in to comment.