Skip to content

Commit

Permalink
[HNT-281] Topic section ids match the topic id (#728)
Browse files Browse the repository at this point in the history
* [HNT-281] change section ids to match topic ids

* try dynamic feeds using create_model

* enable pydantic mypy plugin

* fix mypy errors

* add -section suffix to topic sections

add comments warning about topic section names

* apply review feedback

* Refactor section tests

* fix comment
  • Loading branch information
mmiermans authored Dec 17, 2024
1 parent 3377f3b commit fdacb12
Show file tree
Hide file tree
Showing 7 changed files with 136 additions and 109 deletions.
8 changes: 7 additions & 1 deletion merino/curated_recommendations/corpus_backends/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@

@unique
class Topic(str, Enum):
"""Topics supported for curated recommendations."""
"""Topics supported for curated recommendations.
The section names must match the corresponding topic names in lowercase. Please update
merino/curated_recommendations/protocol.py if the enum names (e.g. BUSINESS) are changed.
The enum values correspond to the topic identifiers. Changing them would be a breaking change.
"""

BUSINESS = "business"
CAREER = "career"
Expand Down
3 changes: 0 additions & 3 deletions merino/curated_recommendations/layouts.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@

from merino.curated_recommendations.protocol import Layout, ResponsiveLayout, Tile, TileSize

# The following layouts are based on work-in-progress designs. The goal is to get some mock data to
# FE developers. Names and layouts will likely be changed.

# Layout 1: 4 medium tiles on 4 columns. Small tiles are used on fewer columns to display all items.
layout_4_medium = Layout(
name="4-medium-small-1-ad",
Expand Down
47 changes: 29 additions & 18 deletions merino/curated_recommendations/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,25 +227,36 @@ class CuratedRecommendationsFeed(BaseModel):
need_to_know: CuratedRecommendationsBucket | None = None
fakespot: FakespotFeed | None = None

# The following feeds are used as mock data for the 'sections' experiment.
# They should be removed when the sections are implemented that we'll actually launch with.
business: Section | None = None
career: Section | None = None
arts: Section | None = None
food: Section | None = None
health_fitness: Section | None = None
home: Section | None = None
personal_finance: Section | None = None
politics: Section | None = None
sports: Section | None = None
technology: Section | None = None
travel: Section | None = None
education: Section | None = None
gaming: Section | None = None
parenting: Section | None = None
science: Section | None = None
self_improvement: Section | None = None
# Sections
# Renaming an alias of a section is a breaking change. New Tab stores section names when users
# follow or block sections, and references 'top_stories_section' to show topic labels.
top_stories_section: Section | None = None
# Topic sections are named according to the lowercase Topic enum name, and aliased to the topic
# id. The alias determines the section name in the JSON response.
business: Section | None = Field(None, alias="business")
career: Section | None = Field(None, alias="career")
arts: Section | None = Field(None, alias="arts")
food: Section | None = Field(None, alias="food")
health_fitness: Section | None = Field(None, alias="health")
home: Section | None = Field(None, alias="home")
personal_finance: Section | None = Field(None, alias="finance")
politics: Section | None = Field(None, alias="government")
sports: Section | None = Field(None, alias="sports")
technology: Section | None = Field(None, alias="tech")
travel: Section | None = Field(None, alias="travel")
education: Section | None = Field(None, alias="education")
gaming: Section | None = Field(None, alias="hobbies")
parenting: Section | None = Field(None, alias="society-parenting")
science: Section | None = Field(None, alias="education-science")
self_improvement: Section | None = Field(None, alias="society")

def has_topic_section(self, topic: Topic) -> bool:
"""Check if a section for the given topic exists as an attribute."""
return hasattr(self, topic.name.lower())

def set_topic_section(self, topic: Topic, section: Section):
"""Set a section for the given topic."""
setattr(self, topic.name.lower(), section)


class CuratedRecommendationsResponse(BaseModel):
Expand Down
33 changes: 19 additions & 14 deletions merino/curated_recommendations/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
layout_4_large,
layout_6_tiles,
)
from merino.curated_recommendations.localization import get_translation
from merino.curated_recommendations.localization import get_translation, LOCALIZED_SECTION_TITLES
from merino.curated_recommendations.prior_backends.protocol import PriorBackend
from merino.curated_recommendations.protocol import (
Locale,
Expand Down Expand Up @@ -184,9 +184,16 @@ def is_need_to_know_experiment(request, surface_id) -> bool:
)

@staticmethod
def is_sections_experiment(request) -> bool:
def is_sections_experiment(
request: CuratedRecommendationsRequest,
surface_id: ScheduledSurfaceId,
) -> bool:
"""Check if the 'sections' experiment is enabled."""
return request.feeds and "sections" in request.feeds
return (
request.feeds is not None
and "sections" in request.feeds # Clients must request "feeds": ["sections"]
and surface_id in LOCALIZED_SECTION_TITLES # The locale must be supported
)

@staticmethod
def is_fakespot_experiment(request, surface_id) -> bool:
Expand Down Expand Up @@ -338,18 +345,16 @@ def get_sections(
)

# Group the remaining recommendations by topic, preserving Thompson sampling order
sections_by_topic: dict[str, Section] = {}
sections_by_topic: dict[Topic, Section] = {}

for rec in remaining_recs:
if rec.topic and rec.topic.value in CuratedRecommendationsFeed.model_fields:
topic = rec.topic
if topic in sections_by_topic:
section = sections_by_topic[topic]
if rec.topic:
if rec.topic in sections_by_topic:
section = sections_by_topic[rec.topic]
else:
formatted_topic_en_us = rec.topic.replace("_", " ").capitalize()
section = sections_by_topic[topic] = Section(
receivedFeedRank=len(sections_by_topic)
+ 1, # add 1 for top_stories_section
section = sections_by_topic[rec.topic] = Section(
receivedFeedRank=len(sections_by_topic) + 1, # +1 for top_stories_section
recommendations=[],
# return the hardcoded localized topic section title
# fallback on en-US topic title
Expand All @@ -362,12 +367,12 @@ def get_sections(
section.recommendations.append(rec)

# Filter and assign sections with valid minimum recommendations
for topic_id, section in sections_by_topic.items():
for topic, section in sections_by_topic.items():
# Find the maximum number of tiles in this section's responsive layouts.
max_tile_count = max(len(rl.tiles) for rl in section.layout.responsiveLayouts)
# Only add sections that meet the minimum number of recommendations.
if len(section.recommendations) >= max_tile_count + min_fallback_recs_per_section:
setattr(feeds, topic_id, section)
feeds.set_topic_section(topic, section)

return feeds

Expand Down Expand Up @@ -411,7 +416,7 @@ async def fetch(
need_to_know_feed = CuratedRecommendationsBucket(
recommendations=need_to_know_recs, title=title
)
elif self.is_sections_experiment(curated_recommendations_request):
elif self.is_sections_experiment(curated_recommendations_request, surface_id):
sections_feeds = self.get_sections(
recommendations, curated_recommendations_request, surface_id
)
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ exclude_dirs = [

[tool.mypy]
python_version = "3.12"
plugins = ['pydantic.mypy']
disallow_untyped_calls = true
follow_imports = "normal"
ignore_missing_imports = true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1322,25 +1322,6 @@ class TestSections:
en_us_section_title_top_stories = "Popular Today"
de_section_title_top_stories = "Meistgelesen"

@staticmethod
def assert_section_feed_helper(data, top_stories_title):
"""Help assert the section feed for correctness."""
# Assert that an empty data array is returned. All recommendations are under "feeds".
assert len(data["data"]) == 0

# Check that all sections are present
feeds = data["feeds"]
assert len(feeds) > 5 # fixture data contains enough recommendations for many sections.

# All sections have a layout
assert all(Layout(**feed["layout"]) for feed in feeds.values() if feed)

# Ensure "Today's top stories" is present with a valid date subtitle
top_stories_section = data["feeds"].get("top_stories_section")
assert top_stories_section is not None
assert top_stories_section["title"] == top_stories_title
assert top_stories_section["subtitle"] is not None

@pytest.mark.parametrize(
"surface_id",
[
Expand Down Expand Up @@ -1376,89 +1357,85 @@ def test_section_translations(self, surface_id):
)

@pytest.mark.asyncio
async def test_curated_recommendations_with_sections_feed(self, caplog):
@pytest.mark.parametrize("locale", ["en-US", "de-DE"])
async def test_sections_feed_content(self, locale, caplog):
"""Test the curated recommendations endpoint response is as expected
when requesting the 'sections' feed for en-US locale.
when requesting the 'sections' feed for different locales.
"""
async with AsyncClient(app=app, base_url="http://test") as ac:
# Mock the endpoint to request the sections feed
response = await ac.post(
"/api/v1/curated-recommendations",
json={"locale": "en-US", "feeds": ["sections"]},
json={"locale": locale, "feeds": ["sections"]},
)
data = response.json()

# Check if the response is valid
assert response.status_code == 200

self.assert_section_feed_helper(data, self.en_us_section_title_top_stories)

# Assert no errors were logged
errors = [r for r in caplog.records if r.levelname == "ERROR"]
assert len(errors) == 0

@pytest.mark.asyncio
async def test_curated_recommendations_with_sections_feed_de(self, caplog):
"""Test the curated recommendations endpoint response is as expected
when requesting the 'sections' feed for de-DE locale.
"""
async with AsyncClient(app=app, base_url="http://test") as ac:
# Mock the endpoint to request the sections feed
response = await ac.post(
"/api/v1/curated-recommendations",
json={"locale": "de-DE", "feeds": ["sections"]},
)
data = response.json()
# Assert that an empty data array is returned. All recommendations are under "feeds".
assert len(data["data"]) == 0

# Check if the response is valid
assert response.status_code == 200
feeds = data["feeds"]
sections = {name: section for name, section in feeds.items() if section is not None}

self.assert_section_feed_helper(data, self.de_section_title_top_stories)
# The fixture data contains enough recommendations for at least 4 sections. The number
# of sections varies because top_stories_section is determined by Thompson sampling,
# and therefore the number of recs per topics is non-deterministic.
assert len(sections) >= 4

# Ensure that topic section titles are in German, check at least one topic translation
if data["feeds"]["arts"] is not None:
assert data["feeds"]["arts"]["title"] == "Unterhaltung"
if data["feeds"]["education"] is not None:
assert data["feeds"]["education"]["title"] == "Bildung"
if data["feeds"]["sports"] is not None:
assert data["feeds"]["sports"]["title"] == "Sport"
# All sections have a layout
assert all(Layout(**section["layout"]) for section in sections.values())

# Assert no errors were logged
errors = [r for r in caplog.records if r.levelname == "ERROR"]
assert len(errors) == 0
# Ensure all topics are present and are named according to the Topic Enum value.
assert all(topic.value in feeds for topic in Topic)

@pytest.mark.asyncio
async def test_curated_recommendations_with_sections_feed_other_locale(self, caplog):
"""Test the curated recommendations endpoint response is as expected
when requesting the 'sections' feed for any other locale besides en-US & de-DE.
Check that an error is logged for missing translations.
"""
@pytest.mark.parametrize(
"locale, expected_titles",
[
(
"en-US",
{
"top_stories_section": "Popular Today",
"arts": "Entertainment",
"education": "Education",
"sports": "Sports",
},
),
(
"de-DE",
{
"top_stories_section": "Meistgelesen",
"arts": "Unterhaltung",
"education": "Bildung",
"sports": "Sport",
},
),
],
)
async def test_sections_feed_titles(self, locale, expected_titles):
"""Test the curated recommendations endpoint 'sections' have the expected (sub)titles."""
async with AsyncClient(app=app, base_url="http://test") as ac:
# Mock the endpoint to request the sections feed
response = await ac.post(
"/api/v1/curated-recommendations",
json={"locale": "it-IT", "feeds": ["sections"]},
json={"locale": locale, "feeds": ["sections"]},
)
data = response.json()
feeds = data["feeds"]

# Check if the response is valid
assert response.status_code == 200

self.assert_section_feed_helper(data, self.en_us_section_title_top_stories)
# Sections have their expected, localized title
for section_name, expected_title in expected_titles.items():
section = feeds.get(section_name)
if section:
assert section["title"] == expected_title

# Ensure that topic section titles fallback to English
if data["feeds"]["arts"] is not None:
assert data["feeds"]["arts"]["title"] == "Arts"
if data["feeds"]["education"] is not None:
assert data["feeds"]["education"]["title"] == "Education"
if data["feeds"]["sports"] is not None:
assert data["feeds"]["sports"]["title"] == "Sports"

# Assert that errors were logged with a descriptive message when missing translation
expected_error = "No translations found for surface 'ScheduledSurfaceId.NEW_TAB_IT_IT'"
errors = [r for r in caplog.records if r.levelname == "ERROR"]
assert len(errors) == 9
assert all(expected_error in error.message for error in errors)
# Ensure "Today's top stories" subtitle is present
assert feeds["top_stories_section"]["subtitle"] is not None


class TestExtendedExpiration:
Expand Down
30 changes: 30 additions & 0 deletions tests/unit/curated_recommendations/test_localization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""Tests covering merino/curated_recommendations/localization.py"""

from merino.curated_recommendations.localization import get_translation
from merino.curated_recommendations.corpus_backends.protocol import ScheduledSurfaceId


def test_get_translation_existing_translation():
"""Test that get_translation returns existing translations."""
result = get_translation(ScheduledSurfaceId.NEW_TAB_EN_US, "business", "Default")
assert result == "Business"


def test_get_translation_non_existing_locale(caplog):
"""Test logs error and falls back to default when locale translations do not exist."""
result = get_translation(ScheduledSurfaceId.NEW_TAB_IT_IT, "business", "Default")
assert result == "Default"

errors = [r for r in caplog.records if r.levelname == "ERROR"]
assert len(errors) == 1
assert "No translations found for surface" in errors[0].message


def test_get_translation_non_existing_slug(caplog):
"""Test logs error and falls back to default when the topic slug does not exist."""
result = get_translation(ScheduledSurfaceId.NEW_TAB_EN_US, "non_existing_slug", "Default")
assert result == "Default"

errors = [r for r in caplog.records if r.levelname == "ERROR"]
assert len(errors) == 1
assert "Missing or empty translation for topic" in errors[0].message

0 comments on commit fdacb12

Please sign in to comment.