diff --git a/merino/curated_recommendations/corpus_backends/protocol.py b/merino/curated_recommendations/corpus_backends/protocol.py index bef83035..5c9685c0 100644 --- a/merino/curated_recommendations/corpus_backends/protocol.py +++ b/merino/curated_recommendations/corpus_backends/protocol.py @@ -8,7 +8,13 @@ @unique class Topic(str, Enum): - """Topics supported for curated recommendations.""" + """Topics supported for curated recommendations. + + The section names must match the corresponding topic names in lowercase. Please update + merino/curated_recommendations/protocol.py if the enum names (e.g. BUSINESS) are changed. + + The enum values correspond to the topic identifiers. Changing them would be a breaking change. + """ BUSINESS = "business" CAREER = "career" diff --git a/merino/curated_recommendations/layouts.py b/merino/curated_recommendations/layouts.py index fd1a03f0..a9809844 100644 --- a/merino/curated_recommendations/layouts.py +++ b/merino/curated_recommendations/layouts.py @@ -8,9 +8,6 @@ from merino.curated_recommendations.protocol import Layout, ResponsiveLayout, Tile, TileSize -# The following layouts are based on work-in-progress designs. The goal is to get some mock data to -# FE developers. Names and layouts will likely be changed. - # Layout 1: 4 medium tiles on 4 columns. Small tiles are used on fewer columns to display all items. layout_4_medium = Layout( name="4-medium-small-1-ad", diff --git a/merino/curated_recommendations/protocol.py b/merino/curated_recommendations/protocol.py index 783cb5ee..5efaae72 100644 --- a/merino/curated_recommendations/protocol.py +++ b/merino/curated_recommendations/protocol.py @@ -227,25 +227,36 @@ class CuratedRecommendationsFeed(BaseModel): need_to_know: CuratedRecommendationsBucket | None = None fakespot: FakespotFeed | None = None - # The following feeds are used as mock data for the 'sections' experiment. - # They should be removed when the sections are implemented that we'll actually launch with. - business: Section | None = None - career: Section | None = None - arts: Section | None = None - food: Section | None = None - health_fitness: Section | None = None - home: Section | None = None - personal_finance: Section | None = None - politics: Section | None = None - sports: Section | None = None - technology: Section | None = None - travel: Section | None = None - education: Section | None = None - gaming: Section | None = None - parenting: Section | None = None - science: Section | None = None - self_improvement: Section | None = None + # Sections + # Renaming an alias of a section is a breaking change. New Tab stores section names when users + # follow or block sections, and references 'top_stories_section' to show topic labels. top_stories_section: Section | None = None + # Topic sections are named according to the lowercase Topic enum name, and aliased to the topic + # id. The alias determines the section name in the JSON response. + business: Section | None = Field(None, alias="business") + career: Section | None = Field(None, alias="career") + arts: Section | None = Field(None, alias="arts") + food: Section | None = Field(None, alias="food") + health_fitness: Section | None = Field(None, alias="health") + home: Section | None = Field(None, alias="home") + personal_finance: Section | None = Field(None, alias="finance") + politics: Section | None = Field(None, alias="government") + sports: Section | None = Field(None, alias="sports") + technology: Section | None = Field(None, alias="tech") + travel: Section | None = Field(None, alias="travel") + education: Section | None = Field(None, alias="education") + gaming: Section | None = Field(None, alias="hobbies") + parenting: Section | None = Field(None, alias="society-parenting") + science: Section | None = Field(None, alias="education-science") + self_improvement: Section | None = Field(None, alias="society") + + def has_topic_section(self, topic: Topic) -> bool: + """Check if a section for the given topic exists as an attribute.""" + return hasattr(self, topic.name.lower()) + + def set_topic_section(self, topic: Topic, section: Section): + """Set a section for the given topic.""" + setattr(self, topic.name.lower(), section) class CuratedRecommendationsResponse(BaseModel): diff --git a/merino/curated_recommendations/provider.py b/merino/curated_recommendations/provider.py index 93dbe5a2..4c3246c6 100644 --- a/merino/curated_recommendations/provider.py +++ b/merino/curated_recommendations/provider.py @@ -20,7 +20,7 @@ layout_4_large, layout_6_tiles, ) -from merino.curated_recommendations.localization import get_translation +from merino.curated_recommendations.localization import get_translation, LOCALIZED_SECTION_TITLES from merino.curated_recommendations.prior_backends.protocol import PriorBackend from merino.curated_recommendations.protocol import ( Locale, @@ -184,9 +184,16 @@ def is_need_to_know_experiment(request, surface_id) -> bool: ) @staticmethod - def is_sections_experiment(request) -> bool: + def is_sections_experiment( + request: CuratedRecommendationsRequest, + surface_id: ScheduledSurfaceId, + ) -> bool: """Check if the 'sections' experiment is enabled.""" - return request.feeds and "sections" in request.feeds + return ( + request.feeds is not None + and "sections" in request.feeds # Clients must request "feeds": ["sections"] + and surface_id in LOCALIZED_SECTION_TITLES # The locale must be supported + ) @staticmethod def is_fakespot_experiment(request, surface_id) -> bool: @@ -338,18 +345,16 @@ def get_sections( ) # Group the remaining recommendations by topic, preserving Thompson sampling order - sections_by_topic: dict[str, Section] = {} + sections_by_topic: dict[Topic, Section] = {} for rec in remaining_recs: - if rec.topic and rec.topic.value in CuratedRecommendationsFeed.model_fields: - topic = rec.topic - if topic in sections_by_topic: - section = sections_by_topic[topic] + if rec.topic: + if rec.topic in sections_by_topic: + section = sections_by_topic[rec.topic] else: formatted_topic_en_us = rec.topic.replace("_", " ").capitalize() - section = sections_by_topic[topic] = Section( - receivedFeedRank=len(sections_by_topic) - + 1, # add 1 for top_stories_section + section = sections_by_topic[rec.topic] = Section( + receivedFeedRank=len(sections_by_topic) + 1, # +1 for top_stories_section recommendations=[], # return the hardcoded localized topic section title # fallback on en-US topic title @@ -362,12 +367,12 @@ def get_sections( section.recommendations.append(rec) # Filter and assign sections with valid minimum recommendations - for topic_id, section in sections_by_topic.items(): + for topic, section in sections_by_topic.items(): # Find the maximum number of tiles in this section's responsive layouts. max_tile_count = max(len(rl.tiles) for rl in section.layout.responsiveLayouts) # Only add sections that meet the minimum number of recommendations. if len(section.recommendations) >= max_tile_count + min_fallback_recs_per_section: - setattr(feeds, topic_id, section) + feeds.set_topic_section(topic, section) return feeds @@ -411,7 +416,7 @@ async def fetch( need_to_know_feed = CuratedRecommendationsBucket( recommendations=need_to_know_recs, title=title ) - elif self.is_sections_experiment(curated_recommendations_request): + elif self.is_sections_experiment(curated_recommendations_request, surface_id): sections_feeds = self.get_sections( recommendations, curated_recommendations_request, surface_id ) diff --git a/pyproject.toml b/pyproject.toml index 7e41ed14..0be5c0a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,7 @@ exclude_dirs = [ [tool.mypy] python_version = "3.12" +plugins = ['pydantic.mypy'] disallow_untyped_calls = true follow_imports = "normal" ignore_missing_imports = true diff --git a/tests/integration/api/v1/curated_recommendations/test_curated_recommendations.py b/tests/integration/api/v1/curated_recommendations/test_curated_recommendations.py index 408b80c0..3d30cd0d 100644 --- a/tests/integration/api/v1/curated_recommendations/test_curated_recommendations.py +++ b/tests/integration/api/v1/curated_recommendations/test_curated_recommendations.py @@ -1322,25 +1322,6 @@ class TestSections: en_us_section_title_top_stories = "Popular Today" de_section_title_top_stories = "Meistgelesen" - @staticmethod - def assert_section_feed_helper(data, top_stories_title): - """Help assert the section feed for correctness.""" - # Assert that an empty data array is returned. All recommendations are under "feeds". - assert len(data["data"]) == 0 - - # Check that all sections are present - feeds = data["feeds"] - assert len(feeds) > 5 # fixture data contains enough recommendations for many sections. - - # All sections have a layout - assert all(Layout(**feed["layout"]) for feed in feeds.values() if feed) - - # Ensure "Today's top stories" is present with a valid date subtitle - top_stories_section = data["feeds"].get("top_stories_section") - assert top_stories_section is not None - assert top_stories_section["title"] == top_stories_title - assert top_stories_section["subtitle"] is not None - @pytest.mark.parametrize( "surface_id", [ @@ -1376,89 +1357,85 @@ def test_section_translations(self, surface_id): ) @pytest.mark.asyncio - async def test_curated_recommendations_with_sections_feed(self, caplog): + @pytest.mark.parametrize("locale", ["en-US", "de-DE"]) + async def test_sections_feed_content(self, locale, caplog): """Test the curated recommendations endpoint response is as expected - when requesting the 'sections' feed for en-US locale. + when requesting the 'sections' feed for different locales. """ async with AsyncClient(app=app, base_url="http://test") as ac: # Mock the endpoint to request the sections feed response = await ac.post( "/api/v1/curated-recommendations", - json={"locale": "en-US", "feeds": ["sections"]}, + json={"locale": locale, "feeds": ["sections"]}, ) data = response.json() # Check if the response is valid assert response.status_code == 200 - - self.assert_section_feed_helper(data, self.en_us_section_title_top_stories) - # Assert no errors were logged errors = [r for r in caplog.records if r.levelname == "ERROR"] assert len(errors) == 0 - @pytest.mark.asyncio - async def test_curated_recommendations_with_sections_feed_de(self, caplog): - """Test the curated recommendations endpoint response is as expected - when requesting the 'sections' feed for de-DE locale. - """ - async with AsyncClient(app=app, base_url="http://test") as ac: - # Mock the endpoint to request the sections feed - response = await ac.post( - "/api/v1/curated-recommendations", - json={"locale": "de-DE", "feeds": ["sections"]}, - ) - data = response.json() + # Assert that an empty data array is returned. All recommendations are under "feeds". + assert len(data["data"]) == 0 - # Check if the response is valid - assert response.status_code == 200 + feeds = data["feeds"] + sections = {name: section for name, section in feeds.items() if section is not None} - self.assert_section_feed_helper(data, self.de_section_title_top_stories) + # The fixture data contains enough recommendations for at least 4 sections. The number + # of sections varies because top_stories_section is determined by Thompson sampling, + # and therefore the number of recs per topics is non-deterministic. + assert len(sections) >= 4 - # Ensure that topic section titles are in German, check at least one topic translation - if data["feeds"]["arts"] is not None: - assert data["feeds"]["arts"]["title"] == "Unterhaltung" - if data["feeds"]["education"] is not None: - assert data["feeds"]["education"]["title"] == "Bildung" - if data["feeds"]["sports"] is not None: - assert data["feeds"]["sports"]["title"] == "Sport" + # All sections have a layout + assert all(Layout(**section["layout"]) for section in sections.values()) - # Assert no errors were logged - errors = [r for r in caplog.records if r.levelname == "ERROR"] - assert len(errors) == 0 + # Ensure all topics are present and are named according to the Topic Enum value. + assert all(topic.value in feeds for topic in Topic) @pytest.mark.asyncio - async def test_curated_recommendations_with_sections_feed_other_locale(self, caplog): - """Test the curated recommendations endpoint response is as expected - when requesting the 'sections' feed for any other locale besides en-US & de-DE. - Check that an error is logged for missing translations. - """ + @pytest.mark.parametrize( + "locale, expected_titles", + [ + ( + "en-US", + { + "top_stories_section": "Popular Today", + "arts": "Entertainment", + "education": "Education", + "sports": "Sports", + }, + ), + ( + "de-DE", + { + "top_stories_section": "Meistgelesen", + "arts": "Unterhaltung", + "education": "Bildung", + "sports": "Sport", + }, + ), + ], + ) + async def test_sections_feed_titles(self, locale, expected_titles): + """Test the curated recommendations endpoint 'sections' have the expected (sub)titles.""" async with AsyncClient(app=app, base_url="http://test") as ac: # Mock the endpoint to request the sections feed response = await ac.post( "/api/v1/curated-recommendations", - json={"locale": "it-IT", "feeds": ["sections"]}, + json={"locale": locale, "feeds": ["sections"]}, ) data = response.json() + feeds = data["feeds"] - # Check if the response is valid - assert response.status_code == 200 - - self.assert_section_feed_helper(data, self.en_us_section_title_top_stories) + # Sections have their expected, localized title + for section_name, expected_title in expected_titles.items(): + section = feeds.get(section_name) + if section: + assert section["title"] == expected_title - # Ensure that topic section titles fallback to English - if data["feeds"]["arts"] is not None: - assert data["feeds"]["arts"]["title"] == "Arts" - if data["feeds"]["education"] is not None: - assert data["feeds"]["education"]["title"] == "Education" - if data["feeds"]["sports"] is not None: - assert data["feeds"]["sports"]["title"] == "Sports" - - # Assert that errors were logged with a descriptive message when missing translation - expected_error = "No translations found for surface 'ScheduledSurfaceId.NEW_TAB_IT_IT'" - errors = [r for r in caplog.records if r.levelname == "ERROR"] - assert len(errors) == 9 - assert all(expected_error in error.message for error in errors) + # Ensure "Today's top stories" subtitle is present + assert feeds["top_stories_section"]["subtitle"] is not None class TestExtendedExpiration: diff --git a/tests/unit/curated_recommendations/test_localization.py b/tests/unit/curated_recommendations/test_localization.py new file mode 100644 index 00000000..3c93cb3a --- /dev/null +++ b/tests/unit/curated_recommendations/test_localization.py @@ -0,0 +1,30 @@ +"""Tests covering merino/curated_recommendations/localization.py""" + +from merino.curated_recommendations.localization import get_translation +from merino.curated_recommendations.corpus_backends.protocol import ScheduledSurfaceId + + +def test_get_translation_existing_translation(): + """Test that get_translation returns existing translations.""" + result = get_translation(ScheduledSurfaceId.NEW_TAB_EN_US, "business", "Default") + assert result == "Business" + + +def test_get_translation_non_existing_locale(caplog): + """Test logs error and falls back to default when locale translations do not exist.""" + result = get_translation(ScheduledSurfaceId.NEW_TAB_IT_IT, "business", "Default") + assert result == "Default" + + errors = [r for r in caplog.records if r.levelname == "ERROR"] + assert len(errors) == 1 + assert "No translations found for surface" in errors[0].message + + +def test_get_translation_non_existing_slug(caplog): + """Test logs error and falls back to default when the topic slug does not exist.""" + result = get_translation(ScheduledSurfaceId.NEW_TAB_EN_US, "non_existing_slug", "Default") + assert result == "Default" + + errors = [r for r in caplog.records if r.levelname == "ERROR"] + assert len(errors) == 1 + assert "Missing or empty translation for topic" in errors[0].message