[HNT-281] Topic section ids match the topic id (#728)

* [HNT-281] change section ids to match topic ids * try dynamic feeds using create_model * enable pydantic mypy plugin * fix mypy errors * add -section suffix to topic sections add comments warning about topic section names * apply review feedback * Refactor section tests * fix comment
mozilla-services · Dec 17, 2024 · fdacb12 · fdacb12
1 parent 3377f3b
commit fdacb12
Show file tree

Hide file tree

Showing 7 changed files with 136 additions and 109 deletions.
diff --git a/merino/curated_recommendations/corpus_backends/protocol.py b/merino/curated_recommendations/corpus_backends/protocol.py
@@ -8,7 +8,13 @@
 
 @unique
 class Topic(str, Enum):
-    """Topics supported for curated recommendations."""
+    """Topics supported for curated recommendations.
+
+    The section names must match the corresponding topic names in lowercase. Please update
+    merino/curated_recommendations/protocol.py if the enum names (e.g. BUSINESS) are changed.
+
+    The enum values correspond to the topic identifiers. Changing them would be a breaking change.
+    """
 
     BUSINESS = "business"
     CAREER = "career"

diff --git a/merino/curated_recommendations/layouts.py b/merino/curated_recommendations/layouts.py
@@ -8,9 +8,6 @@
 
 from merino.curated_recommendations.protocol import Layout, ResponsiveLayout, Tile, TileSize
 
-# The following layouts are based on work-in-progress designs. The goal is to get some mock data to
-# FE developers. Names and layouts will likely be changed.
-
 # Layout 1: 4 medium tiles on 4 columns. Small tiles are used on fewer columns to display all items.
 layout_4_medium = Layout(
     name="4-medium-small-1-ad",

diff --git a/merino/curated_recommendations/protocol.py b/merino/curated_recommendations/protocol.py
@@ -227,25 +227,36 @@ class CuratedRecommendationsFeed(BaseModel):
     need_to_know: CuratedRecommendationsBucket | None = None
     fakespot: FakespotFeed | None = None
 
-    # The following feeds are used as mock data for the 'sections' experiment.
-    # They should be removed when the sections are implemented that we'll actually launch with.
-    business: Section | None = None
-    career: Section | None = None
-    arts: Section | None = None
-    food: Section | None = None
-    health_fitness: Section | None = None
-    home: Section | None = None
-    personal_finance: Section | None = None
-    politics: Section | None = None
-    sports: Section | None = None
-    technology: Section | None = None
-    travel: Section | None = None
-    education: Section | None = None
-    gaming: Section | None = None
-    parenting: Section | None = None
-    science: Section | None = None
-    self_improvement: Section | None = None
+    # Sections
+    # Renaming an alias of a section is a breaking change. New Tab stores section names when users
+    # follow or block sections, and references 'top_stories_section' to show topic labels.
     top_stories_section: Section | None = None
+    # Topic sections are named according to the lowercase Topic enum name, and aliased to the topic
+    # id. The alias determines the section name in the JSON response.
+    business: Section | None = Field(None, alias="business")
+    career: Section | None = Field(None, alias="career")
+    arts: Section | None = Field(None, alias="arts")
+    food: Section | None = Field(None, alias="food")
+    health_fitness: Section | None = Field(None, alias="health")
+    home: Section | None = Field(None, alias="home")
+    personal_finance: Section | None = Field(None, alias="finance")
+    politics: Section | None = Field(None, alias="government")
+    sports: Section | None = Field(None, alias="sports")
+    technology: Section | None = Field(None, alias="tech")
+    travel: Section | None = Field(None, alias="travel")
+    education: Section | None = Field(None, alias="education")
+    gaming: Section | None = Field(None, alias="hobbies")
+    parenting: Section | None = Field(None, alias="society-parenting")
+    science: Section | None = Field(None, alias="education-science")
+    self_improvement: Section | None = Field(None, alias="society")
+
+    def has_topic_section(self, topic: Topic) -> bool:
+        """Check if a section for the given topic exists as an attribute."""
+        return hasattr(self, topic.name.lower())
+
+    def set_topic_section(self, topic: Topic, section: Section):
+        """Set a section for the given topic."""
+        setattr(self, topic.name.lower(), section)
 
 
 class CuratedRecommendationsResponse(BaseModel):

diff --git a/merino/curated_recommendations/provider.py b/merino/curated_recommendations/provider.py
@@ -20,7 +20,7 @@
     layout_4_large,
     layout_6_tiles,
 )
-from merino.curated_recommendations.localization import get_translation
+from merino.curated_recommendations.localization import get_translation, LOCALIZED_SECTION_TITLES
 from merino.curated_recommendations.prior_backends.protocol import PriorBackend
 from merino.curated_recommendations.protocol import (
     Locale,
@@ -184,9 +184,16 @@ def is_need_to_know_experiment(request, surface_id) -> bool:
         )
 
     @staticmethod
-    def is_sections_experiment(request) -> bool:
+    def is_sections_experiment(
+        request: CuratedRecommendationsRequest,
+        surface_id: ScheduledSurfaceId,
+    ) -> bool:
         """Check if the 'sections' experiment is enabled."""
-        return request.feeds and "sections" in request.feeds
+        return (
+            request.feeds is not None
+            and "sections" in request.feeds  # Clients must request "feeds": ["sections"]
+            and surface_id in LOCALIZED_SECTION_TITLES  # The locale must be supported
+        )
 
     @staticmethod
     def is_fakespot_experiment(request, surface_id) -> bool:
@@ -338,18 +345,16 @@ def get_sections(
         )
 
         # Group the remaining recommendations by topic, preserving Thompson sampling order
-        sections_by_topic: dict[str, Section] = {}
+        sections_by_topic: dict[Topic, Section] = {}
 
         for rec in remaining_recs:
-            if rec.topic and rec.topic.value in CuratedRecommendationsFeed.model_fields:
-                topic = rec.topic
-                if topic in sections_by_topic:
-                    section = sections_by_topic[topic]
+            if rec.topic:
+                if rec.topic in sections_by_topic:
+                    section = sections_by_topic[rec.topic]
                 else:
                     formatted_topic_en_us = rec.topic.replace("_", " ").capitalize()
-                    section = sections_by_topic[topic] = Section(
-                        receivedFeedRank=len(sections_by_topic)
-                        + 1,  # add 1 for top_stories_section
+                    section = sections_by_topic[rec.topic] = Section(
+                        receivedFeedRank=len(sections_by_topic) + 1,  # +1 for top_stories_section
                         recommendations=[],
                         # return the hardcoded localized topic section title
                         # fallback on en-US topic title
@@ -362,12 +367,12 @@ def get_sections(
                     section.recommendations.append(rec)
 
         # Filter and assign sections with valid minimum recommendations
-        for topic_id, section in sections_by_topic.items():
+        for topic, section in sections_by_topic.items():
             # Find the maximum number of tiles in this section's responsive layouts.
             max_tile_count = max(len(rl.tiles) for rl in section.layout.responsiveLayouts)
             # Only add sections that meet the minimum number of recommendations.
             if len(section.recommendations) >= max_tile_count + min_fallback_recs_per_section:
-                setattr(feeds, topic_id, section)
+                feeds.set_topic_section(topic, section)
 
         return feeds
 
@@ -411,7 +416,7 @@ async def fetch(
             need_to_know_feed = CuratedRecommendationsBucket(
                 recommendations=need_to_know_recs, title=title
             )
-        elif self.is_sections_experiment(curated_recommendations_request):
+        elif self.is_sections_experiment(curated_recommendations_request, surface_id):
             sections_feeds = self.get_sections(
                 recommendations, curated_recommendations_request, surface_id
             )

diff --git a/pyproject.toml b/pyproject.toml
@@ -46,6 +46,7 @@ exclude_dirs = [
 
 [tool.mypy]
 python_version = "3.12"
+plugins = ['pydantic.mypy']
 disallow_untyped_calls = true
 follow_imports = "normal"
 ignore_missing_imports = true

diff --git a/tests/integration/api/v1/curated_recommendations/test_curated_recommendations.py b/tests/integration/api/v1/curated_recommendations/test_curated_recommendations.py
@@ -1322,25 +1322,6 @@ class TestSections:
     en_us_section_title_top_stories = "Popular Today"
     de_section_title_top_stories = "Meistgelesen"
 
-    @staticmethod
-    def assert_section_feed_helper(data, top_stories_title):
-        """Help assert the section feed for correctness."""
-        # Assert that an empty data array is returned. All recommendations are under "feeds".
-        assert len(data["data"]) == 0
-
-        # Check that all sections are present
-        feeds = data["feeds"]
-        assert len(feeds) > 5  # fixture data contains enough recommendations for many sections.
-
-        # All sections have a layout
-        assert all(Layout(**feed["layout"]) for feed in feeds.values() if feed)
-
-        # Ensure "Today's top stories" is present with a valid date subtitle
-        top_stories_section = data["feeds"].get("top_stories_section")
-        assert top_stories_section is not None
-        assert top_stories_section["title"] == top_stories_title
-        assert top_stories_section["subtitle"] is not None
-
     @pytest.mark.parametrize(
         "surface_id",
         [
@@ -1376,89 +1357,85 @@ def test_section_translations(self, surface_id):
             )
 
     @pytest.mark.asyncio
-    async def test_curated_recommendations_with_sections_feed(self, caplog):
+    @pytest.mark.parametrize("locale", ["en-US", "de-DE"])
+    async def test_sections_feed_content(self, locale, caplog):
         """Test the curated recommendations endpoint response is as expected
-        when requesting the 'sections' feed for en-US locale.
+        when requesting the 'sections' feed for different locales.
         """
         async with AsyncClient(app=app, base_url="http://test") as ac:
             # Mock the endpoint to request the sections feed
             response = await ac.post(
                 "/api/v1/curated-recommendations",
-                json={"locale": "en-US", "feeds": ["sections"]},
+                json={"locale": locale, "feeds": ["sections"]},
             )
             data = response.json()
 
             # Check if the response is valid
             assert response.status_code == 200
-
-            self.assert_section_feed_helper(data, self.en_us_section_title_top_stories)
-
             # Assert no errors were logged
             errors = [r for r in caplog.records if r.levelname == "ERROR"]
             assert len(errors) == 0
 
-    @pytest.mark.asyncio
-    async def test_curated_recommendations_with_sections_feed_de(self, caplog):
-        """Test the curated recommendations endpoint response is as expected
-        when requesting the 'sections' feed for de-DE locale.
-        """
-        async with AsyncClient(app=app, base_url="http://test") as ac:
-            # Mock the endpoint to request the sections feed
-            response = await ac.post(
-                "/api/v1/curated-recommendations",
-                json={"locale": "de-DE", "feeds": ["sections"]},
-            )
-            data = response.json()
+            # Assert that an empty data array is returned. All recommendations are under "feeds".
+            assert len(data["data"]) == 0
 
-            # Check if the response is valid
-            assert response.status_code == 200
+            feeds = data["feeds"]
+            sections = {name: section for name, section in feeds.items() if section is not None}
 
-            self.assert_section_feed_helper(data, self.de_section_title_top_stories)
+            # The fixture data contains enough recommendations for at least 4 sections. The number
+            # of sections varies because top_stories_section is determined by Thompson sampling,
+            # and therefore the number of recs per topics is non-deterministic.
+            assert len(sections) >= 4
 
-            # Ensure that topic section titles are in German, check at least one topic translation
-            if data["feeds"]["arts"] is not None:
-                assert data["feeds"]["arts"]["title"] == "Unterhaltung"
-            if data["feeds"]["education"] is not None:
-                assert data["feeds"]["education"]["title"] == "Bildung"
-            if data["feeds"]["sports"] is not None:
-                assert data["feeds"]["sports"]["title"] == "Sport"
+            # All sections have a layout
+            assert all(Layout(**section["layout"]) for section in sections.values())
 
-            # Assert no errors were logged
-            errors = [r for r in caplog.records if r.levelname == "ERROR"]
-            assert len(errors) == 0
+            # Ensure all topics are present and are named according to the Topic Enum value.
+            assert all(topic.value in feeds for topic in Topic)
 
     @pytest.mark.asyncio
-    async def test_curated_recommendations_with_sections_feed_other_locale(self, caplog):
-        """Test the curated recommendations endpoint response is as expected
-        when requesting the 'sections' feed for any other locale besides en-US & de-DE.
-        Check that an error is logged for missing translations.
-        """
+    @pytest.mark.parametrize(
+        "locale, expected_titles",
+        [
+            (
+                "en-US",
+                {
+                    "top_stories_section": "Popular Today",
+                    "arts": "Entertainment",
+                    "education": "Education",
+                    "sports": "Sports",
+                },
+            ),
+            (
+                "de-DE",
+                {
+                    "top_stories_section": "Meistgelesen",
+                    "arts": "Unterhaltung",
+                    "education": "Bildung",
+                    "sports": "Sport",
+                },
+            ),
+        ],
+    )
+    async def test_sections_feed_titles(self, locale, expected_titles):
+        """Test the curated recommendations endpoint 'sections' have the expected (sub)titles."""
         async with AsyncClient(app=app, base_url="http://test") as ac:
             # Mock the endpoint to request the sections feed
             response = await ac.post(
                 "/api/v1/curated-recommendations",
-                json={"locale": "it-IT", "feeds": ["sections"]},
+                json={"locale": locale, "feeds": ["sections"]},
             )
             data = response.json()
+            feeds = data["feeds"]
 
-            # Check if the response is valid
-            assert response.status_code == 200
-
-            self.assert_section_feed_helper(data, self.en_us_section_title_top_stories)
+            # Sections have their expected, localized title
+            for section_name, expected_title in expected_titles.items():
+                section = feeds.get(section_name)
+                if section:
+                    assert section["title"] == expected_title
 
-            # Ensure that topic section titles fallback to English
-            if data["feeds"]["arts"] is not None:
-                assert data["feeds"]["arts"]["title"] == "Arts"
-            if data["feeds"]["education"] is not None:
-                assert data["feeds"]["education"]["title"] == "Education"
-            if data["feeds"]["sports"] is not None:
-                assert data["feeds"]["sports"]["title"] == "Sports"
-
-            # Assert that errors were logged with a descriptive message when missing translation
-            expected_error = "No translations found for surface 'ScheduledSurfaceId.NEW_TAB_IT_IT'"
-            errors = [r for r in caplog.records if r.levelname == "ERROR"]
-            assert len(errors) == 9
-            assert all(expected_error in error.message for error in errors)
+            # Ensure "Today's top stories" subtitle is present
+            assert feeds["top_stories_section"]["subtitle"] is not None
 
 
 class TestExtendedExpiration:

diff --git a/tests/unit/curated_recommendations/test_localization.py b/tests/unit/curated_recommendations/test_localization.py
@@ -0,0 +1,30 @@
+"""Tests covering merino/curated_recommendations/localization.py"""
+
+from merino.curated_recommendations.localization import get_translation
+from merino.curated_recommendations.corpus_backends.protocol import ScheduledSurfaceId
+
+
+def test_get_translation_existing_translation():
+    """Test that get_translation returns existing translations."""
+    result = get_translation(ScheduledSurfaceId.NEW_TAB_EN_US, "business", "Default")
+    assert result == "Business"
+
+
+def test_get_translation_non_existing_locale(caplog):
+    """Test logs error and falls back to default when locale translations do not exist."""
+    result = get_translation(ScheduledSurfaceId.NEW_TAB_IT_IT, "business", "Default")
+    assert result == "Default"
+
+    errors = [r for r in caplog.records if r.levelname == "ERROR"]
+    assert len(errors) == 1
+    assert "No translations found for surface" in errors[0].message
+
+
+def test_get_translation_non_existing_slug(caplog):
+    """Test logs error and falls back to default when the topic slug does not exist."""
+    result = get_translation(ScheduledSurfaceId.NEW_TAB_EN_US, "non_existing_slug", "Default")
+    assert result == "Default"
+
+    errors = [r for r in caplog.records if r.levelname == "ERROR"]
+    assert len(errors) == 1
+    assert "Missing or empty translation for topic" in errors[0].message