diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index ad6ae4d4..5ab88069 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -587,6 +587,9 @@ ParseDatesArg: TypeAlias = ( # read_xml parsers XMLParsers: TypeAlias = Literal["lxml", "etree"] +# read_html flavors +HTMLFlavors: TypeAlias = Literal["lxml", "html5lib", "bs4"] + # Any plain Python or numpy function Function: TypeAlias = np.ufunc | Callable[..., Any] # Use a distinct HashableT in shared types to avoid conflicts with diff --git a/pandas-stubs/io/html.pyi b/pandas-stubs/io/html.pyi index fb39043d..bd0395d6 100644 --- a/pandas-stubs/io/html.pyi +++ b/pandas-stubs/io/html.pyi @@ -21,6 +21,7 @@ from pandas._typing import ( HashableT3, HashableT4, HashableT5, + HTMLFlavors, ReadBuffer, StorageOptions, ) @@ -29,7 +30,7 @@ def read_html( io: FilePath | ReadBuffer[str], *, match: str | Pattern = ..., - flavor: str | None = ..., + flavor: HTMLFlavors | Sequence[HTMLFlavors] | None = ..., header: int | Sequence[int] | None = ..., index_col: int | Sequence[int] | list[HashableT1] | None = ..., skiprows: int | Sequence[int] | slice | None = ..., diff --git a/pyproject.toml b/pyproject.toml index e5ffc592..5e8af09b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,7 @@ pre-commit = ">=2.19.0" black = ">=23.3.0" isort = ">=5.12.0" openpyxl = ">=3.0.10" -tables = { version = ">=3.8.0" , python = "<4"} # 3.8.0 depends on blosc2 which caps python to <4 +tables = { version = ">=3.8.0", python = "<4"} # 3.8.0 depends on blosc2 which caps python to <4 lxml = ">=4.9.1" pyreadstat = ">=1.2.0" xlrd = ">=2.0.1" @@ -61,6 +61,9 @@ jinja2 = ">=3.1" scipy = { version = ">=1.9.1", python = "<3.13" } SQLAlchemy = ">=2.0.12" types-python-dateutil = ">=2.8.19" +numexpr = "<2.8.5" # https://github.com/pandas-dev/pandas/issues/54449 +beautifulsoup4 = ">=4.12.2" +html5lib = ">=1.1" [build-system] requires = ["poetry-core>=1.0.0"] diff --git a/tests/test_io.py b/tests/test_io.py index 4985af52..ce570c6c 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1237,6 +1237,12 @@ def test_read_html(): with ensure_clean() as path: check(assert_type(DF.to_html(path), None), type(None)) check(assert_type(read_html(path), list[DataFrame]), list) + check(assert_type(read_html(path, flavor=None), list[DataFrame]), list) + check(assert_type(read_html(path, flavor="bs4"), list[DataFrame]), list) + check(assert_type(read_html(path, flavor=["bs4"]), list[DataFrame]), list) + check( + assert_type(read_html(path, flavor=["bs4", "lxml"]), list[DataFrame]), list + ) def test_csv_quoting():