Skip to content

Commit

Permalink
Fix flavor param with incorrect type hint in read_html (#772)
Browse files Browse the repository at this point in the history
* Fix flavor param with incorrect type hint in read_html

refs:
- pandas-dev/pandas#55059
- pandas-dev/pandas#55076

* Add HTMLFlavors type to read_html

ref: pandas-dev/pandas#55529

* Add tests and new dev dependencies

Added:
- tests that check the HTMLFlavors type of the flavor argument of read_html;
- beautifulsoup4 and html5lib as dev dependencies, since they are required
by the "bs4" and "html5lib" flavors of read_html.
  • Loading branch information
matheusfelipeog authored Nov 2, 2023
1 parent 2fefa8a commit 717f4dd
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 2 deletions.
3 changes: 3 additions & 0 deletions pandas-stubs/_typing.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,9 @@ ParseDatesArg: TypeAlias = (
# read_xml parsers
XMLParsers: TypeAlias = Literal["lxml", "etree"]

# read_html flavors
HTMLFlavors: TypeAlias = Literal["lxml", "html5lib", "bs4"]

# Any plain Python or numpy function
Function: TypeAlias = np.ufunc | Callable[..., Any]
# Use a distinct HashableT in shared types to avoid conflicts with
Expand Down
3 changes: 2 additions & 1 deletion pandas-stubs/io/html.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ from pandas._typing import (
HashableT3,
HashableT4,
HashableT5,
HTMLFlavors,
ReadBuffer,
StorageOptions,
)
Expand All @@ -29,7 +30,7 @@ def read_html(
io: FilePath | ReadBuffer[str],
*,
match: str | Pattern = ...,
flavor: str | None = ...,
flavor: HTMLFlavors | Sequence[HTMLFlavors] | None = ...,
header: int | Sequence[int] | None = ...,
index_col: int | Sequence[int] | list[HashableT1] | None = ...,
skiprows: int | Sequence[int] | slice | None = ...,
Expand Down
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ pre-commit = ">=2.19.0"
black = ">=23.3.0"
isort = ">=5.12.0"
openpyxl = ">=3.0.10"
tables = { version = ">=3.8.0" , python = "<4"} # 3.8.0 depends on blosc2 which caps python to <4
tables = { version = ">=3.8.0", python = "<4"} # 3.8.0 depends on blosc2 which caps python to <4
lxml = ">=4.9.1"
pyreadstat = ">=1.2.0"
xlrd = ">=2.0.1"
Expand All @@ -61,6 +61,9 @@ jinja2 = ">=3.1"
scipy = { version = ">=1.9.1", python = "<3.13" }
SQLAlchemy = ">=2.0.12"
types-python-dateutil = ">=2.8.19"
numexpr = "<2.8.5" # https://github.com/pandas-dev/pandas/issues/54449
beautifulsoup4 = ">=4.12.2"
html5lib = ">=1.1"

[build-system]
requires = ["poetry-core>=1.0.0"]
Expand Down
6 changes: 6 additions & 0 deletions tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -1237,6 +1237,12 @@ def test_read_html():
with ensure_clean() as path:
check(assert_type(DF.to_html(path), None), type(None))
check(assert_type(read_html(path), list[DataFrame]), list)
check(assert_type(read_html(path, flavor=None), list[DataFrame]), list)
check(assert_type(read_html(path, flavor="bs4"), list[DataFrame]), list)
check(assert_type(read_html(path, flavor=["bs4"]), list[DataFrame]), list)
check(
assert_type(read_html(path, flavor=["bs4", "lxml"]), list[DataFrame]), list
)


def test_csv_quoting():
Expand Down

0 comments on commit 717f4dd

Please sign in to comment.