Skip to content

Commit

Permalink
Merge pull request #2063 from dlt-hub/devel
Browse files Browse the repository at this point in the history
master merge for 1.4.0 release
  • Loading branch information
rudolfix authored Nov 14, 2024
2 parents de9d7bf + 73b79ee commit 0fce1c8
Show file tree
Hide file tree
Showing 243 changed files with 7,129 additions and 2,253 deletions.
20 changes: 18 additions & 2 deletions .github/workflows/test_common.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ env:
# we need the secrets only for the rest_api_pipeline tests which are in tests/sources
# so we inject them only at the end
SOURCES__GITHUB__ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# and also for the github_api_pipeline tests
SOURCES__GITHUB_API_PIPELINE__ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}

jobs:
get_docs_changes:
Expand Down Expand Up @@ -114,7 +116,7 @@ jobs:
shell: cmd
- name: Install pyarrow
run: poetry install --no-interaction -E duckdb -E cli -E parquet --with sentry-sdk
run: poetry install --no-interaction -E duckdb -E cli -E parquet --with sentry-sdk && poetry run pip install pyarrow==15.0.2

- run: |
poetry run pytest tests/pipeline/test_pipeline_extra.py -k arrow
Expand All @@ -127,7 +129,7 @@ jobs:
shell: cmd
- name: Install pipeline and sources dependencies
run: poetry install --no-interaction -E duckdb -E cli -E parquet -E deltalake -E sql_database --with sentry-sdk,pipeline,sources
run: poetry install --no-interaction -E duckdb -E cli -E parquet -E deltalake -E sql_database --with sentry-sdk,pipeline,sources && poetry run pip install pyarrow==15.0.2

- run: |
poetry run pytest tests/extract tests/pipeline tests/libs tests/cli/common tests/destinations tests/sources
Expand All @@ -153,6 +155,20 @@ jobs:
name: Run extract tests Windows
shell: cmd
# here we upgrade pyarrow to 17 and run the libs tests again
- name: Install pyarrow 17
run: poetry run pip install pyarrow==17.0.0

- run: |
poetry run pytest tests/libs
if: runner.os != 'Windows'
name: Run libs tests Linux/MAC
- run: |
poetry run pytest tests/libs
if: runner.os == 'Windows'
name: Run libs tests Windows
shell: cmd
# - name: Install Pydantic 1.0
# run: pip install "pydantic<2"

Expand Down
18 changes: 8 additions & 10 deletions .github/workflows/test_destinations.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ env:
TESTS__R2_AWS_ACCESS_KEY_ID: a4950a5003b26f5a71ac97ef3848ff4c
TESTS__R2_AWS_SECRET_ACCESS_KEY: ${{ secrets.CLOUDFLARE_R2_SECRET_ACCESS_KEY }}
TESTS__R2_ENDPOINT_URL: https://9830548e4e4b582989be0811f2a0a97f.r2.cloudflarestorage.com
TESTS__R2_REGION_NAME: us-east-1

# RUNTIME__SENTRY_DSN: https://6f6f7b6f8e0f458a89be4187603b55fe@o1061158.ingest.sentry.io/4504819859914752
RUNTIME__LOG_LEVEL: ERROR
Expand Down Expand Up @@ -67,13 +68,13 @@ jobs:
virtualenvs-in-project: true
installer-parallel: true

- name: Load cached venv
id: cached-poetry-dependencies
uses: actions/cache@v3
with:
# path: ${{ steps.pip-cache.outputs.dir }}
path: .venv
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-redshift
# - name: Load cached venv
# id: cached-poetry-dependencies
# uses: actions/cache@v3
# with:
# # path: ${{ steps.pip-cache.outputs.dir }}
# path: .venv
# key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-redshift

- name: Install dependencies
# if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
Expand All @@ -82,9 +83,6 @@ jobs:
- name: create secrets.toml
run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml

- name: clear duckdb secrets and cache
run: rm -rf ~/.duckdb

- run: |
poetry run pytest tests/load --ignore tests/load/sources -m "essential"
name: Run essential tests Linux
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_doc_snippets.yml
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ jobs:

- name: Install dependencies
# if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --no-interaction -E duckdb -E weaviate -E parquet -E qdrant -E bigquery -E postgres -E lancedb --with docs,sentry-sdk --without airflow
run: poetry install --no-interaction -E duckdb -E weaviate -E parquet -E qdrant -E bigquery -E postgres -E lancedb --with docs,sentry-sdk --without airflow -E s3

- name: create secrets.toml for examples
run: pwd && echo "$DLT_SECRETS_TOML" > docs/examples/.dlt/secrets.toml
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/test_local_destinations.yml
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,6 @@ jobs:
DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_USERNAME: foo
DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_PASSWORD: pass


- name: Stop weaviate
if: always()
run: docker compose -f "tests/load/weaviate/docker-compose.yml" down -v
Expand Down
83 changes: 0 additions & 83 deletions .github/workflows/test_pyarrow17.yml

This file was deleted.

10 changes: 6 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,13 @@ format:
poetry run black dlt docs tests --exclude=".*syntax_error.py|\.venv.*|_storage/.*"
# poetry run isort ./

lint-and-test-snippets:
lint-snippets:
cd docs/tools && poetry run python check_embedded_snippets.py full
poetry run mypy --config-file mypy.ini docs/website docs/examples docs/tools --exclude docs/tools/lint_setup --exclude docs/website/docs_processed
poetry run flake8 --max-line-length=200 docs/website docs/examples docs/tools


lint-and-test-snippets: lint-snippets
poetry run mypy --config-file mypy.ini docs/website docs/tools --exclude docs/tools/lint_setup --exclude docs/website/docs_processed
poetry run flake8 --max-line-length=200 docs/website docs/tools --exclude docs/website/.dlt-repo
cd docs/website/docs && poetry run pytest --ignore=node_modules

lint-and-test-examples:
Expand All @@ -72,7 +75,6 @@ lint-and-test-examples:
poetry run mypy --config-file mypy.ini docs/examples
cd docs/examples && poetry run pytest


test-examples:
cd docs/examples && poetry run pytest

Expand Down
3 changes: 3 additions & 0 deletions dlt/cli/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
from dlt.cli.reference import SupportsCliCommand
from dlt.cli.exceptions import CliCommandException

__all__ = ["SupportsCliCommand", "CliCommandException"]
22 changes: 18 additions & 4 deletions dlt/cli/config_toml_writer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, NamedTuple, Tuple, Iterable
from typing import Any, NamedTuple, Tuple, Iterable, Mapping
import tomlkit
from tomlkit.items import Table as TOMLTable
from tomlkit.container import Container as TOMLContainer
Expand Down Expand Up @@ -72,7 +72,7 @@ def write_value(
hint = extract_inner_hint(hint)
if is_base_configuration_inner_hint(hint):
inner_table = tomlkit.table(is_super_table=True)
write_spec(inner_table, hint(), overwrite_existing)
write_spec(inner_table, hint(), default_value, overwrite_existing)
if len(inner_table) > 0:
toml_table[name] = inner_table
else:
Expand All @@ -86,17 +86,31 @@ def write_value(
toml_table[name] = default_value


def write_spec(
    toml_table: TOMLTable,
    config: BaseConfiguration,
    initial_value: Mapping[str, Any],
    overwrite_existing: bool,
) -> None:
    """Write every resolvable field of `config` into `toml_table`.

    Args:
        toml_table: TOML table that receives the values via `write_value`.
        config: Configuration instance; its resolvable fields are iterated and
            its attribute values are used as defaults.
        initial_value: Optional mapping of field name to a value that takes
            precedence over the default stored on `config`. May be empty or None.
        overwrite_existing: Passed through to `write_value` unchanged.
    """
    for name, hint in config.get_resolvable_fields().items():
        # use initial value
        initial_ = initial_value.get(name) if initial_value else None
        # use default value stored in config
        default_value = getattr(config, name, None)

        # check if field is of particular interest and should be included if it has default
        is_default_of_interest = name in config.__config_gen_annotations__

        if initial_ is not None:
            # if initial is different from default, it is of interest as well
            is_default_of_interest = is_default_of_interest or (initial_ != default_value)
            # an explicit initial value wins even when it is falsy (False, 0, "");
            # `initial_ or default_value` would silently fall back to the default here
            value_to_write = initial_
        else:
            value_to_write = default_value

        write_value(
            toml_table,
            name,
            hint,
            overwrite_existing,
            default_value=value_to_write,
            is_default_of_interest=is_default_of_interest,
        )

Expand Down
16 changes: 13 additions & 3 deletions dlt/cli/init_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
from dlt.cli.config_toml_writer import WritableConfigValue, write_values
from dlt.cli.pipeline_files import (
TEMPLATE_FILES,
SOURCES_MODULE_NAME,
SINGLE_FILE_TEMPLATE_MODULE_NAME,
SourceConfiguration,
TVerifiedSourceFileEntry,
TVerifiedSourceFileIndex,
Expand All @@ -41,8 +43,6 @@

DLT_INIT_DOCS_URL = "https://dlthub.com/docs/reference/command-line-interface#dlt-init"
DEFAULT_VERIFIED_SOURCES_REPO = "https://github.com/dlt-hub/verified-sources.git"
TEMPLATES_MODULE_NAME = "pipeline_templates"
SOURCES_MODULE_NAME = "sources"


def _get_core_sources_storage() -> FileStorage:
Expand All @@ -57,7 +57,7 @@ def _get_templates_storage() -> FileStorage:
init_path = (
Path(os.path.dirname(os.path.realpath(__file__))).parent
/ SOURCES_MODULE_NAME
/ TEMPLATES_MODULE_NAME
/ SINGLE_FILE_TEMPLATE_MODULE_NAME
)
return FileStorage(str(init_path))

Expand Down Expand Up @@ -382,6 +382,16 @@ def init_command(
source_configuration = files_ops.get_core_source_configuration(
core_sources_storage, source_name
)
from importlib.metadata import Distribution

dist = Distribution.from_name(DLT_PKG_NAME)
extras = dist.metadata.get_all("Provides-Extra") or []

# Match the extra name to the source name
canonical_source_name = source_name.replace("_", "-").lower()

if canonical_source_name in extras:
source_configuration.requirements.update_dlt_extras(canonical_source_name)
else:
if not is_valid_schema_name(source_name):
raise InvalidSchemaName(source_name)
Expand Down
5 changes: 2 additions & 3 deletions dlt/cli/pipeline_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,8 @@ def _display_pending_packages() -> Tuple[Sequence[str], Sequence[str]]:

streamlit_cmd.append("--")
streamlit_cmd.append(pipeline_name)
if pipelines_dir:
streamlit_cmd.append("--pipelines-dir")
streamlit_cmd.append(pipelines_dir)
streamlit_cmd.append("--pipelines-dir")
streamlit_cmd.append(p.pipelines_dir)

venv = Venv.restore_current()
for line in iter_stdout(venv, *streamlit_cmd):
Expand Down
20 changes: 13 additions & 7 deletions dlt/cli/pipeline_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,22 @@
TSourceType = Literal["core", "verified", "template"]

SOURCES_INIT_INFO_ENGINE_VERSION = 1

SOURCES_MODULE_NAME = "sources"
CORE_SOURCE_TEMPLATE_MODULE_NAME = "_core_source_templates"
SINGLE_FILE_TEMPLATE_MODULE_NAME = "_single_file_templates"

SOURCES_INIT_INFO_FILE = ".sources"
IGNORE_FILES = ["*.py[cod]", "*$py.class", "__pycache__", "py.typed", "requirements.txt"]
IGNORE_VERIFIED_SOURCES = [".*", "_*"]
IGNORE_CORE_SOURCES = [
".*",
"_*",
"helpers",
"pipeline_templates",
SINGLE_FILE_TEMPLATE_MODULE_NAME,
CORE_SOURCE_TEMPLATE_MODULE_NAME,
]
PIPELINE_FILE_SUFFIX = "_pipeline.py"

# hardcode default template files here
TEMPLATE_FILES = [".gitignore", ".dlt/config.toml"]
DEFAULT_PIPELINE_TEMPLATE = "default_pipeline.py"
Expand Down Expand Up @@ -224,15 +229,16 @@ def get_template_configuration(
def get_core_source_configuration(
sources_storage: FileStorage, source_name: str
) -> SourceConfiguration:
pipeline_file = source_name + "_pipeline.py"
src_pipeline_file = CORE_SOURCE_TEMPLATE_MODULE_NAME + "/" + source_name + PIPELINE_FILE_SUFFIX
dest_pipeline_file = source_name + PIPELINE_FILE_SUFFIX

return SourceConfiguration(
"core",
"dlt.sources." + source_name,
sources_storage,
pipeline_file,
pipeline_file,
[],
src_pipeline_file,
dest_pipeline_file,
[".gitignore"],
SourceRequirements([]),
_get_docstring_for_module(sources_storage, source_name),
False,
Expand All @@ -247,7 +253,7 @@ def get_verified_source_configuration(
f"Verified source {source_name} could not be found in the repository", source_name
)
# find example script
example_script = f"{source_name}_pipeline.py"
example_script = f"{source_name}{PIPELINE_FILE_SUFFIX}"
if not sources_storage.has_file(example_script):
raise VerifiedSourceRepoError(
f"Pipeline example script {example_script} could not be found in the repository",
Expand Down
7 changes: 7 additions & 0 deletions dlt/common/configuration/accessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,13 @@ def __setitem__(self, field: str, value: Any) -> None:
key = sections.pop()
self.writable_provider.set_value(key, value, None, *sections)

def __contains__(self, field: str) -> bool:
try:
self[field]
return True
except KeyError:
return False

def get(self, field: str, expected_type: Type[TConfigAny] = None) -> TConfigAny:
value: TConfigAny
value, _ = self._get_value(field, expected_type)
Expand Down
Loading

0 comments on commit 0fce1c8

Please sign in to comment.