diff --git a/.ci/pipeline/build-and-test-lnx.yml b/.ci/pipeline/build-and-test-lnx.yml index 247f96e60b..005ff67eac 100644 --- a/.ci/pipeline/build-and-test-lnx.yml +++ b/.ci/pipeline/build-and-test-lnx.yml @@ -67,20 +67,24 @@ steps: - script: | . /usr/share/miniconda/etc/profile.d/conda.sh conda activate CB + if [ -n "${COVERAGE_RCFILE}" ]; then export COVERAGE_FILE=$(pwd)/.coverage.sklearnex; fi cd .. if [ -n "${TBBROOT}" ] && [ "${TBBROOT}" != "${CONDA_PREFIX}" ]; then source ${TBBROOT}/env/vars.sh; fi ./s/conda-recipe/run_test.sh --json-report env: TBBROOT: ${{ variables.TBBROOT }} + COVERAGE_RCFILE: ${{ variables.COVERAGE_RCFILE }} displayName: "Sklearnex testing" - script: | . /usr/share/miniconda/etc/profile.d/conda.sh conda activate CB if [ -n "${TBBROOT}" ] && [ "${TBBROOT}" != "${CONDA_PREFIX}" ]; then source ${TBBROOT}/env/vars.sh; fi + if [ -n "${COVERAGE_RCFILE}" ]; then export COVERAGE_FILE=$(pwd)/.coverage.sklearn; fi if [ -z "${NO_DPC}" ]; then export CPU="cpu"; fi bash .ci/scripts/run_sklearn_tests.sh $CPU env: TBBROOT: ${{ variables.TBBROOT }} + COVERAGE_RCFILE: ${{ variables.COVERAGE_RCFILE }} NO_DPC: ${{ variables.NO_DPC }} displayName: "Sklearn testing" condition: succeededOrFailed() diff --git a/.ci/pipeline/build-and-test-win.yml b/.ci/pipeline/build-and-test-win.yml index 87df9c661d..68a5505d55 100644 --- a/.ci/pipeline/build-and-test-win.yml +++ b/.ci/pipeline/build-and-test-win.yml @@ -50,14 +50,20 @@ steps: displayName: 'Install testing requirements' - script: | call activate CB + if defined COVERAGE_RCFILE set COVERAGE_FILE=%cd%\.coverage.sklearnex cd .. call s\conda-recipe\run_test.bat s\ --json-report displayName: 'Sklearnex testing' + env: + COVERAGE_RCFILE: ${{ variables.COVERAGE_RCFILE }} - script: | call activate CB + if defined COVERAGE_RCFILE set COVERAGE_FILE=%cd%\.coverage.sklearn bash .ci/scripts/run_sklearn_tests.sh displayName: 'Sklearn testing' condition: succeededOrFailed() + env: + COVERAGE_RCFILE: ${{ variables.COVERAGE_RCFILE }} - script: | call activate CB bash .ci/scripts/run_sklearn_tests.sh diff --git a/.ci/pipeline/ci.yml b/.ci/pipeline/ci.yml index 3e761c79bc..2d3d2a4922 100644 --- a/.ci/pipeline/ci.yml +++ b/.ci/pipeline/ci.yml @@ -76,6 +76,8 @@ jobs: - job: LinuxCondaEnv dependsOn: Lint timeoutInMinutes: 120 + variables: + COVERAGE_RCFILE: '$(Build.Repository.LocalPath)/.coveragerc' strategy: matrix: Python3.9_Sklearn1.0: @@ -97,9 +99,12 @@ jobs: vmImage: 'ubuntu-22.04' steps: - template: build-and-test-lnx.yml + - template: codecov-lnx.yml - job: WindowsCondaEnv dependsOn: Lint timeoutInMinutes: 120 + variables: + COVERAGE_RCFILE: '$(Build.Repository.LocalPath)\.coveragerc' strategy: matrix: Python3.9_Sklearn1.0: @@ -121,3 +126,4 @@ jobs: vmImage: 'windows-2022' steps: - template: build-and-test-win.yml + - template: codecov-win.yml diff --git a/.ci/pipeline/codecov-lnx.yml b/.ci/pipeline/codecov-lnx.yml new file mode 100644 index 0000000000..7ef026e3e8 --- /dev/null +++ b/.ci/pipeline/codecov-lnx.yml @@ -0,0 +1,30 @@ +#=============================================================================== +# Copyright contributors to the oneDAL project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#=============================================================================== +steps: + - script: | + . /usr/share/miniconda/etc/profile.d/conda.sh + conda activate CB + coverage combine .coverage.sklearnex .coverage.sklearn + coverage lcov -o coverage.info + displayName: "Create coverage report" + - script: | + curl -Os https://cli.codecov.io/latest/linux/codecov + chmod +x codecov + export VARARGS="-n azure-${AGENT_OS}-$(PYTHON_VERSION)-$(SKLEARN_VERSION)" + ./codecov -v upload-process -Z -t ${CODECOV_TOKEN} "${VARARGS}" -F azure -f coverage.info + displayName: "Upload to codecov" + env: + CODECOV_TOKEN: $(CODECOV_TOKEN) diff --git a/.ci/pipeline/codecov-win.yml b/.ci/pipeline/codecov-win.yml new file mode 100644 index 0000000000..e5cf4a62ce --- /dev/null +++ b/.ci/pipeline/codecov-win.yml @@ -0,0 +1,29 @@ +#=============================================================================== +# Copyright contributors to the oneDAL project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#=============================================================================== +steps: + - script: | + call activate CB + coverage combine .coverage.sklearnex .coverage.sklearn + coverage lcov -o coverage.info + displayName: "Create coverage report" + - script: | + set PATH=C:\msys64\usr\bin;%PATH% + curl -Os https://cli.codecov.io/latest/windows/codecov.exe + set VARARGS=-n azure-%AGENT_OS%-$(PYTHON_VERSION)-$(SKLEARN_VERSION) + .\codecov.exe -v upload-process -Z -t %CODECOV_TOKEN% %VARARGS% -F azure -f coverage.info + displayName: "Upload to codecov" + env: + CODECOV_TOKEN: $(CODECOV_TOKEN) diff --git a/.ci/scripts/run_sklearn_tests.py b/.ci/scripts/run_sklearn_tests.py index a7f5b04b7f..4dac925b2f 100644 --- a/.ci/scripts/run_sklearn_tests.py +++ b/.ci/scripts/run_sklearn_tests.py @@ -58,6 +58,7 @@ pytest_args += ( "--cov=onedal", "--cov=sklearnex", + "--cov-branch", f"--cov-config={rc}", "--cov-report=", ) diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000000..57fe428dc2 --- /dev/null +++ b/.clang-format @@ -0,0 +1,3 @@ +--- +DisableFormat: true +... diff --git a/.github/.codecov.yml b/.github/.codecov.yml index 58eb17c48b..7fd2581f06 100644 --- a/.github/.codecov.yml +++ b/.github/.codecov.yml @@ -28,3 +28,9 @@ coverage: target: 0 # Allow for diffs to have no code coverage. # threshold: 50 + +flags: + github: + after_n_builds: 1 + azure: + after_n_builds: 8 diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 9237ad6052..c317818e6d 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,14 +1,15 @@ # Infra and tools +.* @napetrov @Alexsandruss @homksei @ahuber21 @ethanglaser .ci/ @napetrov @Alexsandruss @homksei @ahuber21 @ethanglaser -.github/ @napetrov @Alexsandruss @homksei @ahuber21 @ethanglaser @samir-nasibli @icfaust .circleci/ @napetrov @Alexsandruss @homksei @ahuber21 @ethanglaser +.github/ @napetrov @Alexsandruss @homksei @ahuber21 @ethanglaser @samir-nasibli @icfaust # Docs *.md @maria-Petrova @napetrov @Alexsandruss @samir-nasibli @icfaust @david-cortes-intel doc/ @maria-Petrova @napetrov @Alexsandruss @samir-nasibli @icfaust @david-cortes-intel requirements-doc.txt @Alexsandruss @samir-nasibli @icfaust @david-cortes-intel -# DPC++ +# sklearnex onedal/ @Alexsandruss @samir-nasibli @icfaust sklearnex/ @Alexsandruss @samir-nasibli @icfaust @@ -16,10 +17,11 @@ sklearnex/ @Alexsandruss @samir-nasibli @icfaust examples/ @maria-Petrova @Alexsandruss @samir-nasibli @napetrov # Dependencies -setup.py @napetrov @Alexsandruss @samir-nasibli @icfaust -requirements* @napetrov @Alexsandruss @samir-nasibli @homksei @ahuber21 @ethanglaser -dependencies-dev @napetrov @Alexsandruss @samir-nasibli @homksei @ahuber21 @ethanglaser conda-recipe/ @napetrov @Alexsandruss +dependencies-dev @napetrov @Alexsandruss @samir-nasibli @homksei @ahuber21 @ethanglaser +pyproject.toml @napetrov @Alexsandruss @samir-nasibli @homksei @ahuber21 @ethanglaser +requirements* @napetrov @Alexsandruss @samir-nasibli @homksei @ahuber21 @ethanglaser +setup.* @napetrov @Alexsandruss @samir-nasibli @icfaust # Model builders *model_builders* @razdoburdin @ahuber21 @avolkov-intel @@ -29,13 +31,16 @@ conda-recipe/ @napetrov @Alexsandruss # Testing **/test*.py @Alexsandruss @samir-nasibli @icfaust +deselected_tests.yaml @Alexsandruss @samir-nasibli @icfaust +tests/ @Alexsandruss @samir-nasibli @icfaust # Distributed *spmd* @samir-nasibli @ethanglaser -# Scikit-learn patching +# daal4py daal4py/sklearn/ @Alexsandruss @samir-nasibli @icfaust # Core -src/ @Alexsandruss @samir-nasibli @icfaust generator/ @Alexsandruss @samir-nasibli @icfaust +scripts/ @Alexsandruss @samir-nasibli @icfaust +src/ @Alexsandruss @samir-nasibli @icfaust diff --git a/.github/scripts/generate_coverage_reports.sh b/.github/scripts/generate_coverage_reports.sh new file mode 100644 index 0000000000..72f2f9eab8 --- /dev/null +++ b/.github/scripts/generate_coverage_reports.sh @@ -0,0 +1,46 @@ +#=============================================================================== +# Copyright Contributors to the oneDAL project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#=============================================================================== + +ci_dir=$(dirname $(dirname $(dirname "${BASH_SOURCE[0]}"))) +cd $ci_dir + +# create coverage.py report +coverage combine .coverage.sklearnex .coverage.sklearn +coverage lcov -o coverage_py_"${1}".info + +# create gcov report (lcov format) +if [[ -n "${SKLEARNEX_GCOV}" ]]; then + # extract llvm tool for gcov processing + if [[ -z "$2" ]]; then + GCOV_EXE="$(dirname $(type -P -a icx))/compiler/llvm-cov gcov" + else + GCOV_EXE="gcov" + fi + echo $GCOV_EXE + FILTER=$(realpath ./onedal).* + echo $FILTER + + NUMPY_TEST=$(python -m pip freeze | grep numpy) + # install dependencies + # proper operation of gcov with sklearnex requires the header files from + # the build numpy, this must be previously set as NUMPY_BUILD + python -m pip install gcovr $NUMPY_BUILD + + gcovr --gcov-executable "${GCOV_EXE}" -r . -v --lcov --filter "${FILTER}" -o coverage_cpp_"${1}".info + + # reinstall previous numpy + python -m pip install $NUMPY_TEST +fi diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9450f922ae..248f7cb0fc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -95,6 +95,7 @@ jobs: echo "DPCFLAG=${DPCFLAG}" >> "$GITHUB_OUTPUT" # enable coverage report generation echo "COVERAGE_RCFILE=$(readlink -f .coveragerc)" >> "$GITHUB_ENV" + if [[ -z $DPCFLAG ]]; then echo "SKLEARNEX_GCOV=1" >> "$GITHUB_ENV"; fi - name: apt-get run: sudo apt-get update && sudo apt-get install -y clang-format - name: dpcpp installation @@ -111,6 +112,7 @@ jobs: source venv/bin/activate pip install -r dependencies-dev pip list + echo "NUMPY_BUILD=$(python -m pip freeze | grep numpy)" >> "$GITHUB_ENV" - name: Build daal4py/sklearnex run: | source venv/bin/activate @@ -142,13 +144,14 @@ jobs: - name: Create coverage report run: | source venv/bin/activate - coverage combine .coverage.sklearnex .coverage.sklearn - coverage json -o coverage.lnx${{ matrix.PYTHON_VERSION }}_${{ matrix.SKLEARN_VERSION }}.json + source .github/scripts/activate_components.sh ${{ steps.set-env.outputs.DPCFLAG }} + bash .github/scripts/generate_coverage_reports.sh lnx${{ matrix.PYTHON_VERSION }}_${{ matrix.SKLEARN_VERSION }} ${{ steps.set-env.outputs.DPCFLAG }} - name: Archive coverage report uses: actions/upload-artifact@v4 with: name: coverage_lnx_Py${{ matrix.PYTHON_VERSION }}_${{ matrix.SKLEARN_VERSION }} - path: coverage.lnx${{ matrix.PYTHON_VERSION }}_${{ matrix.SKLEARN_VERSION }}.json + path: | + *_lnx${{ matrix.PYTHON_VERSION }}_${{ matrix.SKLEARN_VERSION }}.info - name: Sklearn testing [preview] run: | source venv/bin/activate @@ -218,7 +221,12 @@ jobs: echo C:\msys64\usr\bin;>> %GITHUB_PATH% echo NO_DIST=YES>> %GITHUB_ENV% set DPCTL_TEMP="${{ env.DPCTL_PY_VERSIONS }}" - if not %DPCTL_TEMP:${{ matrix.PYTHON_VERSION }}=%==%DPCTL_TEMP% (echo DPCFLAG=>> %GITHUB_OUTPUT%) else (echo DPCFLAG="0">> %GITHUB_OUTPUT%) + if not %DPCTL_TEMP:${{ matrix.PYTHON_VERSION }}=%==%DPCTL_TEMP% ( + echo DPCFLAG=>> %GITHUB_OUTPUT% + echo SKLEARNEX_GCOV=YES>> %GITHUB_ENV% + ) else ( + echo DPCFLAG="0">> %GITHUB_OUTPUT% + ) echo COVERAGE_RCFILE=%cd%\.coveragerc>> %GITHUB_ENV% - name: Download Intel OpenCL CPU Runtime artifact if: ${{ steps.set-env.outputs.DPCFLAG == '' }} @@ -242,6 +250,7 @@ jobs: pip install --upgrade setuptools pip install cpufeature clang-format pyyaml pip install -r dependencies-dev + for /f "delims=" %%c in ('python -m pip freeze ^| grep numpy') do echo NUMPY_BUILD=%%c>> %GITHUB_ENV% - name: System info shell: cmd run: | @@ -288,13 +297,14 @@ jobs: shell: cmd run: | call .\venv\Scripts\activate.bat - coverage combine .coverage.sklearnex .coverage.sklearn - coverage json -o coverage.win${{ matrix.PYTHON_VERSION }}_${{ matrix.SKLEARN_VERSION }}.json + call .\.github\scripts\activate_components.bat ${{ steps.set-env.outputs.DPCFLAG }} + bash .github/scripts/generate_coverage_reports.sh win${{ matrix.PYTHON_VERSION }}_${{ matrix.SKLEARN_VERSION }} - name: Archive coverage report uses: actions/upload-artifact@v4 with: name: coverage_win_Py${{ matrix.PYTHON_VERSION }}_${{ matrix.SKLEARN_VERSION }} - path: coverage.win${{ matrix.PYTHON_VERSION }}_${{ matrix.SKLEARN_VERSION }}.json + path: | + *_win${{ matrix.PYTHON_VERSION }}_${{ matrix.SKLEARN_VERSION }}.info - name: Sklearn testing [preview] shell: cmd run: | diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml index 7840bcc524..443f5bd503 100644 --- a/.github/workflows/codecov.yml +++ b/.github/workflows/codecov.yml @@ -50,8 +50,8 @@ jobs: OWNER="${FULL_NAME%/$NAME}" if [ "${{ github.repository_owner }}" != "${OWNER}" ]; then BRANCH="${OWNER}:${BRANCH}"; fi if [ $(git branch --show-current) != $BRANCH ]; then PR=$(gh pr view $BRANCH --json number -q .number); fi - echo uploading $BRANCH SHA=${{ github.event.workflow_run.head_sha }} + echo uploading $BRANCH $SHA VARARGS="-C ${SHA} -n github-${SHA}" # if a PR, pass proper information to codecov-cli about the PR number if [ -n "${PR}" ]; then VARARGS="${VARARGS}-${PR} -P ${PR}"; fi diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c4f49656f4..0df0706894 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,3 +25,7 @@ repos: hooks: - id: isort language_version: python3.10 + - repo: https://github.com/pre-commit/mirrors-clang-format + rev: v14.0.6 + hooks: + - id: clang-format diff --git a/INSTALL.md b/INSTALL.md index f05d7a4cb2..f60c1cb735 100755 --- a/INSTALL.md +++ b/INSTALL.md @@ -157,8 +157,8 @@ The build-process (using setup.py) happens in 4 stages: * Numpy * cmake and pybind11 * A C++ compiler with C++11 support -* Clang-Format -* [Intel® oneAPI Data Analytics Library (oneDAL)](https://github.com/uxlfoundation/oneDAL) version 2021.1 or later +* Clang-Format version >=14 +* [Intel® oneAPI Data Analytics Library (oneDAL)](https://github.com/uxlfoundation/oneDAL) version 2021.1 or later, but be mindful that **the oneDAL version must be <= than that of scikit-learn-intelex** (it's backwards compatible but not forwards compatible). * You can use the pre-built `dal-devel` conda package from conda-forge channel * MPI (optional, needed for distributed mode) * You can use the pre-built `impi_rt` and `impi-devel` conda packages from conda-forge channel diff --git a/conda-recipe/run_test.bat b/conda-recipe/run_test.bat index 2be86075d2..59519b2801 100644 --- a/conda-recipe/run_test.bat +++ b/conda-recipe/run_test.bat @@ -34,7 +34,7 @@ if "%PYTHON%"=="python" ( set "PYTEST_ARGS= " -IF DEFINED COVERAGE_RCFILE (set "PYTEST_ARGS=--cov=onedal --cov=sklearnex --cov-config=%COVERAGE_RCFILE% --cov-append --cov-report= %PYTEST_ARGS%") +IF DEFINED COVERAGE_RCFILE (set "PYTEST_ARGS=--cov=onedal --cov=sklearnex --cov-config=%COVERAGE_RCFILE% --cov-append --cov-branch --cov-report= %PYTEST_ARGS%") rem Note: execute with argument --json-report as second argument rem in order to produce a JSON report under folder '.pytest_reports'. diff --git a/conda-recipe/run_test.sh b/conda-recipe/run_test.sh index 92e8b97672..28620adfb5 100755 --- a/conda-recipe/run_test.sh +++ b/conda-recipe/run_test.sh @@ -55,7 +55,7 @@ function generate_pytest_args { ARGS+=("--json-report-file=.pytest_reports/$1_report.json") fi if [ -n "${COVERAGE_RCFILE}" ]; then - ARGS+=(--cov=onedal --cov=sklearnex --cov-config="${COVERAGE_RCFILE}" --cov-append --cov-report=) + ARGS+=(--cov=onedal --cov=sklearnex --cov-config="${COVERAGE_RCFILE}" --cov-append --cov-branch --cov-report=) fi printf -- "${ARGS[*]}" } diff --git a/dependencies-dev b/dependencies-dev index 5132b2502e..16d0e13501 100644 --- a/dependencies-dev +++ b/dependencies-dev @@ -3,5 +3,5 @@ Jinja2==3.1.5 numpy==2.0.1 ; python_version <= '3.9' numpy==2.2.1 ; python_version > '3.9' pybind11==2.13.6 -cmake==3.31.2 -setuptools==75.7.0 +cmake==3.31.4 +setuptools==75.8.0 diff --git a/doc/sources/quick-start.rst b/doc/sources/quick-start.rst index e9ed7ea184..084039f321 100644 --- a/doc/sources/quick-start.rst +++ b/doc/sources/quick-start.rst @@ -319,11 +319,7 @@ Download the Intel AI Tools `here `_ for each version of Intel® Extension for Scikit-learn*. -======= -See the `Release Notes `_ for each version of Intel® Extension for Scikit-learn*. ->>>>>>> e8a9b150 (CI: add `skywalking-eyes` license header check) System Requirements -------------------- diff --git a/generator/gen_daal4py.py b/generator/gen_daal4py.py index 13a2ac7669..adb48839db 100755 --- a/generator/gen_daal4py.py +++ b/generator/gen_daal4py.py @@ -1220,9 +1220,17 @@ def gen_daal4py(dalroot, outdir, version, warn_all=False, no_dist=False, no_stre algo_path = jp(head_path, "algorithms") rmtree(head_path, ignore_errors=True) copytree(orig_path, head_path) + formatfile = jp("src", ".clang-format") for dirpath, dirnames, filenames in os.walk(algo_path): for filename in filenames: - call([shutil.which("clang-format"), "-i", jp(dirpath, filename)]) + call( + [ + shutil.which("clang-format"), + "-i", + jp(dirpath, filename), + "-style=file:" + formatfile, + ] + ) iface = cython_interface(algo_path) iface.read() print("Generating sources...") diff --git a/onedal/basic_statistics/basic_statistics.cpp b/onedal/basic_statistics/basic_statistics.cpp index 838728455b..6d037ef08a 100644 --- a/onedal/basic_statistics/basic_statistics.cpp +++ b/onedal/basic_statistics/basic_statistics.cpp @@ -114,9 +114,9 @@ auto get_onedal_result_options(const py::dict& params) { struct params2desc { template auto operator()(const py::dict& params) { - auto desc = dal::basic_statistics::descriptor() - .set_result_options(get_onedal_result_options(params)); + auto desc = + dal::basic_statistics::descriptor() + .set_result_options(get_onedal_result_options(params)); return desc; } }; @@ -126,54 +126,53 @@ struct params2desc_incremental { template auto operator()(const py::dict& params) { auto desc = dal::basic_statistics::descriptor() - .set_result_options(get_onedal_result_options(params)); + dal::basic_statistics::method::dense, + dal::basic_statistics::task::compute>() + .set_result_options(get_onedal_result_options(params)); return desc; } }; template void init_compute_ops(py::module& m) { - m.def("compute", []( - const Policy& policy, - const py::dict& params, - const table& data, - const table& weights) { + m.def( + "compute", + [](const Policy& policy, const py::dict& params, const table& data, const table& weights) { using namespace dal::basic_statistics; using input_t = compute_input; compute_ops ops(policy, input_t{ data, weights }, params2desc{}); return fptype2t{ method2t{ Task{}, ops } }(params); - } - ); + }); } - template void init_partial_compute_ops(py::module& m) { using prev_result_t = dal::basic_statistics::partial_compute_result; - m.def("partial_compute", []( - const Policy& policy, - const py::dict& params, - const prev_result_t& prev, - const table& data, - const table& weights) { - using namespace dal::basic_statistics; - using input_t = partial_compute_input; - partial_compute_ops ops(policy, input_t{ prev, data, weights }, params2desc_incremental{}); - return fptype2t{ method2t{ Task{}, ops } }(params); - } - ); + m.def("partial_compute", + [](const Policy& policy, + const py::dict& params, + const prev_result_t& prev, + const table& data, + const table& weights) { + using namespace dal::basic_statistics; + using input_t = partial_compute_input; + partial_compute_ops ops(policy, + input_t{ prev, data, weights }, + params2desc_incremental{}); + return fptype2t{ method2t{ Task{}, ops } }(params); + }); } template void init_finalize_compute_ops(pybind11::module_& m) { using namespace dal::basic_statistics; using input_t = partial_compute_result; - m.def("finalize_compute", [](const Policy& policy, const pybind11::dict& params, const input_t& data) { - finalize_compute_ops ops(policy, data, params2desc_incremental{}); - return fptype2t{ method2t{ Task{}, ops } }(params); - }); + m.def("finalize_compute", + [](const Policy& policy, const pybind11::dict& params, const input_t& data) { + finalize_compute_ops ops(policy, data, params2desc_incremental{}); + return fptype2t{ method2t{ Task{}, ops } }(params); + }); } template @@ -216,23 +215,28 @@ void init_partial_compute_result(py::module_& m) { py::cast(convert_to_pyobject(res.get_partial_max())), py::cast(convert_to_pyobject(res.get_partial_sum())), py::cast(convert_to_pyobject(res.get_partial_sum_squares())), - py::cast(convert_to_pyobject(res.get_partial_sum_squares_centered())) - ); + py::cast( + convert_to_pyobject(res.get_partial_sum_squares_centered()))); }, [](py::tuple t) { if (t.size() != 6) throw std::runtime_error("Invalid state!"); result_t res; - if (py::cast(t[0].attr("size")) != 0) res.set_partial_n_rows(convert_to_table(t[0])); - if (py::cast(t[1].attr("size")) != 0) res.set_partial_min(convert_to_table(t[1])); - if (py::cast(t[2].attr("size")) != 0) res.set_partial_max(convert_to_table(t[2])); - if (py::cast(t[3].attr("size")) != 0) res.set_partial_sum(convert_to_table(t[3])); - if (py::cast(t[4].attr("size")) != 0) res.set_partial_sum_squares(convert_to_table(t[4])); - if (py::cast(t[5].attr("size")) != 0) res.set_partial_sum_squares_centered(convert_to_table(t[5])); - + if (py::cast(t[0].attr("size")) != 0) + res.set_partial_n_rows(convert_to_table(t[0])); + if (py::cast(t[1].attr("size")) != 0) + res.set_partial_min(convert_to_table(t[1])); + if (py::cast(t[2].attr("size")) != 0) + res.set_partial_max(convert_to_table(t[2])); + if (py::cast(t[3].attr("size")) != 0) + res.set_partial_sum(convert_to_table(t[3])); + if (py::cast(t[4].attr("size")) != 0) + res.set_partial_sum_squares(convert_to_table(t[4])); + if (py::cast(t[5].attr("size")) != 0) + res.set_partial_sum_squares_centered(convert_to_table(t[5])); + return res; - } - )); + })); } ONEDAL_PY_DECLARE_INSTANTIATOR(init_compute_result); diff --git a/onedal/cluster/dbscan.cpp b/onedal/cluster/dbscan.cpp index f743a347dd..7d3593d2fd 100644 --- a/onedal/cluster/dbscan.cpp +++ b/onedal/cluster/dbscan.cpp @@ -100,17 +100,15 @@ struct params2desc { template void init_compute_ops(py::module_& m) { - m.def("compute", - [](const Policy& policy, - const py::dict& params, - const table& data, - const table& weights) { - using namespace dbscan; - using input_t = compute_input; - - compute_ops ops(policy, input_t{ data, weights }, params2desc{}); - return fptype2t{ method2t{ Task{}, ops } }(params); - }); + m.def( + "compute", + [](const Policy& policy, const py::dict& params, const table& data, const table& weights) { + using namespace dbscan; + using input_t = compute_input; + + compute_ops ops(policy, input_t{ data, weights }, params2desc{}); + return fptype2t{ method2t{ Task{}, ops } }(params); + }); } template diff --git a/onedal/cluster/kmeans_common.cpp b/onedal/cluster/kmeans_common.cpp index 3d3e52b29f..65e4dae4ef 100644 --- a/onedal/cluster/kmeans_common.cpp +++ b/onedal/cluster/kmeans_common.cpp @@ -23,13 +23,11 @@ #include "onedal/common/pybind11_helpers.hpp" -namespace oneapi::dal::python{ +namespace oneapi::dal::python { namespace kmeans { -bool is_same_clustering(const dal::table& left, - const dal::table& right, - std::int64_t n_clusters) { +bool is_same_clustering(const dal::table& left, const dal::table& right, std::int64_t n_clusters) { if (!left.has_data() || !right.has_data()) throw std::invalid_argument("Empty input table"); @@ -39,15 +37,16 @@ bool is_same_clustering(const dal::table& left, if (left.get_column_count() > 1 || right.get_column_count() > 1) throw std::length_error("Too many columns in input table"); - const auto l_arr = l_acc.pull({0, -1}); - const auto r_arr = r_acc.pull({0, -1}); + const auto l_arr = l_acc.pull({ 0, -1 }); + const auto r_arr = r_acc.pull({ 0, -1 }); if (n_clusters < 1) throw std::invalid_argument("Invalid number of clusters"); constexpr std::int32_t minus_one = -1; auto map = dal::array::full( // - n_clusters, minus_one); + n_clusters, + minus_one); auto* const m_ptr = map.get_mutable_data(); @@ -85,4 +84,4 @@ ONEDAL_PY_INIT_MODULE(kmeans_common) { sub.def("_is_same_clustering", &kmeans::is_same_clustering); } // ONEDAL_PY_INIT_MODULE(kmeans_common) -} // namespace oneapi::dal::python::kmeans +} // namespace oneapi::dal::python diff --git a/onedal/common.hpp b/onedal/common.hpp index b3e43865b9..774b8a1ab7 100644 --- a/onedal/common.hpp +++ b/onedal/common.hpp @@ -16,12 +16,12 @@ #pragma once -#define OVERFLOW_CHECK_BY_ADDING(type, op1, op2) \ - { \ - volatile type r = (op1) + (op2); \ - r -= (op1); \ - if (!(r == (op2))) \ - throw std::runtime_error("Integer overflow by adding"); \ +#define OVERFLOW_CHECK_BY_ADDING(type, op1, op2) \ + { \ + volatile type r = (op1) + (op2); \ + r -= (op1); \ + if (!(r == (op2))) \ + throw std::runtime_error("Integer overflow by adding"); \ } #define OVERFLOW_CHECK_BY_MULTIPLICATION(type, op1, op2) \ diff --git a/onedal/common/dispatch_utils.hpp b/onedal/common/dispatch_utils.hpp index 7beae22c46..801b589105 100644 --- a/onedal/common/dispatch_utils.hpp +++ b/onedal/common/dispatch_utils.hpp @@ -60,9 +60,9 @@ struct compute_ops { using Task = typename Input::task_t; compute_ops(const Policy& policy, const Input& input, const Ops& ops) - : policy(policy), - input(input), - ops(ops) {} + : policy(policy), + input(input), + ops(ops) {} template auto operator()(const pybind11::dict& params) { @@ -81,13 +81,14 @@ template struct compute_ops_with_hyperparams { using Task = typename Input::task_t; - compute_ops_with_hyperparams( - const Policy& policy, const Input& input, - const Ops& ops, const Hyperparams& hyperparams) - : policy(policy), - input(input), - ops(ops), - hyperparams(hyperparams) {} + compute_ops_with_hyperparams(const Policy& policy, + const Input& input, + const Ops& ops, + const Hyperparams& hyperparams) + : policy(policy), + input(input), + ops(ops), + hyperparams(hyperparams) {} template auto operator()(const pybind11::dict& params) { @@ -108,9 +109,9 @@ struct train_ops { using Task = typename Input::task_t; train_ops(const Policy& policy, const Input& input, const Ops& ops) - : policy(policy), - input(input), - ops(ops) {} + : policy(policy), + input(input), + ops(ops) {} template auto operator()(const pybind11::dict& params) { @@ -129,13 +130,14 @@ template struct train_ops_with_hyperparams { using Task = typename Input::task_t; - train_ops_with_hyperparams( - const Policy& policy, const Input& input, - const Ops& ops, const Hyperparams& hyperparams) - : policy(policy), - input(input), - ops(ops), - hyperparams(hyperparams) {} + train_ops_with_hyperparams(const Policy& policy, + const Input& input, + const Ops& ops, + const Hyperparams& hyperparams) + : policy(policy), + input(input), + ops(ops), + hyperparams(hyperparams) {} template auto operator()(const pybind11::dict& params) { @@ -156,9 +158,9 @@ struct infer_ops { using Task = typename Input::task_t; infer_ops(const Policy& policy, const Input& input, const Ops& ops) - : policy(policy), - input(input), - ops(ops) {} + : policy(policy), + input(input), + ops(ops) {} template auto operator()(const pybind11::dict& params) { @@ -177,13 +179,14 @@ template struct infer_ops_with_hyperparams { using Task = typename Input::task_t; - infer_ops_with_hyperparams( - const Policy& policy, const Input& input, - const Ops& ops, const Hyperparams& hyperparams) - : policy(policy), - input(input), - ops(ops), - hyperparams(hyperparams) {} + infer_ops_with_hyperparams(const Policy& policy, + const Input& input, + const Ops& ops, + const Hyperparams& hyperparams) + : policy(policy), + input(input), + ops(ops), + hyperparams(hyperparams) {} template auto operator()(const pybind11::dict& params) { @@ -203,9 +206,9 @@ template struct partial_compute_ops { using Task = typename Input::task_t; partial_compute_ops(const Policy& policy, const Input& input, const Ops& ops) - : policy(policy), - input(input), - ops(ops) {} + : policy(policy), + input(input), + ops(ops) {} template auto operator()(const pybind11::dict& params) { @@ -222,9 +225,9 @@ template struct finalize_compute_ops { using Task = typename Input::task_t; finalize_compute_ops(const Policy& policy, const Input& input, const Ops& ops) - : policy(policy), - input(input), - ops(ops) {} + : policy(policy), + input(input), + ops(ops) {} template auto operator()(const pybind11::dict& params) { @@ -241,9 +244,9 @@ template struct partial_train_ops { using Task = typename Input::task_t; partial_train_ops(const Policy& policy, const Input& input, const Ops& ops) - : policy(policy), - input(input), - ops(ops) {} + : policy(policy), + input(input), + ops(ops) {} template auto operator()(const pybind11::dict& params) { @@ -259,13 +262,14 @@ struct partial_train_ops { template struct partial_train_ops_with_hyperparams { using Task = typename Input::task_t; - partial_train_ops_with_hyperparams( - const Policy& policy, const Input& input, - const Ops& ops, const Hyperparams& hyperparams) - : policy(policy), - input(input), - ops(ops), - hyperparams(hyperparams) {} + partial_train_ops_with_hyperparams(const Policy& policy, + const Input& input, + const Ops& ops, + const Hyperparams& hyperparams) + : policy(policy), + input(input), + ops(ops), + hyperparams(hyperparams) {} template auto operator()(const pybind11::dict& params) { @@ -283,15 +287,15 @@ template struct finalize_train_ops { using Task = typename Input::task_t; finalize_train_ops(const Policy& policy, const Input& input, const Ops& ops) - : policy(policy), - input(input), - ops(ops) {} + : policy(policy), + input(input), + ops(ops) {} template auto operator()(const pybind11::dict& params) { auto desc = ops.template operator()(params); return dal::finalize_train(policy, desc, input); - } + } Policy policy; Input input; @@ -301,19 +305,20 @@ struct finalize_train_ops { template struct finalize_train_ops_with_hyperparams { using Task = typename Input::task_t; - finalize_train_ops_with_hyperparams( - const Policy& policy, const Input& input, - const Ops& ops, const Hyperparams& hyperparams) - : policy(policy), - input(input), - ops(ops), - hyperparams(hyperparams) {} + finalize_train_ops_with_hyperparams(const Policy& policy, + const Input& input, + const Ops& ops, + const Hyperparams& hyperparams) + : policy(policy), + input(input), + ops(ops), + hyperparams(hyperparams) {} template auto operator()(const pybind11::dict& params) { auto desc = ops.template operator()(params); return dal::finalize_train(policy, desc, hyperparams, input); - } + } Policy policy; Input input; diff --git a/onedal/common/policy.cpp b/onedal/common/policy.cpp index b4973972e1..710c2f2acf 100644 --- a/onedal/common/policy.cpp +++ b/onedal/common/policy.cpp @@ -41,7 +41,6 @@ void instantiate_default_host_policy(py::module& m) { #ifdef ONEDAL_DATA_PARALLEL - dp_policy_t make_dp_policy(std::uint32_t id) { sycl::queue queue = get_queue_by_device_id(id); return dp_policy_t{ std::move(queue) }; diff --git a/onedal/common/sycl.cpp b/onedal/common/sycl.cpp index c0b8e290c7..14ff5e84a6 100644 --- a/onedal/common/sycl.cpp +++ b/onedal/common/sycl.cpp @@ -23,7 +23,7 @@ namespace oneapi::dal::python { #ifdef ONEDAL_DATA_PARALLEL -void instantiate_sycl_interfaces(py::module& m){ +void instantiate_sycl_interfaces(py::module& m) { // These classes mirror a subset of functionality of the dpctl python // package's `SyclQueue` and `SyclDevice` objects. In the case that dpctl // is not installed, these classes will enable scikit-learn-intelex to still @@ -31,57 +31,56 @@ void instantiate_sycl_interfaces(py::module& m){ py::class_ syclqueue(m, "SyclQueue"); syclqueue.def(py::init()) .def(py::init([](const std::string& filter) { - return get_queue_by_filter_string(filter); - }) - ) + return get_queue_by_filter_string(filter); + })) .def(py::init([](const py::int_& obj) { - return get_queue_by_pylong_pointer(obj); - }) - ) + return get_queue_by_pylong_pointer(obj); + })) .def(py::init([](const py::object& syclobj) { - return get_queue_from_python(syclobj); - }) - ) - .def("_get_capsule",[](const sycl::queue& queue) { - return pack_queue(std::make_shared(queue)); - } - ) + return get_queue_from_python(syclobj); + })) + .def("_get_capsule", + [](const sycl::queue& queue) { + return pack_queue(std::make_shared(queue)); + }) .def_property_readonly("sycl_device", &sycl::queue::get_device); // expose limited sycl device features to python for oneDAL analysis py::class_ sycldevice(m, "SyclDevice"); - sycldevice.def(py::init([](std::uint32_t id) { - return get_device_by_id(id).value(); - }) - ) - .def_property_readonly("has_aspect_fp64",[](const sycl::device& device) { - return device.has(sycl::aspect::fp64); - } - ) - .def_property_readonly("has_aspect_fp16",[](const sycl::device& device) { - return device.has(sycl::aspect::fp16); - } - ) - .def_property_readonly("filter_string",[](const sycl::device& device) { - // assumes we are not working with accelerators - // This is a minimal reproduction of DPCTL_GetRelativeDeviceId - std::uint32_t outidx = 0; - std::string filter = get_device_name(device); - auto devtype = device.get_info(); - auto devs = device.get_devices(devtype); - auto be = device.get_platform().get_backend(); - for(std::uint32_t id = 0; devs[outidx] != device; ++id){ - if (devs[id].get_platform().get_backend() == be) ++outidx; - } - return py::str(filter + ":") + py::str(py::int_(outidx)); - } - ) - .def_property_readonly("device_id",[](const sycl::device& device) { - // assumes we are not working with accelerators - std::string filter = get_device_name(device); - return get_device_id(device).value(); - } - ) + sycldevice + .def(py::init([](std::uint32_t id) { + return get_device_by_id(id).value(); + })) + .def_property_readonly("has_aspect_fp64", + [](const sycl::device& device) { + return device.has(sycl::aspect::fp64); + }) + .def_property_readonly("has_aspect_fp16", + [](const sycl::device& device) { + return device.has(sycl::aspect::fp16); + }) + .def_property_readonly("filter_string", + [](const sycl::device& device) { + // assumes we are not working with accelerators + // This is a minimal reproduction of DPCTL_GetRelativeDeviceId + std::uint32_t outidx = 0; + std::string filter = get_device_name(device); + auto devtype = + device.get_info(); + auto devs = device.get_devices(devtype); + auto be = device.get_platform().get_backend(); + for (std::uint32_t id = 0; devs[outidx] != device; ++id) { + if (devs[id].get_platform().get_backend() == be) + ++outidx; + } + return py::str(filter + ":") + py::str(py::int_(outidx)); + }) + .def_property_readonly("device_id", + [](const sycl::device& device) { + // assumes we are not working with accelerators + std::string filter = get_device_name(device); + return get_device_id(device).value(); + }) .def_property_readonly("is_cpu", &sycl::device::is_cpu) .def_property_readonly("is_gpu", &sycl::device::is_gpu); } diff --git a/onedal/common/sycl_interfaces.cpp b/onedal/common/sycl_interfaces.cpp index 2109312503..ce6013cc28 100644 --- a/onedal/common/sycl_interfaces.cpp +++ b/onedal/common/sycl_interfaces.cpp @@ -109,10 +109,10 @@ sycl::queue get_queue_by_get_capsule(const py::object& syclobj) { sycl::queue get_queue_by_pylong_pointer(const py::int_& syclobj) { // PyTorch XPU streams have a sycl_queue attribute which is - // a void pointer as PyLong (Python integer). It can be read and - // converted into a sycl::queue. This function allows + // a void pointer as PyLong (Python integer). It can be read and + // converted into a sycl::queue. This function allows // consumption of these objects for use in oneDAL. - void *ptr = PyLong_AsVoidPtr(syclobj.ptr()); + void* ptr = PyLong_AsVoidPtr(syclobj.ptr()); // assumes that the PyLong is a pointer to a queue return sycl::queue{ *static_cast(ptr) }; } @@ -139,17 +139,17 @@ sycl::queue get_queue_from_python(const py::object& syclobj) { const auto caps = syclobj.cast(); return extract_from_capsule(std::move(caps)); } - else if (py::hasattr(syclobj, device_name) && py::hasattr(syclobj.attr(device_name), filter_name)) { + else if (py::hasattr(syclobj, device_name) && + py::hasattr(syclobj.attr(device_name), filter_name)) { auto attr = syclobj.attr(device_name).attr(filter_name); return get_queue_by_filter_string(attr.cast()); } - else - { + else { throw std::runtime_error("Unable to interpret \"syclobj\""); } } -std::string get_device_name(const sycl::queue& queue){ +std::string get_device_name(const sycl::queue& queue) { return get_device_name(queue.get_device()); } @@ -175,8 +175,8 @@ std::uint32_t get_device_id(const sycl::queue& queue) { } } -std::size_t get_used_memory(const py::object& syclobj){ - const auto& device = get_queue_from_python(syclobj).get_device(); +std::size_t get_used_memory(const py::object& syclobj) { + const auto& device = get_queue_from_python(syclobj).get_device(); std::size_t total_memory = device.get_info(); std::size_t free_memory = device.get_info(); return total_memory - free_memory; diff --git a/onedal/common/sycl_interfaces.hpp b/onedal/common/sycl_interfaces.hpp index 1b838254a4..2681fe639b 100644 --- a/onedal/common/sycl_interfaces.hpp +++ b/onedal/common/sycl_interfaces.hpp @@ -54,14 +54,13 @@ std::size_t get_used_memory(const py::object& syclobj); std::string get_device_name(const dp_policy_t& policy); std::string get_device_name(const sycl::device& device); - /// TODO: This is a workaround class. /// It hides deprecated ``sycl::ext::oneapi::filter_selector`` to get rid of build warnings /// until a better solution is provided. struct filter_selector_wrapper { - filter_selector_wrapper(std::string filter) : filter_selector_{filter} {} + filter_selector_wrapper(std::string filter) : filter_selector_{ filter } {} - int operator()(const sycl::device &dev) { + int operator()(const sycl::device& dev) { return filter_selector_(dev); } diff --git a/onedal/common/type_utils.hpp b/onedal/common/type_utils.hpp index eecee5612b..d79a157458 100644 --- a/onedal/common/type_utils.hpp +++ b/onedal/common/type_utils.hpp @@ -50,16 +50,16 @@ template struct type_to_str; #ifdef ONEDAL_DATA_PARALLEL_SPMD - ONEDAL_PY_TYPE2STR(dal::detail::spmd_policy, ""); - using policy_spmd = types>; +ONEDAL_PY_TYPE2STR(dal::detail::spmd_policy, ""); +using policy_spmd = types>; #else - ONEDAL_PY_TYPE2STR(dal::detail::host_policy, ""); - #ifdef ONEDAL_DATA_PARALLEL - ONEDAL_PY_TYPE2STR(dal::detail::data_parallel_policy, ""); - using policy_list = types; - #else - using policy_list = types; - #endif +ONEDAL_PY_TYPE2STR(dal::detail::host_policy, ""); +#ifdef ONEDAL_DATA_PARALLEL +ONEDAL_PY_TYPE2STR(dal::detail::data_parallel_policy, ""); +using policy_list = types; +#else +using policy_list = types; +#endif #endif } // namespace oneapi::dal::python diff --git a/onedal/covariance/covariance.cpp b/onedal/covariance/covariance.cpp index 9f0e08b9c1..cc9bf83aba 100644 --- a/onedal/covariance/covariance.cpp +++ b/onedal/covariance/covariance.cpp @@ -21,7 +21,6 @@ #define NO_IMPORT_ARRAY // import_array called in table.cpp #include "onedal/datatypes/data_conversion.hpp" - #include "onedal/common.hpp" #include "onedal/version.hpp" @@ -50,7 +49,8 @@ struct params2desc { auto operator()(const py::dict& params) { using namespace dal::covariance; auto desc = dal::covariance::descriptor{}; - desc.set_result_options(dal::covariance::result_options::cov_matrix | dal::covariance::result_options::means); + desc.set_result_options(dal::covariance::result_options::cov_matrix | + dal::covariance::result_options::means); #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240001 if (params.contains("bias")) { desc.set_bias(params["bias"].cast()); @@ -65,64 +65,56 @@ struct params2desc { } }; - template void init_compute_ops(py::module_& m) { #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240000 using compute_hyperparams_t = dal::covariance::detail::compute_parameters; - m.def("compute", []( - const Policy& policy, - const py::dict& params, - const compute_hyperparams_t hyperparams, - const table& data) { - using namespace dal::covariance; - using input_t = compute_input; - - compute_ops_with_hyperparams ops( - policy, input_t{ data }, params2desc{}, hyperparams); - return fptype2t{ method2t{ Task{}, ops } }(params); - } - ); + m.def("compute", + [](const Policy& policy, + const py::dict& params, + const compute_hyperparams_t hyperparams, + const table& data) { + using namespace dal::covariance; + using input_t = compute_input; + + compute_ops_with_hyperparams ops(policy, input_t{ data }, params2desc{}, hyperparams); + return fptype2t{ method2t{ Task{}, ops } }(params); + }); #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240000 - m.def("compute", []( - const Policy& policy, - const py::dict& params, - const table& data) { - using namespace dal::covariance; - using input_t = compute_input; - compute_ops ops(policy, input_t{ data }, params2desc{}); - return fptype2t{ method2t{ Task{}, ops } }(params); - } - ); + m.def("compute", [](const Policy& policy, const py::dict& params, const table& data) { + using namespace dal::covariance; + using input_t = compute_input; + compute_ops ops(policy, input_t{ data }, params2desc{}); + return fptype2t{ method2t{ Task{}, ops } }(params); + }); } template void init_partial_compute_ops(pybind11::module_& m) { using prev_result_t = dal::covariance::partial_compute_result; - m.def("partial_compute", []( - const Policy& policy, - const pybind11::dict& params, - const prev_result_t& prev, - const table& data) { - using namespace dal::covariance; - using input_t = partial_compute_input; - partial_compute_ops ops(policy, input_t{prev, data}, params2desc{}); - return fptype2t{ method2t{ Task{}, ops } }(params); - } - ); + m.def("partial_compute", + [](const Policy& policy, + const pybind11::dict& params, + const prev_result_t& prev, + const table& data) { + using namespace dal::covariance; + using input_t = partial_compute_input; + partial_compute_ops ops(policy, input_t{ prev, data }, params2desc{}); + return fptype2t{ method2t{ Task{}, ops } }(params); + }); } template void init_finalize_compute_ops(pybind11::module_& m) { using namespace dal::covariance; using input_t = partial_compute_result; - m.def("finalize_compute", [](const Policy& policy, const pybind11::dict& params, const input_t& data) { - finalize_compute_ops ops(policy, data, params2desc{}); - return fptype2t{ method2t{ Task{}, ops } }(params); - }); + m.def("finalize_compute", + [](const Policy& policy, const pybind11::dict& params, const input_t& data) { + finalize_compute_ops ops(policy, data, params2desc{}); + return fptype2t{ method2t{ Task{}, ops } }(params); + }); } - template inline void init_compute_result(py::module_& m) { using namespace dal::covariance; @@ -139,27 +131,32 @@ inline void init_partial_compute_result(pybind11::module_& m) { using result_t = partial_compute_result; pybind11::class_(m, "partial_compute_result") .def(pybind11::init()) - .def_property("partial_n_rows", &result_t::get_partial_n_rows, &result_t::set_partial_n_rows) - .def_property("partial_crossproduct", &result_t::get_partial_crossproduct, &result_t::set_partial_crossproduct) + .def_property("partial_n_rows", + &result_t::get_partial_n_rows, + &result_t::set_partial_n_rows) + .def_property("partial_crossproduct", + &result_t::get_partial_crossproduct, + &result_t::set_partial_crossproduct) .def_property("partial_sums", &result_t::get_partial_sum, &result_t::set_partial_sum) .def(py::pickle( [](const result_t& res) { return py::make_tuple( py::cast(convert_to_pyobject(res.get_partial_n_rows())), py::cast(convert_to_pyobject(res.get_partial_crossproduct())), - py::cast(convert_to_pyobject(res.get_partial_sum())) - ); + py::cast(convert_to_pyobject(res.get_partial_sum()))); }, [](py::tuple t) { if (t.size() != 3) throw std::runtime_error("Invalid state!"); result_t res; - if (py::cast(t[0].attr("size")) != 0) res.set_partial_n_rows(convert_to_table(t[0])); - if (py::cast(t[1].attr("size")) != 0) res.set_partial_crossproduct(convert_to_table(t[1])); - if (py::cast(t[2].attr("size")) != 0) res.set_partial_sum(convert_to_table(t[2])); + if (py::cast(t[0].attr("size")) != 0) + res.set_partial_n_rows(convert_to_table(t[0])); + if (py::cast(t[1].attr("size")) != 0) + res.set_partial_crossproduct(convert_to_table(t[1])); + if (py::cast(t[2].attr("size")) != 0) + res.set_partial_sum(convert_to_table(t[2])); return res; - } - )); + })); } #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240000 @@ -171,11 +168,12 @@ void init_compute_hyperparameters(py::module_& m) { auto cls = py::class_(m, "compute_hyperparameters") .def(py::init()) - .def("set_cpu_macro_block", [](compute_hyperparams_t& self, int64_t cpu_macro_block) { - self.set_cpu_macro_block(cpu_macro_block); - }) + .def("set_cpu_macro_block", + [](compute_hyperparams_t& self, int64_t cpu_macro_block) { + self.set_cpu_macro_block(cpu_macro_block); + }) .def("get_cpu_macro_block", [](const compute_hyperparams_t& self) { - return self.get_cpu_macro_block(); + return self.get_cpu_macro_block(); }); } @@ -187,7 +185,7 @@ ONEDAL_PY_DECLARE_INSTANTIATOR(init_compute_ops); ONEDAL_PY_DECLARE_INSTANTIATOR(init_partial_compute_ops); ONEDAL_PY_DECLARE_INSTANTIATOR(init_finalize_compute_ops); #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240000 - ONEDAL_PY_DECLARE_INSTANTIATOR(init_compute_hyperparameters); +ONEDAL_PY_DECLARE_INSTANTIATOR(init_compute_hyperparameters); #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240000 } //namespace covariance @@ -198,19 +196,19 @@ ONEDAL_PY_INIT_MODULE(covariance) { auto sub = m.def_submodule("covariance"); - #ifdef ONEDAL_DATA_PARALLEL_SPMD - ONEDAL_PY_INSTANTIATE(init_compute_ops, sub, policy_spmd, task::compute); - ONEDAL_PY_INSTANTIATE(init_finalize_compute_ops, sub, policy_spmd, task::compute); - #else - ONEDAL_PY_INSTANTIATE(init_compute_ops, sub, policy_list, task::compute); - ONEDAL_PY_INSTANTIATE(init_partial_compute_ops, sub, policy_list, task::compute); - ONEDAL_PY_INSTANTIATE(init_finalize_compute_ops, sub, policy_list, task::compute); - ONEDAL_PY_INSTANTIATE(init_compute_result, sub, task::compute); - ONEDAL_PY_INSTANTIATE(init_partial_compute_result, sub, task::compute); - #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240000 - ONEDAL_PY_INSTANTIATE(init_compute_hyperparameters, sub, task::compute); - #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240000 - #endif +#ifdef ONEDAL_DATA_PARALLEL_SPMD + ONEDAL_PY_INSTANTIATE(init_compute_ops, sub, policy_spmd, task::compute); + ONEDAL_PY_INSTANTIATE(init_finalize_compute_ops, sub, policy_spmd, task::compute); +#else + ONEDAL_PY_INSTANTIATE(init_compute_ops, sub, policy_list, task::compute); + ONEDAL_PY_INSTANTIATE(init_partial_compute_ops, sub, policy_list, task::compute); + ONEDAL_PY_INSTANTIATE(init_finalize_compute_ops, sub, policy_list, task::compute); + ONEDAL_PY_INSTANTIATE(init_compute_result, sub, task::compute); + ONEDAL_PY_INSTANTIATE(init_partial_compute_result, sub, task::compute); +#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240000 + ONEDAL_PY_INSTANTIATE(init_compute_hyperparameters, sub, task::compute); +#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240000 +#endif } } // namespace oneapi::dal::python diff --git a/onedal/dal.cpp b/onedal/dal.cpp index 298ab39fd9..0be4671857 100644 --- a/onedal/dal.cpp +++ b/onedal/dal.cpp @@ -23,128 +23,128 @@ namespace oneapi::dal::python { /* common */ #ifdef ONEDAL_DATA_PARALLEL_SPMD - ONEDAL_PY_INIT_MODULE(spmd_policy); +ONEDAL_PY_INIT_MODULE(spmd_policy); - /* algorithms */ - ONEDAL_PY_INIT_MODULE(covariance); - ONEDAL_PY_INIT_MODULE(dbscan); - ONEDAL_PY_INIT_MODULE(ensemble); - ONEDAL_PY_INIT_MODULE(decomposition); - #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230100 - ONEDAL_PY_INIT_MODULE(basic_statistics); - ONEDAL_PY_INIT_MODULE(linear_model); - #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230100 - #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230200 - ONEDAL_PY_INIT_MODULE(kmeans_init); - #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230200 - ONEDAL_PY_INIT_MODULE(kmeans); - ONEDAL_PY_INIT_MODULE(kmeans_common); - ONEDAL_PY_INIT_MODULE(neighbors); - #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240001 - ONEDAL_PY_INIT_MODULE(logistic_regression); - #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240001 +/* algorithms */ +ONEDAL_PY_INIT_MODULE(covariance); +ONEDAL_PY_INIT_MODULE(dbscan); +ONEDAL_PY_INIT_MODULE(ensemble); +ONEDAL_PY_INIT_MODULE(decomposition); +#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230100 +ONEDAL_PY_INIT_MODULE(basic_statistics); +ONEDAL_PY_INIT_MODULE(linear_model); +#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230100 +#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230200 +ONEDAL_PY_INIT_MODULE(kmeans_init); +#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230200 +ONEDAL_PY_INIT_MODULE(kmeans); +ONEDAL_PY_INIT_MODULE(kmeans_common); +ONEDAL_PY_INIT_MODULE(neighbors); +#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240001 +ONEDAL_PY_INIT_MODULE(logistic_regression); +#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240001 #else // ONEDAL_DATA_PARALLEL_SPMD - #ifdef ONEDAL_DATA_PARALLEL - ONEDAL_PY_INIT_MODULE(sycl); - #endif // ONEDAL_DATA_PARALLEL +#ifdef ONEDAL_DATA_PARALLEL +ONEDAL_PY_INIT_MODULE(sycl); +#endif // ONEDAL_DATA_PARALLEL - ONEDAL_PY_INIT_MODULE(policy); - /* datatypes*/ - ONEDAL_PY_INIT_MODULE(table); - ONEDAL_PY_INIT_MODULE(table_metadata); +ONEDAL_PY_INIT_MODULE(policy); +/* datatypes*/ +ONEDAL_PY_INIT_MODULE(table); +ONEDAL_PY_INIT_MODULE(table_metadata); - /* primitives */ - ONEDAL_PY_INIT_MODULE(get_tree); - ONEDAL_PY_INIT_MODULE(linear_kernel); - ONEDAL_PY_INIT_MODULE(rbf_kernel); - ONEDAL_PY_INIT_MODULE(polynomial_kernel); - ONEDAL_PY_INIT_MODULE(sigmoid_kernel); +/* primitives */ +ONEDAL_PY_INIT_MODULE(get_tree); +ONEDAL_PY_INIT_MODULE(linear_kernel); +ONEDAL_PY_INIT_MODULE(rbf_kernel); +ONEDAL_PY_INIT_MODULE(polynomial_kernel); +ONEDAL_PY_INIT_MODULE(sigmoid_kernel); - /* algorithms */ - ONEDAL_PY_INIT_MODULE(covariance); - ONEDAL_PY_INIT_MODULE(dbscan); - ONEDAL_PY_INIT_MODULE(ensemble); - ONEDAL_PY_INIT_MODULE(decomposition); - #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230100 - ONEDAL_PY_INIT_MODULE(basic_statistics); - ONEDAL_PY_INIT_MODULE(linear_model); - #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230100 - #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230200 - ONEDAL_PY_INIT_MODULE(kmeans_init); - #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230200 - ONEDAL_PY_INIT_MODULE(kmeans); - ONEDAL_PY_INIT_MODULE(kmeans_common); - ONEDAL_PY_INIT_MODULE(neighbors); - ONEDAL_PY_INIT_MODULE(svm); - #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240001 - ONEDAL_PY_INIT_MODULE(logistic_regression); - #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240001 - #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240700 - ONEDAL_PY_INIT_MODULE(finiteness_checker); - #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240700 +/* algorithms */ +ONEDAL_PY_INIT_MODULE(covariance); +ONEDAL_PY_INIT_MODULE(dbscan); +ONEDAL_PY_INIT_MODULE(ensemble); +ONEDAL_PY_INIT_MODULE(decomposition); +#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230100 +ONEDAL_PY_INIT_MODULE(basic_statistics); +ONEDAL_PY_INIT_MODULE(linear_model); +#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230100 +#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230200 +ONEDAL_PY_INIT_MODULE(kmeans_init); +#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230200 +ONEDAL_PY_INIT_MODULE(kmeans); +ONEDAL_PY_INIT_MODULE(kmeans_common); +ONEDAL_PY_INIT_MODULE(neighbors); +ONEDAL_PY_INIT_MODULE(svm); +#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240001 +ONEDAL_PY_INIT_MODULE(logistic_regression); +#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240001 +#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240700 +ONEDAL_PY_INIT_MODULE(finiteness_checker); +#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240700 #endif // ONEDAL_DATA_PARALLEL_SPMD #ifdef ONEDAL_DATA_PARALLEL_SPMD - PYBIND11_MODULE(_onedal_py_spmd_dpc, m) { - init_spmd_policy(m); +PYBIND11_MODULE(_onedal_py_spmd_dpc, m) { + init_spmd_policy(m); - init_covariance(m); - init_dbscan(m); - init_decomposition(m); - init_ensemble(m); - #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230100 - init_basic_statistics(m); - init_linear_model(m); - #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230100 - #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230200 - init_kmeans_init(m); - #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230200 - init_kmeans(m); - init_kmeans_common(m); - init_neighbors(m); - #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240001 - init_logistic_regression(m); - #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240001 - } + init_covariance(m); + init_dbscan(m); + init_decomposition(m); + init_ensemble(m); +#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230100 + init_basic_statistics(m); + init_linear_model(m); +#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230100 +#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230200 + init_kmeans_init(m); +#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230200 + init_kmeans(m); + init_kmeans_common(m); + init_neighbors(m); +#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240001 + init_logistic_regression(m); +#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240001 +} #else - #ifdef ONEDAL_DATA_PARALLEL - PYBIND11_MODULE(_onedal_py_dpc, m) { - init_sycl(m); - #else - PYBIND11_MODULE(_onedal_py_host, m) { - #endif - init_policy(m); - init_table(m); - init_table_metadata(m); - - init_linear_kernel(m); - init_rbf_kernel(m); - init_polynomial_kernel(m); - init_sigmoid_kernel(m); - init_get_tree(m); +#ifdef ONEDAL_DATA_PARALLEL +PYBIND11_MODULE(_onedal_py_dpc, m) { + init_sycl(m); +#else +PYBIND11_MODULE(_onedal_py_host, m) { +#endif + init_policy(m); + init_table(m); + init_table_metadata(m); + + init_linear_kernel(m); + init_rbf_kernel(m); + init_polynomial_kernel(m); + init_sigmoid_kernel(m); + init_get_tree(m); - init_covariance(m); - init_dbscan(m); - init_decomposition(m); - init_ensemble(m); - #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230100 - init_basic_statistics(m); - init_linear_model(m); - #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230100 - #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230200 - init_kmeans_init(m); - #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230200 - init_kmeans(m); - init_kmeans_common(m); - init_neighbors(m); - init_svm(m); - #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240001 - init_logistic_regression(m); - #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240001 - #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240700 - init_finiteness_checker(m); - #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240700 - } + init_covariance(m); + init_dbscan(m); + init_decomposition(m); + init_ensemble(m); +#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230100 + init_basic_statistics(m); + init_linear_model(m); +#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230100 +#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230200 + init_kmeans_init(m); +#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230200 + init_kmeans(m); + init_kmeans_common(m); + init_neighbors(m); + init_svm(m); +#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240001 + init_logistic_regression(m); +#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240001 +#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240700 + init_finiteness_checker(m); +#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240700 +} #endif // ONEDAL_DATA_PARALLEL_SPMD } // namespace oneapi::dal::python diff --git a/onedal/datatypes/data_conversion.cpp b/onedal/datatypes/data_conversion.cpp index 5bc2d86275..569ad277c8 100644 --- a/onedal/datatypes/data_conversion.cpp +++ b/onedal/datatypes/data_conversion.cpp @@ -27,9 +27,9 @@ #include "onedal/version.hpp" #if ONEDAL_VERSION <= 20230100 - #include "oneapi/dal/table/detail/csr.hpp" +#include "oneapi/dal/table/detail/csr.hpp" #else - #include "oneapi/dal/table/csr.hpp" +#include "oneapi/dal/table/csr.hpp" #endif namespace oneapi::dal::python { @@ -41,13 +41,13 @@ typedef oneapi::dal::csr_table csr_table_t; #endif template -static dal::array transfer_to_host(const dal::array& array) { - #ifdef ONEDAL_DATA_PARALLEL +static dal::array transfer_to_host(const dal::array &array) { +#ifdef ONEDAL_DATA_PARALLEL auto opt_queue = array.get_queue(); if (opt_queue.has_value()) { auto device = opt_queue->get_device(); if (!device.is_cpu()) { - const auto* device_data = array.get_data(); + const auto *device_data = array.get_data(); auto memory_kind = sycl::get_pointer_type(device_data, opt_queue->get_context()); if (memory_kind == sycl::usm::alloc::unknown) { @@ -61,7 +61,7 @@ static dal::array transfer_to_host(const dal::array& array) { } } } - #endif +#endif return array; } @@ -73,23 +73,26 @@ inline dal::homogen_table convert_to_homogen_impl(PyArrayObject *np_data) { if (array_numdims(np_data) > 2) { throw std::runtime_error("Input array has wrong dimensionality (must be 2d)."); } - T* const data_pointer = reinterpret_cast(array_data(np_data)); + T *const data_pointer = reinterpret_cast(array_data(np_data)); // TODO: check safe cast from int to std::int64_t const std::int64_t row_count = static_cast(array_size(np_data, 0)); if (array_numdims(np_data) == 2) { // TODO: check safe cast from int to std::int64_t column_count = static_cast(array_size(np_data, 1)); } - // If both array_is_behaved_C(np_data) and array_is_behaved_F(np_data) are true + // If both array_is_behaved_C(np_data) and array_is_behaved_F(np_data) are true // (for example, if the array has only one column), then row-major layout will be chosen // which is default on oneDAL side. const auto layout = array_is_behaved_C(np_data) ? dal::data_layout::row_major : dal::data_layout::column_major; - auto res_table = dal::homogen_table(data_pointer, - row_count, - column_count, - [np_data](const T* data) { Py_DECREF(np_data); }, - layout); + auto res_table = dal::homogen_table( + data_pointer, + row_count, + column_count, + [np_data](const T *data) { + Py_DECREF(np_data); + }, + layout); // we need to increment the ref-count as we use the input array in-place Py_INCREF(np_data); @@ -97,9 +100,9 @@ inline dal::homogen_table convert_to_homogen_impl(PyArrayObject *np_data) { } template -inline csr_table_t convert_to_csr_impl(PyObject* py_data, - PyObject* py_column_indices, - PyObject* py_row_indices, +inline csr_table_t convert_to_csr_impl(PyObject *py_data, + PyObject *py_column_indices, + PyObject *py_row_indices, std::int64_t row_count, std::int64_t column_count) { PyArrayObject *np_data = reinterpret_cast(py_data); @@ -130,18 +133,19 @@ inline csr_table_t convert_to_csr_impl(PyObject* py_data, const T *data_pointer = static_cast(array_data(np_data)); const std::int64_t data_count = static_cast(array_size(np_data, 0)); - auto res_table = csr_table_t(dal::array(data_pointer, - data_count, - [np_data](const T*) { - Py_DECREF(np_data); - }), - column_indices_one_based, - row_indices_one_based, + auto res_table = csr_table_t( + dal::array(data_pointer, + data_count, + [np_data](const T *) { + Py_DECREF(np_data); + }), + column_indices_one_based, + row_indices_one_based, #if ONEDAL_VERSION <= 20230100 -// row_count parameter is present in csr_table's constructor only in older versions of oneDAL - row_count, + // row_count parameter is present in csr_table's constructor only in older versions of oneDAL + row_count, #endif - column_count); + column_count); // we need to increment the ref-count as we use the input array in-place Py_INCREF(np_data); @@ -150,24 +154,26 @@ inline csr_table_t convert_to_csr_impl(PyObject* py_data, dal::table convert_to_table(py::object inp_obj, py::object queue) { dal::table res; - #ifdef ONEDAL_DATA_PARALLEL - if (!queue.is(py::none()) && !queue.attr("sycl_device").attr("has_aspect_fp64").cast()){ +#ifdef ONEDAL_DATA_PARALLEL + if (!queue.is(py::none()) && !queue.attr("sycl_device").attr("has_aspect_fp64").cast()) { // If the queue exists, doesn't have the fp64 aspect, and the data is float64 // then cast it to float32 int type = reinterpret_cast(inp_obj.attr("dtype").ptr())->type_num; - if(type == NPY_DOUBLE || type == NPY_DOUBLELTR){ - PyErr_WarnEx(PyExc_RuntimeWarning, - "Data will be converted into float32 from float64 because device does not support it", - 1); + if (type == NPY_DOUBLE || type == NPY_DOUBLELTR) { + PyErr_WarnEx( + PyExc_RuntimeWarning, + "Data will be converted into float32 from float64 because device does not support it", + 1); // use astype instead of PyArray_Cast in order to support scipy sparse inputs inp_obj = inp_obj.attr("astype")(py::dtype::of()); - res = convert_to_table(inp_obj); // queue will be set to none, as this check is no longer necessary + res = convert_to_table( + inp_obj); // queue will be set to none, as this check is no longer necessary return res; } } - #endif // ONEDAL_DATA_PARALLEL +#endif // ONEDAL_DATA_PARALLEL - PyObject* obj = inp_obj.ptr(); + PyObject *obj = inp_obj.ptr(); if (obj == nullptr || obj == Py_None) { return res; @@ -183,10 +189,10 @@ dal::table convert_to_table(py::object inp_obj, py::object queue) { res = convert_to_table(py::cast(obj), queue); Py_DECREF(obj); return res; - } + } else { throw std::invalid_argument( - "[convert_to_table] Numpy input could not be converted into onedal table."); + "[convert_to_table] Numpy input could not be converted into onedal table."); } } #define MAKE_HOMOGEN_TABLE(CType) res = convert_to_homogen_impl(ary); @@ -196,7 +202,8 @@ dal::table convert_to_table(py::object inp_obj, py::object queue) { throw std::invalid_argument("Found unsupported array type")); #undef MAKE_HOMOGEN_TABLE } - else if (strcmp(Py_TYPE(obj)->tp_name, "csr_matrix") == 0 || strcmp(Py_TYPE(obj)->tp_name, "csr_array") == 0) { + else if (strcmp(Py_TYPE(obj)->tp_name, "csr_matrix") == 0 || + strcmp(Py_TYPE(obj)->tp_name, "csr_array") == 0) { PyObject *py_data = PyObject_GetAttrString(obj, "data"); PyObject *py_column_indices = PyObject_GetAttrString(obj, "indices"); PyObject *py_row_indices = PyObject_GetAttrString(obj, "indptr"); @@ -270,12 +277,9 @@ static PyObject *convert_to_numpy_impl(const dal::array &array, npy_intp dims[2] = { static_cast(row_count), static_cast(column_count) }; auto host_array = transfer_to_host(array); host_array.need_mutable_data(); - auto* bytes = host_array.get_mutable_data(); + auto *bytes = host_array.get_mutable_data(); - PyObject *obj = PyArray_SimpleNewFromData(size_dims, - dims, - NpType, - static_cast(bytes)); + PyObject *obj = PyArray_SimpleNewFromData(size_dims, dims, NpType, static_cast(bytes)); if (!obj) throw std::invalid_argument("Conversion to numpy array failed"); @@ -290,39 +294,39 @@ static PyObject *convert_to_numpy_impl(const dal::array &array, // dal::detail::csr_table class is valid // only one-based indeices are supported template -static PyObject* convert_to_py_from_csr_impl(const detail::csr_table& table) { - PyObject* result = PyTuple_New(3); +static PyObject *convert_to_py_from_csr_impl(const detail::csr_table &table) { + PyObject *result = PyTuple_New(3); const std::int64_t rows_indices_count = table.get_row_count() + 1; - const std::int64_t* row_indices_one_based = table.get_row_indices(); - std::uint64_t* row_indices_zero_based_data = + const std::int64_t *row_indices_one_based = table.get_row_indices(); + std::uint64_t *row_indices_zero_based_data = detail::host_allocator().allocate(rows_indices_count); for (std::int64_t i = 0; i < rows_indices_count; ++i) row_indices_zero_based_data[i] = row_indices_one_based[i] - 1; auto row_indices_zero_based_array = dal::array::wrap(row_indices_zero_based_data, rows_indices_count); - PyObject* py_row = + PyObject *py_row = convert_to_numpy_impl(row_indices_zero_based_array, rows_indices_count); PyTuple_SetItem(result, 2, py_row); const std::int64_t non_zero_count = row_indices_zero_based_data[rows_indices_count - 1]; - const T* data = reinterpret_cast(table.get_data()); + const T *data = reinterpret_cast(table.get_data()); auto data_array = dal::array::wrap(data, non_zero_count); - PyObject* py_data = convert_to_numpy_impl(data_array, non_zero_count); + PyObject *py_data = convert_to_numpy_impl(data_array, non_zero_count); PyTuple_SetItem(result, 0, py_data); - const std::int64_t* column_indices_one_based = table.get_column_indices(); - std::uint64_t* column_indices_zero_based_data = + const std::int64_t *column_indices_one_based = table.get_column_indices(); + std::uint64_t *column_indices_zero_based_data = detail::host_allocator().allocate(non_zero_count); for (std::int64_t i = 0; i < non_zero_count; ++i) column_indices_zero_based_data[i] = column_indices_one_based[i] - 1; auto column_indices_zero_based_array = dal::array::wrap(column_indices_zero_based_data, non_zero_count); - PyObject* py_col = + PyObject *py_col = convert_to_numpy_impl(column_indices_zero_based_array, non_zero_count); PyTuple_SetItem(result, 1, py_col); @@ -334,21 +338,21 @@ static PyObject* convert_to_py_from_csr_impl(const detail::csr_table& table) { // dal::csr_table class is valid // zero- and one-based indeices are supported template -static PyObject* convert_to_py_from_csr_impl(const csr_table& table) { - PyObject* result = PyTuple_New(3); +static PyObject *convert_to_py_from_csr_impl(const csr_table &table) { + PyObject *result = PyTuple_New(3); const std::int64_t rows_indices_count = table.get_row_count() + 1; const std::int64_t non_zero_count = table.get_non_zero_count(); - const std::int64_t* row_offsets = table.get_row_offsets(); - const std::int64_t* column_indices = table.get_column_indices(); + const std::int64_t *row_offsets = table.get_row_offsets(); + const std::int64_t *column_indices = table.get_column_indices(); - std::uint64_t* column_indices_zero_based_data = nullptr; - std::uint64_t* row_offsets_zero_based_data = nullptr; + std::uint64_t *column_indices_zero_based_data = nullptr; + std::uint64_t *row_offsets_zero_based_data = nullptr; if (table.get_indexing() == sparse_indexing::zero_based) { column_indices_zero_based_data = - const_cast(reinterpret_cast(column_indices)); + const_cast(reinterpret_cast(column_indices)); row_offsets_zero_based_data = - const_cast(reinterpret_cast(row_offsets)); + const_cast(reinterpret_cast(row_offsets)); } else { // table.get_indexing() == sparse_indexing::one_based column_indices_zero_based_data = @@ -363,21 +367,21 @@ static PyObject* convert_to_py_from_csr_impl(const csr_table& table) { row_offsets_zero_based_data[i] = row_offsets[i] - 1; } - const T* data = table.get_data(); + const T *data = table.get_data(); auto data_array = dal::array::wrap(data, non_zero_count); - PyObject* py_data = convert_to_numpy_impl(data_array, non_zero_count); + PyObject *py_data = convert_to_numpy_impl(data_array, non_zero_count); PyTuple_SetItem(result, 0, py_data); auto column_indices_zero_based_array = dal::array::wrap(column_indices_zero_based_data, non_zero_count); - PyObject* py_col = + PyObject *py_col = convert_to_numpy_impl(column_indices_zero_based_array, non_zero_count); PyTuple_SetItem(result, 1, py_col); auto row_indices_zero_based_array = dal::array::wrap(row_offsets_zero_based_data, rows_indices_count); - PyObject* py_row = + PyObject *py_row = convert_to_numpy_impl(row_indices_zero_based_array, rows_indices_count); PyTuple_SetItem(result, 2, py_row); diff --git a/onedal/datatypes/data_conversion.hpp b/onedal/datatypes/data_conversion.hpp index cfd7745bbe..920e99d013 100644 --- a/onedal/datatypes/data_conversion.hpp +++ b/onedal/datatypes/data_conversion.hpp @@ -33,4 +33,3 @@ PyObject *convert_to_pyobject(const dal::table &input); dal::table convert_to_table(py::object inp_obj, py::object queue = py::none()); } // namespace oneapi::dal::python - diff --git a/onedal/datatypes/data_conversion_sua_iface.cpp b/onedal/datatypes/data_conversion_sua_iface.cpp index 673bdb6c63..303b9c9629 100644 --- a/onedal/datatypes/data_conversion_sua_iface.cpp +++ b/onedal/datatypes/data_conversion_sua_iface.cpp @@ -44,7 +44,7 @@ using namespace pybind11::literals; template dal::table convert_to_homogen_impl(py::object obj) { dal::table res{}; - + // Get `__sycl_usm_array_interface__` dictionary representing USM allocations. auto sua_iface_dict = get_sua_interface(obj); @@ -67,14 +67,14 @@ dal::table convert_to_homogen_impl(py::object obj) { // Get oneDAL Homogen DataLayout enumeration from input object shape and strides. const auto layout = get_sua_iface_layout(sua_iface_dict, r_count, c_count); - if (layout == dal::data_layout::unknown){ + if (layout == dal::data_layout::unknown) { // NOTE: this will make a C-contiguous deep copy of the data // if possible, this is expected to be a special case py::object copy; - if (py::hasattr(obj, "copy")){ + if (py::hasattr(obj, "copy")) { copy = obj.attr("copy")(); } - else if (py::hasattr(obj, "__array_namespace__")){ + else if (py::hasattr(obj, "__array_namespace__")) { const auto space = obj.attr("__array_namespace__")(); copy = space.attr("asarray")(obj, "copy"_a = true); } diff --git a/onedal/datatypes/table.cpp b/onedal/datatypes/table.cpp index 4b9fc71307..ed8a931522 100644 --- a/onedal/datatypes/table.cpp +++ b/onedal/datatypes/table.cpp @@ -27,9 +27,9 @@ #include "onedal/version.hpp" #if ONEDAL_VERSION <= 20230100 - #include "oneapi/dal/table/detail/csr.hpp" +#include "oneapi/dal/table/detail/csr.hpp" #else - #include "oneapi/dal/table/csr.hpp" +#include "oneapi/dal/table/csr.hpp" #endif namespace py = pybind11; @@ -72,7 +72,7 @@ ONEDAL_PY_INIT_MODULE(table) { const auto column_count = t.get_column_count(); return py::make_tuple(row_count, column_count); }); - table_obj.def_property_readonly("dtype", [](const table& t){ + table_obj.def_property_readonly("dtype", [](const table& t) { // returns a numpy dtype, even if source was not from numpy return py::dtype(convert_dal_to_npy_type(t.get_metadata().get_data_type(0))); }); @@ -82,11 +82,11 @@ ONEDAL_PY_INIT_MODULE(table) { #endif // ONEDAL_DATA_PARALLEL m.def("to_table", [](py::object obj, py::object queue) { - #ifdef ONEDAL_DATA_PARALLEL +#ifdef ONEDAL_DATA_PARALLEL if (py::hasattr(obj, "__sycl_usm_array_interface__")) { return convert_from_sua_iface(obj); } - #endif // ONEDAL_DATA_PARALLEL +#endif // ONEDAL_DATA_PARALLEL return convert_to_table(obj, queue); }); diff --git a/onedal/datatypes/table_metadata.cpp b/onedal/datatypes/table_metadata.cpp index 2ddd32570c..d83baa7cf3 100644 --- a/onedal/datatypes/table_metadata.cpp +++ b/onedal/datatypes/table_metadata.cpp @@ -28,16 +28,19 @@ ONEDAL_PY_INIT_MODULE(table_metadata) { py::class_ table_metadata_obj(m, "table_metadata"); table_metadata_obj.def(py::init()); table_metadata_obj.def_property_readonly("feature_count", // - &table_metadata::get_feature_count); - - table_metadata_obj.def("get_raw_dtype", [](const table_metadata* const ptr, std::int64_t feature) { - return static_cast(ptr->get_data_type(feature)); - }); - - table_metadata_obj.def("get_npy_dtype", [](const table_metadata* const ptr, std::int64_t feature) { - const auto npy_type = convert_dal_to_npy_type(ptr->get_data_type(feature)); - return py::dtype(npy_type); - }); + &table_metadata::get_feature_count); + + table_metadata_obj.def("get_raw_dtype", + [](const table_metadata* const ptr, std::int64_t feature) { + return static_cast(ptr->get_data_type(feature)); + }); + + table_metadata_obj.def("get_npy_dtype", + [](const table_metadata* const ptr, std::int64_t feature) { + const auto npy_type = + convert_dal_to_npy_type(ptr->get_data_type(feature)); + return py::dtype(npy_type); + }); m.def("get_table_metadata", [](const dal::table& t) { return t.get_metadata(); diff --git a/onedal/datatypes/utils/numpy_helpers.cpp b/onedal/datatypes/utils/numpy_helpers.cpp index 4fb774a6c6..bce5ee453d 100644 --- a/onedal/datatypes/utils/numpy_helpers.cpp +++ b/onedal/datatypes/utils/numpy_helpers.cpp @@ -29,10 +29,10 @@ auto reverse_map(const std::map& input) { const npy_to_dal_t& get_npy_to_dal_map() { static npy_to_dal_t body = { - {NPY_INT32, dal::data_type::int32}, - {NPY_INT64, dal::data_type::int64}, - {NPY_FLOAT32, dal::data_type::float32}, - {NPY_FLOAT64, dal::data_type::float64}, + { NPY_INT32, dal::data_type::int32 }, + { NPY_INT64, dal::data_type::int64 }, + { NPY_FLOAT32, dal::data_type::float32 }, + { NPY_FLOAT64, dal::data_type::float64 }, }; return body; } @@ -50,5 +50,4 @@ npy_dtype_t convert_dal_to_npy_type(dal::data_type type) { return get_dal_to_npy_map().at(type); } - } // namespace oneapi::dal::python diff --git a/onedal/datatypes/utils/numpy_helpers.hpp b/onedal/datatypes/utils/numpy_helpers.hpp index 04ebeb13cd..461498d1b2 100644 --- a/onedal/datatypes/utils/numpy_helpers.hpp +++ b/onedal/datatypes/utils/numpy_helpers.hpp @@ -66,58 +66,70 @@ }; #define SET_NPY_FEATURE(_T, _S, _FUNCT, _EXCEPTION) \ - switch (_T) { \ - case NPY_FLOAT: \ - case NPY_CFLOAT: \ - case NPY_FLOATLTR: \ - case NPY_CFLOATLTR: { \ - _FUNCT(float); \ - break; \ - } \ - case NPY_DOUBLE: \ - case NPY_CDOUBLE: \ - case NPY_DOUBLELTR: \ - case NPY_CDOUBLELTR: { \ - _FUNCT(double); \ - break; \ - } \ - case NPY_INTLTR: \ - case NPY_INT32: { \ - _FUNCT(std::int32_t); \ - break; \ - } \ - case NPY_UINTLTR: \ - case NPY_UINT32: { \ - _FUNCT(std::uint32_t); \ - break; \ - } \ - case NPY_LONGLONGLTR: \ - case NPY_INT64: { \ - _FUNCT(std::int64_t); \ - break; \ - } \ - case NPY_ULONGLONGLTR: \ - case NPY_UINT64: { \ - _FUNCT(std::uint64_t); \ - break; \ - } \ - case NPY_LONGLTR: {\ - if (_S == 4) {_FUNCT(std::int32_t);} \ - else if (_S == 8) {_FUNCT(std::int64_t);} \ - else {_EXCEPTION;} \ - break; \ - } \ - case NPY_ULONGLTR: {\ - if (_S == 4) {_FUNCT(std::uint32_t);} \ - else if (_S == 8) {_FUNCT(std::uint64_t);} \ - else {_EXCEPTION;} \ - break; \ - }\ - default: _EXCEPTION; \ + switch (_T) { \ + case NPY_FLOAT: \ + case NPY_CFLOAT: \ + case NPY_FLOATLTR: \ + case NPY_CFLOATLTR: { \ + _FUNCT(float); \ + break; \ + } \ + case NPY_DOUBLE: \ + case NPY_CDOUBLE: \ + case NPY_DOUBLELTR: \ + case NPY_CDOUBLELTR: { \ + _FUNCT(double); \ + break; \ + } \ + case NPY_INTLTR: \ + case NPY_INT32: { \ + _FUNCT(std::int32_t); \ + break; \ + } \ + case NPY_UINTLTR: \ + case NPY_UINT32: { \ + _FUNCT(std::uint32_t); \ + break; \ + } \ + case NPY_LONGLONGLTR: \ + case NPY_INT64: { \ + _FUNCT(std::int64_t); \ + break; \ + } \ + case NPY_ULONGLONGLTR: \ + case NPY_UINT64: { \ + _FUNCT(std::uint64_t); \ + break; \ + } \ + case NPY_LONGLTR: { \ + if (_S == 4) { \ + _FUNCT(std::int32_t); \ + } \ + else if (_S == 8) { \ + _FUNCT(std::int64_t); \ + } \ + else { \ + _EXCEPTION; \ + } \ + break; \ + } \ + case NPY_ULONGLTR: { \ + if (_S == 4) { \ + _FUNCT(std::uint32_t); \ + } \ + else if (_S == 8) { \ + _FUNCT(std::uint64_t); \ + } \ + else { \ + _EXCEPTION; \ + } \ + break; \ + } \ + default: _EXCEPTION; \ }; -#define is_array(a) ((a) && PyArray_Check(a)) -#define array_type(a) PyArray_TYPE((PyArrayObject *)a) +#define is_array(a) ((a) && PyArray_Check(a)) +#define array_type(a) PyArray_TYPE((PyArrayObject *)a) #define array_type_sizeof(a) PyArray_ITEMSIZE((PyArrayObject *)a) #define array_is_behaved_C(a) \ (PyArray_ISCARRAY_RO((PyArrayObject *)a) && array_type(a) < NPY_OBJECT) @@ -134,8 +146,8 @@ using npy_dtype_t = decltype(NPY_FLOAT); using npy_to_dal_t = std::map; using dal_to_npy_t = std::map; -const npy_to_dal_t& get_npy_to_dal_map(); -const dal_to_npy_t& get_dal_to_npy_map(); +const npy_to_dal_t &get_npy_to_dal_map(); +const dal_to_npy_t &get_dal_to_npy_map(); dal::data_type convert_npy_to_dal_type(npy_dtype_t); npy_dtype_t convert_dal_to_npy_type(dal::data_type); diff --git a/onedal/decomposition/pca.cpp b/onedal/decomposition/pca.cpp index be4e55290a..929824ce9b 100644 --- a/onedal/decomposition/pca.cpp +++ b/onedal/decomposition/pca.cpp @@ -109,7 +109,8 @@ void init_train_result(py::module_& m) { .def_property_readonly("eigenvalues", &result_t::get_eigenvalues) #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240100 .def_property_readonly("singular_values", &result_t::get_singular_values) - .def_property_readonly("explained_variances_ratio", &result_t::get_explained_variances_ratio) + .def_property_readonly("explained_variances_ratio", + &result_t::get_explained_variances_ratio) #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION>=20240100 .def_property_readonly("means", &result_t::get_means) .def_property_readonly("variances", &result_t::get_variances); @@ -126,7 +127,7 @@ void init_partial_train_result(py::module_& m) { .DEF_ONEDAL_PY_PROPERTY(partial_crossproduct, result_t) .DEF_ONEDAL_PY_PROPERTY(partial_sum, result_t) .DEF_ONEDAL_PY_PROPERTY(auxiliary_table, result_t) - .def_property_readonly("auxiliary_table_count", &result_t::get_auxiliary_table_count) + .def_property_readonly("auxiliary_table_count", &result_t::get_auxiliary_table_count) .def(py::pickle( [](const result_t& res) { py::list auxiliary; @@ -139,23 +140,24 @@ void init_partial_train_result(py::module_& m) { py::cast(convert_to_pyobject(res.get_partial_n_rows())), py::cast(convert_to_pyobject(res.get_partial_crossproduct())), py::cast(convert_to_pyobject(res.get_partial_sum())), - auxiliary - ); + auxiliary); }, [](py::tuple t) { if (t.size() != 4) throw std::runtime_error("Invalid state!"); result_t res; - if (py::cast(t[0].attr("size")) != 0) res.set_partial_n_rows(convert_to_table(t[0])); - if (py::cast(t[1].attr("size")) != 0) res.set_partial_crossproduct(convert_to_table(t[1])); - if (py::cast(t[2].attr("size")) != 0) res.set_partial_sum(convert_to_table(t[2])); + if (py::cast(t[0].attr("size")) != 0) + res.set_partial_n_rows(convert_to_table(t[0])); + if (py::cast(t[1].attr("size")) != 0) + res.set_partial_crossproduct(convert_to_table(t[1])); + if (py::cast(t[2].attr("size")) != 0) + res.set_partial_sum(convert_to_table(t[2])); py::list aux_list = t[3].cast(); for (int i = 0; i < aux_list.size(); i++) { res.set_auxiliary_table(convert_to_table(aux_list[i])); } return res; - } - )); + })); } template @@ -182,27 +184,25 @@ void init_train_ops(py::module& m) { template void init_partial_train_ops(py::module& m) { using prev_result_t = dal::pca::partial_train_result; - m.def("partial_train", []( - const Policy& policy, - const py::dict& params, - const prev_result_t& prev, - const table& data) { - using namespace dal::pca; - using input_t = partial_train_input; - partial_train_ops ops(policy, input_t{ prev, data }, params2desc{}); - return fptype2t{ incrementalmethod2t{ Task{}, ops } }(params); - } - ); + m.def("partial_train", + [](const Policy& policy, + const py::dict& params, + const prev_result_t& prev, + const table& data) { + using namespace dal::pca; + using input_t = partial_train_input; + partial_train_ops ops(policy, input_t{ prev, data }, params2desc{}); + return fptype2t{ incrementalmethod2t{ Task{}, ops } }(params); + }); }; template void init_finalize_train_ops(py::module& m) { using input_t = dal::pca::partial_train_result; m.def("finalize_train", [](const Policy& policy, const py::dict& params, const input_t& data) { - finalize_train_ops ops(policy, data, params2desc{}); - return fptype2t{ incrementalmethod2t{ Task{}, ops } }(params); - } - ); + finalize_train_ops ops(policy, data, params2desc{}); + return fptype2t{ incrementalmethod2t{ Task{}, ops } }(params); + }); }; template @@ -237,19 +237,19 @@ ONEDAL_PY_INIT_MODULE(decomposition) { using task_list = types; auto sub = m.def_submodule("decomposition"); - #ifdef ONEDAL_DATA_PARALLEL_SPMD - ONEDAL_PY_INSTANTIATE(init_train_ops, sub, policy_spmd, task_list); - ONEDAL_PY_INSTANTIATE(init_finalize_train_ops, sub, policy_spmd, task_list); - #else - ONEDAL_PY_INSTANTIATE(init_train_ops, sub, policy_list, task_list); - ONEDAL_PY_INSTANTIATE(init_infer_ops, sub, policy_list, task_list); - ONEDAL_PY_INSTANTIATE(init_model, sub, task_list); - ONEDAL_PY_INSTANTIATE(init_train_result, sub, task_list); - ONEDAL_PY_INSTANTIATE(init_partial_train_result, sub, task_list); - ONEDAL_PY_INSTANTIATE(init_infer_result, sub, task_list); - ONEDAL_PY_INSTANTIATE(init_partial_train_ops, sub, policy_list, task_list); - ONEDAL_PY_INSTANTIATE(init_finalize_train_ops, sub, policy_list, task_list); - #endif +#ifdef ONEDAL_DATA_PARALLEL_SPMD + ONEDAL_PY_INSTANTIATE(init_train_ops, sub, policy_spmd, task_list); + ONEDAL_PY_INSTANTIATE(init_finalize_train_ops, sub, policy_spmd, task_list); +#else + ONEDAL_PY_INSTANTIATE(init_train_ops, sub, policy_list, task_list); + ONEDAL_PY_INSTANTIATE(init_infer_ops, sub, policy_list, task_list); + ONEDAL_PY_INSTANTIATE(init_model, sub, task_list); + ONEDAL_PY_INSTANTIATE(init_train_result, sub, task_list); + ONEDAL_PY_INSTANTIATE(init_partial_train_result, sub, task_list); + ONEDAL_PY_INSTANTIATE(init_infer_result, sub, task_list); + ONEDAL_PY_INSTANTIATE(init_partial_train_ops, sub, policy_list, task_list); + ONEDAL_PY_INSTANTIATE(init_finalize_train_ops, sub, policy_list, task_list); +#endif } ONEDAL_PY_TYPE2STR(dal::pca::task::dim_reduction, "dim_reduction"); diff --git a/onedal/linear_model/linear_model.cpp b/onedal/linear_model/linear_model.cpp index 8376e652a7..54e0972bae 100644 --- a/onedal/linear_model/linear_model.cpp +++ b/onedal/linear_model/linear_model.cpp @@ -89,10 +89,11 @@ struct params2desc { #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240600 const auto alpha = params["alpha"].cast(); auto desc = linear_regression::descriptor(intercept, alpha) - .set_result_options(get_onedal_result_options(params)); + .set_result_options(get_onedal_result_options(params)); #else - auto desc = linear_regression::descriptor(intercept) - .set_result_options(get_onedal_result_options(params)); + auto desc = + linear_regression::descriptor(intercept).set_result_options( + get_onedal_result_options(params)); #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240600 return desc; } @@ -102,65 +103,65 @@ template void init_train_ops(py::module& m) { #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240000 using train_hyperparams_t = dal::linear_regression::detail::train_parameters; - m.def("train", []( - const Policy& policy, - const py::dict& params, - const train_hyperparams_t& hyperparams, - const table& data, - const table& responses) { - using namespace dal::linear_regression; - using input_t = train_input; - train_ops_with_hyperparams ops( - policy, input_t{ data, responses }, params2desc{}, hyperparams); - return fptype2t{ method2t{ Task{}, ops } }(params); - } - ); + m.def("train", + [](const Policy& policy, + const py::dict& params, + const train_hyperparams_t& hyperparams, + const table& data, + const table& responses) { + using namespace dal::linear_regression; + using input_t = train_input; + train_ops_with_hyperparams ops(policy, + input_t{ data, responses }, + params2desc{}, + hyperparams); + return fptype2t{ method2t{ Task{}, ops } }(params); + }); #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240000 - m.def("train", []( - const Policy& policy, - const py::dict& params, - const table& data, - const table& responses) { - using namespace dal::linear_regression; - using input_t = train_input; - train_ops ops(policy, input_t{ data, responses }, params2desc{}); - return fptype2t{ method2t{ Task{}, ops } }(params); - } - ); + m.def("train", + [](const Policy& policy, + const py::dict& params, + const table& data, + const table& responses) { + using namespace dal::linear_regression; + using input_t = train_input; + train_ops ops(policy, input_t{ data, responses }, params2desc{}); + return fptype2t{ method2t{ Task{}, ops } }(params); + }); }; template void init_partial_train_ops(py::module& m) { using prev_result_t = dal::linear_regression::partial_train_result; using train_hyperparams_t = dal::linear_regression::detail::train_parameters; - m.def("partial_train", []( - const Policy& policy, - const py::dict& params, - const train_hyperparams_t& hyperparams, - const prev_result_t& prev, - const table& data, - const table& responses) { - using namespace dal::linear_regression; - using input_t = partial_train_input; - partial_train_ops_with_hyperparams ops( - policy, input_t{ prev, data, responses }, params2desc{}, hyperparams); - return fptype2t{ method2t{ Task{}, ops } }(params); - } - ); + m.def("partial_train", + [](const Policy& policy, + const py::dict& params, + const train_hyperparams_t& hyperparams, + const prev_result_t& prev, + const table& data, + const table& responses) { + using namespace dal::linear_regression; + using input_t = partial_train_input; + partial_train_ops_with_hyperparams ops(policy, + input_t{ prev, data, responses }, + params2desc{}, + hyperparams); + return fptype2t{ method2t{ Task{}, ops } }(params); + }); using prev_result_t = dal::linear_regression::partial_train_result; - m.def("partial_train", []( - const Policy& policy, - const py::dict& params, - const prev_result_t& prev, - const table& data, - const table& responses) { - using namespace dal::linear_regression; - using input_t = partial_train_input; - partial_train_ops ops(policy, input_t{ prev, data, responses }, params2desc{}); - return fptype2t{ method2t{ Task{}, ops } }(params); - } - ); + m.def("partial_train", + [](const Policy& policy, + const py::dict& params, + const prev_result_t& prev, + const table& data, + const table& responses) { + using namespace dal::linear_regression; + using input_t = partial_train_input; + partial_train_ops ops(policy, input_t{ prev, data, responses }, params2desc{}); + return fptype2t{ method2t{ Task{}, ops } }(params); + }); }; template @@ -168,24 +169,19 @@ void init_finalize_train_ops(py::module& m) { using input_t = dal::linear_regression::partial_train_result; using train_hyperparams_t = dal::linear_regression::detail::train_parameters; - m.def("finalize_train", []( - const Policy& policy, - const py::dict& params, - const train_hyperparams_t& hyperparams, - const input_t& data) { - finalize_train_ops_with_hyperparams ops(policy, data, params2desc{}, hyperparams); - return fptype2t{ method2t{ Task{}, ops } }(params); - } - ); - - m.def("finalize_train", []( - const Policy& policy, - const py::dict& params, - const input_t& data) { - finalize_train_ops ops(policy, data, params2desc{}); - return fptype2t{ method2t{ Task{}, ops } }(params); - } - ); + m.def("finalize_train", + [](const Policy& policy, + const py::dict& params, + const train_hyperparams_t& hyperparams, + const input_t& data) { + finalize_train_ops_with_hyperparams ops(policy, data, params2desc{}, hyperparams); + return fptype2t{ method2t{ Task{}, ops } }(params); + }); + + m.def("finalize_train", [](const Policy& policy, const py::dict& params, const input_t& data) { + finalize_train_ops ops(policy, data, params2desc{}); + return fptype2t{ method2t{ Task{}, ops } }(params); + }); }; template @@ -263,17 +259,20 @@ void init_train_hyperparameters(py::module_& m) { auto cls = py::class_(m, "train_hyperparameters") .def(py::init()) - .def("set_cpu_macro_block", [](train_hyperparams_t& self, int64_t cpu_macro_block) { - self.set_cpu_macro_block(cpu_macro_block); - }) - .def("set_gpu_macro_block", [](train_hyperparams_t& self, int64_t gpu_macro_block) { - self.set_gpu_macro_block(gpu_macro_block); - }) - .def("get_cpu_macro_block", [](const train_hyperparams_t& self) { - return self.get_cpu_macro_block(); - }) + .def("set_cpu_macro_block", + [](train_hyperparams_t& self, int64_t cpu_macro_block) { + self.set_cpu_macro_block(cpu_macro_block); + }) + .def("set_gpu_macro_block", + [](train_hyperparams_t& self, int64_t gpu_macro_block) { + self.set_gpu_macro_block(gpu_macro_block); + }) + .def("get_cpu_macro_block", + [](const train_hyperparams_t& self) { + return self.get_cpu_macro_block(); + }) .def("get_gpu_macro_block", [](const train_hyperparams_t& self) { - return self.get_gpu_macro_block(); + return self.get_gpu_macro_block(); }); } diff --git a/onedal/linear_model/logistic_regression.cpp b/onedal/linear_model/logistic_regression.cpp index e426d3fec9..18121be4d9 100644 --- a/onedal/linear_model/logistic_regression.cpp +++ b/onedal/linear_model/logistic_regression.cpp @@ -72,7 +72,6 @@ struct optimizer2t { Ops ops; }; - auto get_onedal_result_options(const py::dict& params) { using namespace dal::logistic_regression; @@ -102,7 +101,7 @@ auto get_onedal_result_options(const py::dict& params) { else if (match.str() == "inner_iterations_count") { onedal_options = onedal_options | result_options::inner_iterations_count; } -#endif +#endif else { ONEDAL_PARAM_DISPATCH_THROW_INVALID_VALUE(result_option); } @@ -124,9 +123,8 @@ struct descriptor_creator { static auto get(bool intercept, double C) { - return dal::logistic_regression::descriptor(intercept, C); + return dal::logistic_regression:: + descriptor(intercept, C); } }; @@ -138,9 +136,9 @@ struct params2desc { const auto intercept = params["intercept"].cast(); const auto C = params["C"].cast(); - auto desc = descriptor_creator::get(intercept, C).set_result_options( - get_onedal_result_options(params)); - + auto desc = descriptor_creator::get(intercept, C) + .set_result_options(get_onedal_result_options(params)); + desc.set_optimizer(get_optimizer_descriptor(params)); return desc; @@ -163,7 +161,7 @@ struct init_train_ops_dispatcher; train_ops ops(policy, input_t{ data, responses }, params2desc{}); - return fptype2t{ method2t{ Task{}, optimizer2t{ops} } }(params); + return fptype2t{ method2t{ Task{}, optimizer2t{ ops } } }(params); }); } }; @@ -184,7 +182,7 @@ void init_infer_ops(py::module_& m) { using input_t = infer_input; infer_ops ops(policy, input_t{ data, model }, params2desc{}); - return fptype2t{ method2t{ Task{}, optimizer2t{ops} } }(params); + return fptype2t{ method2t{ Task{}, optimizer2t{ ops } } }(params); }); } @@ -240,10 +238,10 @@ ONEDAL_PY_DECLARE_INSTANTIATOR(init_infer_result); ONEDAL_PY_DECLARE_INSTANTIATOR(init_train_ops); ONEDAL_PY_DECLARE_INSTANTIATOR(init_infer_ops); -} // namespace linear_model - } // namespace logistic_regression +} // namespace linear_model + ONEDAL_PY_INIT_MODULE(logistic_regression) { using namespace dal::detail; using namespace linear_model::logistic_regression; @@ -252,12 +250,11 @@ ONEDAL_PY_INIT_MODULE(logistic_regression) { using task_list = types; auto sub = m.def_submodule("logistic_regression"); - #if defined(ONEDAL_DATA_PARALLEL_SPMD) - #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240100 - ONEDAL_PY_INSTANTIATE(init_train_ops, sub, policy_spmd, task_list); - ONEDAL_PY_INSTANTIATE(init_infer_ops, sub, policy_spmd, task_list); - #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240100 +#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240100 + ONEDAL_PY_INSTANTIATE(init_train_ops, sub, policy_spmd, task_list); + ONEDAL_PY_INSTANTIATE(init_infer_ops, sub, policy_spmd, task_list); +#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240100 #else // ONEDAL_DATA_PARALLEL_SPMD ONEDAL_PY_INSTANTIATE(init_train_ops, sub, policy_list, task_list); ONEDAL_PY_INSTANTIATE(init_infer_ops, sub, policy_list, task_list); diff --git a/onedal/linear_model/tests/test_incremental_linear_regression.py b/onedal/linear_model/tests/test_incremental_linear_regression.py index 3335542106..b707ceeada 100644 --- a/onedal/linear_model/tests/test_incremental_linear_regression.py +++ b/onedal/linear_model/tests/test_incremental_linear_regression.py @@ -92,7 +92,8 @@ def test_full_results(queue, num_blocks, dtype): tol = 5e-3 if model.coef_.dtype == np.float32 else 1e-5 else: tol = 3e-3 if model.coef_.dtype == np.float32 else 1e-5 - assert_allclose(coef, model.coef_.T, rtol=tol) + atol = 1e-4 if model.coef_.dtype == np.float32 else 1e-6 + assert_allclose(coef, model.coef_.T, rtol=tol, atol=atol) tol = 3e-3 if model.intercept_.dtype == np.float32 else 1e-5 assert_allclose(intercept, model.intercept_, rtol=tol) diff --git a/onedal/primitives/kernel_functions.hpp b/onedal/primitives/kernel_functions.hpp index 7a10061a1c..4157ec12a3 100644 --- a/onedal/primitives/kernel_functions.hpp +++ b/onedal/primitives/kernel_functions.hpp @@ -50,8 +50,7 @@ inline auto get_kernel_descriptor(const pybind11::dict& params) { kernel.set_sigma(params["sigma"].cast()); } if constexpr (std::is_same_v) { - kernel.set_scale(params["scale"].cast()) - .set_shift(params["shift"].cast()); + kernel.set_scale(params["scale"].cast()).set_shift(params["shift"].cast()); } return kernel; } @@ -78,11 +77,15 @@ struct kernel_params2desc { } }; -template +template inline void init_kernel_compute_ops(pybind11::module_& m) { m.def("compute", [](const Policy& policy, const pybind11::dict& params, const table& x, const table& y) { - compute_ops ops (policy, Input{x, y}, Param2Desc{}); + compute_ops ops(policy, Input{ x, y }, Param2Desc{}); return fptype2t{ kernel_method2t{ DenseMethod{}, ops } }(params); }); } diff --git a/onedal/primitives/linear_kernel.cpp b/onedal/primitives/linear_kernel.cpp index 0f8eaec7e7..bd43e36463 100644 --- a/onedal/primitives/linear_kernel.cpp +++ b/onedal/primitives/linear_kernel.cpp @@ -31,10 +31,16 @@ ONEDAL_PY_INIT_MODULE(linear_kernel) { using param2desc_t = kernel_params2desc; auto sub = m.def_submodule("linear_kernel"); - #ifndef ONEDAL_DATA_PARALLEL_SPMD - ONEDAL_PY_INSTANTIATE(init_kernel_result, sub, result_t); - ONEDAL_PY_INSTANTIATE(init_kernel_compute_ops, sub, policy_list, input_t, result_t, param2desc_t, method::dense); - #endif +#ifndef ONEDAL_DATA_PARALLEL_SPMD + ONEDAL_PY_INSTANTIATE(init_kernel_result, sub, result_t); + ONEDAL_PY_INSTANTIATE(init_kernel_compute_ops, + sub, + policy_list, + input_t, + result_t, + param2desc_t, + method::dense); +#endif } } // namespace oneapi::dal::python diff --git a/onedal/primitives/optimizers.hpp b/onedal/primitives/optimizers.hpp index 18428b9ccb..a66615fbe7 100644 --- a/onedal/primitives/optimizers.hpp +++ b/onedal/primitives/optimizers.hpp @@ -24,7 +24,7 @@ namespace oneapi::dal::python { -template +template auto get_optimizer_descriptor(const pybind11::dict& params) { auto optimizer = Optimizer{}; optimizer.set_tolerance(params["tol"].cast()); diff --git a/onedal/primitives/polynomial_kernel.cpp b/onedal/primitives/polynomial_kernel.cpp index 8a770dc08d..74884062da 100644 --- a/onedal/primitives/polynomial_kernel.cpp +++ b/onedal/primitives/polynomial_kernel.cpp @@ -33,7 +33,13 @@ ONEDAL_PY_INIT_MODULE(polynomial_kernel) { auto sub = m.def_submodule("polynomial_kernel"); #ifndef ONEDAL_DATA_PARALLEL_SPMD ONEDAL_PY_INSTANTIATE(init_kernel_result, sub, result_t); - ONEDAL_PY_INSTANTIATE(init_kernel_compute_ops, sub, policy_list, input_t, result_t, param2desc_t, method::dense); + ONEDAL_PY_INSTANTIATE(init_kernel_compute_ops, + sub, + policy_list, + input_t, + result_t, + param2desc_t, + method::dense); #endif } diff --git a/onedal/primitives/rbf_kernel.cpp b/onedal/primitives/rbf_kernel.cpp index 9ecf98115d..5330c85d7c 100644 --- a/onedal/primitives/rbf_kernel.cpp +++ b/onedal/primitives/rbf_kernel.cpp @@ -31,10 +31,16 @@ ONEDAL_PY_INIT_MODULE(rbf_kernel) { using param2desc_t = kernel_params2desc; auto sub = m.def_submodule("rbf_kernel"); - #ifndef ONEDAL_DATA_PARALLEL_SPMD - ONEDAL_PY_INSTANTIATE(init_kernel_result, sub, result_t); - ONEDAL_PY_INSTANTIATE(init_kernel_compute_ops, sub, policy_list, input_t, result_t, param2desc_t, method::dense); - #endif +#ifndef ONEDAL_DATA_PARALLEL_SPMD + ONEDAL_PY_INSTANTIATE(init_kernel_result, sub, result_t); + ONEDAL_PY_INSTANTIATE(init_kernel_compute_ops, + sub, + policy_list, + input_t, + result_t, + param2desc_t, + method::dense); +#endif } } // namespace oneapi::dal::python diff --git a/onedal/primitives/sigmoid_kernel.cpp b/onedal/primitives/sigmoid_kernel.cpp index a3ac2c155f..396891f130 100644 --- a/onedal/primitives/sigmoid_kernel.cpp +++ b/onedal/primitives/sigmoid_kernel.cpp @@ -31,10 +31,16 @@ ONEDAL_PY_INIT_MODULE(sigmoid_kernel) { using param2desc_t = kernel_params2desc; auto sub = m.def_submodule("sigmoid_kernel"); - #ifndef ONEDAL_DATA_PARALLEL_SPMD - ONEDAL_PY_INSTANTIATE(init_kernel_result, sub, result_t); - ONEDAL_PY_INSTANTIATE(init_kernel_compute_ops, sub, policy_list, input_t, result_t, param2desc_t, method::dense); - #endif +#ifndef ONEDAL_DATA_PARALLEL_SPMD + ONEDAL_PY_INSTANTIATE(init_kernel_result, sub, result_t); + ONEDAL_PY_INSTANTIATE(init_kernel_compute_ops, + sub, + policy_list, + input_t, + result_t, + param2desc_t, + method::dense); +#endif } } // namespace oneapi::dal::python diff --git a/onedal/primitives/tree_visitor.cpp b/onedal/primitives/tree_visitor.cpp index 3f77ae086f..4dcb9e9a1b 100644 --- a/onedal/primitives/tree_visitor.cpp +++ b/onedal/primitives/tree_visitor.cpp @@ -99,7 +99,6 @@ class node_count_visitor { return true; } - std::size_t n_nodes; std::size_t depth; std::size_t n_leaf_nodes; @@ -168,23 +167,25 @@ to_sklearn_tree_object_visitor::to_sklearn_tree_object_visitor(std::size_t OVERFLOW_CHECK_BY_MULTIPLICATION(std::size_t, this->node_count, this->class_count); this->node_ar_ptr = new skl_tree_node[this->node_count]; - this->value_ar_ptr = new double[this->node_count*this->class_count](); + this->value_ar_ptr = new double[this->node_count * this->class_count](); // array_t doesn't initialize the underlying memory with the object's constructor // so the values will not match what is defined above, must be done on C++ side - py::capsule free_value_ar(this->value_ar_ptr, [](void* f){ - double *value_ar_ptr = reinterpret_cast(f); + py::capsule free_value_ar(this->value_ar_ptr, [](void* f) { + double* value_ar_ptr = reinterpret_cast(f); delete[] value_ar_ptr; }); - py::capsule free_node_ar(this->node_ar_ptr, [](void* f){ - skl_tree_node *node_ar_ptr = reinterpret_cast(f); + py::capsule free_node_ar(this->node_ar_ptr, [](void* f) { + skl_tree_node* node_ar_ptr = reinterpret_cast(f); delete[] node_ar_ptr; }); - this->node_ar = py::array_t(node_ar_shape, node_ar_strides, this->node_ar_ptr, free_node_ar); - this->value_ar = py::array_t(value_ar_shape, value_ar_strides, this->value_ar_ptr, free_value_ar); + this->node_ar = + py::array_t(node_ar_shape, node_ar_strides, this->node_ar_ptr, free_node_ar); + this->value_ar = + py::array_t(value_ar_shape, value_ar_strides, this->value_ar_ptr, free_value_ar); } template @@ -206,7 +207,8 @@ bool to_sklearn_tree_object_visitor::call(const df::split_node_info& this->node_ar_ptr[node_id].threshold = info.get_feature_value(); this->node_ar_ptr[node_id].impurity = info.get_impurity(); this->node_ar_ptr[node_id].n_node_samples = info.get_sample_count(); - this->node_ar_ptr[node_id].weighted_n_node_samples = static_cast(info.get_sample_count()); + this->node_ar_ptr[node_id].weighted_n_node_samples = + static_cast(info.get_sample_count()); this->node_ar_ptr[node_id].missing_go_to_left = false; // wrap-up @@ -230,7 +232,8 @@ void to_sklearn_tree_object_visitor::_onLeafNode(const df::leaf_node_info< this->node_ar_ptr[node_id].impurity = info.get_impurity(); this->node_ar_ptr[node_id].n_node_samples = info.get_sample_count(); - this->node_ar_ptr[node_id].weighted_n_node_samples = static_cast(info.get_sample_count()); + this->node_ar_ptr[node_id].weighted_n_node_samples = + static_cast(info.get_sample_count()); this->node_ar_ptr[node_id].missing_go_to_left = false; } @@ -250,13 +253,13 @@ bool to_sklearn_tree_object_visitor::call( template <> bool to_sklearn_tree_object_visitor::call( const df::leaf_node_info& info) { - std::size_t depth = static_cast(info.get_level()); - const std::size_t label = info.get_response(); // these may be a slow accesses due to oneDAL abstraction - const double nNodeSampleCount = static_cast(info.get_sample_count()); // do them only once + const std::size_t label = + info.get_response(); // these may be a slow accesses due to oneDAL abstraction + const double nNodeSampleCount = + static_cast(info.get_sample_count()); // do them only once - while(depth--) - { + while (depth--) { const std::size_t id = parents[depth]; const std::size_t row = id * this->class_count; this->value_ar_ptr[row + label] += nNodeSampleCount; @@ -322,8 +325,8 @@ ONEDAL_PY_INIT_MODULE(get_tree) { using task_list = types; auto sub = m.def_submodule("get_tree"); - #ifndef ONEDAL_DATA_PARALLEL_SPMD - ONEDAL_PY_INSTANTIATE(init_get_tree_state, sub, task_list); - #endif +#ifndef ONEDAL_DATA_PARALLEL_SPMD + ONEDAL_PY_INSTANTIATE(init_get_tree_state, sub, task_list); +#endif } } // namespace oneapi::dal::python diff --git a/onedal/svm/svm.cpp b/onedal/svm/svm.cpp index 16a32df9e1..3907c3f904 100644 --- a/onedal/svm/svm.cpp +++ b/onedal/svm/svm.cpp @@ -143,7 +143,7 @@ void init_train_ops(py::module_& m) { using namespace dal::svm; using input_t = train_input; - train_ops ops(policy, input_t{ data, responses}, params2desc{}); + train_ops ops(policy, input_t{ data, responses }, params2desc{}); return fptype2t{ method2t{ Task{}, kernel2t{ ops } } }(params); }); } @@ -250,14 +250,14 @@ ONEDAL_PY_INIT_MODULE(svm) { types; auto sub = m.def_submodule("svm"); - #ifndef ONEDAL_DATA_PARALLEL_SPMD - ONEDAL_PY_INSTANTIATE(init_train_ops, sub, policy_list, task_list); - ONEDAL_PY_INSTANTIATE(init_infer_ops, sub, policy_list, task_list); +#ifndef ONEDAL_DATA_PARALLEL_SPMD + ONEDAL_PY_INSTANTIATE(init_train_ops, sub, policy_list, task_list); + ONEDAL_PY_INSTANTIATE(init_infer_ops, sub, policy_list, task_list); - ONEDAL_PY_INSTANTIATE(init_model, sub, task_list); - ONEDAL_PY_INSTANTIATE(init_train_result, sub, task_list); - ONEDAL_PY_INSTANTIATE(init_infer_result, sub, task_list); - #endif + ONEDAL_PY_INSTANTIATE(init_model, sub, task_list); + ONEDAL_PY_INSTANTIATE(init_train_result, sub, task_list); + ONEDAL_PY_INSTANTIATE(init_infer_result, sub, task_list); +#endif } ONEDAL_PY_TYPE2STR(dal::svm::task::classification, "classification"); diff --git a/onedal/utils/finiteness_checker.cpp b/onedal/utils/finiteness_checker.cpp index 2b8d84bd6f..a76abb4f7d 100644 --- a/onedal/utils/finiteness_checker.cpp +++ b/onedal/utils/finiteness_checker.cpp @@ -16,9 +16,9 @@ // fix error with missing headers #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20250200 - #include "oneapi/dal/algo/finiteness_checker.hpp" +#include "oneapi/dal/algo/finiteness_checker.hpp" #else - #include "oneapi/dal/algo/finiteness_checker/compute.hpp" +#include "oneapi/dal/algo/finiteness_checker/compute.hpp" #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20250200 #include "onedal/common.hpp" @@ -59,16 +59,13 @@ struct params2desc { template void init_compute_ops(py::module_& m) { - m.def("compute", - [](const Policy& policy, - const py::dict& params, - const table& data) { - using namespace finiteness_checker; - using input_t = compute_input; - - compute_ops ops(policy, input_t{ data }, params2desc{}); - return fptype2t{ method2t{ Task{}, ops } }(params); - }); + m.def("compute", [](const Policy& policy, const py::dict& params, const table& data) { + using namespace finiteness_checker; + using input_t = compute_input; + + compute_ops ops(policy, input_t{ data }, params2desc{}); + return fptype2t{ method2t{ Task{}, ops } }(params); + }); } template @@ -94,10 +91,10 @@ ONEDAL_PY_INIT_MODULE(finiteness_checker) { using task_list = types; auto sub = m.def_submodule("finiteness_checker"); - #ifndef ONEDAL_DATA_PARALLEL_SPMD - ONEDAL_PY_INSTANTIATE(init_compute_ops, sub, policy_list, task_list); - ONEDAL_PY_INSTANTIATE(init_compute_result, sub, task_list); - #endif +#ifndef ONEDAL_DATA_PARALLEL_SPMD + ONEDAL_PY_INSTANTIATE(init_compute_ops, sub, policy_list, task_list); + ONEDAL_PY_INSTANTIATE(init_compute_result, sub, task_list); +#endif } } // namespace oneapi::dal::python diff --git a/requirements-test.txt b/requirements-test.txt index 3b9408e868..539f728385 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -7,7 +7,7 @@ numpy>=1.21.6 ; python_version == '3.10' numpy>=1.23.5 ; python_version == '3.11' numpy>=2.0.0 ; python_version >= '3.12' scikit-learn==1.5.2 ; python_version <= '3.9' -scikit-learn==1.6.0 ; python_version >= '3.10' +scikit-learn==1.6.1 ; python_version >= '3.10' pandas==2.1.3 ; python_version < '3.11' pandas==2.2.3 ; python_version >= '3.11' xgboost==2.1.3 diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt index ed6a781b76..236188df6d 100644 --- a/scripts/CMakeLists.txt +++ b/scripts/CMakeLists.txt @@ -29,6 +29,9 @@ set(CMAKE_CXX_EXTENSIONS OFF) option(ADD_ONEDAL_RPATH "Adds oneDAL's file paths to the RPATH here" OFF) message(STATUS "ADD_ONEDAL_RPATH:" ${ADD_ONEDAL_RPATH}) +option(SKLEARNEX_GCOV "Compile with gcov" OFF) +message(STATUS "SKLEARNEX_GCOV:" ${SKLEARNEX_GCOV}) + if(WIN32) # hint CMake to get python from PYTHON env. variable if defined if(DEFINED ENV{PYTHON}) @@ -122,6 +125,23 @@ if(IFACE STREQUAL "host") endif() endif() + if(SKLEARNEX_GCOV) + if(CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM" OR CMAKE_CXX_COMPILER_ID MATCHES ".*Clang") + if(WIN32) + set(CMAKE_CXX_FLAGS "/clang:--coverage ${CMAKE_CXX_FLAGS}") + list(APPEND ONEDAL_LIBRARIES "clang_rt.profile-x86_64.lib") + else() + set(CMAKE_CXX_FLAGS "--coverage ${CMAKE_CXX_FLAGS}") + set(CMAKE_SHARED_LINKER_FLAGS "--coverage ${CMAKE_SHARED_LINKER_FLAGS}") + endif() + elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set(CMAKE_CXX_FLAGS "--coverage ${CMAKE_CXX_FLAGS}") + set(CMAKE_SHARED_LINKER_FLAGS "--coverage ${CMAKE_SHARED_LINKER_FLAGS}") + else() + message(WARNING "Code coverage will not be generated for target: host") + endif() + endif() + list(APPEND COMPILE_DEFINITIONS "NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION") elseif(IFACE_IS_DPC OR IFACE_IS_SPMD_DPC) @@ -137,11 +157,12 @@ elseif(IFACE_IS_DPC OR IFACE_IS_SPMD_DPC) set(CMAKE_INTERPROCEDURAL_OPTIMIZATION OFF) endif() - if(CMAKE_CXX_COMPILER MATCHES ".*icpx" OR CMAKE_CXX_COMPILER MATCHES ".*icx") + if(CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM") set(CMAKE_CXX_FLAGS "-fsycl ${CMAKE_CXX_FLAGS}") + endif() - if(CMAKE_C_COMPILER MATCHES ".*icpx" OR CMAKE_C_COMPILER MATCHES ".*icx") + if(CMAKE_C_COMPILER_ID STREQUAL "IntelLLVM") set(CMAKE_C_FLAGS "-fsycl ${CMAKE_C_FLAGS}") endif() @@ -169,6 +190,18 @@ elseif(IFACE_IS_DPC OR IFACE_IS_SPMD_DPC) endif() endif() + if(SKLEARNEX_GCOV) + if(CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM" AND WIN32) + set(CMAKE_CXX_FLAGS "/clang:-Xarch_host /clang:--coverage ${CMAKE_CXX_FLAGS}") + list(APPEND ONEDAL_LIBRARIES "clang_rt.profile-x86_64.lib") + elseif(CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM" AND UNIX) + set(CMAKE_CXX_FLAGS "-Xarch_host --coverage ${CMAKE_CXX_FLAGS}") + set(CMAKE_SHARED_LINKER_FLAGS "-Xarch_host --coverage ${CMAKE_SHARED_LINKER_FLAGS}") + else() + message(WARNING "Code coverage will not be generated for target: " ${IFACE}) + endif() + endif() + if(IFACE_IS_SPMD_DPC) set(MPI_LIBRARY ${MPI_LIBS}) endif() diff --git a/scripts/build_backend.py b/scripts/build_backend.py index f3546f480e..a6949f8145 100755 --- a/scripts/build_backend.py +++ b/scripts/build_backend.py @@ -49,6 +49,7 @@ def custom_build_cmake_clib( no_dist=True, use_parameters_lib=True, use_abs_rpath=False, + use_gcov=False, ): import pybind11 @@ -130,6 +131,9 @@ def custom_build_cmake_clib( if use_abs_rpath: cmake_args += ["-DADD_ONEDAL_RPATH=ON"] + if use_gcov: + cmake_args += ["-DSKLEARNEX_GCOV=ON"] + cpu_count = multiprocessing.cpu_count() # limit parallel cmake jobs if memory size is insufficient # TODO: add on all platforms diff --git a/setup.py b/setup.py index 8166f00d07..ed48c226e3 100644 --- a/setup.py +++ b/setup.py @@ -92,6 +92,7 @@ no_dist = True if "NO_DIST" in os.environ and os.environ["NO_DIST"] in trues else False no_dpc = True if "NO_DPC" in os.environ and os.environ["NO_DPC"] in trues else False no_stream = "NO_STREAM" in os.environ and os.environ["NO_STREAM"] in trues +use_gcov = "SKLEARNEX_GCOV" in os.environ and os.environ["SKLEARNEX_GCOV"] in trues debug_build = os.getenv("DEBUG_BUILD") == "1" mpi_root = None if no_dist else os.environ["MPIROOT"] dpcpp = ( @@ -426,6 +427,7 @@ def run(self): no_dist=no_dist, use_parameters_lib=use_parameters_lib, use_abs_rpath=USE_ABS_RPATH, + use_gcov=use_gcov, ) if dpcpp: if is_onedal_iface: @@ -435,6 +437,7 @@ def run(self): no_dist=no_dist, use_parameters_lib=use_parameters_lib, use_abs_rpath=USE_ABS_RPATH, + use_gcov=use_gcov, ) if build_distribute: build_backend.custom_build_cmake_clib( @@ -443,6 +446,7 @@ def run(self): no_dist=no_dist, use_parameters_lib=use_parameters_lib, use_abs_rpath=USE_ABS_RPATH, + use_gcov=use_gcov, ) def post_build(self): diff --git a/sklearnex/manifold/tests/test_tsne.py b/sklearnex/manifold/tests/test_tsne.py index 2ba8c64cdc..8201e65bf0 100755 --- a/sklearnex/manifold/tests/test_tsne.py +++ b/sklearnex/manifold/tests/test_tsne.py @@ -15,12 +15,236 @@ # =============================================================================== import numpy as np +import pytest from numpy.testing import assert_allclose +from sklearn.metrics.pairwise import pairwise_distances +# Note: n_components must be 2 for now +from onedal.tests.utils._dataframes_support import ( + _as_numpy, + _convert_to_dataframe, + get_dataframes_and_queues, +) -def test_sklearnex_import(): + +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) +def test_sklearnex_import(dataframe, queue): + """Test TSNE compatibility with different backends and queues, and validate sklearnex module.""" from sklearnex.manifold import TSNE X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]]) - tsne = TSNE(n_components=2, perplexity=2.0).fit(X) + X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + tsne = TSNE(n_components=2, perplexity=2.0, random_state=42, init="pca").fit(X_df) + embedding = tsne.fit_transform(X_df) + embedding = _as_numpy(embedding) assert "daal4py" in tsne.__module__ + assert tsne.n_components == 2 + assert tsne.perplexity == 2.0 + assert tsne.random_state == 42 + assert tsne.init == "pca" + + +@pytest.mark.parametrize( + "X_generator,n_components,perplexity,expected_shape,should_raise", + [ + pytest.param( + lambda rng: np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]]), + 2, + 2.0, + (4, 2), + False, + id="Basic functionality", + ), + pytest.param( + lambda rng: rng.random((100, 10)), + 2, + 30.0, + (100, 2), + False, + id="Random data", + ), + pytest.param( + lambda rng: np.array([[0, 0], [1, 1], [2, 2]]), + 2, + 2.0, + (3, 2), + False, + id="Valid minimal data", + ), + pytest.param( + lambda rng: np.empty((0, 10)), + 2, + 5.0, + None, + True, + id="Empty data", + ), + pytest.param( + lambda rng: np.array([[0, 0], [1, np.nan], [2, np.inf]]), + 2, + 5.0, + None, + True, + id="Data with NaN/Inf", + ), + pytest.param( + lambda rng: rng.random((50, 500)) * (rng.random((50, 500)) > 0.99), + 2, + 30.0, + (50, 2), + False, + id="Sparse-like high-dimensional data", + ), + pytest.param( + lambda rng: np.hstack( + [ + np.ones((50, 1)), # First column is 1 + rng.random((50, 499)) * (rng.random((50, 499)) > 0.99), + ] + ), + 2, + 30.0, + (50, 2), + False, + id="Sparse-like data with constant column", + ), + pytest.param( + lambda rng: np.where( + np.arange(50 * 500).reshape(50, 500) % 10 == 0, 0, rng.random((50, 500)) + ), + 2, + 30.0, + (50, 2), + False, + id="Sparse-like data with every tenth element zero", + ), + pytest.param( + lambda rng: rng.random((10, 5)), + 2, + 0.5, + (10, 2), + False, + id="Extremely low perplexity", + ), + ], +) +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +def test_tsne_functionality_and_edge_cases( + X_generator, + n_components, + perplexity, + expected_shape, + should_raise, + dataframe, + queue, + dtype, +): + from sklearnex.manifold import TSNE + + rng = np.random.default_rng( + seed=42 + ) # Use generator to ensure independent dataset per test + X = X_generator(rng) + X = X.astype(dtype) if X.size > 0 else X + X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + + if should_raise: + with pytest.raises(ValueError): + TSNE(n_components=n_components, perplexity=perplexity).fit_transform(X_df) + else: + tsne = TSNE(n_components=n_components, perplexity=perplexity, random_state=42) + embedding = tsne.fit_transform(X_df) + embedding = _as_numpy(embedding) + assert embedding.shape == expected_shape + assert np.all(np.isfinite(embedding)) + assert np.any(embedding != 0) + + +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) +@pytest.mark.parametrize("init", ["pca", "random"]) +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +def test_tsne_constant_data(init, dataframe, queue, dtype): + from sklearnex.manifold import TSNE + + X = np.ones((10, 10), dtype=dtype) + X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + tsne = TSNE(n_components=2, init=init, perplexity=5, random_state=42) + embedding = tsne.fit_transform(X_df) + embedding = _as_numpy(embedding) + assert embedding.shape == (10, 2) + if init == "pca": + assert np.isclose(embedding[:, 0].std(), 0, atol=1e-6) # Constant first dimension + assert np.allclose(embedding[:, 1], 0, atol=1e-6) # Zero second dimension + elif init == "random": + assert np.all(np.isfinite(embedding)) + + +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +def test_tsne_reproducibility(dataframe, queue, dtype): + from sklearnex.manifold import TSNE + + rng = np.random.default_rng(seed=42) + X = rng.random((50, 10)).astype(dtype) + X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + tsne_1 = TSNE(n_components=2, random_state=42).fit_transform(X_df) + tsne_2 = TSNE(n_components=2, random_state=42).fit_transform(X_df) + # in case of dpctl.tensor.usm_ndarray covert to numpy array + tsne_1 = _as_numpy(tsne_1) + tsne_2 = _as_numpy(tsne_2) + assert_allclose(tsne_1, tsne_2, rtol=1e-5) + + +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +def test_tsne_complex_and_gpu_validation(dataframe, queue, dtype): + from sklearnex.manifold import TSNE + + X = np.array( + [ + [1, 1, 1, 1], + [1.1, 1.1, 1.1, 1.1], + [0.9, 0.9, 0.9, 0.9], + [2e9, 2e-9, -2e9, -2e-9], + [5e-5, 5e5, -5e-5, -5e5], + [9e-7, -9e7, 9e-7, -9e7], + [1, -1, 1, -1], + [-1e-9, 1e-9, -1e-9, 1e-9], + [42, 42, 42, 42], + [8, -8, 8e8, -8e-8], + [1e-3, 1e3, -1e3, -1e-3], + [0, 1e9, -1e-9, 1], + [0, 0, 1, -1], + [0, 0, 0, 0], + [-1e5, 0, 1e5, -1], + [1, 0, -1e8, 1e8], + ] + ) + n_components = 2 + perplexity = 3.0 + expected_shape = (16, 2) + + X = X.astype(dtype) + X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + tsne = TSNE(n_components=n_components, perplexity=perplexity, random_state=42) + embedding = tsne.fit_transform(X_df) + + # Validate results + assert embedding.shape == expected_shape + embedding = _as_numpy(embedding) + assert np.all(np.isfinite(embedding)) + assert np.any(embedding != 0) + + # Ensure close points in original space remain close in embedding + group_a_indices = [0, 1, 2] # Hardcoded index of similar points + group_b_indices = [3, 4, 5] # Hardcoded index of dissimilar points from a + embedding_distances = pairwise_distances( + X, metric="euclidean" + ) # Get an array of distance where [i, j] is distance b/t i and j + # Check for distance b/t two points in group A < distance of this point and any point in group B + for i in group_a_indices: + for j in group_a_indices: + assert ( + embedding_distances[i, j] < embedding_distances[i, group_b_indices].min() + ), f"Point {i} in Group A is closer to a point in Group B than to another point in Group A." diff --git a/_clang-format b/src/.clang-format similarity index 96% rename from _clang-format rename to src/.clang-format index ee2ce9e48f..fb98895131 100644 --- a/_clang-format +++ b/src/.clang-format @@ -1,155 +1,155 @@ ---- -Language: Cpp -AccessModifierOffset: -4 -AlignAfterOpenBracket: Align -AlignConsecutiveMacros: true -AlignConsecutiveAssignments: true -AlignConsecutiveDeclarations: false -AlignEscapedNewlines: Left -AlignOperands: true -AlignTrailingComments: true -AllowAllArgumentsOnNextLine: true -AllowAllConstructorInitializersOnNextLine: true -AllowAllParametersOfDeclarationOnNextLine: true -AllowShortBlocksOnASingleLine: false -AllowShortCaseLabelsOnASingleLine: true -AllowShortFunctionsOnASingleLine: Inline -AllowShortLambdasOnASingleLine: All -AllowShortIfStatementsOnASingleLine: WithoutElse -AllowShortLoopsOnASingleLine: true -AlwaysBreakAfterDefinitionReturnType: None -AlwaysBreakAfterReturnType: None -AlwaysBreakBeforeMultilineStrings: false -AlwaysBreakTemplateDeclarations: Yes -BinPackArguments: true -BinPackParameters: true -BraceWrapping: - AfterCaseLabel: true - AfterClass: true - AfterControlStatement: true - AfterEnum: true - AfterFunction: true - AfterNamespace: true - AfterObjCDeclaration: false - AfterStruct: true - AfterUnion: true - AfterExternBlock: true - BeforeCatch: true - BeforeElse: true - IndentBraces: false - SplitEmptyFunction: false - SplitEmptyRecord: false - SplitEmptyNamespace: false -BreakBeforeBinaryOperators: NonAssignment -BreakBeforeBraces: Custom -BreakBeforeInheritanceComma: false -BreakInheritanceList: BeforeColon -BreakBeforeTernaryOperators: false -BreakConstructorInitializersBeforeComma: false -BreakConstructorInitializers: BeforeColon -BreakAfterJavaFieldAnnotations: false -BreakStringLiterals: true -ColumnLimit: 1500 -CommentPragmas: '^ IWYU pragma:' -CompactNamespaces: false -ConstructorInitializerAllOnOneLineOrOnePerLine: true -ConstructorInitializerIndentWidth: 4 -ContinuationIndentWidth: 4 -Cpp11BracedListStyle: false -DerivePointerAlignment: false -DisableFormat: false -ExperimentalAutoDetectBinPacking: false -FixNamespaceComments: true -ForEachMacros: - - foreach - - Q_FOREACH - - BOOST_FOREACH -IncludeBlocks: Preserve -IncludeCategories: - - Regex: '^' - Priority: 2 - - Regex: '^<.*\.h>' - Priority: 1 - - Regex: '^<.*' - Priority: 2 - - Regex: '.*' - Priority: 3 -IncludeIsMainRegex: '([-_](test|unittest))?$' -IndentCaseLabels: false -IndentPPDirectives: BeforeHash -IndentWidth: 4 -IndentWrappedFunctionNames: true -JavaScriptQuotes: Leave -JavaScriptWrapImports: true -KeepEmptyLinesAtTheStartOfBlocks: false -MacroBlockBegin: '' -MacroBlockEnd: '' -MaxEmptyLinesToKeep: 1 -NamespaceIndentation: None -ObjCBinPackProtocolList: Never -ObjCBlockIndentWidth: 2 -ObjCSpaceAfterProperty: false -ObjCSpaceBeforeProtocolList: true -PenaltyBreakAssignment: 2 -PenaltyBreakBeforeFirstCallParameter: 1 -PenaltyBreakComment: 300 -PenaltyBreakFirstLessLess: 120 -PenaltyBreakString: 1000 -PenaltyBreakTemplateDeclaration: 10 -PenaltyExcessCharacter: 1000000 -PenaltyReturnTypeOnItsOwnLine: 200 -PointerAlignment: Middle -RawStringFormats: - - Language: Cpp - Delimiters: - - cc - - CC - - cpp - - Cpp - - CPP - - 'c++' - - 'C++' - CanonicalDelimiter: '' - BasedOnStyle: google - - Language: TextProto - Delimiters: - - pb - - PB - - proto - - PROTO - EnclosingFunctions: - - EqualsProto - - EquivToProto - - PARSE_PARTIAL_TEXT_PROTO - - PARSE_TEST_PROTO - - PARSE_TEXT_PROTO - - ParseTextOrDie - - ParseTextProtoOrDie - CanonicalDelimiter: '' - BasedOnStyle: google -ReflowComments: false -SortIncludes: false -SortUsingDeclarations: false -SpaceAfterCStyleCast: false -SpaceAfterLogicalNot: false -SpaceAfterTemplateKeyword: true -SpaceBeforeAssignmentOperators: true -SpaceBeforeCpp11BracedList: true -SpaceBeforeCtorInitializerColon: true -SpaceBeforeInheritanceColon: true -SpaceBeforeParens: ControlStatements -SpaceBeforeRangeBasedForLoopColon: true -SpaceInEmptyParentheses: false -SpacesBeforeTrailingComments: 1 -SpacesInAngles: false -SpacesInContainerLiterals: false -SpacesInCStyleCastParentheses: false -SpacesInParentheses: false -SpacesInSquareBrackets: false -Standard: Cpp03 -StatementMacros: - - Q_UNUSED - - QT_REQUIRE_VERSION -TabWidth: 4 -UseTab: Never -... +--- +Language: Cpp +AccessModifierOffset: -4 +AlignAfterOpenBracket: Align +AlignConsecutiveMacros: true +AlignConsecutiveAssignments: true +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: Left +AlignOperands: true +AlignTrailingComments: true +AllowAllArgumentsOnNextLine: true +AllowAllConstructorInitializersOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: true +AllowShortFunctionsOnASingleLine: Inline +AllowShortLambdasOnASingleLine: All +AllowShortIfStatementsOnASingleLine: WithoutElse +AllowShortLoopsOnASingleLine: true +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: Yes +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterCaseLabel: true + AfterClass: true + AfterControlStatement: true + AfterEnum: true + AfterFunction: true + AfterNamespace: true + AfterObjCDeclaration: false + AfterStruct: true + AfterUnion: true + AfterExternBlock: true + BeforeCatch: true + BeforeElse: true + IndentBraces: false + SplitEmptyFunction: false + SplitEmptyRecord: false + SplitEmptyNamespace: false +BreakBeforeBinaryOperators: NonAssignment +BreakBeforeBraces: Custom +BreakBeforeInheritanceComma: false +BreakInheritanceList: BeforeColon +BreakBeforeTernaryOperators: false +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 1500 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: false +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IncludeBlocks: Preserve +IncludeCategories: + - Regex: '^' + Priority: 2 + - Regex: '^<.*\.h>' + Priority: 1 + - Regex: '^<.*' + Priority: 2 + - Regex: '.*' + Priority: 3 +IncludeIsMainRegex: '([-_](test|unittest))?$' +IndentCaseLabels: false +IndentPPDirectives: BeforeHash +IndentWidth: 4 +IndentWrappedFunctionNames: true +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Never +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +PointerAlignment: Middle +RawStringFormats: + - Language: Cpp + Delimiters: + - cc + - CC + - cpp + - Cpp + - CPP + - 'c++' + - 'C++' + CanonicalDelimiter: '' + BasedOnStyle: google + - Language: TextProto + Delimiters: + - pb + - PB + - proto + - PROTO + EnclosingFunctions: + - EqualsProto + - EquivToProto + - PARSE_PARTIAL_TEXT_PROTO + - PARSE_TEST_PROTO + - PARSE_TEXT_PROTO + - ParseTextOrDie + - ParseTextProtoOrDie + CanonicalDelimiter: '' + BasedOnStyle: google +ReflowComments: false +SortIncludes: false +SortUsingDeclarations: false +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCpp11BracedList: true +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInContainerLiterals: false +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp03 +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 4 +UseTab: Never +... diff --git a/src/daal4py.cpp b/src/daal4py.cpp index 0be14efc08..e02638e505 100755 --- a/src/daal4py.cpp +++ b/src/daal4py.cpp @@ -24,7 +24,7 @@ #include "daal4py_defines.h" #if NPY_ABI_VERSION < 0x02000000 - #define PyDataType_NAMES(descr) ((descr)->names) + #define PyDataType_NAMES(descr) ((descr)->names) #endif // ************************************************************************************ @@ -35,7 +35,7 @@ #define is_array(a) ((a) && PyArray_Check(a)) #define array_type(a) PyArray_TYPE((PyArrayObject *)a) -#define array_is_behaved_C(a) (PyArray_ISCARRAY_RO((PyArrayObject *)a) && array_type(a) < NPY_OBJECT) +#define array_is_behaved_C(a) (PyArray_ISCARRAY_RO((PyArrayObject *)a) && array_type(a) < NPY_OBJECT) #define array_is_behaved_F(a) (PyArray_ISFARRAY_RO((PyArrayObject *)a) && array_type(a) < NPY_OBJECT) #define array_is_native(a) (PyArray_ISNOTSWAPPED((PyArrayObject *)a)) #define array_numdims(a) PyArray_NDIM((PyArrayObject *)a) @@ -98,7 +98,7 @@ void set_rawp_base(PyArrayObject * ary, void * ptr) } inline void py_err_check() -{ +{ if (PyErr_Occurred()) { PyErr_Print(); @@ -444,10 +444,10 @@ daal::data_management::NumericTablePtr make_nt(PyObject * obj) PyArrayObject * slice = reinterpret_cast(PyArray_SimpleNewFromData(1, &column_len, ary_numtype, static_cast(PyArray_ITER_DATA(it)))); PyArray_SetBaseObject(slice, reinterpret_cast(ary)); Py_INCREF(ary); -#define SETARRAY_(_T) \ - { \ - daal::services::SharedPtr<_T> _tmp(reinterpret_cast<_T *>(PyArray_DATA(slice)), NumpyDeleter(slice)); \ - soatbl->setArray(_tmp, i); \ +#define SETARRAY_(_T) \ + { \ + daal::services::SharedPtr<_T> _tmp(reinterpret_cast<_T *>(PyArray_DATA(slice)), NumpyDeleter(slice)); \ + soatbl->setArray(_tmp, i); \ } SET_NPY_FEATURE(PyArray_DESCR(ary)->type, SETARRAY_, throw std::invalid_argument("Found unsupported array type")); #undef SETARRAY_ @@ -554,11 +554,11 @@ daal::data_management::NumericTablePtr make_nt(PyObject * obj) size_t c_nr = static_cast(PyInt_AsSsize_t(nr)); py_err_check(); #define MKCSR_(_T) ret = daal::data_management::CSRNumericTable::create(daal::services::SharedPtr<_T>(reinterpret_cast<_T *>(array_data(np_vals)), NumpyDeleter(reinterpret_cast(np_vals))), daal::services::SharedPtr(c_indcs_one_based, daal::services::ServiceDeleter()), daal::services::SharedPtr(c_roffs_one_based, daal::services::ServiceDeleter()), c_nc, c_nr) - SET_NPY_FEATURE(array_type(np_vals), MKCSR_, throw std::invalid_argument(std::string("Found unsupported data type in ")+Py_TYPE(obj)->tp_name+"\n")); + SET_NPY_FEATURE(array_type(np_vals), MKCSR_, throw std::invalid_argument(std::string("Found unsupported data type in ") + Py_TYPE(obj)->tp_name + "\n")); #undef MKCSR_ } else - throw std::invalid_argument(std::string("Failed accessing csr data when converting ")+Py_TYPE(obj)->tp_name+"\n"); + throw std::invalid_argument(std::string("Failed accessing csr data when converting ") + Py_TYPE(obj)->tp_name + "\n"); Py_DECREF(np_indcs); Py_DECREF(np_roffs); } @@ -743,7 +743,10 @@ extern "C" #endif } - size_t c_num_threads() { return daal::services::Environment::getInstance()->getNumberOfThreads(); } + size_t c_num_threads() + { + return daal::services::Environment::getInstance()->getNumberOfThreads(); + } size_t c_num_procs() { @@ -809,12 +812,7 @@ double c_roc_auc_score(data_or_file & y_true, data_or_file & y_test) auto table_true = get_table(y_true); auto table_test = get_table(y_test); auto type = (*table_test->getDictionary())[0].indexType; - if (type == daal::data_management::data_feature_utils::DAAL_FLOAT64 || - type == daal::data_management::data_feature_utils::DAAL_INT64_S || - type == daal::data_management::data_feature_utils::DAAL_INT64_U || - type == daal::data_management::data_feature_utils::DAAL_FLOAT32 || - type == daal::data_management::data_feature_utils::DAAL_INT32_S || - type == daal::data_management::data_feature_utils::DAAL_INT32_U) + if (type == daal::data_management::data_feature_utils::DAAL_FLOAT64 || type == daal::data_management::data_feature_utils::DAAL_INT64_S || type == daal::data_management::data_feature_utils::DAAL_INT64_U || type == daal::data_management::data_feature_utils::DAAL_FLOAT32 || type == daal::data_management::data_feature_utils::DAAL_INT32_S || type == daal::data_management::data_feature_utils::DAAL_INT32_U) { return daal::data_management::internal::rocAucScore(table_true, table_test); } @@ -850,12 +848,8 @@ void c_tsne_gradient_descent(data_or_file & init, data_or_file & p, data_or_file { switch (dtype) { - case 0: - daal::algorithms::internal::tsneGradientDescent(initTable, csrTable, sizeIterTable, paramTable, resultTable); - break; - case 1: - daal::algorithms::internal::tsneGradientDescent(initTable, csrTable, sizeIterTable, paramTable, resultTable); - break; + case 0: daal::algorithms::internal::tsneGradientDescent(initTable, csrTable, sizeIterTable, paramTable, resultTable); break; + case 1: daal::algorithms::internal::tsneGradientDescent(initTable, csrTable, sizeIterTable, paramTable, resultTable); break; default: throw std::invalid_argument("Invalid data type specified."); } } diff --git a/src/daal4py.h b/src/daal4py.h index 560f98e7e4..1f2109dc56 100755 --- a/src/daal4py.h +++ b/src/daal4py.h @@ -18,7 +18,7 @@ #define _HLAPI_H_INCLUDED_ #ifdef _WIN32 -#define NOMINMAX + #define NOMINMAX #endif #include @@ -41,12 +41,12 @@ using daal::services::LibraryVersionInfo; #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #include -#define NTYPE PyObject* +#define NTYPE PyObject * #if PY_VERSION_HEX < 0x03000000 -#define PyUnicode_Check(_x) PyString_Check(_x) -#define PyUnicode_AsUTF8(_x) PyString_AsString(_x) -#define PyUnicode_FromString(_x) PyString_FromString(_x) + #define PyUnicode_Check(_x) PyString_Check(_x) + #define PyUnicode_AsUTF8(_x) PyString_AsString(_x) + #define PyUnicode_FromString(_x) PyString_FromString(_x) #endif #include "data_management/data/internal/finiteness_checker.h" @@ -59,35 +59,35 @@ using daal::services::LibraryVersionInfo; #include "algorithms/tsne/tsne_gradient_descent.h" #endif - -extern "C" { -void c_daalinit(int nthreads=-1); -void c_daalfini(); -size_t c_num_threads(); -size_t c_num_procs(); -size_t c_my_procid(); -void c_enable_thread_pinning(bool enabled=true); +extern "C" +{ + void c_daalinit(int nthreads = -1); + void c_daalfini(); + size_t c_num_threads(); + size_t c_num_procs(); + size_t c_my_procid(); + void c_enable_thread_pinning(bool enabled = true); } using daal::data_management::NumericTablePtr; -typedef daal::services::SharedPtr< std::vector< std::vector< daal::byte > > > BytesArray; +typedef daal::services::SharedPtr > > BytesArray; typedef std::string std_string; typedef std::unordered_map str2i_map_t; typedef std::unordered_map i2str_map_t; -template< typename T > +template bool use_default(const daal::services::SharedPtr * attr) { return attr == NULL || attr->get() == NULL; } -template< typename T > +template bool use_default(const daal::services::SharedPtr & attr) { return attr.get() == NULL; } -template< typename T > +template bool use_default(const T * attr) { return attr == NULL; @@ -127,8 +127,8 @@ inline bool use_default(const float & attr) inline bool string2bool(const std::string & s) { - if(s == "True" || s == "true" || s == "1") return true; - if(s == "False" || s == "false" || s == "0") return false; + if (s == "True" || s == "true" || s == "1") return true; + if (s == "False" || s == "false" || s == "0") return false; throw std::invalid_argument("Bool must be one of {'True', 'true', '1', 'False', 'false', '0'}"); } @@ -138,7 +138,7 @@ class algo_manager__iface__ inline algo_manager__iface__() {} inline virtual ~algo_manager__iface__() {} // We don't want any manager to be copied - algo_manager__iface__(const algo_manager__iface__ &) = delete; + algo_manager__iface__(const algo_manager__iface__ &) = delete; algo_manager__iface__ operator=(const algo_manager__iface__ &) = delete; }; @@ -154,68 +154,77 @@ static inline NTYPE as_native_shared_ptr(services::SharedPtr< const algo_manager // Our Batch input/Output manager, abstracts from input/output types // also defines how to get results and finalize -template< typename A, typename O > +template struct IOManager { typedef O result_type; - static result_type getResult(A & algo) - { - return daal::services::staticPointerCast(algo.getResult()); - } - static bool needsFini() - { - return true; - } + static result_type getResult(A & algo) { return daal::services::staticPointerCast(algo.getResult()); } + static bool needsFini() { return true; } }; struct data_or_file { mutable daal::data_management::NumericTablePtr table; - std::string file; - template - inline data_or_file(T * ptr, size_t ncols, size_t nrows, Py_ssize_t layout) - : table(), file() + std::string file; + template + inline data_or_file(T * ptr, size_t ncols, size_t nrows, Py_ssize_t layout) : table(), file() { - if(layout > 0) throw std::invalid_argument("Supporting only homogeneous, contiguous arrays."); + if (layout > 0) throw std::invalid_argument("Supporting only homogeneous, contiguous arrays."); table = daal::data_management::HomogenNumericTable::create(ptr, ncols, nrows); } - inline data_or_file() - : table(), file() {} + inline data_or_file() : table(), file() {} data_or_file(PyObject *); }; // return input as oneDAL numeric table. extern const daal::data_management::NumericTablePtr get_table(const data_or_file & t); -template< typename T > +template struct RAW { typedef T TYPE; - const TYPE operator()(const T & o) {return o;} + const TYPE operator()(const T & o) { return o; } }; -template< typename T > -struct RAW< daal::services::SharedPtr< T > > +template +struct RAW > { - typedef daal::services::SharedPtr< T > * TYPE; - TYPE operator()(daal::services::SharedPtr< T > o) {return new daal::services::SharedPtr< T >(o);} + typedef daal::services::SharedPtr * TYPE; + TYPE operator()(daal::services::SharedPtr o) { return new daal::services::SharedPtr(o); } }; -template< typename T > T to_daal(T t) {return t;} -template< typename T > daal::services::SharedPtr to_daal(daal::services::SharedPtr* t) {return *t;} -inline const data_or_file & to_daal(const data_or_file * t) {return *t;} -inline const data_or_file & to_daal(const data_or_file & t) {return t;} -inline const data_or_file & to_daal(data_or_file * t) {return *t;} +template +T to_daal(T t) +{ + return t; +} +template +daal::services::SharedPtr to_daal(daal::services::SharedPtr * t) +{ + return *t; +} +inline const data_or_file & to_daal(const data_or_file * t) +{ + return *t; +} +inline const data_or_file & to_daal(const data_or_file & t) +{ + return t; +} +inline const data_or_file & to_daal(data_or_file * t) +{ + return *t; +} -template< typename T > +template void * get_nt_data_ptr(const daal::data_management::NumericTablePtr * ptr) { - auto dptr = dynamic_cast< const daal::data_management::HomogenNumericTable< T >* >((*ptr).get()); - return dptr ? reinterpret_cast< void* >(dptr->getArraySharedPtr().get()) : NULL; + auto dptr = dynamic_cast *>((*ptr).get()); + return dptr ? reinterpret_cast(dptr->getArraySharedPtr().get()) : NULL; } -extern int64_t string2enum(const std::string& str, str2i_map_t & strmap); +extern int64_t string2enum(const std::string & str, str2i_map_t & strmap); static std::string to_std_string(PyObject * o) { @@ -245,17 +254,16 @@ extern daal::data_management::NumericTablePtr make_nt(PyObject * nda); extern daal::data_management::DataCollectionPtr make_datacoll(PyObject * nda); extern daal::data_management::KeyValueDataCollectionPtr make_dnt(PyObject * dict, str2i_map_t &); -extern const daal::data_management::NumericTablePtr readCSV(const std::string& fname); - +extern const daal::data_management::NumericTablePtr readCSV(const std::string & fname); -template -T* dynamicPointerPtrCast(U *r) +template +T * dynamicPointerPtrCast(U * r) { T tmp = daal::services::dynamicPointerCast(*r); - return tmp ? new T(*reinterpret_cast(r)) : NULL; + return tmp ? new T(*reinterpret_cast(r)) : NULL; } -template +template bool is_valid_ptrptr(T * o) { return o != NULL && (*o).get() != NULL; @@ -263,23 +271,16 @@ bool is_valid_ptrptr(T * o) class ThreadAllow { - PyThreadState *_save; + PyThreadState * _save; + public: - ThreadAllow() - { - allow(); - } - ~ThreadAllow() - { - disallow(); - } - void allow() - { - _save = PyEval_SaveThread(); - } + ThreadAllow() { allow(); } + ~ThreadAllow() { disallow(); } + void allow() { _save = PyEval_SaveThread(); } void disallow() { - if(_save) { + if (_save) + { PyEval_RestoreThread(_save); _save = NULL; } @@ -298,7 +299,7 @@ class VSP virtual ~VSP() {}; }; // typed virtual shared pointer, for simplicity we make it a oneDAL shared pointer -template< typename T > +template class TVSP : public VSP, public daal::services::SharedPtr { public: @@ -310,45 +311,48 @@ class TVSP : public VSP, public daal::services::SharedPtr extern void daalsp_free_cap(PyObject *); extern void rawp_free_cap(PyObject *); -template< typename T > +template void set_sp_base(PyArrayObject * ary, daal::services::SharedPtr & sp) { - void * tmp_sp = static_cast(new TVSP(sp)); - PyObject* cap = PyCapsule_New(tmp_sp, NULL, daalsp_free_cap); + void * tmp_sp = static_cast(new TVSP(sp)); + PyObject * cap = PyCapsule_New(tmp_sp, NULL, daalsp_free_cap); PyArray_SetBaseObject(ary, cap); } -template< typename T > -static T* _daal_clone(const T & o) +template +static T * _daal_clone(const T & o) { return new T(o); } -extern "C" { -void set_rawp_base(PyArrayObject *, void *); +extern "C" +{ + void set_rawp_base(PyArrayObject *, void *); } -extern "C" { -bool c_assert_all_finite(const data_or_file & t, bool allowNaN, char dtype); +extern "C" +{ + bool c_assert_all_finite(const data_or_file & t, bool allowNaN, char dtype); } -extern "C" { -void c_train_test_split(data_or_file & orig, data_or_file & train, data_or_file & test, - data_or_file & train_idx, data_or_file & test_idx); +extern "C" +{ + void c_train_test_split(data_or_file & orig, data_or_file & train, data_or_file & test, data_or_file & train_idx, data_or_file & test_idx); } -extern "C" { -double c_roc_auc_score(data_or_file & y_true, data_or_file & y_test); +extern "C" +{ + double c_roc_auc_score(data_or_file & y_true, data_or_file & y_test); } -extern "C" { -void c_generate_shuffled_indices(data_or_file & idx, data_or_file & random_state); +extern "C" +{ + void c_generate_shuffled_indices(data_or_file & idx, data_or_file & random_state); } extern "C" { - void c_tsne_gradient_descent(data_or_file & init, data_or_file & p, data_or_file & size_iter, - data_or_file & params, data_or_file & results, char dtype); + void c_tsne_gradient_descent(data_or_file & init, data_or_file & p, data_or_file & size_iter, data_or_file & params, data_or_file & results, char dtype); } #endif // _HLAPI_H_INCLUDED_ diff --git a/src/daal4py_defines.h b/src/daal4py_defines.h index fde17fb556..92e80f7d04 100755 --- a/src/daal4py_defines.h +++ b/src/daal4py_defines.h @@ -17,21 +17,21 @@ #ifndef _DAAL4PY_DEFINES_H_ #define _DAAL4PY_DEFINES_H_ -#define DAAL4PY_OVERFLOW_CHECK_BY_MULTIPLICATION(type, op1, op2) \ - { \ - if (!(0 == (op1)) && !(0 == (op2))) \ - { \ - volatile type r = (op1) * (op2); \ - r /= (op1); \ - if (!(r == (op2))) throw std::runtime_error("Buffer size integer overflow"); \ - } \ +#define DAAL4PY_OVERFLOW_CHECK_BY_MULTIPLICATION(type, op1, op2) \ + { \ + if (!(0 == (op1)) && !(0 == (op2))) \ + { \ + volatile type r = (op1) * (op2); \ + r /= (op1); \ + if (!(r == (op2))) throw std::runtime_error("Buffer size integer overflow"); \ + } \ } -#define DAAL4PY_OVERFLOW_CHECK_BY_ADDING(type, op1, op2) \ - { \ - volatile type r = (op1) + (op2); \ - r -= (op1); \ - if (!(r == (op2))) throw std::runtime_error("Buffer size integer overflow"); \ +#define DAAL4PY_OVERFLOW_CHECK_BY_ADDING(type, op1, op2) \ + { \ + volatile type r = (op1) + (op2); \ + r -= (op1); \ + if (!(r == (op2))) throw std::runtime_error("Buffer size integer overflow"); \ } #define DAAL4PY_CHECK(cond, error) \ diff --git a/src/daal4py_dist.h b/src/daal4py_dist.h index e411a20211..9f4d8fc0be 100644 --- a/src/daal4py_dist.h +++ b/src/daal4py_dist.h @@ -18,12 +18,12 @@ #define _HLAPI_DISTR_H_INCLUDED_ #ifdef _WIN32 -#define NOMINMAX + #define NOMINMAX #endif #include "daal4py.h" #ifdef _DIST_ -#include + #include ////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////// @@ -31,71 +31,52 @@ // Input/Output manager for simple algos with a single fixed result-component // abstracts from input/output types // also defines how to get results and finalize -template< typename A, typename O, typename E, int P > -struct IOManagerSingle : public IOManager< A, O > +template +struct IOManagerSingle : public IOManager { - static O getResult(A & algo) - { - return algo.getResult()->get(static_cast< E >(P)); - } + static O getResult(A & algo) { return algo.getResult()->get(static_cast(P)); } }; - // Input/Output manager for intermediate steps // abstracts from input/output types // also defines how to get results and finalize -template< typename A, typename O > +template struct PartialIOManager { typedef O result_type; - static result_type getResult(A & algo) - { - return daal::services::staticPointerCast(algo.getPartialResult()); - } - static bool needsFini() - { - return false; - } + static result_type getResult(A & algo) { return daal::services::staticPointerCast(algo.getPartialResult()); } + static bool needsFini() { return false; } }; // Input/Output manager for intermediate steps with a single fixed result-component // abstracts from input/output types // also defines how to get results and finalize -template< typename A, typename O, typename E, int P > -struct PartialIOManagerSingle : public PartialIOManager< A, O > +template +struct PartialIOManagerSingle : public PartialIOManager { - static typename PartialIOManager< A, O >::result_type getResult(A & algo) - { - return algo.getPartialResult()->get(static_cast< E >(P)); - } + static typename PartialIOManager::result_type getResult(A & algo) { return algo.getPartialResult()->get(static_cast(P)); } }; // Input/Output manager for intermediate steps, output is a tuple of Result and PartialResult of algo step // abstracts from input/output types // also defines how to get results and finalize -template< typename A, typename O1, typename O2 > +template struct DoubleIOManager { - typedef std::tuple< O1, O2 > result_type; + typedef std::tuple result_type; - static result_type getResult(A & algo) - { - return std::make_tuple(algo.getResult(), algo.getPartialResult()); - } - static bool needsFini() - { - return false; - } + static result_type getResult(A & algo) { return std::make_tuple(algo.getResult(), algo.getPartialResult()); } + static bool needsFini() { return false; } }; -////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////// -#include "map_reduce_star.h" -#include "map_reduce_star_plus.h" -#include "map_reduce_tree.h" -#include "dist_custom.h" + #include "map_reduce_star.h" + #include "map_reduce_star_plus.h" + #include "map_reduce_tree.h" + #include "dist_custom.h" #endif // _DIST_ #endif // _HLAPI_DISTR_H_INCLUDED_ diff --git a/src/daal4py_version.h b/src/daal4py_version.h index bd377f4bf0..5beb15685e 100644 --- a/src/daal4py_version.h +++ b/src/daal4py_version.h @@ -18,7 +18,7 @@ #define _DAAL4PY_VERSION_INCLUDED_ #include #ifndef __INTEL_DAAL_STATUS__ - #define __INTEL_DAAL_STATUS__ "P" + #define __INTEL_DAAL_STATUS__ "P" #endif #endif // _DAAL4PY_VERSION_INCLUDED diff --git a/src/daal_compat.h b/src/daal_compat.h index 884253cc9e..79a4a5e209 100644 --- a/src/daal_compat.h +++ b/src/daal_compat.h @@ -24,17 +24,21 @@ // Some oneDAL versions seem broken, e.g. '2199' so we need to check that, too #if __INTEL_DAAL__ < 2019 || __INTEL_DAAL__ > 2100 -namespace daal { -namespace algorithms { -namespace optimization_solver { -namespace iterative_solver { - // BatchPtr typedef not existent in older oneDAL versions - typedef daal::services::SharedPtr BatchPtr; +namespace daal +{ +namespace algorithms +{ +namespace optimization_solver +{ +namespace iterative_solver +{ +// BatchPtr typedef not existent in older oneDAL versions +typedef daal::services::SharedPtr BatchPtr; -} -} -} -} +} // namespace iterative_solver +} // namespace optimization_solver +} // namespace algorithms +} // namespace daal #endif diff --git a/src/dist_custom.h b/src/dist_custom.h index 905b2d7abd..70c7df60b3 100644 --- a/src/dist_custom.h +++ b/src/dist_custom.h @@ -19,11 +19,12 @@ #include "daal4py_cpp.h" -namespace dist_custom { +namespace dist_custom +{ - template - class dist_custom; +template +class dist_custom; -} // namespace dist_custom { +} // namespace dist_custom #endif // _DIST_CUSTOM_INCLUDED_ diff --git a/src/dist_dbscan.h b/src/dist_dbscan.h index 386bdf1c33..7e468f0b31 100755 --- a/src/dist_dbscan.h +++ b/src/dist_dbscan.h @@ -25,540 +25,533 @@ using namespace daal; using namespace daal::algorithms; using namespace daal::data_management; -namespace dist_custom { +namespace dist_custom +{ -template< typename T1, typename T2 > +template class dist { public: -typedef std::vector ByteBuffer; + typedef std::vector ByteBuffer; -size_t serializeDAALObject(SerializationIface * pData, ByteBuffer & buffer) -{ - /* Create a data archive to serialize the numeric table */ - InputDataArchive dataArch; + size_t serializeDAALObject(SerializationIface * pData, ByteBuffer & buffer) + { + /* Create a data archive to serialize the numeric table */ + InputDataArchive dataArch; - /* Serialize the numeric table into the data archive */ - pData->serialize(dataArch); + /* Serialize the numeric table into the data archive */ + pData->serialize(dataArch); - /* Get the length of the serialized data in bytes */ - const size_t length = dataArch.getSizeOfArchive(); + /* Get the length of the serialized data in bytes */ + const size_t length = dataArch.getSizeOfArchive(); - /* Store the serialized data in an array */ - buffer.resize(length); - if (length) dataArch.copyArchiveToArray(&buffer[0], length); - return length; -} + /* Store the serialized data in an array */ + buffer.resize(length); + if (length) dataArch.copyArchiveToArray(&buffer[0], length); + return length; + } -SerializationIfacePtr deserializeDAALObject(daal::byte * buff, size_t length) -{ - /* Create a data archive to deserialize the object */ - OutputDataArchive dataArch(buff, length); + SerializationIfacePtr deserializeDAALObject(daal::byte * buff, size_t length) + { + /* Create a data archive to deserialize the object */ + OutputDataArchive dataArch(buff, length); - /* Deserialize the numeric table from the data archive */ - return dataArch.getAsSharedPtr(); -} + /* Deserialize the numeric table from the data archive */ + return dataArch.getAsSharedPtr(); + } -typedef T2 algorithmFPType; /* Algorithm floating-point type */ + typedef T2 algorithmFPType; /* Algorithm floating-point type */ -/* Algorithm parameters */ -T2 epsilon; -size_t minObservations; + /* Algorithm parameters */ + T2 epsilon; + size_t minObservations; -NumericTablePtr dataTable; + NumericTablePtr dataTable; -DataCollectionPtr partitionedData; -DataCollectionPtr partitionedPartialOrders; + DataCollectionPtr partitionedData; + DataCollectionPtr partitionedPartialOrders; -DataCollectionPtr partialSplits; -DataCollectionPtr partialBoundingBoxes; + DataCollectionPtr partialSplits; + DataCollectionPtr partialBoundingBoxes; -DataCollectionPtr haloData; -DataCollectionPtr haloDataIndices; -DataCollectionPtr haloBlocks; + DataCollectionPtr haloData; + DataCollectionPtr haloDataIndices; + DataCollectionPtr haloBlocks; -DataCollectionPtr queries; + DataCollectionPtr queries; -DataCollectionPtr assignmentQueries; + DataCollectionPtr assignmentQueries; -NumericTablePtr clusterStructure; -NumericTablePtr finishedFlag; -NumericTablePtr nClusters; -NumericTablePtr clusterOffset; -NumericTablePtr assignments; -NumericTablePtr totalNClusters; + NumericTablePtr clusterStructure; + NumericTablePtr finishedFlag; + NumericTablePtr nClusters; + NumericTablePtr clusterOffset; + NumericTablePtr assignments; + NumericTablePtr totalNClusters; -int rankId, comm_size; + int rankId, comm_size; -const int step2ResultBoundingBoxTag = 1; -const int step3ResultSplitTag = 2; -const int step4ResultPartitionedDataTag = 3; -const int step4ResultPartitionedPartialOrdersTag = 4; -const int step5ResultPartitionedHaloDataTag = 5; -const int step5ResultPartitionedHaloDataIndicesTag = 6; -const int step5ResultPartitionedHaloBlocksTag = 7; -const int step6ResultQueriesTag = 8; -const int step8ResultQueriesTag = 9; -const int step8ResultNClustersTag = 10; -const int resultFinishedFlagTag = 11; -const int step7ResultFinishedFlagTag = 12; -const int step9ResultNClustersTag = 13; -const int step9ResultClusterOffsetsTag = 14; -const int step10ResultQueriesTag = 15; -const int step11ResultQueriesTag = 16; -const int step12ResultAssignmentQueriesTag = 17; + const int step2ResultBoundingBoxTag = 1; + const int step3ResultSplitTag = 2; + const int step4ResultPartitionedDataTag = 3; + const int step4ResultPartitionedPartialOrdersTag = 4; + const int step5ResultPartitionedHaloDataTag = 5; + const int step5ResultPartitionedHaloDataIndicesTag = 6; + const int step5ResultPartitionedHaloBlocksTag = 7; + const int step6ResultQueriesTag = 8; + const int step8ResultQueriesTag = 9; + const int step8ResultNClustersTag = 10; + const int resultFinishedFlagTag = 11; + const int step7ResultFinishedFlagTag = 12; + const int step9ResultNClustersTag = 13; + const int step9ResultClusterOffsetsTag = 14; + const int step10ResultQueriesTag = 15; + const int step11ResultQueriesTag = 16; + const int step12ResultAssignmentQueriesTag = 17; -transceiver * tcvr; + transceiver * tcvr; -int main(const T1& input1) -{ - tcvr = get_transceiver(); - rankId = tcvr->me(); - comm_size = tcvr->nMembers(); + int main(const T1 & input1) + { + tcvr = get_transceiver(); + rankId = tcvr->me(); + comm_size = tcvr->nMembers(); - dataTable = input1; + dataTable = input1; - geometricPartitioning(); + geometricPartitioning(); - clustering(); + clustering(); - return 0; -} + return 0; + } -void geometricPartitioning() -{ - dbscan::Distributed step1(rankId, comm_size); - step1.input.set(dbscan::step1Data, dataTable); - step1.compute(); + void geometricPartitioning() + { + dbscan::Distributed step1(rankId, comm_size); + step1.input.set(dbscan::step1Data, dataTable); + step1.compute(); - partitionedData = DataCollectionPtr(new DataCollection()); - partitionedPartialOrders = DataCollectionPtr(new DataCollection()); + partitionedData = DataCollectionPtr(new DataCollection()); + partitionedPartialOrders = DataCollectionPtr(new DataCollection()); - partitionedData->push_back(dataTable); - partitionedPartialOrders->push_back(step1.getPartialResult()->get(dbscan::partialOrder)); + partitionedData->push_back(dataTable); + partitionedPartialOrders->push_back(step1.getPartialResult()->get(dbscan::partialOrder)); - size_t beginId = 0; - size_t endId = comm_size; + size_t beginId = 0; + size_t endId = comm_size; - while (true) - { - const size_t curNPartitions = endId - beginId; - if (curNPartitions == 1) + while (true) { - break; - } + const size_t curNPartitions = endId - beginId; + if (curNPartitions == 1) + { + break; + } - partialSplits = DataCollectionPtr(new DataCollection()); - partialBoundingBoxes = DataCollectionPtr(new DataCollection()); + partialSplits = DataCollectionPtr(new DataCollection()); + partialBoundingBoxes = DataCollectionPtr(new DataCollection()); - dbscan::Distributed step2(rankId - beginId, curNPartitions); - step2.input.set(dbscan::partialData, partitionedData); - step2.compute(); - NumericTablePtr curBoundingBox = step2.getPartialResult()->get(dbscan::boundingBox); + dbscan::Distributed step2(rankId - beginId, curNPartitions); + step2.input.set(dbscan::partialData, partitionedData); + step2.compute(); + NumericTablePtr curBoundingBox = step2.getPartialResult()->get(dbscan::boundingBox); - sendTableAllToAll(beginId, endId, rankId, step2ResultBoundingBoxTag, curBoundingBox, partialBoundingBoxes); + sendTableAllToAll(beginId, endId, rankId, step2ResultBoundingBoxTag, curBoundingBox, partialBoundingBoxes); - const size_t leftPartitions = curNPartitions / 2; - const size_t rightPartitions = curNPartitions - leftPartitions; + const size_t leftPartitions = curNPartitions / 2; + const size_t rightPartitions = curNPartitions - leftPartitions; - dbscan::Distributed step3(leftPartitions, rightPartitions); - step3.input.set(dbscan::partialData, partitionedData); - step3.input.set(dbscan::step3PartialBoundingBoxes, partialBoundingBoxes); - step3.compute(); - NumericTablePtr curSplit = step3.getPartialResult()->get(dbscan::split); + dbscan::Distributed step3(leftPartitions, rightPartitions); + step3.input.set(dbscan::partialData, partitionedData); + step3.input.set(dbscan::step3PartialBoundingBoxes, partialBoundingBoxes); + step3.compute(); + NumericTablePtr curSplit = step3.getPartialResult()->get(dbscan::split); - sendTableAllToAll(beginId, endId, rankId, step3ResultSplitTag, curSplit, partialSplits); + sendTableAllToAll(beginId, endId, rankId, step3ResultSplitTag, curSplit, partialSplits); - dbscan::Distributed step4(leftPartitions, rightPartitions); - step4.input.set(dbscan::partialData, partitionedData); - step4.input.set(dbscan::step4PartialOrders, partitionedPartialOrders); - step4.input.set(dbscan::step4PartialSplits, partialSplits); - step4.compute(); + dbscan::Distributed step4(leftPartitions, rightPartitions); + step4.input.set(dbscan::partialData, partitionedData); + step4.input.set(dbscan::step4PartialOrders, partitionedPartialOrders); + step4.input.set(dbscan::step4PartialSplits, partialSplits); + step4.compute(); - DataCollectionPtr curPartitionedData = step4.getPartialResult()->get(dbscan::partitionedData); - DataCollectionPtr curPartitionedPartialOrders = step4.getPartialResult()->get(dbscan::partitionedPartialOrders); + DataCollectionPtr curPartitionedData = step4.getPartialResult()->get(dbscan::partitionedData); + DataCollectionPtr curPartitionedPartialOrders = step4.getPartialResult()->get(dbscan::partitionedPartialOrders); - partitionedData = DataCollectionPtr(new DataCollection()); - partitionedPartialOrders = DataCollectionPtr(new DataCollection()); + partitionedData = DataCollectionPtr(new DataCollection()); + partitionedPartialOrders = DataCollectionPtr(new DataCollection()); - sendCollectionAllToAll(beginId, endId, rankId, step4ResultPartitionedDataTag, curPartitionedData, partitionedData); - sendCollectionAllToAll(beginId, endId, rankId, step4ResultPartitionedPartialOrdersTag, curPartitionedPartialOrders, partitionedPartialOrders); + sendCollectionAllToAll(beginId, endId, rankId, step4ResultPartitionedDataTag, curPartitionedData, partitionedData); + sendCollectionAllToAll(beginId, endId, rankId, step4ResultPartitionedPartialOrdersTag, curPartitionedPartialOrders, partitionedPartialOrders); - if (rankId < beginId + leftPartitions) - { - endId = beginId + leftPartitions; - } - else - { - beginId = beginId + leftPartitions; + if (rankId < beginId + leftPartitions) + { + endId = beginId + leftPartitions; + } + else + { + beginId = beginId + leftPartitions; + } } } -} -void clustering() -{ - partialBoundingBoxes = DataCollectionPtr(new DataCollection()); - haloData = DataCollectionPtr(new DataCollection()); - haloDataIndices = DataCollectionPtr(new DataCollection()); - haloBlocks = DataCollectionPtr(new DataCollection()); - - dbscan::Distributed step2(rankId, comm_size); - step2.input.set(dbscan::partialData, partitionedData); - step2.compute(); - NumericTablePtr curBoundingBox = step2.getPartialResult()->get(dbscan::boundingBox); - - sendTableAllToAll(0, comm_size, rankId, step2ResultBoundingBoxTag, curBoundingBox, partialBoundingBoxes, true /* preserveOrder */); - - dbscan::Distributed step5(rankId, comm_size, epsilon); - step5.input.set(dbscan::partialData, partitionedData); - step5.input.set(dbscan::step5PartialBoundingBoxes, partialBoundingBoxes); - step5.compute(); - DataCollectionPtr curHaloData = step5.getPartialResult()->get(dbscan::partitionedHaloData); - DataCollectionPtr curHaloDataIndices = step5.getPartialResult()->get(dbscan::partitionedHaloDataIndices); - DataCollectionPtr curHaloBlocks(new DataCollection()); - - for (size_t destId = 0; destId < curHaloData->size(); destId++) + void clustering() { - NumericTablePtr dataTable = services::staticPointerCast((*curHaloData)[destId]); - if (dataTable->getNumberOfRows() > 0) - { - curHaloBlocks->push_back(HomogenNumericTable::create(1, 1, NumericTableIface::doAllocate, static_cast(rankId))); - } - else - { - curHaloBlocks->push_back(NumericTablePtr()); - } - } - - sendCollectionAllToAll(0, comm_size, rankId, step5ResultPartitionedHaloDataTag, curHaloData, haloData); - sendCollectionAllToAll(0, comm_size, rankId, step5ResultPartitionedHaloDataIndicesTag, curHaloDataIndices, haloDataIndices); - sendCollectionAllToAll(0, comm_size, rankId, step5ResultPartitionedHaloBlocksTag, curHaloBlocks, haloBlocks); - - queries = DataCollectionPtr(new DataCollection()); - - dbscan::Distributed step6(rankId, comm_size, epsilon, minObservations); + partialBoundingBoxes = DataCollectionPtr(new DataCollection()); + haloData = DataCollectionPtr(new DataCollection()); + haloDataIndices = DataCollectionPtr(new DataCollection()); + haloBlocks = DataCollectionPtr(new DataCollection()); - step6.input.set(dbscan::partialData, partitionedData); - step6.input.set(dbscan::haloData, haloData); - step6.input.set(dbscan::haloDataIndices, haloDataIndices); - step6.input.set(dbscan::haloBlocks, haloBlocks); - step6.compute(); - clusterStructure = step6.getPartialResult()->get(dbscan::step6ClusterStructure); - finishedFlag = step6.getPartialResult()->get(dbscan::step6FinishedFlag); - nClusters = step6.getPartialResult()->get(dbscan::step6NClusters); + dbscan::Distributed step2(rankId, comm_size); + step2.input.set(dbscan::partialData, partitionedData); + step2.compute(); + NumericTablePtr curBoundingBox = step2.getPartialResult()->get(dbscan::boundingBox); - DataCollectionPtr curQueries = step6.getPartialResult()->get(dbscan::step6Queries); + sendTableAllToAll(0, comm_size, rankId, step2ResultBoundingBoxTag, curBoundingBox, partialBoundingBoxes, true /* preserveOrder */); - sendCollectionAllToAll(0, comm_size, rankId, step6ResultQueriesTag, curQueries, queries); + dbscan::Distributed step5(rankId, comm_size, epsilon); + step5.input.set(dbscan::partialData, partitionedData); + step5.input.set(dbscan::step5PartialBoundingBoxes, partialBoundingBoxes); + step5.compute(); + DataCollectionPtr curHaloData = step5.getPartialResult()->get(dbscan::partitionedHaloData); + DataCollectionPtr curHaloDataIndices = step5.getPartialResult()->get(dbscan::partitionedHaloDataIndices); + DataCollectionPtr curHaloBlocks(new DataCollection()); - while (computeFinishedFlag() == 0) - { - dbscan::Distributed step8(rankId, comm_size); - step8.input.set(dbscan::step8InputClusterStructure, clusterStructure); - step8.input.set(dbscan::step8InputNClusters, nClusters); - step8.input.set(dbscan::step8PartialQueries, queries); - step8.compute(); - - clusterStructure = step8.getPartialResult()->get(dbscan::step8ClusterStructure); - finishedFlag = step8.getPartialResult()->get(dbscan::step8FinishedFlag); - nClusters = step8.getPartialResult()->get(dbscan::step8NClusters); + for (size_t destId = 0; destId < curHaloData->size(); destId++) + { + NumericTablePtr dataTable = services::staticPointerCast((*curHaloData)[destId]); + if (dataTable->getNumberOfRows() > 0) + { + curHaloBlocks->push_back(HomogenNumericTable::create(1, 1, NumericTableIface::doAllocate, static_cast(rankId))); + } + else + { + curHaloBlocks->push_back(NumericTablePtr()); + } + } - DataCollectionPtr curQueries = step8.getPartialResult()->get(dbscan::step8Queries); + sendCollectionAllToAll(0, comm_size, rankId, step5ResultPartitionedHaloDataTag, curHaloData, haloData); + sendCollectionAllToAll(0, comm_size, rankId, step5ResultPartitionedHaloDataIndicesTag, curHaloDataIndices, haloDataIndices); + sendCollectionAllToAll(0, comm_size, rankId, step5ResultPartitionedHaloBlocksTag, curHaloBlocks, haloBlocks); queries = DataCollectionPtr(new DataCollection()); - sendCollectionAllToAll(0, comm_size, rankId, step8ResultQueriesTag, curQueries, queries); - } + dbscan::Distributed step6(rankId, comm_size, epsilon, minObservations); - if (rankId == 0) - { - DataCollectionPtr partialNClusters(new DataCollection()); - sendTableAllToMaster(0, comm_size, rankId, step8ResultNClustersTag, nClusters, partialNClusters); + step6.input.set(dbscan::partialData, partitionedData); + step6.input.set(dbscan::haloData, haloData); + step6.input.set(dbscan::haloDataIndices, haloDataIndices); + step6.input.set(dbscan::haloBlocks, haloBlocks); + step6.compute(); + clusterStructure = step6.getPartialResult()->get(dbscan::step6ClusterStructure); + finishedFlag = step6.getPartialResult()->get(dbscan::step6FinishedFlag); + nClusters = step6.getPartialResult()->get(dbscan::step6NClusters); - dbscan::Distributed step9; - step9.input.set(dbscan::partialNClusters, partialNClusters); - step9.compute(); - step9.finalizeCompute(); + DataCollectionPtr curQueries = step6.getPartialResult()->get(dbscan::step6Queries); - totalNClusters = step9.getResult()->get(dbscan::step9NClusters); - sendTableMasterToAll(0, comm_size, rankId, step9ResultNClustersTag, totalNClusters, totalNClusters); + sendCollectionAllToAll(0, comm_size, rankId, step6ResultQueriesTag, curQueries, queries); - DataCollectionPtr curClusterOffsets = step9.getPartialResult()->get(dbscan::clusterOffsets); - sendCollectionMasterToAll(0, comm_size, rankId, step9ResultClusterOffsetsTag, curClusterOffsets, clusterOffset); - } - else - { - DataCollectionPtr partialNClusters; - sendTableAllToMaster(0, comm_size, rankId, step8ResultNClustersTag, nClusters, partialNClusters); + while (computeFinishedFlag() == 0) + { + dbscan::Distributed step8(rankId, comm_size); + step8.input.set(dbscan::step8InputClusterStructure, clusterStructure); + step8.input.set(dbscan::step8InputNClusters, nClusters); + step8.input.set(dbscan::step8PartialQueries, queries); + step8.compute(); - sendTableMasterToAll(0, comm_size, rankId, step9ResultNClustersTag, totalNClusters, totalNClusters); + clusterStructure = step8.getPartialResult()->get(dbscan::step8ClusterStructure); + finishedFlag = step8.getPartialResult()->get(dbscan::step8FinishedFlag); + nClusters = step8.getPartialResult()->get(dbscan::step8NClusters); - DataCollectionPtr curClusterOffsets; - sendCollectionMasterToAll(0, comm_size, rankId, step9ResultClusterOffsetsTag, curClusterOffsets, clusterOffset); - } + DataCollectionPtr curQueries = step8.getPartialResult()->get(dbscan::step8Queries); - queries = DataCollectionPtr(new DataCollection()); + queries = DataCollectionPtr(new DataCollection()); - dbscan::Distributed step10(rankId, comm_size); - step10.input.set(dbscan::step10InputClusterStructure, clusterStructure); - step10.input.set(dbscan::step10ClusterOffset, clusterOffset); - step10.compute(); + sendCollectionAllToAll(0, comm_size, rankId, step8ResultQueriesTag, curQueries, queries); + } - clusterStructure = step10.getPartialResult()->get(dbscan::step10ClusterStructure); - finishedFlag = step10.getPartialResult()->get(dbscan::step10FinishedFlag); + if (rankId == 0) + { + DataCollectionPtr partialNClusters(new DataCollection()); + sendTableAllToMaster(0, comm_size, rankId, step8ResultNClustersTag, nClusters, partialNClusters); - curQueries = step10.getPartialResult()->get(dbscan::step10Queries); + dbscan::Distributed step9; + step9.input.set(dbscan::partialNClusters, partialNClusters); + step9.compute(); + step9.finalizeCompute(); - sendCollectionAllToAll(0, comm_size, rankId, step10ResultQueriesTag, curQueries, queries); + totalNClusters = step9.getResult()->get(dbscan::step9NClusters); + sendTableMasterToAll(0, comm_size, rankId, step9ResultNClustersTag, totalNClusters, totalNClusters); - while (computeFinishedFlag() == 0) - { - dbscan::Distributed step11(rankId, comm_size); - step11.input.set(dbscan::step11InputClusterStructure, clusterStructure); - step11.input.set(dbscan::step11PartialQueries, queries); - step11.compute(); + DataCollectionPtr curClusterOffsets = step9.getPartialResult()->get(dbscan::clusterOffsets); + sendCollectionMasterToAll(0, comm_size, rankId, step9ResultClusterOffsetsTag, curClusterOffsets, clusterOffset); + } + else + { + DataCollectionPtr partialNClusters; + sendTableAllToMaster(0, comm_size, rankId, step8ResultNClustersTag, nClusters, partialNClusters); - clusterStructure = step11.getPartialResult()->get(dbscan::step11ClusterStructure); - finishedFlag = step11.getPartialResult()->get(dbscan::step11FinishedFlag); + sendTableMasterToAll(0, comm_size, rankId, step9ResultNClustersTag, totalNClusters, totalNClusters); - DataCollectionPtr curQueries = step11.getPartialResult()->get(dbscan::step11Queries); + DataCollectionPtr curClusterOffsets; + sendCollectionMasterToAll(0, comm_size, rankId, step9ResultClusterOffsetsTag, curClusterOffsets, clusterOffset); + } queries = DataCollectionPtr(new DataCollection()); - sendCollectionAllToAll(0, comm_size, rankId, step11ResultQueriesTag, curQueries, queries); - } - assignmentQueries = DataCollectionPtr(new DataCollection()); + dbscan::Distributed step10(rankId, comm_size); + step10.input.set(dbscan::step10InputClusterStructure, clusterStructure); + step10.input.set(dbscan::step10ClusterOffset, clusterOffset); + step10.compute(); - dbscan::Distributed step12(rankId, comm_size); - step12.input.set(dbscan::step12InputClusterStructure, clusterStructure); - step12.input.set(dbscan::step12PartialOrders, partitionedPartialOrders); - step12.compute(); + clusterStructure = step10.getPartialResult()->get(dbscan::step10ClusterStructure); + finishedFlag = step10.getPartialResult()->get(dbscan::step10FinishedFlag); - DataCollectionPtr curAssignmentQueries = step12.getPartialResult()->get(dbscan::assignmentQueries); + curQueries = step10.getPartialResult()->get(dbscan::step10Queries); - sendCollectionAllToAll(0, comm_size, rankId, step12ResultAssignmentQueriesTag, curAssignmentQueries, assignmentQueries); + sendCollectionAllToAll(0, comm_size, rankId, step10ResultQueriesTag, curQueries, queries); - dbscan::Distributed step13; - step13.input.set(dbscan::partialAssignmentQueries, assignmentQueries); - step13.compute(); - step13.finalizeCompute(); + while (computeFinishedFlag() == 0) + { + dbscan::Distributed step11(rankId, comm_size); + step11.input.set(dbscan::step11InputClusterStructure, clusterStructure); + step11.input.set(dbscan::step11PartialQueries, queries); + step11.compute(); - assignments = step13.getResult()->get(dbscan::step13Assignments); -} + clusterStructure = step11.getPartialResult()->get(dbscan::step11ClusterStructure); + finishedFlag = step11.getPartialResult()->get(dbscan::step11FinishedFlag); -int computeFinishedFlag() -{ - if (rankId == 0) - { - DataCollectionPtr partialFinishedFlags(new DataCollection()); - sendTableAllToMaster(0, comm_size, rankId, resultFinishedFlagTag, finishedFlag, partialFinishedFlags); + DataCollectionPtr curQueries = step11.getPartialResult()->get(dbscan::step11Queries); - dbscan::Distributed step7; - step7.input.set(dbscan::partialFinishedFlags, partialFinishedFlags); - step7.compute(); - finishedFlag = step7.getPartialResult()->get(dbscan::finishedFlag); + queries = DataCollectionPtr(new DataCollection()); + sendCollectionAllToAll(0, comm_size, rankId, step11ResultQueriesTag, curQueries, queries); + } - sendTableMasterToAll(0, comm_size, rankId, step7ResultFinishedFlagTag, finishedFlag, finishedFlag); + assignmentQueries = DataCollectionPtr(new DataCollection()); - int finishedFlagValue = finishedFlag->getValue(0, 0); - return finishedFlagValue; - } - else - { - DataCollectionPtr partialFinishedFlags; - sendTableAllToMaster(0, comm_size, rankId, resultFinishedFlagTag, finishedFlag, partialFinishedFlags); + dbscan::Distributed step12(rankId, comm_size); + step12.input.set(dbscan::step12InputClusterStructure, clusterStructure); + step12.input.set(dbscan::step12PartialOrders, partitionedPartialOrders); + step12.compute(); - sendTableMasterToAll(0, comm_size, rankId, step7ResultFinishedFlagTag, finishedFlag, finishedFlag); + DataCollectionPtr curAssignmentQueries = step12.getPartialResult()->get(dbscan::assignmentQueries); - int finishedFlagValue = finishedFlag->getValue(0, 0); - return finishedFlagValue; - } -} + sendCollectionAllToAll(0, comm_size, rankId, step12ResultAssignmentQueriesTag, curAssignmentQueries, assignmentQueries); -void sendCollectionAllToAll(size_t beginId, size_t endId, size_t curId, int tag, DataCollectionPtr & collection, DataCollectionPtr & destCollection) -{ - size_t nIds = endId - beginId; - size_t nShifts = 1; - while (nShifts < nIds) nShifts <<= 1; + dbscan::Distributed step13; + step13.input.set(dbscan::partialAssignmentQueries, assignmentQueries); + step13.compute(); + step13.finalizeCompute(); + + assignments = step13.getResult()->get(dbscan::step13Assignments); + } - for (size_t shift = 0; shift < nShifts; shift++) + int computeFinishedFlag() { - size_t partnerId = ((curId - beginId) ^ shift) + beginId; - if (partnerId < beginId || partnerId >= endId) + if (rankId == 0) { - continue; - } + DataCollectionPtr partialFinishedFlags(new DataCollection()); + sendTableAllToMaster(0, comm_size, rankId, resultFinishedFlagTag, finishedFlag, partialFinishedFlags); - NumericTablePtr table = NumericTable::cast((*collection)[partnerId - beginId]); - NumericTablePtr partnerTable; + dbscan::Distributed step7; + step7.input.set(dbscan::partialFinishedFlags, partialFinishedFlags); + step7.compute(); + finishedFlag = step7.getPartialResult()->get(dbscan::finishedFlag); - if (partnerId == curId) - { - partnerTable = table; + sendTableMasterToAll(0, comm_size, rankId, step7ResultFinishedFlagTag, finishedFlag, finishedFlag); + + int finishedFlagValue = finishedFlag->getValue(0, 0); + return finishedFlagValue; } else { - if (curId < partnerId) - { - sendTable(table, partnerId, tag); - recvTable(partnerTable, partnerId, tag); - } - else - { - recvTable(partnerTable, partnerId, tag); - sendTable(table, partnerId, tag); - } - } + DataCollectionPtr partialFinishedFlags; + sendTableAllToMaster(0, comm_size, rankId, resultFinishedFlagTag, finishedFlag, partialFinishedFlags); - if (partnerTable.get() && partnerTable->getNumberOfRows() > 0) - { - destCollection->push_back(partnerTable); + sendTableMasterToAll(0, comm_size, rankId, step7ResultFinishedFlagTag, finishedFlag, finishedFlag); + + int finishedFlagValue = finishedFlag->getValue(0, 0); + return finishedFlagValue; } } -} -void sendTableAllToAll(size_t beginId, size_t endId, size_t curId, int tag, NumericTablePtr & table, DataCollectionPtr & destCollection, - bool preserveOrder = false) -{ - size_t nIds = endId - beginId; - size_t nShifts = 1; - while (nShifts < nIds) nShifts <<= 1; - - if (preserveOrder) + void sendCollectionAllToAll(size_t beginId, size_t endId, size_t curId, int tag, DataCollectionPtr & collection, DataCollectionPtr & destCollection) { - destCollection = DataCollectionPtr(new DataCollection(nIds)); - } + size_t nIds = endId - beginId; + size_t nShifts = 1; + while (nShifts < nIds) nShifts <<= 1; - for (size_t shift = 0; shift < nShifts; shift++) - { - size_t partnerId = ((curId - beginId) ^ shift) + beginId; - if (partnerId < beginId || partnerId >= endId) + for (size_t shift = 0; shift < nShifts; shift++) { - continue; - } + size_t partnerId = ((curId - beginId) ^ shift) + beginId; + if (partnerId < beginId || partnerId >= endId) + { + continue; + } - NumericTablePtr partnerTable; + NumericTablePtr table = NumericTable::cast((*collection)[partnerId - beginId]); + NumericTablePtr partnerTable; - if (partnerId == curId) - { - partnerTable = table; - } - else - { - if (curId < partnerId) + if (partnerId == curId) { - sendTable(table, partnerId, tag); - recvTable(partnerTable, partnerId, tag); + partnerTable = table; } else { - recvTable(partnerTable, partnerId, tag); - sendTable(table, partnerId, tag); + if (curId < partnerId) + { + sendTable(table, partnerId, tag); + recvTable(partnerTable, partnerId, tag); + } + else + { + recvTable(partnerTable, partnerId, tag); + sendTable(table, partnerId, tag); + } } - } - if (partnerTable.get() && partnerTable->getNumberOfRows() > 0) - { - if (preserveOrder) - { - (*destCollection)[partnerId - beginId] = partnerTable; - } - else + if (partnerTable.get() && partnerTable->getNumberOfRows() > 0) { destCollection->push_back(partnerTable); } } } -} -void sendTableAllToMaster(size_t beginId, size_t endId, size_t rankId, int tag, NumericTablePtr & table, DataCollectionPtr & destCollection) -{ - if (rankId == beginId) + void sendTableAllToAll(size_t beginId, size_t endId, size_t curId, int tag, NumericTablePtr & table, DataCollectionPtr & destCollection, bool preserveOrder = false) { - for (size_t partnerId = beginId; partnerId < endId; partnerId++) + size_t nIds = endId - beginId; + size_t nShifts = 1; + while (nShifts < nIds) nShifts <<= 1; + + if (preserveOrder) { + destCollection = DataCollectionPtr(new DataCollection(nIds)); + } + + for (size_t shift = 0; shift < nShifts; shift++) + { + size_t partnerId = ((curId - beginId) ^ shift) + beginId; + if (partnerId < beginId || partnerId >= endId) + { + continue; + } + NumericTablePtr partnerTable; - if (partnerId == rankId) + + if (partnerId == curId) { partnerTable = table; } else { - recvTable(partnerTable, partnerId, tag); + if (curId < partnerId) + { + sendTable(table, partnerId, tag); + recvTable(partnerTable, partnerId, tag); + } + else + { + recvTable(partnerTable, partnerId, tag); + sendTable(table, partnerId, tag); + } } if (partnerTable.get() && partnerTable->getNumberOfRows() > 0) { - destCollection->push_back(partnerTable); + if (preserveOrder) + { + (*destCollection)[partnerId - beginId] = partnerTable; + } + else + { + destCollection->push_back(partnerTable); + } } } } - else - { - sendTable(table, beginId, tag); - } -} -void sendCollectionMasterToAll(size_t beginId, size_t endId, size_t rankId, int tag, DataCollectionPtr & collection, NumericTablePtr & destTable) -{ - if (rankId == beginId) + void sendTableAllToMaster(size_t beginId, size_t endId, size_t rankId, int tag, NumericTablePtr & table, DataCollectionPtr & destCollection) { - for (size_t partnerId = beginId; partnerId < endId; partnerId++) + if (rankId == beginId) { - NumericTablePtr table = NumericTable::cast((*collection)[partnerId - beginId]); - if (partnerId == rankId) + for (size_t partnerId = beginId; partnerId < endId; partnerId++) { - destTable = table; - } - else - { - sendTable(table, partnerId, tag); + NumericTablePtr partnerTable; + if (partnerId == rankId) + { + partnerTable = table; + } + else + { + recvTable(partnerTable, partnerId, tag); + } + + if (partnerTable.get() && partnerTable->getNumberOfRows() > 0) + { + destCollection->push_back(partnerTable); + } } } + else + { + sendTable(table, beginId, tag); + } } - else - { - recvTable(destTable, beginId, tag); - } -} -void sendTableMasterToAll(size_t beginId, size_t endId, size_t rankId, int tag, NumericTablePtr & table, NumericTablePtr & destTable) -{ - if (rankId == beginId) + void sendCollectionMasterToAll(size_t beginId, size_t endId, size_t rankId, int tag, DataCollectionPtr & collection, NumericTablePtr & destTable) { - for (size_t partnerId = beginId; partnerId < endId; partnerId++) + if (rankId == beginId) { - if (partnerId == rankId) + for (size_t partnerId = beginId; partnerId < endId; partnerId++) { - destTable = table; - } - else - { - sendTable(table, partnerId, tag); + NumericTablePtr table = NumericTable::cast((*collection)[partnerId - beginId]); + if (partnerId == rankId) + { + destTable = table; + } + else + { + sendTable(table, partnerId, tag); + } } } + else + { + recvTable(destTable, beginId, tag); + } } - else + + void sendTableMasterToAll(size_t beginId, size_t endId, size_t rankId, int tag, NumericTablePtr & table, NumericTablePtr & destTable) { - recvTable(destTable, beginId, tag); + if (rankId == beginId) + { + for (size_t partnerId = beginId; partnerId < endId; partnerId++) + { + if (partnerId == rankId) + { + destTable = table; + } + else + { + sendTable(table, partnerId, tag); + } + } + } + else + { + recvTable(destTable, beginId, tag); + } } -} -void sendTable(NumericTablePtr & table, int recpnt, int tag) -{ - tcvr->send(table, recpnt, tag * 2); -} + void sendTable(NumericTablePtr & table, int recpnt, int tag) { tcvr->send(table, recpnt, tag * 2); } -void recvTable(NumericTablePtr & table, int sender, int tag) -{ - table = tcvr->recv(sender, tag * 2); -} + void recvTable(NumericTablePtr & table, int sender, int tag) { table = tcvr->recv(sender, tag * 2); } }; // oneDAL Distributed algos do not return a proper result (like batch), we need to create one -template< typename fptype, daal::algorithms::dbscan::Method method > -typename dbscan_manager::iomb_type::result_type -make_result(const daal::data_management::NumericTablePtr & assignments, const daal::data_management::NumericTablePtr & nClusters) +template +typename dbscan_manager::iomb_type::result_type make_result(const daal::data_management::NumericTablePtr & assignments, const daal::data_management::NumericTablePtr & nClusters) { typename dbscan_manager::iomb_type::result_type res(new typename dbscan_manager::iomb_type::result_type::ElementType); res->set(daal::algorithms::dbscan::assignments, daal::data_management::convertToHomogen(*assignments.get())); @@ -566,31 +559,29 @@ make_result(const daal::data_management::NumericTablePtr & assignments, const da return res; } -template -class dist_custom< dbscan_manager< fptype, method > > +template +class dist_custom > { public: - typedef dbscan_manager< fptype, method > Algo; + typedef dbscan_manager Algo; - template - static typename Algo::iomb_type::result_type - _compute(Algo & algo, const T1& input1) + template + static typename Algo::iomb_type::result_type _compute(Algo & algo, const T1 & input1) { dist d; - d.epsilon = algo._epsilon; + d.epsilon = algo._epsilon; d.minObservations = algo._minObservations; d.main(input1); return make_result(d.assignments, d.totalNClusters); } - template - static typename Algo::iomb_type::result_type - compute(Algo & algo, const Ts& ... inputs) + template + static typename Algo::iomb_type::result_type compute(Algo & algo, const Ts &... inputs) { return _compute(algo, get_table(inputs)...); } }; -} // namespace dist_custom { +} // namespace dist_custom #endif // _DIST_DBSCAN_INCLUDED_ diff --git a/src/dist_kmeans.h b/src/dist_kmeans.h index cb371ab119..651e12a6b6 100644 --- a/src/dist_kmeans.h +++ b/src/dist_kmeans.h @@ -21,13 +21,14 @@ #include "dist_custom.h" #include "map_reduce_tree.h" -namespace dist_custom { +namespace dist_custom +{ -template -class dist_custom< kmeans_manager< fptype, method > > +template +class dist_custom > { public: - typedef kmeans_manager< fptype, method > Algo; + typedef kmeans_manager Algo; /* We basically iterate over a map_reduce_star. @@ -36,34 +37,35 @@ class dist_custom< kmeans_manager< fptype, method > > except for first iter we need to bcast centroids. root detects convergence/end of iteration and bcasts to others */ - template - typename Algo::iomstep2Master__final_type::result_type - static map_reduce(Algo & algo, const T1& input1, const T2& input2) + template + typename Algo::iomstep2Master__final_type::result_type static map_reduce(Algo & algo, const T1 & input1, const T2 & input2) { - auto tcvr = get_transceiver(); + auto tcvr = get_transceiver(); T2 centroids = input2; - bool done = false; + bool done = false; typename Algo::iomstep2Master__final_type::result_type fres; - size_t iter = 0; - double goal = std::numeric_limits::max(); - double accuracyThreshold = use_default(algo._accuracyThreshold) - ? typename Algo::algob_type::ParameterType(algo._nClusters, algo._maxIterations).accuracyThreshold - : algo._accuracyThreshold; - do { - if(iter) tcvr->bcast(centroids); + size_t iter = 0; + double goal = std::numeric_limits::max(); + double accuracyThreshold = use_default(algo._accuracyThreshold) ? typename Algo::algob_type::ParameterType(algo._nClusters, algo._maxIterations).accuracyThreshold : algo._accuracyThreshold; + do + { + if (iter) tcvr->bcast(centroids); ++iter; auto s1_result = algo.run_step1Local(input1, centroids); // reduce all partial results auto pres = map_reduce_tree::map_reduce_tree::reduce(algo, s1_result); // finalize and check convergence/end of iteration - if(tcvr->me() == 0) { - fres = algo.run_step2Master__final(std::vector< daal::algorithms::kmeans::PartialResultPtr >(1, pres)); + if (tcvr->me() == 0) + { + fres = algo.run_step2Master__final(std::vector(1, pres)); // now check if we convered/reached max_iter - if(iter < algo._maxIterations) { + if (iter < algo._maxIterations) + { double new_goal = fres->get(daal::algorithms::kmeans::objectiveFunction)->daal::data_management::NumericTable::template getValue(0, 0); - if(std::abs(goal - new_goal) > accuracyThreshold) { + if (std::abs(goal - new_goal) > accuracyThreshold) + { centroids = fres->get(daal::algorithms::kmeans::centroids); - goal = new_goal; + goal = new_goal; tcvr->bcast(done); continue; } @@ -71,28 +73,24 @@ class dist_custom< kmeans_manager< fptype, method > > // when we get here we either reached maxIter or desired accuracy done = true; // we have to provide the number of iterations in result - daal::data_management::NumericTablePtr nittab( - new daal::data_management::HomogenNumericTable(1, - 1, - daal::data_management::NumericTable::doAllocate, static_cast(iter))); + daal::data_management::NumericTablePtr nittab(new daal::data_management::HomogenNumericTable(1, 1, daal::data_management::NumericTable::doAllocate, static_cast(iter))); fres->set(daal::algorithms::kmeans::nIterations, nittab); } // root gets here if done, other ranks always tcvr->bcast(done); - } while(done == false); + } while (done == false); // bcast final result tcvr->bcast(fres); return fres; } - template - static typename Algo::iomstep2Master__final_type::result_type - compute(Algo & algo, const Ts& ... inputs) + template + static typename Algo::iomstep2Master__final_type::result_type compute(Algo & algo, const Ts &... inputs) { return map_reduce(algo, get_table(inputs)...); } }; -} // namespace dist_kmeans { +} // namespace dist_custom #endif // _DIST_KMEANS_INCLUDED_ diff --git a/src/dist_kmeans_init.h b/src/dist_kmeans_init.h index 10c50ccec8..b3c703143e 100644 --- a/src/dist_kmeans_init.h +++ b/src/dist_kmeans_init.h @@ -20,89 +20,81 @@ #include "dist_custom.h" #include "map_reduce_star.h" -namespace dist_custom { - - // unsupported methods - template< typename fptype, daal::algorithms::kmeans::init::Method method > - class dist_custom< kmeans_init_manager< fptype, method > > - { - public: - typedef kmeans_init_manager< fptype, method > Algo; - - template - static typename Algo::iomb_type::result_type - compute(Algo & algo, const Ts& ... inputs) - { - std::cerr << "kmeans-init: selected method not supported yet; returning empty centroids.\n"; - return typename Algo::iomb_type::result_type(); - } - }; - - - // oneDAL kmeans_init Distributed algos do not return a proper result (like batch), we need to create one - template< typename fptype, daal::algorithms::kmeans::init::Method method > - typename kmeans_init_manager::iomb_type::result_type - mk_kmi_result(const daal::data_management::NumericTablePtr & centroids) +namespace dist_custom +{ + +// unsupported methods +template +class dist_custom > +{ +public: + typedef kmeans_init_manager Algo; + + template + static typename Algo::iomb_type::result_type compute(Algo & algo, const Ts &... inputs) { - typename kmeans_init_manager::iomb_type::result_type res(new typename kmeans_init_manager::iomb_type::result_type::ElementType); - res->set(daal::algorithms::kmeans::init::centroids, daal::data_management::convertToHomogen(*centroids.get())); - return res; + std::cerr << "kmeans-init: selected method not supported yet; returning empty centroids.\n"; + return typename Algo::iomb_type::result_type(); } - - - // kmi using simple map_reduce_star (random and deterministic) - template< typename fptype, daal::algorithms::kmeans::init::Method method > - class kmi_map_reduce +}; + +// oneDAL kmeans_init Distributed algos do not return a proper result (like batch), we need to create one +template +typename kmeans_init_manager::iomb_type::result_type mk_kmi_result(const daal::data_management::NumericTablePtr & centroids) +{ + typename kmeans_init_manager::iomb_type::result_type res(new typename kmeans_init_manager::iomb_type::result_type::ElementType); + res->set(daal::algorithms::kmeans::init::centroids, daal::data_management::convertToHomogen(*centroids.get())); + return res; +} + +// kmi using simple map_reduce_star (random and deterministic) +template +class kmi_map_reduce +{ +public: + typedef kmeans_init_manager Algo; + + typename Algo::iomb_type::result_type static map_reduce(Algo & algo, const daal::data_management::NumericTablePtr input) { - public: - typedef kmeans_init_manager< fptype, method > Algo; + auto tcvr = get_transceiver(); - typename Algo::iomb_type::result_type - static map_reduce(Algo & algo, const daal::data_management::NumericTablePtr input) - { - auto tcvr = get_transceiver(); + size_t tot_rows = input->getNumberOfRows(); + size_t start_row = tot_rows; + // first determine total number of rows + tcvr->reduce_all(&tot_rows, 1, transceiver_iface::OP_SUM); + // determine start of my chunk + tcvr->reduce_exscan(&start_row, 1, transceiver_iface::OP_SUM); + if (tcvr->me() == 0) start_row = 0; - size_t tot_rows = input->getNumberOfRows(); - size_t start_row = tot_rows; - // first determine total number of rows - tcvr->reduce_all(&tot_rows, 1, transceiver_iface::OP_SUM); - // determine start of my chunk - tcvr->reduce_exscan(&start_row, 1, transceiver_iface::OP_SUM); - if(tcvr->me()==0) start_row = 0; - - auto res = map_reduce_star::map_reduce_star::map_reduce(algo, input, tot_rows, start_row); - - return mk_kmi_result(res); - } - - template - static typename Algo::iomb_type::result_type - compute(Algo & algo, const Ts& ... inputs) - { - return map_reduce(algo, get_table(inputs)...); - } - }; - - // specialize dist_custom for kemans_init - template< typename fptype > - class dist_custom< kmeans_init_manager< fptype, daal::algorithms::kmeans::init::randomDense > > - : public kmi_map_reduce - {}; - - // specialize dist_custom for kemans_init - template< typename fptype > - class dist_custom< kmeans_init_manager< fptype, daal::algorithms::kmeans::init::deterministicDense > > - : public kmi_map_reduce - {}; + auto res = map_reduce_star::map_reduce_star::map_reduce(algo, input, tot_rows, start_row); + return mk_kmi_result(res); + } - // specialize dist_custom for kemans_init - template< typename fptype > - class dist_custom< kmeans_init_manager< fptype, daal::algorithms::kmeans::init::plusPlusDense > > + template + static typename Algo::iomb_type::result_type compute(Algo & algo, const Ts &... inputs) { - public: - typedef kmeans_init_manager< fptype, daal::algorithms::kmeans::init::plusPlusDense > Algo; - /* + return map_reduce(algo, get_table(inputs)...); + } +}; + +// specialize dist_custom for kemans_init +template +class dist_custom > : public kmi_map_reduce +{}; + +// specialize dist_custom for kemans_init +template +class dist_custom > : public kmi_map_reduce +{}; + +// specialize dist_custom for kemans_init +template +class dist_custom > +{ +public: + typedef kmeans_init_manager Algo; + /* step1 is a pre-processing step done before the iteration. The iteration identifieds one centroids after the other. step4 is the equivalent of step1, but within the iteration. @@ -113,98 +105,107 @@ namespace dist_custom { step2 results get gathered on root and processed in step3 step3 result is sent to one rank for executing step4 */ - static typename Algo::iomb_type::result_type - kmi(Algo & algo, const daal::data_management::NumericTablePtr input) + static typename Algo::iomb_type::result_type kmi(Algo & algo, const daal::data_management::NumericTablePtr input) + { + auto tcvr = get_transceiver(); + int rank = tcvr->me(); + int nRanks = tcvr->nMembers(); + + // first determine total number of rows + size_t tot_rows = input->getNumberOfRows(); + size_t start_row = tot_rows; + // first determine total number of rows + tcvr->reduce_all(&tot_rows, 1, transceiver_iface::OP_SUM); + // determine start of my chunk + tcvr->reduce_exscan(&start_row, 1, transceiver_iface::OP_SUM); + if (rank == 0) start_row = 0; + + /* Internal data to be stored on the local nodes */ + daal::data_management::DataCollectionPtr localNodeData; + /* Numeric table to collect the results */ + daal::data_management::RowMergedNumericTablePtr pCentroids(new daal::data_management::RowMergedNumericTable()); + // First step on each rank (output var will be used for output of step4 as well) + auto step14Out = algo.run_step1Local(input, tot_rows, start_row)->get(daal::algorithms::kmeans::init::partialCentroids); + // Only one rank actually computes centroids, we need to identify rank and bcast centroids to all others + int data_rank = not_empty(step14Out) ? rank : -1; + tcvr->reduce_all(&data_rank, 1, transceiver_iface::OP_MAX); + tcvr->bcast(step14Out, data_rank); + auto step2In = step14Out; + + pCentroids->addNumericTable(step2In); + + for (size_t iCenter = 1; iCenter < algo._nClusters; ++iCenter) { - auto tcvr = get_transceiver(); - int rank = tcvr->me(); - int nRanks = tcvr->nMembers(); - - // first determine total number of rows - size_t tot_rows = input->getNumberOfRows(); - size_t start_row = tot_rows; - // first determine total number of rows - tcvr->reduce_all(&tot_rows, 1, transceiver_iface::OP_SUM); - // determine start of my chunk - tcvr->reduce_exscan(&start_row, 1, transceiver_iface::OP_SUM); - if(rank==0) start_row = 0; - - /* Internal data to be stored on the local nodes */ - daal::data_management::DataCollectionPtr localNodeData; - /* Numeric table to collect the results */ - daal::data_management::RowMergedNumericTablePtr pCentroids(new daal::data_management::RowMergedNumericTable()); - // First step on each rank (output var will be used for output of step4 as well) - auto step14Out = algo.run_step1Local(input, tot_rows, start_row)->get(daal::algorithms::kmeans::init::partialCentroids); - // Only one rank actually computes centroids, we need to identify rank and bcast centroids to all others - int data_rank = not_empty(step14Out) ? rank : -1; - tcvr->reduce_all(&data_rank, 1, transceiver_iface::OP_MAX); - tcvr->bcast(step14Out, data_rank); - auto step2In = step14Out; - - pCentroids->addNumericTable(step2In); - - for(size_t iCenter = 1; iCenter < algo._nClusters; ++iCenter) { - // run step2 on each rank - auto s2res = algo.run_step2Local(input, localNodeData, step2In, false); - if(iCenter==1) localNodeData = s2res->get(daal::algorithms::kmeans::init::internalResult); - auto s2Out = s2res->get(daal::algorithms::kmeans::init::outputOfStep2ForStep3); - // and gather result on root - auto s3In = tcvr->gather(s2Out); - const int S34TAG = 3003; - // The input for s4 will be stored in s4In - daal::data_management::NumericTablePtr s4In; - // run step3 on root - // step3's output provides input for only one rank, this rank needs to be identified to run step4 - if(rank == 0) { - auto step3Output = algo.run_step3Master(s3In); - for(int i=0; iget(daal::algorithms::kmeans::init::outputOfStep3ForStep4, i)) { - data_rank = i; - break; - } - } - tcvr->bcast(data_rank, 0); - if(data_rank) { - tcvr->send(step3Output->get(daal::algorithms::kmeans::init::outputOfStep3ForStep4, data_rank), data_rank, S34TAG); - } else { - s4In = step3Output->get(daal::algorithms::kmeans::init::outputOfStep3ForStep4, 0); - } - } else { // non-roots get notified about who will do step 4 with output from step3 - tcvr->bcast(data_rank, 0); - if(rank == data_rank) { - s4In = tcvr->recv(0, S34TAG); + // run step2 on each rank + auto s2res = algo.run_step2Local(input, localNodeData, step2In, false); + if (iCenter == 1) localNodeData = s2res->get(daal::algorithms::kmeans::init::internalResult); + auto s2Out = s2res->get(daal::algorithms::kmeans::init::outputOfStep2ForStep3); + // and gather result on root + auto s3In = tcvr->gather(s2Out); + const int S34TAG = 3003; + // The input for s4 will be stored in s4In + daal::data_management::NumericTablePtr s4In; + // run step3 on root + // step3's output provides input for only one rank, this rank needs to be identified to run step4 + if (rank == 0) + { + auto step3Output = algo.run_step3Master(s3In); + for (int i = 0; i < nRanks; ++i) + { + if (step3Output->get(daal::algorithms::kmeans::init::outputOfStep3ForStep4, i)) + { + data_rank = i; + break; } } - // only one rank actually executes step4 - if(rank == data_rank) { - // run step4 on responsible rank, result will feed into step2 of next iteration - step14Out = algo.run_step4Local(input, localNodeData, s4In); + tcvr->bcast(data_rank, 0); + if (data_rank) + { + tcvr->send(step3Output->get(daal::algorithms::kmeans::init::outputOfStep3ForStep4, data_rank), data_rank, S34TAG); + } + else + { + s4In = step3Output->get(daal::algorithms::kmeans::init::outputOfStep3ForStep4, 0); } - // similar to output of step1, output of step4 gets bcasted to all ranks and fed into step2 of next iteration - tcvr->bcast(step14Out, data_rank); - step2In = step14Out; - pCentroids->addNumericTable(step2In); } - - // Now create result object, set centroids and return - return mk_kmi_result(pCentroids); + else + { // non-roots get notified about who will do step 4 with output from step3 + tcvr->bcast(data_rank, 0); + if (rank == data_rank) + { + s4In = tcvr->recv(0, S34TAG); + } + } + // only one rank actually executes step4 + if (rank == data_rank) + { + // run step4 on responsible rank, result will feed into step2 of next iteration + step14Out = algo.run_step4Local(input, localNodeData, s4In); + } + // similar to output of step1, output of step4 gets bcasted to all ranks and fed into step2 of next iteration + tcvr->bcast(step14Out, data_rank); + step2In = step14Out; + pCentroids->addNumericTable(step2In); } - template - static typename Algo::iomb_type::result_type - compute(Algo & algo, Ts& ... inputs) - { - return kmi(algo, get_table(inputs)...); - } - }; + // Now create result object, set centroids and return + return mk_kmi_result(pCentroids); + } - // specialize dist_custom for kemans_init - template< typename fptype > - class dist_custom< kmeans_init_manager< fptype, daal::algorithms::kmeans::init::parallelPlusDense > > + template + static typename Algo::iomb_type::result_type compute(Algo & algo, Ts &... inputs) { - public: - typedef kmeans_init_manager< fptype, daal::algorithms::kmeans::init::parallelPlusDense > Algo; - /* + return kmi(algo, get_table(inputs)...); + } +}; + +// specialize dist_custom for kemans_init +template +class dist_custom > +{ +public: + typedef kmeans_init_manager Algo; + /* step1 provides initial input for step2, inside the loop step4 produces the input for step2. We have to keep input for step2 because it will also be used as input for final step5. Now we iterate/loop for nRounds: @@ -219,120 +220,128 @@ namespace dist_custom { - and finally select the initial centroids in step5 on root The resulting centroids are broadcasted to all processes. */ - static typename Algo::iomb_type::result_type - kmi(Algo & algo, const daal::data_management::NumericTablePtr input) + static typename Algo::iomb_type::result_type kmi(Algo & algo, const daal::data_management::NumericTablePtr input) + { + auto tcvr = get_transceiver(); + int rank = tcvr->me(); + int nRanks = tcvr->nMembers(); + + // first determine total number of rows + size_t tot_rows = input->getNumberOfRows(); + size_t start_row = tot_rows; + // first determine total number of rows + tcvr->reduce_all(&tot_rows, 1, transceiver_iface::OP_SUM); + // determine start of my chunk + tcvr->reduce_exscan(&start_row, 1, transceiver_iface::OP_SUM); + if (rank == 0) start_row = 0; + + // Internal data to be stored on the local nodes + daal::data_management::DataCollectionPtr localNodeData; + // First step on each rank (output var will be used for output of step4 as well) + auto step14Out = algo.run_step1Local(input, tot_rows, start_row)->get(daal::algorithms::kmeans::init::partialCentroids); + // Only one rank actually computes centroids, we need to identify rank and bcast centroids to all others + int data_rank = not_empty(step14Out) ? rank : -1; + tcvr->reduce_all(&data_rank, 1, transceiver_iface::OP_MAX); + tcvr->bcast(step14Out, data_rank); + auto step2In = step14Out; + + // default value of nRounds used by all steps + const size_t nRounds = daal::algorithms::kmeans::init::Parameter(algo._nClusters).nRounds; + + // vector with results of step2 for input into step5 + std::vector s2InForStep5; + if (rank == 0) { - auto tcvr = get_transceiver(); - int rank = tcvr->me(); - int nRanks = tcvr->nMembers(); - - // first determine total number of rows - size_t tot_rows = input->getNumberOfRows(); - size_t start_row = tot_rows; - // first determine total number of rows - tcvr->reduce_all(&tot_rows, 1, transceiver_iface::OP_SUM); - // determine start of my chunk - tcvr->reduce_exscan(&start_row, 1, transceiver_iface::OP_SUM); - if(rank==0) start_row = 0; - - // Internal data to be stored on the local nodes - daal::data_management::DataCollectionPtr localNodeData; - // First step on each rank (output var will be used for output of step4 as well) - auto step14Out = algo.run_step1Local(input, tot_rows, start_row)->get(daal::algorithms::kmeans::init::partialCentroids); - // Only one rank actually computes centroids, we need to identify rank and bcast centroids to all others - int data_rank = not_empty(step14Out) ? rank : -1; - tcvr->reduce_all(&data_rank, 1, transceiver_iface::OP_MAX); - tcvr->bcast(step14Out, data_rank); - auto step2In = step14Out; - - // default value of nRounds used by all steps - const size_t nRounds = daal::algorithms::kmeans::init::Parameter(algo._nClusters).nRounds; + s2InForStep5.push_back(step2In); + } - // vector with results of step2 for input into step5 - std::vector s2InForStep5; - if(rank == 0) { - s2InForStep5.push_back(step2In); - } + // Here we will store the output of step3 for step5 + daal::services::interface1::SharedPtr outputOfStep3ForStep5; - // Here we will store the output of step3 for step5 - daal::services::interface1::SharedPtr outputOfStep3ForStep5; - - for(size_t iRound = 0; iRound < nRounds; ++iRound) { - // run step2 on each rank - auto s2res = algo.run_step2Local(input, localNodeData, step2In, false); - if(iRound==0) localNodeData = s2res->get(daal::algorithms::kmeans::init::internalResult); - auto s2Out = s2res->get(daal::algorithms::kmeans::init::outputOfStep2ForStep3); - // and gather result on root - auto s3In = tcvr->gather(s2Out); - - const int S34TAG = 3003; - // The input for s4 will be stored in s4In - daal::data_management::NumericTablePtr s4In; - // run step3 on root and send results to non-roots - if(rank == 0) { - auto step3Output = algo.run_step3Master(s3In); - // output of step3 in the last iteration will be used in step5 - if (iRound == nRounds - 1) { - outputOfStep3ForStep5 = step3Output->get(daal::algorithms::kmeans::init::outputOfStep3ForStep5); - } - s4In = step3Output->get(daal::algorithms::kmeans::init::outputOfStep3ForStep4, 0); - for(int i=1; isend(step3Output->get(daal::algorithms::kmeans::init::outputOfStep3ForStep4, i), i, S34TAG); // it can be NULL - } - } else { // non-roots get messages with output from step3 - s4In = tcvr->recv(0, S34TAG); + for (size_t iRound = 0; iRound < nRounds; ++iRound) + { + // run step2 on each rank + auto s2res = algo.run_step2Local(input, localNodeData, step2In, false); + if (iRound == 0) localNodeData = s2res->get(daal::algorithms::kmeans::init::internalResult); + auto s2Out = s2res->get(daal::algorithms::kmeans::init::outputOfStep2ForStep3); + // and gather result on root + auto s3In = tcvr->gather(s2Out); + + const int S34TAG = 3003; + // The input for s4 will be stored in s4In + daal::data_management::NumericTablePtr s4In; + // run step3 on root and send results to non-roots + if (rank == 0) + { + auto step3Output = algo.run_step3Master(s3In); + // output of step3 in the last iteration will be used in step5 + if (iRound == nRounds - 1) + { + outputOfStep3ForStep5 = step3Output->get(daal::algorithms::kmeans::init::outputOfStep3ForStep5); } - // if we have a data for step4 then run it - if (s4In) { - step14Out = algo.run_step4Local(input, localNodeData, s4In); - } else { - step14Out = daal::data_management::NumericTablePtr(); + s4In = step3Output->get(daal::algorithms::kmeans::init::outputOfStep3ForStep4, 0); + for (int i = 1; i < nRanks; i++) + { + tcvr->send(step3Output->get(daal::algorithms::kmeans::init::outputOfStep3ForStep4, i), i, S34TAG); // it can be NULL } + } + else + { // non-roots get messages with output from step3 + s4In = tcvr->recv(0, S34TAG); + } + // if we have a data for step4 then run it + if (s4In) + { + step14Out = algo.run_step4Local(input, localNodeData, s4In); + } + else + { + step14Out = daal::data_management::NumericTablePtr(); + } - // we need to gather all exist results on root, merge them into one table and then share it with all non-roots - auto step14OutMaster = tcvr->gather(step14Out); - daal::data_management::RowMergedNumericTablePtr step4OutMerged(new daal::data_management::RowMergedNumericTable()); - if(rank == 0) + // we need to gather all exist results on root, merge them into one table and then share it with all non-roots + auto step14OutMaster = tcvr->gather(step14Out); + daal::data_management::RowMergedNumericTablePtr step4OutMerged(new daal::data_management::RowMergedNumericTable()); + if (rank == 0) + { + for (int i = 0; i < step14OutMaster.size(); i++) { - for (int i = 0; i < step14OutMaster.size(); i++) + // we expect that some of results can be NULL + if (step14OutMaster[i]) { - // we expect that some of results can be NULL - if(step14OutMaster[i]) - { - step4OutMerged->addNumericTable(step14OutMaster[i]); - } + step4OutMerged->addNumericTable(step14OutMaster[i]); } } - tcvr->bcast(step4OutMerged, 0); - step2In = daal::data_management::convertToHomogen(*step4OutMerged.get()); - - // we add results of each iteration to input of step5 - if(rank == 0) - { - s2InForStep5.push_back(step2In); - } } + tcvr->bcast(step4OutMerged, 0); + step2In = daal::data_management::convertToHomogen(*step4OutMerged.get()); - // One more step 2 - auto s2ResForStep5 = algo.run_step2Local(input, localNodeData, step2In, true); - auto s2OutForStep5 = s2ResForStep5->get(daal::algorithms::kmeans::init::outputOfStep2ForStep5); - auto s5In = tcvr->gather(s2OutForStep5); - daal::data_management::NumericTablePtr s5Res; - if(rank == 0) + // we add results of each iteration to input of step5 + if (rank == 0) { - s5Res = algo.run_step5Master(s2InForStep5, s5In, outputOfStep3ForStep5); + s2InForStep5.push_back(step2In); } - tcvr->bcast(s5Res, 0); - return mk_kmi_result(s5Res); } - template - static typename Algo::iomb_type::result_type - compute(Algo & algo, Ts& ... inputs) + // One more step 2 + auto s2ResForStep5 = algo.run_step2Local(input, localNodeData, step2In, true); + auto s2OutForStep5 = s2ResForStep5->get(daal::algorithms::kmeans::init::outputOfStep2ForStep5); + auto s5In = tcvr->gather(s2OutForStep5); + daal::data_management::NumericTablePtr s5Res; + if (rank == 0) { - return kmi(algo, get_table(inputs)...); + s5Res = algo.run_step5Master(s2InForStep5, s5In, outputOfStep3ForStep5); } - }; -} // namespace dist_kmeans_init { + tcvr->bcast(s5Res, 0); + return mk_kmi_result(s5Res); + } + + template + static typename Algo::iomb_type::result_type compute(Algo & algo, Ts &... inputs) + { + return kmi(algo, get_table(inputs)...); + } +}; +} // namespace dist_custom #endif // _DIST_KMEANS_INIT_INCLUDED_ diff --git a/src/dist_logistic_regression_training.h b/src/dist_logistic_regression_training.h index 6d4360857f..ea17252775 100644 --- a/src/dist_logistic_regression_training.h +++ b/src/dist_logistic_regression_training.h @@ -22,123 +22,111 @@ #include #include -template +template struct logistic_regression_training_manager; -namespace dist_custom { - - // We need our own Model-class which needs to be derived from DAAL's - // because we cannot create the actual Model from the outside - class LRModel : public daal::algorithms::logistic_regression::Model { - public: - explicit LRModel(const daal::data_management::NumericTablePtr &coefs) - : coefs_(coefs) {} - - size_t getNumberOfBetas() const override { - return coefs_->getNumberOfColumns(); - } - - size_t getNumberOfFeatures() const override { - return coefs_->getNumberOfColumns() - 1; - } - - bool getInterceptFlag() const override { - return true; //FIXME - } - - daal::data_management::NumericTablePtr getBeta() override { - return coefs_; - } - - const daal::data_management::NumericTablePtr getBeta() const override { - return coefs_; - } - - private: - daal::data_management::NumericTablePtr coefs_; +namespace dist_custom +{ + +// We need our own Model-class which needs to be derived from DAAL's +// because we cannot create the actual Model from the outside +class LRModel : public daal::algorithms::logistic_regression::Model +{ +public: + explicit LRModel(const daal::data_management::NumericTablePtr & coefs) : coefs_(coefs) {} + + size_t getNumberOfBetas() const override { return coefs_->getNumberOfColumns(); } + + size_t getNumberOfFeatures() const override { return coefs_->getNumberOfColumns() - 1; } + + bool getInterceptFlag() const override + { + return true; //FIXME + } + + daal::data_management::NumericTablePtr getBeta() override { return coefs_; } + + const daal::data_management::NumericTablePtr getBeta() const override { return coefs_; } + +private: + daal::data_management::NumericTablePtr coefs_; +}; + +// custom distribution class for logistic regression +template +class dist_custom > +{ +public: + typedef logistic_regression_training_manager Algo; + + // the "map" phase: computing loss/gradient + static daal::data_management::NumericTablePtr compute_loss(Algo & algo, const daal::data_management::NumericTablePtr & x, const daal::data_management::NumericTablePtr & y, daal::data_management::NumericTablePtr & weights) + { + // we use logistic_loss directly + auto loss = daal::algorithms::optimization_solver::logistic_loss::Batch<>::create(x->getNumberOfRows()); + // set parameters as in our logistic regression class + loss->parameter().interceptFlag = true; // FIXME algo._interceptFlag; + // FIXME loss->parameter().penaltyL1 = algo._penaltyL1; raises an error + // FIXME loss->parameter().penaltyL2 = algo._penaltyL2; raises an error + loss->parameter().resultsToCompute = daal::algorithms::optimization_solver::objective_function::value | daal::algorithms::optimization_solver::objective_function::gradient; + loss->input.set(daal::algorithms::optimization_solver::logistic_loss::data, x); + loss->input.set(daal::algorithms::optimization_solver::logistic_loss::dependentVariables, y); + loss->input.set(daal::algorithms::optimization_solver::logistic_loss::argument, weights); + + loss->compute(); + + return loss->getResult()->get(daal::algorithms::optimization_solver::objective_function::gradientIdx); }; - - - // custom distribution class for logistic regression - template< typename fptype, daal::algorithms::logistic_regression::training::Method method > - class dist_custom< logistic_regression_training_manager< fptype, method > > + + static daal::data_management::NumericTablePtr column_to_row(const daal::data_management::NumericTablePtr & x) { - public: - typedef logistic_regression_training_manager< fptype, method > Algo; - - // the "map" phase: computing loss/gradient - static daal::data_management::NumericTablePtr compute_loss(Algo & algo, - const daal::data_management::NumericTablePtr & x, - const daal::data_management::NumericTablePtr & y, - daal::data_management::NumericTablePtr & weights) - { - // we use logistic_loss directly - auto loss = daal::algorithms::optimization_solver::logistic_loss::Batch<>::create(x->getNumberOfRows()); - // set parameters as in our logistic regression class - loss->parameter().interceptFlag = true; // FIXME algo._interceptFlag; - // FIXME loss->parameter().penaltyL1 = algo._penaltyL1; raises an error - // FIXME loss->parameter().penaltyL2 = algo._penaltyL2; raises an error - loss->parameter().resultsToCompute = daal::algorithms::optimization_solver::objective_function::value - | daal::algorithms::optimization_solver::objective_function::gradient; - loss->input.set(daal::algorithms::optimization_solver::logistic_loss::data, x); - loss->input.set(daal::algorithms::optimization_solver::logistic_loss::dependentVariables, y); - loss->input.set(daal::algorithms::optimization_solver::logistic_loss::argument, weights); - - loss->compute(); - - return loss->getResult()->get(daal::algorithms::optimization_solver::objective_function::gradientIdx); - }; - - static daal::data_management::NumericTablePtr column_to_row(const daal::data_management::NumericTablePtr &x) { - const auto x_array = daal::data_management::HomogenNumericTable<>::cast(x)->getArraySharedPtr(); - return daal::data_management::HomogenNumericTable<>::create(x_array, x->getNumberOfRows(), x->getNumberOfColumns()); - } + const auto x_array = daal::data_management::HomogenNumericTable<>::cast(x)->getArraySharedPtr(); + return daal::data_management::HomogenNumericTable<>::create(x_array, x->getNumberOfRows(), x->getNumberOfColumns()); + } - static typename Algo::iomb_type::result_type map_reduce(Algo & algo, - const daal::data_management::NumericTablePtr & x, - const daal::data_management::NumericTablePtr & y, - size_t epoch_number, - float learning_rate) - { - int rank, nRanks; - MPI_Comm_size(MPI_COMM_WORLD, &nRanks); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - - /* In case of logistic_loss and interceptFlag == true, + static typename Algo::iomb_type::result_type map_reduce(Algo & algo, const daal::data_management::NumericTablePtr & x, const daal::data_management::NumericTablePtr & y, size_t epoch_number, float learning_rate) + { + int rank, nRanks; + MPI_Comm_size(MPI_COMM_WORLD, &nRanks); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + /* In case of logistic_loss and interceptFlag == true, * the number of weights is nFeatures + 1 */ - // FIXME: else? - size_t N = x->getNumberOfColumns() + 1; - fptype * weights_ptr = static_cast(daal::services::daal_malloc(N * sizeof(fptype))); - std::fill_n(weights_ptr, N, 1e-2); - daal::data_management::NumericTablePtr weights(new daal::data_management::HomogenNumericTable(weights_ptr, 1, N)); - - for (size_t e = 0; e < epoch_number; e++) { - for (size_t b = 0; b < 1; b++) { // FIXME iterate over batches - const auto gradient_tbl = compute_loss(algo, x, y, weights); - fptype * gradient = daal::services::dynamicPointerCast< daal::data_management::HomogenNumericTable >(gradient_tbl)->getArray(); - get_transceiver()->allreduce(gradient, N, MPI_SUM); // in-place all-reduce, result available on all proces - // we compute the "reduce" on all processes (we want the result on all anyway) - for (int i = 0; i < N; i++) { // FIXME: #weights == #gradient? - weights_ptr[i] -= learning_rate * (gradient[i] / nRanks); // div by nRanks because we need avrg - } - } - } - - const auto weights_T = column_to_row(weights); - auto model = daal::algorithms::logistic_regression::ModelPtr(new LRModel(weights_T)); - auto result = daal::algorithms::logistic_regression::training::ResultPtr(new daal::algorithms::logistic_regression::training::Result); - result->set(daal::algorithms::classifier::training::model, model); + // FIXME: else? + size_t N = x->getNumberOfColumns() + 1; + fptype * weights_ptr = static_cast(daal::services::daal_malloc(N * sizeof(fptype))); + std::fill_n(weights_ptr, N, 1e-2); + daal::data_management::NumericTablePtr weights(new daal::data_management::HomogenNumericTable(weights_ptr, 1, N)); - return result; - } - - static typename Algo::iomb_type::result_type - compute(Algo & algo, const data_or_file & x, const data_or_file & y) + for (size_t e = 0; e < epoch_number; e++) { - get_transceiver()->init(); - return map_reduce(algo, get_table(x), get_table(y), 50 /* FIXME */, 0.00001 /* FIXME */); + for (size_t b = 0; b < 1; b++) + { // FIXME iterate over batches + const auto gradient_tbl = compute_loss(algo, x, y, weights); + fptype * gradient = daal::services::dynamicPointerCast >(gradient_tbl)->getArray(); + get_transceiver()->allreduce(gradient, N, MPI_SUM); // in-place all-reduce, result available on all proces + // we compute the "reduce" on all processes (we want the result on all anyway) + for (int i = 0; i < N; i++) + { // FIXME: #weights == #gradient? + weights_ptr[i] -= learning_rate * (gradient[i] / nRanks); // div by nRanks because we need avrg + } + } } - }; + + const auto weights_T = column_to_row(weights); + auto model = daal::algorithms::logistic_regression::ModelPtr(new LRModel(weights_T)); + auto result = daal::algorithms::logistic_regression::training::ResultPtr(new daal::algorithms::logistic_regression::training::Result); + result->set(daal::algorithms::classifier::training::model, model); + + return result; + } + + static typename Algo::iomb_type::result_type compute(Algo & algo, const data_or_file & x, const data_or_file & y) + { + get_transceiver()->init(); + return map_reduce(algo, get_table(x), get_table(y), 50 /* FIXME */, 0.00001 /* FIXME */); + } +}; } // namespace dist_custom #endif // _DIST_LOG_REG_INCLUDED_ diff --git a/src/log_reg_model_builder.h b/src/log_reg_model_builder.h index 0e31d8df69..217162ac0d 100644 --- a/src/log_reg_model_builder.h +++ b/src/log_reg_model_builder.h @@ -24,8 +24,7 @@ typedef daal::algorithms::logistic_regression::ModelBuilder c_logistic_regression_model_builder; template -static daal::algorithms::logistic_regression::ModelPtr * get_logistic_regression_model_builder_model( - daal::algorithms::logistic_regression::ModelBuilder * obj_) +static daal::algorithms::logistic_regression::ModelPtr * get_logistic_regression_model_builder_model(daal::algorithms::logistic_regression::ModelBuilder * obj_) { return RAW()(obj_->getModel()); } diff --git a/src/map_reduce_star.h b/src/map_reduce_star.h index 98d1c26f94..f7888a0047 100644 --- a/src/map_reduce_star.h +++ b/src/map_reduce_star.h @@ -19,36 +19,35 @@ #include "transceiver.h" -namespace map_reduce_star { +namespace map_reduce_star +{ -template +template class map_reduce_star { public: - template - typename Algo::iomstep2Master_type::result_type - static map_reduce(Algo & algo, Ts& ... inputs) + template + typename Algo::iomstep2Master_type::result_type static map_reduce(Algo & algo, Ts &... inputs) { - auto tcvr = get_transceiver(); + auto tcvr = get_transceiver(); auto s1_result = algo.run_step1Local(inputs...); // gather all partial results auto p_results = tcvr->gather(s1_result); // call reduction on root typename Algo::iomstep2Master_type::result_type res; - if(tcvr->me() == 0) res = algo.run_step2Master(p_results); + if (tcvr->me() == 0) res = algo.run_step2Master(p_results); // bcast final result tcvr->bcast(res); return res; } - template - static typename Algo::iomstep2Master_type::result_type - compute(Algo & algo, Ts& ... inputs) + template + static typename Algo::iomstep2Master_type::result_type compute(Algo & algo, Ts &... inputs) { return map_reduce(algo, get_table(inputs)...); } }; -} // namespace map_reduce_star { +} // namespace map_reduce_star #endif // _MAP_REDUCE_STAR_INCLUDED_ diff --git a/src/map_reduce_star_plus.h b/src/map_reduce_star_plus.h index d7f75d6d12..6bf6ee860f 100644 --- a/src/map_reduce_star_plus.h +++ b/src/map_reduce_star_plus.h @@ -19,60 +19,63 @@ #include "transceiver.h" -namespace map_reduce_star_plus { +namespace map_reduce_star_plus +{ - template - class map_reduce_star_plus +template +class map_reduce_star_plus +{ +public: + template + typename Algo::iomstep3Local_type::result_type static map_reduce(Algo & algo, Ts &... inputs) { - public: - template - typename Algo::iomstep3Local_type::result_type - static map_reduce(Algo & algo, Ts& ... inputs) - { - auto tcvr = get_transceiver(); + auto tcvr = get_transceiver(); - // run step1 and gather all partial results - auto s1Res = algo.run_step1Local(inputs...); - auto s1OutForStep2 = s1Res->get(algo.outputOfStep1ForStep2); - auto s2InFromStep1 = tcvr->gather(s1OutForStep2); + // run step1 and gather all partial results + auto s1Res = algo.run_step1Local(inputs...); + auto s1OutForStep2 = s1Res->get(algo.outputOfStep1ForStep2); + auto s2InFromStep1 = tcvr->gather(s1OutForStep2); - typename Algo::iomstep2Master_type::result_type s2Res; - const int S23TAG = 4004; - daal::data_management::DataCollectionPtr inputOfStep3FromStep2; - if(tcvr->me() == 0) { - s2Res = algo.run_step2Master(s2InFromStep1); - // get intputs for step3 and send them to all processes - auto outputOfStep2ForStep3 = std::get<1>(s2Res)->get(algo.outputOfStep2ForStep3); - inputOfStep3FromStep2 = daal::services::staticPointerCast((*outputOfStep2ForStep3)[0]); - for(size_t i = 1; i < tcvr->nMembers(); i++) { - tcvr->send((*outputOfStep2ForStep3)[i], i, S23TAG); - } - } else { - inputOfStep3FromStep2 = tcvr->recv(0, S23TAG); + typename Algo::iomstep2Master_type::result_type s2Res; + const int S23TAG = 4004; + daal::data_management::DataCollectionPtr inputOfStep3FromStep2; + if (tcvr->me() == 0) + { + s2Res = algo.run_step2Master(s2InFromStep1); + // get intputs for step3 and send them to all processes + auto outputOfStep2ForStep3 = std::get<1>(s2Res)->get(algo.outputOfStep2ForStep3); + inputOfStep3FromStep2 = daal::services::staticPointerCast((*outputOfStep2ForStep3)[0]); + for (size_t i = 1; i < tcvr->nMembers(); i++) + { + tcvr->send((*outputOfStep2ForStep3)[i], i, S23TAG); } + } + else + { + inputOfStep3FromStep2 = tcvr->recv(0, S23TAG); + } - // bcast result of step2 to all - auto result = std::get<0>(s2Res); - tcvr->bcast(result); + // bcast result of step2 to all + auto result = std::get<0>(s2Res); + tcvr->bcast(result); - // perform step3 - auto inputOfStep3FromStep1 = s1Res->get(algo.outputOfStep1ForStep3); - auto step3Output = algo.run_step3Local(inputOfStep3FromStep1, inputOfStep3FromStep2); + // perform step3 + auto inputOfStep3FromStep1 = s1Res->get(algo.outputOfStep1ForStep3); + auto step3Output = algo.run_step3Local(inputOfStep3FromStep1, inputOfStep3FromStep2); - // add result of step3 - result->set(algo.step3Res, step3Output->get(algo.step3Res)); + // add result of step3 + result->set(algo.step3Res, step3Output->get(algo.step3Res)); - return result; - } + return result; + } - template - static typename Algo::iomstep3Local_type::result_type - compute(Algo & algo, Ts& ... inputs) - { - return map_reduce(algo, get_table(inputs)...); - } - }; + template + static typename Algo::iomstep3Local_type::result_type compute(Algo & algo, Ts &... inputs) + { + return map_reduce(algo, get_table(inputs)...); + } +}; -} // namespace map_reduce_star_plus { +} // namespace map_reduce_star_plus #endif // _MAP_REDUCE_STAR_PLUS_INCLUDED_ diff --git a/src/map_reduce_tree.h b/src/map_reduce_tree.h index e6285ca118..d421cb1b7a 100644 --- a/src/map_reduce_tree.h +++ b/src/map_reduce_tree.h @@ -19,71 +19,77 @@ #include "transceiver.h" -namespace map_reduce_tree { +namespace map_reduce_tree +{ -template +template class map_reduce_tree { public: static int get_power2(size_t x) { int power = 1; - while(power < x) power*=2; + while (power < x) power *= 2; return power; } static typename Algo::iomstep1Local_type::result_type reduce(Algo & algo, typename Algo::iomstep1Local_type::result_type inp) { - auto tcvr = get_transceiver(); - int rank = tcvr->me(); + auto tcvr = get_transceiver(); + int rank = tcvr->me(); int nRanks = tcvr->nMembers(); - if(nRanks == 1) { + if (nRanks == 1) + { std::vector p_results(1, inp); inp = algo.run_step2Master(p_results); - } else { - size_t N = get_power2(nRanks); + } + else + { + size_t N = get_power2(nRanks); const int REDTAG = 5534; - - for(size_t cN = N/2; cN>0; cN /= 2) { - if(rank >= cN) { + + for (size_t cN = N / 2; cN > 0; cN /= 2) + { + if (rank >= cN) + { // Upper half of processes send their stuff to lower half tcvr->send(inp, rank - cN, REDTAG); break; - } else if(rank + cN < nRanks) { + } + else if (rank + cN < nRanks) + { // lower half of processes receives message and computes partial reduction std::vector p_results(2); p_results[0] = inp; p_results[1] = tcvr->recv(rank + cN, REDTAG); - inp = algo.run_step2Master(p_results); + inp = algo.run_step2Master(p_results); } } } return inp; } - template - typename Algo::iomstep2Master__final_type::result_type - static map_reduce(Algo & algo, const Ts& ... inputs) + template + typename Algo::iomstep2Master__final_type::result_type static map_reduce(Algo & algo, const Ts &... inputs) { auto s1_result = algo.run_step1Local(inputs...); // reduce all partial results auto pres = reduce(algo, s1_result); // finalize result - auto res = algo.run_step2Master__final(std::vector< typename Algo::iomstep2Master_type::result_type >(1, pres)); + auto res = algo.run_step2Master__final(std::vector(1, pres)); // bcast final result get_transceiver()->bcast(res); return res; } - template - static typename Algo::iomstep2Master__final_type::result_type - compute(Algo & algo, const Ts& ... inputs) + template + static typename Algo::iomstep2Master__final_type::result_type compute(Algo & algo, const Ts &... inputs) { return map_reduce(algo, get_table(inputs)...); } }; -} // namespace map_reduce_tree { +} // namespace map_reduce_tree #endif // _MAP_REDUCE_TREE_INCLUDED_ diff --git a/src/mpi/mpi_transceiver.cpp b/src/mpi/mpi_transceiver.cpp index ca0d38e04e..19c773427a 100644 --- a/src/mpi/mpi_transceiver.cpp +++ b/src/mpi/mpi_transceiver.cpp @@ -25,10 +25,11 @@ void mpi_transceiver::init() int is_mpi_initialized = 0; MPI_Initialized(&is_mpi_initialized); // protect against double-init - if(!is_mpi_initialized) { + if (!is_mpi_initialized) + { MPI_Init(NULL, NULL); } - transceiver_impl::init(); + transceiver_impl::init(); } void mpi_transceiver::fini() @@ -50,7 +51,7 @@ size_t mpi_transceiver::me() return me; } -void mpi_transceiver::send(const void* buff, size_t N, size_t recpnt, size_t tag) +void mpi_transceiver::send(const void * buff, size_t N, size_t recpnt, size_t tag) { MPI_Send(buff, (int)N, MPI_CHAR, recpnt, tag, MPI_COMM_WORLD); } @@ -67,17 +68,20 @@ size_t mpi_transceiver::recv(void * buff, size_t N, int sender, int tag) void * mpi_transceiver::gather(const void * ptr, size_t N, size_t root, const size_t * sizes, bool varying) { char * buff = NULL; - if(varying) { + if (varying) + { // -> gatherv - if(m_me == root) { + if (m_me == root) + { int * offsets = static_cast(daal::services::daal_malloc(m_nMembers * sizeof(int))); DAAL4PY_CHECK_MALLOC(offsets); DAAL4PY_CHECK_BAD_CAST(sizes[0] <= std::numeric_limits::max()); int tot_sz = sizes[0]; offsets[0] = 0; - for(int i = 1; i < m_nMembers; ++i) { - DAAL4PY_OVERFLOW_CHECK_BY_ADDING(int, offsets[i-1], sizes[i-1]); - offsets[i] = offsets[i-1] + sizes[i-1]; + for (int i = 1; i < m_nMembers; ++i) + { + DAAL4PY_OVERFLOW_CHECK_BY_ADDING(int, offsets[i - 1], sizes[i - 1]); + offsets[i] = offsets[i - 1] + sizes[i - 1]; DAAL4PY_OVERFLOW_CHECK_BY_ADDING(int, tot_sz, sizes[i]); tot_sz += sizes[i]; } @@ -85,27 +89,26 @@ void * mpi_transceiver::gather(const void * ptr, size_t N, size_t root, const si DAAL4PY_CHECK_MALLOC(buff); int * szs = static_cast(daal::services::daal_malloc(m_nMembers * sizeof(int))); DAAL4PY_CHECK_MALLOC(szs); - for(size_t i=0; i(sizes[i]); } - MPI_Gatherv(ptr, N, MPI_CHAR, - buff, szs, offsets, MPI_CHAR, - root, MPI_COMM_WORLD); + MPI_Gatherv(ptr, N, MPI_CHAR, buff, szs, offsets, MPI_CHAR, root, MPI_COMM_WORLD); daal::services::daal_free(szs); szs = NULL; daal::services::daal_free(offsets); offsets = NULL; - - } else { - MPI_Gatherv(ptr, N, MPI_CHAR, - NULL, NULL, NULL, MPI_CHAR, - root, MPI_COMM_WORLD); } - } else { - if(m_me == root) + else + { + MPI_Gatherv(ptr, N, MPI_CHAR, NULL, NULL, NULL, MPI_CHAR, root, MPI_COMM_WORLD); + } + } + else + { + if (m_me == root) { - buff = static_cast(daal::services::daal_malloc(m_nMembers*N)); + buff = static_cast(daal::services::daal_malloc(m_nMembers * N)); DAAL4PY_CHECK_MALLOC(buff); } // -> gather with same size on all procs @@ -117,15 +120,16 @@ void * mpi_transceiver::gather(const void * ptr, size_t N, size_t root, const si static MPI_Datatype to_mpi(transceiver_iface::type_type T) { - switch(T) { - case transceiver_iface::BOOL: return MPI_C_BOOL; - case transceiver_iface::INT8: return MPI_INT8_T; - case transceiver_iface::UINT8: return MPI_UINT8_T; - case transceiver_iface::INT32: return MPI_INT32_T; + switch (T) + { + case transceiver_iface::BOOL: return MPI_C_BOOL; + case transceiver_iface::INT8: return MPI_INT8_T; + case transceiver_iface::UINT8: return MPI_UINT8_T; + case transceiver_iface::INT32: return MPI_INT32_T; case transceiver_iface::UINT32: return MPI_INT32_T; - case transceiver_iface::INT64: return MPI_INT64_T; + case transceiver_iface::INT64: return MPI_INT64_T; case transceiver_iface::UINT64: return MPI_INT64_T; - case transceiver_iface::FLOAT: return MPI_FLOAT; + case transceiver_iface::FLOAT: return MPI_FLOAT; case transceiver_iface::DOUBLE: return MPI_DOUBLE; default: throw std::logic_error("unsupported data type"); } @@ -133,15 +137,16 @@ static MPI_Datatype to_mpi(transceiver_iface::type_type T) static MPI_Op to_mpi(transceiver_iface::operation_type o) { - switch(o) { - case transceiver_iface::OP_MAX: return MPI_MAX; - case transceiver_iface::OP_MIN: return MPI_MIN; - case transceiver_iface::OP_SUM: return MPI_SUM; + switch (o) + { + case transceiver_iface::OP_MAX: return MPI_MAX; + case transceiver_iface::OP_MIN: return MPI_MIN; + case transceiver_iface::OP_SUM: return MPI_SUM; case transceiver_iface::OP_PROD: return MPI_PROD; case transceiver_iface::OP_LAND: return MPI_LAND; case transceiver_iface::OP_BAND: return MPI_BAND; - case transceiver_iface::OP_LOR: return MPI_LOR; - case transceiver_iface::OP_BOR: return MPI_BOR; + case transceiver_iface::OP_LOR: return MPI_LOR; + case transceiver_iface::OP_BOR: return MPI_BOR; case transceiver_iface::OP_LXOR: return MPI_LXOR; case transceiver_iface::OP_BXOR: return MPI_BXOR; default: throw std::logic_error("unsupported operation type"); @@ -170,13 +175,14 @@ extern "C" PyMODINIT_FUNC PyInit_mpi_transceiver(void) { // shared pointer, will GC transceiver when shutting down static std::shared_ptr s_smt; - PyObject *m; - static struct PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, "daal4py.mpi_transceiver", "No docs", -1, NULL, }; + PyObject * m; + static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, "daal4py.mpi_transceiver", "No docs", -1, NULL, + }; m = PyModule_Create(&moduledef); - if (m == NULL) - return NULL; + if (m == NULL) return NULL; s_smt.reset(new mpi_transceiver); - PyObject_SetAttrString(m, "transceiver", PyLong_FromVoidPtr((void*)(&s_smt))); + PyObject_SetAttrString(m, "transceiver", PyLong_FromVoidPtr((void *)(&s_smt))); return m; } diff --git a/src/mpi/mpi_transceiver.h b/src/mpi/mpi_transceiver.h index f661f9aa78..3bf95c3b7b 100644 --- a/src/mpi/mpi_transceiver.h +++ b/src/mpi/mpi_transceiver.h @@ -28,16 +28,16 @@ class mpi_transceiver : public transceiver_impl virtual void init(); virtual void fini(); - + virtual size_t nMembers(); virtual size_t me(); - virtual void send(const void* buff, size_t N, size_t recpnt, size_t tag); + virtual void send(const void * buff, size_t N, size_t recpnt, size_t tag); virtual size_t recv(void * buff, size_t N, int sender, int tag); - virtual void * gather(const void * ptr, size_t N, size_t root, const size_t * sizes, bool varying=true); + virtual void * gather(const void * ptr, size_t N, size_t root, const size_t * sizes, bool varying = true); virtual void bcast(void * ptr, size_t N, size_t root); diff --git a/src/npy4daal.h b/src/npy4daal.h index 181dcc5689..9d2636ed04 100644 --- a/src/npy4daal.h +++ b/src/npy4daal.h @@ -22,84 +22,70 @@ #include "daal4py_defines.h" #if PY_VERSION_HEX >= 0x03000000 -#define PyString_Check(name) PyUnicode_Check(name) -#define PyString_AsString(str) PyUnicode_AsUTF8(str) -#define PyString_FromString(str) PyUnicode_FromString(str) -#define PyString_FromStringAndSize(str, sz) PyUnicode_FromStringAndSize(str, sz) -#define PyInt_AsSsize_t PyLong_AsSsize_t + #define PyString_Check(name) PyUnicode_Check(name) + #define PyString_AsString(str) PyUnicode_AsUTF8(str) + #define PyString_FromString(str) PyUnicode_FromString(str) + #define PyString_FromStringAndSize(str, sz) PyUnicode_FromStringAndSize(str, sz) + #define PyInt_AsSsize_t PyLong_AsSsize_t #endif #if NPY_ABI_VERSION < 0x02000000 - #define PyDataType_ELSIZE(descr) ((descr)->elsize) - #define PyDataType_NAMES(descr) ((descr)->names) - #define PyDataType_FIELDS(descr) ((descr)->fields) + #define PyDataType_ELSIZE(descr) ((descr)->elsize) + #define PyDataType_NAMES(descr) ((descr)->names) + #define PyDataType_FIELDS(descr) ((descr)->fields) #endif - -#define SET_NPY_FEATURE( _T, _M, _E ) \ - switch(_T) { \ - case NPY_DOUBLE: \ - case NPY_CDOUBLE: \ - case NPY_DOUBLELTR: \ - case NPY_CDOUBLELTR: \ - _M(double); \ - break; \ - case NPY_FLOAT: \ - case NPY_CFLOAT: \ - case NPY_FLOATLTR: \ - case NPY_CFLOATLTR: \ - _M(float); \ - break; \ - case NPY_INT: \ - case NPY_INTLTR: \ - _M(int); \ - break; \ - case NPY_UINT: \ - case NPY_UINTLTR: \ - _M(unsigned int); \ - break; \ - case NPY_LONG: \ - case NPY_LONGLTR: \ - _M(long); \ - break; \ - case NPY_ULONG: \ - case NPY_ULONGLTR: \ - _M(unsigned long); \ - break; \ - case NPY_LONGLONG: \ - case NPY_LONGLONGLTR: \ - _M(long long); \ - break; \ - case NPY_ULONGLONG: \ - case NPY_ULONGLONGLTR: \ - _M(unsigned long long); \ - break; \ - case NPY_BYTE: \ - case NPY_BYTELTR: \ - _M(char);\ - break; \ - case NPY_UBYTE: \ - case NPY_UBYTELTR: \ - _M(unsigned char);\ - break; \ - case NPY_SHORT: \ - case NPY_SHORTLTR: \ - _M(short);\ - break; \ - case NPY_USHORT: \ - case NPY_USHORTLTR: \ - _M(unsigned short); \ - break; \ - default: \ - throw std::invalid_argument(std::string("Unsupported NPY type ") + std::to_string(_T) + " ignored\n."); \ - _E;\ +#define SET_NPY_FEATURE(_T, _M, _E) \ + switch (_T) \ + { \ + case NPY_DOUBLE: \ + case NPY_CDOUBLE: \ + case NPY_DOUBLELTR: \ + case NPY_CDOUBLELTR: _M(double); break; \ + case NPY_FLOAT: \ + case NPY_CFLOAT: \ + case NPY_FLOATLTR: \ + case NPY_CFLOATLTR: _M(float); break; \ + case NPY_INT: \ + case NPY_INTLTR: _M(int); break; \ + case NPY_UINT: \ + case NPY_UINTLTR: _M(unsigned int); break; \ + case NPY_LONG: \ + case NPY_LONGLTR: _M(long); break; \ + case NPY_ULONG: \ + case NPY_ULONGLTR: _M(unsigned long); break; \ + case NPY_LONGLONG: \ + case NPY_LONGLONGLTR: _M(long long); break; \ + case NPY_ULONGLONG: \ + case NPY_ULONGLONGLTR: _M(unsigned long long); break; \ + case NPY_BYTE: \ + case NPY_BYTELTR: _M(char); break; \ + case NPY_UBYTE: \ + case NPY_UBYTELTR: _M(unsigned char); break; \ + case NPY_SHORT: \ + case NPY_SHORTLTR: _M(short); break; \ + case NPY_USHORT: \ + case NPY_USHORTLTR: _M(unsigned short); break; \ + default: throw std::invalid_argument(std::string("Unsupported NPY type ") + std::to_string(_T) + " ignored\n."); _E; \ }; -template struct npy_type; -template<> struct npy_type { static constexpr char *value = "f8"; }; -template<> struct npy_type { static constexpr char *value = "f4"; }; -template<> struct npy_type { static constexpr char *value = "i4"; }; - +template +struct npy_type; +template <> +struct npy_type +{ + static constexpr char * value = "f8"; +}; +template <> +struct npy_type +{ + static constexpr char * value = "f4"; +}; +template <> +struct npy_type +{ + static constexpr char * value = "i4"; +}; // For wrapping a non-contiguous, homogen numpy array // Avoids copying by using numpy iterators when accessing blocks of data @@ -110,20 +96,20 @@ class NpyNonContigHandler { Py_XINCREF(ary); - PyArray_Descr * descr = PyArray_DESCR(ary); // type descriptor + PyArray_Descr * descr = PyArray_DESCR(ary); // type descriptor - if(PyArray_NDIM(ary) != 2) { - throw std::invalid_argument(std::string("Found array with ") - + std::to_string(PyArray_NDIM(ary)) - + std::string(" dimensions, extected 2. Don't know how to create homogen NumericTable.")); + if (PyArray_NDIM(ary) != 2) + { + throw std::invalid_argument(std::string("Found array with ") + std::to_string(PyArray_NDIM(ary)) + std::string(" dimensions, extected 2. Don't know how to create homogen NumericTable.")); } Py_ssize_t N = PyArray_DIMS(ary)[1]; - auto _ddict = daal::data_management::NumericTableDictionaryPtr(new daal::data_management::NumericTableDictionary(N)); + auto _ddict = daal::data_management::NumericTableDictionaryPtr(new daal::data_management::NumericTableDictionary(N)); // setNumberOfColumns not needed, done by providing size to ddict // iterate through all elements and init ddict feature accordingly - for (Py_ssize_t i=0; isetFeature<_T>(i) SET_NPY_FEATURE(descr->type, SETFEATURE_, throw std::invalid_argument("Found unsupported data type")); #undef SETFEATURE_ @@ -138,18 +124,17 @@ class NpyNonContigHandler // 1. Retrieve requested slide from numpy array by using python's C-API // 2. Create numpy array iterator setup for casting to requested type // 3. Iterate through numpy array and copy to/from block using daal_memcpy_s - template - static void do_cpy(PyArrayObject * ary, daal::data_management::NumericTableDictionaryPtr & ddict, - daal::data_management::BlockDescriptor& block, size_t startcol, size_t ncols, size_t startrow, size_t nrows) + template + static void do_cpy(PyArrayObject * ary, daal::data_management::NumericTableDictionaryPtr & ddict, daal::data_management::BlockDescriptor & block, size_t startcol, size_t ncols, size_t startrow, size_t nrows) { // Handle zero-sized arrays specially - if (PyArray_SIZE(ary) == 0) { + if (PyArray_SIZE(ary) == 0) + { return; } auto __state = PyGILState_Ensure(); - // Getting the slice/block from the numpy array requires creating slices // so it's not particularly cheap // Even though surprisingly complicated this is much simpler than @@ -158,35 +143,36 @@ class NpyNonContigHandler // cut by rows and then manually detect the columns in the inner loop. // Even if done this way, it's not clear which one is faster. // If performance becomes a problem, we might consider using cython instead. - PyObject* s1s = PyLong_FromLong(startrow); - PyObject* s1e = PyLong_FromLong(startrow+nrows); - PyObject* s2s = PyLong_FromLong(startcol); - PyObject* s2e = PyLong_FromLong(startcol+ncols); - PyObject* slice = PyTuple_New(2); + PyObject * s1s = PyLong_FromLong(startrow); + PyObject * s1e = PyLong_FromLong(startrow + nrows); + PyObject * s2s = PyLong_FromLong(startcol); + PyObject * s2e = PyLong_FromLong(startcol + ncols); + PyObject * slice = PyTuple_New(2); PyTuple_SET_ITEM(slice, 0, PySlice_New(s1s, s1e, NULL)); PyTuple_SET_ITEM(slice, 1, PySlice_New(s2s, s2e, NULL)); - PyArrayObject * ary_block = (PyArrayObject*)PyObject_GetItem((PyObject*)ary, slice); + PyArrayObject * ary_block = (PyArrayObject *)PyObject_GetItem((PyObject *)ary, slice); Py_XDECREF(s1s); Py_XDECREF(s1e); Py_XDECREF(s2s); Py_XDECREF(s2e); // create the iterator - PyObject *val = Py_BuildValue("s", npy_type::value); - PyArray_Descr *dtype; + PyObject * val = Py_BuildValue("s", npy_type::value); + PyArray_Descr * dtype; PyArray_DescrConverter(val, &dtype); Py_XDECREF(val); NpyIter * iter = NpyIter_New(ary_block, ((WBack ? NPY_ITER_WRITEONLY : NPY_ITER_READONLY) // the array is never written to - | NPY_ITER_EXTERNAL_LOOP // Inner loop is done outside the iterator for efficiency. - | NPY_ITER_RANGED // Read a sub-range - | NPY_ITER_BUFFERED), // Buffer, don't copy - NPY_CORDER, // Visit elements in C memory order - NPY_UNSAFE_CASTING, // all casting allowed - dtype); // let's numpy do the casting - - if (iter == NULL) { + | NPY_ITER_EXTERNAL_LOOP // Inner loop is done outside the iterator for efficiency. + | NPY_ITER_RANGED // Read a sub-range + | NPY_ITER_BUFFERED), // Buffer, don't copy + NPY_CORDER, // Visit elements in C memory order + NPY_UNSAFE_CASTING, // all casting allowed + dtype); // let's numpy do the casting + + if (iter == NULL) + { PyGILState_Release(__state); return; } @@ -194,7 +180,8 @@ class NpyNonContigHandler // The iternext function gets stored in a local variable // so it can be called repeatedly in an efficient manner. NpyIter_IterNextFunc * iternext = NpyIter_GetIterNext(iter, NULL); - if (iternext == NULL) { + if (iternext == NULL) + { NpyIter_Deallocate(iter); PyGILState_Release(__state); return; @@ -206,7 +193,8 @@ class NpyNonContigHandler // The location of the inner loop size which the iterator may update npy_intp * innersizeptr = NpyIter_GetInnerLoopSizePtr(iter); - if(PyDataType_ELSIZE(NpyIter_GetDescrArray(iter)[0]) != sizeof(T)) { + if (PyDataType_ELSIZE(NpyIter_GetDescrArray(iter)[0]) != sizeof(T)) + { NpyIter_Deallocate(iter); PyGILState_Release(__state); throw std::invalid_argument("Encountered unexpected element size or type when copying block."); @@ -219,28 +207,28 @@ class NpyNonContigHandler // we assume all inner strides are identical npy_intp innerstride = strideptr[0]; - if(strideptr[0] == sizeof(T)) { - do { + if (strideptr[0] == sizeof(T)) + { + do + { npy_intp size = *innersizeptr; - daal::services::internal::daal_memcpy_s(WBack ? *dataptr : reinterpret_cast(blockPtr), - sizeof(T) * size, - WBack ? reinterpret_cast(blockPtr) : *dataptr, - sizeof(T) * size); + daal::services::internal::daal_memcpy_s(WBack ? *dataptr : reinterpret_cast(blockPtr), sizeof(T) * size, WBack ? reinterpret_cast(blockPtr) : *dataptr, sizeof(T) * size); blockPtr += size; - } while(iternext(iter)); - } else { - do { + } while (iternext(iter)); + } + else + { + do + { // For efficiency, should specialize this based on item size... npy_intp i; - char *src = *dataptr; + char * src = *dataptr; npy_intp size = *innersizeptr; - for(i = 0; i < size; ++i, src += innerstride, blockPtr += 1) { - daal::services::internal::daal_memcpy_s(WBack ? src : reinterpret_cast(blockPtr), - sizeof(T), - WBack ? reinterpret_cast(blockPtr) : src, - sizeof(T)); + for (i = 0; i < size; ++i, src += innerstride, blockPtr += 1) + { + daal::services::internal::daal_memcpy_s(WBack ? src : reinterpret_cast(blockPtr), sizeof(T), WBack ? reinterpret_cast(blockPtr) : src, sizeof(T)); } - } while(iternext(iter)); + } while (iternext(iter)); } __state = PyGILState_Ensure(); @@ -258,34 +246,36 @@ class NpyStructHandler static daal::data_management::NumericTableDictionaryPtr init(PyArrayObject * ary) { // e.g. each element is a tuple. - PyArray_Descr * descr = PyArray_DESCR(ary); // type descriptor + PyArray_Descr * descr = PyArray_DESCR(ary); // type descriptor - if(!PyDataType_NAMES(descr)) { + if (!PyDataType_NAMES(descr)) + { throw std::invalid_argument("No dtype argument provided. Unable to create AOSNumericTable."); } - if(PyArray_NDIM(ary) != 1) { - throw std::invalid_argument(std::string("Found array with ") - + std::to_string(PyArray_NDIM(ary)) - + std::string(" dimensions, extected 1 for a strctured array. Don't know how to create NumericTable.")); + if (PyArray_NDIM(ary) != 1) + { + throw std::invalid_argument(std::string("Found array with ") + std::to_string(PyArray_NDIM(ary)) + std::string(" dimensions, extected 1 for a strctured array. Don't know how to create NumericTable.")); } PyObject * fnames = PySequence_Fast(PyDataType_NAMES(descr), NULL); // list of names of tuple-elements - Py_ssize_t N = PySequence_Fast_GET_SIZE(fnames); // number of elements in tuple + Py_ssize_t N = PySequence_Fast_GET_SIZE(fnames); // number of elements in tuple auto _ddict = daal::data_management::NumericTableDictionaryPtr(new daal::data_management::NumericTableDictionary(N)); // iterate through all elements in tuple // get their type and init ddict feature accordingly - for (Py_ssize_t i=0; isetFeature<_T>(i) @@ -296,23 +286,26 @@ class NpyStructHandler return _ddict; } - // this is a generic copy function + // this is a generic copy function // set template parameter Down to true for down-casts, to false for upcasts - template - static void do_cpy(PyArrayObject * ary, daal::data_management::NumericTableDictionaryPtr & ddict, - daal::data_management::BlockDescriptor& block, size_t startcol, size_t ncols, size_t startrow, size_t nrows) + template + static void do_cpy(PyArrayObject * ary, daal::data_management::NumericTableDictionaryPtr & ddict, daal::data_management::BlockDescriptor & block, size_t startcol, size_t ncols, size_t startrow, size_t nrows) { auto __state = PyGILState_Ensure(); // tuple elements are identified by name, need the list of names PyObject * fnames = PySequence_Fast(PyDataType_NAMES(PyArray_DESCR(ary)), NULL); - for( size_t j = 0; j < ncols ; ++j ) { + for (size_t j = 0; j < ncols; ++j) + { PyObject * name = PySequence_Fast_GET_ITEM(fnames, j); // get column by name - PyArrayObject * col = reinterpret_cast(PyObject_GetItem(reinterpret_cast(ary), name)); assert(col); + PyArrayObject * col = reinterpret_cast(PyObject_GetItem(reinterpret_cast(ary), name)); + assert(col); // need the descriptor to create an iterator - PyArray_Descr * dtype = PyArray_DTYPE(col); assert(dtype); + PyArray_Descr * dtype = PyArray_DTYPE(col); + assert(dtype); // get an iterator for the column - NpyIter * iter = NpyIter_New(col, NPY_ITER_READONLY, NPY_KEEPORDER, NPY_SAME_KIND_CASTING, dtype); assert(iter); + NpyIter * iter = NpyIter_New(col, NPY_ITER_READONLY, NPY_KEEPORDER, NPY_SAME_KIND_CASTING, dtype); + assert(iter); NpyIter_IterNextFunc * iternext = NpyIter_GetIterNext(iter, NULL); // fast forward to first element we want NpyIter_GotoIterIndex(iter, startrow); @@ -320,24 +313,27 @@ class NpyStructHandler // ptr to column in block T * blockPtr = block.getBlockPtr() + j + startcol; // feature for column - daal::data_management::NumericTableFeature &f = (*ddict)[j + startcol]; + daal::data_management::NumericTableFeature & f = (*ddict)[j + startcol]; // iterate through column, use casting functions to upcast, dataptr will point to current element void ** dataptr = reinterpret_cast(NpyIter_GetDataPtrArray(iter)); PyGILState_Release(__state); - if(WBack) { - auto dcast = daal::data_management::internal::getVectorDownCast(f.indexType, - daal::data_management::data_feature_utils::getInternalNumType()); - do { - dcast(1, blockPtr + n*block.getNumberOfColumns(), *dataptr); + if (WBack) + { + auto dcast = daal::data_management::internal::getVectorDownCast(f.indexType, daal::data_management::data_feature_utils::getInternalNumType()); + do + { + dcast(1, blockPtr + n * block.getNumberOfColumns(), *dataptr); ++n; } while (iternext(iter) && n < nrows); - } else { - auto ucast = daal::data_management::internal::getVectorUpCast(f.indexType, - daal::data_management::data_feature_utils::getInternalNumType()); - do { - ucast(1, *dataptr, blockPtr + n*block.getNumberOfColumns()); + } + else + { + auto ucast = daal::data_management::internal::getVectorUpCast(f.indexType, daal::data_management::data_feature_utils::getInternalNumType()); + do + { + ucast(1, *dataptr, blockPtr + n * block.getNumberOfColumns()); ++n; } while (iternext(iter) && n < nrows); } @@ -351,10 +347,9 @@ class NpyStructHandler } }; - // Numeric Table wrapping a non-contiguous, homogen numpy array // Avoids copying by using numpy iterators when accesing blocks of data -template +template class NpyNumericTable : public daal::data_management::NumericTable { private: @@ -365,102 +360,46 @@ class NpyNumericTable : public daal::data_management::NumericTable * Constructor * \param[in] ary The non-contiguous, homogen numpy array to wrap */ - NpyNumericTable(PyArrayObject * ary) - : NumericTable(daal::data_management::NumericTableDictionaryPtr()), - _ary(ary) + NpyNumericTable(PyArrayObject * ary) : NumericTable(daal::data_management::NumericTableDictionaryPtr()), _ary(ary) { _ddict = Hndlr::init(_ary); setNumberOfRows(PyArray_DIMS(ary)[0]); - _layout = daal::data_management::NumericTableIface::aos; + _layout = daal::data_management::NumericTableIface::aos; _memStatus = daal::data_management::NumericTableIface::userAllocated; } /** \private */ - ~NpyNumericTable() - { - Py_XDECREF(_ary); - } + ~NpyNumericTable() { Py_XDECREF(_ary); } - virtual daal::services::Status resize(size_t nrows) DAAL_C11_OVERRIDE - { - throw std::invalid_argument("Resizing numpy array through daal not supported."); - } + virtual daal::services::Status resize(size_t nrows) DAAL_C11_OVERRIDE { throw std::invalid_argument("Resizing numpy array through daal not supported."); } virtual int getSerializationTag() const DAAL_C11_OVERRIDE { - return 3333; // independent of template arg Hndlr! + return 3333; // independent of template arg Hndlr! } - daal::services::Status getBlockOfRows(size_t vector_idx, size_t vector_num, - daal::data_management::ReadWriteMode rwflag, daal::data_management::BlockDescriptor& block) DAAL_C11_OVERRIDE - { - return getTBlock(vector_idx, vector_num, rwflag, block); - } - daal::services::Status getBlockOfRows(size_t vector_idx, size_t vector_num, - daal::data_management::ReadWriteMode rwflag, daal::data_management::BlockDescriptor& block) DAAL_C11_OVERRIDE - { - return getTBlock(vector_idx, vector_num, rwflag, block); - } - daal::services::Status getBlockOfRows(size_t vector_idx, size_t vector_num, - daal::data_management::ReadWriteMode rwflag, daal::data_management::BlockDescriptor& block) DAAL_C11_OVERRIDE - { - return getTBlock(vector_idx, vector_num, rwflag, block); - } + daal::services::Status getBlockOfRows(size_t vector_idx, size_t vector_num, daal::data_management::ReadWriteMode rwflag, daal::data_management::BlockDescriptor & block) DAAL_C11_OVERRIDE { return getTBlock(vector_idx, vector_num, rwflag, block); } + daal::services::Status getBlockOfRows(size_t vector_idx, size_t vector_num, daal::data_management::ReadWriteMode rwflag, daal::data_management::BlockDescriptor & block) DAAL_C11_OVERRIDE { return getTBlock(vector_idx, vector_num, rwflag, block); } + daal::services::Status getBlockOfRows(size_t vector_idx, size_t vector_num, daal::data_management::ReadWriteMode rwflag, daal::data_management::BlockDescriptor & block) DAAL_C11_OVERRIDE { return getTBlock(vector_idx, vector_num, rwflag, block); } - daal::services::Status releaseBlockOfRows(daal::data_management::BlockDescriptor& block) DAAL_C11_OVERRIDE - { - return releaseTBlock(block); - } - daal::services::Status releaseBlockOfRows(daal::data_management::BlockDescriptor& block) DAAL_C11_OVERRIDE - { - return releaseTBlock(block); - } - daal::services::Status releaseBlockOfRows(daal::data_management::BlockDescriptor& block) DAAL_C11_OVERRIDE - { - return releaseTBlock(block); - } + daal::services::Status releaseBlockOfRows(daal::data_management::BlockDescriptor & block) DAAL_C11_OVERRIDE { return releaseTBlock(block); } + daal::services::Status releaseBlockOfRows(daal::data_management::BlockDescriptor & block) DAAL_C11_OVERRIDE { return releaseTBlock(block); } + daal::services::Status releaseBlockOfRows(daal::data_management::BlockDescriptor & block) DAAL_C11_OVERRIDE { return releaseTBlock(block); } - daal::services::Status getBlockOfColumnValues(size_t feature_idx, size_t vector_idx, size_t value_num, daal::data_management::ReadWriteMode rwflag, - daal::data_management::BlockDescriptor& block) DAAL_C11_OVERRIDE - { - return getTBlock(vector_idx, value_num, rwflag, block, feature_idx, 1 ); - } - daal::services::Status getBlockOfColumnValues(size_t feature_idx, size_t vector_idx, size_t value_num, daal::data_management::ReadWriteMode rwflag, - daal::data_management::BlockDescriptor& block) DAAL_C11_OVERRIDE - { - return getTBlock(vector_idx, value_num, rwflag, block, feature_idx, 1); - } - daal::services::Status getBlockOfColumnValues(size_t feature_idx, size_t vector_idx, size_t value_num, daal::data_management::ReadWriteMode rwflag, - daal::data_management::BlockDescriptor& block) DAAL_C11_OVERRIDE - { - return getTBlock(vector_idx, value_num, rwflag, block, feature_idx, 1); - } + daal::services::Status getBlockOfColumnValues(size_t feature_idx, size_t vector_idx, size_t value_num, daal::data_management::ReadWriteMode rwflag, daal::data_management::BlockDescriptor & block) DAAL_C11_OVERRIDE { return getTBlock(vector_idx, value_num, rwflag, block, feature_idx, 1); } + daal::services::Status getBlockOfColumnValues(size_t feature_idx, size_t vector_idx, size_t value_num, daal::data_management::ReadWriteMode rwflag, daal::data_management::BlockDescriptor & block) DAAL_C11_OVERRIDE { return getTBlock(vector_idx, value_num, rwflag, block, feature_idx, 1); } + daal::services::Status getBlockOfColumnValues(size_t feature_idx, size_t vector_idx, size_t value_num, daal::data_management::ReadWriteMode rwflag, daal::data_management::BlockDescriptor & block) DAAL_C11_OVERRIDE { return getTBlock(vector_idx, value_num, rwflag, block, feature_idx, 1); } - daal::services::Status releaseBlockOfColumnValues(daal::data_management::BlockDescriptor& block) DAAL_C11_OVERRIDE - { - return releaseTBlock(block); - } - daal::services::Status releaseBlockOfColumnValues(daal::data_management::BlockDescriptor& block) DAAL_C11_OVERRIDE - { - return releaseTBlock(block); - } - daal::services::Status releaseBlockOfColumnValues(daal::data_management::BlockDescriptor& block) DAAL_C11_OVERRIDE - { - return releaseTBlock(block); - } + daal::services::Status releaseBlockOfColumnValues(daal::data_management::BlockDescriptor & block) DAAL_C11_OVERRIDE { return releaseTBlock(block); } + daal::services::Status releaseBlockOfColumnValues(daal::data_management::BlockDescriptor & block) DAAL_C11_OVERRIDE { return releaseTBlock(block); } + daal::services::Status releaseBlockOfColumnValues(daal::data_management::BlockDescriptor & block) DAAL_C11_OVERRIDE { return releaseTBlock(block); } - daal::services::Status allocateDataMemory(daal::MemType type = daal::dram) DAAL_C11_OVERRIDE - { - return daal::services::Status(daal::services::ErrorMethodNotSupported); - } + daal::services::Status allocateDataMemory(daal::MemType type = daal::dram) DAAL_C11_OVERRIDE { return daal::services::Status(daal::services::ErrorMethodNotSupported); } - void freeDataMemory() DAAL_C11_OVERRIDE - { - daal::services::Status ec(daal::services::ErrorMethodNotSupported); - } + void freeDataMemory() DAAL_C11_OVERRIDE { daal::services::Status ec(daal::services::ErrorMethodNotSupported); } /** \private */ - daal::services::Status serializeImpl(daal::data_management::InputDataArchive *archive) + daal::services::Status serializeImpl(daal::data_management::InputDataArchive * archive) { auto __state = PyGILState_Ensure(); // To make our lives easier, we first create a contiguous array @@ -469,11 +408,12 @@ class NpyNumericTable : public daal::data_management::NumericTable Py_ssize_t len = 0; #if PY_MAJOR_VERSION < 3 char * ds = NULL; - PyString_AsStringAndSize(PyObject_Repr(reinterpret_cast(PyArray_DESCR(ary))), &ds, &len); + PyString_AsStringAndSize(PyObject_Repr(reinterpret_cast(PyArray_DESCR(ary))), &ds, &len); #else - const char * ds = PyUnicode_AsUTF8AndSize(PyObject_Repr(reinterpret_cast(PyArray_DESCR(ary))), &len); + const char * ds = PyUnicode_AsUTF8AndSize(PyObject_Repr(reinterpret_cast(PyArray_DESCR(ary))), &len); #endif - if(ds == NULL) { + if (ds == NULL) + { PyGILState_Release(__state); throw std::invalid_argument("Couldn't get string from/for numpy array's descriptor."); } @@ -483,18 +423,19 @@ class NpyNumericTable : public daal::data_management::NumericTable auto ndim = PyArray_NDIM(ary); archive->set(ndim); size_t N = 1; - for(int i=0; iset(PyArray_DIMS(ary)[i]); N *= PyArray_DIMS(ary)[i]; } - archive->set((static_cast(PyArray_DATA(ary)), N)); + archive->set((static_cast(PyArray_DATA(ary)), N)); PyGILState_Release(__state); return daal::services::Status(); } /** \private */ - daal::services::Status deserializeImpl(const daal::data_management::OutputDataArchive *archive) + daal::services::Status deserializeImpl(const daal::data_management::OutputDataArchive * archive) { auto __state = PyGILState_Ensure(); // First deserialize the type descriptor in string representation... @@ -505,74 +446,80 @@ class NpyNumericTable : public daal::data_management::NumericTable DAAL4PY_CHECK_MALLOC(nds); archive->set(nds, len); // ..then create the type descriptor - PyObject * npy = PyImport_ImportModule("numpy"); + PyObject * npy = PyImport_ImportModule("numpy"); PyObject * globalDictionary = PyModule_GetDict(npy); - PyArray_Descr* nd = reinterpret_cast(PyRun_String(PyString_AsString(PyObject_Str(PyString_FromString(nds))), Py_eval_input, globalDictionary, - NULL)); + PyArray_Descr * nd = reinterpret_cast(PyRun_String(PyString_AsString(PyObject_Str(PyString_FromString(nds))), Py_eval_input, globalDictionary, NULL)); daal::services::daal_free(nds); nds = NULL; - if(nd == NULL) { + if (nd == NULL) + { PyGILState_Release(__state); throw std::invalid_argument("Creating array descriptor failed when deserializing."); } // now get the array shape int ndim; archive->set(ndim); - if(ndim > 2) { + if (ndim > 2) + { PyGILState_Release(__state); throw std::invalid_argument("Found unexpected dimensionality when deserializing."); } npy_intp dims[2]; size_t N = 1; - for(int i=0; iset(dims[i]); DAAL4PY_OVERFLOW_CHECK_BY_MULTIPLICATION(size_t, N, dims[i]); N *= dims[i]; } // create the array... - _ary = reinterpret_cast(PyArray_SimpleNewFromDescr(1, dims, nd)); - if(_ary == NULL) { + _ary = reinterpret_cast(PyArray_SimpleNewFromDescr(1, dims, nd)); + if (_ary == NULL) + { PyGILState_Release(__state); throw std::invalid_argument("Creating numpy array failed when deserializing."); } // ...then copy data - archive->set(reinterpret_cast(PyArray_DATA(_ary)), N); + archive->set(reinterpret_cast(PyArray_DATA(_ary)), N); PyGILState_Release(__state); return daal::services::Status(); } private: - template - daal::services::Status getTBlock(size_t idx, size_t numrows, int rwFlag, daal::data_management::BlockDescriptor& block, size_t firstcol=0, size_t numcols=0xffffffff) + template + daal::services::Status getTBlock(size_t idx, size_t numrows, int rwFlag, daal::data_management::BlockDescriptor & block, size_t firstcol = 0, size_t numcols = 0xffffffff) { // sanitize bounds const size_t ncols = firstcol + numcols <= getNumberOfColumns() ? numcols : getNumberOfColumns() - firstcol; - const size_t nrows = idx + numrows <= getNumberOfRows() ? numrows : getNumberOfRows() - idx; + const size_t nrows = idx + numrows <= getNumberOfRows() ? numrows : getNumberOfRows() - idx; // set shape of blockdescr block.setDetails(firstcol, idx, rwFlag); - if(idx >= getNumberOfRows() || firstcol >= getNumberOfColumns()) { - block.resizeBuffer( ncols, 0 ); + if (idx >= getNumberOfRows() || firstcol >= getNumberOfColumns()) + { + block.resizeBuffer(ncols, 0); return daal::services::Status(); } - if(!block.resizeBuffer(ncols, nrows)) { + if (!block.resizeBuffer(ncols, nrows)) + { return daal::services::Status(daal::services::ErrorMemoryAllocationFailed); } - if(!(rwFlag & static_cast(daal::data_management::readOnly))) return daal::services::Status(); + if (!(rwFlag & static_cast(daal::data_management::readOnly))) return daal::services::Status(); // use our copy method in copy-out mode Hndlr::template do_cpy(_ary, _ddict, block, firstcol, ncols, idx, nrows); return daal::services::Status(); } - template - daal::services::Status releaseTBlock(daal::data_management::BlockDescriptor& block) + template + daal::services::Status releaseTBlock(daal::data_management::BlockDescriptor & block) { - if(block.getRWFlag() & static_cast(daal::data_management::writeOnly)) { + if (block.getRWFlag() & static_cast(daal::data_management::writeOnly)) + { const size_t ncols = block.getNumberOfColumns(); const size_t nrows = block.getNumberOfRows(); diff --git a/src/transceiver.cpp b/src/transceiver.cpp index 3d1df27503..ef6d1b5b58 100644 --- a/src/transceiver.cpp +++ b/src/transceiver.cpp @@ -30,16 +30,24 @@ static std::mutex s_mtx; // We load a python module to get the actual transceiver implementation. // We inspect D4P_TRANSCEIVER env var for using a non-default module. // We throw an exception if something goes wrong (like the module cannot be loaded). -#define CHECK() if(PyErr_Occurred()) { PyErr_Print(); PyGILState_Release(gilstate); throw std::runtime_error("Python Error"); } +#define CHECK() \ + if (PyErr_Occurred()) \ + { \ + PyErr_Print(); \ + PyGILState_Release(gilstate); \ + throw std::runtime_error("Python Error"); \ + } transceiver * get_transceiver() { - if(!s_trsc) { + if (!s_trsc) + { std::lock_guard lock(s_mtx); - if(!s_trsc) { + if (!s_trsc) + { auto gilstate = PyGILState_Ensure(); const char * modname = std::getenv("D4P_TRANSCEIVER"); - if(modname == NULL ) modname = "daal4py.mpi_transceiver"; + if (modname == NULL) modname = "daal4py.mpi_transceiver"; PyObject * mod = PyImport_ImportModule(modname); CHECK(); @@ -51,7 +59,7 @@ transceiver * get_transceiver() PyGILState_Release(gilstate); // we expect the tcvr to be a pointer to a (static) shared-pointer object. - s_trsc.reset(new transceiver(*reinterpret_cast*>(tcvr))); + s_trsc.reset(new transceiver(*reinterpret_cast *>(tcvr))); } } return s_trsc.get(); @@ -60,11 +68,13 @@ transceiver * get_transceiver() void del_transceiver() { - if(s_trsc) { + if (s_trsc) + { std::lock_guard lock(s_mtx); - if(s_trsc) { + if (s_trsc) + { auto gilstate = PyGILState_Ensure(); s_trsc.reset(); - } - } + } + } } diff --git a/src/transceiver.h b/src/transceiver.h index 8c6c254b18..d200a3fa02 100644 --- a/src/transceiver.h +++ b/src/transceiver.h @@ -33,7 +33,6 @@ // The transceiver implementation can be selected by setting env var 'D4P_TRANSCEIVER' to the module name. // The current default is 'mpi_transceiver'. - #ifndef _TRANSCEIVER_INCLUDED_ #define _TRANSCEIVER_INCLUDED_ @@ -49,23 +48,23 @@ class transceiver_base_iface public: // initialize communication network virtual void init() = 0; - + // finalize communication network virtual void fini() = 0; - + // @return number of processes in network virtual size_t nMembers() = 0; - + // @return identifier of current process virtual size_t me() = 0; - + // send message to another process // @param[in] buff bytes to send // @param[in] N number of bytes to send // @param[in] recpnt id of recipient // @param[in] tag message tag, to be matched by recipient - virtual void send(const void* buff, size_t N, size_t recpnt, size_t tag) = 0; - + virtual void send(const void * buff, size_t N, size_t recpnt, size_t tag) = 0; + // receive a message from another process // @param[out] buff buffer to store message in // @param[in] N size of buffer @@ -78,7 +77,6 @@ class transceiver_base_iface virtual ~transceiver_base_iface() {} }; - // Abstract class with all functionality used for communicating between processes. // Extends transceiver_base_iface with collective operations which can be implemented // with functions from transceiver_base_iface (see transceiver_impl) @@ -92,7 +90,7 @@ class transceiver_iface : public transceiver_base_iface // @param[in] sizes number of bytes constributed by each process, relevant on root only // Can be zero also on root if varying==false // @param[in] varying set to false to indicate all members provide same chunksize - virtual void * gather(const void * ptr, size_t N, size_t root, const size_t * sizes, bool varying=true) = 0; + virtual void * gather(const void * ptr, size_t N, size_t root, const size_t * sizes, bool varying = true) = 0; // Broadcast data from root to all other processes // @param[inout] ptr on root: pointer to data to be sent @@ -102,7 +100,8 @@ class transceiver_iface : public transceiver_base_iface virtual void bcast(void * ptr, size_t N, size_t root) = 0; // indicates data types for reductions - enum type_type { + enum type_type + { BOOL, INT8, UINT8, @@ -115,7 +114,8 @@ class transceiver_iface : public transceiver_base_iface }; // indicates reduction operation - enum operation_type { + enum operation_type + { OP_MAX = 100, OP_MIN, OP_SUM, @@ -151,45 +151,31 @@ class transceiver_iface : public transceiver_base_iface class transceiver_impl : public transceiver_iface { public: - transceiver_impl() - : m_me(-1), - m_nMembers(0), - m_initialized(false) - {} + transceiver_impl() : m_me(-1), m_nMembers(0), m_initialized(false) {} // implementations/derived classes must call this in their init() virtual void init() { - if (!m_initialized) { - m_me = me(); - m_nMembers = nMembers(); - m_initialized = true; - } - } - - virtual void * gather(const void * ptr, size_t N, size_t root, const size_t * sizes, bool varying) - { - throw std::logic_error("transceiver_base::gather not yet implemented"); + if (!m_initialized) + { + m_me = me(); + m_nMembers = nMembers(); + m_initialized = true; + } } - virtual void bcast(void * ptr, size_t N, size_t root) - { - throw std::logic_error("transceiver_base::bcast not yet implemented"); - } + virtual void * gather(const void * ptr, size_t N, size_t root, const size_t * sizes, bool varying) { throw std::logic_error("transceiver_base::gather not yet implemented"); } - virtual void reduce_all(void * inout, type_type T, size_t N, operation_type op) - { - throw std::logic_error("transceiver_base::reduce_all not yet implemented"); - } + virtual void bcast(void * ptr, size_t N, size_t root) { throw std::logic_error("transceiver_base::bcast not yet implemented"); } + + virtual void reduce_all(void * inout, type_type T, size_t N, operation_type op) { throw std::logic_error("transceiver_base::reduce_all not yet implemented"); } + + virtual void reduce_exscan(void * inout, type_type T, size_t N, operation_type op) { throw std::logic_error("transceiver_base::reduce_exscan not yet implemented"); } - virtual void reduce_exscan(void * inout, type_type T, size_t N, operation_type op) - { - throw std::logic_error("transceiver_base::reduce_exscan not yet implemented"); - } protected: bool m_initialized; - size_t m_me; // result of me() - size_t m_nMembers; // result of nMembers() + size_t m_me; // result of me() + size_t m_nMembers; // result of nMembers() }; // Higher-level, typ-safe transceiver abstraction. @@ -198,41 +184,31 @@ class transceiver { public: // @param[in] t actual transceiver object - transceiver(const std::shared_ptr & t) - : m_transceiver(t) + transceiver(const std::shared_ptr & t) : m_transceiver(t) { m_transceiver->init(); m_inited = true; } - - ~transceiver() - { - m_transceiver->fini(); - } - - inline size_t nMembers() - { - return m_transceiver->nMembers(); - } - inline size_t me() - { - return m_transceiver->me(); - } + ~transceiver() { m_transceiver->fini(); } + + inline size_t nMembers() { return m_transceiver->nMembers(); } + + inline size_t me() { return m_transceiver->me(); } // Send object of given type to recpnt. // Object is assumed to be a daal::serializable object. // @param[in] obj object to be sent // @param[in] recpnt recipient // @param[in] tag message tag to be matched by recipient - template - void send(const T& obj, size_t recpnt, size_t tag); + template + void send(const T & obj, size_t recpnt, size_t tag); // Receive an object of given type from sender // Object is assumed to be a daal::serializable object. // @param[in] sender sender // @param[in] tag message tag to be matched with send - template + template T recv(size_t sender, size_t tag); // Gather objects stored in a shared pointer on given root process @@ -240,8 +216,8 @@ class transceiver // @param[in] sptr shared pointer with object to be gathered // @param[in] root process id which collects data // @param[in] varying can be set to false if objects are of identical size on all processes - template - std::vector > gather(const daal::services::SharedPtr & sptr, size_t root=0, bool varying=true); + template + std::vector > gather(const daal::services::SharedPtr & sptr, size_t root = 0, bool varying = true); // Broadcast object from root to all other processes // Object is serialized similar to memcpy(buffer, &obj, sizeof(obj)). @@ -249,16 +225,16 @@ class transceiver // @param[inout] obj on root: reference of object to be sent // on all other processes: reference of object to store received data // @param[in] root process id which collects data - template - void bcast(T & obj, size_t root=0); + template + void bcast(T & obj, size_t root = 0); // Broadcast shared pointer object from root to all other processes // Object is assumed to be a daal::serializable object. // @param[inout] obj on root: reference of shared pointer object to be sent // on all other processes: reference of shared pointer object to store received data // @param[in] root process id which collects data - template - void bcast(daal::services::SharedPtr & obj, size_t root=0); + template + void bcast(daal::services::SharedPtr & obj, size_t root = 0); // Element-wise reduce given array with given operation and provide result on all processes // Elements are serialized similar to memcpy(buffer, &obj, sizeof(obj)). @@ -266,7 +242,7 @@ class transceiver // @param[inout] inout input to reduction and result // @param[in] N number of elements in inout // @param[in] op reduction operation - template + template void reduce_all(T * buf, size_t n, transceiver_iface::operation_type op); // Element-wise reduce given array partially with given operation @@ -276,12 +252,12 @@ class transceiver // @param[inout] inout input to reduction and result // @param[in] N number of elements in inout // @param[in] op reduction operation - template + template void reduce_exscan(T * buf, size_t n, transceiver_iface::operation_type op); protected: std::shared_ptr m_transceiver; // the actual transceiver object - bool m_inited; // Initialization status + bool m_inited; // Initialization status }; // @return the global transceiver object @@ -289,154 +265,214 @@ class transceiver extern transceiver * get_transceiver(); extern void del_transceiver(); -template struct from_std; -template<> struct from_std { static const transceiver_iface::type_type typ = transceiver_iface::DOUBLE; }; -template<> struct from_std { static const transceiver_iface::type_type typ = transceiver_iface::FLOAT; }; -template<> struct from_std { static const transceiver_iface::type_type typ = transceiver_iface::BOOL; }; -template<> struct from_std { static const transceiver_iface::type_type typ = transceiver_iface::INT8; }; -template<> struct from_std { static const transceiver_iface::type_type typ = transceiver_iface::UINT8; }; -template<> struct from_std { static const transceiver_iface::type_type typ = transceiver_iface::INT32; }; -template<> struct from_std { static const transceiver_iface::type_type typ = transceiver_iface::UINT32; }; -template<> struct from_std { static const transceiver_iface::type_type typ = transceiver_iface::INT64; }; -template<> struct from_std { static const transceiver_iface::type_type typ = transceiver_iface::UINT64; }; +template +struct from_std; +template <> +struct from_std +{ + static const transceiver_iface::type_type typ = transceiver_iface::DOUBLE; +}; +template <> +struct from_std +{ + static const transceiver_iface::type_type typ = transceiver_iface::FLOAT; +}; +template <> +struct from_std +{ + static const transceiver_iface::type_type typ = transceiver_iface::BOOL; +}; +template <> +struct from_std +{ + static const transceiver_iface::type_type typ = transceiver_iface::INT8; +}; +template <> +struct from_std +{ + static const transceiver_iface::type_type typ = transceiver_iface::UINT8; +}; +template <> +struct from_std +{ + static const transceiver_iface::type_type typ = transceiver_iface::INT32; +}; +template <> +struct from_std +{ + static const transceiver_iface::type_type typ = transceiver_iface::UINT32; +}; +template <> +struct from_std +{ + static const transceiver_iface::type_type typ = transceiver_iface::INT64; +}; +template <> +struct from_std +{ + static const transceiver_iface::type_type typ = transceiver_iface::UINT64; +}; #ifdef __APPLE__ -template<> struct from_std { static const transceiver_iface::type_type typ = transceiver_iface::INT64; }; -template<> struct from_std { static const transceiver_iface::type_type typ = transceiver_iface::UINT64; }; +template <> +struct from_std +{ + static const transceiver_iface::type_type typ = transceiver_iface::INT64; +}; +template <> +struct from_std +{ + static const transceiver_iface::type_type typ = transceiver_iface::UINT64; +}; #endif -template +template static bool not_empty(const daal::services::SharedPtr & obj) { return obj; } -template +template static bool not_empty(const daal::data_management::interface1::NumericTablePtr & obj) { return obj && obj->getNumberOfRows() && obj->getNumberOfColumns(); } -template -void transceiver::send(const T& obj, size_t recpnt, size_t tag) +template +void transceiver::send(const T & obj, size_t recpnt, size_t tag) { daal::data_management::InputDataArchive in_arch; int mysize(0); // Serialize the oneDAL object into a data archive - if(not_empty(obj)) { + if (not_empty(obj)) + { obj->serialize(in_arch); mysize = in_arch.getSizeOfArchive(); } // and send it away to our recipient m_transceiver->send(&mysize, sizeof(mysize), recpnt, tag); - if(mysize > 0) { + if (mysize > 0) + { m_transceiver->send(in_arch.getArchiveAsArraySharedPtr().get(), mysize, recpnt, tag); } } -template +template T transceiver::recv(size_t sender, size_t tag) { - int sz(0); - size_t br = m_transceiver->recv(&sz, sizeof(sz), sender, tag); - assert(br == sizeof(sz)); - T res; - if(sz > 0) { - daal::byte * buf = static_cast(daal::services::daal_malloc(sz * sizeof(daal::byte))); - DAAL4PY_CHECK_MALLOC(buf); - br = m_transceiver->recv(buf, sz, sender, tag); - assert(br == sz); - // It'd be nice to avoid the additional copy, need a special DatArchive (see older CnC versions of daal4py) - daal::data_management::OutputDataArchive out_arch(buf, sz); - res = daal::services::staticPointerCast(out_arch.getAsSharedPtr()); - daal::services::daal_free(buf); - buf = NULL; - } - return res; + int sz(0); + size_t br = m_transceiver->recv(&sz, sizeof(sz), sender, tag); + assert(br == sizeof(sz)); + T res; + if (sz > 0) + { + daal::byte * buf = static_cast(daal::services::daal_malloc(sz * sizeof(daal::byte))); + DAAL4PY_CHECK_MALLOC(buf); + br = m_transceiver->recv(buf, sz, sender, tag); + assert(br == sz); + // It'd be nice to avoid the additional copy, need a special DatArchive (see older CnC versions of daal4py) + daal::data_management::OutputDataArchive out_arch(buf, sz); + res = daal::services::staticPointerCast(out_arch.getAsSharedPtr()); + daal::services::daal_free(buf); + buf = NULL; + } + return res; } -template +template std::vector > transceiver::gather(const daal::services::SharedPtr & obj, size_t root, bool varying) { // we split into 2 gathers: one to send the sizes, a second to send the actual data - if(varying == false) std::cerr << "Performance warning: no optimization implemented for non-varying gather sizes\n"; - + if (varying == false) std::cerr << "Performance warning: no optimization implemented for non-varying gather sizes\n"; + size_t mysize = 0; daal::data_management::InputDataArchive in_arch; // If we got the data then serialize the partial result into a data archive // In other case the size of data to send is equal zero, send nothing - if (obj) { + if (obj) + { obj->serialize(in_arch); mysize = in_arch.getSizeOfArchive(); } // gather all partial results // First get all sizes, then gather on root - size_t * sizes = reinterpret_cast(m_transceiver->gather(&mysize, sizeof(mysize), root, NULL, false)); - char * buff = reinterpret_cast(m_transceiver->gather(in_arch.getArchiveAsArraySharedPtr().get(), mysize, root, sizes)); - + size_t * sizes = reinterpret_cast(m_transceiver->gather(&mysize, sizeof(mysize), root, NULL, false)); + char * buff = reinterpret_cast(m_transceiver->gather(in_arch.getArchiveAsArraySharedPtr().get(), mysize, root, sizes)); + std::vector > all; - if(m_transceiver->me() == root) { + if (m_transceiver->me() == root) + { size_t offset = 0; - size_t nm = m_transceiver->nMembers(); + size_t nm = m_transceiver->nMembers(); all.resize(nm); - for(int i=0; i 0) { + for (int i = 0; i < nm; ++i) + { + if (sizes[i] > 0) + { // This is inefficient, we need to write our own DatArchive to avoid extra copy - daal::data_management::OutputDataArchive out_arch(reinterpret_cast(buff+offset), sizes[i]); + daal::data_management::OutputDataArchive out_arch(reinterpret_cast(buff + offset), sizes[i]); all[i] = daal::services::staticPointerCast(out_arch.getAsSharedPtr()); offset += sizes[i]; - } else { + } + else + { all[i] = daal::services::SharedPtr(); } } daal::services::daal_free(buff); buff = NULL; } - + daal::services::daal_free(sizes); sizes = NULL; - + return all; } -template +template void transceiver::bcast(T & obj, size_t root) { m_transceiver->bcast(&obj, sizeof(obj), root); } -template +template void transceiver::bcast(daal::services::SharedPtr & obj, size_t root) { // we split into 2 messages: one to send the size, a second to send the actual data - if(m_transceiver->me() == root) { + if (m_transceiver->me() == root) + { // Serialize the partial result into a data archive daal::data_management::InputDataArchive in_arch; obj->serialize(in_arch); int size = in_arch.getSizeOfArchive(); m_transceiver->bcast(&size, sizeof(size), root); - if(size > 0) m_transceiver->bcast(in_arch.getArchiveAsArraySharedPtr().get(), size, root); - } else { + if (size > 0) m_transceiver->bcast(in_arch.getArchiveAsArraySharedPtr().get(), size, root); + } + else + { int size = 0; m_transceiver->bcast(&size, sizeof(size), root); - if(size > 0) { + if (size > 0) + { char * buff = static_cast(daal::services::daal_malloc(size)); m_transceiver->bcast(buff, size, root); - daal::data_management::OutputDataArchive out_arch(reinterpret_cast(buff), size); + daal::data_management::OutputDataArchive out_arch(reinterpret_cast(buff), size); obj = daal::services::staticPointerCast(out_arch.getAsSharedPtr()); - } else { + } + else + { obj.reset(); } } } -template +template void transceiver::reduce_all(T * inout, size_t n, transceiver_iface::operation_type op) { m_transceiver->reduce_all(inout, from_std::typ, n, op); } -template +template void transceiver::reduce_exscan(T * inout, size_t n, transceiver_iface::operation_type op) { m_transceiver->reduce_exscan(inout, from_std::typ, n, op); diff --git a/src/tree_visitor.h b/src/tree_visitor.h index 9c341955ff..aa492e90c6 100644 --- a/src/tree_visitor.h +++ b/src/tree_visitor.h @@ -24,11 +24,12 @@ #include #define TERMINAL_NODE -1 -#define NO_FEATURE -2 +#define NO_FEATURE -2 // cython will convert this struct into an numpy structured array // This is the layout that sklearn expects for its tree traversal mechanics -struct skl_tree_node { +struct skl_tree_node +{ Py_ssize_t left_child; Py_ssize_t right_child; Py_ssize_t feature; @@ -38,23 +39,14 @@ struct skl_tree_node { double weighted_n_node_samples; unsigned char missing_go_to_left; - skl_tree_node() - : left_child(TERMINAL_NODE), - right_child(TERMINAL_NODE), - feature(NO_FEATURE), - threshold(NO_FEATURE), - impurity(get_nan64()), - n_node_samples(0), - weighted_n_node_samples(0.0), - missing_go_to_left(false) - {} + skl_tree_node() : left_child(TERMINAL_NODE), right_child(TERMINAL_NODE), feature(NO_FEATURE), threshold(NO_FEATURE), impurity(get_nan64()), n_node_samples(0), weighted_n_node_samples(0.0), missing_go_to_left(false) {} }; // We'd like the Models to have the descriptor typedefs in the class // For now we provide a meat-class to map Models to descriptors // Models might need an explicit instantiation providing visitor_type, leaf_desc_type and split_desc_type // This is the default template for models using regression visitors -template +template struct TNVT { typedef daal::algorithms::tree_utils::regression::TreeNodeVisitor visitor_type; @@ -63,7 +55,7 @@ struct TNVT }; // Decision forest classification uses classification vistors -template<> +template <> struct TNVT { typedef daal::algorithms::tree_utils::classification::TreeNodeVisitor visitor_type; @@ -72,71 +64,71 @@ struct TNVT }; // Decision tree classification uses classification vistors -template<> -struct TNVT - : public TNVT +template <> +struct TNVT : public TNVT {}; // our tree visitor for counting nodes // TODO: Needs to store leaf-node response, and split-node impurity/sample_counts values -template +template class NodeDepthCountNodeVisitor : public TNVT::visitor_type { public: NodeDepthCountNodeVisitor(); - virtual bool onLeafNode(const typename TNVT::leaf_desc_type &desc); - virtual bool onSplitNode(const typename TNVT::split_desc_type &desc); + virtual bool onLeafNode(const typename TNVT::leaf_desc_type & desc); + virtual bool onSplitNode(const typename TNVT::split_desc_type & desc); size_t n_nodes; size_t depth; size_t n_leaf_nodes; }; - // equivalent for numpy arange -template -std::vector arange(T start, T stop, T step = 1) { +template +std::vector arange(T start, T stop, T step = 1) +{ std::vector res; - for(T i = start; i < stop; i += step) res.push_back(i); + for (T i = start; i < stop; i += step) res.push_back(i); return res; } // We only expose the minimum information to cython struct TreeState { - skl_tree_node *node_ar; - double *value_ar; - size_t max_depth; - size_t node_count; - size_t leaf_count; - size_t class_count; + skl_tree_node * node_ar; + double * value_ar; + size_t max_depth; + size_t node_count; + size_t leaf_count; + size_t class_count; }; // our tree visitor for getting tree state -template +template class toSKLearnTreeObjectVisitor : public TNVT::visitor_type, public TreeState { public: toSKLearnTreeObjectVisitor(size_t _depth, size_t _n_nodes, size_t _n_leafs, size_t _max_n_classes); - virtual bool onSplitNode(const typename TNVT::split_desc_type &desc); - virtual bool onLeafNode(const typename TNVT::leaf_desc_type &desc); + virtual bool onSplitNode(const typename TNVT::split_desc_type & desc); + virtual bool onLeafNode(const typename TNVT::leaf_desc_type & desc); + protected: // generic leaf node handling - bool _onLeafNode(const daal::algorithms::tree_utils::NodeDescriptor &desc); + bool _onLeafNode(const daal::algorithms::tree_utils::NodeDescriptor & desc); // implementation of inLeafNode for regression visitors - bool _onLeafNode(const typename TNVT::leaf_desc_type &desc, std::false_type); + bool _onLeafNode(const typename TNVT::leaf_desc_type & desc, std::false_type); // implementation of inLeafNode for classification visitors - bool _onLeafNode(const typename TNVT::leaf_desc_type &desc, std::true_type); + bool _onLeafNode(const typename TNVT::leaf_desc_type & desc, std::true_type); - size_t node_id; - size_t max_n_classes; + size_t node_id; + size_t max_n_classes; std::vector parents; }; // This is the function for getting the tree state from a forest which we use in cython // we will have different model types, so it's a template // Note: the caller will own the memory of the 2 returned arrays! -template +template TreeState _getTreeState(M * model, size_t iTree, size_t n_classes) { // First count nodes @@ -152,7 +144,7 @@ TreeState _getTreeState(M * model, size_t iTree, size_t n_classes) // This is the function for getting the tree state frmo a tree which we use in cython // we will have different model types, so it's a template // Note: the caller will own the memory of the 2 returned arrays! -template +template TreeState _getTreeState(M * model, size_t n_classes) { // First count nodes @@ -165,23 +157,19 @@ TreeState _getTreeState(M * model, size_t n_classes) return TreeState(tsv); } - // **************************************************** // **************************************************** // Visitor implementation // **************************************************** // **************************************************** -template -NodeDepthCountNodeVisitor::NodeDepthCountNodeVisitor() - : n_nodes(0), - depth(0), - n_leaf_nodes(0) +template +NodeDepthCountNodeVisitor::NodeDepthCountNodeVisitor() : n_nodes(0), depth(0), n_leaf_nodes(0) {} // TODO: Needs to store leaf-node response, and split-node impurity/sample_counts values -template -bool NodeDepthCountNodeVisitor::onLeafNode(const typename TNVT::leaf_desc_type &desc) +template +bool NodeDepthCountNodeVisitor::onLeafNode(const typename TNVT::leaf_desc_type & desc) { ++n_nodes; ++n_leaf_nodes; @@ -189,105 +177,106 @@ bool NodeDepthCountNodeVisitor::onLeafNode(const typename TNVT::leaf_desc_ return true; } -template -bool NodeDepthCountNodeVisitor::onSplitNode(const typename TNVT::split_desc_type &desc) +template +bool NodeDepthCountNodeVisitor::onSplitNode(const typename TNVT::split_desc_type & desc) { ++n_nodes; depth = std::max((const size_t)depth, desc.level); return true; } - -template -toSKLearnTreeObjectVisitor::toSKLearnTreeObjectVisitor(size_t _depth, size_t _n_nodes, size_t _n_leafs, size_t _max_n_classes) - : node_id(0), - parents(arange(-1, _depth-1)) +template +toSKLearnTreeObjectVisitor::toSKLearnTreeObjectVisitor(size_t _depth, size_t _n_nodes, size_t _n_leafs, size_t _max_n_classes) : node_id(0), parents(arange(-1, _depth - 1)) { max_n_classes = _max_n_classes; - node_count = _n_nodes; - max_depth = _depth; - leaf_count = _n_leafs; - class_count = _max_n_classes; - node_ar = new skl_tree_node[node_count]; - value_ar = new double[node_count*1*class_count](); // oneDAL only supports scalar responses for now + node_count = _n_nodes; + max_depth = _depth; + leaf_count = _n_leafs; + class_count = _max_n_classes; + node_ar = new skl_tree_node[node_count]; + value_ar = new double[node_count * 1 * class_count](); // oneDAL only supports scalar responses for now } - -template -bool toSKLearnTreeObjectVisitor::onSplitNode(const typename TNVT::split_desc_type &desc) +template +bool toSKLearnTreeObjectVisitor::onSplitNode(const typename TNVT::split_desc_type & desc) { - if(desc.level > 0) { + if (desc.level > 0) + { // has parents Py_ssize_t parent = parents[desc.level - 1]; - if(node_ar[parent].left_child > 0) { + if (node_ar[parent].left_child > 0) + { assert(node_ar[node_id].right_child < 0); node_ar[parent].right_child = node_id; - } else { + } + else + { node_ar[parent].left_child = node_id; } } - parents[desc.level] = node_id; - node_ar[node_id].feature = desc.featureIndex; - node_ar[node_id].threshold = desc.featureValue; - node_ar[node_id].impurity = desc.impurity; - node_ar[node_id].n_node_samples = desc.nNodeSampleCount; + parents[desc.level] = node_id; + node_ar[node_id].feature = desc.featureIndex; + node_ar[node_id].threshold = desc.featureValue; + node_ar[node_id].impurity = desc.impurity; + node_ar[node_id].n_node_samples = desc.nNodeSampleCount; node_ar[node_id].weighted_n_node_samples = desc.nNodeSampleCount; - node_ar[node_id].missing_go_to_left = false; + node_ar[node_id].missing_go_to_left = false; // wrap-up ++node_id; return true; } -template -bool toSKLearnTreeObjectVisitor::onLeafNode(const typename TNVT::leaf_desc_type &desc) +template +bool toSKLearnTreeObjectVisitor::onLeafNode(const typename TNVT::leaf_desc_type & desc) { // we use somewhat complicated C++'11 construct to determine if the descriptor is for classification // The actual implementation is the overloaded _onLeafNode which depends on integral_constant types true_type or false_type // we might want to make this dependent on a more meaningful type than bool - return _onLeafNode(desc, - typename std::integral_constant::leaf_desc_type>::value>()); + return _onLeafNode(desc, typename std::integral_constant::leaf_desc_type>::value>()); } // stuff that is done for all leaf node types -template -bool toSKLearnTreeObjectVisitor::_onLeafNode(const daal::algorithms::tree_utils::NodeDescriptor &desc) +template +bool toSKLearnTreeObjectVisitor::_onLeafNode(const daal::algorithms::tree_utils::NodeDescriptor & desc) { - if(desc.level) { + if (desc.level) + { Py_ssize_t parent = parents[desc.level - 1]; - if(node_ar[parent].left_child > 0) { + if (node_ar[parent].left_child > 0) + { assert(node_ar[node_id].right_child < 0); node_ar[parent].right_child = node_id; - } else { + } + else + { node_ar[parent].left_child = node_id; } } - node_ar[node_id].impurity = desc.impurity; - node_ar[node_id].n_node_samples = desc.nNodeSampleCount; + node_ar[node_id].impurity = desc.impurity; + node_ar[node_id].n_node_samples = desc.nNodeSampleCount; node_ar[node_id].weighted_n_node_samples = desc.nNodeSampleCount; - node_ar[node_id].missing_go_to_left = false; + node_ar[node_id].missing_go_to_left = false; return true; } -template -bool toSKLearnTreeObjectVisitor::_onLeafNode(const typename TNVT::leaf_desc_type &desc, std::false_type) +template +bool toSKLearnTreeObjectVisitor::_onLeafNode(const typename TNVT::leaf_desc_type & desc, std::false_type) { _onLeafNode(desc); DAAL4PY_OVERFLOW_CHECK_BY_MULTIPLICATION(int, node_id, class_count); - value_ar[node_id*1*class_count] = desc.response; + value_ar[node_id * 1 * class_count] = desc.response; // wrap-up ++node_id; return true; } -template -bool toSKLearnTreeObjectVisitor::_onLeafNode(const typename TNVT::leaf_desc_type &desc, std::true_type) +template +bool toSKLearnTreeObjectVisitor::_onLeafNode(const typename TNVT::leaf_desc_type & desc, std::true_type) { if (desc.level > 0) { @@ -295,7 +284,7 @@ bool toSKLearnTreeObjectVisitor::_onLeafNode(const typename TNVT::leaf_des while (depth >= 0) { size_t id = parents[depth]; - value_ar[id*1*class_count + desc.label] += desc.nNodeSampleCount; + value_ar[id * 1 * class_count + desc.label] += desc.nNodeSampleCount; if (depth == 0) { break; @@ -304,8 +293,8 @@ bool toSKLearnTreeObjectVisitor::_onLeafNode(const typename TNVT::leaf_des } } _onLeafNode(desc); - DAAL4PY_OVERFLOW_CHECK_BY_ADDING(int, node_id*1*class_count, desc.label); - value_ar[node_id*1*class_count + desc.label] += desc.nNodeSampleCount; + DAAL4PY_OVERFLOW_CHECK_BY_ADDING(int, node_id * 1 * class_count, desc.label); + value_ar[node_id * 1 * class_count + desc.label] += desc.nNodeSampleCount; // wrap-up ++node_id;