Skip to content

Commit 89635a8

Browse files
authored
Merge pull request #3 from vvish/vvish/feat/algo_recover
Algorithm re-enablement
2 parents 2427e4f + e1b4783 commit 89635a8

File tree

15 files changed

+995
-0
lines changed

15 files changed

+995
-0
lines changed

.clang-format

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
---
2+
BasedOnStyle: WebKit
3+
AlignAfterOpenBracket: Align
4+
AlignConsecutiveDeclarations: 'false'
5+
BreakBeforeBraces: Allman
6+
NamespaceIndentation: All
7+
8+
...

.github/workflows/linux.yml

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
name: Linux build
2+
on: [push, pull_request]
3+
concurrency:
4+
group: ${{ github.workflow }}-${{ github.job }}-${{ github.ref }}
5+
cancel-in-progress: true
6+
defaults:
7+
run:
8+
shell: bash -l {0}
9+
jobs:
10+
build:
11+
runs-on: ubuntu-latest
12+
name: '${{ matrix.sys.compiler }} ${{ matrix.sys.version }} - ${{ matrix.sys.flags }}'
13+
strategy:
14+
matrix:
15+
sys:
16+
- { compiler: 'gcc', version: '12', flags: 'force_no_instr_set' }
17+
- { compiler: 'gcc', version: '13', flags: 'enable_xtl_complex' }
18+
- { compiler: 'gcc', version: '14', flags: 'avx' }
19+
- { compiler: 'gcc', version: '13', flags: 'avx512' }
20+
- { compiler: 'gcc', version: '12', flags: 'i386' }
21+
- { compiler: 'gcc', version: '13', flags: 'avx512pf' }
22+
- { compiler: 'gcc', version: '13', flags: 'avx512vbmi' }
23+
- { compiler: 'gcc', version: '14', flags: 'avx512vbmi2' }
24+
- { compiler: 'gcc', version: '13', flags: 'avx512vnni' }
25+
- { compiler: 'clang', version: '16', flags: 'force_no_instr_set' }
26+
- { compiler: 'clang', version: '16', flags: 'enable_xtl_complex' }
27+
- { compiler: 'clang', version: '17', flags: 'avx' }
28+
- { compiler: 'clang', version: '17', flags: 'sse3' }
29+
- { compiler: 'clang', version: '18', flags: 'avx512' }
30+
steps:
31+
- name: Setup compiler
32+
if: ${{ matrix.sys.compiler == 'gcc' }}
33+
run: |
34+
GCC_VERSION=${{ matrix.sys.version }}
35+
sudo apt-get update
36+
sudo apt-get --no-install-suggests --no-install-recommends install g++-$GCC_VERSION
37+
sudo dpkg --add-architecture i386
38+
sudo add-apt-repository ppa:ubuntu-toolchain-r/test
39+
sudo apt-get update
40+
sudo apt-get --no-install-suggests --no-install-recommends install gcc-$GCC_VERSION-multilib g++-$GCC_VERSION-multilib linux-libc-dev:i386
41+
CC=gcc-$GCC_VERSION
42+
echo "CC=$CC" >> $GITHUB_ENV
43+
CXX=g++-$GCC_VERSION
44+
echo "CXX=$CXX" >> $GITHUB_ENV
45+
- name: Setup compiler
46+
if: ${{ matrix.sys.compiler == 'clang' }}
47+
run: |
48+
LLVM_VERSION=${{ matrix.sys.version }}
49+
sudo apt-get update || exit 1
50+
sudo apt-get --no-install-suggests --no-install-recommends install clang-$LLVM_VERSION || exit 1
51+
sudo apt-get --no-install-suggests --no-install-recommends install g++ g++-multilib || exit 1
52+
sudo ln -s /usr/include/asm-generic /usr/include/asm
53+
CC=clang-$LLVM_VERSION
54+
echo "CC=$CC" >> $GITHUB_ENV
55+
CXX=clang++-$LLVM_VERSION
56+
echo "CXX=$CXX" >> $GITHUB_ENV
57+
- name: Checkout xsimd-algorithm
58+
uses: actions/checkout@v3
59+
- name: Set conda environment
60+
uses: mamba-org/setup-micromamba@main
61+
with:
62+
environment-name: myenv
63+
environment-file: environment-dev.yml
64+
init-shell: bash
65+
cache-downloads: true
66+
- name: Setup SDE
67+
if: startswith(matrix.sys.flags, 'avx512')
68+
run: sh install_sde.sh
69+
- name: Configure build
  env:
    CC: ${{ env.CC }}
    CXX: ${{ env.CXX }}
  run: |
    # Translate the matrix "flags" entry into CMake arguments / compiler flags.
    if [[ '${{ matrix.sys.flags }}' == 'enable_xtl_complex' ]]; then
      CMAKE_EXTRA_ARGS="$CMAKE_EXTRA_ARGS -DENABLE_XTL_COMPLEX=ON"
    fi
    if [[ '${{ matrix.sys.flags }}' == 'avx' ]]; then
      CMAKE_EXTRA_ARGS="$CMAKE_EXTRA_ARGS -DTARGET_ARCH=sandybridge"
    fi
    if [[ '${{ matrix.sys.flags }}' == 'sse3' ]]; then
      CMAKE_EXTRA_ARGS="$CMAKE_EXTRA_ARGS -DTARGET_ARCH=nocona"
    fi
    if [[ '${{ matrix.sys.flags }}' == 'avx512' ]]; then
      CMAKE_EXTRA_ARGS="$CMAKE_EXTRA_ARGS -DTARGET_ARCH=skylake-avx512"
    fi
    if [[ '${{ matrix.sys.flags }}' == 'avx512pf' ]]; then
      CMAKE_EXTRA_ARGS="$CMAKE_EXTRA_ARGS -DTARGET_ARCH=knl"
    fi
    if [[ '${{ matrix.sys.flags }}' == 'avx512vbmi' ]]; then
      CMAKE_EXTRA_ARGS="$CMAKE_EXTRA_ARGS -DTARGET_ARCH=cannonlake"
    fi
    if [[ '${{ matrix.sys.flags }}' == 'avx512vbmi2' ]]; then
      CMAKE_EXTRA_ARGS="$CMAKE_EXTRA_ARGS -DTARGET_ARCH=icelake-server"
    fi
    if [[ '${{ matrix.sys.flags }}' == 'avx512vnni' ]]; then
      CMAKE_EXTRA_ARGS="$CMAKE_EXTRA_ARGS -DTARGET_ARCH=knm"
    fi
    if [[ '${{ matrix.sys.flags }}' == 'i386' ]]; then
      CXX_FLAGS="$CXX_FLAGS -m32"
    fi
    # Every configuration except 'force_no_instr_set' is built with -Werror.
    if [[ '${{ matrix.sys.flags }}' == 'force_no_instr_set' ]]; then
      :
    else
      CMAKE_EXTRA_ARGS="$CMAKE_EXTRA_ARGS -DXSIMD_ENABLE_WERROR=ON"
    fi

    # Cheap way of spotting uninitialized read
    CXX_FLAGS="$CXX_FLAGS -ftrivial-auto-var-init=pattern"

    mkdir _build
    cd _build
    # CMAKE_CXX_FLAGS must be double-quoted: single quotes would hand the
    # literal text $CXX_FLAGS to CMake and drop the flags collected above.
    cmake .. -DBUILD_TESTS=ON \
             -DCMAKE_BUILD_TYPE=Release \
             -DCMAKE_C_COMPILER=$CC \
             -DCMAKE_CXX_COMPILER=$CXX \
             $CMAKE_EXTRA_ARGS \
             -DCMAKE_CXX_FLAGS="$CXX_FLAGS" \
             -G Ninja
119+
- name: Build
120+
run: ninja -C _build
121+
- name: Test
122+
run: |
123+
cd _build
124+
cd test
125+
if echo '${{ matrix.sys.flags }}' | grep -q 'avx512' ; then
126+
../../sde-external-9.48.0-2024-11-25-lin/sde64 -tgl -- ./test_xsimd_algorithm
127+
else
128+
./test_xsimd_algorithm
129+
fi

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,7 @@
3030
*.exe
3131
*.out
3232
*.app
33+
34+
# Build
35+
build/
36+
build_*/

CMakeLists.txt

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
cmake_minimum_required(VERSION 3.15..3.29)
2+
project(xsimd-algorithm CXX)
3+
4+
set(XSIMDALGO_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include)
5+
6+
# Dependencies
7+
# ============
8+
9+
set(xsimd_REQUIRED_VERSION 13.2.0)
10+
# Locate xsimd. When a target named xsimd already exists, reuse it and validate
# its version against xsimd_REQUIRED_VERSION; otherwise fall back to find_package.
if(TARGET xsimd)
    set(xsimd_VERSION ${XSIMD_VERSION_MAJOR}.${XSIMD_VERSION_MINOR}.${XSIMD_VERSION_PATCH})
    if(${xsimd_VERSION} VERSION_LESS ${xsimd_REQUIRED_VERSION})
        # FATAL_ERROR stops configuration. A bare "ERROR" is not a message mode:
        # it would only be printed as part of the text and the build would go on.
        message(FATAL_ERROR "Mismatch xsimd versions. Found '${xsimd_VERSION}' but requires: '${xsimd_REQUIRED_VERSION}'")
    else()
        message(STATUS "Found xsimd v${xsimd_VERSION}")
    endif()
else()
    find_package(xsimd ${xsimd_REQUIRED_VERSION} REQUIRED)
    message(STATUS "Found xsimd: ${xsimd_INCLUDE_DIRS}/xsimd")
endif()
21+
22+
# Build
23+
# =====
24+
25+
set(XSIMDALGO_HEADERS
26+
${XSIMDALGO_INCLUDE_DIR}/xsimd_algo/algorithms.hpp
27+
)
28+
29+
add_library(xsimd-algorithm INTERFACE)
30+
31+
target_include_directories(xsimd-algorithm INTERFACE
32+
$<BUILD_INTERFACE:${XSIMDALGO_INCLUDE_DIR}>
33+
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
34+
$<INSTALL_INTERFACE:include>)
35+
36+
target_compile_features(xsimd-algorithm INTERFACE cxx_std_20)
37+
target_link_libraries(xsimd-algorithm INTERFACE xsimd)
38+
39+
OPTION(BUILD_TESTS "xsimd-algorithm test suite" OFF)
40+
41+
if(BUILD_TESTS)
42+
enable_testing()
43+
add_subdirectory(test)
44+
endif()
45+
46+
# Installation
47+
# ============
48+
49+
include(GNUInstallDirs)
50+
include(CMakePackageConfigHelpers)
51+
52+
install(TARGETS xsimd-algorithm
53+
EXPORT ${PROJECT_NAME}-targets)
54+
55+
# Makes the project importable from the build directory
56+
export(EXPORT ${PROJECT_NAME}-targets
57+
FILE "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Targets.cmake")
58+
59+
install(DIRECTORY ${XSIMDALGO_INCLUDE_DIR}/xsimd_algo
60+
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
61+
62+
set(XSIMDALGO_CMAKECONFIG_INSTALL_DIR "${CMAKE_INSTALL_DATADIR}/cmake/${PROJECT_NAME}" CACHE
63+
STRING "install path for xsimd-algorithmConfig.cmake")
64+
65+
configure_package_config_file(${PROJECT_NAME}Config.cmake.in
66+
"${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake"
67+
INSTALL_DESTINATION ${XSIMDALGO_CMAKECONFIG_INSTALL_DIR})

environment-dev.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
name: xsimd-algorithm
2+
channels:
3+
- conda-forge
4+
dependencies:
5+
- cmake
6+
- xsimd=13.2.0
7+
- doctest
8+
- ninja
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
/***************************************************************************
2+
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
3+
* Martin Renou *
4+
* Copyright (c) QuantStack *
5+
* Copyright (c) Serge Guelton *
6+
* *
7+
* Distributed under the terms of the BSD 3-Clause License. *
8+
* *
9+
* The full license is in the file LICENSE, distributed with this software. *
10+
****************************************************************************/
11+
12+
#ifndef XSIMD_ALGORITHMS_HPP
13+
#define XSIMD_ALGORITHMS_HPP
14+
15+
#include "xsimd_algorithm/stl/reduce.hpp"
16+
#include "xsimd_algorithm/stl/transform.hpp"
17+
18+
#endif
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/***************************************************************************
2+
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
3+
* Martin Renou *
4+
* Copyright (c) QuantStack *
5+
* Copyright (c) Serge Guelton *
6+
* *
7+
* Distributed under the terms of the BSD 3-Clause License. *
8+
* *
9+
* The full license is in the file LICENSE, distributed with this software. *
10+
****************************************************************************/
11+
12+
#ifndef XSIMD_ALGORITHMS_REDUCE_HPP
13+
#define XSIMD_ALGORITHMS_REDUCE_HPP
14+
15+
#include <array>
16+
#include <cstddef>
17+
#include <iterator>
18+
#include <type_traits>
19+
20+
#include "xsimd/xsimd.hpp"
21+
22+
namespace xsimd
23+
{
24+
// TODO: Remove this once we drop C++11 support
// (std::plus<> from C++14 looks like a drop-in replacement -- confirm before switching).
namespace detail
{
    // Stateless addition functor; serves as the default binary operation of reduce().
    struct plus
    {
        // Returns x + y, with the result type deduced from the operands' operator+.
        // The operator is const-qualified so the functor can also be invoked
        // through a const object or a const reference.
        template <class X, class Y>
        auto operator()(X&& x, Y&& y) const noexcept -> decltype(x + y) { return x + y; }
    };
}
33+
34+
template <class Arch = default_arch, class Iterator1, class Iterator2, class Init, class BinaryFunction = detail::plus>
35+
Init reduce(Iterator1 first, Iterator2 last, Init init, BinaryFunction&& binfun = detail::plus {}) noexcept
36+
{
37+
using value_type = typename std::decay<decltype(*first)>::type;
38+
using batch_type = batch<value_type, Arch>;
39+
40+
std::size_t size = static_cast<std::size_t>(std::distance(first, last));
41+
constexpr std::size_t simd_size = batch_type::size;
42+
43+
if (size < simd_size)
44+
{
45+
while (first != last)
46+
{
47+
init = binfun(init, *first++);
48+
}
49+
return init;
50+
}
51+
52+
const auto* const ptr_begin = &(*first);
53+
54+
std::size_t align_begin = xsimd::get_alignment_offset(ptr_begin, size, simd_size);
55+
std::size_t align_end = align_begin + ((size - align_begin) & ~(simd_size - 1));
56+
57+
// reduce initial unaligned part
58+
for (std::size_t i = 0; i < align_begin; ++i)
59+
{
60+
init = binfun(init, first[i]);
61+
}
62+
63+
// reduce aligned part
64+
auto ptr = ptr_begin + align_begin;
65+
batch_type batch_init = batch_type::load_aligned(ptr);
66+
ptr += simd_size;
67+
for (auto const end = ptr_begin + align_end; ptr < end; ptr += simd_size)
68+
{
69+
batch_type batch = batch_type::load_aligned(ptr);
70+
batch_init = binfun(batch_init, batch);
71+
}
72+
73+
// reduce across batch
74+
alignas(batch_type) std::array<value_type, simd_size> arr;
75+
xsimd::store_aligned(arr.data(), batch_init);
76+
for (auto x : arr)
77+
init = binfun(init, x);
78+
79+
// reduce final unaligned part
80+
for (std::size_t i = align_end; i < size; ++i)
81+
{
82+
init = binfun(init, first[i]);
83+
}
84+
85+
return init;
86+
}
87+
88+
}
89+
90+
#endif

0 commit comments

Comments
 (0)