Skip to content

Commit a934f8a

Browse files
Merge branch 'main' into dev/seg_fault_fix
2 parents b2475ca + 8dbb081 commit a934f8a

File tree

85 files changed

+2462
-80
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

85 files changed

+2462
-80
lines changed

.ci/env/apt.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,19 +31,19 @@ function add_repo {
3131
}
3232

3333
function install_dpcpp {
34-
sudo apt-get install -y intel-oneapi-compiler-dpcpp-cpp-2025.1 intel-oneapi-runtime-libs
34+
sudo apt-get install -y intel-oneapi-compiler-dpcpp-cpp-2025.2 intel-oneapi-runtime-libs
3535
}
3636

3737
function install_tbb {
38-
sudo apt-get install -y intel-oneapi-tbb-devel-2022.1
38+
sudo apt-get install -y intel-oneapi-tbb-devel-2022.2
3939
}
4040

4141
function install_dpl {
4242
sudo apt-get install -y intel-oneapi-libdpstd-devel
4343
}
4444

4545
function install_mkl {
46-
sudo apt-get install -y intel-oneapi-mkl-devel-2025.1
46+
sudo apt-get install -y intel-oneapi-mkl-devel-2025.2
4747
install_tbb
4848
install_dpl
4949
}

.ci/pipeline/ci.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,10 @@ variables:
3131
SYSROOT_OS: 'noble'
3232
PY_VERSION: '3.11'
3333
SKL_VERSION: '1.5'
34-
WINDOWS_BASEKIT_URL: 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/487fd8c3-a3d8-4c22-a903-f8d54c2c57be/intel-oneapi-base-toolkit-2025.1.0.650_offline.exe'
34+
# Link to latest version can be taken from basekit download page:
35+
# https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html
36+
# Check section 'Install through a Command Line':
37+
WINDOWS_BASEKIT_URL: 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/09a8acaf-265f-4460-866c-a3375ed5b4ff/intel-oneapi-base-toolkit-2025.2.0.591_offline.exe'
3538
WINDOWS_DPCPP_COMPONENTS: 'intel.oneapi.win.mkl.devel:intel.oneapi.win.tbb.devel:intel.oneapi.win.dpl'
3639

3740
resources:

.github/workflows/nightly-build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ jobs:
5050
build_lnx:
5151
name: oneDAL Linux nightly build
5252
if: github.repository == 'uxlfoundation/oneDAL'
53-
runs-on: ubuntu-22.04
53+
runs-on: ubuntu-24.04
5454
timeout-minutes: 120
5555

5656
steps:

INSTALL.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -123,31 +123,31 @@ is available as an alternative to the manual setup.
123123

124124
7. Download and install Python (version 3.9 or higher).
125125

126-
8. Build oneDAL via command-line interface. Choose the appropriate commands based on the interface, platform, compiler and the optimization level you use. Interface and platform are required arguments of makefile while others are optional. Below you can find the set of examples for building oneDAL. You may use a combination of them to get the desired build configuration:
126+
8. Build oneDAL via the command-line interface. Choose the appropriate commands based on the interface, platform, compiler, linker, and optimization level you use. The interface and platform are required makefile arguments, while the others are optional. Below you can find a set of examples for building oneDAL. You may combine them to get the desired build configuration:
127127

128128
- DAAL interfaces on **Linux\*** using **Intel(R) C++ Compiler**:
129129

130130
make -f makefile daal PLAT=lnx32e
131131

132132
- DAAL interfaces on **Linux\*** using **GNU Compiler Collection\***:
133133

134-
make -f makefile daal PLAT=lnx32e COMPILER=gnu OPTLEVEL=O0
134+
make -f makefile daal PLAT=lnx32e COMPILER=gnu OPTLEVEL=O0 LINKER=bfd
135135

136136
- DAAL interfaces on **Linux\*** using **Clang\***:
137137

138-
make -f makefile daal PLAT=lnx32e COMPILER=clang OPTLEVEL=O1
138+
make -f makefile daal PLAT=lnx32e COMPILER=clang OPTLEVEL=O1 LINKER=gold
139139

140140
- oneAPI C++/DPC++ interfaces on **Windows\*** using **Intel(R) DPC++ compiler**:
141141

142-
make -f makefile oneapi PLAT=win32e
142+
make -f makefile oneapi PLAT=win32e LINKER=llvm-lib
143143

144144
- oneAPI C++ interfaces on **Windows\*** using **Microsoft Visual\* C++ Compiler**:
145145

146146
make -f makefile oneapi_c PLAT=win32e COMPILER=vc OPTLEVEL=O2
147147

148148
- DAAL and oneAPI C++ interfaces on **Linux\*** using **GNU Compiler Collection\***:
149149

150-
make -f makefile daal oneapi_c PLAT=lnx32e COMPILER=gnu OPTLEVEL=O3
150+
make -f makefile daal oneapi_c PLAT=lnx32e COMPILER=gnu OPTLEVEL=O3 LINKER=lld
151151

152152
It is possible to build oneDAL libraries with a selected set of algorithms and/or CPU optimizations. The `CORE.ALGORITHMS.CUSTOM` and `REQCPUS` makefile defines are used for this purpose.
153153

cpp/daal/src/algorithms/cosdistance/cosdistance_batch_impl.i

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,19 @@ services::Status DistanceKernel<algorithmFPType, method, cpu>::compute(const siz
6363

6464
if (isFull<algorithmFPType, cpu>(rLayout))
6565
{
66-
return cosDistanceFull<algorithmFPType, cpu>(xTable, rTable);
66+
if (na == 1)
67+
{
68+
return cosDistanceFull<algorithmFPType, cpu>(xTable, rTable);
69+
}
70+
else if (na == 2)
71+
{
72+
NumericTable * yTable = const_cast<NumericTable *>(a[1]); /* y Input data */
73+
return cosDistanceFull<algorithmFPType, cpu>(xTable, yTable, rTable);
74+
}
75+
else
76+
{
77+
return services::Status(services::ErrorIncorrectNumberOfInputNumericTables);
78+
}
6779
}
6880
else
6981
{

cpp/daal/src/algorithms/cosdistance/cosdistance_dense_default_batch_fpt_cpu.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ template class BatchContainer<DAAL_FPTYPE, defaultDense, DAAL_CPU>;
3838
}
3939
namespace internal
4040
{
41-
template class DistanceKernel<DAAL_FPTYPE, defaultDense, DAAL_CPU>;
41+
template class DAAL_EXPORT DistanceKernel<DAAL_FPTYPE, defaultDense, DAAL_CPU>;
4242

4343
} // namespace internal
4444

cpp/daal/src/algorithms/cosdistance/cosdistance_full_impl.i

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,89 @@ services::Status cosDistanceFull(const NumericTable * xTable, NumericTable * rTa
234234
return safeStat.detach();
235235
}
236236

237+
template <typename algorithmFPType, CpuType cpu>
238+
services::Status cosDistanceFull(const NumericTable * xTable, const NumericTable * yTable, NumericTable * rTable)
239+
{
240+
size_t p = xTable->getNumberOfColumns(); /* Dimension of input feature vector */
241+
size_t nVectors1 = xTable->getNumberOfRows(); /* Number of input vectors in X */
242+
size_t nVectors2 = yTable->getNumberOfRows(); /* Number of input vectors in Y */
243+
244+
size_t nBlocks1 = nVectors1 / blockSizeDefault;
245+
nBlocks1 += (nBlocks1 * blockSizeDefault != nVectors1);
246+
247+
size_t nBlocks2 = nVectors2 / blockSizeDefault;
248+
nBlocks2 += (nBlocks2 * blockSizeDefault != nVectors2);
249+
250+
SafeStatus safeStat;
251+
252+
/* compute results for blocks of the distance matrix */
253+
daal::threader_for(nBlocks1, nBlocks1, [=, &safeStat](size_t k1) {
254+
DAAL_INT blockSize1 = blockSizeDefault;
255+
if (k1 == nBlocks1 - 1)
256+
{
257+
blockSize1 = nVectors1 - k1 * blockSizeDefault;
258+
}
259+
260+
/* read access to blockSize1 rows in input dataset X at k1*blockSizeDefault*p row */
261+
ReadRows<algorithmFPType, cpu> xBlock(*const_cast<NumericTable *>(xTable), k1 * blockSizeDefault, blockSize1);
262+
DAAL_CHECK_BLOCK_STATUS_THR(xBlock);
263+
const algorithmFPType * x = xBlock.get();
264+
265+
/* write access to blockSize1 rows in output dataset */
266+
WriteOnlyRows<algorithmFPType, cpu> rBlock(rTable, k1 * blockSizeDefault, blockSize1);
267+
DAAL_CHECK_BLOCK_STATUS_THR(rBlock);
268+
algorithmFPType * r = rBlock.get();
269+
270+
for (size_t k2 = 0; k2 < nBlocks2; k2++)
271+
{
272+
DAAL_INT blockSize2 = blockSizeDefault;
273+
if (k2 == nBlocks2 - 1)
274+
{
275+
blockSize2 = nVectors2 - k2 * blockSizeDefault;
276+
}
277+
278+
size_t shift2 = k2 * blockSizeDefault;
279+
280+
/* read access to blockSize2 rows in input dataset Y */
281+
ReadRows<algorithmFPType, cpu> yBlock(*const_cast<NumericTable *>(yTable), shift2, blockSize2);
282+
DAAL_CHECK_BLOCK_STATUS_THR(yBlock);
283+
const algorithmFPType * y = yBlock.get();
284+
285+
for (size_t i = 0; i < blockSize1; i++)
286+
{
287+
for (size_t j = 0; j < blockSize2; j++)
288+
{
289+
algorithmFPType numerator = 0.0;
290+
algorithmFPType xNorm = 0.0;
291+
algorithmFPType yNorm = 0.0;
292+
293+
for (size_t k = 0; k < p; k++)
294+
{
295+
numerator += x[i * p + k] * y[j * p + k];
296+
xNorm += x[i * p + k] * x[i * p + k];
297+
yNorm += y[j * p + k] * y[j * p + k];
298+
}
299+
300+
algorithmFPType denominator = xNorm * yNorm;
301+
if (denominator > 0.0)
302+
{
303+
r[i * nVectors2 + shift2 + j] = 1.0
304+
- numerator
305+
/ (daal::internal::MathInst<algorithmFPType, cpu>::sSqrt(xNorm)
306+
* daal::internal::MathInst<algorithmFPType, cpu>::sSqrt(yNorm));
307+
}
308+
else
309+
{
310+
r[i * nVectors2 + shift2 + j] = 1.0; // Maximum distance when no variance
311+
}
312+
}
313+
}
314+
}
315+
});
316+
317+
return safeStat.detach();
318+
}
319+
237320
} // namespace internal
238321

239322
} // namespace cosine_distance

cpp/daal/src/algorithms/k_nearest_neighbors/bf_knn_classification_train_container.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ services::Status BatchContainer<algorithmFpType, method, cpu>::compute()
6464

6565
const bool copy = (par->dataUseInModel == doNotUse);
6666
status |= r->impl()->setData<algorithmFpType>(x, copy);
67+
DAAL_CHECK_STATUS_VAR(status);
6768
if ((par->resultsToEvaluate & daal::algorithms::classifier::computeClassLabels) != 0)
6869
{
6970
const NumericTablePtr y = input->get(classifier::training::labels);

cpp/daal/src/algorithms/kernel_function/kernel_function_rbf_helper.h

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ inline services::Status HelperKernelRBF<double, sve>::postGemmPart(double * cons
159159

160160
return services::Status();
161161
}
162+
162163
//SVE implementation for RBF kernel post-GEMM part float data type
163164
template <>
164165
inline services::Status HelperKernelRBF<float, sve>::postGemmPart(float * const mklBuff, const float * const sqrA1i, const float sqrA2i,
@@ -185,6 +186,9 @@ inline services::Status HelperKernelRBF<float, sve>::postGemmPart(float * const
185186
tmp = svsel_f32(mask, tmp, thresholdVec);
186187
svst1(pg, &mklBuff[i], tmp);
187188

189+
svfloat32_t expVal = daal::internal::ref::exp_vectorized(tmp);
190+
svst1(pg, &dataRBlock[i], expVal);
191+
188192
// Block 2
189193
mklVec = svld1(pg, &mklBuff[i + step]);
190194
sqrVec = svld1(pg, &sqrA1i[i + step]);
@@ -195,6 +199,9 @@ inline services::Status HelperKernelRBF<float, sve>::postGemmPart(float * const
195199
tmp = svsel_f32(mask, tmp, thresholdVec);
196200
svst1(pg, &mklBuff[i + step], tmp);
197201

202+
expVal = daal::internal::ref::exp_vectorized(tmp);
203+
svst1(pg, &dataRBlock[i + step], expVal);
204+
198205
// Block 3
199206
mklVec = svld1(pg, &mklBuff[i + 2 * step]);
200207
sqrVec = svld1(pg, &sqrA1i[i + 2 * step]);
@@ -204,6 +211,9 @@ inline services::Status HelperKernelRBF<float, sve>::postGemmPart(float * const
204211
mask = svcmpgt_f32(pg, tmp, thresholdVec);
205212
tmp = svsel_f32(mask, tmp, thresholdVec);
206213
svst1(pg, &mklBuff[i + 2 * step], tmp);
214+
215+
expVal = daal::internal::ref::exp_vectorized(tmp);
216+
svst1(pg, &dataRBlock[i + 2 * step], expVal);
207217
}
208218

209219
// Tail loop
@@ -218,9 +228,10 @@ inline services::Status HelperKernelRBF<float, sve>::postGemmPart(float * const
218228
svbool_t mask = svcmpgt_f32(tail_pg, tmp, thresholdVec);
219229
tmp = svsel_f32(mask, tmp, thresholdVec);
220230
svst1(tail_pg, &mklBuff[i], tmp);
231+
232+
svfloat32_t expVal = daal::internal::ref::exp_vectorized(tmp);
233+
svst1(tail_pg, &dataRBlock[i], expVal);
221234
}
222-
//exponential function
223-
MathInst<float, sve>::vExp(n, mklBuff, dataRBlock);
224235

225236
return services::Status();
226237
}

cpp/daal/src/algorithms/kmeans/kmeans_lloyd_impl.i

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,8 @@ Status TaskKMeansLloyd<algorithmFPType, cpu>::addNTToTaskThreadedCSR(const Numer
302302
SpBlasInst<algorithmFPType, cpu>::xxcsrmm(&transa, &_n, &_c, &_p, &alpha, matdescra, data, (DAAL_INT *)colIdx, (DAAL_INT *)rowIdx, inClusters,
303303
&_p, &beta, x_clusters, &_n);
304304

305-
size_t csrCursor = 0;
305+
algorithmFPType goal = 0;
306+
size_t csrCursor = 0;
306307
for (size_t i = 0; i < blockSize; i++)
307308
{
308309
algorithmFPType minGoalVal = clustersSq[0] - x_clusters[i];
@@ -329,7 +330,7 @@ Status TaskKMeansLloyd<algorithmFPType, cpu>::addNTToTaskThreadedCSR(const Numer
329330

330331
kmeansInsertCandidate(tt, minGoalVal, k * blockSizeDefault + i);
331332

332-
*trg += minGoalVal;
333+
goal += minGoalVal;
333334

334335
cS0[minIdx]++;
335336

@@ -339,6 +340,7 @@ Status TaskKMeansLloyd<algorithmFPType, cpu>::addNTToTaskThreadedCSR(const Numer
339340
assignments[i] = (int)minIdx;
340341
}
341342
}
343+
*trg += goal;
342344
});
343345
return safeStat.detach();
344346
}

0 commit comments

Comments
 (0)