Bug fix for memory leak on CPU
- The call to TBB is disabled. - The parallel sort of Kokkos is also very slow. - For now, std::sort is used for sorting, which is more performant than both available options. This will be changed as soon as a better solution is found.
This commit is contained in:
parent
892f5395bc
commit
5eef26a6ed
|
@ -193,7 +193,7 @@ public:
|
|||
|
||||
if( capacity+1 > flags_.size() )
|
||||
{
|
||||
reallocNoInit(flags_, capacity+1);
|
||||
reallocInit(flags_, capacity+1);
|
||||
}
|
||||
|
||||
// fill the flags
|
||||
|
@ -219,7 +219,7 @@ public:
|
|||
{
|
||||
// get more space to prevent reallocations in next iterations
|
||||
uint32 len = size_*1.1+1;
|
||||
reallocNoInit(sortedPairs_, len);
|
||||
reallocInit(sortedPairs_, len);
|
||||
}
|
||||
|
||||
Kokkos::parallel_for(
|
||||
|
@ -232,6 +232,7 @@ public:
|
|||
sort(sortedPairs_, 0, size_ );
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
INLINE_FUNCTION_HD
|
||||
|
|
|
@ -56,7 +56,7 @@ template<typename Type, typename... Properties>
|
|||
INLINE_FUNCTION_H void
|
||||
reallocInit(ViewType1D<Type, Properties...>& view, uint32 len)
|
||||
{
|
||||
Kokkos::realloc(Kokkos::WithoutInitializing, view, len);
|
||||
Kokkos::realloc(view, len);
|
||||
}
|
||||
|
||||
template<typename Type, typename... Properties>
|
||||
|
|
|
@ -28,6 +28,8 @@ Licence:
|
|||
#include "cudaAlgorithms.hpp"
|
||||
#include "kokkosAlgorithms.hpp"
|
||||
#include "stdAlgorithms.hpp"
|
||||
#include "Kokkos_Sort.hpp"
|
||||
|
||||
|
||||
namespace pFlow
|
||||
{
|
||||
|
@ -295,7 +297,9 @@ sort(ViewType1D<T, properties...>& view, uint32 start, uint32 end)
|
|||
|
||||
if constexpr (isHostAccessible<ExecutionSpace>())
|
||||
{
|
||||
pFlow::algorithms::STD::sort<T, true>(view.data() + start, numElems);
|
||||
//auto sView = Kokkos::subview(view, Kokkos::make_pair<uint32,uint32>(start,end));
|
||||
//Kokkos::sort(sView);
|
||||
pFlow::algorithms::STD::sort<T, false>(view.data() + start, numElems);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -316,7 +320,7 @@ sort(
|
|||
ViewType1D<T, properties...>& view,
|
||||
uint32 start,
|
||||
uint32 end,
|
||||
CompareFunc compare
|
||||
const CompareFunc& compare
|
||||
)
|
||||
{
|
||||
using ExecutionSpace =
|
||||
|
@ -326,9 +330,12 @@ sort(
|
|||
|
||||
if constexpr (isHostAccessible<ExecutionSpace>())
|
||||
{
|
||||
pFlow::algorithms::STD::sort<T, CompareFunc, true>(
|
||||
// sort without parallelization
|
||||
pFlow::algorithms::STD::sort<T, CompareFunc,false>(
|
||||
view.data() + start, numElems, compare
|
||||
);
|
||||
//auto sView = Kokkos::subview(view, Kokkos::make_pair<uint32,uint32>(start,end));
|
||||
//Kokkos::sort(sView, compare);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -345,6 +352,7 @@ sort(
|
|||
return;
|
||||
}
|
||||
|
||||
|
||||
template<
|
||||
typename Type,
|
||||
typename... properties,
|
||||
|
|
|
@ -148,7 +148,7 @@ void sort(Type* first, int32 numElems)
|
|||
if constexpr(useParallel)
|
||||
{
|
||||
std::sort(
|
||||
std::execution::par,
|
||||
std::execution::par_unseq,
|
||||
first,
|
||||
first+numElems,
|
||||
less<Type>());
|
||||
|
|
Loading…
Reference in New Issue