Kokkos folder updated
This commit is contained in:
parent
34e3f5587b
commit
6e5fe608c6
|
@ -17,133 +17,139 @@ Licence:
|
|||
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
-----------------------------------------------------------------------------*/
|
||||
|
||||
#ifndef __KokkosTypes_hpp__
|
||||
#define __KokkosTypes_hpp__
|
||||
|
||||
/**
|
||||
* \file KokkosTypes.hpp
|
||||
*
|
||||
* \brief name aliases and typedefs for Kokkos entities that are
|
||||
* frequently used in PhasicFlow.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_DualView.hpp>
|
||||
#include <Kokkos_UnorderedMap.hpp>
|
||||
|
||||
#include "iOstream.hpp"
|
||||
#include "builtinTypes.hpp"
|
||||
|
||||
|
||||
namespace pFlow
|
||||
{
|
||||
|
||||
///********DEP
|
||||
class DeviceSide{};
|
||||
class HostSide{};
|
||||
|
||||
template<typename side>
|
||||
struct selectSide{};
|
||||
/*********///
|
||||
|
||||
using HostSpace = Kokkos::HostSpace;
|
||||
using Serial = Kokkos::Serial;
|
||||
/// Host memory space
|
||||
using HostSpace = Kokkos::HostSpace;
|
||||
|
||||
/// Serial execution space
|
||||
using Serial = Kokkos::Serial;
|
||||
|
||||
#ifdef _OPENMP
|
||||
using OpenMP = Kokkos::OpenMP;
|
||||
/// OpenMP execution space
|
||||
using OpenMP = Kokkos::OpenMP;
|
||||
#endif
|
||||
|
||||
#ifdef __CUDACC__
|
||||
/// Cuda execution space
|
||||
using Cuda = Kokkos::Cuda;
|
||||
#endif
|
||||
|
||||
/// Default Host execution space, on top of all host execution spaces
|
||||
using DefaultHostExecutionSpace = Kokkos::DefaultHostExecutionSpace;
|
||||
|
||||
/// Default execution space, it can be device exe. space, if a device space is
|
||||
/// activated.
|
||||
using DefaultExecutionSpace = Kokkos::DefaultExecutionSpace;
|
||||
|
||||
|
||||
|
||||
/// Pair of two variables
|
||||
template<typename T1, typename T2>
|
||||
using kPair = Kokkos::pair<T1,T2>;
|
||||
|
||||
template<typename T>
|
||||
using kRange = kPair<T,T>;
|
||||
|
||||
using range = kRange<int>;
|
||||
|
||||
using range64 = kRange<int long>;
|
||||
using Pair = Kokkos::pair<T1,T2>;
|
||||
|
||||
/// View for a scalar
|
||||
template<typename T, typename... properties>
|
||||
using ViewTypeScalar = Kokkos::View<T,properties...>;
|
||||
|
||||
/// 1D view as a vector
|
||||
template<typename T, typename... properties>
|
||||
using ViewType1D = Kokkos::View<T*,properties...>;
|
||||
|
||||
/// 2D view as an array
|
||||
template<typename T, typename... properties>
|
||||
using DualViewType1D = Kokkos::DualView<T*,properties...>;
|
||||
using ViewType2D = Kokkos::View<T**,properties...>;
|
||||
|
||||
/// 3D view as an array
|
||||
template<typename T, typename... properties>
|
||||
using ViewType3D = Kokkos::View<T***,properties...>;
|
||||
|
||||
/// 1D dual view as a vector
|
||||
template<typename T, typename... properties>
|
||||
using DualViewType1D = Kokkos::DualView<T*,properties...>;
|
||||
|
||||
/// unordered map
|
||||
template<typename Key, typename Value, typename... properties>
|
||||
using unorderedMap = Kokkos::UnorderedMap<Key, Value, properties...>;
|
||||
|
||||
/// unordered set
|
||||
template<typename Key, typename... properties>
|
||||
using unorderedSet = Kokkos::UnorderedMap<Key, void, properties...>;
|
||||
|
||||
template<typename Key, typename Value>
|
||||
using deviceHashMap= Kokkos::UnorderedMap<Key, Value>;
|
||||
|
||||
template<typename Key, typename Value>
|
||||
using hostHashMap= Kokkos::UnorderedMap<Key, Value, Kokkos::HostSpace>;
|
||||
|
||||
template<typename Key>
|
||||
using deviceHashSet= Kokkos::UnorderedMap<Key, void>;
|
||||
|
||||
template<typename Key>
|
||||
using hostHashSet = Kokkos::UnorderedMap<Key,void, Kokkos::HostSpace>;
|
||||
|
||||
// a 1D array (vector) with default device (memory space and execution space)
|
||||
/// Scalar on device
|
||||
template<typename T>
|
||||
using deviceViewTypeScalar = Kokkos::View<T>;
|
||||
|
||||
/// 1D array (vector) with default device (memory space and execution space)
|
||||
template<typename T>
|
||||
using deviceViewType1D = Kokkos::View<T*>;
|
||||
|
||||
/// 2D view on device as an array on device
|
||||
template<typename T, typename Layout=void>
|
||||
using deviceViewType2D = Kokkos::View<T**,Layout, void>;
|
||||
|
||||
/// 3D view on device as an array on device
|
||||
template<typename T, typename Layout=void>
|
||||
using deviceViewType3D = Kokkos::View<T***,Layout, void>;
|
||||
|
||||
|
||||
|
||||
// a 1D array (vector) with host memory space
|
||||
template<typename T>
|
||||
using hostViewTypeScalar = Kokkos::View<T, Kokkos::HostSpace>;
|
||||
|
||||
/// 1D array (vector) with host memory space
|
||||
template<typename T>
|
||||
using hostViewType1D = Kokkos::View<T*, Kokkos::HostSpace>;
|
||||
|
||||
/// 2D array on host
|
||||
template<typename T, typename Layout=void>
|
||||
using hostViewType2D = Kokkos::View<T**,Layout, Kokkos::HostSpace>;
|
||||
|
||||
|
||||
#ifdef __CUDACC__
|
||||
using Cuda = Kokkos::Cuda;
|
||||
template<typename T>
|
||||
using cudaViewTypeScalar = Kokkos::View<T, Kokkos::CudaSpace>;
|
||||
|
||||
template<typename T>
|
||||
using cudaViewType1D = Kokkos::View<T*, Kokkos::CudaSpace>;
|
||||
|
||||
/// 3D array on host
|
||||
template<typename T, typename Layout=void>
|
||||
using cudaViewType2D = Kokkos::View<T*,Layout, Kokkos::CudaSpace>;
|
||||
#endif
|
||||
|
||||
using hostViewType3D = Kokkos::View<T***,Layout, Kokkos::HostSpace>;
|
||||
|
||||
/// 1D vector on device with atomic capabilities
|
||||
template<typename T>
|
||||
using deviceAtomicViewType1D =
|
||||
Kokkos::View<
|
||||
T*,
|
||||
Kokkos::MemoryTraits<std::is_same<DefaultExecutionSpace,Serial>::value?0:Kokkos::Atomic>>;
|
||||
|
||||
/// 3D array on device with atomic capabilities
|
||||
template<typename T>
|
||||
using deviceAtomicViewType3D =
|
||||
Kokkos::View<
|
||||
T***,
|
||||
Kokkos::MemoryTraits<std::is_same<DefaultExecutionSpace,Serial>::value?0:Kokkos::Atomic>>;
|
||||
|
||||
template<typename T>
|
||||
iOstream& operator <<(iOstream& os, const kRange<T>& rng)
|
||||
{
|
||||
os<<"["<<rng.first<<" "<<rng.second<<")";
|
||||
return os;
|
||||
}
|
||||
|
||||
|
||||
} // pFlow
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ Licence:
|
|||
#include "KokkosTypes.hpp"
|
||||
#include "pFlowMacros.hpp"
|
||||
#include "types.hpp"
|
||||
|
||||
#include "iOstream.hpp"
|
||||
|
||||
namespace pFlow
|
||||
{
|
||||
|
@ -37,6 +37,14 @@ bool constexpr isHostAccessible()
|
|||
return Kokkos::SpaceAccessibility<ExecutionSpace,HostSpace>::accessible;
|
||||
}
|
||||
|
||||
template<typename ExecutionSpace>
|
||||
INLINE_FUNCTION_H
|
||||
bool constexpr isDeviceAccessible()
|
||||
{
|
||||
return Kokkos::SpaceAccessibility<ExecutionSpace,DefaultExecutionSpace>::accessible;
|
||||
}
|
||||
|
||||
/// Is MemoerySpace accessible from ExecutionSpace
|
||||
template<typename ExecutionSpace, typename MemoerySpace>
|
||||
INLINE_FUNCTION_H
|
||||
bool constexpr areAccessible()
|
||||
|
@ -48,9 +56,9 @@ template <
|
|||
typename Type,
|
||||
typename... Properties>
|
||||
INLINE_FUNCTION_H
|
||||
void realloc( ViewType1D<Type,Properties...>& view, int32 len)
|
||||
void reallocInit( ViewType1D<Type,Properties...>& view, int32 len)
|
||||
{
|
||||
Kokkos::realloc(view, len);
|
||||
Kokkos::realloc(Kokkos::WithoutInitializing, view, len);
|
||||
}
|
||||
|
||||
template <
|
||||
|
@ -59,14 +67,7 @@ template <
|
|||
INLINE_FUNCTION_H
|
||||
void reallocNoInit(ViewType1D<Type,Properties...>& view, int32 len)
|
||||
{
|
||||
using ViewType = ViewType1D<Type,Properties...>;
|
||||
word vl = view.label();
|
||||
view = ViewType(); // Deallocate first
|
||||
view = ViewType(
|
||||
Kokkos::view_alloc(
|
||||
Kokkos::WithoutInitializing,
|
||||
vl),
|
||||
len);
|
||||
Kokkos::realloc(Kokkos::WithoutInitializing, view, len);
|
||||
}
|
||||
|
||||
template <
|
||||
|
@ -79,12 +80,39 @@ void reallocFill( ViewType1D<Type,Properties...>& view, int32 len, Type val)
|
|||
Kokkos::deep_copy(view, val);
|
||||
}
|
||||
|
||||
template <
|
||||
typename Type,
|
||||
typename... Properties>
|
||||
INLINE_FUNCTION_H
|
||||
void reallocInit( ViewType2D<Type,Properties...>& view, int32 len1, int32 len2)
|
||||
{
|
||||
Kokkos::realloc(view, len1, len2);
|
||||
}
|
||||
|
||||
template <
|
||||
typename Type,
|
||||
typename... Properties>
|
||||
INLINE_FUNCTION_H
|
||||
void realloc( ViewType3D<Type,Properties...>& view, int32 len1, int32 len2, int32 len3)
|
||||
void reallocNoInit(ViewType2D<Type,Properties...>& view, int32 len1, int32 len2)
|
||||
{
|
||||
Kokkos::realloc(Kokkos::WithoutInitializing, view, len1, len2);
|
||||
}
|
||||
|
||||
template <
|
||||
typename Type,
|
||||
typename... Properties>
|
||||
INLINE_FUNCTION_H
|
||||
void reallocFill( ViewType2D<Type,Properties...>& view, int32 len1, int32 len2, Type val)
|
||||
{
|
||||
reallocNoInit(view, len1, len2);
|
||||
Kokkos::deep_copy(view, val);
|
||||
}
|
||||
|
||||
template <
|
||||
typename Type,
|
||||
typename... Properties>
|
||||
INLINE_FUNCTION_H
|
||||
void reallocInit( ViewType3D<Type,Properties...>& view, int32 len1, int32 len2, int32 len3)
|
||||
{
|
||||
Kokkos::realloc(view, len1, len2, len3);
|
||||
}
|
||||
|
@ -95,14 +123,8 @@ template <
|
|||
INLINE_FUNCTION_H
|
||||
void reallocNoInit(ViewType3D<Type,Properties...>& view, int32 len1, int32 len2, int32 len3)
|
||||
{
|
||||
using ViewType = ViewType3D<Type,Properties...>;
|
||||
word vl = view.label();
|
||||
view = ViewType(); // Deallocate first
|
||||
view = ViewType(
|
||||
Kokkos::view_alloc(
|
||||
Kokkos::WithoutInitializing,
|
||||
vl),
|
||||
len1, len2, len3);
|
||||
|
||||
Kokkos::realloc(Kokkos::WithoutInitializing, view, len1, len2, len3);
|
||||
}
|
||||
|
||||
template <
|
||||
|
@ -115,14 +137,44 @@ void reallocFill( ViewType3D<Type,Properties...>& view, int32 len1, int32 len2,
|
|||
Kokkos::deep_copy(view, val);
|
||||
}
|
||||
|
||||
template <
|
||||
typename Type,
|
||||
typename... Properties>
|
||||
INLINE_FUNCTION_H
|
||||
void resizeInit(ViewType1D<Type,Properties...>& view, int32 newLen)
|
||||
{
|
||||
Kokkos::resize(view, newLen);
|
||||
}
|
||||
|
||||
template <
|
||||
typename Type,
|
||||
typename... Properties>
|
||||
INLINE_FUNCTION_H
|
||||
void resizeNoInit(ViewType1D<Type,Properties...>& view, int32 newLen)
|
||||
{
|
||||
Kokkos::resize(Kokkos::WithoutInitializing, view, newLen);
|
||||
}
|
||||
|
||||
template<typename ViewType>
|
||||
INLINE_FUNCTION_H
|
||||
void swapViews(ViewType& v1, ViewType &v2)
|
||||
{
|
||||
auto tmp = v1;
|
||||
v1 = v2;
|
||||
v2 = tmp;
|
||||
static_assert(
|
||||
std::is_move_assignable<ViewType>::value && std::is_move_constructible<ViewType>::value,
|
||||
"swapViews arguments must be move assignable and move constructible");
|
||||
|
||||
ViewType tmp = std::move(v1);
|
||||
v1 = std::move(v2);
|
||||
v2 = std::move(tmp);
|
||||
}
|
||||
|
||||
|
||||
template<typename T1, typename T2>
|
||||
INLINE_FUNCTION_H
|
||||
iOstream& operator <<(iOstream& os, const Pair<T1,T2>& p)
|
||||
{
|
||||
os<<'('<<p.first<<" "<<p.second<<')';
|
||||
return os;
|
||||
}
|
||||
|
||||
} // pFlow
|
||||
|
|
|
@ -0,0 +1,149 @@
|
|||
/*------------------------------- phasicFlow ---------------------------------
|
||||
O C enter of
|
||||
O O E ngineering and
|
||||
O O M ultiscale modeling of
|
||||
OOOOOOO F luid flow
|
||||
------------------------------------------------------------------------------
|
||||
Copyright (C): www.cemf.ir
|
||||
email: hamid.r.norouzi AT gmail.com
|
||||
------------------------------------------------------------------------------
|
||||
Licence:
|
||||
This file is part of phasicFlow code. It is a free software for simulating
|
||||
granular and multiphase flows. You can redistribute it and/or modify it under
|
||||
the terms of GNU General Public License v3 or any other later versions.
|
||||
|
||||
phasicFlow is distributed to help others in their research in the field of
|
||||
granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the
|
||||
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
-----------------------------------------------------------------------------*/
|
||||
#ifndef __Range_hpp__
|
||||
#define __Range_hpp__
|
||||
|
||||
|
||||
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#include "pFlowMacros.hpp"
|
||||
#include "typeInfo.hpp"
|
||||
#include "builtinTypes.hpp"
|
||||
#include "iOstream.hpp"
|
||||
|
||||
|
||||
namespace pFlow
|
||||
{
|
||||
|
||||
|
||||
/**
|
||||
* Range for elements in a vector [start,end)
|
||||
*
|
||||
*/
|
||||
template<typename T>
|
||||
struct Range
|
||||
:
|
||||
public Kokkos::pair<T,T>
|
||||
{
|
||||
using Pair = Kokkos::pair<T,T>;
|
||||
|
||||
TypeInfoTemplateNV("Range", T)
|
||||
|
||||
//// - Constructors
|
||||
|
||||
/// Default
|
||||
INLINE_FUNCTION_HD
|
||||
Range(){}
|
||||
|
||||
/// From end, set start to 0
|
||||
INLINE_FUNCTION_HD
|
||||
Range(const T& e)
|
||||
:
|
||||
Range(0,e)
|
||||
{}
|
||||
|
||||
/// From componeents
|
||||
INLINE_FUNCTION_HD
|
||||
Range(const T& s, const T& e)
|
||||
:
|
||||
Range::Pair(s,e)
|
||||
{}
|
||||
|
||||
/// From pair
|
||||
INLINE_FUNCTION_HD
|
||||
Range(const Range::Pair &src )
|
||||
:
|
||||
Range::Pair(src)
|
||||
{}
|
||||
|
||||
/// Copy
|
||||
INLINE_FUNCTION_HD
|
||||
Range(const Range&) = default;
|
||||
|
||||
/// Move
|
||||
INLINE_FUNCTION_HD
|
||||
Range(Range&&) = default;
|
||||
|
||||
/// Copy assignment
|
||||
INLINE_FUNCTION_HD
|
||||
Range& operator=(const Range&) = default;
|
||||
|
||||
/// Move assignment
|
||||
INLINE_FUNCTION_HD
|
||||
Range& operator=(Range&&) = default;
|
||||
|
||||
/// Destructor
|
||||
INLINE_FUNCTION_HD
|
||||
~Range()=default;
|
||||
|
||||
//// - Methods
|
||||
|
||||
/// Start
|
||||
INLINE_FUNCTION_HD
|
||||
T& start()
|
||||
{
|
||||
return this->first;
|
||||
}
|
||||
|
||||
/// End
|
||||
INLINE_FUNCTION_HD
|
||||
T& end()
|
||||
{
|
||||
return this->second;
|
||||
}
|
||||
|
||||
INLINE_FUNCTION_HD
|
||||
const T& start()const
|
||||
{
|
||||
return this->first;
|
||||
}
|
||||
|
||||
INLINE_FUNCTION_HD
|
||||
const T& end()const
|
||||
{
|
||||
return this->second;
|
||||
}
|
||||
|
||||
INLINE_FUNCTION_HD
|
||||
T numElements()
|
||||
{
|
||||
return end()-start();
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
INLINE_FUNCTION_H
|
||||
iOstream& operator <<(iOstream& os, const Range<T>& rng)
|
||||
{
|
||||
os<<"["<<rng.start()<<" "<<rng.end()<<")";
|
||||
return os;
|
||||
}
|
||||
|
||||
using range32 = Range<int32>;
|
||||
|
||||
using range64 = Range<int64>;
|
||||
|
||||
|
||||
} // pFlow
|
||||
|
||||
#endif //__Range_hpp__
|
|
@ -23,7 +23,9 @@ Licence:
|
|||
|
||||
|
||||
#include "numericConstants.hpp"
|
||||
#include "Range.hpp"
|
||||
#include "KokkosUtilities.hpp"
|
||||
|
||||
#include "kokkosAlgorithms.hpp"
|
||||
#include "stdAlgorithms.hpp"
|
||||
#include "cudaAlgorithms.hpp"
|
||||
|
@ -31,9 +33,6 @@ Licence:
|
|||
|
||||
namespace pFlow
|
||||
{
|
||||
|
||||
inline const size_t maxSizeToSerial__ = 64;
|
||||
|
||||
template<typename T, typename... properties>
|
||||
INLINE_FUNCTION_H
|
||||
int32 count(
|
||||
|
@ -42,25 +41,17 @@ int32 count(
|
|||
int32 end,
|
||||
const T& val)
|
||||
{
|
||||
|
||||
using ExecutionSpace = typename ViewType1D<T, properties...>::execution_space;
|
||||
|
||||
int32 numElems = end-start;
|
||||
|
||||
if constexpr( isHostAccessible<ExecutionSpace>())
|
||||
{
|
||||
if(numElems<maxSizeToSerial__)
|
||||
{
|
||||
return pFlow::algorithms::STD::count<T,false>(
|
||||
view.data()+start,
|
||||
numElems,
|
||||
val);
|
||||
}
|
||||
}
|
||||
|
||||
return pFlow::algorithms::KOKKOS::count<T, ExecutionSpace>(
|
||||
view.data()+start,
|
||||
numElems,
|
||||
val);
|
||||
return pFlow::algorithms::KOKKOS::count<T, ExecutionSpace>
|
||||
(
|
||||
view.data()+start,
|
||||
numElems,
|
||||
val
|
||||
);
|
||||
}
|
||||
|
||||
template<typename T, typename... properties>
|
||||
|
@ -68,26 +59,10 @@ INLINE_FUNCTION_H
|
|||
void fill
|
||||
(
|
||||
ViewType1D<T, properties...>& view,
|
||||
range span,
|
||||
range32 span,
|
||||
T val
|
||||
)
|
||||
{
|
||||
using ExecutionSpace = typename ViewType1D<T, properties...>::execution_space;
|
||||
|
||||
|
||||
if constexpr( isHostAccessible<ExecutionSpace>())
|
||||
{
|
||||
int32 numElems = span.second-span.first;
|
||||
if( numElems<maxSizeToSerial__)
|
||||
{
|
||||
pFlow::algorithms::STD::fill<T,false>(
|
||||
view.data()+span.first,
|
||||
numElems,
|
||||
val);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
auto subV = Kokkos::subview(view, span);
|
||||
Kokkos::deep_copy(subV, val);
|
||||
}
|
||||
|
@ -101,7 +76,7 @@ void fill
|
|||
T val
|
||||
)
|
||||
{
|
||||
fill(view, range(start,end),val);
|
||||
fill(view, range32(start,end),val);
|
||||
}
|
||||
|
||||
template<
|
||||
|
@ -118,18 +93,6 @@ void fillSequence(
|
|||
using ExecutionSpace = typename ViewType1D<Type, properties...>::execution_space;
|
||||
int32 numElems = end-start;
|
||||
|
||||
if constexpr( isHostAccessible<ExecutionSpace>())
|
||||
{
|
||||
if(numElems<maxSizeToSerial__)
|
||||
{
|
||||
pFlow::algorithms::STD::fillSequence<Type,false>(
|
||||
view.data()+start,
|
||||
numElems,
|
||||
startVal);
|
||||
return ;
|
||||
}
|
||||
}
|
||||
|
||||
pFlow::algorithms::KOKKOS::fillSequence<Type, ExecutionSpace>(
|
||||
view.data()+start,
|
||||
numElems,
|
||||
|
@ -144,33 +107,22 @@ template<
|
|||
typename... properties,
|
||||
typename indexType,
|
||||
typename... indexProperties>
|
||||
bool fillSelected(
|
||||
bool fillSelected
|
||||
(
|
||||
ViewType1D<Type, properties...> view,
|
||||
const ViewType1D<indexType, indexProperties...> indices,
|
||||
const int32 numElems,
|
||||
const Type val,
|
||||
typename std::enable_if_t<
|
||||
areAccessible<
|
||||
const Type val
|
||||
)
|
||||
{
|
||||
static_assert(
|
||||
areAccessible<
|
||||
typename ViewType1D<Type, properties...>::execution_space,
|
||||
typename ViewType1D<indexType, indexProperties...>::memory_space>(),
|
||||
bool> = true )
|
||||
{
|
||||
"In fillSelected arguments view and indices must have similar spaces");
|
||||
|
||||
using ExecutionSpace = typename ViewType1D<Type, properties...>::execution_space;
|
||||
|
||||
if constexpr( isHostAccessible<ExecutionSpace>())
|
||||
{
|
||||
if(numElems<maxSizeToSerial__)
|
||||
{
|
||||
pFlow::algorithms::STD::fillSelected<Type,indexType,false>(
|
||||
view.data(),
|
||||
indices.data(),
|
||||
numElems,
|
||||
val);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
pFlow::algorithms::KOKKOS::fillSelected<Type, indexType, ExecutionSpace>(
|
||||
view.data(),
|
||||
indices.data(),
|
||||
|
@ -185,39 +137,21 @@ template<
|
|||
typename... properties,
|
||||
typename indexType,
|
||||
typename... indexProperties>
|
||||
//typename valType> //,
|
||||
//typename... valProperties>
|
||||
bool fillSelected(
|
||||
ViewType1D<Type, properties...> view,
|
||||
const ViewType1D<indexType, indexProperties...> indices,
|
||||
const ViewType1D<Type, indexProperties...> vals,
|
||||
const int32 numElems ,
|
||||
typename std::enable_if_t<
|
||||
areAccessible<
|
||||
const ViewType1D<Type, properties...> vals,
|
||||
const int32 numElems )
|
||||
{
|
||||
|
||||
static_assert(
|
||||
areAccessible<
|
||||
typename ViewType1D<Type, properties...>::execution_space,
|
||||
typename ViewType1D<indexType, indexProperties...>::memory_space>(),
|
||||
bool> = true )
|
||||
{
|
||||
"In fillSelected arguments view and indices must have similar spaces");
|
||||
|
||||
using ExecutionSpace = typename ViewType1D<Type, properties...>::execution_space;
|
||||
|
||||
|
||||
|
||||
if constexpr( isHostAccessible<ExecutionSpace>())
|
||||
{
|
||||
if(numElems<maxSizeToSerial__)
|
||||
{
|
||||
pFlow::algorithms::STD::fillSelected<Type,indexType,false>(
|
||||
view.data(),
|
||||
indices.data(),
|
||||
vals.data(),
|
||||
numElems
|
||||
);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
pFlow::algorithms::KOKKOS::fillSelected<Type, indexType, ExecutionSpace>(
|
||||
view.data(),
|
||||
indices.data(),
|
||||
|
@ -240,19 +174,7 @@ T min(
|
|||
using ExecutionSpace = typename ViewType1D<T, properties...>::execution_space;
|
||||
|
||||
int32 numElems = end-start;
|
||||
|
||||
if constexpr( isHostAccessible<ExecutionSpace>())
|
||||
{
|
||||
if(numElems<maxSizeToSerial__)
|
||||
{
|
||||
return
|
||||
pFlow::algorithms::STD::min<T,false>(
|
||||
view.data()+start,
|
||||
numElems);
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
|
||||
pFlow::algorithms::KOKKOS::min<T, ExecutionSpace>(
|
||||
view.data()+start,
|
||||
numElems);
|
||||
|
@ -270,17 +192,6 @@ T max(
|
|||
|
||||
int32 numElems = end-start;
|
||||
|
||||
if constexpr( isHostAccessible<ExecutionSpace>())
|
||||
{
|
||||
if(numElems<maxSizeToSerial__)
|
||||
{
|
||||
return
|
||||
pFlow::algorithms::STD::max<T,false>(
|
||||
view.data()+start,
|
||||
numElems);
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
pFlow::algorithms::KOKKOS::max<T, ExecutionSpace>(
|
||||
view.data()+start,
|
||||
|
@ -316,8 +227,8 @@ void copy(
|
|||
)
|
||||
{
|
||||
|
||||
range sSpan(sStart,sEnd);
|
||||
range dSpan(dStart,dStart+(sEnd-sStart));
|
||||
range32 sSpan(sStart,sEnd);
|
||||
range32 dSpan(dStart,dStart+(sEnd-sStart));
|
||||
|
||||
auto srcSub = Kokkos::subview(src, sSpan);
|
||||
auto dstSub = Kokkos::subview(dst, dSpan);
|
||||
|
@ -336,7 +247,7 @@ void getNth(
|
|||
const int32 n
|
||||
)
|
||||
{
|
||||
range span(n,n+1);
|
||||
range32 span(n,n+1);
|
||||
auto subV = Kokkos::subview(src, span);
|
||||
hostViewType1D<dType> dstView("getNth",1);
|
||||
Kokkos::deep_copy(dstView,subV);
|
||||
|
@ -351,27 +262,16 @@ void sort(
|
|||
int32 start,
|
||||
int32 end)
|
||||
{
|
||||
|
||||
using ExecutionSpace = typename ViewType1D<T, properties...>::execution_space;
|
||||
|
||||
int32 numElems = end-start;
|
||||
|
||||
if constexpr( isHostAccessible<ExecutionSpace>())
|
||||
{
|
||||
if(numElems<maxSizeToSerial__)
|
||||
{
|
||||
pFlow::algorithms::STD::sort<T,false>(
|
||||
view.data()+start,
|
||||
numElems);
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
pFlow::algorithms::STD::sort<T,true>(
|
||||
view.data()+start,
|
||||
numElems);
|
||||
return;
|
||||
}
|
||||
{
|
||||
pFlow::algorithms::STD::sort<T,true>(
|
||||
view.data()+start,
|
||||
numElems);
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef __CUDACC__
|
||||
|
@ -402,22 +302,11 @@ void sort(
|
|||
|
||||
if constexpr( isHostAccessible<ExecutionSpace>())
|
||||
{
|
||||
if(numElems<maxSizeToSerial__)
|
||||
{
|
||||
pFlow::algorithms::STD::sort<T,CompareFunc,false>(
|
||||
view.data()+start,
|
||||
numElems,
|
||||
compare);
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
pFlow::algorithms::STD::sort<T,CompareFunc,true>(
|
||||
view.data()+start,
|
||||
numElems,
|
||||
compare);
|
||||
return;
|
||||
}
|
||||
pFlow::algorithms::STD::sort<T,CompareFunc,true>(
|
||||
view.data()+start,
|
||||
numElems,
|
||||
compare);
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef __CUDACC__
|
||||
|
@ -444,37 +333,25 @@ void permuteSort(
|
|||
int32 start,
|
||||
int32 end,
|
||||
ViewType1D<permType, permProperties...>& permuteView,
|
||||
int32 permStart,
|
||||
typename std::enable_if_t<
|
||||
int32 permStart )
|
||||
{
|
||||
static_assert(
|
||||
areAccessible<
|
||||
typename ViewType1D<Type, properties...>::execution_space,
|
||||
typename ViewType1D<permType, permProperties...>::memory_space>(),
|
||||
bool> = true )
|
||||
{
|
||||
"In permuteSort, view and permuteView should have the same space");
|
||||
|
||||
using ExecutionSpace = typename ViewType1D<Type, properties...>::execution_space;
|
||||
|
||||
int32 numElems = end-start;
|
||||
|
||||
pFlow::algorithms::STD::permuteSort<Type,permType,true>(
|
||||
view.data()+start,
|
||||
permuteView.data()+permStart,
|
||||
numElems);
|
||||
return;
|
||||
|
||||
if constexpr( isHostAccessible<ExecutionSpace>())
|
||||
{
|
||||
if(numElems<maxSizeToSerial__)
|
||||
{
|
||||
pFlow::algorithms::STD::permuteSort<Type,permType,false>(
|
||||
view.data()+start,
|
||||
permuteView.data()+permStart,
|
||||
numElems );
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
pFlow::algorithms::STD::permuteSort<Type,permType,true>(
|
||||
view.data()+start,
|
||||
permuteView.data()+permStart,
|
||||
numElems);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifdef __CUDACC__
|
||||
|
||||
pFlow::algorithms::CUDA::permuteSort(
|
||||
|
@ -488,6 +365,36 @@ void permuteSort(
|
|||
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
INLINE_FUNCTION_HD
|
||||
int binarySearch_(const T* array, int length, const T& val)
|
||||
{
|
||||
|
||||
int low = 0;
|
||||
int high = length - 1;
|
||||
|
||||
while (low <= high)
|
||||
{
|
||||
int mid = low + (high - low)/2;
|
||||
|
||||
if ( array[mid] > val)
|
||||
{
|
||||
high = mid - 1;
|
||||
}
|
||||
else if ( array[mid] < val)
|
||||
{
|
||||
low = mid + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
return mid;
|
||||
}
|
||||
}
|
||||
|
||||
return -1; // val not found in array[0, length)
|
||||
}
|
||||
|
||||
/// On DEVICE and HOST calls
|
||||
template<
|
||||
typename Type,
|
||||
typename... properties>
|
||||
|
@ -502,7 +409,7 @@ int32 binarySearch(
|
|||
if(end<=start)return -1;
|
||||
|
||||
if(auto res =
|
||||
pFlow::algorithms::binarySearch(view.data()+start,end-start,val); res>=0) {
|
||||
binarySearch_(view.data()+start,end-start,val); res>=0) {
|
||||
return res+start;
|
||||
}
|
||||
else{
|
||||
|
@ -520,28 +427,22 @@ void exclusiveScan(
|
|||
int32 start,
|
||||
int32 end,
|
||||
ViewType1D<dType, dProperties...>& dView,
|
||||
int32 dStart,
|
||||
typename std::enable_if_t<
|
||||
int32 dStart )
|
||||
{
|
||||
|
||||
static_assert
|
||||
(
|
||||
areAccessible<
|
||||
typename ViewType1D<Type, properties...>::execution_space,
|
||||
typename ViewType1D<dType, dProperties...>::memory_space>(),
|
||||
bool> = true )
|
||||
{
|
||||
"In exclusiveScan, view and dView should have the same space"
|
||||
|
||||
);
|
||||
|
||||
using ExecutionSpace = typename ViewType1D<Type, properties...>::execution_space;
|
||||
|
||||
int32 numElems = end-start;
|
||||
if constexpr( isHostAccessible<ExecutionSpace>())
|
||||
{
|
||||
if(numElems<maxSizeToSerial__)
|
||||
{
|
||||
pFlow::algorithms::STD::exclusiveScan<Type,dType,false>(
|
||||
view.data()+start,
|
||||
dView.data()+dStart,
|
||||
numElems);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
pFlow::algorithms::KOKKOS::exclusiveScan<Type,dType,ExecutionSpace>(
|
||||
view.data()+start,
|
||||
dView.data()+dStart,
|
||||
|
@ -559,27 +460,20 @@ void inclusiveScan(
|
|||
int32 start,
|
||||
int32 end,
|
||||
ViewType1D<dType, dProperties...>& dView,
|
||||
int32 dStart,
|
||||
typename std::enable_if_t<
|
||||
areAccessible<
|
||||
typename ViewType1D<Type, properties...>::execution_space,
|
||||
typename ViewType1D<dType, dProperties...>::memory_space>(),
|
||||
bool> = true )
|
||||
int32 dStart)
|
||||
{
|
||||
using ExecutionSpace = typename ViewType1D<Type, properties...>::execution_space;
|
||||
|
||||
static_assert
|
||||
(
|
||||
areAccessible<
|
||||
typename ViewType1D<Type, properties...>::execution_space,
|
||||
typename ViewType1D<dType, dProperties...>::memory_space>(),
|
||||
"In exclusiveScan, view and dView should have the same space"
|
||||
);
|
||||
|
||||
|
||||
int32 numElems = end-start;
|
||||
if constexpr( isHostAccessible<ExecutionSpace>())
|
||||
{
|
||||
if(numElems<maxSizeToSerial__)
|
||||
{
|
||||
pFlow::algorithms::STD::inclusiveScan<Type,dType,false>(
|
||||
view.data()+start,
|
||||
dView.data()+dStart,
|
||||
numElems);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
pFlow::algorithms::KOKKOS::inclusiveScan<Type,dType,ExecutionSpace>(
|
||||
view.data()+start,
|
||||
|
|
|
@ -213,17 +213,6 @@ void insertSetElementD
|
|||
|
||||
}
|
||||
|
||||
/*template<typename T, typename... properties>
|
||||
void fill
|
||||
(
|
||||
ViewType1D<T, properties...>& view,
|
||||
range range,
|
||||
T val
|
||||
)
|
||||
{
|
||||
auto subV = Kokkos::subview(view, range);
|
||||
Kokkos::deep_copy(subV, val);
|
||||
}*/
|
||||
|
||||
|
||||
template<typename T, typename... properties>
|
|
@ -0,0 +1,30 @@
|
|||
/*------------------------------- phasicFlow ---------------------------------
|
||||
O C enter of
|
||||
O O E ngineering and
|
||||
O O M ultiscale modeling of
|
||||
OOOOOOO F luid flow
|
||||
------------------------------------------------------------------------------
|
||||
Copyright (C): www.cemf.ir
|
||||
email: hamid.r.norouzi AT gmail.com
|
||||
------------------------------------------------------------------------------
|
||||
Licence:
|
||||
This file is part of phasicFlow code. It is a free software for simulating
|
||||
granular and multiphase flows. You can redistribute it and/or modify it under
|
||||
the terms of GNU General Public License v3 or any other later versions.
|
||||
|
||||
phasicFlow is distributed to help others in their research in the field of
|
||||
granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the
|
||||
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
-----------------------------------------------------------------------------*/
|
||||
|
||||
#ifndef __phsicFlowKokkos_hpp__
#define __phsicFlowKokkos_hpp__

// Convenience header that pulls in the Kokkos related headers of phasicFlow.
// The header name is spelled "KokkosTypes.hpp" (capital K), the same spelling
// used by KokkosUtilities.hpp, so that it also resolves on case-sensitive
// file systems.
#include "KokkosTypes.hpp"
#include "KokkosUtilities.hpp"
#include "ViewAlgorithms.hpp"
#include "Range.hpp"


#endif // __phsicFlowKokkos_hpp__
|
|
@ -60,37 +60,6 @@ struct minimum
|
|||
return lhs < rhs ? lhs : rhs; }
|
||||
};
|
||||
|
||||
|
||||
template<typename T>
|
||||
INLINE_FUNCTION_HD
|
||||
int binarySearch(const T* array, int length, const T& val)
|
||||
{
|
||||
|
||||
int low = 0;
|
||||
int high = length - 1;
|
||||
|
||||
while (low <= high)
|
||||
{
|
||||
int mid = low + (high - low)/2;
|
||||
|
||||
if ( array[mid] > val)
|
||||
{
|
||||
high = mid - 1;
|
||||
}
|
||||
else if ( array[mid] < val)
|
||||
{
|
||||
low = mid + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
return mid;
|
||||
}
|
||||
}
|
||||
|
||||
return -1; // val not found in array[0, length)
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif // __algorithmFunctions_hpp__
|
|
@ -43,9 +43,12 @@ void pFlow::processors::initProcessors(int argc, char *argv[])
|
|||
processors::globalSize_ = MPI::COMM_WORLD.Get_size();
|
||||
processors::globalRank_ = MPI::COMM_WORLD.Get_rank();
|
||||
|
||||
pFlow::pOutput.activatePrefix();
|
||||
pFlow::pOutput.setPrefixNum(processors::globalRank_);
|
||||
|
||||
if(processors::isParallel())
|
||||
{
|
||||
pFlow::pOutput.activatePrefix();
|
||||
pFlow::pOutput.setPrefixNum(processors::globalRank_);
|
||||
}
|
||||
|
||||
pFlow::mOutput.setMasterSlave(processors::isMaster());
|
||||
pFlow::errReport.setMasterSlave(processors::isMaster());
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue