diff --git a/src/phasicFlow/Kokkos/KokkosTypes.hpp b/src/phasicFlow/Kokkos/KokkosTypes.hpp index 4f59872f..3d8cac38 100644 --- a/src/phasicFlow/Kokkos/KokkosTypes.hpp +++ b/src/phasicFlow/Kokkos/KokkosTypes.hpp @@ -17,133 +17,139 @@ Licence: implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -----------------------------------------------------------------------------*/ - #ifndef __KokkosTypes_hpp__ #define __KokkosTypes_hpp__ +/** + * \file KokkosType.hpp + * + * \brief name aliases and typedesf for Kokkos entities that are + * frequently used in PhasicFlow. + * + */ + #include #include #include -#include "iOstream.hpp" +#include "builtinTypes.hpp" namespace pFlow { +///********DEP class DeviceSide{}; class HostSide{}; template struct selectSide{}; +/*********/// -using HostSpace = Kokkos::HostSpace; -using Serial = Kokkos::Serial; +/// Host memory space +using HostSpace = Kokkos::HostSpace; + +/// Serial execution space +using Serial = Kokkos::Serial; #ifdef _OPENMP -using OpenMP = Kokkos::OpenMP; +/// OpenMp execution space +using OpenMP = Kokkos::OpenMP; #endif +#ifdef __CUDACC__ +/// Cuda execution space +using Cuda = Kokkos::Cuda; +#endif + +/// Default Host execution space, on top of all host execution spaces using DefaultHostExecutionSpace = Kokkos::DefaultHostExecutionSpace; + +/// Default execution space, it can be device exe. space, if a device space is +/// activated. using DefaultExecutionSpace = Kokkos::DefaultExecutionSpace; - +/// Pair of two variables template - using kPair = Kokkos::pair; - -template - using kRange = kPair; - -using range = kRange; - -using range64 = kRange; +using Pair = Kokkos::pair; +/// View for a scalar template using ViewTypeScalar = Kokkos::View; +/// 1D veiw as a vector template using ViewType1D = Kokkos::View; +/// 2D view as an array template - using DualViewType1D = Kokkos::DualView; + using ViewType2D = Kokkos::View; +/// 3D view as an array template using ViewType3D = Kokkos::View; +/// 1D dual view as a vector +template + using DualViewType1D = Kokkos::DualView; + +/// unordered map template using unorderedMap = Kokkos::UnorderedMap; +/// unordered set template using unorderedSet = Kokkos::UnorderedMap; -template - using deviceHashMap= Kokkos::UnorderedMap; - -template - using hostHashMap= Kokkos::UnorderedMap; - -template - using deviceHashSet= Kokkos::UnorderedMap; - -template - using hostHashSet = Kokkos::UnorderedMap; - -// a 1D array (vector) with default device (memory space and execution space) +/// Scalar on device template using deviceViewTypeScalar = Kokkos::View; +/// 1D array (vector) with default device (memory space and execution space) template using deviceViewType1D = Kokkos::View; +/// 2D view on device as an array on device template using deviceViewType2D = Kokkos::View; +/// 3D view on device as an array on device +template + using deviceViewType3D = Kokkos::View; + + -// a 1D array (vector with host memeory space) template using hostViewTypeScalar = Kokkos::View; +/// 1D array (vector with host memeory space) template using hostViewType1D = Kokkos::View; +/// 2D array on host template using hostViewType2D = Kokkos::View; - -#ifdef __CUDACC__ -using Cuda = Kokkos::Cuda; -template - using cudaViewTypeScalar = Kokkos::View; - -template - using cudaViewType1D = Kokkos::View; - +/// 3D array on host template - using cudaViewType2D = Kokkos::View; -#endif - + using hostViewType3D = Kokkos::View; +/// 1D vector on device with atomic capabilities template using deviceAtomicViewType1D = Kokkos::View< T*, Kokkos::MemoryTraits::value?0:Kokkos::Atomic>>; +/// 3D array on device with atomic capabilities template using deviceAtomicViewType3D = Kokkos::View< T***, Kokkos::MemoryTraits::value?0:Kokkos::Atomic>>; -template -iOstream& operator <<(iOstream& os, const kRange& rng) -{ - os<<"["<::accessible; } +template +INLINE_FUNCTION_H +bool constexpr isDeviceAccessible() +{ + return Kokkos::SpaceAccessibility::accessible; +} + +/// Is MemoerySpace accessible from ExecutionSpace template INLINE_FUNCTION_H bool constexpr areAccessible() @@ -48,9 +56,9 @@ template < typename Type, typename... Properties> INLINE_FUNCTION_H -void realloc( ViewType1D& view, int32 len) +void reallocInit( ViewType1D& view, int32 len) { - Kokkos::realloc(view, len); + Kokkos::realloc(Kokkos::WithoutInitializing, view, len); } template < @@ -59,14 +67,7 @@ template < INLINE_FUNCTION_H void reallocNoInit(ViewType1D& view, int32 len) { - using ViewType = ViewType1D; - word vl = view.label(); - view = ViewType(); // Deallocate first - view = ViewType( - Kokkos::view_alloc( - Kokkos::WithoutInitializing, - vl), - len); + Kokkos::realloc(Kokkos::WithoutInitializing, view, len); } template < @@ -79,12 +80,39 @@ void reallocFill( ViewType1D& view, int32 len, Type val) Kokkos::deep_copy(view, val); } +template < + typename Type, + typename... Properties> +INLINE_FUNCTION_H +void reallocInit( ViewType2D& view, int32 len1, int32 len2) +{ + Kokkos::realloc(view, len1, len2); +} template < typename Type, typename... Properties> INLINE_FUNCTION_H -void realloc( ViewType3D& view, int32 len1, int32 len2, int32 len3) +void reallocNoInit(ViewType2D& view, int32 len1, int32 len2) +{ + Kokkos::realloc(Kokkos::WithoutInitializing, view, len1, len2); +} + +template < + typename Type, + typename... Properties> +INLINE_FUNCTION_H +void reallocFill( ViewType2D& view, int32 len1, int32 len2, Type val) +{ + reallocNoInit(view, len1, len2); + Kokkos::deep_copy(view, val); +} + +template < + typename Type, + typename... Properties> +INLINE_FUNCTION_H +void reallocInit( ViewType3D& view, int32 len1, int32 len2, int32 len3) { Kokkos::realloc(view, len1, len2, len3); } @@ -95,14 +123,8 @@ template < INLINE_FUNCTION_H void reallocNoInit(ViewType3D& view, int32 len1, int32 len2, int32 len3) { - using ViewType = ViewType3D; - word vl = view.label(); - view = ViewType(); // Deallocate first - view = ViewType( - Kokkos::view_alloc( - Kokkos::WithoutInitializing, - vl), - len1, len2, len3); + + Kokkos::realloc(Kokkos::WithoutInitializing, view, len1, len2, len3); } template < @@ -115,14 +137,44 @@ void reallocFill( ViewType3D& view, int32 len1, int32 len2, Kokkos::deep_copy(view, val); } +template < + typename Type, + typename... Properties> +INLINE_FUNCTION_H +void resizeInit(ViewType1D& view, int32 newLen) +{ + Kokkos::resize(view, newLen); +} + +template < + typename Type, + typename... Properties> +INLINE_FUNCTION_H +void resizeNoInit(ViewType1D& view, int32 newLen) +{ + Kokkos::resize(Kokkos::WithoutInitializing, view, newLen); +} template INLINE_FUNCTION_H void swapViews(ViewType& v1, ViewType &v2) { - auto tmp = v1; - v1 = v2; - v2 = tmp; + static_assert( + std::is_move_assignable::value && std::is_move_constructible::value, + "swapViews arguments must be move assignable and move constructible"); + + ViewType tmp = std::move(v1); + v1 = std::move(v2); + v2 = std::move(tmp); +} + + +template +INLINE_FUNCTION_H +iOstream& operator <<(iOstream& os, const Pair& p) +{ + os<<'('< + +#include "pFlowMacros.hpp" +#include "typeInfo.hpp" +#include "builtinTypes.hpp" +#include "iOstream.hpp" + + +namespace pFlow +{ + + +/** + * Range for elements in an vector [start,end) + * + */ +template +struct Range +: +public Kokkos::pair +{ + using Pair = Kokkos::pair; + + TypeInfoTemplateNV("Range", T) + + //// - Constructors + + /// Default + INLINE_FUNCTION_HD + Range(){} + + /// From end, set start to 0 + INLINE_FUNCTION_HD + Range(const T& e) + : + Range(0,e) + {} + + /// From componeents + INLINE_FUNCTION_HD + Range(const T& s, const T& e) + : + Range::Pair(s,e) + {} + + /// From pair + INLINE_FUNCTION_HD + Range(const Range::Pair &src ) + : + Range::Pair(src) + {} + + /// Copy + INLINE_FUNCTION_HD + Range(const Range&) = default; + + /// Move + INLINE_FUNCTION_HD + Range(Range&&) = default; + + /// Copy assignment + INLINE_FUNCTION_HD + Range& operator=(const Range&) = default; + + /// Move assignment + INLINE_FUNCTION_HD + Range& operator=(Range&&) = default; + + /// Destructor + INLINE_FUNCTION_HD + ~Range()=default; + + //// - Methods + + /// Start + INLINE_FUNCTION_HD + T& start() + { + return this->first; + } + + /// End + INLINE_FUNCTION_HD + T& end() + { + return this->second; + } + + INLINE_FUNCTION_HD + const T& start()const + { + return this->first; + } + + INLINE_FUNCTION_HD + const T& end()const + { + return this->second; + } + + INLINE_FUNCTION_HD + T numElements() + { + return end()-start(); + } + +}; + +template +INLINE_FUNCTION_H +iOstream& operator <<(iOstream& os, const Range& rng) +{ + os<<"["<; + +using range64 = Range; + + +} // pFlow + +#endif //__KokkosTypes_hpp__ diff --git a/src/phasicFlow/Kokkos/ViewAlgorithms.hpp b/src/phasicFlow/Kokkos/ViewAlgorithms.hpp index 2e94cc3c..112569c6 100644 --- a/src/phasicFlow/Kokkos/ViewAlgorithms.hpp +++ b/src/phasicFlow/Kokkos/ViewAlgorithms.hpp @@ -23,7 +23,9 @@ Licence: #include "numericConstants.hpp" +#include "Range.hpp" #include "KokkosUtilities.hpp" + #include "kokkosAlgorithms.hpp" #include "stdAlgorithms.hpp" #include "cudaAlgorithms.hpp" @@ -31,9 +33,6 @@ Licence: namespace pFlow { - -inline const size_t maxSizeToSerial__ = 64; - template INLINE_FUNCTION_H int32 count( @@ -42,25 +41,17 @@ int32 count( int32 end, const T& val) { + using ExecutionSpace = typename ViewType1D::execution_space; int32 numElems = end-start; - if constexpr( isHostAccessible()) - { - if(numElems( - view.data()+start, - numElems, - val); - } - } - - return pFlow::algorithms::KOKKOS::count( - view.data()+start, - numElems, - val); + return pFlow::algorithms::KOKKOS::count + ( + view.data()+start, + numElems, + val + ); } template @@ -68,26 +59,10 @@ INLINE_FUNCTION_H void fill ( ViewType1D& view, - range span, + range32 span, T val ) { - using ExecutionSpace = typename ViewType1D::execution_space; - - - if constexpr( isHostAccessible()) - { - int32 numElems = span.second-span.first; - if( numElems( - view.data()+span.first, - numElems, - val); - return; - } - } - auto subV = Kokkos::subview(view, span); Kokkos::deep_copy(subV, val); } @@ -101,7 +76,7 @@ void fill T val ) { - fill(view, range(start,end),val); + fill(view, range32(start,end),val); } template< @@ -118,18 +93,6 @@ void fillSequence( using ExecutionSpace = typename ViewType1D::execution_space; int32 numElems = end-start; - if constexpr( isHostAccessible()) - { - if(numElems( - view.data()+start, - numElems, - startVal); - return ; - } - } - pFlow::algorithms::KOKKOS::fillSequence( view.data()+start, numElems, @@ -144,33 +107,22 @@ template< typename... properties, typename indexType, typename... indexProperties> -bool fillSelected( +bool fillSelected +( ViewType1D view, const ViewType1D indices, const int32 numElems, - const Type val, - typename std::enable_if_t< - areAccessible< + const Type val +) +{ + static_assert( + areAccessible< typename ViewType1D::execution_space, typename ViewType1D::memory_space>(), - bool> = true ) -{ + "In fillSelected arguments view and indices must have similar spaces"); using ExecutionSpace = typename ViewType1D::execution_space; - if constexpr( isHostAccessible()) - { - if(numElems( - view.data(), - indices.data(), - numElems, - val); - return true; - } - } - pFlow::algorithms::KOKKOS::fillSelected( view.data(), indices.data(), @@ -185,39 +137,21 @@ template< typename... properties, typename indexType, typename... indexProperties> - //typename valType> //, - //typename... valProperties> bool fillSelected( ViewType1D view, const ViewType1D indices, - const ViewType1D vals, - const int32 numElems , - typename std::enable_if_t< - areAccessible< + const ViewType1D vals, + const int32 numElems ) +{ + + static_assert( + areAccessible< typename ViewType1D::execution_space, typename ViewType1D::memory_space>(), - bool> = true ) -{ + "In fillSelected arguments view and indices must have similar spaces"); using ExecutionSpace = typename ViewType1D::execution_space; - - - if constexpr( isHostAccessible()) - { - if(numElems( - view.data(), - indices.data(), - vals.data(), - numElems - ); - return true; - } - } - - pFlow::algorithms::KOKKOS::fillSelected( view.data(), indices.data(), @@ -240,19 +174,7 @@ T min( using ExecutionSpace = typename ViewType1D::execution_space; int32 numElems = end-start; - - if constexpr( isHostAccessible()) - { - if(numElems( - view.data()+start, - numElems); - } - } - - return + pFlow::algorithms::KOKKOS::min( view.data()+start, numElems); @@ -270,17 +192,6 @@ T max( int32 numElems = end-start; - if constexpr( isHostAccessible()) - { - if(numElems( - view.data()+start, - numElems); - } - } - return pFlow::algorithms::KOKKOS::max( view.data()+start, @@ -316,8 +227,8 @@ void copy( ) { - range sSpan(sStart,sEnd); - range dSpan(dStart,dStart+(sEnd-sStart)); + range32 sSpan(sStart,sEnd); + range32 dSpan(dStart,dStart+(sEnd-sStart)); auto srcSub = Kokkos::subview(src, sSpan); auto dstSub = Kokkos::subview(dst, dSpan); @@ -336,7 +247,7 @@ void getNth( const int32 n ) { - range span(n,n+1); + range32 span(n,n+1); auto subV = Kokkos::subview(src, span); hostViewType1D dstView("getNth",1); Kokkos::deep_copy(dstView,subV); @@ -351,27 +262,16 @@ void sort( int32 start, int32 end) { - using ExecutionSpace = typename ViewType1D::execution_space; int32 numElems = end-start; if constexpr( isHostAccessible()) - { - if(numElems( - view.data()+start, - numElems); - return; - } - else - { - pFlow::algorithms::STD::sort( - view.data()+start, - numElems); - return; - } + { + pFlow::algorithms::STD::sort( + view.data()+start, + numElems); + return; } #ifdef __CUDACC__ @@ -402,22 +302,11 @@ void sort( if constexpr( isHostAccessible()) { - if(numElems( - view.data()+start, - numElems, - compare); - return; - } - else - { - pFlow::algorithms::STD::sort( - view.data()+start, - numElems, - compare); - return; - } + pFlow::algorithms::STD::sort( + view.data()+start, + numElems, + compare); + return; } #ifdef __CUDACC__ @@ -444,37 +333,25 @@ void permuteSort( int32 start, int32 end, ViewType1D& permuteView, - int32 permStart, - typename std::enable_if_t< + int32 permStart ) +{ + static_assert( areAccessible< typename ViewType1D::execution_space, typename ViewType1D::memory_space>(), - bool> = true ) -{ + "In permuteSort, view and permuteView should have the same space"); + using ExecutionSpace = typename ViewType1D::execution_space; int32 numElems = end-start; + + pFlow::algorithms::STD::permuteSort( + view.data()+start, + permuteView.data()+permStart, + numElems); + return; - if constexpr( isHostAccessible()) - { - if(numElems( - view.data()+start, - permuteView.data()+permStart, - numElems ); - return; - } - else - { - pFlow::algorithms::STD::permuteSort( - view.data()+start, - permuteView.data()+permStart, - numElems); - return; - } - } - + #ifdef __CUDACC__ pFlow::algorithms::CUDA::permuteSort( @@ -488,6 +365,36 @@ void permuteSort( } +template +INLINE_FUNCTION_HD +int binarySearch_(const T* array, int length, const T& val) +{ + + int low = 0; + int high = length - 1; + + while (low <= high) + { + int mid = low + (high - low)/2; + + if ( array[mid] > val) + { + high = mid - 1; + } + else if ( array[mid] < val) + { + low = mid + 1; + } + else + { + return mid; + } + } + + return -1; // val not found in array[0, length) +} + +/// On DEVICE and HOST calls template< typename Type, typename... properties> @@ -502,7 +409,7 @@ int32 binarySearch( if(end<=start)return -1; if(auto res = - pFlow::algorithms::binarySearch(view.data()+start,end-start,val); res>=0) { + binarySearch_(view.data()+start,end-start,val); res>=0) { return res+start; } else{ @@ -520,28 +427,22 @@ void exclusiveScan( int32 start, int32 end, ViewType1D& dView, - int32 dStart, - typename std::enable_if_t< + int32 dStart ) +{ + + static_assert + ( areAccessible< typename ViewType1D::execution_space, typename ViewType1D::memory_space>(), - bool> = true ) -{ + "In exclusiveScan, view and dView should have the same space" + + ); + using ExecutionSpace = typename ViewType1D::execution_space; int32 numElems = end-start; - if constexpr( isHostAccessible()) - { - if(numElems( - view.data()+start, - dView.data()+dStart, - numElems); - return; - } - } - + pFlow::algorithms::KOKKOS::exclusiveScan( view.data()+start, dView.data()+dStart, @@ -559,27 +460,20 @@ void inclusiveScan( int32 start, int32 end, ViewType1D& dView, - int32 dStart, - typename std::enable_if_t< - areAccessible< - typename ViewType1D::execution_space, - typename ViewType1D::memory_space>(), - bool> = true ) + int32 dStart) { using ExecutionSpace = typename ViewType1D::execution_space; + static_assert + ( + areAccessible< + typename ViewType1D::execution_space, + typename ViewType1D::memory_space>(), + "In exclusiveScan, view and dView should have the same space" + ); + + int32 numElems = end-start; - if constexpr( isHostAccessible()) - { - if(numElems( - view.data()+start, - dView.data()+dStart, - numElems); - return; - } - } pFlow::algorithms::KOKKOS::inclusiveScan( view.data()+start, diff --git a/src/phasicFlow/Kokkos/baseAlgorithmsFwd.hpp b/src/phasicFlow/Kokkos/baseAlgorithmsFwd_.hpp similarity index 100% rename from src/phasicFlow/Kokkos/baseAlgorithmsFwd.hpp rename to src/phasicFlow/Kokkos/baseAlgorithmsFwd_.hpp diff --git a/src/phasicFlow/Kokkos/baseAlgorithms.hpp b/src/phasicFlow/Kokkos/baseAlgorithms_.hpp similarity index 96% rename from src/phasicFlow/Kokkos/baseAlgorithms.hpp rename to src/phasicFlow/Kokkos/baseAlgorithms_.hpp index 70f1f76f..0c887ae4 100644 --- a/src/phasicFlow/Kokkos/baseAlgorithms.hpp +++ b/src/phasicFlow/Kokkos/baseAlgorithms_.hpp @@ -213,17 +213,6 @@ void insertSetElementD } -/*template -void fill -( - ViewType1D& view, - range range, - T val -) -{ - auto subV = Kokkos::subview(view, range); - Kokkos::deep_copy(subV, val); -}*/ template diff --git a/src/phasicFlow/Kokkos/phsicFlowKokkos.hpp b/src/phasicFlow/Kokkos/phsicFlowKokkos.hpp new file mode 100644 index 00000000..22666e49 --- /dev/null +++ b/src/phasicFlow/Kokkos/phsicFlowKokkos.hpp @@ -0,0 +1,30 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ + +#ifndef __phsicFlowKokkos_hpp__ +#define __phsicFlowKokkos_hpp__ + +#include "kokkosTypes.hpp" +#include "KokkosUtilities.hpp" +#include "ViewAlgorithms.hpp" +#include "Range.hpp" + + +#endif // __phsicFlowKokkos_hpp__ diff --git a/src/phasicFlow/algorithms/algorithmFunctions.hpp b/src/phasicFlow/algorithms/algorithmFunctions.hpp index 44778caf..0ce97355 100644 --- a/src/phasicFlow/algorithms/algorithmFunctions.hpp +++ b/src/phasicFlow/algorithms/algorithmFunctions.hpp @@ -60,37 +60,6 @@ struct minimum return lhs < rhs ? lhs : rhs; } }; - -template -INLINE_FUNCTION_HD -int binarySearch(const T* array, int length, const T& val) -{ - - int low = 0; - int high = length - 1; - - while (low <= high) - { - int mid = low + (high - low)/2; - - if ( array[mid] > val) - { - high = mid - 1; - } - else if ( array[mid] < val) - { - low = mid + 1; - } - else - { - return mid; - } - } - - return -1; // val not found in array[0, length) -} - - } #endif // __algorithmFunctions_hpp__ \ No newline at end of file diff --git a/src/phasicFlow/processors/processors.cpp b/src/phasicFlow/processors/processors.cpp index 735e75db..c9d0d380 100644 --- a/src/phasicFlow/processors/processors.cpp +++ b/src/phasicFlow/processors/processors.cpp @@ -43,9 +43,12 @@ void pFlow::processors::initProcessors(int argc, char *argv[]) processors::globalSize_ = MPI::COMM_WORLD.Get_size(); processors::globalRank_ = MPI::COMM_WORLD.Get_rank(); - pFlow::pOutput.activatePrefix(); - pFlow::pOutput.setPrefixNum(processors::globalRank_); - + if(processors::isParallel()) + { + pFlow::pOutput.activatePrefix(); + pFlow::pOutput.setPrefixNum(processors::globalRank_); + } + pFlow::mOutput.setMasterSlave(processors::isMaster()); pFlow::errReport.setMasterSlave(processors::isMaster()); }