// CPUBackend.hpp (MNN source header, ~8.1 KB)
  1. //
  2. // CPUBackend.hpp
  3. // MNN
  4. //
  5. // Created by MNN on 2018/07/06.
  6. // Copyright © 2018, Alibaba Group Holding Limited
  7. //
  8. #ifndef CPUBackend_hpp
  9. #define CPUBackend_hpp
  10. #include <map>
  11. #include <memory>
  12. #include <MNN/AutoTime.hpp>
  13. #include "core/Backend.hpp"
  14. #include "core/Execution.hpp"
  15. #include "core/BufferAllocator.hpp"
  16. #include "MNN_generated.h"
  17. #ifdef MNN_USE_THREAD_POOL
  18. #include "ThreadPool.hpp"
  19. #endif
  20. #ifdef MNN_KLEIDIAI_ENABLED
  21. #include "arm/mnn_kleidiai.h"
  22. #endif
  23. namespace MNN {
  24. class WorkerThread;
/**
 * CPU runtime shared across sessions: owns the thread-pool handle, CPU
 * affinity settings, and the static/dynamic buffer allocators used by
 * every CPUBackend created from it.
 */
class CPURuntime : public Runtime {
public:
    // Bundle of resize-time allocators, shared between CPURuntime and CPUBackend.
    struct DynamicAllocator {
        std::shared_ptr<BufferAllocator> mDynamicAllocator;        // primary dynamic allocator
        std::shared_ptr<BufferAllocator> mDynamicAllocatorBackup;  // secondary allocator (see onSelectDynamicAllocator)
        BufferAllocator* mCurrentDynamicAllocator = nullptr;       // non-owning; points at one of the two above
    };
    friend class CPUBackend;
    CPURuntime(const Backend::Info& info);
    virtual ~ CPURuntime();
    int onGetRuntimeStatus(RuntimeStatus statusEnum) const override;
    // Create a CPUBackend configured from `config`; `origin` may supply an existing backend to derive from.
    virtual Backend* onCreate(const BackendConfig* config, Backend* origin) const override;
    virtual void onReset(int numberThread, const BackendConfig* config, bool full) override;
    // NOTE: "Gabage" is an upstream API spelling; kept for override compatibility.
    virtual void onGabageCollect(int level) override;
    virtual float onGetMemoryInMB() override;
    virtual CompilerType onGetCompilerType() const override {
        return Compiler_Loop;
    }
    // Bracket a concurrent execution region (thread-pool acquire / release).
    virtual void onConcurrencyBegin() const override;
    virtual void onConcurrencyEnd() const override;
    virtual bool onCheckInfo(Backend::Info& info) const override;
    SingleBufferWithAllocator* buffer(int index) const;
    BufferAllocator* createDynamicBufferAlloctor(int index) const;
private:
    void _bindCPUCore() const;     // presumably binds worker threads to mCpuIds/mCpuMask — confirm in .cpp
    void _resetThreadPool() const;
    void _validateCpuIds() const;  // presumably sanitizes mCpuIds — confirm in .cpp
    mutable std::shared_ptr<EagerBufferAllocator> mStaticAllocator;
    mutable int mThreadNumber;
    mutable std::vector<int> mCpuIds;  // explicit core ids requested for affinity
    mutable unsigned long mCpuMask;    // affinity bitmask
#ifdef MNN_USE_THREAD_POOL
    mutable int mTaskIndex = -1;       // slot in the thread pool; -1 means none acquired
    mutable int mThreadOpen = 0;
    mutable ThreadPool* mThreadPool = nullptr;  // non-owning
#endif
    BackendConfig::MemoryMode mMemory;
    BackendConfig::PowerMode mPower;
    BackendConfig::PrecisionMode mPrecision;
    // Backend features
    // CPU features
    // Hook for an externally registered backend factory (e.g. extra CPU-like backends).
    static Backend*(*gExtraCreate)(const Runtime* runtime);
    size_t mFlags = 0;
    mutable int mCurrentTID = 0;
    mutable std::vector<SingleBufferWithAllocator> mDynamic;      // per-index dynamic buffers
    mutable std::vector<SingleBufferWithAllocator> mDynamicMmap;  // mmap-backed variant of mDynamic
    mutable std::shared_ptr<DynamicAllocator> mSharedDmaInfo;
    mutable std::shared_ptr<EagerBufferAllocator> mStaticAllocatorRaw;
    mutable std::shared_ptr<EagerBufferAllocator> mStaticAllocatorMMap;
};
  75. struct CoreFunctions;
  76. struct CoreInt8Functions;
  77. class CPUResizeCache;
  78. class CPUMemObj : public Backend::MemObj {
  79. public:
  80. CPUMemObj(BufferAllocator* allocator, MemChunk chunk, int size) : mAllocator(allocator), mChunk(chunk), mSize(size) {}
  81. virtual ~ CPUMemObj() {
  82. if (mAllocator) {
  83. mAllocator->free(mChunk);
  84. }
  85. }
  86. virtual MemChunk chunk() {
  87. return mChunk;
  88. }
  89. inline int getSize() const {
  90. return mSize;
  91. }
  92. private:
  93. BufferAllocator* mAllocator;
  94. MemChunk mChunk;
  95. int mSize;
  96. };
  97. class CPUBackend : public Backend {
  98. public:
  99. CPUBackend(const CPURuntime* runtime, BackendConfig::PrecisionMode precision, BackendConfig::MemoryMode memory, MNNForwardType type = MNN_FORWARD_CPU, size_t flags = 0);
  100. virtual ~CPUBackend();
  101. // Return sizeDivide, scheduleNumber aligned memory
  102. std::pair<int, int> multiThreadDivide(int size) const;
  103. virtual bool onSelectDynamicAllocator(int index, int maxIndex) override;
  104. // dividedSize's length should be larger than threadNumber
  105. void computeDivideSizes(int size, int* dst, float computeI = 0.f) const;
  106. public:
  107. virtual MemObj* onAcquire(const Tensor* nativeTensor, StorageType storageType) override;
  108. virtual bool onClearBuffer() override;
  109. virtual void onCopyBuffer(const Tensor* srcTensor, const Tensor* dstTensor) const override;
  110. virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
  111. const MNN::Op* op) override;
  112. virtual void onExecuteBegin() const override;
  113. virtual void onExecuteEnd() const override;
  114. virtual void* onMapTensor(Tensor::MapType mtype, Tensor::DimensionType dtype, const Tensor* srcTensor) override;
  115. virtual bool onUnmapTensor(Tensor::MapType mtype, Tensor::DimensionType dtype, const Tensor* dstTensor, void* mapPtr) override;
  116. virtual void onResizeBegin() override;
  117. virtual ErrorCode onResizeEnd() override;
  118. const CoreFunctions* functions() const {
  119. return mCoreFunctions;
  120. }
  121. // Return element size for Tensor, conside pack
  122. size_t getTensorSize(const Tensor* tensor, bool multiBytes = false) const;
  123. const CoreInt8Functions* int8Functions() const {
  124. return mInt8CoreFunctions;
  125. }
  126. void _resetDynamicMemory() const;
  127. public:
  128. class Creator {
  129. public:
  130. virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
  131. const MNN::Op* op, Backend* backend) const = 0;
  132. };
  133. static bool addCreator(OpType t, Creator* c);
  134. inline int threadNumber() const {
  135. return mThreadNumber;
  136. }
  137. BufferAllocator* getBufferAllocator(bool defer_allocator = true) const {
  138. return mDmaInfo->mCurrentDynamicAllocator;
  139. }
  140. BackendConfig::MemoryMode memoryMode() const {
  141. return mMemory;
  142. }
  143. BackendConfig::PrecisionMode precisionMode() const {
  144. return mPrecisionMode;
  145. }
  146. CPUResizeCache* getCache() const {
  147. return mCache;
  148. }
  149. virtual const Runtime* getRuntime() override;
  150. #ifdef MNN_USE_THREAD_POOL
  151. inline int taskIndex() const {return mRuntime->mTaskIndex;}
  152. inline ThreadPool* threadPool() const {return mRuntime->mThreadPool;}
  153. #endif
  154. static void initCreatorMap();
  155. static size_t getBytes(const Backend* backend, const Tensor* output);
  156. static DataType getDataType(const Tensor* tensor);
  157. friend class CPURuntime;
  158. void enqueueTask(std::function<int()>&& task);
  159. protected:
  160. MemObj* allocBuffer(size_t size, Tensor* dest, StorageType storageType);
  161. CoreFunctions* mCoreFunctions;
  162. CoreInt8Functions* mInt8CoreFunctions;
  163. private:
  164. mutable std::shared_ptr<WorkerThread> mInitWorkQueue;
  165. int mThreadNumber;
  166. #ifdef MNN_USE_THREAD_POOL
  167. ThreadPool* mThreadPool = nullptr;
  168. #endif
  169. std::vector<std::pair<float, int>> mGroupWithComputeRate;
  170. float mComputeI = 0.f;
  171. std::shared_ptr<CPURuntime::DynamicAllocator> mDmaInfo;
  172. CPURuntime* mRuntime;
  173. BackendConfig::PrecisionMode mPrecisionMode;
  174. BackendConfig::MemoryMode mMemory;
  175. static std::map<OpType, CPUBackend::Creator*>* gCreator;
  176. CPUResizeCache* mCache;
  177. std::vector<std::shared_ptr<CPUResizeCache>> mCacheGroup;
  178. };
/** Execution wrapper inserted dynamically to cast tensors to a target data type. */
class CastWrapExecution : public Execution {
public:
    // `runT` is the data type the wrapped computation runs in.
    CastWrapExecution(Backend* backend, DataType runT)
        : Execution(backend), mRunType(runT) {}
    virtual ErrorCode onExecute(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) override;
private:
    DataType mRunType;  // target type for the cast
};
// Defines a registration function ___name__opType__() that installs a static
// instance of `name` (a CPUBackend::Creator) for `opType` via addCreator.
// These functions are presumably invoked from initCreatorMap — confirm in the
// registration .cpp. `name` must be default-constructible.
#define REGISTER_CPU_OP_CREATOR(name, opType) \
void ___##name##__##opType##__() { \
    static name _temp;\
    CPUBackend::addCreator(opType, &_temp); \
}
// Same as REGISTER_CPU_OP_CREATOR, but compiles to a no-op stub unless
// deprecated-op support is enabled, keeping the symbol for the registry.
#ifdef MNN_SUPPORT_DEPRECATED_OP
#define REGISTER_CPU_OP_CREATOR_OLD(name, opType) \
void ___##name##__##opType##__() { \
    static name _temp;\
    CPUBackend::addCreator(opType, &_temp); \
}
#else
#define REGISTER_CPU_OP_CREATOR_OLD(name, opType) \
void ___##name##__##opType##__() { \
}
#endif
// Registration variant for render ops (identical expansion; separate macro
// so build configs can redefine it independently).
#define REGISTER_CPU_OP_CREATOR_RENDER(name, opType) \
void ___##name##__##opType##__() { \
    static name _temp;\
    CPUBackend::addCreator(opType, &_temp); \
}
// Registration variant for transformer ops (identical expansion; separate
// macro so build configs can redefine it independently).
#define REGISTER_CPU_OP_CREATOR_TRANSFORMER(name, opType) \
void ___##name##__##opType##__() { \
    static name _temp;\
    CPUBackend::addCreator(opType, &_temp); \
}
  214. } // namespace MNN
  215. #endif /* CPUBackend_hpp */