# 系统任务

【免费下载链接】runtime:本项目提供 CANN 运行时组件和维测功能组件。项目地址: https://gitcode.com/cann/runtime

除了可以下发 Kernel 执行任务外，Runtime 还提供下发 Reduce 和随机数生成的内置系统任务的功能。系统任务区别于 Kernel 任务在于无需用户提供执行代码。

系统任务可以下发到某条 Stream 异步执行，同样遵循同一流上任务保序执行的规则。

通过 aclrtReduceAsync 接口可以下发执行 Reduce 操作任务，调用代码示例如下：

```c
aclInit(NULL);
aclrtSetDevice(0);
aclrtStream stream;
aclrtCreateStream(&stream);

// 准备 Host 数据
const int count = 4;
float hostInput[4] = {1.0, 2.0, 3.0, 4.0};
float hostOutput[4] = {0, 0, 0, 0};
size_t size = count * sizeof(float);

// 申请 Device 内存
void *devInput = NULL;
void *devOutput = NULL;
aclrtMalloc(&devInput, size, ACL_MEM_MALLOC_HUGE_FIRST);
aclrtMalloc(&devOutput, size, ACL_MEM_MALLOC_HUGE_FIRST);

// 拷贝数据到 Device
aclrtMemcpy(devInput, size, hostInput, size, ACL_MEMCPY_HOST_TO_DEVICE);
aclrtMemcpy(devOutput, size, hostInput, size, ACL_MEMCPY_HOST_TO_DEVICE);

// 调用 aclrtReduceAsync
aclrtReduceAsync(devOutput, devInput, size,
                 ACL_RT_MEMCPY_SDMA_AUTOMATIC_SUM, // 归约类型
                 ACL_FLOAT,                        // 数据类型
                 stream, NULL);

// 同步 stream
aclrtSynchronizeStream(stream);

// 拷回结果
aclrtMemcpy(hostOutput, size, devOutput, size, ACL_MEMCPY_DEVICE_TO_HOST);
for (int i = 0; i < count; i++) {
    printf("Reduce SUM result[%d] = %f\n", i, hostOutput[i]);
}
/* 预期如下结果
Reduce SUM result[0] = 2.000000
Reduce SUM result[1] = 4.000000
Reduce SUM result[2] = 6.000000
Reduce SUM result[3] = 8.000000
*/

// 释放资源
aclrtFree(devInput);
aclrtFree(devOutput);
aclrtDestroyStream(stream);
aclrtResetDeviceForce(0);
aclFinalize();
```

通过 aclrtRandomNumAsync 执行随机数生成任务，调用代码示例如下：

```c
aclError NormalFloatAsync(
    float mean, float stddev, uint64_t seed, uint64_t num,
    void *counterDevAddr, void *devOutput, aclrtStream stream)
{
    aclrtRandomNumTaskInfo taskInfo = {};
    taskInfo.dataType = ACL_FLOAT;
    taskInfo.randomNumFuncParaInfo.funcType = ACL_RT_RANDOM_NUM_FUNC_TYPE_NORMAL_DIS;
    taskInfo.randomParaAddr = NULL;
    taskInfo.randomCounterAddr = counterDevAddr;
    taskInfo.randomResultAddr = devOutput;

    memcpy(taskInfo.randomNumFuncParaInfo.paramInfo.normalDisInfo.mean.valueOrAddr, &mean, sizeof(float));
    taskInfo.randomNumFuncParaInfo.paramInfo.normalDisInfo.mean.size = sizeof(float);
    taskInfo.randomNumFuncParaInfo.paramInfo.normalDisInfo.mean.isAddr = 0;

    memcpy(taskInfo.randomNumFuncParaInfo.paramInfo.normalDisInfo.stddev.valueOrAddr, &stddev, sizeof(float));
    taskInfo.randomNumFuncParaInfo.paramInfo.normalDisInfo.stddev.size = sizeof(float);
    taskInfo.randomNumFuncParaInfo.paramInfo.normalDisInfo.stddev.isAddr = 0;

    memcpy(taskInfo.randomSeed.valueOrAddr, &seed, sizeof(uint64_t));
    taskInfo.randomSeed.size = sizeof(uint64_t);
    taskInfo.randomSeed.isAddr = 0;

    memcpy(taskInfo.randomNum.valueOrAddr, &num, sizeof(uint64_t));
    taskInfo.randomNum.size = sizeof(uint64_t);
    taskInfo.randomNum.isAddr = 0;

    return aclrtRandomNumAsync(&taskInfo, stream, NULL);
}

int main()
{
    aclError ret;

    // 初始化 ACL
    ret = aclInit(NULL);
    ret = aclrtSetDevice(0);
    aclrtStream stream;
    ret = aclrtCreateStream(&stream);

    uint64_t num = 128;
    size_t size = num * sizeof(uint64_t); // 申请足够大内存

    // 申请 Device 内存
    void *devOutput = NULL;
    ret = aclrtMalloc(&devOutput, size, ACL_MEM_MALLOC_NORMAL_ONLY);

    // 准备 Host 数据
    void *hostOutput = malloc(size);

    // 申请存放随机数状态 counter 的 device 内存（要求 16 Byte）
    void *counterAddr = NULL;
    ret = aclrtMalloc((void **)&counterAddr, 16, ACL_MEM_MALLOC_NORMAL_ONLY);

    float mean = 3.0;
    float stddev = 2.0;
    ret = NormalFloatAsync(mean, stddev, 0, num, counterAddr, devOutput, stream);

    // 同步 stream
    aclrtSynchronizeStream(stream);

    // 拷回结果
    aclrtMemcpy(hostOutput, size, devOutput, size, ACL_MEMCPY_DEVICE_TO_HOST);

    // 释放资源
    free(hostOutput);
    aclrtFree(devOutput);
    aclrtFree(counterAddr);
    aclrtDestroyStream(stream);
    aclrtResetDeviceForce(0);
    aclFinalize();
    return 0;
}
```

【免费下载链接】runtime:本项目提供 CANN 运行时组件和维测功能组件。项目地址: https://gitcode.com/cann/runtime

创作声明：本文部分内容由 AI 辅助生成（AIGC），仅供参考。