local_executor.cpp

#include "local_executor.h"

#include <library/cpp/threading/future/future.h>

#include <util/generic/utility.h>
#include <library/cpp/deprecated/atomic/atomic.h>
#include <util/system/event.h>
#include <util/system/thread.h>
#include <util/system/tls.h>
#include <util/system/yield.h>
#include <util/thread/lfqueue.h>

#include <utility>

#ifdef _win_
static void RegularYield() {
}
#else
// unix actually has cooperative multitasking! :)
// Without this function the program runs slower and the system lags for some magic reason.
static void RegularYield() {
    SchedYield();
}
#endif

namespace {
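    // Adapts a plain TLocallyExecutableFunction to the ILocallyExecutable interface
    // so ordinary callables can be queued like any other job.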
    struct TFunctionWrapper : NPar::ILocallyExecutable {
        NPar::TLocallyExecutableFunction Exec;

        TFunctionWrapper(NPar::TLocallyExecutableFunction exec)
            : Exec(std::move(exec))
        {
        }

        void LocalExec(int id) override {
            Exec(id);
        }
    };
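
    // Wraps a callable for the id range [FirstId, LastId) and keeps one promise per id,
    // so callers can obtain a TFuture for every element of the range. The constructor
    // re-creates each promise individually; resize() alone would leave all elements
    // sharing the state of the single copied promise.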
    class TFunctionWrapperWithPromise: public NPar::ILocallyExecutable {
    private:
        NPar::TLocallyExecutableFunction Exec;
        int FirstId, LastId;
        TVector<NThreading::TPromise<void>> Promises;

    public:
        TFunctionWrapperWithPromise(NPar::TLocallyExecutableFunction exec, int firstId, int lastId)
            : Exec(std::move(exec))
            , FirstId(firstId)
            , LastId(lastId)
        {
            Y_ASSERT(FirstId <= LastId);
            const int rangeSize = LastId - FirstId;
            Promises.resize(rangeSize, NThreading::NewPromise());
            for (auto& promise : Promises) {
                promise = NThreading::NewPromise();
            }
        }

        void LocalExec(int id) override {
            Y_ASSERT(FirstId <= id && id < LastId);
            NThreading::NImpl::SetValue(Promises[id - FirstId], [=] { Exec(id); });
        }

        TVector<NThreading::TFuture<void>> GetFutures() const {
            TVector<NThreading::TFuture<void>> out;
            out.reserve(Promises.ysize());
            for (auto& promise : Promises) {
                out.push_back(promise.GetFuture());
            }
            return out;
        }
    };
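
    // A single unit of work: the executable object plus the id to run it with.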
    struct TSingleJob {
        TIntrusivePtr<NPar::ILocallyExecutable> Exec;
        int Id{0};

        TSingleJob() = default;

        TSingleJob(TIntrusivePtr<NPar::ILocallyExecutable> exec, int id)
            : Exec(std::move(exec))
            , Id(id)
        {
        }
    };
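
    // Distributes the ids of [firstId, lastId) among worker threads. Counter hands out
    // the next id to execute and WorkerCount tracks how many threads are currently inside
    // LocalExec(), so WaitComplete() can spin until all of them have finished. The two
    // atomics are alignas(64), presumably to keep them on separate cache lines.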
    class TLocalRangeExecutor: public NPar::ILocallyExecutable {
        TIntrusivePtr<NPar::ILocallyExecutable> Exec;
        alignas(64) TAtomic Counter;
        alignas(64) TAtomic WorkerCount;
        int LastId;

        void LocalExec(int) override {
            AtomicAdd(WorkerCount, 1);
            for (;;) {
                if (!DoSingleOp())
                    break;
            }
            AtomicAdd(WorkerCount, -1);
        }

    public:
        TLocalRangeExecutor(TIntrusivePtr<ILocallyExecutable> exec, int firstId, int lastId)
            : Exec(std::move(exec))
            , Counter(firstId)
            , WorkerCount(0)
            , LastId(lastId)
        {
        }

        bool DoSingleOp() {
            const int id = AtomicAdd(Counter, 1) - 1;
            if (id >= LastId)
                return false;
            Exec->LocalExec(id);
            RegularYield();
            return true;
        }

        void WaitComplete() {
            while (AtomicGet(WorkerCount) > 0)
                RegularYield();
        }

        int GetRangeSize() const {
            return Max<int>(LastId - Counter, 0);
        }
    };
}

//////////////////////////////////////////////////////////////////////////
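
// Pimpl state of TLocalExecutor: one lock-free queue plus a size counter per priority
// level, an event used to wake sleeping workers, and thread-local slots for the current
// task priority and the worker thread id.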
class NPar::TLocalExecutor::TImpl {
public:
    TLockFreeQueue<TSingleJob> JobQueue;
    TLockFreeQueue<TSingleJob> MedJobQueue;
    TLockFreeQueue<TSingleJob> LowJobQueue;
    alignas(64) TSystemEvent HasJob;

    TAtomic ThreadCount{0};
    alignas(64) TAtomic QueueSize{0};
    TAtomic MPQueueSize{0};
    TAtomic LPQueueSize{0};
    TAtomic ThreadId{0};

    Y_THREAD(int)
    CurrentTaskPriority;
    Y_THREAD(int)
    WorkerThreadId;

    static void* HostWorkerThread(void* p);
    bool GetJob(TSingleJob* job);
    void RunNewThread();
    void LaunchRange(TIntrusivePtr<TLocalRangeExecutor> execRange, int queueSizeLimit,
                     TAtomic* queueSize, TLockFreeQueue<TSingleJob>* jobQueue);

    TImpl() = default;
    ~TImpl();
};
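
// Shutdown: enqueue a single job with a null Exec as a stop marker. Each worker that
// dequeues it re-enqueues it for the next worker and exits, so one marker eventually
// stops every thread; the destructor then waits for ThreadCount to reach zero.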
NPar::TLocalExecutor::TImpl::~TImpl() {
    AtomicAdd(QueueSize, 1);
    JobQueue.Enqueue(TSingleJob(nullptr, 0));
    HasJob.Signal();
    while (AtomicGet(ThreadCount)) {
        ThreadYield();
    }
}
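
// Worker loop: poll the queues for a bounded number of fast iterations, then fall back
// to blocking on the HasJob event. A dequeued job with a null Exec is the stop marker
// described above.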
void* NPar::TLocalExecutor::TImpl::HostWorkerThread(void* p) {
    static const int FAST_ITERATIONS = 200;

    auto* const ctx = (TImpl*)p;
    TThread::SetCurrentThreadName("ParLocalExecutor");
    ctx->WorkerThreadId = AtomicAdd(ctx->ThreadId, 1);
    for (bool cont = true; cont;) {
        TSingleJob job;
        bool gotJob = false;
        for (int iter = 0; iter < FAST_ITERATIONS; ++iter) {
            if (ctx->GetJob(&job)) {
                gotJob = true;
                break;
            }
        }

        if (!gotJob) {
            ctx->HasJob.Reset();
            if (!ctx->GetJob(&job)) {
                ctx->HasJob.Wait();
                continue;
            }
        }

        if (job.Exec.Get()) {
            job.Exec->LocalExec(job.Id);
            RegularYield();
        } else {
            AtomicAdd(ctx->QueueSize, 1);
            ctx->JobQueue.Enqueue(job);
            ctx->HasJob.Signal();
            cont = false;
        }
    }
    AtomicAdd(ctx->ThreadCount, -1);
    return nullptr;
}
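
// Dequeue in strict priority order: HIGH, then MED, then LOW. The priority of the
// dequeued job is recorded in the worker's thread-local CurrentTaskPriority, which
// Exec()/ExecRange() consult when nested tasks are queued.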
bool NPar::TLocalExecutor::TImpl::GetJob(TSingleJob* job) {
    if (JobQueue.Dequeue(job)) {
        CurrentTaskPriority = TLocalExecutor::HIGH_PRIORITY;
        AtomicAdd(QueueSize, -1);
        return true;
    } else if (MedJobQueue.Dequeue(job)) {
        CurrentTaskPriority = TLocalExecutor::MED_PRIORITY;
        AtomicAdd(MPQueueSize, -1);
        return true;
    } else if (LowJobQueue.Dequeue(job)) {
        CurrentTaskPriority = TLocalExecutor::LOW_PRIORITY;
        AtomicAdd(LPQueueSize, -1);
        return true;
    }
    return false;
}
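
// Worker threads are detached; they exit on their own once the stop marker reaches them.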
void NPar::TLocalExecutor::TImpl::RunNewThread() {
    AtomicAdd(ThreadCount, 1);
    TThread thr(HostWorkerThread, this);
    thr.Start();
    thr.Detach();
}
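
// Enqueue up to ThreadCount + 1 jobs that all share one TLocalRangeExecutor (the extra
// job presumably accounts for the calling thread, which helps out under WAIT_COMPLETE).
// If the target queue already holds queueSizeLimit jobs, nothing is enqueued; the
// WAIT_COMPLETE path in ExecRange() still drains the range on the calling thread.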
void NPar::TLocalExecutor::TImpl::LaunchRange(TIntrusivePtr<TLocalRangeExecutor> rangeExec,
                                              int queueSizeLimit,
                                              TAtomic* queueSize,
                                              TLockFreeQueue<TSingleJob>* jobQueue) {
    int count = Min<int>(ThreadCount + 1, rangeExec->GetRangeSize());
    if (queueSizeLimit >= 0 && AtomicGet(*queueSize) >= queueSizeLimit) {
        return;
    }
    AtomicAdd(*queueSize, count);
    jobQueue->EnqueueAll(TVector<TSingleJob>{size_t(count), TSingleJob(rangeExec, 0)});
    HasJob.Signal();
}

NPar::TLocalExecutor::TLocalExecutor()
    : Impl_{MakeHolder<TImpl>()} {
}

NPar::TLocalExecutor::~TLocalExecutor() = default;

void NPar::TLocalExecutor::RunAdditionalThreads(int threadCount) {
    for (int i = 0; i < threadCount; i++)
        Impl_->RunNewThread();
}
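
// Queue a single job. The target queue is chosen from the Max() of the caller's
// CurrentTaskPriority and the priority bits of `flags`; WAIT_COMPLETE is not supported
// for single jobs.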
void NPar::TLocalExecutor::Exec(TIntrusivePtr<ILocallyExecutable> exec, int id, int flags) {
    Y_ASSERT((flags & WAIT_COMPLETE) == 0); // unsupported
    int prior = Max<int>(Impl_->CurrentTaskPriority, flags & PRIORITY_MASK);
    switch (prior) {
        case HIGH_PRIORITY:
            AtomicAdd(Impl_->QueueSize, 1);
            Impl_->JobQueue.Enqueue(TSingleJob(std::move(exec), id));
            break;
        case MED_PRIORITY:
            AtomicAdd(Impl_->MPQueueSize, 1);
            Impl_->MedJobQueue.Enqueue(TSingleJob(std::move(exec), id));
            break;
        case LOW_PRIORITY:
            AtomicAdd(Impl_->LPQueueSize, 1);
            Impl_->LowJobQueue.Enqueue(TSingleJob(std::move(exec), id));
            break;
        default:
            Y_ASSERT(0);
            break;
    }
    Impl_->HasJob.Signal();
}

void NPar::ILocalExecutor::Exec(TLocallyExecutableFunction exec, int id, int flags) {
    Exec(new TFunctionWrapper(std::move(exec)), id, flags);
}
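
// Run LocalExec(id) for every id in [firstId, lastId). TryExecRangeSequentially handles
// the trivial cases on the calling thread; otherwise the range is wrapped in a
// TLocalRangeExecutor and fanned out to the worker queues. With WAIT_COMPLETE the
// calling thread also pulls ids from the range and then waits for in-flight workers.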
void NPar::TLocalExecutor::ExecRange(TIntrusivePtr<ILocallyExecutable> exec, int firstId, int lastId, int flags) {
    Y_ASSERT(lastId >= firstId);
    if (TryExecRangeSequentially([=] (int id) { exec->LocalExec(id); }, firstId, lastId, flags)) {
        return;
    }
    auto rangeExec = MakeIntrusive<TLocalRangeExecutor>(std::move(exec), firstId, lastId);
    int queueSizeLimit = (flags & WAIT_COMPLETE) ? 10000 : -1;
    int prior = Max<int>(Impl_->CurrentTaskPriority, flags & PRIORITY_MASK);
    switch (prior) {
        case HIGH_PRIORITY:
            Impl_->LaunchRange(rangeExec, queueSizeLimit, &Impl_->QueueSize, &Impl_->JobQueue);
            break;
        case MED_PRIORITY:
            Impl_->LaunchRange(rangeExec, queueSizeLimit, &Impl_->MPQueueSize, &Impl_->MedJobQueue);
            break;
        case LOW_PRIORITY:
            Impl_->LaunchRange(rangeExec, queueSizeLimit, &Impl_->LPQueueSize, &Impl_->LowJobQueue);
            break;
        default:
            Y_ASSERT(0);
            break;
    }
    if (flags & WAIT_COMPLETE) {
        int keepPrior = Impl_->CurrentTaskPriority;
        Impl_->CurrentTaskPriority = prior;
        while (rangeExec->DoSingleOp()) {
        }
        Impl_->CurrentTaskPriority = keepPrior;
        rangeExec->WaitComplete();
    }
}

void NPar::ILocalExecutor::ExecRange(TLocallyExecutableFunction exec, int firstId, int lastId, int flags) {
    if (TryExecRangeSequentially(exec, firstId, lastId, flags)) {
        return;
    }
    ExecRange(new TFunctionWrapper(exec), firstId, lastId, flags);
}
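
// Like ExecRange(), but waits for every future and rethrows task exceptions on the
// calling thread.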
void NPar::ILocalExecutor::ExecRangeWithThrow(TLocallyExecutableFunction exec, int firstId, int lastId, int flags) {
    Y_ABORT_UNLESS((flags & WAIT_COMPLETE) != 0, "ExecRangeWithThrow() requires WAIT_COMPLETE to wait if exceptions arise.");
    if (TryExecRangeSequentially(exec, firstId, lastId, flags)) {
        return;
    }
    TVector<NThreading::TFuture<void>> currentRun = ExecRangeWithFutures(exec, firstId, lastId, flags);
    for (auto& result : currentRun) {
        result.GetValueSync(); // Rethrows the exception, if any. If several tasks threw, only the exception with the minimal id is rethrown.
    }
}
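
// Returns one TFuture<void> per id in [firstId, lastId); each future becomes ready, with
// a value or an exception, once the corresponding LocalExec(id) call has run.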
TVector<NThreading::TFuture<void>>
NPar::ILocalExecutor::ExecRangeWithFutures(TLocallyExecutableFunction exec, int firstId, int lastId, int flags) {
    TFunctionWrapperWithPromise* execWrapper = new TFunctionWrapperWithPromise(exec, firstId, lastId);
    TVector<NThreading::TFuture<void>> out = execWrapper->GetFutures();
    ExecRange(execWrapper, firstId, lastId, flags);
    return out;
}
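
// Drops all queued jobs from the low- and medium-priority queues (despite the name, both
// are cleared); jobs already picked up by a worker are unaffected.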
void NPar::TLocalExecutor::ClearLPQueue() {
    for (bool cont = true; cont;) {
        cont = false;
        TSingleJob job;
        while (Impl_->LowJobQueue.Dequeue(&job)) {
            AtomicAdd(Impl_->LPQueueSize, -1);
            cont = true;
        }
        while (Impl_->MedJobQueue.Dequeue(&job)) {
            AtomicAdd(Impl_->MPQueueSize, -1);
            cont = true;
        }
    }
}

int NPar::TLocalExecutor::GetQueueSize() const noexcept {
    return AtomicGet(Impl_->QueueSize);
}

int NPar::TLocalExecutor::GetMPQueueSize() const noexcept {
    return AtomicGet(Impl_->MPQueueSize);
}

int NPar::TLocalExecutor::GetLPQueueSize() const noexcept {
    return AtomicGet(Impl_->LPQueueSize);
}

int NPar::TLocalExecutor::GetWorkerThreadId() const noexcept {
    return Impl_->WorkerThreadId;
}

int NPar::TLocalExecutor::GetThreadCount() const noexcept {
    return AtomicGet(Impl_->ThreadCount);
}

//////////////////////////////////////////////////////////////////////////