123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369 |
- #include "local_executor.h"
- #include <library/cpp/threading/future/future.h>
- #include <util/generic/utility.h>
- #include <library/cpp/deprecated/atomic/atomic.h>
- #include <util/system/event.h>
- #include <util/system/thread.h>
- #include <util/system/tls.h>
- #include <util/system/yield.h>
- #include <util/thread/lfqueue.h>
- #include <utility>
- #ifdef _win_
- static void RegularYield() {
- }
- #else
- // unix actually has cooperative multitasking! :)
- // without this function program runs slower and system lags for some magic reason
- static void RegularYield() {
- SchedYield();
- }
- #endif
- namespace {
- struct TFunctionWrapper : NPar::ILocallyExecutable {
- NPar::TLocallyExecutableFunction Exec;
- TFunctionWrapper(NPar::TLocallyExecutableFunction exec)
- : Exec(std::move(exec))
- {
- }
- void LocalExec(int id) override {
- Exec(id);
- }
- };
- class TFunctionWrapperWithPromise: public NPar::ILocallyExecutable {
- private:
- NPar::TLocallyExecutableFunction Exec;
- int FirstId, LastId;
- TVector<NThreading::TPromise<void>> Promises;
- public:
- TFunctionWrapperWithPromise(NPar::TLocallyExecutableFunction exec, int firstId, int lastId)
- : Exec(std::move(exec))
- , FirstId(firstId)
- , LastId(lastId)
- {
- Y_ASSERT(FirstId <= LastId);
- const int rangeSize = LastId - FirstId;
- Promises.resize(rangeSize, NThreading::NewPromise());
- for (auto& promise : Promises) {
- promise = NThreading::NewPromise();
- }
- }
- void LocalExec(int id) override {
- Y_ASSERT(FirstId <= id && id < LastId);
- NThreading::NImpl::SetValue(Promises[id - FirstId], [=] { Exec(id); });
- }
- TVector<NThreading::TFuture<void>> GetFutures() const {
- TVector<NThreading::TFuture<void>> out;
- out.reserve(Promises.ysize());
- for (auto& promise : Promises) {
- out.push_back(promise.GetFuture());
- }
- return out;
- }
- };
- struct TSingleJob {
- TIntrusivePtr<NPar::ILocallyExecutable> Exec;
- int Id{0};
- TSingleJob() = default;
- TSingleJob(TIntrusivePtr<NPar::ILocallyExecutable> exec, int id)
- : Exec(std::move(exec))
- , Id(id)
- {
- }
- };
- class TLocalRangeExecutor: public NPar::ILocallyExecutable {
- TIntrusivePtr<NPar::ILocallyExecutable> Exec;
- alignas(64) TAtomic Counter;
- alignas(64) TAtomic WorkerCount;
- int LastId;
- void LocalExec(int) override {
- AtomicAdd(WorkerCount, 1);
- for (;;) {
- if (!DoSingleOp())
- break;
- }
- AtomicAdd(WorkerCount, -1);
- }
- public:
- TLocalRangeExecutor(TIntrusivePtr<ILocallyExecutable> exec, int firstId, int lastId)
- : Exec(std::move(exec))
- , Counter(firstId)
- , WorkerCount(0)
- , LastId(lastId)
- {
- }
- bool DoSingleOp() {
- const int id = AtomicAdd(Counter, 1) - 1;
- if (id >= LastId)
- return false;
- Exec->LocalExec(id);
- RegularYield();
- return true;
- }
- void WaitComplete() {
- while (AtomicGet(WorkerCount) > 0)
- RegularYield();
- }
- int GetRangeSize() const {
- return Max<int>(LastId - Counter, 0);
- }
- };
- }
- //////////////////////////////////////////////////////////////////////////
- class NPar::TLocalExecutor::TImpl {
- public:
- TLockFreeQueue<TSingleJob> JobQueue;
- TLockFreeQueue<TSingleJob> MedJobQueue;
- TLockFreeQueue<TSingleJob> LowJobQueue;
- alignas(64) TSystemEvent HasJob;
- TAtomic ThreadCount{0};
- alignas(64) TAtomic QueueSize{0};
- TAtomic MPQueueSize{0};
- TAtomic LPQueueSize{0};
- TAtomic ThreadId{0};
- Y_THREAD(int)
- CurrentTaskPriority;
- Y_THREAD(int)
- WorkerThreadId;
- static void* HostWorkerThread(void* p);
- bool GetJob(TSingleJob* job);
- void RunNewThread();
- void LaunchRange(TIntrusivePtr<TLocalRangeExecutor> execRange, int queueSizeLimit,
- TAtomic* queueSize, TLockFreeQueue<TSingleJob>* jobQueue);
- TImpl() = default;
- ~TImpl();
- };
- NPar::TLocalExecutor::TImpl::~TImpl() {
- AtomicAdd(QueueSize, 1);
- JobQueue.Enqueue(TSingleJob(nullptr, 0));
- HasJob.Signal();
- while (AtomicGet(ThreadCount)) {
- ThreadYield();
- }
- }
- void* NPar::TLocalExecutor::TImpl::HostWorkerThread(void* p) {
- static const int FAST_ITERATIONS = 200;
- auto* const ctx = (TImpl*)p;
- TThread::SetCurrentThreadName("ParLocalExecutor");
- ctx->WorkerThreadId = AtomicAdd(ctx->ThreadId, 1);
- for (bool cont = true; cont;) {
- TSingleJob job;
- bool gotJob = false;
- for (int iter = 0; iter < FAST_ITERATIONS; ++iter) {
- if (ctx->GetJob(&job)) {
- gotJob = true;
- break;
- }
- }
- if (!gotJob) {
- ctx->HasJob.Reset();
- if (!ctx->GetJob(&job)) {
- ctx->HasJob.Wait();
- continue;
- }
- }
- if (job.Exec.Get()) {
- job.Exec->LocalExec(job.Id);
- RegularYield();
- } else {
- AtomicAdd(ctx->QueueSize, 1);
- ctx->JobQueue.Enqueue(job);
- ctx->HasJob.Signal();
- cont = false;
- }
- }
- AtomicAdd(ctx->ThreadCount, -1);
- return nullptr;
- }
- bool NPar::TLocalExecutor::TImpl::GetJob(TSingleJob* job) {
- if (JobQueue.Dequeue(job)) {
- CurrentTaskPriority = TLocalExecutor::HIGH_PRIORITY;
- AtomicAdd(QueueSize, -1);
- return true;
- } else if (MedJobQueue.Dequeue(job)) {
- CurrentTaskPriority = TLocalExecutor::MED_PRIORITY;
- AtomicAdd(MPQueueSize, -1);
- return true;
- } else if (LowJobQueue.Dequeue(job)) {
- CurrentTaskPriority = TLocalExecutor::LOW_PRIORITY;
- AtomicAdd(LPQueueSize, -1);
- return true;
- }
- return false;
- }
- void NPar::TLocalExecutor::TImpl::RunNewThread() {
- AtomicAdd(ThreadCount, 1);
- TThread thr(HostWorkerThread, this);
- thr.Start();
- thr.Detach();
- }
- void NPar::TLocalExecutor::TImpl::LaunchRange(TIntrusivePtr<TLocalRangeExecutor> rangeExec,
- int queueSizeLimit,
- TAtomic* queueSize,
- TLockFreeQueue<TSingleJob>* jobQueue) {
- int count = Min<int>(ThreadCount + 1, rangeExec->GetRangeSize());
- if (queueSizeLimit >= 0 && AtomicGet(*queueSize) >= queueSizeLimit) {
- return;
- }
- AtomicAdd(*queueSize, count);
- jobQueue->EnqueueAll(TVector<TSingleJob>{size_t(count), TSingleJob(rangeExec, 0)});
- HasJob.Signal();
- }
- NPar::TLocalExecutor::TLocalExecutor()
- : Impl_{MakeHolder<TImpl>()} {
- }
- NPar::TLocalExecutor::~TLocalExecutor() = default;
- void NPar::TLocalExecutor::RunAdditionalThreads(int threadCount) {
- for (int i = 0; i < threadCount; i++)
- Impl_->RunNewThread();
- }
- void NPar::TLocalExecutor::Exec(TIntrusivePtr<ILocallyExecutable> exec, int id, int flags) {
- Y_ASSERT((flags & WAIT_COMPLETE) == 0); // unsupported
- int prior = Max<int>(Impl_->CurrentTaskPriority, flags & PRIORITY_MASK);
- switch (prior) {
- case HIGH_PRIORITY:
- AtomicAdd(Impl_->QueueSize, 1);
- Impl_->JobQueue.Enqueue(TSingleJob(std::move(exec), id));
- break;
- case MED_PRIORITY:
- AtomicAdd(Impl_->MPQueueSize, 1);
- Impl_->MedJobQueue.Enqueue(TSingleJob(std::move(exec), id));
- break;
- case LOW_PRIORITY:
- AtomicAdd(Impl_->LPQueueSize, 1);
- Impl_->LowJobQueue.Enqueue(TSingleJob(std::move(exec), id));
- break;
- default:
- Y_ASSERT(0);
- break;
- }
- Impl_->HasJob.Signal();
- }
- void NPar::ILocalExecutor::Exec(TLocallyExecutableFunction exec, int id, int flags) {
- Exec(new TFunctionWrapper(std::move(exec)), id, flags);
- }
- void NPar::TLocalExecutor::ExecRange(TIntrusivePtr<ILocallyExecutable> exec, int firstId, int lastId, int flags) {
- Y_ASSERT(lastId >= firstId);
- if (TryExecRangeSequentially([=] (int id) { exec->LocalExec(id); }, firstId, lastId, flags)) {
- return;
- }
- auto rangeExec = MakeIntrusive<TLocalRangeExecutor>(std::move(exec), firstId, lastId);
- int queueSizeLimit = (flags & WAIT_COMPLETE) ? 10000 : -1;
- int prior = Max<int>(Impl_->CurrentTaskPriority, flags & PRIORITY_MASK);
- switch (prior) {
- case HIGH_PRIORITY:
- Impl_->LaunchRange(rangeExec, queueSizeLimit, &Impl_->QueueSize, &Impl_->JobQueue);
- break;
- case MED_PRIORITY:
- Impl_->LaunchRange(rangeExec, queueSizeLimit, &Impl_->MPQueueSize, &Impl_->MedJobQueue);
- break;
- case LOW_PRIORITY:
- Impl_->LaunchRange(rangeExec, queueSizeLimit, &Impl_->LPQueueSize, &Impl_->LowJobQueue);
- break;
- default:
- Y_ASSERT(0);
- break;
- }
- if (flags & WAIT_COMPLETE) {
- int keepPrior = Impl_->CurrentTaskPriority;
- Impl_->CurrentTaskPriority = prior;
- while (rangeExec->DoSingleOp()) {
- }
- Impl_->CurrentTaskPriority = keepPrior;
- rangeExec->WaitComplete();
- }
- }
- void NPar::ILocalExecutor::ExecRange(TLocallyExecutableFunction exec, int firstId, int lastId, int flags) {
- if (TryExecRangeSequentially(exec, firstId, lastId, flags)) {
- return;
- }
- ExecRange(new TFunctionWrapper(exec), firstId, lastId, flags);
- }
- void NPar::ILocalExecutor::ExecRangeWithThrow(TLocallyExecutableFunction exec, int firstId, int lastId, int flags) {
- Y_ABORT_UNLESS((flags & WAIT_COMPLETE) != 0, "ExecRangeWithThrow() requires WAIT_COMPLETE to wait if exceptions arise.");
- if (TryExecRangeSequentially(exec, firstId, lastId, flags)) {
- return;
- }
- TVector<NThreading::TFuture<void>> currentRun = ExecRangeWithFutures(exec, firstId, lastId, flags);
- for (auto& result : currentRun) {
- result.GetValueSync(); // Exception will be rethrown if exists. If several exception - only the one with minimal id is rethrown.
- }
- }
- TVector<NThreading::TFuture<void>>
- NPar::ILocalExecutor::ExecRangeWithFutures(TLocallyExecutableFunction exec, int firstId, int lastId, int flags) {
- TFunctionWrapperWithPromise* execWrapper = new TFunctionWrapperWithPromise(exec, firstId, lastId);
- TVector<NThreading::TFuture<void>> out = execWrapper->GetFutures();
- ExecRange(execWrapper, firstId, lastId, flags);
- return out;
- }
- void NPar::TLocalExecutor::ClearLPQueue() {
- for (bool cont = true; cont;) {
- cont = false;
- TSingleJob job;
- while (Impl_->LowJobQueue.Dequeue(&job)) {
- AtomicAdd(Impl_->LPQueueSize, -1);
- cont = true;
- }
- while (Impl_->MedJobQueue.Dequeue(&job)) {
- AtomicAdd(Impl_->MPQueueSize, -1);
- cont = true;
- }
- }
- }
- int NPar::TLocalExecutor::GetQueueSize() const noexcept {
- return AtomicGet(Impl_->QueueSize);
- }
- int NPar::TLocalExecutor::GetMPQueueSize() const noexcept {
- return AtomicGet(Impl_->MPQueueSize);
- }
- int NPar::TLocalExecutor::GetLPQueueSize() const noexcept {
- return AtomicGet(Impl_->LPQueueSize);
- }
- int NPar::TLocalExecutor::GetWorkerThreadId() const noexcept {
- return Impl_->WorkerThreadId;
- }
- int NPar::TLocalExecutor::GetThreadCount() const noexcept {
- return AtomicGet(Impl_->ThreadCount);
- }
- //////////////////////////////////////////////////////////////////////////
|