//===-- tsan_rtl_access.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
// Definitions of memory access and function entry/exit entry points.
//===----------------------------------------------------------------------===//

#include "tsan_rtl.h"

namespace __tsan {
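
// Appends a memory access event to the thread's trace without blocking.
// The common case emits a compact EventAccess with a PC delta relative to the
// previous traced PC; if the delta does not fit into EventAccess::kPCBits, an
// extended EventAccessExt with the full PC is emitted instead. Returns false
// if the current trace part is full, in which case the caller is expected to
// switch trace parts and retry.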
ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState* thr, uptr pc,
                                             uptr addr, uptr size,
                                             AccessType typ) {
  DCHECK(size == 1 || size == 2 || size == 4 || size == 8);
  if (!kCollectHistory)
    return true;
  EventAccess* ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  u64 size_log = size == 1 ? 0 : size == 2 ? 1 : size == 4 ? 2 : 3;
  uptr pc_delta = pc - thr->trace_prev_pc + (1 << (EventAccess::kPCBits - 1));
  thr->trace_prev_pc = pc;
  if (LIKELY(pc_delta < (1 << EventAccess::kPCBits))) {
    ev->is_access = 1;
    ev->is_read = !!(typ & kAccessRead);
    ev->is_atomic = !!(typ & kAccessAtomic);
    ev->size_log = size_log;
    ev->pc_delta = pc_delta;
    DCHECK_EQ(ev->pc_delta, pc_delta);
    ev->addr = CompressAddr(addr);
    TraceRelease(thr, ev);
    return true;
  }
  auto* evex = reinterpret_cast<EventAccessExt*>(ev);
  evex->is_access = 0;
  evex->is_func = 0;
  evex->type = EventType::kAccessExt;
  evex->is_read = !!(typ & kAccessRead);
  evex->is_atomic = !!(typ & kAccessAtomic);
  evex->size_log = size_log;
  // Note: this is important, see comment in EventAccessExt.
  evex->_ = 0;
  evex->addr = CompressAddr(addr);
  evex->pc = pc;
  TraceRelease(thr, evex);
  return true;
}
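
// Range accesses and function entry/exit are traced with dedicated event
// types. The Trace* wrappers below retry through TraceSwitchPart when the
// current trace part has no room left.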
ALWAYS_INLINE
bool TryTraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
                               AccessType typ) {
  if (!kCollectHistory)
    return true;
  EventAccessRange* ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  thr->trace_prev_pc = pc;
  ev->is_access = 0;
  ev->is_func = 0;
  ev->type = EventType::kAccessRange;
  ev->is_read = !!(typ & kAccessRead);
  ev->is_free = !!(typ & kAccessFree);
  ev->size_lo = size;
  ev->pc = CompressAddr(pc);
  ev->addr = CompressAddr(addr);
  ev->size_hi = size >> EventAccessRange::kSizeLoBits;
  TraceRelease(thr, ev);
  return true;
}

void TraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
                            AccessType typ) {
  if (LIKELY(TryTraceMemoryAccessRange(thr, pc, addr, size, typ)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceMemoryAccessRange(thr, pc, addr, size, typ);
  DCHECK(res);
}

void TraceFunc(ThreadState* thr, uptr pc) {
  if (LIKELY(TryTraceFunc(thr, pc)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceFunc(thr, pc);
  DCHECK(res);
}

NOINLINE void TraceRestartFuncEntry(ThreadState* thr, uptr pc) {
  TraceSwitchPart(thr);
  FuncEntry(thr, pc);
}

NOINLINE void TraceRestartFuncExit(ThreadState* thr) {
  TraceSwitchPart(thr);
  FuncExit(thr);
}
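
// Mutex lock/unlock and time (epoch) events go into the same per-thread trace
// as memory accesses; they carry compressed addresses, stack IDs and sid/epoch
// values in the same compact event encoding used above.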
void TraceMutexLock(ThreadState* thr, EventType type, uptr pc, uptr addr,
                    StackID stk) {
  DCHECK(type == EventType::kLock || type == EventType::kRLock);
  if (!kCollectHistory)
    return;
  EventLock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = type;
  ev.pc = CompressAddr(pc);
  ev.stack_lo = stk;
  ev.stack_hi = stk >> EventLock::kStackIDLoBits;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}

void TraceMutexUnlock(ThreadState* thr, uptr addr) {
  if (!kCollectHistory)
    return;
  EventUnlock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kUnlock;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}

void TraceTime(ThreadState* thr) {
  if (!kCollectHistory)
    return;
  FastState fast_state = thr->fast_state;
  EventTime ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kTime;
  ev.sid = static_cast<u64>(fast_state.sid());
  ev.epoch = static_cast<u64>(fast_state.epoch());
  ev._ = 0;
  TraceEvent(thr, ev);
}
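
// Shadow memory is accessed with relaxed atomics: each application granule
// maps to kShadowCnt shadow slots, and every slot is loaded/stored as a single
// 32-bit unit, so concurrent updates of the shadow itself cannot produce torn
// values.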
ALWAYS_INLINE RawShadow LoadShadow(RawShadow* p) {
  return static_cast<RawShadow>(
      atomic_load((atomic_uint32_t*)p, memory_order_relaxed));
}

ALWAYS_INLINE void StoreShadow(RawShadow* sp, RawShadow s) {
  atomic_store((atomic_uint32_t*)sp, static_cast<u32>(s), memory_order_relaxed);
}

NOINLINE void DoReportRace(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                           Shadow old,
                           AccessType typ) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
  // For the free shadow markers the first element (which contains kFreeSid)
  // triggers the race, but the second element contains info about the freeing
  // thread, so take it from there.
  if (old.sid() == kFreeSid)
    old = Shadow(LoadShadow(&shadow_mem[1]));
  // This prevents trapping on this address in the future.
  for (uptr i = 0; i < kShadowCnt; i++)
    StoreShadow(&shadow_mem[i], i == 0 ? Shadow::kRodata : Shadow::kEmpty);
  // See the comment in MemoryRangeFreed as to why the slot is locked
  // for free memory accesses. ReportRace must not be called with
  // the slot locked because of the fork. But MemoryRangeFreed is not
  // called during fork because fork sets ignore_reads_and_writes,
  // so simply unlocking the slot should be fine.
  if (typ & kAccessFree)
    SlotUnlock(thr);
  ReportRace(thr, shadow_mem, cur, Shadow(old), typ);
  if (typ & kAccessFree)
    SlotLock(thr);
}
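
// ContainsSameAccess checks whether one of the shadow slots already records an
// access that subsumes the current one (an exact slot match for writes; for
// reads the read bit is masked in, so a read matches both prior reads and
// writes). CheckRaces then scans the slots for conflicting accesses. Both have
// a scalar and an SSE-vectorized implementation selected by TSAN_VECTORIZE.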
#if !TSAN_VECTORIZE
ALWAYS_INLINE
bool ContainsSameAccess(RawShadow* s, Shadow cur, int unused0, int unused1,
                        AccessType typ) {
  for (uptr i = 0; i < kShadowCnt; i++) {
    auto old = LoadShadow(&s[i]);
    if (!(typ & kAccessRead)) {
      if (old == cur.raw())
        return true;
      continue;
    }
    auto masked = static_cast<RawShadow>(static_cast<u32>(old) |
                                         static_cast<u32>(Shadow::kRodata));
    if (masked == cur.raw())
      return true;
    if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
      if (old == Shadow::kRodata)
        return true;
    }
  }
  return false;
}
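
// Scalar race check: walks the kShadowCnt slots, stores the current access
// into the first empty slot, skips non-overlapping or same-thread slots, and
// reports a race for a conflicting slot whose epoch is not yet covered by the
// current thread's vector clock. If nothing was stored and no slot could be
// reused, a pseudo-random slot (derived from the trace position) is evicted.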
ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                int unused0, int unused1, AccessType typ) {
  bool stored = false;
  for (uptr idx = 0; idx < kShadowCnt; idx++) {
    RawShadow* sp = &shadow_mem[idx];
    Shadow old(LoadShadow(sp));
    if (LIKELY(old.raw() == Shadow::kEmpty)) {
      if (!(typ & kAccessCheckOnly) && !stored)
        StoreShadow(sp, cur.raw());
      return false;
    }
    if (LIKELY(!(cur.access() & old.access())))
      continue;
    if (LIKELY(cur.sid() == old.sid())) {
      if (!(typ & kAccessCheckOnly) &&
          LIKELY(cur.access() == old.access() && old.IsRWWeakerOrEqual(typ))) {
        StoreShadow(sp, cur.raw());
        stored = true;
      }
      continue;
    }
    if (LIKELY(old.IsBothReadsOrAtomic(typ)))
      continue;
    if (LIKELY(thr->clock.Get(old.sid()) >= old.epoch()))
      continue;
    DoReportRace(thr, shadow_mem, cur, old, typ);
    return true;
  }
  // We did not find any races and had already stored
  // the current access info, so we are done.
  if (LIKELY(stored))
    return false;
  // Choose a random candidate slot and replace it.
  uptr index =
      atomic_load_relaxed(&thr->trace_pos) / sizeof(Event) % kShadowCnt;
  StoreShadow(&shadow_mem[index], cur.raw());
  return false;
}

# define LOAD_CURRENT_SHADOW(cur, shadow_mem) UNUSED int access = 0, shadow = 0

#else /* !TSAN_VECTORIZE */
ALWAYS_INLINE
bool ContainsSameAccess(RawShadow* unused0, Shadow unused1, m128 shadow,
                        m128 access, AccessType typ) {
  // Note: we could check if there is a larger access of the same type,
  // e.g. we just allocated/memset-ed a block (so it contains 8-byte writes)
  // and now do smaller reads/writes; these could also be considered the "same
  // access". However, it would make the check more expensive, so it's unclear
  // if it's worth it. But it would conserve trace space, so it's useful
  // besides the potential speed-up.
  if (!(typ & kAccessRead)) {
    const m128 same = _mm_cmpeq_epi32(shadow, access);
    return _mm_movemask_epi8(same);
  }
  // For reads we need to reset the read bit in the shadow,
  // because we need to match a read with both reads and writes.
  // Shadow::kRodata has only the read bit set, so it does what we want.
  // We also abuse it for the rodata check to save a few cycles
  // since we already loaded Shadow::kRodata into a register.
  // Reads from rodata can't race.
  // Measurements show that they can be 10-20% of all memory accesses.
  // Shadow::kRodata has epoch 0, which cannot appear in shadow normally
  // (thread epochs start from 1). So the same read bit mask
  // serves as the rodata indicator.
  const m128 read_mask = _mm_set1_epi32(static_cast<u32>(Shadow::kRodata));
  const m128 masked_shadow = _mm_or_si128(shadow, read_mask);
  m128 same = _mm_cmpeq_epi32(masked_shadow, access);
  // Range memory accesses check Shadow::kRodata before calling this,
  // Shadow::kRodata is not possible for free memory access
  // and Go does not use Shadow::kRodata.
  if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
    const m128 ro = _mm_cmpeq_epi32(shadow, read_mask);
    same = _mm_or_si128(ro, same);
  }
  return _mm_movemask_epi8(same);
}

NOINLINE void DoReportRaceV(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                            u32 race_mask, m128 shadow, AccessType typ) {
  // race_mask indicates which of the shadow elements raced with the current
  // access. Extract that element.
  CHECK_NE(race_mask, 0);
  u32 old;
  // Note: _mm_extract_epi32 index must be a constant value.
  switch (__builtin_ffs(race_mask) / 4) {
    case 0:
      old = _mm_extract_epi32(shadow, 0);
      break;
    case 1:
      old = _mm_extract_epi32(shadow, 1);
      break;
    case 2:
      old = _mm_extract_epi32(shadow, 2);
      break;
    case 3:
      old = _mm_extract_epi32(shadow, 3);
      break;
  }
  Shadow prev(static_cast<RawShadow>(old));
  // For the free shadow markers the first element (which contains kFreeSid)
  // triggers the race, but the second element contains info about the freeing
  // thread, so take it from there.
  if (prev.sid() == kFreeSid)
    prev = Shadow(static_cast<RawShadow>(_mm_extract_epi32(shadow, 1)));
  DoReportRace(thr, shadow_mem, cur, prev, typ);
}
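
// Vectorized race check over all four shadow slots at once. The constants
// below read off the packed RawShadow layout as used by the masks: the access
// byte in bits 0..7, the sid in bits 8..15, the epoch in bits 16..29, and the
// is_read/is_atomic flags in the top two bits.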
ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                m128 shadow, m128 access, AccessType typ) {
  // Note: empty/zero slots don't intersect with any access.
  const m128 zero = _mm_setzero_si128();
  const m128 mask_access = _mm_set1_epi32(0x000000ff);
  const m128 mask_sid = _mm_set1_epi32(0x0000ff00);
  const m128 mask_read_atomic = _mm_set1_epi32(0xc0000000);
  const m128 access_and = _mm_and_si128(access, shadow);
  const m128 access_xor = _mm_xor_si128(access, shadow);
  const m128 intersect = _mm_and_si128(access_and, mask_access);
  const m128 not_intersect = _mm_cmpeq_epi32(intersect, zero);
  const m128 not_same_sid = _mm_and_si128(access_xor, mask_sid);
  const m128 same_sid = _mm_cmpeq_epi32(not_same_sid, zero);
  const m128 both_read_or_atomic = _mm_and_si128(access_and, mask_read_atomic);
  const m128 no_race =
      _mm_or_si128(_mm_or_si128(not_intersect, same_sid), both_read_or_atomic);
  const int race_mask = _mm_movemask_epi8(_mm_cmpeq_epi32(no_race, zero));
  if (UNLIKELY(race_mask))
    goto SHARED;

STORE : {
  if (typ & kAccessCheckOnly)
    return false;
  // We could also replace different sid's if access is the same,
  // rw weaker and happens before. However, just checking access below
  // is not enough because we also need to check that !both_read_or_atomic
  // (reads from different sids can be concurrent).
  // Theoretically we could replace smaller accesses with larger accesses,
  // but it's unclear if it's worth doing.
  const m128 mask_access_sid = _mm_set1_epi32(0x0000ffff);
  const m128 not_same_sid_access = _mm_and_si128(access_xor, mask_access_sid);
  const m128 same_sid_access = _mm_cmpeq_epi32(not_same_sid_access, zero);
  const m128 access_read_atomic =
      _mm_set1_epi32((typ & (kAccessRead | kAccessAtomic)) << 30);
  const m128 rw_weaker =
      _mm_cmpeq_epi32(_mm_max_epu32(shadow, access_read_atomic), shadow);
  const m128 rewrite = _mm_and_si128(same_sid_access, rw_weaker);
  const int rewrite_mask = _mm_movemask_epi8(rewrite);
  int index = __builtin_ffs(rewrite_mask);
  if (UNLIKELY(index == 0)) {
    const m128 empty = _mm_cmpeq_epi32(shadow, zero);
    const int empty_mask = _mm_movemask_epi8(empty);
    index = __builtin_ffs(empty_mask);
    if (UNLIKELY(index == 0))
      index = (atomic_load_relaxed(&thr->trace_pos) / 2) % 16;
  }
  StoreShadow(&shadow_mem[index / 4], cur.raw());
  // We could zero other slots determined by rewrite_mask.
  // That would help other threads to evict better slots,
  // but it's unclear if it's worth it.
  return false;
}

SHARED:
  m128 thread_epochs = _mm_set1_epi32(0x7fffffff);
  // Need to unroll this because _mm_extract_epi8/_mm_insert_epi32
  // indexes must be constants.
# define LOAD_EPOCH(idx)                                                      \
  if (LIKELY(race_mask & (1 << (idx * 4)))) {                                 \
    u8 sid = _mm_extract_epi8(shadow, idx * 4 + 1);                           \
    u16 epoch = static_cast<u16>(thr->clock.Get(static_cast<Sid>(sid)));      \
    thread_epochs = _mm_insert_epi32(thread_epochs, u32(epoch) << 16, idx);   \
  }
  LOAD_EPOCH(0);
  LOAD_EPOCH(1);
  LOAD_EPOCH(2);
  LOAD_EPOCH(3);
# undef LOAD_EPOCH
  const m128 mask_epoch = _mm_set1_epi32(0x3fff0000);
  const m128 shadow_epochs = _mm_and_si128(shadow, mask_epoch);
  const m128 concurrent = _mm_cmplt_epi32(thread_epochs, shadow_epochs);
  const int concurrent_mask = _mm_movemask_epi8(concurrent);
  if (LIKELY(concurrent_mask == 0))
    goto STORE;

  DoReportRaceV(thr, shadow_mem, cur, concurrent_mask, shadow, typ);
  return true;
}

# define LOAD_CURRENT_SHADOW(cur, shadow_mem)                                 \
  const m128 access = _mm_set1_epi32(static_cast<u32>((cur).raw()));          \
  const m128 shadow = _mm_load_si128(reinterpret_cast<m128*>(shadow_mem))
#endif

char* DumpShadow(char* buf, RawShadow raw) {
  if (raw == Shadow::kEmpty) {
    internal_snprintf(buf, 64, "0");
    return buf;
  }
  Shadow s(raw);
  AccessType typ;
  s.GetAccess(nullptr, nullptr, &typ);
  internal_snprintf(buf, 64, "{tid=%u@%u access=0x%x typ=%x}",
                    static_cast<u32>(s.sid()), static_cast<u32>(s.epoch()),
                    s.access(), static_cast<u32>(typ));
  return buf;
}

// TryTrace* and TraceRestart* functions allow turning the memory access and
// func entry/exit callbacks into leaf functions with all associated
// performance benefits. These hottest callbacks do only 2 slow path calls:
// report a race and trace part switching. Race reporting is easy to turn into
// a tail call, we just always return from the runtime after reporting a race.
// But trace part switching is harder because it needs to be in the middle of
// callbacks. To turn it into a tail call we immediately return after
// TraceRestart* functions, but TraceRestart* functions themselves recurse into
// the callback after switching the trace part. As a result the hottest
// callbacks contain only tail calls, which effectively makes them leaf
// functions (can use all registers, no frame setup, etc).
NOINLINE void TraceRestartMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess(thr, pc, addr, size, typ);
}
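
// Fast path for a single aligned access: first check whether the access is
// already recorded in the shadow (the most common case), then honor the
// thread's ignore bit, trace the access, and finally run the race check.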
ALWAYS_INLINE USED void MemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                     uptr size, AccessType typ) {
  RawShadow* shadow_mem = MemToShadow(addr);
  UNUSED char memBuf[4][64];
  DPrintf2("#%d: Access: %d@%d %p/%zd typ=0x%x {%s, %s, %s, %s}\n", thr->tid,
           static_cast<int>(thr->fast_state.sid()),
           static_cast<int>(thr->fast_state.epoch()), (void*)addr, size,
           static_cast<int>(typ), DumpShadow(memBuf[0], shadow_mem[0]),
           DumpShadow(memBuf[1], shadow_mem[1]),
           DumpShadow(memBuf[2], shadow_mem[2]),
           DumpShadow(memBuf[3], shadow_mem[3]));
  FastState fast_state = thr->fast_state;
  Shadow cur(fast_state, addr, size, typ);
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  if (!TryTraceMemoryAccess(thr, pc, addr, size, typ))
    return TraceRestartMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr, AccessType typ);

NOINLINE
void RestartMemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                           AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess16(thr, pc, addr, typ);
}

ALWAYS_INLINE USED void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                                       AccessType typ) {
  const uptr size = 16;
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  Shadow cur(fast_state, 0, 8, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  {
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartMemoryAccess16(thr, pc, addr, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  shadow_mem += kShadowCnt;
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccess16(thr, pc, addr, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}
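
// Unaligned accesses may straddle a shadow cell boundary. The access is split
// into a head of size1 bytes in the first cell and a tail of size2 bytes in
// the next cell; each part is checked against its own group of shadow slots,
// while the range is traced only once.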
NOINLINE
void RestartUnalignedMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                  uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  UnalignedMemoryAccess(thr, pc, addr, size, typ);
}

ALWAYS_INLINE USED void UnalignedMemoryAccess(ThreadState* thr, uptr pc,
                                              uptr addr, uptr size,
                                              AccessType typ) {
  DCHECK_LE(size, 8);
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  uptr size1 = Min<uptr>(size, RoundUp(addr + 1, kShadowCell) - addr);
  {
    Shadow cur(fast_state, addr, size1, typ);
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  uptr size2 = size - size1;
  if (LIKELY(size2 == 0))
    return;
  shadow_mem += kShadowCnt;
  Shadow cur(fast_state, 0, size2, typ);
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}
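
// ShadowSet fills an aligned shadow range so that the first slot of each cell
// holds v and the remaining slots are empty. MemoryRangeSet applies this to an
// application range; for very large ranges only the beginning and end are
// stored explicitly, and the middle is remapped, which leaves it zeroed
// (Shadow::kEmpty).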
void ShadowSet(RawShadow* p, RawShadow* end, RawShadow v) {
  DCHECK_LE(p, end);
  DCHECK(IsShadowMem(p));
  DCHECK(IsShadowMem(end));
  UNUSED const uptr kAlign = kShadowCnt * kShadowSize;
  DCHECK_EQ(reinterpret_cast<uptr>(p) % kAlign, 0);
  DCHECK_EQ(reinterpret_cast<uptr>(end) % kAlign, 0);
#if !TSAN_VECTORIZE
  for (; p < end; p += kShadowCnt) {
    p[0] = v;
    for (uptr i = 1; i < kShadowCnt; i++) p[i] = Shadow::kEmpty;
  }
#else
  m128 vv = _mm_setr_epi32(
      static_cast<u32>(v), static_cast<u32>(Shadow::kEmpty),
      static_cast<u32>(Shadow::kEmpty), static_cast<u32>(Shadow::kEmpty));
  m128* vp = reinterpret_cast<m128*>(p);
  m128* vend = reinterpret_cast<m128*>(end);
  for (; vp < vend; vp++) _mm_store_si128(vp, vv);
#endif
}

static void MemoryRangeSet(uptr addr, uptr size, RawShadow val) {
  if (size == 0)
    return;
  DCHECK_EQ(addr % kShadowCell, 0);
  DCHECK_EQ(size % kShadowCell, 0);
  // If a user passes some insane arguments (memset(0)),
  // let it just crash as usual.
  if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
    return;
  RawShadow* begin = MemToShadow(addr);
  RawShadow* end = begin + size / kShadowCell * kShadowCnt;
  // Don't want to touch lots of shadow memory.
  // If a program maps a 10MB stack, there is no need to reset the whole range.
  // UnmapOrDie/MmapFixedNoReserve does not work on Windows.
  if (SANITIZER_WINDOWS ||
      size <= common_flags()->clear_shadow_mmap_threshold) {
    ShadowSet(begin, end, val);
    return;
  }
  // The region is big, reset only the beginning and the end.
  const uptr kPageSize = GetPageSizeCached();
  // Set at least the first kPageSize/2, up to a page boundary.
  RawShadow* mid1 =
      Min(end, reinterpret_cast<RawShadow*>(RoundUp(
                   reinterpret_cast<uptr>(begin) + kPageSize / 2, kPageSize)));
  ShadowSet(begin, mid1, val);
  // Reset the middle part.
  RawShadow* mid2 = RoundDown(end, kPageSize);
  if (mid2 > mid1) {
    if (!MmapFixedSuperNoReserve((uptr)mid1, (uptr)mid2 - (uptr)mid1))
      Die();
  }
  // Set the ending.
  ShadowSet(mid2, end, val);
}

void MemoryResetRange(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  uptr addr1 = RoundDown(addr, kShadowCell);
  uptr size1 = RoundUp(size + addr - addr1, kShadowCell);
  MemoryRangeSet(addr1, size1, Shadow::kEmpty);
}
void MemoryRangeFreed(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  // Callers must lock the slot to ensure synchronization with the reset.
  // The problem with "freed" memory is that it's not "monotonic"
  // with respect to bug detection: freed memory is bad to access,
  // but then if the heap block is reallocated later, it's good to access.
  // As a result, a garbage "freed" shadow can lead to a false positive
  // if it happens to match a real free in the thread trace,
  // but the heap block was reallocated before the current memory access,
  // so it's still good to access. It's not the case with data races.
  DCHECK(thr->slot_locked);
  DCHECK_EQ(addr % kShadowCell, 0);
  size = RoundUp(size, kShadowCell);
  // Processing more than 1k (2k of shadow) is expensive,
  // can cause excessive memory consumption (the user does not necessarily
  // touch the whole range) and is most likely unnecessary.
  size = Min<uptr>(size, 1024);
  const AccessType typ =
      kAccessWrite | kAccessFree | kAccessCheckOnly | kAccessNoRodata;
  TraceMemoryAccessRange(thr, pc, addr, size, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  Shadow cur(thr->fast_state, 0, kShadowCell, typ);
#if TSAN_VECTORIZE
  const m128 access = _mm_set1_epi32(static_cast<u32>(cur.raw()));
  const m128 freed = _mm_setr_epi32(
      static_cast<u32>(Shadow::FreedMarker()),
      static_cast<u32>(Shadow::FreedInfo(cur.sid(), cur.epoch())), 0, 0);
  for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
    const m128 shadow = _mm_load_si128((m128*)shadow_mem);
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
    _mm_store_si128((m128*)shadow_mem, freed);
  }
#else
  for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, 0, 0, typ)))
      return;
    StoreShadow(&shadow_mem[0], Shadow::FreedMarker());
    StoreShadow(&shadow_mem[1], Shadow::FreedInfo(cur.sid(), cur.epoch()));
    StoreShadow(&shadow_mem[2], Shadow::kEmpty);
    StoreShadow(&shadow_mem[3], Shadow::kEmpty);
  }
#endif
}

void MemoryRangeImitateWrite(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  DCHECK_EQ(addr % kShadowCell, 0);
  size = RoundUp(size, kShadowCell);
  TraceMemoryAccessRange(thr, pc, addr, size, kAccessWrite);
  Shadow cur(thr->fast_state, 0, 8, kAccessWrite);
  MemoryRangeSet(addr, size, cur.raw());
}

void MemoryRangeImitateWriteOrResetRange(ThreadState* thr, uptr pc, uptr addr,
                                         uptr size) {
  if (thr->ignore_reads_and_writes == 0)
    MemoryRangeImitateWrite(thr, pc, addr, size);
  else
    MemoryResetRange(thr, pc, addr, size);
}
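
// Range accesses are processed one shadow cell at a time: an optional
// unaligned head, the aligned middle cells, and an optional tail.
// MemoryAccessRangeOne checks a single cell and returns true if a race was
// reported, which terminates the range processing early.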
ALWAYS_INLINE
bool MemoryAccessRangeOne(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                          AccessType typ) {
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return false;
  return CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

template <bool is_read>
NOINLINE void RestartMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size) {
  TraceSwitchPart(thr);
  MemoryAccessRangeT<is_read>(thr, pc, addr, size);
}
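
// MemoryAccessRangeT is instantiated for reads and writes (see the explicit
// instantiations at the end of the file). The .rodata check is done once up
// front, so the per-cell checks pass kAccessNoRodata to skip it.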
template <bool is_read>
void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  const AccessType typ =
      (is_read ? kAccessRead : kAccessWrite) | kAccessNoRodata;
  RawShadow* shadow_mem = MemToShadow(addr);
  DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_read=%d\n", thr->tid,
           (void*)pc, (void*)addr, (int)size, is_read);
#if SANITIZER_DEBUG
  if (!IsAppMem(addr)) {
    Printf("Access to non app mem %zx\n", addr);
    DCHECK(IsAppMem(addr));
  }
  if (!IsAppMem(addr + size - 1)) {
    Printf("Access to non app mem %zx\n", addr + size - 1);
    DCHECK(IsAppMem(addr + size - 1));
  }
  if (!IsShadowMem(shadow_mem)) {
    Printf("Bad shadow addr %p (%zx)\n", static_cast<void*>(shadow_mem), addr);
    DCHECK(IsShadowMem(shadow_mem));
  }
  if (!IsShadowMem(shadow_mem + size * kShadowCnt - 1)) {
    Printf("Bad shadow addr %p (%zx)\n",
           static_cast<void*>(shadow_mem + size * kShadowCnt - 1),
           addr + size - 1);
    DCHECK(IsShadowMem(shadow_mem + size * kShadowCnt - 1));
  }
#endif
  // Access to .rodata section, no races here.
  // Measurements show that it can be 10-20% of all memory accesses.
  // Check here once to not check for every access separately.
  // Note: we could (and should) do this only for the is_read case
  // (writes shouldn't go to .rodata). But it happens in Chromium tests:
  // https://bugs.chromium.org/p/chromium/issues/detail?id=1275581#c19
  // Details are unknown since it happens only on CI machines.
  if (*shadow_mem == Shadow::kRodata)
    return;
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccessRange<is_read>(thr, pc, addr, size);
  if (UNLIKELY(addr % kShadowCell)) {
    // Handle unaligned beginning, if any.
    uptr size1 = Min(size, RoundUp(addr, kShadowCell) - addr);
    size -= size1;
    Shadow cur(fast_state, addr, size1, typ);
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
    shadow_mem += kShadowCnt;
  }
  // Handle middle part, if any.
  Shadow cur(fast_state, 0, kShadowCell, typ);
  for (; size >= kShadowCell; size -= kShadowCell, shadow_mem += kShadowCnt) {
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
  }
  // Handle ending, if any.
  if (UNLIKELY(size)) {
    Shadow cur(fast_state, 0, size, typ);
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
  }
}

template void MemoryAccessRangeT<true>(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size);
template void MemoryAccessRangeT<false>(ThreadState* thr, uptr pc, uptr addr,
                                        uptr size);

}  // namespace __tsan

#if !SANITIZER_GO
// Must be included in this file to make sure everything is inlined.
# include "tsan_interface.inc"
#endif