//===-- tsan_rtl_access.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
// Definitions of memory access and function entry/exit entry points.
//===----------------------------------------------------------------------===//

#include "tsan_rtl.h"

namespace __tsan {
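
// TryTraceMemoryAccess appends a compact EventAccess to the thread's trace.
// The PC is stored as a delta from the previously traced PC; when the delta
// does not fit into EventAccess::kPCBits, the access is recorded as the larger
// EventAccessExt event carrying the full PC (the event layouts are defined
// elsewhere in the runtime). Returns false if the current trace part is full,
// in which case the caller switches to a new part and retries.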
ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState* thr, uptr pc,
                                             uptr addr, uptr size,
                                             AccessType typ) {
  DCHECK(size == 1 || size == 2 || size == 4 || size == 8);
  if (!kCollectHistory)
    return true;
  EventAccess* ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  u64 size_log = size == 1 ? 0 : size == 2 ? 1 : size == 4 ? 2 : 3;
  uptr pc_delta = pc - thr->trace_prev_pc + (1 << (EventAccess::kPCBits - 1));
  thr->trace_prev_pc = pc;
  if (LIKELY(pc_delta < (1 << EventAccess::kPCBits))) {
    ev->is_access = 1;
    ev->is_read = !!(typ & kAccessRead);
    ev->is_atomic = !!(typ & kAccessAtomic);
    ev->size_log = size_log;
    ev->pc_delta = pc_delta;
    DCHECK_EQ(ev->pc_delta, pc_delta);
    ev->addr = CompressAddr(addr);
    TraceRelease(thr, ev);
    return true;
  }
  auto* evex = reinterpret_cast<EventAccessExt*>(ev);
  evex->is_access = 0;
  evex->is_func = 0;
  evex->type = EventType::kAccessExt;
  evex->is_read = !!(typ & kAccessRead);
  evex->is_atomic = !!(typ & kAccessAtomic);
  evex->size_log = size_log;
  // Note: this is important, see comment in EventAccessExt.
  evex->_ = 0;
  evex->addr = CompressAddr(addr);
  evex->pc = pc;
  TraceRelease(thr, evex);
  return true;
}
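
// Range accesses do not fit into the compact EventAccess encoding: the size
// can exceed 8 bytes, so it is split across the size_lo/size_hi bit-fields of
// EventAccessRange, and the full (compressed) PC is recorded.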
ALWAYS_INLINE
bool TryTraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
                               AccessType typ) {
  if (!kCollectHistory)
    return true;
  EventAccessRange* ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  thr->trace_prev_pc = pc;
  ev->is_access = 0;
  ev->is_func = 0;
  ev->type = EventType::kAccessRange;
  ev->is_read = !!(typ & kAccessRead);
  ev->is_free = !!(typ & kAccessFree);
  ev->size_lo = size;
  ev->pc = CompressAddr(pc);
  ev->addr = CompressAddr(addr);
  ev->size_hi = size >> EventAccessRange::kSizeLoBits;
  TraceRelease(thr, ev);
  return true;
}
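
// The Trace* wrappers implement the retry protocol for a full trace part:
// if the Try* variant fails, switch to a new trace part and retry. The second
// attempt operates on a fresh part and therefore must succeed, hence the
// DCHECK.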
void TraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
                            AccessType typ) {
  if (LIKELY(TryTraceMemoryAccessRange(thr, pc, addr, size, typ)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceMemoryAccessRange(thr, pc, addr, size, typ);
  DCHECK(res);
}

void TraceFunc(ThreadState* thr, uptr pc) {
  if (LIKELY(TryTraceFunc(thr, pc)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceFunc(thr, pc);
  DCHECK(res);
}

NOINLINE void TraceRestartFuncEntry(ThreadState* thr, uptr pc) {
  TraceSwitchPart(thr);
  FuncEntry(thr, pc);
}

NOINLINE void TraceRestartFuncExit(ThreadState* thr) {
  TraceSwitchPart(thr);
  FuncExit(thr);
}

void TraceMutexLock(ThreadState* thr, EventType type, uptr pc, uptr addr,
                    StackID stk) {
  DCHECK(type == EventType::kLock || type == EventType::kRLock);
  if (!kCollectHistory)
    return;
  EventLock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = type;
  ev.pc = CompressAddr(pc);
  ev.stack_lo = stk;
  ev.stack_hi = stk >> EventLock::kStackIDLoBits;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}

void TraceMutexUnlock(ThreadState* thr, uptr addr) {
  if (!kCollectHistory)
    return;
  EventUnlock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kUnlock;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}

void TraceTime(ThreadState* thr) {
  if (!kCollectHistory)
    return;
  FastState fast_state = thr->fast_state;
  EventTime ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kTime;
  ev.sid = static_cast<u64>(fast_state.sid());
  ev.epoch = static_cast<u64>(fast_state.epoch());
  ev._ = 0;
  TraceEvent(thr, ev);
}
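
// DoReportRace is the cold path shared by the scalar and vectorized race
// checks. It is NOINLINE so that the race-reporting slow path stays out of the
// inlined hot callbacks (see the comment before TraceRestartMemoryAccess
// below). Thread-safety analysis is disabled, presumably because the function
// temporarily unlocks and relocks the slot around ReportRace, which the
// analysis cannot model.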
NOINLINE void DoReportRace(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                           Shadow old,
                           AccessType typ) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
  // For the free shadow markers the first element (that contains kFreeSid)
  // triggers the race, but the second element contains info about the freeing
  // thread, so take that one instead.
  if (old.sid() == kFreeSid)
    old = Shadow(LoadShadow(&shadow_mem[1]));
  // This prevents trapping on this address in the future.
  for (uptr i = 0; i < kShadowCnt; i++)
    StoreShadow(&shadow_mem[i], i == 0 ? Shadow::kRodata : Shadow::kEmpty);
  // See the comment in MemoryRangeFreed as to why the slot is locked
  // for free memory accesses. ReportRace must not be called with
  // the slot locked because of the fork. But MemoryRangeFreed is not
  // called during fork because fork sets ignore_reads_and_writes,
  // so simply unlocking the slot should be fine.
  if (typ & kAccessSlotLocked)
    SlotUnlock(thr);
  ReportRace(thr, shadow_mem, cur, Shadow(old), typ);
  if (typ & kAccessSlotLocked)
    SlotLock(thr);
}

#if !TSAN_VECTORIZE
ALWAYS_INLINE
bool ContainsSameAccess(RawShadow* s, Shadow cur, int unused0, int unused1,
                        AccessType typ) {
  for (uptr i = 0; i < kShadowCnt; i++) {
    auto old = LoadShadow(&s[i]);
    if (!(typ & kAccessRead)) {
      if (old == cur.raw())
        return true;
      continue;
    }
    auto masked = static_cast<RawShadow>(static_cast<u32>(old) |
                                         static_cast<u32>(Shadow::kRodata));
    if (masked == cur.raw())
      return true;
    if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
      if (old == Shadow::kRodata)
        return true;
    }
  }
  return false;
}
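
// CheckRaces scans the kShadowCnt shadow slots of one shadow cell. Unless the
// access is check-only, the current access is stored into an empty slot, into
// a compatible slot from the same sid, or, failing that, into a slot chosen
// from the current trace position. A race is reported when a slot overlaps
// the current access byte range, comes from a different sid, is not a
// read/read or atomic/atomic pair, and is not ordered before the current
// access by the thread's vector clock.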
ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                int unused0, int unused1, AccessType typ) {
  bool stored = false;
  for (uptr idx = 0; idx < kShadowCnt; idx++) {
    RawShadow* sp = &shadow_mem[idx];
    Shadow old(LoadShadow(sp));
    if (LIKELY(old.raw() == Shadow::kEmpty)) {
      if (!(typ & kAccessCheckOnly) && !stored)
        StoreShadow(sp, cur.raw());
      return false;
    }
    if (LIKELY(!(cur.access() & old.access())))
      continue;
    if (LIKELY(cur.sid() == old.sid())) {
      if (!(typ & kAccessCheckOnly) &&
          LIKELY(cur.access() == old.access() && old.IsRWWeakerOrEqual(typ))) {
        StoreShadow(sp, cur.raw());
        stored = true;
      }
      continue;
    }
    if (LIKELY(old.IsBothReadsOrAtomic(typ)))
      continue;
    if (LIKELY(thr->clock.Get(old.sid()) >= old.epoch()))
      continue;
    DoReportRace(thr, shadow_mem, cur, old, typ);
    return true;
  }
  // We did not find any races and had already stored
  // the current access info, so we are done.
  if (LIKELY(stored))
    return false;
  // Choose a random candidate slot and replace it.
  uptr index =
      atomic_load_relaxed(&thr->trace_pos) / sizeof(Event) % kShadowCnt;
  StoreShadow(&shadow_mem[index], cur.raw());
  return false;
}

# define LOAD_CURRENT_SHADOW(cur, shadow_mem) UNUSED int access = 0, shadow = 0

#else /* !TSAN_VECTORIZE */
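
// In the vectorized build all kShadowCnt (four) 32-bit shadow slots of a cell
// are loaded into a single 128-bit register and processed at once, so the
// helpers below take the preloaded `shadow` vector and the broadcast `access`
// value instead of reloading shadow memory.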
ALWAYS_INLINE
bool ContainsSameAccess(RawShadow* unused0, Shadow unused1, m128 shadow,
                        m128 access, AccessType typ) {
  // Note: we could check if there is a larger access of the same type,
  // e.g. we just allocated/memset-ed a block (so it contains 8-byte writes)
  // and now do smaller reads/writes; these could also be considered the "same
  // access". However, it would make the check more expensive, so it's unclear
  // whether it's worth it. It would also conserve trace space, so it's useful
  // beyond the potential speedup.
  if (!(typ & kAccessRead)) {
    const m128 same = _mm_cmpeq_epi32(shadow, access);
    return _mm_movemask_epi8(same);
  }
  // For reads we need to neutralize the read bit in the shadow,
  // because a read must match both prior reads and writes.
  // Shadow::kRodata has only the read bit set, so OR-ing it in does what we
  // want. We also abuse it for the rodata check to save a few cycles
  // since we already loaded Shadow::kRodata into a register.
  // Reads from rodata can't race.
  // Measurements show that they can be 10-20% of all memory accesses.
  // Shadow::kRodata has epoch 0, which cannot appear in shadow normally
  // (thread epochs start from 1), so the same read bit mask
  // serves as a rodata indicator.
  const m128 read_mask = _mm_set1_epi32(static_cast<u32>(Shadow::kRodata));
  const m128 masked_shadow = _mm_or_si128(shadow, read_mask);
  m128 same = _mm_cmpeq_epi32(masked_shadow, access);
  // Range memory accesses check Shadow::kRodata before calling this,
  // Shadow::kRodata is not possible for free memory accesses,
  // and Go does not use Shadow::kRodata.
  if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
    const m128 ro = _mm_cmpeq_epi32(shadow, read_mask);
    same = _mm_or_si128(ro, same);
  }
  return _mm_movemask_epi8(same);
}

NOINLINE void DoReportRaceV(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                            u32 race_mask, m128 shadow, AccessType typ) {
  // race_mask indicates which of the shadow elements raced with the current
  // access. Extract that element.
  CHECK_NE(race_mask, 0);
  u32 old;
  // Note: _mm_extract_epi32 index must be a constant value.
  switch (__builtin_ffs(race_mask) / 4) {
    case 0:
      old = _mm_extract_epi32(shadow, 0);
      break;
    case 1:
      old = _mm_extract_epi32(shadow, 1);
      break;
    case 2:
      old = _mm_extract_epi32(shadow, 2);
      break;
    case 3:
      old = _mm_extract_epi32(shadow, 3);
      break;
  }
  Shadow prev(static_cast<RawShadow>(old));
  // For the free shadow markers the first element (that contains kFreeSid)
  // triggers the race, but the second element contains info about the freeing
  // thread, so take that one instead.
  if (prev.sid() == kFreeSid)
    prev = Shadow(static_cast<RawShadow>(_mm_extract_epi32(shadow, 1)));
  DoReportRace(thr, shadow_mem, cur, prev, typ);
}
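
// The mask constants below imply the following layout of a 32-bit shadow word
// (the authoritative definition lives in the Shadow class): bits 0-7 hold the
// access byte mask within the cell, bits 8-15 the sid, bits 16-29 the epoch,
// and the top two bits the read/atomic flags. The vectorized CheckRaces
// evaluates the same conditions as the scalar version, but for all four slots
// in parallel, and branches to the SHARED path only when some slot overlaps
// the current access from a different sid and is not a read/read or
// atomic/atomic pair; that path then checks the happens-before ordering via
// the per-sid epochs.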
ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                m128 shadow, m128 access, AccessType typ) {
  // Note: empty/zero slots don't intersect with any access.
  const m128 zero = _mm_setzero_si128();
  const m128 mask_access = _mm_set1_epi32(0x000000ff);
  const m128 mask_sid = _mm_set1_epi32(0x0000ff00);
  const m128 mask_read_atomic = _mm_set1_epi32(0xc0000000);
  const m128 access_and = _mm_and_si128(access, shadow);
  const m128 access_xor = _mm_xor_si128(access, shadow);
  const m128 intersect = _mm_and_si128(access_and, mask_access);
  const m128 not_intersect = _mm_cmpeq_epi32(intersect, zero);
  const m128 not_same_sid = _mm_and_si128(access_xor, mask_sid);
  const m128 same_sid = _mm_cmpeq_epi32(not_same_sid, zero);
  const m128 both_read_or_atomic = _mm_and_si128(access_and, mask_read_atomic);
  const m128 no_race =
      _mm_or_si128(_mm_or_si128(not_intersect, same_sid), both_read_or_atomic);
  const int race_mask = _mm_movemask_epi8(_mm_cmpeq_epi32(no_race, zero));
  if (UNLIKELY(race_mask))
    goto SHARED;

STORE : {
  if (typ & kAccessCheckOnly)
    return false;
  // We could also replace different sid's if access is the same,
  // rw weaker and happens before. However, just checking access below
  // is not enough because we also need to check that !both_read_or_atomic
  // (reads from different sids can be concurrent).
  // Theoretically we could replace smaller accesses with larger accesses,
  // but it's unclear if it's worth doing.
  const m128 mask_access_sid = _mm_set1_epi32(0x0000ffff);
  const m128 not_same_sid_access = _mm_and_si128(access_xor, mask_access_sid);
  const m128 same_sid_access = _mm_cmpeq_epi32(not_same_sid_access, zero);
  const m128 access_read_atomic =
      _mm_set1_epi32((typ & (kAccessRead | kAccessAtomic)) << 30);
  const m128 rw_weaker =
      _mm_cmpeq_epi32(_mm_max_epu32(shadow, access_read_atomic), shadow);
  const m128 rewrite = _mm_and_si128(same_sid_access, rw_weaker);
  const int rewrite_mask = _mm_movemask_epi8(rewrite);
  int index = __builtin_ffs(rewrite_mask);
  if (UNLIKELY(index == 0)) {
    const m128 empty = _mm_cmpeq_epi32(shadow, zero);
    const int empty_mask = _mm_movemask_epi8(empty);
    index = __builtin_ffs(empty_mask);
    if (UNLIKELY(index == 0))
      index = (atomic_load_relaxed(&thr->trace_pos) / 2) % 16;
  }
  StoreShadow(&shadow_mem[index / 4], cur.raw());
  // We could zero other slots determined by rewrite_mask.
  // That would help other threads to evict better slots,
  // but it's unclear if it's worth it.
  return false;
}

SHARED:
  m128 thread_epochs = _mm_set1_epi32(0x7fffffff);
  // Need to unroll this because _mm_extract_epi8/_mm_insert_epi32
  // indexes must be constants.
# define LOAD_EPOCH(idx)                                                      \
  if (LIKELY(race_mask & (1 << (idx * 4)))) {                                 \
    u8 sid = _mm_extract_epi8(shadow, idx * 4 + 1);                           \
    u16 epoch = static_cast<u16>(thr->clock.Get(static_cast<Sid>(sid)));      \
    thread_epochs = _mm_insert_epi32(thread_epochs, u32(epoch) << 16, idx);   \
  }
  LOAD_EPOCH(0);
  LOAD_EPOCH(1);
  LOAD_EPOCH(2);
  LOAD_EPOCH(3);
# undef LOAD_EPOCH
  const m128 mask_epoch = _mm_set1_epi32(0x3fff0000);
  const m128 shadow_epochs = _mm_and_si128(shadow, mask_epoch);
  const m128 concurrent = _mm_cmplt_epi32(thread_epochs, shadow_epochs);
  const int concurrent_mask = _mm_movemask_epi8(concurrent);
  if (LIKELY(concurrent_mask == 0))
    goto STORE;
  DoReportRaceV(thr, shadow_mem, cur, concurrent_mask, shadow, typ);
  return true;
}
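
// LOAD_CURRENT_SHADOW defines the `shadow` and `access` values consumed by
// ContainsSameAccess/CheckRaces. In the vectorized build it broadcasts the
// current shadow value and loads all four slots of the cell with a single
// 16-byte load; in the scalar build the two names are unused placeholders and
// the helpers reload shadow memory themselves.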
# define LOAD_CURRENT_SHADOW(cur, shadow_mem)                                 \
  const m128 access = _mm_set1_epi32(static_cast<u32>((cur).raw()));          \
  const m128 shadow = _mm_load_si128(reinterpret_cast<m128*>(shadow_mem))
#endif

char* DumpShadow(char* buf, RawShadow raw) {
  if (raw == Shadow::kEmpty) {
    internal_snprintf(buf, 64, "0");
    return buf;
  }
  Shadow s(raw);
  AccessType typ;
  s.GetAccess(nullptr, nullptr, &typ);
  internal_snprintf(buf, 64, "{tid=%u@%u access=0x%x typ=%x}",
                    static_cast<u32>(s.sid()), static_cast<u32>(s.epoch()),
                    s.access(), static_cast<u32>(typ));
  return buf;
}

// The TryTrace* and TraceRestart* functions make it possible to turn the
// memory access and function entry/exit callbacks into leaf functions with
// all of the associated performance benefits. These hottest callbacks have
// only two slow paths: race reporting and trace part switching. Race
// reporting is easy to turn into a tail call: we simply return from the
// runtime after reporting a race. Trace part switching is harder because it
// happens in the middle of a callback. To turn it into a tail call we return
// immediately after calling a TraceRestart* function, and the TraceRestart*
// function itself recurses into the callback after switching the trace part.
// As a result the hottest callbacks contain only tail calls, which
// effectively makes them leaf functions (they can use all registers, need no
// frame setup, etc).
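//
// A rough sketch of the resulting shape of a hot callback (illustrative only;
// the real callbacks below differ in details):
//
//   void HotCallback(...) {
//     ... inlined fast path: same-access and ignore-bit checks ...
//     if (!TryTrace...(...))
//       return TraceRestart...(...);  // tail call: switch part, then recurse
//     CheckRaces(...);                // may tail-call the NOINLINE report path
//   }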
NOINLINE void TraceRestartMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess(thr, pc, addr, size, typ);
}

ALWAYS_INLINE USED void MemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                     uptr size, AccessType typ) {
  RawShadow* shadow_mem = MemToShadow(addr);
  UNUSED char memBuf[4][64];
  DPrintf2("#%d: Access: %d@%d %p/%zd typ=0x%x {%s, %s, %s, %s}\n", thr->tid,
           static_cast<int>(thr->fast_state.sid()),
           static_cast<int>(thr->fast_state.epoch()), (void*)addr, size,
           static_cast<int>(typ), DumpShadow(memBuf[0], shadow_mem[0]),
           DumpShadow(memBuf[1], shadow_mem[1]),
           DumpShadow(memBuf[2], shadow_mem[2]),
           DumpShadow(memBuf[3], shadow_mem[3]));
  FastState fast_state = thr->fast_state;
  Shadow cur(fast_state, addr, size, typ);
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  if (!TryTraceMemoryAccess(thr, pc, addr, size, typ))
    return TraceRestartMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}
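
// A 16-byte access spans two adjacent shadow cells, so it is handled as two
// 8-byte accesses against consecutive groups of shadow slots. The range is
// traced at most once (the `traced` flag below).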
void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr, AccessType typ);

NOINLINE
void RestartMemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                           AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess16(thr, pc, addr, typ);
}

ALWAYS_INLINE USED void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                                       AccessType typ) {
  const uptr size = 16;
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  Shadow cur(fast_state, 0, 8, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  {
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartMemoryAccess16(thr, pc, addr, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  shadow_mem += kShadowCnt;
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccess16(thr, pc, addr, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}
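
// An unaligned access of up to 8 bytes may straddle a shadow cell boundary.
// It is split into the part that fits into the first cell (size1) and the
// remainder in the next cell (size2); each part is checked against its own
// group of shadow slots.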
NOINLINE
void RestartUnalignedMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                  uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  UnalignedMemoryAccess(thr, pc, addr, size, typ);
}

ALWAYS_INLINE USED void UnalignedMemoryAccess(ThreadState* thr, uptr pc,
                                              uptr addr, uptr size,
                                              AccessType typ) {
  DCHECK_LE(size, 8);
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  uptr size1 = Min<uptr>(size, RoundUp(addr + 1, kShadowCell) - addr);
  {
    Shadow cur(fast_state, addr, size1, typ);
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  uptr size2 = size - size1;
  if (LIKELY(size2 == 0))
    return;
  shadow_mem += kShadowCnt;
  Shadow cur(fast_state, 0, size2, typ);
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}
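
// ShadowSet writes `v` into the first slot of every shadow cell in [p, end)
// and clears the remaining slots of each cell. The vectorized build does this
// with one 16-byte store per cell.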
void ShadowSet(RawShadow* p, RawShadow* end, RawShadow v) {
  DCHECK_LE(p, end);
  DCHECK(IsShadowMem(p));
  DCHECK(IsShadowMem(end));
  UNUSED const uptr kAlign = kShadowCnt * kShadowSize;
  DCHECK_EQ(reinterpret_cast<uptr>(p) % kAlign, 0);
  DCHECK_EQ(reinterpret_cast<uptr>(end) % kAlign, 0);
#if !TSAN_VECTORIZE
  for (; p < end; p += kShadowCnt) {
    p[0] = v;
    for (uptr i = 1; i < kShadowCnt; i++) p[i] = Shadow::kEmpty;
  }
#else
  m128 vv = _mm_setr_epi32(
      static_cast<u32>(v), static_cast<u32>(Shadow::kEmpty),
      static_cast<u32>(Shadow::kEmpty), static_cast<u32>(Shadow::kEmpty));
  m128* vp = reinterpret_cast<m128*>(p);
  m128* vend = reinterpret_cast<m128*>(end);
  for (; vp < vend; vp++) _mm_store_si128(vp, vv);
#endif
}
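
// MemoryRangeSet resets the shadow for [addr, addr + size). Small ranges are
// written directly. For large ranges only the beginning and the end are set
// to `val`; the middle pages are remapped with MmapFixedSuperNoReserve, so
// the kernel backs them with zero (empty) shadow lazily instead of the
// runtime dirtying megabytes of shadow memory.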
static void MemoryRangeSet(uptr addr, uptr size, RawShadow val) {
  if (size == 0)
    return;
  DCHECK_EQ(addr % kShadowCell, 0);
  DCHECK_EQ(size % kShadowCell, 0);
  // If a user passes some insane arguments (memset(0)),
  // let it just crash as usual.
  if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
    return;
  RawShadow* begin = MemToShadow(addr);
  RawShadow* end = begin + size / kShadowCell * kShadowCnt;
  // Don't want to touch lots of shadow memory.
  // If a program maps a 10MB stack, there is no need to reset the whole range.
  // UnmapOrDie/MmapFixedNoReserve does not work on Windows.
  if (SANITIZER_WINDOWS ||
      size <= common_flags()->clear_shadow_mmap_threshold) {
    ShadowSet(begin, end, val);
    return;
  }
  // The region is big, reset only the beginning and the end.
  const uptr kPageSize = GetPageSizeCached();
  // Set at least the first kPageSize/2 up to a page boundary.
  RawShadow* mid1 =
      Min(end, reinterpret_cast<RawShadow*>(RoundUp(
                   reinterpret_cast<uptr>(begin) + kPageSize / 2, kPageSize)));
  ShadowSet(begin, mid1, val);
  // Reset the middle part.
  RawShadow* mid2 = RoundDown(end, kPageSize);
  if (mid2 > mid1) {
    if (!MmapFixedSuperNoReserve((uptr)mid1, (uptr)mid2 - (uptr)mid1))
      Die();
  }
  // Set the ending.
  ShadowSet(mid2, end, val);
}

void MemoryResetRange(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  uptr addr1 = RoundDown(addr, kShadowCell);
  uptr size1 = RoundUp(size + addr - addr1, kShadowCell);
  MemoryRangeSet(addr1, size1, Shadow::kEmpty);
}
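
// MemoryRangeFreed marks a freed heap block with a two-slot pattern per cell:
// slot 0 holds Shadow::FreedMarker() (whose sid is kFreeSid and which is what
// triggers the race in CheckRaces), and slot 1 holds Shadow::FreedInfo() with
// the freeing thread's sid/epoch, which DoReportRace picks up when it sees
// kFreeSid in the first slot.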
void MemoryRangeFreed(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  // Callers must lock the slot to ensure synchronization with the reset.
  // The problem with "freed" memory is that it's not "monotonic"
  // with respect to bug detection: freed memory is bad to access,
  // but then if the heap block is reallocated later, it's good to access.
  // As a result a garbage "freed" shadow can lead to a false positive
  // if it happens to match a real free in the thread trace,
  // but the heap block was reallocated before the current memory access,
  // so it's still good to access. This is not the case with data races.
  DCHECK(thr->slot_locked);
  DCHECK_EQ(addr % kShadowCell, 0);
  size = RoundUp(size, kShadowCell);
  // Processing more than 1k (2k of shadow) is expensive,
  // can cause excessive memory consumption (the user does not necessarily
  // touch the whole range) and is most likely unnecessary.
  size = Min<uptr>(size, 1024);
  const AccessType typ = kAccessWrite | kAccessFree | kAccessSlotLocked |
                         kAccessCheckOnly | kAccessNoRodata;
  TraceMemoryAccessRange(thr, pc, addr, size, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  Shadow cur(thr->fast_state, 0, kShadowCell, typ);
#if TSAN_VECTORIZE
  const m128 access = _mm_set1_epi32(static_cast<u32>(cur.raw()));
  const m128 freed = _mm_setr_epi32(
      static_cast<u32>(Shadow::FreedMarker()),
      static_cast<u32>(Shadow::FreedInfo(cur.sid(), cur.epoch())), 0, 0);
  for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
    const m128 shadow = _mm_load_si128((m128*)shadow_mem);
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
    _mm_store_si128((m128*)shadow_mem, freed);
  }
#else
  for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, 0, 0, typ)))
      return;
    StoreShadow(&shadow_mem[0], Shadow::FreedMarker());
    StoreShadow(&shadow_mem[1], Shadow::FreedInfo(cur.sid(), cur.epoch()));
    StoreShadow(&shadow_mem[2], Shadow::kEmpty);
    StoreShadow(&shadow_mem[3], Shadow::kEmpty);
  }
#endif
}

void MemoryRangeImitateWrite(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  DCHECK_EQ(addr % kShadowCell, 0);
  size = RoundUp(size, kShadowCell);
  TraceMemoryAccessRange(thr, pc, addr, size, kAccessWrite);
  Shadow cur(thr->fast_state, 0, 8, kAccessWrite);
  MemoryRangeSet(addr, size, cur.raw());
}

void MemoryRangeImitateWriteOrResetRange(ThreadState* thr, uptr pc, uptr addr,
                                         uptr size) {
  if (thr->ignore_reads_and_writes == 0)
    MemoryRangeImitateWrite(thr, pc, addr, size);
  else
    MemoryResetRange(thr, pc, addr, size);
}

ALWAYS_INLINE
bool MemoryAccessRangeOne(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                          AccessType typ) {
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return false;
  return CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

template <bool is_read>
NOINLINE void RestartMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size) {
  TraceSwitchPart(thr);
  MemoryAccessRangeT<is_read>(thr, pc, addr, size);
}
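
// MemoryAccessRangeT traces the whole range as a single range event and then
// checks it one shadow cell at a time: a possibly unaligned head, the aligned
// middle (full kShadowCell-sized accesses), and a partial tail. It stops at
// the first reported race.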
template <bool is_read>
void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  const AccessType typ =
      (is_read ? kAccessRead : kAccessWrite) | kAccessNoRodata;
  RawShadow* shadow_mem = MemToShadow(addr);
  DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_read=%d\n", thr->tid,
           (void*)pc, (void*)addr, (int)size, is_read);
#if SANITIZER_DEBUG
  if (!IsAppMem(addr)) {
    Printf("Access to non app mem %zx\n", addr);
    DCHECK(IsAppMem(addr));
  }
  if (!IsAppMem(addr + size - 1)) {
    Printf("Access to non app mem %zx\n", addr + size - 1);
    DCHECK(IsAppMem(addr + size - 1));
  }
  if (!IsShadowMem(shadow_mem)) {
    Printf("Bad shadow addr %p (%zx)\n", static_cast<void*>(shadow_mem), addr);
    DCHECK(IsShadowMem(shadow_mem));
  }
  if (!IsShadowMem(shadow_mem + size * kShadowCnt - 1)) {
    Printf("Bad shadow addr %p (%zx)\n",
           static_cast<void*>(shadow_mem + size * kShadowCnt - 1),
           addr + size - 1);
    DCHECK(IsShadowMem(shadow_mem + size * kShadowCnt - 1));
  }
#endif
  // Access to .rodata section, no races here.
  // Measurements show that it can be 10-20% of all memory accesses.
  // Check here once to not check for every access separately.
  // Note: we could (and should) do this only for the is_read case
  // (writes shouldn't go to .rodata). But it happens in Chromium tests:
  // https://bugs.chromium.org/p/chromium/issues/detail?id=1275581#c19
  // Details are unknown since it happens only on CI machines.
  if (*shadow_mem == Shadow::kRodata)
    return;
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccessRange<is_read>(thr, pc, addr, size);
  if (UNLIKELY(addr % kShadowCell)) {
    // Handle unaligned beginning, if any.
    uptr size1 = Min(size, RoundUp(addr, kShadowCell) - addr);
    size -= size1;
    Shadow cur(fast_state, addr, size1, typ);
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
    shadow_mem += kShadowCnt;
  }
  // Handle middle part, if any.
  Shadow cur(fast_state, 0, kShadowCell, typ);
  for (; size >= kShadowCell; size -= kShadowCell, shadow_mem += kShadowCnt) {
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
  }
  // Handle ending, if any.
  if (UNLIKELY(size)) {
    Shadow cur(fast_state, 0, size, typ);
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
  }
}

template void MemoryAccessRangeT<true>(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size);
template void MemoryAccessRangeT<false>(ThreadState* thr, uptr pc, uptr addr,
                                        uptr size);

}  // namespace __tsan

#if !SANITIZER_GO
// Must be included in this file to make sure everything is inlined.
#  include "tsan_interface.inc"
#endif