cachesize64.asm 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335
  1. %include "defs.asm"
  2. ;************************* cachesize64.asm *************************************
  3. ; Author: Agner Fog
  4. ; Date created: 2011-07-11
  5. ; Last modified: 2013-08-14
  6. ; Description:
  7. ; Determines the size of the data caches
  8. ;
  9. ; extern "C" size_t DataCacheSize(int level);
  10. ; Input:
  11. ; level: n = 1 - 4: level n data cache
  12. ; 0 = largest level data cache
  13. ; Return value: size in bytes of data cache
  14. ;
  15. ; The latest version of this file is available at:
  16. ; www.agner.org/optimize/asmexamples.zip
  17. ; Copyright (c) 2011-2013 GNU General Public License www.gnu.org/licenses
  18. ;******************************************************************************
  19. default rel
  20. global DataCacheSize: function
  21. ; Imported from cputype64.asm
  22. extern CpuType ; near. Determine CPU vendor
  23. struc data_layout
  24. ok: resd 2
  25. level1: resq 1
  26. level2: resq 1
  27. level3: resq 1
  28. level4: resq 1
  29. descriptortable: resd 60
  30. endstruc
  31. struc descriptor_record ; record for table of cache descriptors
  32. d_key: resb 1 ; key from cpuid instruction
  33. d_level: resb 1 ; cache level
  34. d_sizem: resb 1 ; size multiplier
  35. d_2pow: resb 1 ; power of 2. size = d_sizem << d_2pow
  36. endstruc
  37. SECTION .data
  38. dataref: ; reference point
  39. ok_: DD 0, 0 ; 1 when values are determined
  40. level1_: DQ 0 ; level 1 data cache size
  41. level2_: DQ 0 ; level 2 data cache size
  42. level3_: DQ 0 ; level 3 data cache size
  43. level4_: DQ 0 ; level 4 data cache size
  44. numlevels equ 4 ; max level
  45. ; From "Intel Processor Identification and the CPUID Instruction, Application note 485
  46. descriptortable_: ; table of Intel cache descriptors
  47. db 0Ah, 1, 1, 13 ; 8 kb L1 data cache
  48. db 0Ch, 1, 1, 14 ; 16 kb L1 data cache
  49. db 0Dh, 1, 1, 14 ; 16 kb L1 data cache
  50. db 21h, 2, 1, 18 ; 256 kb L2 data cache
  51. db 22h, 3, 1, 19 ; 512 kb L3 data cache
  52. db 23h, 3, 1, 20 ; 1 Mb L3 data cache
  53. db 25h, 3, 1, 21 ; 2 Mb L3 data cache
  54. db 29h, 3, 1, 22 ; 4 Mb L3 data cache
  55. db 2Ch, 1, 1, 15 ; 32 kb L1 data cache
  56. db 39h, 2, 1, 17 ; 128 kb L2 data cache
  57. db 3Ah, 2, 3, 16 ; 192 kb L2 data cache
  58. db 3Bh, 2, 1, 17 ; 128 kb L1 data cache
  59. db 3Ch, 2, 1, 18 ; 256 kb L1 data cache
  60. db 3Dh, 2, 3, 17 ; 384 kb L2 data cache
  61. db 3Eh, 2, 1, 19 ; 512 kb L2 data cache
  62. db 41h, 2, 1, 17 ; 128 kb L2 data cache
  63. db 42h, 2, 1, 18 ; 256 kb L2 data cache
  64. db 43h, 2, 1, 19 ; 512 kb L2 data cache
  65. db 44h, 2, 1, 20 ; 1 Mb L2 data cache
  66. db 45h, 2, 1, 21 ; 2 Mb L2 data cache
  67. db 46h, 3, 1, 22 ; 4 Mb L3 data cache
  68. db 47h, 3, 1, 23 ; 8 Mb L3 data cache
  69. db 48h, 2, 3, 20 ; 3 Mb L2 data cache
  70. db 49h, 2, 1, 22 ; 4 Mb L2 or 3 data cache
  71. db 4Ah, 3, 3, 21 ; 6 Mb L3 data cache
  72. db 4Bh, 3, 1, 23 ; 8 Mb L3 data cache
  73. db 4Ch, 3, 3, 22 ; 12 Mb L3 data cache
  74. db 4Dh, 3, 1, 24 ; 16 Mb L3 data cache
  75. db 4Eh, 2, 3, 21 ; 6 Mb L2 data cache
  76. db 60h, 1, 1, 14 ; 16 kb L1 data cache
  77. db 66h, 1, 1, 13 ; 8 kb L1 data cache
  78. db 67h, 1, 1, 14 ; 16 kb L1 data cache
  79. db 68h, 1, 1, 15 ; 32 kb L1 data cache
  80. db 78h, 2, 1, 20 ; 1 Mb L2 data cache
  81. db 79h, 2, 1, 17 ; 128 kb L2 data cache
  82. db 7Ah, 2, 1, 18 ; 256 kb L2 data cache
  83. db 7Bh, 2, 1, 19 ; 512 kb L2 data cache
  84. db 7Ch, 2, 1, 20 ; 1 Mb L2 data cache
  85. db 7Dh, 2, 1, 21 ; 2 Mb L2 data cache
  86. db 7Fh, 2, 1, 19 ; 512 kb L2 data cache
  87. db 82h, 2, 1, 18 ; 256 kb L2 data cache
  88. db 83h, 2, 1, 19 ; 512 kb L2 data cache
  89. db 84h, 2, 1, 20 ; 1 Mb L2 data cache
  90. db 85h, 2, 1, 21 ; 2 Mb L2 data cache
  91. db 86h, 2, 1, 19 ; 512 kb L2 data cache
  92. db 87h, 2, 1, 20 ; 1 Mb L2 data cache
  93. db 0D0h, 3, 1, 19 ; 512 kb L3 data cache
  94. db 0D1h, 3, 1, 20 ; 1 Mb L3 data cache
  95. db 0D2h, 3, 1, 21 ; 2 Mb L3 data cache
  96. db 0D6h, 3, 1, 20 ; 1 Mb L3 data cache
  97. db 0D7h, 3, 1, 21 ; 2 Mb L3 data cache
  98. db 0D8h, 3, 1, 22 ; 4 Mb L3 data cache
  99. db 0DCh, 3, 3, 19 ; 1.5 Mb L3 data cache
  100. db 0DDh, 3, 3, 20 ; 3 Mb L3 data cache
  101. db 0DEh, 3, 3, 21 ; 6 Mb L3 data cache
  102. db 0E2h, 3, 1, 21 ; 2 Mb L3 data cache
  103. db 0E3h, 3, 1, 22 ; 4 Mb L3 data cache
  104. db 0E4h, 3, 1, 23 ; 8 Mb L3 data cache
  105. db 0EAh, 3, 3, 22 ; 12 Mb L3 data cache
  106. db 0EBh, 3, 9, 21 ; 18 Mb L3 data cache
  107. db 0ECh, 3, 3, 23 ; 24 Mb L3 data cache
  108. descriptortablelength equ ($ - descriptortable_) / descriptor_record_size
  109. SECTION .text
  110. ; extern "C" site_t DataCacheSize(int level);
  111. ; Function entry:
  112. DataCacheSize:
  113. push rbx
  114. push r14
  115. %ifdef WINDOWS
  116. push rsi
  117. push rdi
  118. mov r14d, ecx ; level
  119. %else ; UNIX
  120. mov r14d, edi ; level
  121. %endif
  122. ; check if called before
  123. lea r9, [dataref]
  124. cmp dword [r9+ok], 1 ; ok
  125. je D800
  126. ; find cpu vendor
  127. push 0
  128. %ifdef WINDOWS
  129. mov rcx, rsp
  130. xor edx, edx
  131. xor r8d, r8d
  132. %else ; UNIX
  133. mov rdi, rsp
  134. xor esi, esi
  135. xor edx, edx
  136. %endif
  137. call CpuType
  138. lea r9, [dataref]
  139. pop rax ; eax = vendor
  140. dec eax
  141. jz Intel
  142. dec eax
  143. jz AMD
  144. dec eax
  145. jz VIA
  146. ; unknown vendor, try all methods
  147. call IntelNewMethod
  148. jnc D800 ; not carry = success
  149. call AMDMethod
  150. jnc D800 ; not carry = success
  151. call IntelOldMethod
  152. jmp D800 ; return whether success or not
  153. Intel: call IntelNewMethod
  154. jnc D800 ; not carry = success
  155. call IntelOldMethod
  156. jmp D800 ; return whether success or not
  157. AMD: ; AMD and VIA use same method
  158. VIA: call AMDMethod
  159. D800: ; cache data known, get desired return value
  160. xor eax, eax
  161. cmp r14d, numlevels
  162. ja D900
  163. cmp r14d, 0
  164. je D820
  165. ; level = 1 .. numlevels
  166. mov rax, [r9 + r14*8] ; size of selected cache
  167. jmp D850
  168. D820: ; level = 0. Get size of largest level cache
  169. mov rax, [r9 + level3] ; level3
  170. test rax, rax
  171. jnz D850
  172. mov rax, [r9 + level2] ; level2
  173. test rax, rax
  174. jnz D850
  175. mov eax, [r9 + level1] ; level1
  176. D850: mov dword [r9 + ok], 1 ; remember called, whether success or not
  177. D900:
  178. %ifdef WINDOWS
  179. pop rdi
  180. pop rsi
  181. %endif
  182. pop r14
  183. pop rbx
  184. ret
  185. ; Determine cache sizes by CPUID function 4
  186. ; input: esi = pointer to dataref
  187. ; output: values returned in dataref + level1, level2, level3
  188. ; carry flag = 0 on succes
  189. IntelNewMethod:
  190. xor eax, eax
  191. cpuid ; get number of CPUID functions
  192. cmp eax, 4
  193. jb I900 ; fail
  194. xor esi, esi ; loop counter
  195. I100: mov eax, 4
  196. mov ecx, esi
  197. cpuid ; get cache parameters
  198. mov edx, eax
  199. and edx, 11111b ; cache type
  200. jz I500 ; no more caches
  201. cmp edx, 2
  202. je I200 ; code cache, ignore
  203. inc ecx ; sets
  204. mov edx, ebx
  205. shr edx, 22
  206. inc edx ; ways
  207. imul ecx, edx
  208. mov edx, ebx
  209. shr edx, 12
  210. and edx, 1111111111b
  211. inc edx ; partitions
  212. imul ecx, edx
  213. and ebx, 111111111111b
  214. inc ebx ; line size
  215. imul rcx, rbx ; calculated cache size (64 bit)
  216. shr eax, 5
  217. and eax, 111b ; cache level
  218. cmp eax, numlevels
  219. jna I180
  220. mov eax, numlevels ; limit higher levels
  221. I180: mov [r9+rax*8], rcx ; store size of data cache level eax
  222. I200: inc esi
  223. cmp esi, 100h ; avoid infinite loop
  224. jb I100 ; next cache
  225. I500: ; loop finished
  226. ; check if OK
  227. mov eax, [r9+level1] ; level1
  228. cmp eax, 1024
  229. I900: ret ; carry flag set if fail
  230. ; Determine cache sizes by CPUID function 2
  231. ; input: esi = pointer to dataref
  232. ; output: values returned in dataref + level1, level2, level3
  233. ; carry flag = 0 on succes
  234. IntelOldMethod:
  235. xor eax, eax
  236. cpuid ; get number of CPUID functions
  237. cmp eax, 2
  238. jb J900 ; fail
  239. mov eax, 2
  240. xor ecx, ecx
  241. cpuid ; get 16 descriptor bytes in eax, ebx, ecx, edx
  242. mov al, 0 ; al does not contain a descriptor
  243. sub rsp, 16
  244. mov [rsp], eax ; save all descriptors
  245. mov [rsp+4], ebx
  246. mov [rsp+8], ecx
  247. mov [rsp+12], edx
  248. mov edx, 15 ; loop counter
  249. ; loop to read 16 descriptor bytes
  250. J100: mov al, byte [rsp+rdx]
  251. ; find in table
  252. mov ebx, descriptortablelength-1 ; loop counter
  253. ; loop to search in descriptortable
  254. J200: cmp al, [r9 + descriptortable + rbx*4 + d_key]
  255. jne J300
  256. ; descriptor found
  257. movzx eax, byte [r9 + descriptortable + rbx*4 + d_sizem]
  258. mov cl, [r9 + descriptortable + rbx*4 + d_2pow]
  259. shl eax, cl ; compute size
  260. movzx ecx, byte [r9 + descriptortable + rbx*4 + d_level]
  261. ; check that level = 1-3
  262. cmp ecx, 3
  263. ja J300
  264. mov [r9+rcx*8], rax ; store size eax of data cache level ecx
  265. J300: dec ebx
  266. jns J200 ; inner loop
  267. dec edx
  268. jns J100 ; outer loop
  269. add rsp, 16 ; remove from stack
  270. ; check if OK
  271. mov eax, [r9 + level1]
  272. cmp eax, 1024
  273. J900: ret ; carry flag set if fail
  274. ; Determine cache sizes by CPUID function 80000005H - 80000006H
  275. ; input: esi = pointer to dataref
  276. ; output: values returned in dataref
  277. ; carry flag = 0 on succes
  278. AMDMethod:
  279. mov eax, 80000000H
  280. cpuid ; get number of CPUID functions
  281. cmp eax, 6
  282. jb K900 ; fail
  283. mov eax, 80000005H
  284. cpuid ; get L1 cache size
  285. shr ecx, 24 ; L1 data cache size in kbytes
  286. shl ecx, 10 ; L1 data cache size in bytes
  287. mov [r9 + level1], ecx ; store L1 data cache size
  288. mov eax, 80000006H
  289. cpuid ; get L2 and L3 cache sizes
  290. shr ecx, 16 ; L2 data cache size in kbytes
  291. shl ecx, 10 ; L2 data cache size in bytes
  292. mov [r9 + level2], ecx ; store L2 data cache size
  293. mov ecx, edx
  294. shr ecx, 18 ; L3 data cache size / 512 kbytes
  295. shl rcx, 19 ; L3 data cache size in bytes
  296. %if 0 ; AMD manual is unclear:
  297. ; do we have to increase the value if the number of ways is not a power or 2?
  298. shr edx, 12
  299. and edx, 1111b ; L3 associativity
  300. cmp edx, 3
  301. jb K100
  302. test edx, 1
  303. jz K100
  304. ; number of ways is not a power of 2, multiply by 1.5 ?
  305. mov rax, rcx
  306. shr rax, 1
  307. add rcx, rax
  308. %endif
  309. K100: mov [r9 + level3], rcx ; store L3 data cache size
  310. ; check if OK
  311. mov eax, [r9 + level1]
  312. cmp eax, 1024
  313. K900: ret ; carry flag set if fail