popcount64.asm 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
  1. %include "defs.asm"
  2. ;************************* popcount64.asm ************************************
  3. ; Author: Agner Fog
  4. ; Date created: 2011-07-20
  5. ; Last modified: 2011-07-20
  6. ; Description:
  7. ; Population count function. Counts the number of 1-bits in a 32-bit integer
  8. ; unsigned int A_popcount (unsigned int x);
  9. ;
  10. ; Position-independent code is generated if POSITIONINDEPENDENT is defined.
  11. ;
  12. ; CPU dispatching included for 386 and SSE4.2 instruction sets.
  13. ;
  14. ; Copyright (c) 2011 GNU General Public License www.gnu.org/licenses
  15. ;******************************************************************************
  16. default rel
  17. global A_popcount: function
  18. ; Direct entries to CPU-specific versions
  19. global popcountGeneric: function
  20. global popcountSSE42: function
  21. ; Imported from instrset64.asm:
  22. extern InstructionSet ; Instruction set for CPU dispatcher
  23. section .text
  ;******************************************************************************
  ; unsigned int A_popcount (unsigned int x);
  ;
  ; Public entry point. It does no work itself: it tail-jumps through the
  ; function pointer popcountDispatch, so the argument register and the return
  ; address on the stack reach the selected implementation untouched.
  ; popcountDispatch starts out pointing at popcountCPUDispatch, which
  ; overwrites it with the address of the chosen version.
  ;******************************************************************************
  A_popcount:
          jmp     qword [rel popcountDispatch]    ; indirect tail-jump to the current target
  align 16
  ;------------------------------------------------------------------------------
  ; popcountSSE42 -- hardware version, a single POPCNT instruction.
  ; In:    x in edi, or in ecx when WINDOWS is defined
  ; Out:   eax = number of 1-bits in x
  ; Clobb: flags
  ;------------------------------------------------------------------------------
  popcountSSE42:
  %ifndef WINDOWS
          popcnt  eax, edi                ; first integer argument lives in edi
  %else
          popcnt  eax, ecx                ; first integer argument lives in ecx
  %endif
          ret
  ;******************************************************************************
  ; popcountGeneric -- portable version, no POPCNT instruction required.
  ;
  ; In:    x in edi, or in ecx when WINDOWS is defined
  ; Out:   eax = number of 1-bits in x (0..32)
  ; Clobb: edx, flags
  ;
  ; Method: add neighbouring bit fields in parallel, doubling the field width
  ; at every step (1 -> 2 -> 4 -> 8 -> 16 -> 32 bits). At each step edx holds
  ; the upper fields shifted down onto the lower fields kept in eax.
  ;******************************************************************************
  popcountGeneric:
  %ifndef WINDOWS
          mov     eax, edi
  %else
          mov     eax, ecx
  %endif

          ; 1-bit fields -> 2-bit sums
          mov     edx, eax
          shr     edx, 1
          and     eax, 55555555h          ; even-numbered bits
          and     edx, 55555555h          ; odd-numbered bits, moved down by one
          add     eax, edx

          ; 2-bit sums -> 4-bit sums
          mov     edx, eax
          shr     edx, 2
          and     eax, 33333333h
          and     edx, 33333333h
          add     eax, edx

          ; 4-bit sums -> 8-bit sums. A nibble holds at most 4, so two of them
          ; add without carrying out of the nibble; one mask after the add is
          ; enough.
          mov     edx, eax
          shr     edx, 4
          add     eax, edx
          and     eax, 0F0F0F0Fh

          ; 8-bit sums -> 16-bit sums -> total. Each byte holds at most 8, so
          ; the running sums cannot overflow a byte; the stray upper bytes are
          ; discarded by the final mask.
          mov     edx, eax
          shr     edx, 8
          add     eax, edx
          mov     edx, eax
          shr     edx, 16
          add     eax, edx
          and     eax, 03Fh               ; keep the 6-bit result (maximum 32)
          ret
  ;popcountGeneric end
  ; ********************************************************************************
  ; CPU dispatching for popcount.
  ;
  ; Reached through popcountDispatch while that pointer still holds its initial
  ; value. Asks InstructionSet which instruction-set level the CPU supports,
  ; stores the address of the matching implementation in popcountDispatch so
  ; that later calls to A_popcount bypass this routine, then tail-jumps to that
  ; implementation to finish the current call.
  ;
  ; In:    par1 (low 32 bits) = x, the argument passed to A_popcount
  ; Out:   whatever the selected implementation returns in eax
  ; Stack: the saved par1 (8 bytes) brings rsp to a 16-byte boundary for the
  ;        call, provided the caller followed the ABI; under WINDOWS 32 more
  ;        bytes of shadow space are reserved around the call.
  ; ********************************************************************************
  %ifdef WINDOWS
  %define par1 rcx                        ; parameter 1: x (only the low 32 bits are used)
  %else
  %define par1 rdi                        ; parameter 1: x (only the low 32 bits are used)
  %endif

  popcountCPUDispatch:
          ; Get the supported instruction set. par1 is a volatile register, so
          ; it is saved across the call.
          push    par1
  %ifdef WINDOWS
          sub     rsp, 32                 ; shadow space the Microsoft x64 ABI requires for any call
  %endif
          call    InstructionSet
  %ifdef WINDOWS
          add     rsp, 32
  %endif
          pop     par1

          ; Default to the generic version of popcount
          lea     rdx, [rel popcountGeneric]
          ; 9 is the level this file treats as "popcnt supported".
          ; NOTE(review): confirm against InstructionSet's documented return values.
          cmp     eax, 9
          jb      .store_ptr
          ; popcnt available: select the SSE4.2 version of popcount
          lea     rdx, [rel popcountSSE42]
  .store_ptr:
          mov     [rel popcountDispatch], rdx     ; later calls go straight to the chosen version
          ; Continue in the appropriate version; x is still in par1
          jmp     rdx
  SECTION .data
  align 8, db 0                           ; keep the 8-byte pointer naturally aligned
  ; Pointer to the popcount version in use. It initially points to the
  ; dispatcher, which replaces it with the address of the selected version.
  popcountDispatch DQ popcountCPUDispatch