pystrhex.c 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. /* Format bytes as hexadecimal */
  2. #include "Python.h"
  3. #include "pycore_strhex.h" // _Py_strhex_with_sep()
  4. #include <stdlib.h> // abs()
  5. static PyObject *_Py_strhex_impl(const char* argbuf, const Py_ssize_t arglen,
  6. PyObject* sep, int bytes_per_sep_group,
  7. const int return_bytes)
  8. {
  9. assert(arglen >= 0);
  10. Py_UCS1 sep_char = 0;
  11. if (sep) {
  12. Py_ssize_t seplen = PyObject_Length((PyObject*)sep);
  13. if (seplen < 0) {
  14. return NULL;
  15. }
  16. if (seplen != 1) {
  17. PyErr_SetString(PyExc_ValueError, "sep must be length 1.");
  18. return NULL;
  19. }
  20. if (PyUnicode_Check(sep)) {
  21. if (PyUnicode_READY(sep))
  22. return NULL;
  23. if (PyUnicode_KIND(sep) != PyUnicode_1BYTE_KIND) {
  24. PyErr_SetString(PyExc_ValueError, "sep must be ASCII.");
  25. return NULL;
  26. }
  27. sep_char = PyUnicode_READ_CHAR(sep, 0);
  28. }
  29. else if (PyBytes_Check(sep)) {
  30. sep_char = PyBytes_AS_STRING(sep)[0];
  31. }
  32. else {
  33. PyErr_SetString(PyExc_TypeError, "sep must be str or bytes.");
  34. return NULL;
  35. }
  36. if (sep_char > 127 && !return_bytes) {
  37. PyErr_SetString(PyExc_ValueError, "sep must be ASCII.");
  38. return NULL;
  39. }
  40. }
  41. else {
  42. bytes_per_sep_group = 0;
  43. }
  44. unsigned int abs_bytes_per_sep = abs(bytes_per_sep_group);
  45. Py_ssize_t resultlen = 0;
  46. if (bytes_per_sep_group && arglen > 0) {
  47. /* How many sep characters we'll be inserting. */
  48. resultlen = (arglen - 1) / abs_bytes_per_sep;
  49. }
  50. /* Bounds checking for our Py_ssize_t indices. */
  51. if (arglen >= PY_SSIZE_T_MAX / 2 - resultlen) {
  52. return PyErr_NoMemory();
  53. }
  54. resultlen += arglen * 2;
  55. if ((size_t)abs_bytes_per_sep >= (size_t)arglen) {
  56. bytes_per_sep_group = 0;
  57. abs_bytes_per_sep = 0;
  58. }
  59. PyObject *retval;
  60. Py_UCS1 *retbuf;
  61. if (return_bytes) {
  62. /* If _PyBytes_FromSize() were public we could avoid malloc+copy. */
  63. retval = PyBytes_FromStringAndSize(NULL, resultlen);
  64. if (!retval) {
  65. return NULL;
  66. }
  67. retbuf = (Py_UCS1 *)PyBytes_AS_STRING(retval);
  68. }
  69. else {
  70. retval = PyUnicode_New(resultlen, 127);
  71. if (!retval) {
  72. return NULL;
  73. }
  74. retbuf = PyUnicode_1BYTE_DATA(retval);
  75. }
  76. /* Hexlify */
  77. Py_ssize_t i, j;
  78. unsigned char c;
  79. if (bytes_per_sep_group == 0) {
  80. for (i = j = 0; i < arglen; ++i) {
  81. assert((j + 1) < resultlen);
  82. c = argbuf[i];
  83. retbuf[j++] = Py_hexdigits[c >> 4];
  84. retbuf[j++] = Py_hexdigits[c & 0x0f];
  85. }
  86. assert(j == resultlen);
  87. }
  88. else {
  89. /* The number of complete chunk+sep periods */
  90. Py_ssize_t chunks = (arglen - 1) / abs_bytes_per_sep;
  91. Py_ssize_t chunk;
  92. unsigned int k;
  93. if (bytes_per_sep_group < 0) {
  94. i = j = 0;
  95. for (chunk = 0; chunk < chunks; chunk++) {
  96. for (k = 0; k < abs_bytes_per_sep; k++) {
  97. c = argbuf[i++];
  98. retbuf[j++] = Py_hexdigits[c >> 4];
  99. retbuf[j++] = Py_hexdigits[c & 0x0f];
  100. }
  101. retbuf[j++] = sep_char;
  102. }
  103. while (i < arglen) {
  104. c = argbuf[i++];
  105. retbuf[j++] = Py_hexdigits[c >> 4];
  106. retbuf[j++] = Py_hexdigits[c & 0x0f];
  107. }
  108. assert(j == resultlen);
  109. }
  110. else {
  111. i = arglen - 1;
  112. j = resultlen - 1;
  113. for (chunk = 0; chunk < chunks; chunk++) {
  114. for (k = 0; k < abs_bytes_per_sep; k++) {
  115. c = argbuf[i--];
  116. retbuf[j--] = Py_hexdigits[c & 0x0f];
  117. retbuf[j--] = Py_hexdigits[c >> 4];
  118. }
  119. retbuf[j--] = sep_char;
  120. }
  121. while (i >= 0) {
  122. c = argbuf[i--];
  123. retbuf[j--] = Py_hexdigits[c & 0x0f];
  124. retbuf[j--] = Py_hexdigits[c >> 4];
  125. }
  126. assert(j == -1);
  127. }
  128. }
  129. #ifdef Py_DEBUG
  130. if (!return_bytes) {
  131. assert(_PyUnicode_CheckConsistency(retval, 1));
  132. }
  133. #endif
  134. return retval;
  135. }
  136. PyObject * _Py_strhex(const char* argbuf, const Py_ssize_t arglen)
  137. {
  138. return _Py_strhex_impl(argbuf, arglen, NULL, 0, 0);
  139. }
  140. /* Same as above but returns a bytes() instead of str() to avoid the
  141. * need to decode the str() when bytes are needed. */
  142. PyObject* _Py_strhex_bytes(const char* argbuf, const Py_ssize_t arglen)
  143. {
  144. return _Py_strhex_impl(argbuf, arglen, NULL, 0, 1);
  145. }
  146. /* These variants include support for a separator between every N bytes: */
  147. PyObject* _Py_strhex_with_sep(const char* argbuf, const Py_ssize_t arglen,
  148. PyObject* sep, const int bytes_per_group)
  149. {
  150. return _Py_strhex_impl(argbuf, arglen, sep, bytes_per_group, 0);
  151. }
  152. /* Same as above but returns a bytes() instead of str() to avoid the
  153. * need to decode the str() when bytes are needed. */
  154. PyObject* _Py_strhex_bytes_with_sep(const char* argbuf, const Py_ssize_t arglen,
  155. PyObject* sep, const int bytes_per_group)
  156. {
  157. return _Py_strhex_impl(argbuf, arglen, sep, bytes_per_group, 1);
  158. }