suggestions.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429
  1. #include "Python.h"
  2. #include "pycore_frame.h"
  3. #include "pycore_runtime.h" // _PyRuntime
  4. #include "pycore_global_objects.h" // _Py_ID()
  5. #include "pycore_pyerrors.h"
  6. #include "pycore_code.h" // _PyCode_GetVarnames()
  7. #include "stdlib_module_names.h" // _Py_stdlib_module_names
  8. #define MAX_CANDIDATE_ITEMS 750
  9. #define MAX_STRING_SIZE 40
  10. #define MOVE_COST 2
  11. #define CASE_COST 1
  12. #define LEAST_FIVE_BITS(n) ((n) & 31)
  13. static inline int
  14. substitution_cost(char a, char b)
  15. {
  16. if (LEAST_FIVE_BITS(a) != LEAST_FIVE_BITS(b)) {
  17. // Not the same, not a case flip.
  18. return MOVE_COST;
  19. }
  20. if (a == b) {
  21. return 0;
  22. }
  23. if ('A' <= a && a <= 'Z') {
  24. a += ('a' - 'A');
  25. }
  26. if ('A' <= b && b <= 'Z') {
  27. b += ('a' - 'A');
  28. }
  29. if (a == b) {
  30. return CASE_COST;
  31. }
  32. return MOVE_COST;
  33. }
  34. /* Calculate the Levenshtein distance between string1 and string2 */
  35. static Py_ssize_t
  36. levenshtein_distance(const char *a, size_t a_size,
  37. const char *b, size_t b_size,
  38. size_t max_cost, size_t *buffer)
  39. {
  40. // Both strings are the same (by identity)
  41. if (a == b) {
  42. return 0;
  43. }
  44. // Trim away common affixes.
  45. while (a_size && b_size && a[0] == b[0]) {
  46. a++; a_size--;
  47. b++; b_size--;
  48. }
  49. while (a_size && b_size && a[a_size-1] == b[b_size-1]) {
  50. a_size--;
  51. b_size--;
  52. }
  53. if (a_size == 0 || b_size == 0) {
  54. return (a_size + b_size) * MOVE_COST;
  55. }
  56. if (a_size > MAX_STRING_SIZE || b_size > MAX_STRING_SIZE) {
  57. return max_cost + 1;
  58. }
  59. // Prefer shorter buffer
  60. if (b_size < a_size) {
  61. const char *t = a; a = b; b = t;
  62. size_t t_size = a_size; a_size = b_size; b_size = t_size;
  63. }
  64. // quick fail when a match is impossible.
  65. if ((b_size - a_size) * MOVE_COST > max_cost) {
  66. return max_cost + 1;
  67. }
  68. // Instead of producing the whole traditional len(a)-by-len(b)
  69. // matrix, we can update just one row in place.
  70. // Initialize the buffer row
  71. size_t tmp = MOVE_COST;
  72. for (size_t i = 0; i < a_size; i++) {
  73. // cost from b[:0] to a[:i+1]
  74. buffer[i] = tmp;
  75. tmp += MOVE_COST;
  76. }
  77. size_t result = 0;
  78. for (size_t b_index = 0; b_index < b_size; b_index++) {
  79. char code = b[b_index];
  80. // cost(b[:b_index], a[:0]) == b_index * MOVE_COST
  81. size_t distance = result = b_index * MOVE_COST;
  82. size_t minimum = SIZE_MAX;
  83. for (size_t index = 0; index < a_size; index++) {
  84. // cost(b[:b_index+1], a[:index+1]) = min(
  85. // // 1) substitute
  86. // cost(b[:b_index], a[:index])
  87. // + substitution_cost(b[b_index], a[index]),
  88. // // 2) delete from b
  89. // cost(b[:b_index], a[:index+1]) + MOVE_COST,
  90. // // 3) delete from a
  91. // cost(b[:b_index+1], a[index]) + MOVE_COST
  92. // )
  93. // 1) Previous distance in this row is cost(b[:b_index], a[:index])
  94. size_t substitute = distance + substitution_cost(code, a[index]);
  95. // 2) cost(b[:b_index], a[:index+1]) from previous row
  96. distance = buffer[index];
  97. // 3) existing result is cost(b[:b_index+1], a[index])
  98. size_t insert_delete = Py_MIN(result, distance) + MOVE_COST;
  99. result = Py_MIN(insert_delete, substitute);
  100. // cost(b[:b_index+1], a[:index+1])
  101. buffer[index] = result;
  102. if (result < minimum) {
  103. minimum = result;
  104. }
  105. }
  106. if (minimum > max_cost) {
  107. // Everything in this row is too big, so bail early.
  108. return max_cost + 1;
  109. }
  110. }
  111. return result;
  112. }
  113. static inline PyObject *
  114. calculate_suggestions(PyObject *dir,
  115. PyObject *name)
  116. {
  117. assert(!PyErr_Occurred());
  118. assert(PyList_CheckExact(dir));
  119. Py_ssize_t dir_size = PyList_GET_SIZE(dir);
  120. if (dir_size >= MAX_CANDIDATE_ITEMS) {
  121. return NULL;
  122. }
  123. Py_ssize_t suggestion_distance = PY_SSIZE_T_MAX;
  124. PyObject *suggestion = NULL;
  125. Py_ssize_t name_size;
  126. const char *name_str = PyUnicode_AsUTF8AndSize(name, &name_size);
  127. if (name_str == NULL) {
  128. return NULL;
  129. }
  130. size_t *buffer = PyMem_New(size_t, MAX_STRING_SIZE);
  131. if (buffer == NULL) {
  132. return PyErr_NoMemory();
  133. }
  134. for (int i = 0; i < dir_size; ++i) {
  135. PyObject *item = PyList_GET_ITEM(dir, i);
  136. if (_PyUnicode_Equal(name, item)) {
  137. continue;
  138. }
  139. Py_ssize_t item_size;
  140. const char *item_str = PyUnicode_AsUTF8AndSize(item, &item_size);
  141. if (item_str == NULL) {
  142. PyMem_Free(buffer);
  143. return NULL;
  144. }
  145. // No more than 1/3 of the involved characters should need changed.
  146. Py_ssize_t max_distance = (name_size + item_size + 3) * MOVE_COST / 6;
  147. // Don't take matches we've already beaten.
  148. max_distance = Py_MIN(max_distance, suggestion_distance - 1);
  149. Py_ssize_t current_distance =
  150. levenshtein_distance(name_str, name_size, item_str,
  151. item_size, max_distance, buffer);
  152. if (current_distance > max_distance) {
  153. continue;
  154. }
  155. if (!suggestion || current_distance < suggestion_distance) {
  156. suggestion = item;
  157. suggestion_distance = current_distance;
  158. }
  159. }
  160. PyMem_Free(buffer);
  161. return Py_XNewRef(suggestion);
  162. }
  163. static PyObject *
  164. get_suggestions_for_attribute_error(PyAttributeErrorObject *exc)
  165. {
  166. PyObject *name = exc->name; // borrowed reference
  167. PyObject *obj = exc->obj; // borrowed reference
  168. // Abort if we don't have an attribute name or we have an invalid one
  169. if (name == NULL || obj == NULL || !PyUnicode_CheckExact(name)) {
  170. return NULL;
  171. }
  172. PyObject *dir = PyObject_Dir(obj);
  173. if (dir == NULL) {
  174. return NULL;
  175. }
  176. PyObject *suggestions = calculate_suggestions(dir, name);
  177. Py_DECREF(dir);
  178. return suggestions;
  179. }
  180. static PyObject *
  181. offer_suggestions_for_attribute_error(PyAttributeErrorObject *exc)
  182. {
  183. PyObject* suggestion = get_suggestions_for_attribute_error(exc);
  184. if (suggestion == NULL) {
  185. return NULL;
  186. }
  187. // Add a trailer ". Did you mean: (...)?"
  188. PyObject* result = PyUnicode_FromFormat(". Did you mean: %R?", suggestion);
  189. Py_DECREF(suggestion);
  190. return result;
  191. }
  192. static PyObject *
  193. get_suggestions_for_name_error(PyObject* name, PyFrameObject* frame)
  194. {
  195. PyCodeObject *code = PyFrame_GetCode(frame);
  196. assert(code != NULL && code->co_localsplusnames != NULL);
  197. PyObject *varnames = _PyCode_GetVarnames(code);
  198. Py_DECREF(code);
  199. if (varnames == NULL) {
  200. return NULL;
  201. }
  202. PyObject *dir = PySequence_List(varnames);
  203. Py_DECREF(varnames);
  204. if (dir == NULL) {
  205. return NULL;
  206. }
  207. // Are we inside a method and the instance has an attribute called 'name'?
  208. int res = PySequence_Contains(dir, &_Py_ID(self));
  209. if (res < 0) {
  210. goto error;
  211. }
  212. if (res > 0) {
  213. PyObject* locals = PyFrame_GetLocals(frame);
  214. if (!locals) {
  215. goto error;
  216. }
  217. PyObject* self = PyDict_GetItemWithError(locals, &_Py_ID(self)); /* borrowed */
  218. if (!self) {
  219. Py_DECREF(locals);
  220. goto error;
  221. }
  222. PyObject *value;
  223. res = _PyObject_LookupAttr(self, name, &value);
  224. Py_DECREF(locals);
  225. if (res < 0) {
  226. goto error;
  227. }
  228. if (value) {
  229. Py_DECREF(value);
  230. Py_DECREF(dir);
  231. return PyUnicode_FromFormat("self.%U", name);
  232. }
  233. }
  234. PyObject *suggestions = calculate_suggestions(dir, name);
  235. Py_DECREF(dir);
  236. if (suggestions != NULL || PyErr_Occurred()) {
  237. return suggestions;
  238. }
  239. dir = PySequence_List(frame->f_frame->f_globals);
  240. if (dir == NULL) {
  241. return NULL;
  242. }
  243. suggestions = calculate_suggestions(dir, name);
  244. Py_DECREF(dir);
  245. if (suggestions != NULL || PyErr_Occurred()) {
  246. return suggestions;
  247. }
  248. dir = PySequence_List(frame->f_frame->f_builtins);
  249. if (dir == NULL) {
  250. return NULL;
  251. }
  252. suggestions = calculate_suggestions(dir, name);
  253. Py_DECREF(dir);
  254. return suggestions;
  255. error:
  256. Py_DECREF(dir);
  257. return NULL;
  258. }
  259. static bool
  260. is_name_stdlib_module(PyObject* name)
  261. {
  262. const char* the_name = PyUnicode_AsUTF8(name);
  263. Py_ssize_t len = Py_ARRAY_LENGTH(_Py_stdlib_module_names);
  264. for (Py_ssize_t i = 0; i < len; i++) {
  265. if (strcmp(the_name, _Py_stdlib_module_names[i]) == 0) {
  266. return 1;
  267. }
  268. }
  269. return 0;
  270. }
  271. static PyObject *
  272. offer_suggestions_for_name_error(PyNameErrorObject *exc)
  273. {
  274. PyObject *name = exc->name; // borrowed reference
  275. PyTracebackObject *traceback = (PyTracebackObject *) exc->traceback; // borrowed reference
  276. // Abort if we don't have a variable name or we have an invalid one
  277. // or if we don't have a traceback to work with
  278. if (name == NULL || !PyUnicode_CheckExact(name) ||
  279. traceback == NULL || !Py_IS_TYPE(traceback, &PyTraceBack_Type)
  280. ) {
  281. return NULL;
  282. }
  283. // Move to the traceback of the exception
  284. while (1) {
  285. PyTracebackObject *next = traceback->tb_next;
  286. if (next == NULL || !Py_IS_TYPE(next, &PyTraceBack_Type)) {
  287. break;
  288. }
  289. else {
  290. traceback = next;
  291. }
  292. }
  293. PyFrameObject *frame = traceback->tb_frame;
  294. assert(frame != NULL);
  295. PyObject* suggestion = get_suggestions_for_name_error(name, frame);
  296. if (suggestion == NULL && PyErr_Occurred()) {
  297. return NULL;
  298. }
  299. // Add a trailer ". Did you mean: (...)?"
  300. PyObject* result = NULL;
  301. if (!is_name_stdlib_module(name)) {
  302. if (suggestion == NULL) {
  303. return NULL;
  304. }
  305. result = PyUnicode_FromFormat(". Did you mean: %R?", suggestion);
  306. } else if (suggestion == NULL) {
  307. result = PyUnicode_FromFormat(". Did you forget to import %R?", name);
  308. } else {
  309. result = PyUnicode_FromFormat(". Did you mean: %R? Or did you forget to import %R?", suggestion, name);
  310. }
  311. Py_XDECREF(suggestion);
  312. return result;
  313. }
  314. static PyObject *
  315. offer_suggestions_for_import_error(PyImportErrorObject *exc)
  316. {
  317. PyObject *mod_name = exc->name; // borrowed reference
  318. PyObject *name = exc->name_from; // borrowed reference
  319. if (name == NULL || mod_name == NULL || name == Py_None ||
  320. !PyUnicode_CheckExact(name) || !PyUnicode_CheckExact(mod_name)) {
  321. return NULL;
  322. }
  323. PyObject* mod = PyImport_GetModule(mod_name);
  324. if (mod == NULL) {
  325. return NULL;
  326. }
  327. PyObject *dir = PyObject_Dir(mod);
  328. Py_DECREF(mod);
  329. if (dir == NULL) {
  330. return NULL;
  331. }
  332. PyObject *suggestion = calculate_suggestions(dir, name);
  333. Py_DECREF(dir);
  334. if (!suggestion) {
  335. return NULL;
  336. }
  337. PyObject* result = PyUnicode_FromFormat(". Did you mean: %R?", suggestion);
  338. Py_DECREF(suggestion);
  339. return result;
  340. }
  341. // Offer suggestions for a given exception. Returns a python string object containing the
  342. // suggestions. This function returns NULL if no suggestion was found or if an exception happened,
  343. // users must call PyErr_Occurred() to disambiguate.
  344. PyObject *
  345. _Py_Offer_Suggestions(PyObject *exception)
  346. {
  347. PyObject *result = NULL;
  348. assert(!PyErr_Occurred());
  349. if (Py_IS_TYPE(exception, (PyTypeObject*)PyExc_AttributeError)) {
  350. result = offer_suggestions_for_attribute_error((PyAttributeErrorObject *) exception);
  351. } else if (Py_IS_TYPE(exception, (PyTypeObject*)PyExc_NameError)) {
  352. result = offer_suggestions_for_name_error((PyNameErrorObject *) exception);
  353. } else if (Py_IS_TYPE(exception, (PyTypeObject*)PyExc_ImportError)) {
  354. result = offer_suggestions_for_import_error((PyImportErrorObject *) exception);
  355. }
  356. return result;
  357. }
  358. Py_ssize_t
  359. _Py_UTF8_Edit_Cost(PyObject *a, PyObject *b, Py_ssize_t max_cost)
  360. {
  361. assert(PyUnicode_Check(a) && PyUnicode_Check(b));
  362. Py_ssize_t size_a, size_b;
  363. const char *utf8_a = PyUnicode_AsUTF8AndSize(a, &size_a);
  364. if (utf8_a == NULL) {
  365. return -1;
  366. }
  367. const char *utf8_b = PyUnicode_AsUTF8AndSize(b, &size_b);
  368. if (utf8_b == NULL) {
  369. return -1;
  370. }
  371. if (max_cost == -1) {
  372. max_cost = MOVE_COST * Py_MAX(size_a, size_b);
  373. }
  374. size_t *buffer = PyMem_New(size_t, MAX_STRING_SIZE);
  375. if (buffer == NULL) {
  376. PyErr_NoMemory();
  377. return -1;
  378. }
  379. Py_ssize_t res = levenshtein_distance(utf8_a, size_a,
  380. utf8_b, size_b, max_cost, buffer);
  381. PyMem_Free(buffer);
  382. return res;
  383. }