text.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. # encoding: utf-8
  2. """
  3. Utilities for working with strings and text.
  4. Inheritance diagram:
  5. .. inheritance-diagram:: IPython.utils.text
  6. :parts: 3
  7. """
  8. import os
  9. import re
  10. import sys
  11. import textwrap
  12. from string import Formatter
  13. # datetime.strftime date format for ipython
  14. if sys.platform == 'win32':
  15. date_format = "%B %d, %Y"
  16. else:
  17. date_format = "%B %-d, %Y"
  18. def indent(instr,nspaces=4, ntabs=0, flatten=False):
  19. """Indent a string a given number of spaces or tabstops.
  20. indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.
  21. Parameters
  22. ----------
  23. instr : basestring
  24. The string to be indented.
  25. nspaces : int (default: 4)
  26. The number of spaces to be indented.
  27. ntabs : int (default: 0)
  28. The number of tabs to be indented.
  29. flatten : bool (default: False)
  30. Whether to scrub existing indentation. If True, all lines will be
  31. aligned to the same indentation. If False, existing indentation will
  32. be strictly increased.
  33. Returns
  34. -------
  35. str|unicode : string indented by ntabs and nspaces.
  36. """
  37. if instr is None:
  38. return
  39. ind = '\t'*ntabs+' '*nspaces
  40. if flatten:
  41. pat = re.compile(r'^\s*', re.MULTILINE)
  42. else:
  43. pat = re.compile(r'^', re.MULTILINE)
  44. outstr = re.sub(pat, ind, instr)
  45. if outstr.endswith(os.linesep+ind):
  46. return outstr[:-len(ind)]
  47. else:
  48. return outstr
  49. def dedent(text):
  50. """Equivalent of textwrap.dedent that ignores unindented first line.
  51. This means it will still dedent strings like:
  52. '''foo
  53. is a bar
  54. '''
  55. For use in wrap_paragraphs.
  56. """
  57. if text.startswith('\n'):
  58. # text starts with blank line, don't ignore the first line
  59. return textwrap.dedent(text)
  60. # split first line
  61. splits = text.split('\n',1)
  62. if len(splits) == 1:
  63. # only one line
  64. return textwrap.dedent(text)
  65. first, rest = splits
  66. # dedent everything but the first line
  67. rest = textwrap.dedent(rest)
  68. return '\n'.join([first, rest])
  69. def wrap_paragraphs(text, ncols=80):
  70. """Wrap multiple paragraphs to fit a specified width.
  71. This is equivalent to textwrap.wrap, but with support for multiple
  72. paragraphs, as separated by empty lines.
  73. Returns
  74. -------
  75. list of complete paragraphs, wrapped to fill `ncols` columns.
  76. """
  77. paragraph_re = re.compile(r'\n(\s*\n)+', re.MULTILINE)
  78. text = dedent(text).strip()
  79. paragraphs = paragraph_re.split(text)[::2] # every other entry is space
  80. out_ps = []
  81. indent_re = re.compile(r'\n\s+', re.MULTILINE)
  82. for p in paragraphs:
  83. # presume indentation that survives dedent is meaningful formatting,
  84. # so don't fill unless text is flush.
  85. if indent_re.search(p) is None:
  86. # wrap paragraph
  87. p = textwrap.fill(p, ncols)
  88. out_ps.append(p)
  89. return out_ps
  90. def strip_ansi(source):
  91. """
  92. Remove ansi escape codes from text.
  93. Parameters
  94. ----------
  95. source : str
  96. Source to remove the ansi from
  97. """
  98. return re.sub(r'\033\[(\d|;)+?m', '', source)
  99. #-----------------------------------------------------------------------------
  100. # Utils to columnize a list of string
  101. #-----------------------------------------------------------------------------
  102. def _chunks(l, n):
  103. """Yield successive n-sized chunks from l."""
  104. for i in range(0, len(l), n):
  105. yield l[i:i+n]
  106. def _find_optimal(rlist , separator_size=2 , displaywidth=80):
  107. """Calculate optimal info to columnize a list of string"""
  108. for nrow in range(1, len(rlist)+1) :
  109. chk = list(map(max,_chunks(rlist, nrow)))
  110. sumlength = sum(chk)
  111. ncols = len(chk)
  112. if sumlength+separator_size*(ncols-1) <= displaywidth :
  113. break;
  114. return {'columns_numbers' : ncols,
  115. 'optimal_separator_width':(displaywidth - sumlength)/(ncols-1) if (ncols -1) else 0,
  116. 'rows_numbers' : nrow,
  117. 'columns_width' : chk
  118. }
  119. def _get_or_default(mylist, i, default=None):
  120. """return list item number, or default if don't exist"""
  121. if i >= len(mylist):
  122. return default
  123. else :
  124. return mylist[i]
  125. def compute_item_matrix(items, empty=None, *args, **kwargs) :
  126. """Returns a nested list, and info to columnize items
  127. Parameters
  128. ----------
  129. items
  130. list of strings to columize
  131. empty : (default None)
  132. default value to fill list if needed
  133. separator_size : int (default=2)
  134. How much caracters will be used as a separation between each columns.
  135. displaywidth : int (default=80)
  136. The width of the area onto wich the columns should enter
  137. Returns
  138. -------
  139. strings_matrix
  140. nested list of string, the outer most list contains as many list as
  141. rows, the innermost lists have each as many element as colums. If the
  142. total number of elements in `items` does not equal the product of
  143. rows*columns, the last element of some lists are filled with `None`.
  144. dict_info
  145. some info to make columnize easier:
  146. columns_numbers
  147. number of columns
  148. rows_numbers
  149. number of rows
  150. columns_width
  151. list of with of each columns
  152. optimal_separator_width
  153. best separator width between columns
  154. Examples
  155. --------
  156. ::
  157. In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
  158. ...: compute_item_matrix(l,displaywidth=12)
  159. Out[1]:
  160. ([['aaa', 'f', 'k'],
  161. ['b', 'g', 'l'],
  162. ['cc', 'h', None],
  163. ['d', 'i', None],
  164. ['eeeee', 'j', None]],
  165. {'columns_numbers': 3,
  166. 'columns_width': [5, 1, 1],
  167. 'optimal_separator_width': 2,
  168. 'rows_numbers': 5})
  169. """
  170. info = _find_optimal(list(map(len, items)), *args, **kwargs)
  171. nrow, ncol = info['rows_numbers'], info['columns_numbers']
  172. return ([[ _get_or_default(items, c*nrow+i, default=empty) for c in range(ncol) ] for i in range(nrow) ], info)
  173. def columnize(items, separator=' ', displaywidth=80):
  174. """ Transform a list of strings into a single string with columns.
  175. Parameters
  176. ----------
  177. items : sequence of strings
  178. The strings to process.
  179. separator : str, optional [default is two spaces]
  180. The string that separates columns.
  181. displaywidth : int, optional [default is 80]
  182. Width of the display in number of characters.
  183. Returns
  184. -------
  185. The formatted string.
  186. """
  187. if not items :
  188. return '\n'
  189. matrix, info = compute_item_matrix(items, separator_size=len(separator), displaywidth=displaywidth)
  190. fmatrix = [filter(None, x) for x in matrix]
  191. sjoin = lambda x : separator.join([ y.ljust(w, ' ') for y, w in zip(x, info['columns_width'])])
  192. return '\n'.join(map(sjoin, fmatrix))+'\n'