123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244 |
- # encoding: utf-8
- """
- Utilities for working with strings and text.
- Inheritance diagram:
- .. inheritance-diagram:: IPython.utils.text
- :parts: 3
- """
- import os
- import re
- import sys
- import textwrap
- from string import Formatter
- # datetime.strftime date format for ipython
- if sys.platform == 'win32':
- date_format = "%B %d, %Y"
- else:
- date_format = "%B %-d, %Y"
- def indent(instr,nspaces=4, ntabs=0, flatten=False):
- """Indent a string a given number of spaces or tabstops.
- indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.
- Parameters
- ----------
- instr : basestring
- The string to be indented.
- nspaces : int (default: 4)
- The number of spaces to be indented.
- ntabs : int (default: 0)
- The number of tabs to be indented.
- flatten : bool (default: False)
- Whether to scrub existing indentation. If True, all lines will be
- aligned to the same indentation. If False, existing indentation will
- be strictly increased.
- Returns
- -------
- str|unicode : string indented by ntabs and nspaces.
- """
- if instr is None:
- return
- ind = '\t'*ntabs+' '*nspaces
- if flatten:
- pat = re.compile(r'^\s*', re.MULTILINE)
- else:
- pat = re.compile(r'^', re.MULTILINE)
- outstr = re.sub(pat, ind, instr)
- if outstr.endswith(os.linesep+ind):
- return outstr[:-len(ind)]
- else:
- return outstr
- def dedent(text):
- """Equivalent of textwrap.dedent that ignores unindented first line.
- This means it will still dedent strings like:
- '''foo
- is a bar
- '''
- For use in wrap_paragraphs.
- """
- if text.startswith('\n'):
- # text starts with blank line, don't ignore the first line
- return textwrap.dedent(text)
- # split first line
- splits = text.split('\n',1)
- if len(splits) == 1:
- # only one line
- return textwrap.dedent(text)
- first, rest = splits
- # dedent everything but the first line
- rest = textwrap.dedent(rest)
- return '\n'.join([first, rest])
- def wrap_paragraphs(text, ncols=80):
- """Wrap multiple paragraphs to fit a specified width.
- This is equivalent to textwrap.wrap, but with support for multiple
- paragraphs, as separated by empty lines.
- Returns
- -------
- list of complete paragraphs, wrapped to fill `ncols` columns.
- """
- paragraph_re = re.compile(r'\n(\s*\n)+', re.MULTILINE)
- text = dedent(text).strip()
- paragraphs = paragraph_re.split(text)[::2] # every other entry is space
- out_ps = []
- indent_re = re.compile(r'\n\s+', re.MULTILINE)
- for p in paragraphs:
- # presume indentation that survives dedent is meaningful formatting,
- # so don't fill unless text is flush.
- if indent_re.search(p) is None:
- # wrap paragraph
- p = textwrap.fill(p, ncols)
- out_ps.append(p)
- return out_ps
- def strip_ansi(source):
- """
- Remove ansi escape codes from text.
-
- Parameters
- ----------
- source : str
- Source to remove the ansi from
- """
- return re.sub(r'\033\[(\d|;)+?m', '', source)
- #-----------------------------------------------------------------------------
- # Utils to columnize a list of string
- #-----------------------------------------------------------------------------
- def _chunks(l, n):
- """Yield successive n-sized chunks from l."""
- for i in range(0, len(l), n):
- yield l[i:i+n]
- def _find_optimal(rlist , separator_size=2 , displaywidth=80):
- """Calculate optimal info to columnize a list of string"""
- for nrow in range(1, len(rlist)+1) :
- chk = list(map(max,_chunks(rlist, nrow)))
- sumlength = sum(chk)
- ncols = len(chk)
- if sumlength+separator_size*(ncols-1) <= displaywidth :
- break;
- return {'columns_numbers' : ncols,
- 'optimal_separator_width':(displaywidth - sumlength)/(ncols-1) if (ncols -1) else 0,
- 'rows_numbers' : nrow,
- 'columns_width' : chk
- }
- def _get_or_default(mylist, i, default=None):
- """return list item number, or default if don't exist"""
- if i >= len(mylist):
- return default
- else :
- return mylist[i]
- def compute_item_matrix(items, empty=None, *args, **kwargs) :
- """Returns a nested list, and info to columnize items
- Parameters
- ----------
- items
- list of strings to columize
- empty : (default None)
- default value to fill list if needed
- separator_size : int (default=2)
- How much caracters will be used as a separation between each columns.
- displaywidth : int (default=80)
- The width of the area onto wich the columns should enter
- Returns
- -------
- strings_matrix
- nested list of string, the outer most list contains as many list as
- rows, the innermost lists have each as many element as colums. If the
- total number of elements in `items` does not equal the product of
- rows*columns, the last element of some lists are filled with `None`.
- dict_info
- some info to make columnize easier:
- columns_numbers
- number of columns
- rows_numbers
- number of rows
- columns_width
- list of with of each columns
- optimal_separator_width
- best separator width between columns
- Examples
- --------
- ::
- In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
- ...: compute_item_matrix(l,displaywidth=12)
- Out[1]:
- ([['aaa', 'f', 'k'],
- ['b', 'g', 'l'],
- ['cc', 'h', None],
- ['d', 'i', None],
- ['eeeee', 'j', None]],
- {'columns_numbers': 3,
- 'columns_width': [5, 1, 1],
- 'optimal_separator_width': 2,
- 'rows_numbers': 5})
- """
- info = _find_optimal(list(map(len, items)), *args, **kwargs)
- nrow, ncol = info['rows_numbers'], info['columns_numbers']
- return ([[ _get_or_default(items, c*nrow+i, default=empty) for c in range(ncol) ] for i in range(nrow) ], info)
- def columnize(items, separator=' ', displaywidth=80):
- """ Transform a list of strings into a single string with columns.
- Parameters
- ----------
- items : sequence of strings
- The strings to process.
- separator : str, optional [default is two spaces]
- The string that separates columns.
- displaywidth : int, optional [default is 80]
- Width of the display in number of characters.
- Returns
- -------
- The formatted string.
- """
- if not items :
- return '\n'
- matrix, info = compute_item_matrix(items, separator_size=len(separator), displaywidth=displaywidth)
- fmatrix = [filter(None, x) for x in matrix]
- sjoin = lambda x : separator.join([ y.ljust(w, ' ') for y, w in zip(x, info['columns_width'])])
- return '\n'.join(map(sjoin, fmatrix))+'\n'
|