imaputil.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469
  1. # IMAP utility module
  2. # Copyright (C) 2002-2015 John Goerzen & contributors
  3. #
  4. # This program is free software; you can redistribute it and/or modify
  5. # it under the terms of the GNU General Public License as published by
  6. # the Free Software Foundation; either version 2 of the License, or
  7. # (at your option) any later version.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  17. import re
  18. import binascii
  19. import codecs
  20. from typing import Tuple
  21. from offlineimap.ui import getglobalui
# Globals

# Message headers that use space as the separator (for label storage).
# format_labels_string() and parse_labels_string() use a comma for any
# header that is not listed here.
SPACE_SEPARATED_LABEL_HEADERS = ('X-Label', 'Keywords')

# Find the modified UTF-7 shifts of an international mailbox name.
# Matches either a whole '&...-' shift sequence or a single literal '+'.
MUTF7_SHIFT_RE = re.compile(r'&[^-]*-|\+')
  27. def __debug(*args):
  28. msg = []
  29. for arg in args:
  30. msg.append(str(arg))
  31. getglobalui().debug('imap', " ".join(msg))
  32. def dequote(s):
  33. """Takes string which may or may not be quoted and unquotes it.
  34. It only considers double quotes. This function does NOT consider
  35. parenthised lists to be quoted."""
  36. if s and s.startswith('"') and s.endswith('"'):
  37. s = s[1:-1] # Strip off the surrounding quotes.
  38. s = s.replace('\\"', '"')
  39. s = s.replace('\\\\', '\\')
  40. return s
  41. def quote(s):
  42. """Takes an unquoted string and quotes it.
  43. It only adds double quotes. This function does NOT consider
  44. parenthised lists to be quoted."""
  45. s = s.replace('\\', '\\\\')
  46. s = s.replace('"', '\\"')
  47. return '"%s"' % s
  48. def flagsplit(s):
  49. """Converts a string of IMAP flags to a list
  50. :returns: E.g. '(\\Draft \\Deleted)' returns ['\\Draft','\\Deleted'].
  51. (FLAGS (\\Seen Old) UID 4807) returns
  52. ['FLAGS,'(\\Seen Old)','UID', '4807']
  53. """
  54. if s[0] != '(' or s[-1] != ')':
  55. raise ValueError("Passed s '%s' is not a flag list" % s)
  56. return imapsplit(s[1:-1])
  57. def __options2hash(l_list):
  58. """convert l_list [1,2,3,4,5,6] to {1:2, 3:4, 5:6}"""
  59. # effectively this does dict(zip(l[::2],l[1::2])), however
  60. # measurements seemed to have indicated that the manual variant is
  61. # faster for mosly small lists.
  62. retval = {}
  63. counter = 0
  64. while counter < len(l_list):
  65. retval[l_list[counter]] = l_list[counter + 1]
  66. counter += 2
  67. __debug("__options2hash returning:", retval)
  68. return retval
  69. def flags2hash(flags):
  70. """Converts IMAP response string from eg IMAP4.fetch() to a hash.
  71. E.g. '(FLAGS (\\Seen Old) UID 4807)' leads to
  72. {'FLAGS': '(\\Seen Old)', 'UID': '4807'}"""
  73. return __options2hash(flagsplit(flags))
  74. def imapsplit(imapstring):
  75. """Takes a string from an IMAP conversation and returns a list containing
  76. its components. One example string is:
  77. (\\HasNoChildren) "." "INBOX.Sent"
  78. The result from parsing this will be:
  79. ['(\\HasNoChildren)', '"."', '"INBOX.Sent"']"""
  80. if isinstance(imapstring, tuple) and imapstring[0].decode("utf-8").rfind("{")>-1:
  81. imapstring = (imapstring[0].decode("utf-8")[0:imapstring[0].decode("utf-8").rindex("{")] + quote(imapstring[1].decode("utf-8"))).encode("utf-8")
  82. if not isinstance(imapstring, str):
  83. imapstring = imapstring.decode('utf-8')
  84. workstr = imapstring.strip()
  85. retval = []
  86. while len(workstr):
  87. # handle parenthized fragments (...()...)
  88. if workstr[0] == '(':
  89. rparenc = 1 # count of right parenthesis to match
  90. rpareni = 1 # position to examine
  91. while rparenc: # Find the end of the group.
  92. if workstr[rpareni] == ')': # end of a group
  93. rparenc -= 1
  94. elif workstr[rpareni] == '(': # start of a group
  95. rparenc += 1
  96. rpareni += 1 # Move to next character.
  97. parenlist = workstr[0:rpareni]
  98. workstr = workstr[rpareni:].lstrip()
  99. retval.append(parenlist)
  100. elif workstr[0] == '"':
  101. # quoted fragments '"...\"..."'
  102. (quoted, rest) = __split_quoted(workstr)
  103. retval.append(quoted)
  104. workstr = rest
  105. else:
  106. splits = str.split(workstr, maxsplit=1)
  107. splitslen = len(splits)
  108. # The unquoted word is splits[0]; the remainder is splits[1]
  109. if splitslen == 2:
  110. # There's an unquoted word, and more string follows.
  111. retval.append(splits[0])
  112. workstr = splits[1] # split will have already lstripped it
  113. continue
  114. elif splitslen == 1:
  115. # We got a last unquoted word, but nothing else
  116. retval.append(splits[0])
  117. # Nothing remains. workstr would be ''
  118. break
  119. elif splitslen == 0:
  120. # There was not even an unquoted word.
  121. break
  122. return retval
# Pairs of (IMAP flag name, single-letter flag). The flag conversion
# functions in this module look flags up in this list in both directions.
flagmap = [('\\Seen', 'S'),
           ('\\Answered', 'R'),
           ('\\Flagged', 'F'),
           ('\\Deleted', 'T'),
           ('\\Draft', 'D')]
  128. def flagsimap2maildir(flagstring):
  129. """Convert string '(\\Draft \\Deleted)' into a flags set(DR)."""
  130. retval = set()
  131. imapflaglist = flagstring[1:-1].split()
  132. for imapflag, maildirflag in flagmap:
  133. if imapflag in imapflaglist:
  134. retval.add(maildirflag)
  135. return retval
  136. def flagsimap2keywords(flagstring):
  137. """Convert string '(\\Draft \\Deleted somekeyword otherkeyword)' into a
  138. keyword set (somekeyword otherkeyword)."""
  139. imapflagset = set(flagstring[1:-1].split())
  140. serverflagset = set([flag for (flag, c) in flagmap])
  141. return imapflagset - serverflagset
  142. def flagsmaildir2imap(maildirflaglist):
  143. """Convert set of flags ([DR]) into a string '(\\Deleted \\Draft)'."""
  144. retval = []
  145. for imapflag, maildirflag in flagmap:
  146. if maildirflag in maildirflaglist:
  147. retval.append(imapflag)
  148. return '(' + ' '.join(sorted(retval)) + ')'
  149. def uid_sequence(uidlist):
  150. """Collapse UID lists into shorter sequence sets
  151. [1,2,3,4,5,10,12,13] will return "1:5,10,12:13". This function sorts
  152. the list, and only collapses if subsequent entries form a range.
  153. :returns: The collapsed UID list as string."""
  154. def getrange(start, end):
  155. if start == end:
  156. return str(start)
  157. return "%s:%s" % (start, end)
  158. if not len(uidlist):
  159. return '' # Empty list, return
  160. start, end = None, None
  161. retval = []
  162. # Force items to be longs and sort them
  163. sorted_uids = sorted(map(int, uidlist))
  164. for item in iter(sorted_uids):
  165. item = int(item)
  166. if start is None: # First item
  167. start, end = item, item
  168. elif item == end + 1: # Next item in a range
  169. end = item
  170. else: # Starting a new range
  171. retval.append(getrange(start, end))
  172. start, end = item, item
  173. retval.append(getrange(start, end)) # Add final range/item
  174. return ",".join(retval)
  175. def __split_quoted(s):
  176. """Looks for the ending quote character in the string that starts
  177. with quote character, splitting out quoted component and the
  178. rest of the string (without possible space between these two
  179. parts.
  180. First character of the string is taken to be quote character.
  181. Examples:
  182. - "this is \" a test" (\\None) => ("this is \" a test", (\\None))
  183. - "\\" => ("\\", )
  184. """
  185. if len(s) == 0:
  186. return '', ''
  187. q = quoted = s[0]
  188. rest = s[1:]
  189. while True:
  190. next_q = rest.find(q)
  191. if next_q == -1:
  192. raise ValueError("can't find ending quote '%s' in '%s'" % (q, s))
  193. # If quote is preceeded by even number of backslashes,
  194. # then it is the ending quote, otherwise the quote
  195. # character is escaped by backslash, so we should
  196. # continue our search.
  197. is_escaped = False
  198. i = next_q - 1
  199. while i >= 0 and rest[i] == '\\':
  200. i -= 1
  201. is_escaped = not is_escaped
  202. quoted += rest[0:next_q + 1]
  203. rest = rest[next_q + 1:]
  204. if not is_escaped:
  205. return quoted, rest.lstrip()
  206. def format_labels_string(header, labels):
  207. """Formats labels for embedding into a message,
  208. with format according to header name.
  209. Headers from SPACE_SEPARATED_LABEL_HEADERS keep space-separated list
  210. of labels, the rest uses comma (',') as the separator.
  211. Also see parse_labels_string() and modify it accordingly
  212. if logics here gets changed."""
  213. if header in SPACE_SEPARATED_LABEL_HEADERS:
  214. sep = ' '
  215. else:
  216. sep = ','
  217. return sep.join(labels)
  218. def parse_labels_string(header, labels_str):
  219. """Parses a string into a set of labels, with a format according to
  220. the name of the header.
  221. See __format_labels_string() for explanation on header handling
  222. and keep these two functions synced with each other.
  223. TODO: add test to ensure that
  224. - format_labels_string * parse_labels_string is unity
  225. and
  226. - parse_labels_string * format_labels_string is unity
  227. """
  228. if header in SPACE_SEPARATED_LABEL_HEADERS:
  229. sep = ' '
  230. else:
  231. sep = ','
  232. labels = labels_str.strip().split(sep)
  233. return set([l.strip() for l in labels if l.strip()])
  234. def labels_from_header(header_name, header_value):
  235. """Helper that builds label set from the corresponding header value.
  236. Arguments:
  237. - header_name: name of the header that keeps labels;
  238. - header_value: value of the said header, can be None
  239. Returns: set of labels parsed from the header (or empty set).
  240. """
  241. if header_value:
  242. labels = parse_labels_string(header_name, header_value)
  243. else:
  244. labels = set()
  245. return labels
  246. def decode_mailbox_name(name):
  247. """Decodes a modified UTF-7 mailbox name.
  248. If the string cannot be decoded, it is returned unmodified.
  249. See RFC 3501, sec. 5.1.3.
  250. Arguments:
  251. - name: string, possibly encoded with modified UTF-7
  252. Returns: decoded UTF-8 string.
  253. """
  254. def demodify(m):
  255. s = m.group()
  256. if s == '+':
  257. return '+-'
  258. return '+' + s[1:-1].replace(',', '/') + '-'
  259. ret = MUTF7_SHIFT_RE.sub(demodify, name)
  260. try:
  261. return ret.decode('utf-7').encode('utf-8')
  262. except (UnicodeDecodeError, UnicodeEncodeError):
  263. return name
  264. # Functionality to convert folder names encoded in IMAP_utf_7 to utf_8.
  265. # This is achieved by defining 'imap4_utf_7' as a proper encoding scheme.
  266. # Public API, to be used in repository definitions
  267. def IMAP_utf8(foldername):
  268. """Convert IMAP4_utf_7 encoded string to utf-8"""
  269. return codecs.decode(
  270. foldername.encode(),
  271. 'imap4-utf-7'
  272. ).encode('utf-8').decode()
  273. def utf8_IMAP(foldername):
  274. """Convert utf-8 encoded string to IMAP4_utf_7"""
  275. return codecs.decode(
  276. foldername.encode(),
  277. 'utf-8'
  278. ).encode('imap4-utf-7').decode()
  279. # Codec definition
  280. def modified_base64(s):
  281. s = s.encode('utf-16be')
  282. return binascii.b2a_base64(s).rstrip(b'\n=').replace(b'/', b',')
  283. def doB64(_in, r):
  284. if _in:
  285. r.append(b'&%s-' % modified_base64(''.join(_in)))
  286. del _in[:]
  287. def utf7m_encode(text: str) -> Tuple[bytes, int]:
  288. r = []
  289. _in = []
  290. for c in text:
  291. if 0x20 <= ord(c) <= 0x7e:
  292. doB64(_in, r)
  293. r.append(b'&-' if c == '&' else c.encode())
  294. else:
  295. _in.append(c)
  296. doB64(_in, r)
  297. return b''.join(r), len(text)
  298. # decoding
  299. def modified_unbase64(s):
  300. b = binascii.a2b_base64(s.replace(',', '/') + '===')
  301. return str(b, 'utf-16be')
  302. def utf7m_decode(binary: bytes) -> Tuple[str, int]:
  303. r = []
  304. decode = []
  305. for c in binary:
  306. if c == ord('&') and not decode:
  307. decode.append('&')
  308. elif c == ord('-') and decode:
  309. if len(decode) == 1:
  310. r.append('&')
  311. else:
  312. r.append(modified_unbase64(''.join(decode[1:])))
  313. decode = []
  314. elif decode:
  315. decode.append(chr(c))
  316. else:
  317. r.append(chr(c))
  318. if decode:
  319. r.append(modified_unbase64(''.join(decode[1:])))
  320. return ''.join(r), len(binary)
  321. class StreamReader(codecs.StreamReader):
  322. def decode(self, s, errors='strict'):
  323. return utf7m_decode(s)
  324. class StreamWriter(codecs.StreamWriter):
  325. def decode(self, s, errors='strict'):
  326. return utf7m_encode(s)
  327. def utf7m_search_function(name):
  328. return codecs.CodecInfo(
  329. utf7m_encode,
  330. utf7m_decode,
  331. StreamReader,
  332. StreamWriter,
  333. name='imap4-utf-7'
  334. )
  335. codecs.register(utf7m_search_function)
  336. def foldername_to_imapname(folder_name):
  337. """
  338. This function returns the folder_name ready to send to the
  339. IMAP server. It tests if the folder_name has special characters
  340. Then, quote it.
  341. Args:
  342. folder_name: Folder's name
  343. Returns: The folder_name quoted if needed
  344. """
  345. # If name includes some of these characters, quote it
  346. atom_specials = [' ', '/', '(', ')', '{', '}', '"']
  347. if any((c in atom_specials) for c in folder_name):
  348. folder_name = quote(folder_name)
  349. return folder_name