Maildir.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564
  1. # Maildir folder support
  2. # Copyright (C) 2002-2016 John Goerzen & contributors.
  3. #
  4. # This program is free software; you can redistribute it and/or modify
  5. # it under the terms of the GNU General Public License as published by
  6. # the Free Software Foundation; either version 2 of the License, or
  7. # (at your option) any later version.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  17. import errno
  18. import socket
  19. import time
  20. import re
  21. import os
  22. from sys import exc_info
  23. from threading import Lock
  24. from hashlib import md5
  25. from offlineimap import OfflineImapError
  26. from .Base import BaseFolder
  27. from email.errors import NoBoundaryInMultipartDefect
# Extract the UID from a message filename: the run of decimal digits that
# immediately follows ",U=" (captured as group 1).
re_uidmatch = re.compile(r',U=(\d+)')
# Match the first run of decimal digits in a string. It is applied to message
# filenames, whose prefix is expected to begin with a numeric timestamp.
re_timestampmatch = re.compile(r'(\d+)')
  32. timehash = {}
  33. timelock = Lock()
  34. def _gettimeseq(date=None):
  35. global timehash, timelock
  36. timelock.acquire()
  37. try:
  38. if date is None:
  39. date = int(time.time())
  40. if date in timehash:
  41. timehash[date] += 1
  42. else:
  43. timehash[date] = 0
  44. return date, timehash[date]
  45. finally:
  46. timelock.release()
  47. class MaildirFolder(BaseFolder):
  48. def __init__(self, root, name, sep, repository):
  49. self.sep = sep # needs to be set before super().__init__
  50. super(MaildirFolder, self).__init__(name, repository)
  51. self.root = root
  52. # check if we should use a different infosep to support Win file systems
  53. self.wincompatible = self.config.getdefaultboolean(
  54. "Account " + self.accountname, "maildir-windows-compatible", False)
  55. self.infosep = '!' if self.wincompatible else ':'
  56. """infosep is the separator between maildir name and flag appendix"""
  57. self.re_flagmatch = re.compile(r'%s2,(\w*)' % self.infosep)
  58. # self.ui is set in BaseFolder.init()
  59. # Everything up to the first comma or colon (or ! if Windows):
  60. self.re_prefixmatch = re.compile('([^' + self.infosep + ',]*)')
  61. # folder's md, so we can match with recorded file md5 for validity.
  62. self._foldermd5 = md5(self.getvisiblename().encode('utf-8')).hexdigest()
  63. # Cache the full folder path, as we use getfullname() very often.
  64. self._fullname = os.path.join(self.getroot(), self.getname())
  65. # Modification time from 'Date' header.
  66. utime_from_header_global = self.config.getdefaultboolean(
  67. "general", "utime_from_header", False)
  68. self._utime_from_header = self.config.getdefaultboolean(
  69. self.repoconfname, "utime_from_header", utime_from_header_global)
  70. # What do we substitute pathname separator in names (if any)
  71. self.sep_subst = '-'
  72. if os.path.sep == self.sep_subst:
  73. self.sep_subst = '_'
  74. # Interface from BaseFolder
  75. def getfullname(self):
  76. """Return the absolute file path to the Maildir folder (sans cur|new)"""
  77. return self._fullname
  78. # Interface from BaseFolder
  79. def get_uidvalidity(self):
  80. """Retrieve the current connections UIDVALIDITY value
  81. Maildirs have no notion of uidvalidity, so we just return a magic
  82. token."""
  83. return 42
  84. def _iswithintime(self, messagename, date):
  85. """Check to see if the given message is newer than date (a
  86. time_struct) according to the maildir name which should begin
  87. with a timestamp."""
  88. timestampmatch = re_timestampmatch.search(messagename)
  89. if not timestampmatch:
  90. return True
  91. timestampstr = timestampmatch.group()
  92. timestamplong = int(timestampstr)
  93. if timestamplong < time.mktime(date):
  94. return False
  95. else:
  96. return True
  97. def _parse_filename(self, filename):
  98. """Returns a messages file name components
  99. Receives the file name (without path) of a msg. Usual format is
  100. '<%d_%d.%d.%s>,U=<%d>,FMD5=<%s>:2,<FLAGS>' (pointy brackets
  101. denoting the various components).
  102. If FMD5 does not correspond with the current folder MD5, we will
  103. return None for the UID & FMD5 (as it is not valid in this
  104. folder). If UID or FMD5 can not be detected, we return `None`
  105. for the respective element. If flags are empty or cannot be
  106. detected, we return an empty flags list.
  107. :returns: (prefix, UID, FMD5, flags). UID is a numeric "long"
  108. type. flags is a set() of Maildir flags.
  109. """
  110. prefix, uid, fmd5, flags = None, None, None, set()
  111. prefixmatch = self.re_prefixmatch.match(filename)
  112. if prefixmatch:
  113. prefix = prefixmatch.group(1)
  114. folderstr = ',FMD5=%s' % self._foldermd5
  115. foldermatch = folderstr in filename
  116. # If there was no folder MD5 specified, or if it mismatches,
  117. # assume it is a foreign (new) message and ret: uid, fmd5 = None, None
  118. # XXX: This is wrong behaviour: if FMD5 is missing or mismatches, assume
  119. # the mail is new and **fix UID to None** to avoid any conflict.
  120. # XXX: If UID is missing, I have no idea what FMD5 can do. Should be
  121. # fixed to None in this case, too.
  122. if foldermatch:
  123. uidmatch = re_uidmatch.search(filename)
  124. if uidmatch:
  125. uid = int(uidmatch.group(1))
  126. flagmatch = self.re_flagmatch.search(filename)
  127. if flagmatch:
  128. flags = set((c for c in flagmatch.group(1)))
  129. return prefix, uid, fmd5, flags
  130. def _scanfolder(self, min_date=None, min_uid=None):
  131. """Cache the message list from a Maildir.
  132. If min_date is set, this finds the min UID of all messages newer than
  133. min_date and uses it as the real cutoff for considering messages.
  134. This handles the edge cases where the date is much earlier than messages
  135. with similar UID's (e.g. the UID was reassigned much later).
  136. Maildir flags are:
  137. D (draft) F (flagged) R (replied) S (seen) T (trashed),
  138. plus lower-case letters for custom flags.
  139. :returns: dict that can be used as self.messagelist.
  140. """
  141. maxsize = self.getmaxsize()
  142. retval = {}
  143. files = []
  144. nouidcounter = -1 # Messages without UIDs get negative UIDs.
  145. for dirannex in ['new', 'cur']:
  146. fulldirname = os.path.join(self.getfullname(), dirannex)
  147. files.extend((dirannex, filename) for
  148. filename in os.listdir(fulldirname))
  149. date_excludees = {}
  150. for dirannex, filename in files:
  151. if filename.startswith('.'):
  152. continue # Ignore dot files.
  153. # We store just dirannex and filename, ie 'cur/123...'
  154. filepath = os.path.join(dirannex, filename)
  155. # Check maxsize if this message should be considered.
  156. if maxsize and (os.path.getsize(
  157. os.path.join(self.getfullname(), filepath)) > maxsize):
  158. continue
  159. prefix, uid, fmd5, flags = self._parse_filename(filename)
  160. if uid is None: # Assign negative uid to upload it.
  161. uid = nouidcounter
  162. nouidcounter -= 1
  163. else: # It comes from our folder.
  164. uidmatch = re_uidmatch.search(filename)
  165. if not uidmatch:
  166. uid = nouidcounter
  167. nouidcounter -= 1
  168. else:
  169. uid = int(uidmatch.group(1))
  170. if min_uid is not None and uid > 0 and uid < min_uid:
  171. continue
  172. if min_date is not None and not self._iswithintime(filename, min_date):
  173. # Keep track of messages outside of the time limit, because they
  174. # still might have UID > min(UIDs of within-min_date). We hit
  175. # this case for maxage if any message had a known/valid datetime
  176. # and was re-uploaded because the UID in the filename got lost
  177. # (e.g. local copy/move). On next sync, it was assigned a new
  178. # UID from the server and will be included in the SEARCH
  179. # condition. So, we must re-include them later in this method
  180. # in order to avoid inconsistent lists of messages.
  181. date_excludees[uid] = self.msglist_item_initializer(uid)
  182. date_excludees[uid]['flags'] = flags
  183. date_excludees[uid]['filename'] = filepath
  184. else:
  185. # 'filename' is 'dirannex/filename', e.g. cur/123,U=1,FMD5=1:2,S
  186. retval[uid] = self.msglist_item_initializer(uid)
  187. retval[uid]['flags'] = flags
  188. retval[uid]['filename'] = filepath
  189. if min_date is not None:
  190. # Re-include messages with high enough uid's.
  191. positive_uids = [uid for uid in retval if uid > 0]
  192. if positive_uids:
  193. min_uid = min(positive_uids)
  194. for uid in list(date_excludees.keys()):
  195. if uid > min_uid:
  196. # This message was originally excluded because of
  197. # its date. It is re-included now because we want all
  198. # messages with UID > min_uid.
  199. retval[uid] = date_excludees[uid]
  200. return retval
  201. # Interface from BaseFolder
  202. def quickchanged(self, statusfolder):
  203. """Returns True if the Maildir has changed
  204. Assumes cachemessagelist() has already been called """
  205. # Folder has different uids than statusfolder => TRUE.
  206. if sorted(self.getmessageuidlist()) != \
  207. sorted(statusfolder.getmessageuidlist()):
  208. return True
  209. # Also check for flag changes, it's quick on a Maildir.
  210. for (uid, message) in list(self.getmessagelist().items()):
  211. if message['flags'] != statusfolder.getmessageflags(uid):
  212. return True
  213. return False # Nope, nothing changed.
  214. # Interface from BaseFolder
  215. def msglist_item_initializer(self, uid):
  216. return {'flags': set(), 'filename': '/no-dir/no-such-file/'}
  217. # Interface from BaseFolder
  218. def cachemessagelist(self, min_date=None, min_uid=None):
  219. if self.ismessagelistempty():
  220. self.ui.loadmessagelist(self.repository, self)
  221. self.messagelist = self._scanfolder(min_date=min_date,
  222. min_uid=min_uid)
  223. self.ui.messagelistloaded(self.repository, self, self.getmessagecount())
  224. # Interface from BaseFolder
  225. def getmessage(self, uid):
  226. """Returns an email message object."""
  227. filename = self.messagelist[uid]['filename']
  228. filepath = os.path.join(self.getfullname(), filename)
  229. fd = open(filepath, 'rb')
  230. _fd_bytes = fd.read()
  231. fd.close()
  232. try: retval = self.parser['8bit'].parsebytes(_fd_bytes)
  233. except:
  234. err = exc_info()
  235. msg_id = self._extract_message_id(_fd_bytes)[0].decode('ascii',errors='surrogateescape')
  236. raise OfflineImapError(
  237. "Exception parsing message with ID ({}) from file ({}).\n {}: {}".format(
  238. msg_id, filename, err[0].__name__, err[1]),
  239. OfflineImapError.ERROR.MESSAGE)
  240. if len(retval.defects) > 0:
  241. # We don't automatically apply fixes as to attempt to preserve the original message
  242. self.ui.warn("UID {} has defects: {}".format(uid, retval.defects))
  243. if any(isinstance(defect, NoBoundaryInMultipartDefect) for defect in retval.defects):
  244. # (Hopefully) Rare defect from a broken client where multipart boundary is
  245. # not properly quoted. Attempt to solve by fixing the boundary and parsing
  246. self.ui.warn(" ... applying multipart boundary fix.")
  247. retval = self.parser['8bit'].parsebytes(self._quote_boundary_fix(_fd_bytes))
  248. try:
  249. # See if the defects after fixes are preventing us from obtaining bytes
  250. _ = retval.as_bytes(policy=self.policy['8bit'])
  251. except UnicodeEncodeError as err:
  252. # Unknown issue which is causing failure of as_bytes()
  253. msg_id = self.getmessageheader(retval, "message-id")
  254. if msg_id is None:
  255. msg_id = '<unknown-message-id>'
  256. raise OfflineImapError(
  257. "UID {} ({}) has defects preventing it from being processed!\n {}: {}".format(
  258. uid, msg_id, type(err).__name__, err),
  259. OfflineImapError.ERROR.MESSAGE)
  260. return retval
  261. # Interface from BaseFolder
  262. def getmessagetime(self, uid):
  263. filename = self.messagelist[uid]['filename']
  264. filepath = os.path.join(self.getfullname(), filename)
  265. return os.path.getmtime(filepath)
  266. def new_message_filename(self, uid, flags=None, date=None):
  267. """Creates a new unique Maildir filename
  268. :param uid: The UID`None`, or a set of maildir flags
  269. :param flags: A set of maildir flags
  270. :param flags: (optional) Date
  271. :returns: String containing unique message filename"""
  272. if flags is None:
  273. flags = set()
  274. timeval, timeseq = _gettimeseq(date)
  275. uniq_name = '%d_%d.%d.%s,U=%d,FMD5=%s%s2,%s' % \
  276. (timeval, timeseq, os.getpid(), socket.gethostname(),
  277. uid, self._foldermd5, self.infosep, ''.join(sorted(flags)))
  278. return uniq_name.replace(os.path.sep, self.sep_subst)
  279. def save_to_tmp_file(self, filename, msg, policy=None):
  280. """Saves given message to the named temporary file in the
  281. 'tmp' subdirectory of $CWD.
  282. Arguments:
  283. - filename: name of the temporary file;
  284. - msg: Email message object
  285. Returns: relative path to the temporary file
  286. that was created."""
  287. if policy is None:
  288. output_policy = self.policy['8bit']
  289. else:
  290. output_policy = policy
  291. tmpname = os.path.join('tmp', filename)
  292. # Open file and write it out.
  293. # XXX: why do we need to loop 7 times?
  294. tries = 7
  295. while tries:
  296. tries = tries - 1
  297. try:
  298. fd = os.open(os.path.join(self.getfullname(), tmpname),
  299. os.O_EXCL | os.O_CREAT | os.O_WRONLY, 0o666)
  300. break
  301. except OSError as e:
  302. if not hasattr(e, 'EEXIST'):
  303. raise
  304. if e.errno == errno.EEXIST:
  305. if tries:
  306. time.sleep(0.23)
  307. continue
  308. severity = OfflineImapError.ERROR.MESSAGE
  309. raise OfflineImapError(
  310. "Unique filename %s already exists." %
  311. filename, severity,
  312. exc_info()[2])
  313. else:
  314. raise
  315. fd = os.fdopen(fd, 'wb')
  316. fd.write(msg.as_bytes(policy=output_policy))
  317. # Make sure the data hits the disk.
  318. fd.flush()
  319. if self.dofsync():
  320. os.fsync(fd)
  321. fd.close()
  322. return tmpname
  323. # Interface from BaseFolder
  324. def savemessage(self, uid, msg, flags, rtime):
  325. """Writes a new message, with the specified uid.
  326. See folder/Base for detail. Note that savemessage() does not
  327. check against dryrun settings, so you need to ensure that
  328. savemessage is never called in a dryrun mode."""
  329. # This function only ever saves to tmp/,
  330. # but it calls savemessageflags() to actually save to cur/ or new/.
  331. self.ui.savemessage('maildir', uid, flags, self)
  332. if uid < 0:
  333. # We cannot assign a new uid.
  334. return uid
  335. if uid in self.messagelist:
  336. # We already have it, just update flags.
  337. self.savemessageflags(uid, flags)
  338. return uid
  339. # Use the mail timestamp given by either Date or Delivery-date mail
  340. # headers.
  341. message_timestamp = None
  342. if self._filename_use_mail_timestamp is not False:
  343. try:
  344. message_timestamp = self.get_message_date(msg, 'Date')
  345. if message_timestamp is None:
  346. # Give a try with Delivery-date
  347. message_timestamp = self.get_message_date(
  348. msg, 'Delivery-date')
  349. except Exception as e:
  350. # Extracting the date has failed for some reason, such as it
  351. # being in an invalid format.
  352. from offlineimap.ui import getglobalui
  353. ui = getglobalui()
  354. ui.warn("UID %d has invalid date: %s\n"
  355. "Not using message timestamp as file prefix" % (uid, e))
  356. # No need to check if message_timestamp is None here since it
  357. # would be overridden by _gettimeseq.
  358. messagename = self.new_message_filename(uid, flags, date=message_timestamp)
  359. tmpname = self.save_to_tmp_file(messagename, msg)
  360. if self._utime_from_header is True:
  361. try:
  362. date = self.get_message_date(msg, 'Date')
  363. if date is not None:
  364. os.utime(os.path.join(self.getfullname(), tmpname),
  365. (date, date))
  366. except Exception as e:
  367. # Extracting the date has failed for some reason, such as it
  368. # being in an invalid format.
  369. from offlineimap.ui import getglobalui
  370. ui = getglobalui()
  371. ui.warn("UID %d has invalid date: %s\n"
  372. "Not changing file modification time" % (uid, e))
  373. self.messagelist[uid] = self.msglist_item_initializer(uid)
  374. self.messagelist[uid]['flags'] = flags
  375. self.messagelist[uid]['filename'] = tmpname
  376. # savemessageflags moves msg to 'cur' or 'new' as appropriate.
  377. self.savemessageflags(uid, flags)
  378. self.ui.debug('maildir', 'savemessage: returning uid %d' % uid)
  379. return uid
  380. # Interface from BaseFolder
  381. def getmessageflags(self, uid):
  382. return self.messagelist[uid]['flags']
  383. # Interface from BaseFolder
  384. def savemessageflags(self, uid, flags):
  385. """Sets the specified message's flags to the given set.
  386. This function moves the message to the cur or new subdir,
  387. depending on the 'S'een flag.
  388. Note that this function does not check against dryrun settings,
  389. so you need to ensure that it is never called in a
  390. dryrun mode."""
  391. assert uid in self.messagelist
  392. oldfilename = self.messagelist[uid]['filename']
  393. dir_prefix, filename = os.path.split(oldfilename)
  394. # If a message has been seen, it goes into 'cur'
  395. dir_prefix = 'cur' if 'S' in flags else 'new'
  396. if flags != self.messagelist[uid]['flags']:
  397. # Flags have actually changed, construct new filename Strip
  398. # off existing infostring
  399. infomatch = self.re_flagmatch.search(filename)
  400. if infomatch:
  401. filename = filename[:-len(infomatch.group())] # strip off
  402. infostr = '%s2,%s' % (self.infosep, ''.join(sorted(flags)))
  403. filename += infostr
  404. newfilename = os.path.join(dir_prefix, filename)
  405. if newfilename != oldfilename:
  406. try:
  407. os.rename(os.path.join(self.getfullname(), oldfilename),
  408. os.path.join(self.getfullname(), newfilename))
  409. except OSError as e:
  410. raise OfflineImapError(
  411. "Can't rename file '%s' to '%s': %s" %
  412. (oldfilename, newfilename, e.errno),
  413. OfflineImapError.ERROR.FOLDER,
  414. exc_info()[2])
  415. self.messagelist[uid]['flags'] = flags
  416. self.messagelist[uid]['filename'] = newfilename
  417. # Interface from BaseFolder
  418. def change_message_uid(self, uid, new_uid):
  419. """Change the message from existing uid to new_uid
  420. This will not update the statusfolder UID, you need to do that yourself.
  421. :param uid: Message UID
  422. :param new_uid: (optional) If given, the old UID will be changed
  423. to a new UID. The Maildir backend can implement this as
  424. an efficient rename.
  425. """
  426. if uid not in self.messagelist:
  427. raise OfflineImapError("Cannot change unknown Maildir UID %s" % uid,
  428. OfflineImapError.ERROR.MESSAGE)
  429. if uid == new_uid:
  430. return
  431. oldfilename = self.messagelist[uid]['filename']
  432. dir_prefix, filename = os.path.split(oldfilename)
  433. flags = self.getmessageflags(uid)
  434. # TODO: we aren't keeping the prefix timestamp so we don't honor the
  435. # filename_use_mail_timestamp configuration option.
  436. newfilename = os.path.join(dir_prefix,
  437. self.new_message_filename(new_uid, flags))
  438. os.rename(os.path.join(self.getfullname(), oldfilename),
  439. os.path.join(self.getfullname(), newfilename))
  440. self.messagelist[new_uid] = self.messagelist[uid]
  441. self.messagelist[new_uid]['filename'] = newfilename
  442. del self.messagelist[uid]
  443. # Interface from BaseFolder
  444. def deletemessage(self, uid):
  445. """Unlinks a message file from the Maildir.
  446. :param uid: UID of a mail message
  447. :type uid: String
  448. :return: Nothing, or an Exception if UID but no corresponding file
  449. found.
  450. """
  451. filename = self.messagelist[uid]['filename']
  452. filepath = os.path.join(self.getfullname(), filename)
  453. try:
  454. os.unlink(filepath)
  455. except OSError:
  456. # Can't find the file -- maybe already deleted?
  457. newmsglist = self._scanfolder()
  458. if uid in newmsglist: # Nope, try new filename.
  459. filename = newmsglist[uid]['filename']
  460. filepath = os.path.join(self.getfullname(), filename)
  461. os.unlink(filepath)
  462. # Yep -- return.
  463. del (self.messagelist[uid])
  464. def migratefmd5(self, dryrun=False):
  465. """Migrate FMD5 hashes from versions prior to 6.3.5
  466. :param dryrun: Run in dry run mode
  467. :return: None
  468. """
  469. oldfmd5 = md5(self.name).hexdigest()
  470. msglist = self._scanfolder()
  471. for mkey, mvalue in list(msglist.items()):
  472. filename = os.path.join(self.getfullname(), mvalue['filename'])
  473. match = re.search("FMD5=([a-fA-F0-9]+)", filename)
  474. if match is None:
  475. self.ui.debug("maildir",
  476. "File `%s' doesn't have an FMD5 assigned"
  477. % filename)
  478. elif match.group(1) == oldfmd5:
  479. self.ui.info("Migrating file `%s' to FMD5 `%s'"
  480. % (filename, self._foldermd5))
  481. if not dryrun:
  482. newfilename = filename.replace(
  483. "FMD5=" + match.group(1), "FMD5=" + self._foldermd5)
  484. try:
  485. os.rename(filename, newfilename)
  486. except OSError as e:
  487. raise OfflineImapError(
  488. "Can't rename file '%s' to '%s': %s" %
  489. (filename, newfilename, e.errno),
  490. OfflineImapError.ERROR.FOLDER,
  491. exc_info()[2])
  492. elif match.group(1) != self._foldermd5:
  493. self.ui.warn(("Inconsistent FMD5 for file `%s':"
  494. " Neither `%s' nor `%s' found")
  495. % (filename, oldfmd5, self._foldermd5))