s3.in 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. #
  4. # Midnight Commander compatible EXTFS for accessing Amazon Web Services S3.
  5. # Written by Jakob Kemi <jakob.kemi@gmail.com> 2009
  6. #
  7. # Copyright (c) 2009 Free Software Foundation, Inc.
  8. # This program is free software; you can redistribute it and/or modify
  9. # it under the terms of the GNU General Public License as published by
  10. # the Free Software Foundation; either version 2 of the License, or
  11. # (at your option) any later version.
  12. #
  13. # This program is distributed in the hope that it will be useful,
  14. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. # GNU General Public License for more details.
  17. #
  18. # You should have received a copy of the GNU General Public License
  19. # along with this program; if not, write to the Free Software
  20. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
  21. #
  22. #
  23. # Notes:
  24. # This EXTFS exposes buckets as directories and keys as files
  25. # Due to EXTFS limitations all buckets & keys have to be read initially which might
  26. # take quite some time.
  27. # Tested on Debian with Python 2.4-2.6 and boto 1.4c and 1.6b
  28. # (Python 2.6 might need -W ignore::DeprecationWarning due to boto using
  29. # deprecated module Popen2)
  30. #
  31. #
  32. # Installation:
  33. # Make sure that boto <http://code.google.com/p/boto> (python-boto in Debian) is installed.
  34. # Preferably pytz (package python-tz in Debian) should be installed as well.
  35. #
  36. # Save as executable file /share/mc/extfs/s3 (or wherever your mc expects to find extfs modules)
  37. # Add the following to your extfs.ini (might exist as /usr/share/mc/extfs/extfs.ini):
  38. # ----- begin extfs.ini -----
  39. # # Amazon S3
  40. # s3:
  41. # ----- end extfs.ini -----
  42. #
  43. #
  44. # Settings: (should be set via environment)
  45. # Required:
  46. # AWS_ACCESS_KEY_ID : Amazon AWS access key (required)
  47. # AWS_SECRET_ACCESS_KEY : Amazon AWS secret access key (required)
  48. # Optional:
  49. # MCVFS_EXTFS_S3_LOCATION : where to create new buckets, "EU"(default) or "US"
  50. # MCVFS_EXTFS_S3_DEBUGFILE : write debug info to this file (no info default)
  51. #
  52. #
  53. # Usage:
  54. # Open dialog "Quick cd" (<alt-c>) and type: #s3 <enter> (or simply type ''cd #s3'' in shell line)
  55. #
  56. #
  57. # History:
  58. # 2009-02-07 Jakob Kemi <jakob.kemi@gmail.com>
  59. # - Updated instructions.
  60. # - Improved error reporting.
  61. #
  62. # 2009-02-06 Jakob Kemi <jakob.kemi@gmail.com>
  63. # - Threaded list command.
  64. # - Handle rm of empty "subdirectories" (as seen in mc).
  65. # - List most recent datetime and total size of keys as directory properties.
  66. # - List modification time in local time.
  67. #
  68. # 2009-02-05 Jakob Kemi <jakob.kemi@gmail.com>
  69. # - Initial version.
  70. #
  71. import sys
  72. import os
  73. import time
  74. import re
  75. import datetime
  76. import boto
  77. from boto.s3.connection import S3Connection
  78. from boto.s3.key import Key
  79. from boto.exception import BotoServerError
  80. # Get settings from environment
  81. USER=os.getenv('USER','0')
  82. AWS_ACCESS_KEY_ID=os.getenv('AWS_ACCESS_KEY_ID')
  83. AWS_SECRET_ACCESS_KEY=os.getenv('AWS_SECRET_ACCESS_KEY')
  84. LOCATION = os.getenv('MCVFS_EXTFS_S3_LOCATION', 'EU').lower()
  85. DEBUGFILE = os.getenv('MCVFS_EXTFS_S3_DEBUGFILE')
  86. if not AWS_ACCESS_KEY_ID or not AWS_SECRET_ACCESS_KEY:
  87. sys.stderr.write('Missing AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY environment variables.\n')
  88. sys.exit(1)
  89. # Setup logging
  90. if DEBUGFILE:
  91. import logging
  92. logging.basicConfig(
  93. filename=DEBUGFILE,
  94. level=logging.DEBUG,
  95. format='%(asctime)s %(levelname)s %(message)s')
  96. logging.getLogger('boto').setLevel(logging.WARNING)
  97. else:
  98. class Void(object):
  99. def __getattr__(self, attr):
  100. return self
  101. def __call__(self, *args, **kw):
  102. return self
  103. logging = Void()
  104. logger=logging.getLogger('s3extfs')
  105. def threadmap(fun, iterable, maxthreads=16):
  106. """
  107. Quick and dirty threaded version of builtin method map.
  108. Propagates exception safely.
  109. """
  110. from threading import Thread
  111. import Queue
  112. items = list(iterable)
  113. nitems = len(items)
  114. if nitems < 2:
  115. return map(fun, items)
  116. # Create and fill input queue
  117. input = Queue.Queue()
  118. output = Queue.Queue()
  119. for i,item in enumerate(items):
  120. input.put( (i,item) )
  121. class WorkThread(Thread):
  122. """
  123. Takes one item from input queue (thread terminates when input queue is empty),
  124. performs fun, puts result in output queue
  125. """
  126. def run(self):
  127. while True:
  128. try:
  129. (i,item) = input.get_nowait()
  130. try:
  131. result = fun(item)
  132. output.put( (i,result) )
  133. except:
  134. output.put( (None,sys.exc_info()) )
  135. except Queue.Empty:
  136. return
  137. # Start threads
  138. for i in range( min(len(items), maxthreads) ):
  139. t = WorkThread()
  140. t.setDaemon(True)
  141. t.start()
  142. # Wait for all threads to finish & collate results
  143. ret = []
  144. for i in range(nitems):
  145. try:
  146. i,res = output.get()
  147. if i == None:
  148. raise res[0],res[1],res[2]
  149. except Queue.Empty:
  150. break
  151. ret.append(res)
  152. return ret
  153. logger.debug('started')
  154. # Global S3 connection
  155. s3 = S3Connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
  156. if LOCATION == 'eu':
  157. logger.debug('Using location EU for new buckets')
  158. S3LOCATION = boto.s3.connection.Location.EU
  159. else:
  160. logger.debug('Using location US for new buckets')
  161. S3LOCATION = boto.s3.connection.Location.US
  162. logger.debug('argv: ' + str(sys.argv))
  163. try:
  164. cmd = sys.argv[1]
  165. args = sys.argv[2:]
  166. except:
  167. sys.stderr.write('This program should be called from within MC\n')
  168. sys.exit(1)
  169. def handleServerError(msg):
  170. e = sys.exc_info()
  171. msg += ', reason: ' + e[1].reason
  172. logger.error(msg, exc_info=e)
  173. sys.stderr.write(msg+'\n')
  174. sys.exit(1)
  175. #
  176. # Lists all S3 contents
  177. #
  178. if cmd == 'list':
  179. if len(args) > 0:
  180. path = args[0]
  181. else:
  182. path = ''
  183. logger.info('list')
  184. rs = s3.get_all_buckets()
  185. # Import python timezones (pytz)
  186. try:
  187. import pytz
  188. except:
  189. logger.warning('Missing pytz module, timestamps will be off')
  190. # A fallback UTC tz stub
  191. class pytzutc(datetime.tzinfo):
  192. def __init__(self):
  193. datetime.tzinfo.__init__(self)
  194. self.utc = self
  195. self.zone = 'UTC'
  196. def utcoffset(self, dt):
  197. return datetime.timedelta(0)
  198. def tzname(self, dt):
  199. return "UTC"
  200. def dst(self, dt):
  201. return datetime.timedelta(0)
  202. pytz = pytzutc()
  203. # Find timezone
  204. # (yes, timeZONE as in _geographic zone_ not EST/CEST or whatever crap we get from time.tzname)
  205. # http://regebro.wordpress.com/2008/05/10/python-and-time-zones-part-2-the-beast-returns/
  206. def getGuessedTimezone():
  207. # 1. check TZ env. var
  208. try:
  209. tz = os.getenv('TZ', '')
  210. return pytz.timezone(tz)
  211. except:
  212. pass
  213. # 2. check if /etc/timezone exists (Debian at least)
  214. try:
  215. if os.path.isfile('/etc/timezone'):
  216. tz = open('/etc/timezone', 'r').readline().strip()
  217. return pytz.timezone(tz)
  218. except:
  219. pass
  220. # 3. check if /etc/localtime is a _link_ to something useful
  221. try:
  222. if os.path.islink('/etc/localtime'):
  223. link = os.readlink('/etc/localtime')
  224. tz = '/'.join(p.split(os.path.sep)[-2:])
  225. return pytz.timezone(tz)
  226. except:
  227. pass
  228. # 4. use time.tzname which will probably be wrong by an hour 50% of the time.
  229. try:
  230. return pytz.timezone(time.tzname[0])
  231. except:
  232. pass
  233. # 5. use plain UTC ...
  234. return pytz.utc
  235. tz=getGuessedTimezone()
  236. logger.debug('Using timezone: ' + tz.zone)
  237. # AWS time is on format: 2009-01-07T16:43:39.000Z
  238. # we "want" MM-DD-YYYY hh:mm (in localtime)
  239. expr = re.compile(r'^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})\.\d{3}Z$')
  240. def convDate(awsdatetime):
  241. m = expr.match(awsdatetime)
  242. ye,mo,da,ho,mi,se = map(int,m.groups())
  243. dt = datetime.datetime(ye,mo,da,ho,mi,se, tzinfo=pytz.utc)
  244. return dt.astimezone(tz).strftime('%m-%d-%Y %H:%M')
  245. def bucketList(b):
  246. totsz = 0
  247. mostrecent = '1970-01-01T00:00:00.000Z'
  248. ret = []
  249. for k in b.list():
  250. mostrecent = max(mostrecent, k.last_modified)
  251. datetime = convDate(k.last_modified)
  252. ret.append('%10s %3d %-8s %-8s %d %s %s\n' % (
  253. '-rw-r--r--', 1, USER, USER, k.size, datetime, b.name+'/'+k.name)
  254. )
  255. totsz += k.size
  256. datetime=convDate(mostrecent)
  257. sys.stdout.write('%10s %3d %-8s %-8s %d %s %s\n' % (
  258. 'drwxr-xr-x', 1, USER, USER, totsz, datetime, b.name)
  259. )
  260. for line in ret:
  261. sys.stdout.write(line)
  262. threadmap(bucketList, rs)
  263. #
  264. # Fetch file from S3
  265. #
  266. elif cmd == 'copyout':
  267. archivename = args[0]
  268. storedfilename = args[1]
  269. extractto = args[2]
  270. bucket,key = storedfilename.split('/', 1)
  271. logger.info('copyout bucket: %s, key: %s'%(bucket, key))
  272. try:
  273. b = s3.get_bucket(bucket)
  274. k = b.get_key(key)
  275. out = open(extractto, 'w')
  276. k.open(mode='r')
  277. for buf in k:
  278. out.write(buf)
  279. k.close()
  280. out.close()
  281. except BotoServerError:
  282. handleServerError('Unable to fetch key "%s"'%(key))
  283. #
  284. # Upload file to S3
  285. #
  286. elif cmd == 'copyin':
  287. archivename = args[0]
  288. storedfilename = args[1]
  289. sourcefile = args[2]
  290. bucket,key = storedfilename.split('/', 1)
  291. logger.info('copyin bucket: %s, key: %s'%(bucket, key))
  292. try:
  293. b = s3.get_bucket(bucket)
  294. k = b.new_key(key)
  295. k.set_contents_from_file(fp=open(sourcefile,'r'))
  296. except BotoServerError:
  297. handleServerError('Unable to upload key "%s"' % (key))
  298. #
  299. # Remove file from S3
  300. #
  301. elif cmd == 'rm':
  302. archivename = args[0]
  303. storedfilename = args[1]
  304. bucket,key = storedfilename.split('/', 1)
  305. logger.info('rm bucket: %s, key: %s'%(bucket, key))
  306. try:
  307. b = s3.get_bucket(bucket)
  308. b.delete_key(key)
  309. except BotoServerError:
  310. handleServerError('Unable to remove key "%s"' % (key))
  311. #
  312. # Create directory
  313. #
  314. elif cmd == 'mkdir':
  315. archivename = args[0]
  316. dirname = args[1]
  317. logger.info('mkdir dir: %s' %(dirname))
  318. if '/' in dirname:
  319. logger.warning('skipping mkdir')
  320. pass
  321. else:
  322. bucket = dirname
  323. try:
  324. s3.create_bucket(bucket, location=boto.s3.connection.Location.EU)
  325. except BotoServerError:
  326. handleServerError('Unable to create bucket "%s"' % (bucket))
  327. #
  328. # Remove directory
  329. #
  330. elif cmd == 'rmdir':
  331. archivename = args[0]
  332. dirname = args[1]
  333. logger.info('rmdir dir: %s' %(dirname))
  334. if '/' in dirname:
  335. logger.warning('skipping rmdir')
  336. pass
  337. else:
  338. bucket = dirname
  339. try:
  340. b = s3.get_bucket(bucket)
  341. s3.delete_bucket(b)
  342. except BotoServerError:
  343. handleServerError('Unable to delete bucket "%s"' % (bucket))
  344. #
  345. # Run from S3
  346. #
  347. elif cmd == 'run':
  348. archivename = args[0]
  349. storedfilename = args[1]
  350. arguments = args[2:]
  351. bucket,key = storedfilename.split('/', 1)
  352. logger.info('run bucket: %s, key: %s'%(bucket, key))
  353. os.execv(storedfilename, arguments)
  354. else:
  355. logger.error('unhandled, bye')
  356. sys.exit(1)
  357. logger.debug('command handled')
  358. sys.exit(0)