#!/usr/bin/env python
"""
Patch utility to apply unified diffs
Brute-force line-by-line non-recursive parsing
Copyright (c) 2008-2016 anatoly techtonik
Available under the terms of MIT license
https://github.com/techtonik/python-patch/
"""
from __future__ import print_function

__author__ = "anatoly techtonik <techtonik@gmail.com>"
__version__ = "1.16"

import copy
import logging
import re

# cStringIO doesn't support unicode in 2.5
try:
  from StringIO import StringIO
except ImportError:
  from io import BytesIO as StringIO  # python 3
try:
  import urllib2 as urllib_request
except ImportError:
  import urllib.request as urllib_request

from os.path import exists, isfile, abspath
import os
import posixpath
import shutil
import sys

PY3K = sys.version_info >= (3, 0)

# PEP 3114
if not PY3K:
  compat_next = lambda gen: gen.next()
else:
  compat_next = lambda gen: gen.__next__()

def tostr(b):
  """ Python 3 bytes encoder. Used to print filename in
      diffstat output. Assumes that filenames are in utf-8.
  """
  if not PY3K:
    return b

  # [ ] figure out how to print non-utf-8 filenames without
  #     information loss
  return b.decode('utf-8')
#------------------------------------------------
# Logging is controlled by logger named after the
# module name (e.g. 'patch' for patch.py module)

logger = logging.getLogger(__name__)

debug = logger.debug
info = logger.info
warning = logger.warning

class NullHandler(logging.Handler):
  """ Copied from Python 2.7 to avoid getting
      `No handlers could be found for logger "patch"`
      http://bugs.python.org/issue16539
  """
  def handle(self, record):
    pass
  def emit(self, record):
    pass
  def createLock(self):
    self.lock = None

streamhandler = logging.StreamHandler()

# initialize logger itself
logger.addHandler(NullHandler())

debugmode = False

def setdebug():
  global debugmode, streamhandler

  debugmode = True
  loglevel = logging.DEBUG
  logformat = "%(levelname)8s %(message)s"
  logger.setLevel(loglevel)

  if streamhandler not in logger.handlers:
    # when used as a library, streamhandler is not added
    # by default
    logger.addHandler(streamhandler)

  streamhandler.setFormatter(logging.Formatter(logformat))

#------------------------------------------------
# Constants for Patch/PatchSet types

DIFF = PLAIN = "plain"
GIT = "git"
HG = MERCURIAL = "mercurial"
SVN = SUBVERSION = "svn"
# mixed type is only actual when PatchSet contains
# Patches of different type
MIXED = "mixed"
#------------------------------------------------
# Helpers (these could come with Python stdlib)

# x...() functions are used to work with paths in
# cross-platform manner - all paths use forward
# slashes even on Windows.

def xisabs(filename):
  """ Cross-platform version of `os.path.isabs()`
      Returns True if `filename` is absolute on
      Linux, OS X or Windows.
  """
  if filename.startswith(b'/'):     # Linux/Unix
    return True
  elif filename.startswith(b'\\'):  # Windows
    return True
  elif re.match(b'\\w:[\\\\/]', filename):  # Windows
    return True
  return False

def xnormpath(path):
  """ Cross-platform version of os.path.normpath """
  # replace escapes and Windows slashes
  normalized = posixpath.normpath(path).replace(b'\\', b'/')
  # fold the result
  return posixpath.normpath(normalized)

def xstrip(filename):
  """ Make relative path out of absolute by stripping
      prefixes used on Linux, OS X and Windows.

      This function is critical for security.
  """
  while xisabs(filename):
    # strip windows drive with all slashes
    if re.match(b'\\w:[\\\\/]', filename):
      filename = re.sub(b'^\\w+:[\\\\/]+', b'', filename)
    # strip all slashes
    elif re.match(b'[\\\\/]', filename):
      filename = re.sub(b'^[\\\\/]+', b'', filename)
  return filename
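
# Illustrative examples for the path helpers above (added for clarity,
# not part of the original source). Paths are byte strings, as used
# throughout this module; shown results assume Python 3:
#
#   >>> xisabs(b'/etc/passwd')
#   True
#   >>> xisabs(b'c:\\temp\\file')
#   True
#   >>> xstrip(b'/etc/passwd')        # leading slashes stripped
#   b'etc/passwd'
#   >>> xnormpath(b'a/./b//c')        # folded to a forward-slash path
#   b'a/b/c'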
#-----------------------------------------------
# Main API functions

def fromfile(filename):
  """ Parse patch file. If successful, returns
      PatchSet() object. Otherwise returns False.
  """
  patchset = PatchSet()
  debug("reading %s" % filename)
  fp = open(filename, "rb")
  res = patchset.parse(fp)
  fp.close()
  if res == True:
    return patchset
  return False

def fromstring(s):
  """ Parse text string and return PatchSet()
      object (or False if parsing fails)
  """
  ps = PatchSet( StringIO(s) )
  if ps.errors == 0:
    return ps
  return False

def fromurl(url):
  """ Parse patch from an URL, return False
      if an error occurred. Note that this also
      can throw urlopen() exceptions.
  """
  ps = PatchSet( urllib_request.urlopen(url) )
  if ps.errors == 0:
    return ps
  return False
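
# Typical library usage (illustrative sketch, not from the original source;
# "fix.diff" is a hypothetical filename):
#
#   >>> import patch
#   >>> pset = patch.fromfile("fix.diff")   # or fromstring(b"...") / fromurl(url)
#   >>> if pset:
#   ...     print(pset.diffstat())
#   ...     pset.apply(strip=0)             # returns True on success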
# --- Utility functions ---
# [ ] reuse more universal pathsplit()
def pathstrip(path, n):
  """ Strip n leading components from the given path """
  pathlist = [path]
  while os.path.dirname(pathlist[0]) != b'':
    pathlist[0:1] = os.path.split(pathlist[0])
  return b'/'.join(pathlist[n:])
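
# Illustrative example for pathstrip() (added comment, not part of the
# original source):
#
#   >>> pathstrip(b'path/to/file.txt', 1)
#   b'to/file.txt'
#   >>> pathstrip(b'path/to/file.txt', 2)
#   b'file.txt'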
# --- /Utility function ---

class Hunk(object):
  """ Parsed hunk data container (hunk starts with @@ -R +R @@) """

  def __init__(self):
    self.startsrc=None #: line count starts with 1
    self.linessrc=None
    self.starttgt=None
    self.linestgt=None
    self.invalid=False
    self.desc=''
    self.text=[]

#  def apply(self, estream):
#    """ write hunk data into enumerable stream
#        return strings one by one until hunk is
#        over
#
#        enumerable stream are tuples (lineno, line)
#        where lineno starts with 0
#    """
#    pass
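
# Illustrative example (added comment, not from the original source):
# after parsing a hunk such as
#
#   @@ -1,2 +1,3 @@
#    unchanged line
#   +added line
#    another unchanged line
#
# the resulting Hunk holds startsrc=1, linessrc=2, starttgt=1, linestgt=3,
# and text contains the three b" "/b"+" prefixed body lines.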
class Patch(object):
  """ Patch for a single file.
      If used as an iterable, returns hunks.
  """
  def __init__(self):
    self.source = None
    self.target = None
    self.hunks = []
    self.hunkends = []
    self.header = []

    self.type = None

  def __iter__(self):
    for h in self.hunks:
      yield h

class PatchSet(object):
  """ PatchSet is a patch parser and container.
      When used as an iterable, returns patches.
  """

  def __init__(self, stream=None):
    # --- API accessible fields ---

    # name of the PatchSet (filename or ...)
    self.name = None
    # patch set type - one of constants
    self.type = None

    # list of Patch objects
    self.items = []

    self.errors = 0    # fatal parsing errors
    self.warnings = 0  # non-critical warnings
    # --- /API ---

    if stream:
      self.parse(stream)

  def __len__(self):
    return len(self.items)

  def __iter__(self):
    for i in self.items:
      yield i

  def parse(self, stream):
    """ parse unified diff
        return True on success
    """
    lineends = dict(lf=0, crlf=0, cr=0)
    nexthunkno = 0    #: even if index starts with 0 user messages number hunks from 1

    p = None
    hunk = None
    # hunkactual variable is used to calculate hunk lines for comparison
    hunkactual = dict(linessrc=None, linestgt=None)

    class wrapumerate(enumerate):
      """Enumerate wrapper that uses boolean end of stream status instead of
         StopIteration exception, and properties to access line information.
      """

      def __init__(self, *args, **kwargs):
        # we don't call parent, it is magically created by __new__ method
        self._exhausted = False
        self._lineno = False   # after end of stream equal to the num of lines
        self._line = False     # will be reset to False after end of stream

      def next(self):
        """Try to read the next line and return True if it is available,
           False if end of stream is reached."""
        if self._exhausted:
          return False

        try:
          self._lineno, self._line = compat_next(super(wrapumerate, self))
        except StopIteration:
          self._exhausted = True
          self._line = False
          return False
        return True

      @property
      def is_empty(self):
        return self._exhausted

      @property
      def line(self):
        return self._line

      @property
      def lineno(self):
        return self._lineno

    # define states (possible file regions) that direct parse flow
    headscan  = True   # start with scanning header
    filenames = False  # lines starting with --- and +++

    hunkhead = False   # @@ -R +R @@ sequence
    hunkbody = False   #
    hunkskip = False   # skipping invalid hunk mode

    hunkparsed = False # state after successfully parsed hunk

    # regexp to match start of hunk, used groups - 1,3,4,6
    re_hunk_start = re.compile(b"^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))? @@")
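    # Illustrative note (added comment, not from the original source): for a
    # hunk header like b"@@ -4,8 +14,9 @@" the groups referenced above are
    # group(1)=b'4' (startsrc), group(3)=b'8' (linessrc),
    # group(4)=b'14' (starttgt) and group(6)=b'9' (linestgt).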
    self.errors = 0
    # temp buffers for header and filenames info
    header = []
    srcname = None
    tgtname = None

    # start of main cycle
    # each parsing block already has line available in fe.line
    fe = wrapumerate(stream)
    while fe.next():

      # -- deciders: these only switch state to decide who should process
      # --           line fetched at the start of this cycle
      if hunkparsed:
        hunkparsed = False
        if re_hunk_start.match(fe.line):
          hunkhead = True
        elif fe.line.startswith(b"--- "):
          filenames = True
        else:
          headscan = True
      # -- ------------------------------------

      # read out header
      if headscan:
        while not fe.is_empty and not fe.line.startswith(b"--- "):
          header.append(fe.line)
          fe.next()
        if fe.is_empty:
          if p == None:
            debug("no patch data found")  # error is shown later
            self.errors += 1
          else:
            info("%d unparsed bytes left at the end of stream" % len(b''.join(header)))
            self.warnings += 1
            # TODO check for \No new line at the end..
            # TODO test for unparsed bytes
            # otherwise error += 1
          # this is actually a loop exit
          continue

        headscan = False
        # switch to filenames state
        filenames = True

      line = fe.line
      lineno = fe.lineno

      # hunkskip and hunkbody code skipped until definition of hunkhead is parsed
      if hunkbody:
        # [x] treat empty lines inside hunks as containing single space
        #     (this happens when diff is saved by copy/pasting to editor
        #      that strips trailing whitespace)
        if line.strip(b"\r\n") == b"":
          debug("expanding empty line in a middle of hunk body")
          self.warnings += 1
          line = b' ' + line

        # process line first
        if re.match(b"^[- \\+\\\\]", line):
          # gather stats about line endings
          if line.endswith(b"\r\n"):
            p.hunkends["crlf"] += 1
          elif line.endswith(b"\n"):
            p.hunkends["lf"] += 1
          elif line.endswith(b"\r"):
            p.hunkends["cr"] += 1

          if line.startswith(b"-"):
            hunkactual["linessrc"] += 1
          elif line.startswith(b"+"):
            hunkactual["linestgt"] += 1
          elif not line.startswith(b"\\"):
            hunkactual["linessrc"] += 1
            hunkactual["linestgt"] += 1
          hunk.text.append(line)
          # todo: handle \ No newline cases
        else:
          warning("invalid hunk no.%d at %d for target file %s" % (nexthunkno, lineno+1, p.target))
          # add hunk status node
          hunk.invalid = True
          p.hunks.append(hunk)
          self.errors += 1
          # switch to hunkskip state
          hunkbody = False
          hunkskip = True

        # check exit conditions
        if hunkactual["linessrc"] > hunk.linessrc or hunkactual["linestgt"] > hunk.linestgt:
          warning("extra lines for hunk no.%d at %d for target %s" % (nexthunkno, lineno+1, p.target))
          # add hunk status node
          hunk.invalid = True
          p.hunks.append(hunk)
          self.errors += 1
          # switch to hunkskip state
          hunkbody = False
          hunkskip = True
        elif hunk.linessrc == hunkactual["linessrc"] and hunk.linestgt == hunkactual["linestgt"]:
          # hunk parsed successfully
          p.hunks.append(hunk)
          # switch to hunkparsed state
          hunkbody = False
          hunkparsed = True

          # detect mixed windows/unix line ends
          ends = p.hunkends
          if ((ends["cr"]!=0) + (ends["crlf"]!=0) + (ends["lf"]!=0)) > 1:
            warning("inconsistent line ends in patch hunks for %s" % p.source)
            self.warnings += 1
          if debugmode:
            debuglines = dict(ends)
            debuglines.update(file=p.target, hunk=nexthunkno)
            debug("crlf: %(crlf)d lf: %(lf)d cr: %(cr)d\t - file: %(file)s hunk: %(hunk)d" % debuglines)

        # fetch next line
        continue
      if hunkskip:
        if re_hunk_start.match(line):
          # switch to hunkhead state
          hunkskip = False
          hunkhead = True
        elif line.startswith(b"--- "):
          # switch to filenames state
          hunkskip = False
          filenames = True
          if debugmode and len(self.items) > 0:
            debug("- %2d hunks for %s" % (len(p.hunks), p.source))

      if filenames:
        if line.startswith(b"--- "):
          if srcname != None:
            # XXX testcase
            warning("skipping false patch for %s" % srcname)
            srcname = None
            # XXX header += srcname
          # double source filename line is encountered
          # attempt to restart from this second line
          re_filename = b"^--- ([^\t]+)"
          match = re.match(re_filename, line)
          # todo: support spaces in filenames
          if match:
            srcname = match.group(1).strip()
          else:
            warning("skipping invalid filename at line %d" % (lineno+1))
            self.errors += 1
            # XXX p.header += line
            # switch back to headscan state
            filenames = False
            headscan = True
        elif not line.startswith(b"+++ "):
          if srcname != None:
            warning("skipping invalid patch with no target for %s" % srcname)
            self.errors += 1
            srcname = None
            # XXX header += srcname
            # XXX header += line
          else:
            # this should be unreachable
            warning("skipping invalid target patch")
          filenames = False
          headscan = True
        else:
          if tgtname != None:
            # XXX seems to be a dead branch
            warning("skipping invalid patch - double target at line %d" % (lineno+1))
            self.errors += 1
            srcname = None
            tgtname = None
            # XXX header += srcname
            # XXX header += tgtname
            # XXX header += line
            # double target filename line is encountered
            # switch back to headscan state
            filenames = False
            headscan = True
          else:
            re_filename = b"^\+\+\+ ([^\t]+)"
            match = re.match(re_filename, line)
            if not match:
              warning("skipping invalid patch - no target filename at line %d" % (lineno+1))
              self.errors += 1
              srcname = None
              # switch back to headscan state
              filenames = False
              headscan = True
            else:
              if p: # for the first run p is None
                self.items.append(p)
              p = Patch()
              p.source = srcname
              srcname = None
              p.target = match.group(1).strip()
              p.header = header
              header = []
              # switch to hunkhead state
              filenames = False
              hunkhead = True
              nexthunkno = 0
              p.hunkends = lineends.copy()
              continue

      if hunkhead:
        match = re.match(b"^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))? @@(.*)", line)
        if not match:
          if not p.hunks:
            warning("skipping invalid patch with no hunks for file %s" % p.source)
            self.errors += 1
            # XXX review switch
            # switch to headscan state
            hunkhead = False
            headscan = True
            continue
          else:
            # TODO review condition case
            # switch to headscan state
            hunkhead = False
            headscan = True
        else:
          hunk = Hunk()
          hunk.startsrc = int(match.group(1))
          hunk.linessrc = 1
          if match.group(3): hunk.linessrc = int(match.group(3))
          hunk.starttgt = int(match.group(4))
          hunk.linestgt = 1
          if match.group(6): hunk.linestgt = int(match.group(6))
          hunk.invalid = False
          hunk.desc = match.group(7)[1:].rstrip()
          hunk.text = []

          hunkactual["linessrc"] = hunkactual["linestgt"] = 0

          # switch to hunkbody state
          hunkhead = False
          hunkbody = True
          nexthunkno += 1
          continue
    # /while fe.next()

    if p:
      self.items.append(p)

    if not hunkparsed:
      if hunkskip:
        warning("warning: finished with errors, some hunks may be invalid")
      elif headscan:
        if len(self.items) == 0:
          warning("error: no patch data found!")
          return False
        else: # extra data at the end of file
          pass
      else:
        warning("error: patch stream is incomplete!")
        self.errors += 1
        if len(self.items) == 0:
          return False

    if debugmode and len(self.items) > 0:
      debug("- %2d hunks for %s" % (len(p.hunks), p.source))

    # XXX fix total hunks calculation
    debug("total files: %d total hunks: %d" % (len(self.items),
          sum(len(p.hunks) for p in self.items)))

    # ---- detect patch and patchset types ----
    for idx, p in enumerate(self.items):
      self.items[idx].type = self._detect_type(p)

    types = set([p.type for p in self.items])
    if len(types) > 1:
      self.type = MIXED
    else:
      self.type = types.pop()
    # --------

    self._normalize_filenames()

    return (self.errors == 0)
  def _detect_type(self, p):
    """ detect and return type for the specified Patch object
        analyzes header and filenames info

        NOTE: must be run before filenames are normalized
    """
    # check for SVN
    #  - header starts with Index:
    #  - next line is ===... delimiter
    #  - filename is followed by revision number
    # TODO add SVN revision
    if (len(p.header) > 1 and p.header[-2].startswith(b"Index: ")
          and p.header[-1].startswith(b"="*67)):
      return SVN

    # common checks for both HG and GIT
    DVCS = ((p.source.startswith(b'a/') or p.source == b'/dev/null')
        and (p.target.startswith(b'b/') or p.target == b'/dev/null'))

    # GIT type check
    #  - header[-2] is like "diff --git a/oldname b/newname"
    #  - header[-1] is like "index <hash>..<hash> <mode>"
    # TODO add git rename diffs and add/remove diffs
    #      add git diff with spaced filename
    # TODO http://www.kernel.org/pub/software/scm/git/docs/git-diff.html

    # Git patch header len is 2 min
    if len(p.header) > 1:
      # detect the start of diff header - there might be some comments before
      for idx in reversed(range(len(p.header))):
        if p.header[idx].startswith(b"diff --git"):
          break
      if p.header[idx].startswith(b'diff --git a/'):
        if (idx+1 < len(p.header)
            and re.match(b'index \\w{7}..\\w{7} \\d{6}', p.header[idx+1])):
          if DVCS:
            return GIT

    # HG check
    #
    #  - for plain HG format header is like "diff -r b2d9961ff1f5 filename"
    #  - for Git-style HG patches it is "diff --git a/oldname b/newname"
    #  - filename starts with a/, b/ or is equal to /dev/null
    #  - exported changesets also contain the header
    #    # HG changeset patch
    #    # User name@example.com
    #    ...
    # TODO add MQ
    # TODO add revision info
    if len(p.header) > 0:
      if DVCS and re.match(b'diff -r \\w{12} .*', p.header[-1]):
        return HG
      if DVCS and p.header[-1].startswith(b'diff --git a/'):
        if len(p.header) == 1:  # native Git patch header len is 2
          return HG
        elif p.header[0].startswith(b'# HG changeset patch'):
          return HG

    return PLAIN
  def _normalize_filenames(self):
    """ sanitize filenames, normalizing paths, i.e.:
        1. strip a/ and b/ prefixes from GIT and HG style patches
        2. remove all references to parent directories (with warning)
        3. translate any absolute paths to relative (with warning)

        [x] always use forward slashes to be crossplatform
            (diff/patch were born as a unix utility after all)

        return None
    """
    if debugmode:
      debug("normalize filenames")
    for i,p in enumerate(self.items):
      if debugmode:
        debug("  patch type = " + p.type)
        debug("  source = " + p.source)
        debug("  target = " + p.target)
      if p.type in (HG, GIT):
        # TODO: figure out how to deal with /dev/null entries
        debug("stripping a/ and b/ prefixes")
        if p.source != '/dev/null':
          if not p.source.startswith(b"a/"):
            warning("invalid source filename")
          else:
            p.source = p.source[2:]
        if p.target != '/dev/null':
          if not p.target.startswith(b"b/"):
            warning("invalid target filename")
          else:
            p.target = p.target[2:]

      p.source = xnormpath(p.source)
      p.target = xnormpath(p.target)

      sep = b'/'  # sep value can be hardcoded, but it looks nice this way

      # references to parent are not allowed
      if p.source.startswith(b".." + sep):
        warning("error: stripping parent path for source file patch no.%d" % (i+1))
        self.warnings += 1
        while p.source.startswith(b".." + sep):
          p.source = p.source.partition(sep)[2]
      if p.target.startswith(b".." + sep):
        warning("error: stripping parent path for target file patch no.%d" % (i+1))
        self.warnings += 1
        while p.target.startswith(b".." + sep):
          p.target = p.target.partition(sep)[2]
      # absolute paths are not allowed
      if xisabs(p.source) or xisabs(p.target):
        warning("error: absolute paths are not allowed - file no.%d" % (i+1))
        self.warnings += 1
        if xisabs(p.source):
          warning("stripping absolute path from source name '%s'" % p.source)
          p.source = xstrip(p.source)
        if xisabs(p.target):
          warning("stripping absolute path from target name '%s'" % p.target)
          p.target = xstrip(p.target)

      self.items[i].source = p.source
      self.items[i].target = p.target
  def diffstat(self):
    """ calculate diffstat and return as a string
        Notes:
          - original diffstat outputs target filename
          - single + or - shouldn't escape histogram
    """
    names = []
    insert = []
    delete = []
    delta = 0    # size change in bytes
    namelen = 0
    maxdiff = 0  # max number of changes for single file
                 # (for histogram width calculation)
    for patch in self.items:
      i,d = 0,0
      for hunk in patch.hunks:
        for line in hunk.text:
          if line.startswith(b'+'):
            i += 1
            delta += len(line)-1
          elif line.startswith(b'-'):
            d += 1
            delta -= len(line)-1
      names.append(patch.target)
      insert.append(i)
      delete.append(d)
      namelen = max(namelen, len(patch.target))
      maxdiff = max(maxdiff, i+d)
    output = ''
    statlen = len(str(maxdiff))  # stats column width
    for i,n in enumerate(names):
      # %-19s | %-4d %s
      format = " %-" + str(namelen) + "s | %" + str(statlen) + "s %s\n"

      hist = ''
      # -- calculating histogram --
      width = len(format % ('', '', ''))
      histwidth = max(2, 80 - width)
      if maxdiff < histwidth:
        hist = "+"*insert[i] + "-"*delete[i]
      else:
        iratio = (float(insert[i]) / maxdiff) * histwidth
        dratio = (float(delete[i]) / maxdiff) * histwidth

        # make sure every entry gets at least one + or -
        iwidth = 1 if 0 < iratio < 1 else int(iratio)
        dwidth = 1 if 0 < dratio < 1 else int(dratio)
        #print(iratio, dratio, iwidth, dwidth, histwidth)
        hist = "+"*int(iwidth) + "-"*int(dwidth)
      # -- /calculating +- histogram --
      output += (format % (tostr(names[i]), str(insert[i] + delete[i]), hist))
    output += (" %d files changed, %d insertions(+), %d deletions(-), %+d bytes"
               % (len(names), sum(insert), sum(delete), delta))
    return output
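
  # Illustrative usage of diffstat() (added comment, not from the original
  # source; "fix.diff" is a hypothetical filename):
  #
  #   >>> print(fromfile("fix.diff").diffstat())
  #
  # This prints one histogram row per target file followed by a summary line
  # with the totals of insertions, deletions and the byte delta.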
  def findfile(self, old, new):
    """ return name of file to be patched or None """
    if exists(old):
      return old
    elif exists(new):
      return new
    else:
      # [w] Google Code generates broken patches with its online editor
      debug("broken patch from Google Code, stripping prefixes..")
      if old.startswith(b'a/') and new.startswith(b'b/'):
        old, new = old[2:], new[2:]
        debug("  %s" % old)
        debug("  %s" % new)
        if exists(old):
          return old
        elif exists(new):
          return new
      return None
  def apply(self, strip=0, root=None):
    """ Apply parsed patch, optionally stripping leading components
        from file paths. `root` parameter specifies working dir.
        return True on success
    """
    if root:
      prevdir = os.getcwd()
      os.chdir(root)

    total = len(self.items)
    errors = 0
    if strip:
      # [ ] test strip level exceeds nesting level
      # [ ] test the same only for selected files
      # [ ] test if files end up being on the same level
      try:
        strip = int(strip)
      except ValueError:
        errors += 1
        warning("error: strip parameter '%s' must be an integer" % strip)
        strip = 0

    #for fileno, filename in enumerate(self.source):
    for i,p in enumerate(self.items):
      if strip:
        debug("stripping %s leading component(s) from:" % strip)
        debug("  %s" % p.source)
        debug("  %s" % p.target)
        old = pathstrip(p.source, strip)
        new = pathstrip(p.target, strip)
      else:
        old, new = p.source, p.target

      filename = self.findfile(old, new)

      if not filename:
        warning("source/target file does not exist:\n --- %s\n +++ %s" % (old, new))
        errors += 1
        continue
      if not isfile(filename):
        warning("not a file - %s" % filename)
        errors += 1
        continue

      # [ ] check absolute paths security here
      debug("processing %d/%d:\t %s" % (i+1, total, filename))

      # validate before patching
      f2fp = open(filename, 'rb')
      hunkno = 0
      hunk = p.hunks[hunkno]
      hunkfind = []
      hunkreplace = []
      validhunks = 0
      canpatch = False
      for lineno, line in enumerate(f2fp):
        if lineno+1 < hunk.startsrc:
          continue
        elif lineno+1 == hunk.startsrc:
          hunkfind = [x[1:].rstrip(b"\r\n") for x in hunk.text if x[0] in b" -"]
          hunkreplace = [x[1:].rstrip(b"\r\n") for x in hunk.text if x[0] in b" +"]
          #pprint(hunkreplace)
          hunklineno = 0

          # todo \ No newline at end of file

        # check hunks in source file
        if lineno+1 < hunk.startsrc+len(hunkfind)-1:
          if line.rstrip(b"\r\n") == hunkfind[hunklineno]:
            hunklineno += 1
          else:
            info("file %d/%d:\t %s" % (i+1, total, filename))
            info(" hunk no.%d doesn't match source file at line %d" % (hunkno+1, lineno+1))
            info("  expected: %s" % hunkfind[hunklineno])
            info("  actual  : %s" % line.rstrip(b"\r\n"))
            # not counting this as error, because file may already be patched.
            # the check whether the file is already patched is done after the
            # number of invalid hunks is found
            # TODO: check hunks against source/target file in one pass
            #   API - check(stream, srchunks, tgthunks)
            #         return tuple (srcerrs, tgterrs)

            # continue to check other hunks for completeness
            hunkno += 1
            if hunkno < len(p.hunks):
              hunk = p.hunks[hunkno]
              continue
            else:
              break

        # check if processed line is the last line
        if lineno+1 == hunk.startsrc+len(hunkfind)-1:
          debug(" hunk no.%d for file %s -- is ready to be patched" % (hunkno+1, filename))
          hunkno += 1
          validhunks += 1
          if hunkno < len(p.hunks):
            hunk = p.hunks[hunkno]
          else:
            if validhunks == len(p.hunks):
              # patch file
              canpatch = True
              break
      else:
        if hunkno < len(p.hunks):
          warning("premature end of source file %s at hunk %d" % (filename, hunkno+1))
          errors += 1

      f2fp.close()

      if validhunks < len(p.hunks):
        if self._match_file_hunks(filename, p.hunks):
          warning("already patched %s" % filename)
        else:
          warning("source file is different - %s" % filename)
          errors += 1
      if canpatch:
        backupname = filename+b".orig"
        if exists(backupname):
          warning("can't backup original file to %s - aborting" % backupname)
        else:
          import shutil
          shutil.move(filename, backupname)
          if self.write_hunks(backupname, filename, p.hunks):
            info("successfully patched %d/%d:\t %s" % (i+1, total, filename))
            os.unlink(backupname)
          else:
            errors += 1
            warning("error patching file %s" % filename)
            shutil.copy(filename, filename+".invalid")
            warning("invalid version is saved to %s" % filename+".invalid")
            # todo: proper rejects
            shutil.move(backupname, filename)

    if root:
      os.chdir(prevdir)

    # todo: check for premature eof
    return (errors == 0)
  def _reverse(self):
    """ reverse patch direction (this doesn't touch filenames) """
    for p in self.items:
      for h in p.hunks:
        h.startsrc, h.starttgt = h.starttgt, h.startsrc
        h.linessrc, h.linestgt = h.linestgt, h.linessrc
        for i,line in enumerate(h.text):
          # need to use line[0:1] here, because line[0]
          # returns int instead of bytes on Python 3
          if line[0:1] == b'+':
            h.text[i] = b'-' + line[1:]
          elif line[0:1] == b'-':
            h.text[i] = b'+' + line[1:]

  def revert(self, strip=0, root=None):
    """ apply patch in reverse order """
    reverted = copy.deepcopy(self)
    reverted._reverse()
    return reverted.apply(strip, root)
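
  # Illustrative usage of apply()/revert() (added comment, not from the
  # original source; filenames and directories are hypothetical):
  #
  #   >>> pset = fromfile("feature.diff")
  #   >>> pset.apply(strip=1, root="src")    # patch files under ./src
  #   True
  #   >>> pset.revert(strip=1, root="src")   # undo the same patch
  #   True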
  def can_patch(self, filename):
    """ Check if specified filename can be patched. Returns None if file can
    not be found among source filenames. False if patch can not be applied
    clearly. True otherwise.

    :returns: True, False or None
    """
    filename = abspath(filename)
    for p in self.items:
      if filename == abspath(p.source):
        return self._match_file_hunks(filename, p.hunks)
    return None

  def _match_file_hunks(self, filepath, hunks):
    matched = True
    fp = open(abspath(filepath), 'rb')

    class NoMatch(Exception):
      pass

    lineno = 1
    line = fp.readline()
    hno = None
    try:
      for hno, h in enumerate(hunks):
        # skip to first line of the hunk
        while lineno < h.starttgt:
          if not len(line): # eof
            debug("check failed - premature eof before hunk: %d" % (hno+1))
            raise NoMatch
          line = fp.readline()
          lineno += 1
        for hline in h.text:
          if hline.startswith(b"-"):
            continue
          if not len(line):
            debug("check failed - premature eof on hunk: %d" % (hno+1))
            # todo: \ No newline at the end of file
            raise NoMatch
          if line.rstrip(b"\r\n") != hline[1:].rstrip(b"\r\n"):
            debug("file is not patched - failed hunk: %d" % (hno+1))
            raise NoMatch
          line = fp.readline()
          lineno += 1
    except NoMatch:
      matched = False
      # todo: display failed hunk, i.e. expected/found

    fp.close()
    return matched

  def patch_stream(self, instream, hunks):
    """ Generator that yields stream patched with hunks iterable

        Converts lineends in hunk lines to the best suitable format
        autodetected from input
    """
    # todo: At the moment substituted lineends may not be the same
    #       at the start and at the end of patching. Also issue a
    #       warning/throw about mixed lineends (is it really needed?)

    hunks = iter(hunks)

    srclineno = 1

    lineends = {b'\n':0, b'\r\n':0, b'\r':0}
    def get_line():
      """
      local utility function - return line from source stream
      collecting line end statistics on the way
      """
      line = instream.readline()
      # 'U' mode works only with text files
      if line.endswith(b"\r\n"):
        lineends[b"\r\n"] += 1
      elif line.endswith(b"\n"):
        lineends[b"\n"] += 1
      elif line.endswith(b"\r"):
        lineends[b"\r"] += 1
      return line

    for hno, h in enumerate(hunks):
      debug("hunk %d" % (hno+1))
      # skip to line just before hunk starts
      while srclineno < h.startsrc:
        yield get_line()
        srclineno += 1

      for hline in h.text:
        # todo: check \ No newline at the end of file
        if hline.startswith(b"-") or hline.startswith(b"\\"):
          get_line()
          srclineno += 1
          continue
        else:
          if not hline.startswith(b"+"):
            get_line()
            srclineno += 1
          line2write = hline[1:]
          # detect if line ends are consistent in source file
          if sum([bool(lineends[x]) for x in lineends]) == 1:
            newline = [x for x in lineends if lineends[x] != 0][0]
            yield line2write.rstrip(b"\r\n")+newline
          else: # newlines are mixed
            yield line2write

    for line in instream:
      yield line

  def write_hunks(self, srcname, tgtname, hunks):
    src = open(srcname, "rb")
    tgt = open(tgtname, "wb")

    debug("processing target file %s" % tgtname)

    tgt.writelines(self.patch_stream(src, hunks))

    tgt.close()
    src.close()
    # [ ] TODO: add test for permission copy
    shutil.copymode(srcname, tgtname)
    return True

  def dump(self):
    for p in self.items:
      for headline in p.header:
        print(headline.rstrip('\n'))
      print('--- ' + p.source)
      print('+++ ' + p.target)
      for h in p.hunks:
        print('@@ -%s,%s +%s,%s @@' % (h.startsrc, h.linessrc, h.starttgt, h.linestgt))
        for line in h.text:
          print(line.rstrip('\n'))
def main():
  from optparse import OptionParser
  from os.path import exists
  import sys

  opt = OptionParser(usage="1. %prog [options] unified.diff\n"
                           " 2. %prog [options] http://host/patch\n"
                           " 3. %prog [options] -- < unified.diff",
                     version="python-patch %s" % __version__)
  opt.add_option("-q", "--quiet", action="store_const", dest="verbosity",
                 const=0, help="print only warnings and errors", default=1)
  opt.add_option("-v", "--verbose", action="store_const", dest="verbosity",
                 const=2, help="be verbose")
  opt.add_option("--debug", action="store_true", dest="debugmode", help="debug mode")
  opt.add_option("--diffstat", action="store_true", dest="diffstat",
                 help="print diffstat and exit")
  opt.add_option("-d", "--directory", metavar='DIR',
                 help="specify root directory for applying patch")
  opt.add_option("-p", "--strip", type="int", metavar='N', default=0,
                 help="strip N path components from filenames")
  opt.add_option("--revert", action="store_true",
                 help="apply patch in reverse order (unpatch)")
  (options, args) = opt.parse_args()

  if not args and sys.argv[-1:] != ['--']:
    opt.print_version()
    opt.print_help()
    sys.exit()
  readstdin = (sys.argv[-1:] == ['--'] and not args)

  verbosity_levels = {0:logging.WARNING, 1:logging.INFO, 2:logging.DEBUG}
  loglevel = verbosity_levels[options.verbosity]
  logformat = "%(message)s"
  logger.setLevel(loglevel)
  streamhandler.setFormatter(logging.Formatter(logformat))

  if options.debugmode:
    setdebug()  # this sets global debugmode variable

  if readstdin:
    patch = PatchSet(sys.stdin)
  else:
    patchfile = args[0]
    urltest = patchfile.split(':')[0]
    if (':' in patchfile and urltest.isalpha()
        and len(urltest) > 1): # one char before : is a windows drive letter
      patch = fromurl(patchfile)
    else:
      if not exists(patchfile) or not isfile(patchfile):
        sys.exit("patch file does not exist - %s" % patchfile)
      patch = fromfile(patchfile)

  if options.diffstat:
    print(patch.diffstat())
    sys.exit(0)

  #pprint(patch)

  if options.revert:
    patch.revert(options.strip, root=options.directory) or sys.exit(-1)
  else:
    patch.apply(options.strip, root=options.directory) or sys.exit(-1)

  # todo: document and test line ends handling logic - patch.py detects proper line-endings
  #       for inserted hunks and issues a warning if patched file has inconsistent line ends

if __name__ == "__main__":
  main()
# Legend:
# [ ] - some thing to be done
# [w] - official wart, external or internal that is unlikely to be fixed

# [ ] API break (2.x) wishlist
#     PatchSet.items --> PatchSet.patches

# [ ] run --revert test for all dataset items
# [ ] run .parse() / .dump() test for dataset