patchfs.in 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395
  1. #! @PERL@ -w
  2. #
  3. # Written by Adam Byrtek <alpha@debian.org>, 2002
  4. # Rewritten by David Sterba <dave@jikos.cz>, 2009
  5. #
  6. # Extfs to handle patches in context and unified diff format.
  7. # Known issues: When name of file to patch is modified during editing,
  8. # hunk is duplicated on copyin. It is unavoidable.
  9. use bytes;
  10. use strict;
  11. use POSIX;
  12. use File::Temp 'tempfile';
  # External helper programs, looked up through PATH.
  my ($lzma, $xz, $bzip, $gzip) = ('lzma', 'xz', 'bzip2', 'gzip');
  # Command whose output is matched to pick a (de)compressor.
  my $fileutil = 'file -b';

  # Date parsing requires Date::Parse from the TimeDate module; this flag
  # is false when the module cannot be loaded.
  my $parsedates = eval 'require Date::Parse';

  # Unified diff: two-line "---"/"+++" header.  The first header/extract
  # pair requires a tab on both lines and splits the names at the tab; the
  # second pair accepts headers without a tab and splits at whitespace.
  # The extract patterns capture (old name, new name, trailing text).
  my $unified_header   = qr/^--- .*\t.*\n\+\+\+ .*\t.*\n$/;
  my $unified_extract  = qr/^--- ([^\t]+).*\n\+\+\+ ([^\t]+)\s*(.*)\n/;
  my $unified_header2  = qr/^--- .*\n\+\+\+ .*\n$/;
  my $unified_extract2 = qr/^--- ([^\s]+).*\n\+\+\+ ([^\s]+)\s*(.*)\n/;
  # First character (or "@@ ... @@" marker) of a line inside a unified hunk.
  my $unified_contents = qr/^([+\-\\ \n]|@@ .* @@)/;
  # Hunk header "@@ -start[,count] +start[,count] @@".
  my $unified_hunk     = qr/@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))? @@.*\n/;

  # Context diff: two-line "***"/"---" header, same tab / no-tab split as
  # for the unified patterns.
  my $context_header   = qr/^\*\*\* .*\t.*\n--- .*\t.*\n$/;
  my $context_extract  = qr/^\*\*\* ([^\t]+).*\n--- ([^\t]+)\s*(.*)\n/;
  my $context_header2  = qr/^\*\*\* .*\n--- .*\n$/;
  my $context_extract2 = qr/^\*\*\* ([^\s]+).*\n--- ([^\s]+)\s*(.*)\n/;
  # Any line that may appear inside the body of a context diff.
  my $context_contents = qr/^([!+\-\\ \n]|-{3} .* -{4}|\*{3} .* \*{4}|\*{15})/;

  # Third and fourth whitespace separated fields of "ls -l" output.
  my $ls_extract_id = qr/^[^\s]+\s+[^\s]+\s+([^\s]+)\s+([^\s]+)/;
  # Splits a path into directory part(s) and final component ($2).
  my $basename      = qr|^(.*/)*([^/]+)$|;
  # Normalize a file name taken from a diff header.
  # IN: path as written in the diff
  # OUT: the path with runs of '/' collapsed to a single '/' and with every
  #      './' and '../' component dropped (the components are removed, not
  #      resolved).  An undefined argument is returned unchanged.
  sub patchfs_canonicalize_path ($) {
      my ($fname) = @_;
      return $fname unless defined $fname;
      $fname =~ s,/+,/,g;
      # /g is required: without it only the first run of dot components
      # was stripped and e.g. "a/./b/./c" came back as "a/b/./c"
      $fname =~ s,(^|/)(?:\.?\./)+,$1,g;
      return $fname;
  }
  # Render a unix timestamp in the mc-readable listing format
  # "MM-DD-YYYY hh:mm", using local time.
  # IN: seconds since the epoch
  sub timef
  {
      my ($min, $hour, $mday, $mon, $year) = (localtime($_[0]))[1 .. 5];
      return sprintf('%02d-%02d-%02d %02d:%02d',
                     $mon + 1, $mday, $year + 1900, $hour, $min);
  }
  # Parse the given string as a date and return it formatted by timef.
  # When Date::Parse is not available, or the string cannot be parsed,
  # the current time is used instead.
  # note: str2time interprets some wrong values (eg. " ") as 'today'
  sub datetime
  {
      my $stamp;
      $stamp = str2time($_[0]) if $parsedates;
      return timef($stamp) if defined $stamp;
      return timef(time);
  }
  # Report a fatal problem: write the message and a newline to stderr,
  # then terminate the script with exit status 1.  Never returns.
  sub error
  {
      my ($message) = @_;
      print STDERR $message, "\n";
      exit 1;
  }
  # (compressed) input
  # IN: name of the file to read
  # OUT: shell command line that writes the uncompressed contents of the
  #      file to stdout; the decompressor is chosen from the description
  #      printed by $fileutil, plain cat is used when none matches
  sub myin
  {
      my $qfname = quotemeta $_[0];
      # keep the description in a lexical so the caller's $_ is not clobbered
      my $type = `$fileutil $qfname`;
      $type = '' unless defined $type;
      # matched case-insensitively: file(1) reports e.g. "XZ compressed
      # data" and "LZMA compressed data" with capital letters
      return "$lzma -dc $qfname" if $type =~ /^'*lzma/i;
      return "$xz -dc $qfname"   if $type =~ /^'*xz/i;
      return "$bzip -dc $qfname" if $type =~ /^'*bzip/i;
      return "$gzip -dc $qfname" if $type =~ /^'*gzip/i;
      return "cat $qfname";
  }
  # (compressed) output
  # IN: name of the file to write
  # IN: true to append to the file, false to overwrite it
  # OUT: shell command line that reads data on stdin and stores it in the
  #      file, compressed with the tool matching the description printed by
  #      $fileutil for the existing file; plain cat when none matches
  sub myout
  {
      my ($fname, $append) = @_;
      my $qfname = quotemeta $fname;
      my $sep = $append ? '>>' : '>';
      # keep the description in a lexical so the caller's $_ is not clobbered
      my $type = `$fileutil $qfname`;
      $type = '' unless defined $type;
      # matched case-insensitively: file(1) reports e.g. "XZ compressed
      # data" and "LZMA compressed data" with capital letters
      return "$lzma -c $sep $qfname" if $type =~ /^'*lzma/i;
      return "$xz -c $sep $qfname"   if $type =~ /^'*xz/i;
      return "$bzip -c $sep $qfname" if $type =~ /^'*bzip/i;
      return "$gzip -c $sep $qfname" if $type =~ /^'*gzip/i;
      return "cat $sep $qfname";
  }
  # select diff filename conforming with rules found in diff.info
  # IN: source (old) file name from the diff header
  # IN: destination (new) file name from the diff header
  # OUT: list (name, prefix); prefix is 'PATCH-CREATE/' when the source is
  #      missing or /dev/null, 'PATCH-REMOVE/' when the destination is
  #      missing or /dev/null, and '' otherwise
  # Exits through error() when both names are missing or both are /dev/null.
  sub diff_filename
  {
      my ($fsrc, $fdst) = @_;
      # TODO: can remove these two calls later
      $fsrc = patchfs_canonicalize_path ($fsrc);
      $fdst = patchfs_canonicalize_path ($fdst);

      # test for defined/non-empty rather than truth so a file called "0"
      # is not mistaken for a missing name
      my $src_missing = !(defined $fsrc && length($fsrc));
      my $dst_missing = !(defined $fdst && length($fdst));
      my $src_null = !$src_missing && $fsrc eq '/dev/null';
      my $dst_null = !$dst_missing && $fdst eq '/dev/null';

      if ($src_missing && $dst_missing) {
          error('Index: not yet implemented');
      }
      # must be checked before the create/remove cases below, otherwise it
      # can never be reached
      if ($src_null && $dst_null) {
          error('Malformed diff');
      }
      return ($fdst, 'PATCH-CREATE/') if $src_missing || $src_null;
      return ($fsrc, 'PATCH-REMOVE/') if $dst_missing || $dst_null;

      # fewest path name components
      my $src_depth = ($fsrc =~ tr|/||);
      my $dst_depth = ($fdst =~ tr|/||);
      return ($fdst, '') if $dst_depth < $src_depth;
      return ($fsrc, '') if $dst_depth > $src_depth;

      # shorter base name; a name without a final component counts as 0
      my $src_base = ($fsrc =~ /$basename/) ? length($2) : 0;
      my $dst_base = ($fdst =~ /$basename/) ? length($2) : 0;
      return ($fdst, '') if $dst_base < $src_base;
      return ($fsrc, '') if $dst_base > $src_base;

      # shortest names, the source wins a tie
      return ($fdst, '') if length($fdst) < length($fsrc);
      return ($fsrc, '');
  }
  # Walk through the diff available on the global handle I and either list
  # the files it touches or copy selected parts of it to the given handle.
  # IN: diff "archive" name (only used to look up owner and group with ls)
  # IN: file handle for output; STDOUT for list, output/temp file else
  # IN: filename to watch (for: copyout, rm), '' for: list
  # IN: remove the file?
  #     true  - rm mode, print everything except the watched file
  #     false - ie. copyout mode, print just the file
  # OUT: in list context two array references, the source and the
  #      destination names found in the headers; nothing otherwise
  # The output handle is closed before returning unless in list mode.
  # Exits through error() on headers or hunks that cannot be parsed.
  sub parse($$$$)
  {
      my $archive = quotemeta shift;
      my $fh      = shift;
      my $file    = shift;
      my $rmmod   = shift;
      my ($state, $fsize, $time);
      my ($f, $fsrc, $fdst, $prefix);
      my ($unified, $context);
      my ($skipread, $filetoprint, $filefound);
      my ($h_add, $h_del, $h_ctx);    # hunk line counts
      my ($h_r1, $h_r2);              # hunk ranges
      my @outsrc;                     # if desired ...
      my @outdst;

      # use uid and gid from file
      my ($uid, $gid) = (`ls -l $archive` =~ /$ls_extract_id/);
      # str2time is only needed by datetime, i.e. when listing
      Date::Parse->import if ($parsedates && $file eq '');

      # print the listing entry of the file parsed last
      my $emit_entry = sub {
          printf {$fh} "-rw-r--r-- 1 %s %s %d %s %s%s\n",
              $uid, $gid, $fsize, datetime($time), $prefix, $f;
      };

      local $_;                       # do not clobber the caller's $_
      $state = 0; $fsize = 0; $f = '';
      $filefound = 0;
      # defined() is needed: a last line "0" without newline is false but
      # must still be processed
      while ($skipread || defined($_ = <I>)) {
          $skipread = 0;
          if ($state == 0) { # expecting comments
              $unified = $context = 0;
              $unified = 1 if (/^--- /);
              $context = 1 if (/^\*\*\* /);
              if (!$unified && !$context) {
                  $filefound = 0 if ($file ne '' && $filetoprint);
                  # shortcut for rmmod xor filefound
                  # - in rmmod we print if not found
                  # - in copyout (!rmmod) we print if found
                  print {$fh} $_ if ($rmmod != $filefound);
                  next;
              }
              $emit_entry->() if ($file eq '' && $filetoprint);

              # start of new file
              # steal next line, both formats; at end of input there is
              # none and the header checks below report the error
              my $second = <I>;
              $_ .= $second if defined $second;
              if ($unified) {
                  if (/$unified_header/) {
                      ($fsrc, $fdst, $time) = /$unified_extract/;
                  } elsif (/$unified_header2/) {
                      ($fsrc, $fdst, $time) = /$unified_extract2/;
                  } else {
                      error("Can't parse unified diff header");
                  }
              } elsif ($context) {
                  if (/$context_header/) {
                      ($fsrc, $fdst, $time) = /$context_extract/;
                  } elsif (/$context_header2/) {
                      ($fsrc, $fdst, $time) = /$context_extract2/;
                  } else {
                      error("Can't parse context diff header");
                  }
              } else {
                  error("Unrecognized diff header");
              }
              $fsrc = patchfs_canonicalize_path($fsrc);
              $fdst = patchfs_canonicalize_path($fdst);
              if (wantarray) {
                  push @outsrc, $fsrc;
                  push @outdst, $fdst;
              }
              ($f, $prefix) = diff_filename($fsrc, $fdst);
              $filefound = ($fsrc eq $file || $fdst eq $file);
              $f = "$f.diff";
              $filetoprint = 1;
              $fsize = length($_);
              print {$fh} $_ if ($rmmod != $filefound);
              $state = 1;
          } elsif ($state == 1) { # expecting diff hunk headers, end of file or comments
              if ($unified) {
                  my ($old_start, $old_tail, $new_start, $new_tail);
                  ($old_start, $old_tail, $h_r1, $new_start, $new_tail, $h_r2)
                      = /$unified_hunk/;
                  if (!defined($old_start) || !defined($new_start)) {
                      # hunk header does not come, a comment inside
                      # or maybe a new file, state 0 will decide
                      $skipread = 1;
                      $state = 0;
                      next;
                  }
                  $fsize += length($_);
                  print {$fh} $_ if ($rmmod != $filefound);
                  # a range without ",count" covers exactly one line
                  $h_r1 = 1 if (!defined($old_tail));
                  $h_r2 = 1 if (!defined($new_tail));
                  $h_add = $h_del = $h_ctx = 0;
                  $state = 2;
              } elsif ($context) {
                  if (!/$context_contents/) {
                      $skipread = 1;
                      $state = 0;
                      next;
                  }
                  print {$fh} $_ if ($rmmod != $filefound);
                  $fsize += length($_);
              }
          } elsif ($state == 2) { # expecting hunk contents
              if ($h_del + $h_ctx == $h_r1 && $h_add + $h_ctx == $h_r2) {
                  # hooray, end of hunk
                  # we optimistically ended with a hunk before but
                  # the line has been read already
                  $skipread = 1;
                  $state = 1;
                  next;
              }
              print {$fh} $_ if ($rmmod != $filefound);
              $fsize += length($_);
              my ($first) = /^(.)/;
              if (!defined($first)) {
                  # empty line: a context line whose leading space got lost
                  $h_ctx++;
              }
              elsif ($first eq '+')  { $h_add++; }
              elsif ($first eq '-')  { $h_del++; }
              elsif ($first eq ' ')  { $h_ctx++; }
              elsif ($first eq '\\') { 0; }  # "\ No newline ..." marker, not counted
              elsif ($first eq '@')  { error("Malformed hunk, header came too early"); }
              else                   { error("Unrecognized character in hunk"); }
          }
      }
      $emit_entry->() if ($file eq '' && $filetoprint);
      close($fh) if ($file ne '');
      return \(@outsrc, @outdst) if wantarray;
      return;
  }
  # list files affected by patch
  # IN: diff archive name
  # The diff itself is read from the already opened handle I, which is
  # closed afterwards; the listing goes to STDOUT.
  sub list($) {
      my ($archive) = @_;
      parse($archive, *STDOUT, '', 0);
      close(I);
  }
  # extract diff from patch
  # IN: diff file to find (name as listed, ie. with the optional
  #     PATCH-CREATE/ or PATCH-REMOVE/ prefix and the .diff suffix)
  # IN: output file name
  # The patch is read from the already opened handle I.  Exits through
  # error() when the output file cannot be created.
  sub copyout($$) {
      my ($file, $out) = @_;
      $file =~ s/^(PATCH-(CREATE|REMOVE)\/)?(.*)\.diff$/$3/;
      $file = patchfs_canonicalize_path ($file);
      # three-argument open: the name is taken literally, so leading or
      # trailing blanks and mode characters in it cannot be misinterpreted
      open(my $outfh, '>', $out) or error("Cannot open output file");
      # parse closes the handle when a file name is being watched
      parse('', $outfh, $file, 0);
  }
  # remove diff(s) from patch
  # IN: archive
  # IN: file to delete (name as listed, ie. with the optional
  #     PATCH-CREATE/ or PATCH-REMOVE/ prefix and the .diff suffix)
  # The patch is read from the already opened handle I; everything except
  # the named file is collected in a temporary file which then replaces
  # the archive.  Exits through error() when the archive cannot be written.
  sub rm($$) {
      my ($archive, @names) = @_;
      my ($tmp, $tmpname) = tempfile();
      # @names is a copy, so the caller's arguments stay untouched
      s/^(PATCH-(CREATE|REMOVE)\/)?(.*)\.diff$/$3/ for @names;
      # just the first file for now
      parse($archive, $tmp, $names[0], 1);
      close I;
      # replace archive
      my $status = system("cat \Q$tmpname\E | " . myout($archive, 0));
      # remove the temporary file before reporting a failure: error()
      # exits and would otherwise leave it behind
      unlink $tmpname;
      $status == 0
          or error("Can't write to archive");
  }
  # append diff to archive
  # IN: diff archive name
  # IN: newly created file name in archive
  # IN: the real source file
  # Currently this only reports that the operation is unsupported:
  # error() terminates the script, so nothing after the first statement
  # is ever executed.
  sub copyin($$$) {
      # TODO: seems to be tricky. what to do?
      # copyin of file which is already there may:
      #  * delete the original and copy only the new
      #  * just append the new hunks to the same file
      #    problems: may not be a valid diff, unmerged hunks
      #  * try to merge the two together
      #    ... but we do not want write patchutils again, right?
      error "Copying files into diff not supported";
      return;

      # ---- unreachable draft, kept for reference ----
      my ($archive, $name, $src) = @_;

      # in case we are appending another diff, we have
      # to delete/merge all the files
      open(DEVNULL, ">/dev/null");
      open I, myin($src).'|';
      my ($srclist, $dstlist) = parse($archive, *DEVNULL, '', 0);
      close(I);
      close(DEVNULL);
      for my $path (@$srclist) {
          print("SRC: del $path\n");
      }
      for my $path (@$dstlist) {
          print("DST: del $path\n");
      }
      return;

      # remove overwritten file
      open I, myin($archive).'|';
      rm ($archive, $name);
      close I;

      # append the new diff to the archive
      my $reader = myin("$src.diff");
      my $writer = myout($archive,1);
      system("$reader | $writer")==0
          or error "Can't write to archive";
  }
  # Command dispatch.  Invocation: <command> <archive> [<arguments>...]
  # Exit status is 0 for a handled command and 1 for an unknown or
  # missing one.
  my $command = defined $ARGV[0] ? $ARGV[0] : '';

  # list, copyout and rm read the (possibly compressed) archive through
  # the global handle I
  if ($command eq 'list' || $command eq 'copyout' || $command eq 'rm') {
      # without a name the reader command would be a bare "cat" waiting
      # on stdin
      error("Missing archive name") unless defined $ARGV[1];
      open(I, '-|', myin($ARGV[1])) or error("Cannot read archive");
  }

  if ($command eq 'list') {
      list($ARGV[1]);
      exit 0;
  } elsif ($command eq 'copyout') {
      copyout($ARGV[2], $ARGV[3]);
      exit 0;
  } elsif ($command eq 'rm') {
      rm($ARGV[1], $ARGV[2]);
      exit 0;
  } elsif ($command eq 'rmdir' || $command eq 'mkdir') {
      # accepted but nothing to do
      exit 0;
  } elsif ($command eq 'copyin') {
      copyin($ARGV[1], $ARGV[2], $ARGV[3]);
      exit 0;
  }
  exit 1;