APPNOTE.TXT 142 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217
  1. File: APPNOTE.TXT - .ZIP File Format Specification
  2. Version: 6.3.2
  3. Revised: September 28, 2007
  4. Copyright (c) 1989 - 2007 PKWARE Inc., All Rights Reserved.
  5. The use of certain technological aspects disclosed in the current
  6. APPNOTE is available pursuant to the below section entitled
  7. "Incorporating PKWARE Proprietary Technology into Your Product".
  8. I. Purpose
  9. ----------
  10. This specification is intended to define a cross-platform,
  11. interoperable file storage and transfer format. Since its
  12. first publication in 1989, PKWARE has remained committed to
  13. ensuring the interoperability of the .ZIP file format through
  14. publication and maintenance of this specification. We trust that
  15. all .ZIP compatible vendors and application developers that have
  16. adopted and benefited from this format will share and support
  17. this commitment to interoperability.
  18. II. Contacting PKWARE
  19. ---------------------
  20. PKWARE, Inc.
  21. 648 N. Plankinton Avenue, Suite 220
  22. Milwaukee, WI 53203
  23. +1-414-289-9788
  24. +1-414-289-9789 FAX
  25. zipformat@pkware.com
  26. III. Disclaimer
  27. ---------------
  28. Although PKWARE will attempt to supply current and accurate
  29. information relating to its file formats, algorithms, and the
  30. subject programs, the possibility of error or omission cannot
  31. be eliminated. PKWARE therefore expressly disclaims any warranty
  32. that the information contained in the associated materials relating
  33. to the subject programs and/or the format of the files created or
  34. accessed by the subject programs and/or the algorithms used by
  35. the subject programs, or any other matter, is current, correct or
  36. accurate as delivered. Any risk of damage due to any possible
  37. inaccurate information is assumed by the user of the information.
  38. Furthermore, the information relating to the subject programs
  39. and/or the file formats created or accessed by the subject
  40. programs and/or the algorithms used by the subject programs is
  41. subject to change without notice.
  42. If the version of this file is marked as a NOTIFICATION OF CHANGE,
  43. the content defines an Early Feature Specification (EFS) change
  44. to the .ZIP file format that may be subject to modification prior
  45. to publication of the Final Feature Specification (FFS). This
  46. document may also contain information on Planned Feature
  47. Specifications (PFS) defining recognized future extensions.
  48. IV. Change Log
  49. --------------
  50. Version Change Description Date
  51. ------- ------------------ ----------
  52. 5.2 -Single Password Symmetric Encryption 06/02/2003
  53. storage
  54. 6.1.0 -Smartcard compatibility 01/20/2004
  55. -Documentation on certificate storage
  56. 6.2.0 -Introduction of Central Directory 04/26/2004
  57. Encryption for encrypting metadata
  58. -Added OS/X to Version Made By values
  59. 6.2.1 -Added Extra Field placeholder for 04/01/2005
  60. POSZIP using ID 0x4690
  61. -Clarified size field on
  62. "zip64 end of central directory record"
  63. 6.2.2 -Documented Final Feature Specification 01/06/2006
  64. for Strong Encryption
  65. -Clarifications and typographical
  66. corrections
  67. 6.3.0 -Added tape positioning storage 09/29/2006
  68. parameters
  69. -Expanded list of supported hash algorithms
  70. -Expanded list of supported compression
  71. algorithms
  72. -Expanded list of supported encryption
  73. algorithms
  74. -Added option for Unicode filename
  75. storage
  76. -Clarifications for consistent use
  77. of Data Descriptor records
  78. -Added additional "Extra Field"
  79. definitions
  80. 6.3.1 -Corrected standard hash values for 04/11/2007
  81. SHA-256/384/512
  82. 6.3.2 -Added compression method 97 09/28/2007
  83. -Documented InfoZIP "Extra Field"
  84. values for UTF-8 file name and
  85. file comment storage
  86. V. General Format of a .ZIP file
  87. --------------------------------
  88. Files are stored in arbitrary order. Large .ZIP files can span multiple
  89. volumes or be split into user-defined segment sizes. All values
  90. are stored in little-endian byte order unless otherwise specified.
  91. Overall .ZIP file format:
  92. [local file header 1]
  93. [file data 1]
  94. [data descriptor 1]
  95. .
  96. .
  97. .
  98. [local file header n]
  99. [file data n]
  100. [data descriptor n]
  101. [archive decryption header]
  102. [archive extra data record]
  103. [central directory]
  104. [zip64 end of central directory record]
  105. [zip64 end of central directory locator]
  106. [end of central directory record]
  107. A. Local file header:
  108. local file header signature 4 bytes (0x04034b50)
  109. version needed to extract 2 bytes
  110. general purpose bit flag 2 bytes
  111. compression method 2 bytes
  112. last mod file time 2 bytes
  113. last mod file date 2 bytes
  114. crc-32 4 bytes
  115. compressed size 4 bytes
  116. uncompressed size 4 bytes
  117. file name length 2 bytes
  118. extra field length 2 bytes
  119. file name (variable size)
  120. extra field (variable size)
  121. B. File data:
  122. Immediately following the local header for a file
  123. is the compressed or stored data for the file.
  124. The series of [local file header][file data][data
  125. descriptor] repeats for each file in the .ZIP archive.
  126. C. Data descriptor:
  127. crc-32 4 bytes
  128. compressed size 4 bytes
  129. uncompressed size 4 bytes
  130. This descriptor exists only if bit 3 of the general
  131. purpose bit flag is set (see below). It is byte aligned
  132. and immediately follows the last byte of compressed data.
  133. This descriptor is used only when it was not possible to
  134. seek in the output .ZIP file, e.g., when the output .ZIP file
  135. was standard output or a non-seekable device. For ZIP64(tm) format
  136. archives, the compressed and uncompressed sizes are 8 bytes each.
  137. When compressing files, compressed and uncompressed sizes
  138. should be stored in ZIP64 format (as 8 byte values) when a
  139. file's size exceeds 0xFFFFFFFF. However, ZIP64 format may be
  140. used regardless of the size of a file. When extracting, if
  141. the zip64 extended information extra field is present for
  142. the file, the compressed and uncompressed sizes will be 8
  143. byte values.
  144. Although not originally assigned a signature, the value
  145. 0x08074b50 has commonly been adopted as a signature value
  146. for the data descriptor record. Implementers should be
  147. aware that ZIP files may be encountered with or without this
  148. signature marking data descriptors and should account for
  149. either case when reading ZIP files to ensure compatibility.
  150. When writing ZIP files, it is recommended to include the
  151. signature value marking the data descriptor record. When
  152. the signature is used, the fields currently defined for
  153. the data descriptor record will immediately follow the
  154. signature.
  155. An extensible data descriptor will be released in a future
  156. version of this APPNOTE. This new record is intended to
  157. resolve conflicts with the use of this record going forward,
  158. and to provide better support for streamed file processing.
  159. When the Central Directory Encryption method is used, the data
  160. descriptor record is not required, but may be used. If present,
  161. and bit 3 of the general purpose bit field is set to indicate
  162. its presence, the values in fields of the data descriptor
  163. record should be set to binary zeros.
  164. D. Archive decryption header:
  165. The Archive Decryption Header is introduced in version 6.2
  166. of the ZIP format specification. This record exists in support
  167. of the Central Directory Encryption Feature implemented as part of
  168. the Strong Encryption Specification as described in this document.
  169. When the Central Directory Structure is encrypted, this decryption
  170. header will precede the encrypted data segment. The encrypted
  171. data segment will consist of the Archive extra data record (if
  172. present) and the encrypted Central Directory Structure data.
  173. The format of this data record is identical to the Decryption
  174. header record preceding compressed file data. If the central
  175. directory structure is encrypted, the location of the start of
  176. this data record is determined using the Start of Central Directory
  177. field in the Zip64 End of Central Directory record. Refer to the
  178. section on the Strong Encryption Specification for information
  179. on the fields used in the Archive Decryption Header record.
  180. E. Archive extra data record:
  181. archive extra data signature 4 bytes (0x08064b50)
  182. extra field length 4 bytes
  183. extra field data (variable size)
  184. The Archive Extra Data Record is introduced in version 6.2
  185. of the ZIP format specification. This record exists in support
  186. of the Central Directory Encryption Feature implemented as part of
  187. the Strong Encryption Specification as described in this document.
  188. When present, this record immediately precedes the central
  189. directory data structure. The size of this data record will be
  190. included in the Size of the Central Directory field in the
  191. End of Central Directory record. If the central directory structure
  192. is compressed, but not encrypted, the location of the start of
  193. this data record is determined using the Start of Central Directory
  194. field in the Zip64 End of Central Directory record.
  195. F. Central directory structure:
  196. [file header 1]
  197. .
  198. .
  199. .
  200. [file header n]
  201. [digital signature]
  202. File header:
  203. central file header signature 4 bytes (0x02014b50)
  204. version made by 2 bytes
  205. version needed to extract 2 bytes
  206. general purpose bit flag 2 bytes
  207. compression method 2 bytes
  208. last mod file time 2 bytes
  209. last mod file date 2 bytes
  210. crc-32 4 bytes
  211. compressed size 4 bytes
  212. uncompressed size 4 bytes
  213. file name length 2 bytes
  214. extra field length 2 bytes
  215. file comment length 2 bytes
  216. disk number start 2 bytes
  217. internal file attributes 2 bytes
  218. external file attributes 4 bytes
  219. relative offset of local header 4 bytes
  220. file name (variable size)
  221. extra field (variable size)
  222. file comment (variable size)
  223. Digital signature:
  224. header signature 4 bytes (0x05054b50)
  225. size of data 2 bytes
  226. signature data (variable size)
  227. With the introduction of the Central Directory Encryption
  228. feature in version 6.2 of this specification, the Central
  229. Directory Structure may be stored both compressed and encrypted.
  230. Although not required, it is assumed, when encrypting the
  231. Central Directory Structure, that it will be compressed
  232. for greater storage efficiency. Information on the
  233. Central Directory Encryption feature can be found in the section
  234. describing the Strong Encryption Specification. The Digital
  235. Signature record will be neither compressed nor encrypted.
  236. G. Zip64 end of central directory record:
  237. zip64 end of central dir
  238. signature 4 bytes (0x06064b50)
  239. size of zip64 end of central
  240. directory record 8 bytes
  241. version made by 2 bytes
  242. version needed to extract 2 bytes
  243. number of this disk 4 bytes
  244. number of the disk with the
  245. start of the central directory 4 bytes
  246. total number of entries in the
  247. central directory on this disk 8 bytes
  248. total number of entries in the
  249. central directory 8 bytes
  250. size of the central directory 8 bytes
  251. offset of start of central
  252. directory with respect to
  253. the starting disk number 8 bytes
  254. zip64 extensible data sector (variable size)
  255. The value stored into the "size of zip64 end of central
  256. directory record" should be the size of the remaining
  257. record and should not include the leading 12 bytes.
  258. Size = SizeOfFixedFields + SizeOfVariableData - 12.
  259. The above record structure defines Version 1 of the
  260. zip64 end of central directory record. Version 1 was
  261. implemented in versions of this specification preceding
  262. 6.2 in support of the ZIP64 large file feature. The
  263. introduction of the Central Directory Encryption feature
  264. implemented in version 6.2 as part of the Strong Encryption
  265. Specification defines Version 2 of this record structure.
  266. Refer to the section describing the Strong Encryption
  267. Specification for details on the version 2 format for
  268. this record.
  269. Special purpose data may reside in the zip64 extensible data
  270. sector field following either a V1 or V2 version of this
  271. record. To ensure identification of this special purpose data
  272. it must include an identifying header block consisting of the
  273. following:
  274. Header ID - 2 bytes
  275. Data Size - 4 bytes
  276. The Header ID field indicates the type of data that is in the
  277. data block that follows.
  278. Data Size identifies the number of bytes that follow for this
  279. data block type.
  280. Multiple special purpose data blocks may be present, but each
  281. must be preceded by a Header ID and Data Size field. Current
  282. mappings of Header ID values supported in this field are as
  283. defined in APPENDIX C.
  284. H. Zip64 end of central directory locator:
  285. zip64 end of central dir locator
  286. signature 4 bytes (0x07064b50)
  287. number of the disk with the
  288. start of the zip64 end of
  289. central directory 4 bytes
  290. relative offset of the zip64
  291. end of central directory record 8 bytes
  292. total number of disks 4 bytes
  293. I. End of central directory record:
  294. end of central dir signature 4 bytes (0x06054b50)
  295. number of this disk 2 bytes
  296. number of the disk with the
  297. start of the central directory 2 bytes
  298. total number of entries in the
  299. central directory on this disk 2 bytes
  300. total number of entries in
  301. the central directory 2 bytes
  302. size of the central directory 4 bytes
  303. offset of start of central
  304. directory with respect to
  305. the starting disk number 4 bytes
  306. .ZIP file comment length 2 bytes
  307. .ZIP file comment (variable size)
  308. J. Explanation of fields:
  309. version made by (2 bytes)
  310. The upper byte indicates the compatibility of the file
  311. attribute information. If the external file attributes
  312. are compatible with MS-DOS and can be read by PKZIP for
  313. DOS version 2.04g then this value will be zero. If these
  314. attributes are not compatible, then this value will
  315. identify the host system on which the attributes are
  316. compatible. Software can use this information to determine
  317. the line record format for text files etc. The current
  318. mappings are:
  319. 0 - MS-DOS and OS/2 (FAT / VFAT / FAT32 file systems)
  320. 1 - Amiga 2 - OpenVMS
  321. 3 - UNIX 4 - VM/CMS
  322. 5 - Atari ST 6 - OS/2 H.P.F.S.
  323. 7 - Macintosh 8 - Z-System
  324. 9 - CP/M 10 - Windows NTFS
  325. 11 - MVS (OS/390 - Z/OS) 12 - VSE
  326. 13 - Acorn Risc 14 - VFAT
  327. 15 - alternate MVS 16 - BeOS
  328. 17 - Tandem 18 - OS/400
  329. 19 - OS/X (Darwin) 20 through 255 - unused
  330. The lower byte indicates the ZIP specification version
  331. (the version of this document) supported by the software
  332. used to encode the file. The value/10 indicates the major
  333. version number, and the value mod 10 is the minor version
  334. number.
  335. version needed to extract (2 bytes)
  336. The minimum supported ZIP specification version needed to
  337. extract the file, mapped as above. This value is based on
  338. the specific format features a ZIP program must support to
  339. be able to extract the file. If multiple features are
  340. applied to a file, the minimum version should be set to the
  341. feature having the highest value. New features or feature
  342. changes affecting the published format specification will be
  343. implemented using higher version numbers than the last
  344. published value to avoid conflict.
  345. Current minimum feature versions are as defined below:
  346. 1.0 - Default value
  347. 1.1 - File is a volume label
  348. 2.0 - File is a folder (directory)
  349. 2.0 - File is compressed using Deflate compression
  350. 2.0 - File is encrypted using traditional PKWARE encryption
  351. 2.1 - File is compressed using Deflate64(tm)
  352. 2.5 - File is compressed using PKWARE DCL Implode
  353. 2.7 - File is a patch data set
  354. 4.5 - File uses ZIP64 format extensions
  355. 4.6 - File is compressed using BZIP2 compression*
  356. 5.0 - File is encrypted using DES
  357. 5.0 - File is encrypted using 3DES
  358. 5.0 - File is encrypted using original RC2 encryption
  359. 5.0 - File is encrypted using RC4 encryption
  360. 5.1 - File is encrypted using AES encryption
  361. 5.1 - File is encrypted using corrected RC2 encryption**
  362. 5.2 - File is encrypted using corrected RC2-64 encryption**
  363. 6.1 - File is encrypted using non-OAEP key wrapping***
  364. 6.2 - Central directory encryption
  365. 6.3 - File is compressed using LZMA
  366. 6.3 - File is compressed using PPMd+
  367. 6.3 - File is encrypted using Blowfish
  368. 6.3 - File is encrypted using Twofish
  369. * Early 7.x (pre-7.2) versions of PKZIP incorrectly set the
  370. version needed to extract for BZIP2 compression to be 50
  371. when it should have been 46.
  372. ** Refer to the section on Strong Encryption Specification
  373. for additional information regarding RC2 corrections.
  374. *** Certificate encryption using non-OAEP key wrapping is the
  375. intended mode of operation for all versions beginning with 6.1.
  376. Support for OAEP key wrapping should only be used for
  377. backward compatibility when sending ZIP files to be opened by
  378. versions of PKZIP older than 6.1 (5.0 or 6.0).
  379. + Files compressed using PPMd should set the version
  380. needed to extract field to 6.3; however, not all ZIP
  381. programs enforce this and may be unable to decompress
  382. data files compressed using PPMd if this value is set.
  383. When using ZIP64 extensions, the corresponding value in the
  384. zip64 end of central directory record should also be set.
  385. This field should be set appropriately to indicate whether
  386. Version 1 or Version 2 format is in use.
  387. general purpose bit flag: (2 bytes)
  388. Bit 0: If set, indicates that the file is encrypted.
  389. (For Method 6 - Imploding)
  390. Bit 1: If the compression method used was type 6,
  391. Imploding, then this bit, if set, indicates
  392. an 8K sliding dictionary was used. If clear,
  393. then a 4K sliding dictionary was used.
  394. Bit 2: If the compression method used was type 6,
  395. Imploding, then this bit, if set, indicates
  396. 3 Shannon-Fano trees were used to encode the
  397. sliding dictionary output. If clear, then 2
  398. Shannon-Fano trees were used.
  399. (For Methods 8 and 9 - Deflating)
  400. Bit 2 Bit 1
  401. 0 0 Normal (-en) compression option was used.
  402. 0 1 Maximum (-exx/-ex) compression option was used.
  403. 1 0 Fast (-ef) compression option was used.
  404. 1 1 Super Fast (-es) compression option was used.
  405. (For Method 14 - LZMA)
  406. Bit 1: If the compression method used was type 14,
  407. LZMA, then this bit, if set, indicates
  408. an end-of-stream (EOS) marker is used to
  409. mark the end of the compressed data stream.
  410. If clear, then an EOS marker is not present
  411. and the compressed data size must be known
  412. to extract.
  413. Note: Bits 1 and 2 are undefined if the compression
  414. method is any other.
  415. Bit 3: If this bit is set, the fields crc-32, compressed
  416. size and uncompressed size are set to zero in the
  417. local header. The correct values are put in the
  418. data descriptor immediately following the compressed
  419. data. (Note: PKZIP version 2.04g for DOS only
  420. recognizes this bit for method 8 compression; newer
  421. versions of PKZIP recognize this bit for any
  422. compression method.)
  423. Bit 4: Reserved for use with method 8, for enhanced
  424. deflating.
  425. Bit 5: If this bit is set, this indicates that the file is
  426. compressed patched data. (Note: Requires PKZIP
  427. version 2.70 or greater)
  428. Bit 6: Strong encryption. If this bit is set, you should
  429. set the version needed to extract value to at least
  430. 50 and you must also set bit 0. If AES encryption
  431. is used, the version needed to extract value must
  432. be at least 51.
  433. Bit 7: Currently unused.
  434. Bit 8: Currently unused.
  435. Bit 9: Currently unused.
  436. Bit 10: Currently unused.
  437. Bit 11: Language encoding flag (EFS). If this bit is set,
  438. the filename and comment fields for this file
  439. must be encoded using UTF-8. (see APPENDIX D)
  440. Bit 12: Reserved by PKWARE for enhanced compression.
  441. Bit 13: Used when encrypting the Central Directory to indicate
  442. selected data values in the Local Header are masked to
  443. hide their actual values. See the section describing
  444. the Strong Encryption Specification for details.
  445. Bit 14: Reserved by PKWARE.
  446. Bit 15: Reserved by PKWARE.
  447. compression method: (2 bytes)
  448. (see accompanying documentation for algorithm
  449. descriptions)
  450. 0 - The file is stored (no compression)
  451. 1 - The file is Shrunk
  452. 2 - The file is Reduced with compression factor 1
  453. 3 - The file is Reduced with compression factor 2
  454. 4 - The file is Reduced with compression factor 3
  455. 5 - The file is Reduced with compression factor 4
  456. 6 - The file is Imploded
  457. 7 - Reserved for Tokenizing compression algorithm
  458. 8 - The file is Deflated
  459. 9 - Enhanced Deflating using Deflate64(tm)
  460. 10 - PKWARE Data Compression Library Imploding (old IBM TERSE)
  461. 11 - Reserved by PKWARE
  462. 12 - File is compressed using BZIP2 algorithm
  463. 13 - Reserved by PKWARE
  464. 14 - LZMA (EFS)
  465. 15 - Reserved by PKWARE
  466. 16 - Reserved by PKWARE
  467. 17 - Reserved by PKWARE
  468. 18 - File is compressed using IBM TERSE (new)
  469. 19 - IBM LZ77 z Architecture (PFS)
  470. 97 - WavPack compressed data
  471. 98 - PPMd version I, Rev 1
  472. date and time fields: (2 bytes each)
  473. The date and time are encoded in standard MS-DOS format.
  474. If input came from standard input, the date and time are
  475. those at which compression was started for this data.
  476. If encrypting the central directory and general purpose bit
  477. flag 13 is set indicating masking, the value stored in the
  478. Local Header will be zero.
  479. CRC-32: (4 bytes)
  480. The CRC-32 algorithm was generously contributed by
  481. David Schwaderer and can be found in his excellent
  482. book "C Programmers Guide to NetBIOS" published by
  483. Howard W. Sams & Co. Inc. The 'magic number' for
  484. the CRC is 0xdebb20e3. The proper CRC pre and post
  485. conditioning is used, meaning that the CRC register
  486. is pre-conditioned with all ones (a starting value
  487. of 0xffffffff) and the value is post-conditioned by
  488. taking the one's complement of the CRC residual.
  489. If bit 3 of the general purpose flag is set, this
  490. field is set to zero in the local header and the correct
  491. value is put in the data descriptor and in the central
  492. directory. When encrypting the central directory, if the
  493. local header is not in ZIP64 format and general purpose
  494. bit flag 13 is set indicating masking, the value stored
  495. in the Local Header will be zero.
  496. compressed size: (4 bytes)
  497. uncompressed size: (4 bytes)
  498. The size of the file compressed and uncompressed,
  499. respectively. When a decryption header is present it will
  500. be placed in front of the file data and the value of the
  501. compressed file size will include the bytes of the decryption
  502. header. If bit 3 of the general purpose bit flag is set,
  503. these fields are set to zero in the local header and the
  504. correct values are put in the data descriptor and
  505. in the central directory. If an archive is in ZIP64 format
  506. and the value in this field is 0xFFFFFFFF, the size will be
  507. in the corresponding 8 byte ZIP64 extended information
  508. extra field. When encrypting the central directory, if the
  509. local header is not in ZIP64 format and general purpose bit
  510. flag 13 is set indicating masking, the value stored for the
  511. uncompressed size in the Local Header will be zero.
  512. file name length: (2 bytes)
  513. extra field length: (2 bytes)
  514. file comment length: (2 bytes)
  515. The length of the file name, extra field, and comment
  516. fields respectively. The combined length of any
  517. directory record and these three fields should not
  518. generally exceed 65,535 bytes. If input came from standard
  519. input, the file name length is set to zero.
  520. disk number start: (2 bytes)
  521. The number of the disk on which this file begins. If an
  522. archive is in ZIP64 format and the value in this field is
  523. 0xFFFF, the disk number will be in the corresponding 4 byte zip64
  524. extended information extra field.
  525. internal file attributes: (2 bytes)
  526. Bits 1 and 2 are reserved for use by PKWARE.
  527. The lowest bit of this field indicates, if set, that
  528. the file is apparently an ASCII or text file. If not
  529. set, that the file apparently contains binary data.
  530. The remaining bits are unused in version 1.0.
  531. The 0x0002 bit of this field indicates, if set, that a
  532. 4 byte variable record length control field precedes each
  533. logical record indicating the length of the record. The
  534. record length control field is stored in little-endian byte
  535. order. This flag is independent of text control characters,
  536. and if used in conjunction with text data, includes any
  537. control characters in the total length of the record. This
  538. value is provided for mainframe data transfer support.
  539. external file attributes: (4 bytes)
  540. The mapping of the external attributes is
  541. host-system dependent (see 'version made by'). For
  542. MS-DOS, the low order byte is the MS-DOS directory
  543. attribute byte. If input came from standard input, this
  544. field is set to zero.
  545. relative offset of local header: (4 bytes)
  546. This is the offset from the start of the first disk on
  547. which this file appears, to where the local header should
  548. be found. If an archive is in ZIP64 format and the value
  549. in this field is 0xFFFFFFFF, the offset will be in the
  550. corresponding 8 byte zip64 extended information extra field.
  551. file name: (Variable)
  552. The name of the file, with optional relative path.
  553. The path stored should not contain a drive or
  554. device letter, or a leading slash. All slashes
  555. should be forward slashes '/' as opposed to
  556. backwards slashes '\' for compatibility with Amiga
  557. and UNIX file systems etc. If input came from standard
  558. input, there is no file name field. If encrypting
  559. the central directory and general purpose bit flag 13 is set
  560. indicating masking, the file name stored in the Local Header
  561. will not be the actual file name. A masking value consisting
  562. of a unique hexadecimal value will be stored. This value will
  563. be sequentially incremented for each file in the archive. See
  564. the section on the Strong Encryption Specification for details
  565. on retrieving the encrypted file name.
  566. extra field: (Variable)
  567. This is for expansion. If additional information
  568. needs to be stored for special needs or for specific
  569. platforms, it should be stored here. Earlier versions
  570. of the software can then safely skip this file, and
  571. find the next file or header. This field will be 0
  572. length in version 1.0.
  573. In order to allow different programs and different types
  574. of information to be stored in the 'extra' field in .ZIP
  575. files, the following structure should be used for all
  576. programs storing data in this field:
  577. header1+data1 + header2+data2 . . .
  578. Each header should consist of:
  579. Header ID - 2 bytes
  580. Data Size - 2 bytes
  581. Note: all fields stored in Intel low-byte/high-byte order.
  582. The Header ID field indicates the type of data that is in
  583. the following data block.
  584. Header ID's of 0 thru 31 are reserved for use by PKWARE.
  585. The remaining ID's can be used by third party vendors for
  586. proprietary usage.
  587. The current Header ID mappings defined by PKWARE are:
  588. 0x0001 Zip64 extended information extra field
  589. 0x0007 AV Info
  590. 0x0008 Reserved for extended language encoding data (PFS)
  591. (see APPENDIX D)
  592. 0x0009 OS/2
  593. 0x000a NTFS
  594. 0x000c OpenVMS
  595. 0x000d UNIX
  596. 0x000e Reserved for file stream and fork descriptors
  597. 0x000f Patch Descriptor
  598. 0x0014 PKCS#7 Store for X.509 Certificates
  599. 0x0015 X.509 Certificate ID and Signature for
  600. individual file
  601. 0x0016 X.509 Certificate ID for Central Directory
  602. 0x0017 Strong Encryption Header
  603. 0x0018 Record Management Controls
  604. 0x0019 PKCS#7 Encryption Recipient Certificate List
  605. 0x0065 IBM S/390 (Z390), AS/400 (I400) attributes
  606. - uncompressed
  607. 0x0066 Reserved for IBM S/390 (Z390), AS/400 (I400)
  608. attributes - compressed
  609. 0x4690 POSZIP 4690 (reserved)
  610. Third party mappings commonly used are:
  611. 0x07c8 Macintosh
  612. 0x2605 ZipIt Macintosh
  613. 0x2705 ZipIt Macintosh 1.3.5+
  614. 0x2805 ZipIt Macintosh 1.3.5+
  615. 0x334d Info-ZIP Macintosh
  616. 0x4341 Acorn/SparkFS
  617. 0x4453 Windows NT security descriptor (binary ACL)
  618. 0x4704 VM/CMS
  619. 0x470f MVS
  620. 0x4b46 FWKCS MD5 (see below)
  621. 0x4c41 OS/2 access control list (text ACL)
  622. 0x4d49 Info-ZIP OpenVMS
  623. 0x4f4c Xceed original location extra field
  624. 0x5356 AOS/VS (ACL)
  625. 0x5455 extended timestamp
  626. 0x554e Xceed unicode extra field
  627. 0x5855 Info-ZIP UNIX (original, also OS/2, NT, etc)
  628. 0x6375 Info-ZIP Unicode Comment Extra Field
  629. 0x6542 BeOS/BeBox
  630. 0x7075 Info-ZIP Unicode Path Extra Field
  631. 0x756e ASi UNIX
  632. 0x7855 Info-ZIP UNIX (new)
  633. 0xa220 Microsoft Open Packaging Growth Hint
  634. 0xfd4a SMS/QDOS
  635. Detailed descriptions of Extra Fields defined by third
  636. party mappings will be documented as information on
  637. these data structures is made available to PKWARE.
  638. PKWARE does not guarantee the accuracy of any published
  639. third party data.
  640. The Data Size field indicates the size of the following
  641. data block. Programs can use this value to skip to the
  642. next header block, passing over any data blocks that are
  643. not of interest.
  644. Note: As stated above, the size of the entire .ZIP file
  645. header, including the file name, comment, and extra
  646. field should not exceed 64K in size.
  647. In case two different programs should appropriate the same
  648. Header ID value, it is strongly recommended that each
  649. program place a unique signature of at least two bytes in
  650. size (and preferably 4 bytes or bigger) at the start of
  651. each data area. Every program should verify that its
  652. unique signature is present, in addition to the Header ID
  653. value being correct, before assuming that it is a block of
  654. known type.
  655. -Zip64 Extended Information Extra Field (0x0001):
  656. The following is the layout of the zip64 extended
  657. information "extra" block. If one of the size or
  658. offset fields in the Local or Central directory
  659. record is too small to hold the required data,
  660. a Zip64 extended information record is created.
  661. The order of the fields in the zip64 extended
  662. information record is fixed, but the fields will
  663. only appear if the corresponding Local or Central
  664. directory record field is set to 0xFFFF or 0xFFFFFFFF.
  665. Note: all fields stored in Intel low-byte/high-byte order.
  666. Value Size Description
  667. ----- ---- -----------
  668. (ZIP64) 0x0001 2 bytes Tag for this "extra" block type
  669. Size 2 bytes Size of this "extra" block
  670. Original
  671. Size 8 bytes Original uncompressed file size
  672. Compressed
  673. Size 8 bytes Size of compressed data
  674. Relative Header
  675. Offset 8 bytes Offset of local header record
  676. Disk Start
  677. Number 4 bytes Number of the disk on which
  678. this file starts
  679. This entry in the Local header must include BOTH original
  680. and compressed file size fields. If encrypting the
  681. central directory and bit 13 of the general purpose bit
  682. flag is set indicating masking, the value stored in the
  683. Local Header for the original file size will be zero.
  684. -OS/2 Extra Field (0x0009):
  685. The following is the layout of the OS/2 attributes "extra"
  686. block. (Last Revision 09/05/95)
  687. Note: all fields stored in Intel low-byte/high-byte order.
  688. Value Size Description
  689. ----- ---- -----------
  690. (OS/2) 0x0009 2 bytes Tag for this "extra" block type
  691. TSize 2 bytes Size for the following data block
  692. BSize 4 bytes Uncompressed Block Size
  693. CType 2 bytes Compression type
  694. EACRC 4 bytes CRC value for uncompressed block
  695. (var) variable Compressed block
  696. The OS/2 extended attribute structure (FEA2LIST) is
  697. compressed and then stored in its entirety within this
  698. structure. There will only ever be one "block" of data in
  699. VarFields[].
  700. -NTFS Extra Field (0x000a):
  701. The following is the layout of the NTFS attributes
  702. "extra" block. (Note: At this time the Mtime, Atime
  703. and Ctime values may be used on any WIN32 system.)
  704. Note: all fields stored in Intel low-byte/high-byte order.
  705. Value Size Description
  706. ----- ---- -----------
  707. (NTFS) 0x000a 2 bytes Tag for this "extra" block type
  708. TSize 2 bytes Size of the total "extra" block
  709. Reserved 4 bytes Reserved for future use
  710. Tag1 2 bytes NTFS attribute tag value #1
  711. Size1 2 bytes Size of attribute #1, in bytes
  712. (var.) Size1 Attribute #1 data
  713. .
  714. .
  715. .
  716. TagN 2 bytes NTFS attribute tag value #N
  717. SizeN 2 bytes Size of attribute #N, in bytes
  718. (var.) SizeN Attribute #N data
  719. For NTFS, values for Tag1 through TagN are as follows:
  720. (currently only one set of attributes is defined for NTFS)
  721. Tag Size Description
  722. ----- ---- -----------
  723. 0x0001 2 bytes Tag for attribute #1
  724. Size1 2 bytes Size of attribute #1, in bytes
  725. Mtime 8 bytes File last modification time
  726. Atime 8 bytes File last access time
  727. Ctime 8 bytes File creation time
  728. -OpenVMS Extra Field (0x000c):
  729. The following is the layout of the OpenVMS attributes
  730. "extra" block.
  731. Note: all fields stored in Intel low-byte/high-byte order.
  732. Value Size Description
  733. ----- ---- -----------
  734. (VMS) 0x000c 2 bytes Tag for this "extra" block type
  735. TSize 2 bytes Size of the total "extra" block
  736. CRC 4 bytes 32-bit CRC for remainder of the block
  737. Tag1 2 bytes OpenVMS attribute tag value #1
  738. Size1 2 bytes Size of attribute #1, in bytes
  739. (var.) Size1 Attribute #1 data
  740. .
  741. .
  742. .
  743. TagN 2 bytes OpenVMS attribute tag value #N
  744. SizeN 2 bytes Size of attribute #N, in bytes
  745. (var.) SizeN Attribute #N data
  746. Rules:
  747. 1. There will be one or more attributes present, which
  748. will each be preceded by the above TagX & SizeX values.
  749. These values are identical to the ATR$C_XXXX and
  750. ATR$S_XXXX constants which are defined in ATR.H under
  751. OpenVMS C. Neither of these values will ever be zero.
  752. 2. No word alignment or padding is performed.
  753. 3. A well-behaved PKZIP/OpenVMS program should never produce
  754. more than one sub-block with the same TagX value. Also,
  755. there will never be more than one "extra" block of type
  756. 0x000c in a particular directory record.
  757. -UNIX Extra Field (0x000d):
  758. The following is the layout of the UNIX "extra" block.
  759. Note: all fields are stored in Intel low-byte/high-byte
  760. order.
  761. Value Size Description
  762. ----- ---- -----------
  763. (UNIX) 0x000d 2 bytes Tag for this "extra" block type
  764. TSize 2 bytes Size for the following data block
  765. Atime 4 bytes File last access time
  766. Mtime 4 bytes File last modification time
  767. Uid 2 bytes File user ID
  768. Gid 2 bytes File group ID
  769. (var) variable Variable length data field
  770. The variable length data field will contain file type
  771. specific data. Currently the only values allowed are
  772. the original "linked to" file names for hard or symbolic
  773. links, and the major and minor device node numbers for
  774. character and block device nodes. Since device nodes
  775. cannot be either symbolic or hard links, only one set of
  776. variable length data is stored. Link files will have the
  777. name of the original file stored. This name is NOT NULL
  778. terminated. Its size can be determined by checking TSize -
  779. 12. Device entries will have eight bytes stored as two 4
  780. byte entries (in little endian format). The first entry
  781. will be the major device number, and the second the minor
  782. device number.
  783. -PATCH Descriptor Extra Field (0x000f):
  784. The following is the layout of the Patch Descriptor "extra"
  785. block.
  786. Note: all fields stored in Intel low-byte/high-byte order.
  787. Value Size Description
  788. ----- ---- -----------
  789. (Patch) 0x000f 2 bytes Tag for this "extra" block type
  790. TSize 2 bytes Size of the total "extra" block
  791. Version 2 bytes Version of the descriptor
  792. Flags 4 bytes Actions and reactions (see below)
  793. OldSize 4 bytes Size of the file about to be patched
  794. OldCRC 4 bytes 32-bit CRC of the file to be patched
  795. NewSize 4 bytes Size of the resulting file
  796. NewCRC 4 bytes 32-bit CRC of the resulting file
  797. Actions and reactions
  798. Bits Description
  799. ---- ----------------
  800. 0 Use for auto detection
  801. 1 Treat as a self-patch
  802. 2-3 RESERVED
  803. 4-5 Action (see below)
  804. 6-7 RESERVED
  805. 8-9 Reaction (see below) to absent file
  806. 10-11 Reaction (see below) to newer file
  807. 12-13 Reaction (see below) to unknown file
  808. 14-15 RESERVED
  809. 16-31 RESERVED
  810. Actions
  811. Action Value
  812. ------ -----
  813. none 0
  814. add 1
  815. delete 2
  816. patch 3
  817. Reactions
  818. Reaction Value
  819. -------- -----
  820. ask 0
  821. skip 1
  822. ignore 2
  823. fail 3
  824. Patch support is provided by PKPatchMaker(tm) technology and is
  825. covered under U.S. Patents and Patents Pending. The use or
  826. implementation in a product of certain technological aspects set
  827. forth in the current APPNOTE, including those with regard to
  828. strong encryption, patching, or extended tape operations requires
  829. a license from PKWARE. Please contact PKWARE with regard to
  830. acquiring a license.
  831. -PKCS#7 Store for X.509 Certificates (0x0014):
  832. This field contains information about each of the certificates
  833. files may be signed with. When the Central Directory Encryption
  834. feature is enabled for a ZIP file, this record will appear in
  835. the Archive Extra Data Record, otherwise it will appear in the
  836. first central directory record and will be ignored in any
  837. other record.
  838. Note: all fields stored in Intel low-byte/high-byte order.
  839. Value Size Description
  840. ----- ---- -----------
  841. (Store) 0x0014 2 bytes Tag for this "extra" block type
  842. TSize 2 bytes Size of the store data
  843. TData TSize Data about the store
  844. -X.509 Certificate ID and Signature for individual file (0x0015):
  845. This field contains the information about which certificate in
  846. the PKCS#7 store was used to sign a particular file. It also
  847. contains the signature data. This field can appear multiple
  848. times, but can only appear once per certificate.
  849. Note: all fields stored in Intel low-byte/high-byte order.
  850. Value Size Description
  851. ----- ---- -----------
  852. (CID) 0x0015 2 bytes Tag for this "extra" block type
  853. TSize 2 bytes Size of data that follows
  854. TData TSize Signature Data
  855. -X.509 Certificate ID and Signature for central directory (0x0016):
  856. This field contains the information about which certificate in
  857. the PKCS#7 store was used to sign the central directory structure.
  858. When the Central Directory Encryption feature is enabled for a
  859. ZIP file, this record will appear in the Archive Extra Data Record,
  860. otherwise it will appear in the first central directory record.
  861. Note: all fields stored in Intel low-byte/high-byte order.
  862. Value Size Description
  863. ----- ---- -----------
  864. (CDID) 0x0016 2 bytes Tag for this "extra" block type
  865. TSize 2 bytes Size of data that follows
  866. TData TSize Data
  867. -Strong Encryption Header (0x0017):
  868. Value Size Description
  869. ----- ---- -----------
  870. 0x0017 2 bytes Tag for this "extra" block type
  871. TSize 2 bytes Size of data that follows
  872. Format 2 bytes Format definition for this record
  873. AlgID 2 bytes Encryption algorithm identifier
  874. Bitlen 2 bytes Bit length of encryption key
  875. Flags 2 bytes Processing flags
  876. CertData TSize-8 Certificate decryption extra field data
  877. (refer to the explanation for CertData
  878. in the section describing the
  879. Certificate Processing Method under
  880. the Strong Encryption Specification)
  881. -Record Management Controls (0x0018):
  882. Value Size Description
  883. ----- ---- -----------
  884. (Rec-CTL) 0x0018 2 bytes Tag for this "extra" block type
  885. CSize 2 bytes Size of total extra block data
  886. Tag1 2 bytes Record control attribute 1
  887. Size1 2 bytes Size of attribute 1, in bytes
  888. Data1 Size1 Attribute 1 data
  889. .
  890. .
  891. .
  892. TagN 2 bytes Record control attribute N
  893. SizeN 2 bytes Size of attribute N, in bytes
  894. DataN SizeN Attribute N data
  895. -PKCS#7 Encryption Recipient Certificate List (0x0019):
  896. This field contains information about each of the certificates
  897. used in encryption processing and it can be used to identify who is
  898. allowed to decrypt encrypted files. This field should only appear
  899. in the archive extra data record. This field is not required and
  900. serves only to aid archive modifications by preserving public
  901. encryption key data. Individual security requirements may dictate
  902. that this data be omitted to deter information exposure.
  903. Note: all fields stored in Intel low-byte/high-byte order.
  904. Value Size Description
  905. ----- ---- -----------
  906. (CStore) 0x0019 2 bytes Tag for this "extra" block type
  907. TSize 2 bytes Size of the store data
  908. TData TSize Data about the store
  909. TData:
  910. Value Size Description
  911. ----- ---- -----------
  912. Version 2 bytes Format version number - must be 0x0001 at this time
  913. CStore (var) PKCS#7 data blob
  914. -MVS Extra Field (0x0065):
  915. The following is the layout of the MVS "extra" block.
  916. Note: Some fields are stored in Big Endian format.
  917. All text is in EBCDIC format unless otherwise specified.
  918. Value Size Description
  919. ----- ---- -----------
  920. (MVS) 0x0065 2 bytes Tag for this "extra" block type
  921. TSize 2 bytes Size for the following data block
  922. ID 4 bytes EBCDIC "Z390" 0xE9F3F9F0 or
  923. "T4MV" for TargetFour
  924. (var) TSize-4 Attribute data (see APPENDIX B)
  925. -OS/400 Extra Field (0x0065):
  926. The following is the layout of the OS/400 "extra" block.
  927. Note: Some fields are stored in Big Endian format.
  928. All text is in EBCDIC format unless otherwise specified.
  929. Value Size Description
  930. ----- ---- -----------
  931. (OS400) 0x0065 2 bytes Tag for this "extra" block type
  932. TSize 2 bytes Size for the following data block
  933. ID 4 bytes EBCDIC "I400" 0xC9F4F0F0 or
  934. "T4MV" for TargetFour
  935. (var) TSize-4 Attribute data (see APPENDIX A)
  936. Third-party Mappings:
  937. -ZipIt Macintosh Extra Field (long) (0x2605):
  938. The following is the layout of the ZipIt extra block
  939. for Macintosh. The local-header and central-header versions
  940. are identical. This block must be present if the file is
  941. stored MacBinary-encoded and it should not be used if the file
  942. is not stored MacBinary-encoded.
  943. Value Size Description
  944. ----- ---- -----------
  945. (Mac2) 0x2605 Short tag for this extra block type
  946. TSize Short total data size for this block
  947. "ZPIT" beLong extra-field signature
  948. FnLen Byte length of FileName
  949. FileName variable full Macintosh filename
  950. FileType Byte[4] four-byte Mac file type string
  951. Creator Byte[4] four-byte Mac creator string
  952. -ZipIt Macintosh Extra Field (short, for files) (0x2705):
  953. The following is the layout of a shortened variant of the
  954. ZipIt extra block for Macintosh (without "full name" entry).
  955. This variant is used by ZipIt 1.3.5 and newer for entries of
  956. files (not directories) that do not have a MacBinary encoded
  957. file. The local-header and central-header versions are identical.
  958. Value Size Description
  959. ----- ---- -----------
  960. (Mac2b) 0x2705 Short tag for this extra block type
  961. TSize Short total data size for this block (12)
  962. "ZPIT" beLong extra-field signature
  963. FileType Byte[4] four-byte Mac file type string
  964. Creator Byte[4] four-byte Mac creator string
  965. fdFlags beShort attributes from FInfo.fdFlags,
  966. may be omitted
  967. 0x0000 beShort reserved, may be omitted
  968. -ZipIt Macintosh Extra Field (short, for directories) (0x2805):
  969. The following is the layout of a shortened variant of the
  970. ZipIt extra block for Macintosh used only for directory
  971. entries. This variant is used by ZipIt 1.3.5 and newer to
  972. save some optional Mac-specific information about directories.
  973. The local-header and central-header versions are identical.
  974. Value Size Description
  975. ----- ---- -----------
  976. (Mac2c) 0x2805 Short tag for this extra block type
  977. TSize Short total data size for this block (12)
  978. "ZPIT" beLong extra-field signature
  979. frFlags beShort attributes from DInfo.frFlags, may
  980. be omitted
  981. View beShort ZipIt view flag, may be omitted
  982. The View field specifies ZipIt-internal settings as follows:
  983. Bits of the Flags:
  984. bit 0 if set, the folder is shown expanded (open)
  985. when the archive contents are viewed in ZipIt.
  986. bits 1-15 reserved, zero;
  987. -FWKCS MD5 Extra Field (0x4b46):
  988. The FWKCS Contents_Signature System, used in
  989. automatically identifying files independent of file name,
  990. optionally adds and uses an extra field to support the
  991. rapid creation of an enhanced contents_signature:
  992. Header ID = 0x4b46
  993. Data Size = 0x0013
  994. Preface = 'M','D','5'
  995. followed by 16 bytes containing the uncompressed file's
  996. 128_bit MD5 hash(1), low byte first.
  997. When FWKCS revises a .ZIP file central directory to add
  998. this extra field for a file, it also replaces the
  999. central directory entry for that file's uncompressed
  1000. file length with a measured value.
  1001. FWKCS provides an option to strip this extra field, if
  1002. present, from a .ZIP file central directory. In adding
  1003. this extra field, FWKCS preserves .ZIP file Authenticity
  1004. Verification; if stripping this extra field, FWKCS
  1005. preserves all versions of AV through PKZIP version 2.04g.
  1006. FWKCS, and FWKCS Contents_Signature System, are
  1007. trademarks of Frederick W. Kantor.
  1008. (1) R. Rivest, RFC1321.TXT, MIT Laboratory for Computer
  1009. Science and RSA Data Security, Inc., April 1992.
  1010. ll.76-77: "The MD5 algorithm is being placed in the
  1011. public domain for review and possible adoption as a
  1012. standard."
  1013. -Info-ZIP Unicode Comment Extra Field (0x6375):
  1014. Stores the UTF-8 version of the file comment as stored in the
  1015. central directory header. (Last Revision 20070912)
  1016. Value Size Description
  1017. ----- ---- -----------
  1018. (UCom) 0x6375 Short tag for this extra block type ("uc")
  1019. TSize Short total data size for this block
  1020. Version 1 byte version of this extra field, currently 1
  1021. ComCRC32 4 bytes Comment Field CRC32 Checksum
  1022. UnicodeCom Variable UTF-8 version of the entry comment
  1023. Currently Version is set to the number 1. If there is a need
  1024. to change this field, the version will be incremented. Changes
  1025. may not be backward compatible so this extra field should not be
  1026. used if the version is not recognized.
  1027. The ComCRC32 is the standard zip CRC32 checksum of the File Comment
  1028. field in the central directory header. This is used to verify that
  1029. the comment field has not changed since the Unicode Comment extra field
  1030. was created. This can happen if a utility changes the File Comment
  1031. field but does not update the UTF-8 Comment extra field. If the CRC
  1032. check fails, this Unicode Comment extra field should be ignored and
  1033. the File Comment field in the header should be used instead.
  1034. The UnicodeCom field is the UTF-8 version of the File Comment field
  1035. in the header. As UnicodeCom is defined to be UTF-8, no UTF-8 byte
  1036. order mark (BOM) is used. The length of this field is determined by
  1037. subtracting the size of the previous fields from TSize. If both the
  1038. File Name and Comment fields are UTF-8, the new General Purpose Bit
  1039. Flag, bit 11 (Language encoding flag (EFS)), can be used to indicate
  1040. both the header File Name and Comment fields are UTF-8 and, in this
  1041. case, the Unicode Path and Unicode Comment extra fields are not
  1042. needed and should not be created. Note that, for backward
  1043. compatibility, bit 11 should only be used if the native character set
  1044. of the paths and comments being zipped up is already UTF-8. It is
  1045. expected that the same file comment storage method, either general
  1046. purpose bit 11 or extra fields, be used in both the Local and Central
  1047. Directory Header for a file.
  1048. -Info-ZIP Unicode Path Extra Field (0x7075):
  1049. Stores the UTF-8 version of the file name field as stored in the
  1050. local header and central directory header. (Last Revision 20070912)
  1051. Value Size Description
  1052. ----- ---- -----------
  1053. (UPath) 0x7075 Short tag for this extra block type ("up")
  1054. TSize Short total data size for this block
  1055. Version 1 byte version of this extra field, currently 1
  1056. NameCRC32 4 bytes File Name Field CRC32 Checksum
  1057. UnicodeName Variable UTF-8 version of the entry File Name
  1058. Currently Version is set to the number 1. If there is a need
  1059. to change this field, the version will be incremented. Changes
  1060. may not be backward compatible so this extra field should not be
  1061. used if the version is not recognized.
  1062. The NameCRC32 is the standard zip CRC32 checksum of the File Name
  1063. field in the header. This is used to verify that the header
  1064. File Name field has not changed since the Unicode Path extra field
  1065. was created. This can happen if a utility renames the File Name but
  1066. does not update the UTF-8 path extra field. If the CRC check fails,
  1067. this UTF-8 Path Extra Field should be ignored and the File Name field
  1068. in the header should be used instead.
  1069. The UnicodeName is the UTF-8 version of the contents of the File Name
  1070. field in the header. As UnicodeName is defined to be UTF-8, no UTF-8
  1071. byte order mark (BOM) is used. The length of this field is determined
  1072. by subtracting the size of the previous fields from TSize. If both
  1073. the File Name and Comment fields are UTF-8, the new General Purpose
  1074. Bit Flag, bit 11 (Language encoding flag (EFS)), can be used to
  1075. indicate that both the header File Name and Comment fields are UTF-8
  1076. and, in this case, the Unicode Path and Unicode Comment extra fields
  1077. are not needed and should not be created. Note that, for backward
  1078. compatibility, bit 11 should only be used if the native character set
  1079. of the paths and comments being zipped up is already UTF-8. It is
  1080. expected that the same file name storage method, either general
  1081. purpose bit 11 or extra fields, be used in both the Local and Central
  1082. Directory Header for a file.
  1083. -Microsoft Open Packaging Growth Hint (0xa220):
  1084. Value Size Description
  1085. ----- ---- -----------
  1086. 0xa220 Short tag for this extra block type
  1087. TSize Short size of Sig + PadVal + Padding
  1088. Sig Short verification signature (A028)
  1089. PadVal Short Initial padding value
  1090. Padding variable filled with NULL characters
  1091. file comment: (Variable)
  1092. The comment for this file.
  1093. number of this disk: (2 bytes)
  1094. The number of this disk, which contains the central
  1095. directory end record. If an archive is in ZIP64 format
  1096. and the value in this field is 0xFFFF, the actual disk number
  1097. will be in the corresponding 4 byte zip64 end of central
  1098. directory field.
  1099. number of the disk with the start of the central
  1100. directory: (2 bytes)
  1101. The number of the disk on which the central
  1102. directory starts. If an archive is in ZIP64 format
  1103. and the value in this field is 0xFFFF, the actual disk number will
  1104. be in the corresponding 4 byte zip64 end of central
  1105. directory field.
  1106. total number of entries in the central dir on
  1107. this disk: (2 bytes)
  1108. The number of central directory entries on this disk.
  1109. If an archive is in ZIP64 format and the value in
  1110. this field is 0xFFFF, the actual count will be in the
  1111. corresponding 8 byte zip64 end of central
  1112. directory field.
  1113. total number of entries in the central dir: (2 bytes)
  1114. The total number of files in the .ZIP file. If an
  1115. archive is in ZIP64 format and the value in this field
  1116. is 0xFFFF, the actual count will be in the corresponding 8 byte
  1117. zip64 end of central directory field.
  1118. size of the central directory: (4 bytes)
  1119. The size (in bytes) of the entire central directory.
  1120. If an archive is in ZIP64 format and the value in
  1121. this field is 0xFFFFFFFF, the size will be in the
  1122. corresponding 8 byte zip64 end of central
  1123. directory field.
  1124. offset of start of central directory with respect to
  1125. the starting disk number: (4 bytes)
  1126. Offset of the start of the central directory on the
  1127. disk on which the central directory starts. If an
  1128. archive is in ZIP64 format and the value in this
  1129. field is 0xFFFFFFFF, the actual offset will be in the
  1130. corresponding 8 byte zip64 end of central
  1131. directory field.
  1132. .ZIP file comment length: (2 bytes)
  1133. The length of the comment for this .ZIP file.
  1134. .ZIP file comment: (Variable)
  1135. The comment for this .ZIP file. ZIP file comment data
  1136. is stored unsecured. No encryption or data authentication
  1137. is applied to this area at this time. Confidential information
  1138. should not be stored in this section.
  1139. zip64 extensible data sector (variable size)
  1140. (currently reserved for use by PKWARE)
  1141. K. Splitting and Spanning ZIP files
  1142. Spanning is the process of segmenting a ZIP file across
  1143. multiple removable media. This support has typically only
  1144. been provided for DOS formatted floppy diskettes.
  1145. File splitting is a newer derivative of spanning.
  1146. Splitting follows the same segmentation process as
  1147. spanning, however, it does not require writing each
  1148. segment to a unique removable medium and instead supports
  1149. placing all pieces onto local or non-removable locations
  1150. such as file systems, local drives, folders, etc.
  1151. A key difference between spanned and split ZIP files is
  1152. that all pieces of a spanned ZIP file have the same name.
  1153. Since each piece is written to a separate volume, no name
  1154. collisions occur and each segment can reuse the original
  1155. .ZIP file name given to the archive.
  1156. Sequence ordering for DOS spanned archives uses the DOS
  1157. volume label to determine segment numbers. Volume labels
  1158. for each segment are written using the form PKBACK#xxx,
  1159. where xxx is the segment number written as a decimal
  1160. value from 001 - nnn.
  1161. Split ZIP files are typically written to the same location
  1162. and are subject to name collisions if the spanned name
  1163. format is used since each segment will reside on the same
  1164. drive. To avoid name collisions, split archives are named
  1165. as follows.
  1166. Segment 1 = filename.z01
  1167. Segment n-1 = filename.z(n-1)
  1168. Segment n = filename.zip
  1169. The .ZIP extension is used on the last segment to support
  1170. quickly reading the central directory. The segment number
  1171. n should be a decimal value.
  1172. Spanned ZIP files may be PKSFX Self-extracting ZIP files.
  1173. PKSFX files may also be split, however, in this case
  1174. the first segment must be named filename.exe. The first
  1175. segment of a split PKSFX archive must be large enough to
  1176. include the entire executable program.
  1177. Capacities for split archives are as follows.
  1178. Maximum number of segments = 4,294,967,295 - 1
  1179. Maximum .ZIP segment size = 4,294,967,295 bytes
  1180. Minimum segment size = 64K
  1181. Maximum PKSFX segment size = 2,147,483,647 bytes
  1182. Segment sizes may be different; however, by convention, all
  1183. segment sizes should be the same with the exception of the
  1184. last, which may be smaller. Local and central directory
  1185. header records must never be split across a segment boundary.
  1186. When writing a header record, if the number of bytes remaining
  1187. within a segment is less than the size of the header record,
  1188. end the current segment and write the header at the start
  1189. of the next segment. The central directory may span segment
  1190. boundaries, but no single record in the central directory
  1191. should be split across segments.
  1192. Spanned/Split archives created using PKZIP for Windows
  1193. (V2.50 or greater), PKZIP Command Line (V2.50 or greater),
  1194. or PKZIP Explorer will include a special spanning
  1195. signature as the first 4 bytes of the first segment of
  1196. the archive. This signature (0x08074b50) will be
  1197. followed immediately by the local header signature for
  1198. the first file in the archive.
  1199. A special spanning marker may also appear in spanned/split
  1200. archives if the spanning or splitting process starts but
  1201. only requires one segment. In this case the 0x08074b50
  1202. signature will be replaced with the temporary spanning
  1203. marker signature of 0x30304b50. Split archives can
  1204. only be uncompressed by other versions of PKZIP that
  1205. know how to create a split archive.
  1206. The signature value 0x08074b50 is also used by some
  1207. ZIP implementations as a marker for the Data Descriptor
  1208. record. Conflict in this alternate assignment can be
  1209. avoided by checking the position of the signature
  1210. within the ZIP file to determine the use for which it
  1211. is intended.
  1212. L. General notes:
  1213. 1) All fields unless otherwise noted are unsigned and stored
  1214. in Intel low-byte:high-byte, low-word:high-word order.
  1215. 2) String fields are not null terminated, since the
  1216. length is given explicitly.
  1217. 3) The entries in the central directory may not necessarily
  1218. be in the same order that files appear in the .ZIP file.
  1219. 4) If one of the fields in the end of central directory
  1220. record is too small to hold required data, the field
  1221. should be set to -1 (0xFFFF or 0xFFFFFFFF) and the
  1222. ZIP64 format record should be created.
  1223. 5) The end of central directory record and the
  1224. Zip64 end of central directory locator record must
  1225. reside on the same disk when splitting or spanning
  1226. an archive.
  1227. VI. Explanation of compression methods
  1228. --------------------------------------
  1229. UnShrinking - Method 1
  1230. ----------------------
  1231. Shrinking is a Dynamic Ziv-Lempel-Welch compression algorithm
  1232. with partial clearing. The initial code size is 9 bits, and
  1233. the maximum code size is 13 bits. Shrinking differs from
  1234. conventional Dynamic Ziv-Lempel-Welch implementations in several
  1235. respects:
  1236. 1) The code size is controlled by the compressor, and is not
  1237. automatically increased when codes larger than the current
  1238. code size are created (but not necessarily used). When
  1239. the decompressor encounters the code sequence 256
  1240. (decimal) followed by 1, it should increase the code size
  1241. read from the input stream to the next bit size. No
  1242. blocking of the codes is performed, so the next code at
  1243. the increased size should be read from the input stream
  1244. immediately after where the previous code at the smaller
  1245. bit size was read. Again, the decompressor should not
  1246. increase the code size used until the sequence 256,1 is
  1247. encountered.
  1248. 2) When the table becomes full, total clearing is not
  1249. performed. Rather, when the compressor emits the code
  1250. sequence 256,2 (decimal), the decompressor should clear
  1251. all leaf nodes from the Ziv-Lempel tree, and continue to
  1252. use the current code size. The nodes that are cleared
  1253. from the Ziv-Lempel tree are then re-used, with the lowest
  1254. code value re-used first, and the highest code value
  1255. re-used last. The compressor can emit the sequence 256,2
  1256. at any time.
  1257. Expanding - Methods 2-5
  1258. -----------------------
  1259. The Reducing algorithm is actually a combination of two
  1260. distinct algorithms. The first algorithm compresses repeated
  1261. byte sequences, and the second algorithm takes the compressed
  1262. stream from the first algorithm and applies a probabilistic
  1263. compression method.
  1264. The probabilistic compression stores an array of 'follower
  1265. sets' S(j), for j=0 to 255, corresponding to each possible
  1266. ASCII character. Each set contains between 0 and 32
  1267. characters, to be denoted as S(j)[0],...,S(j)[m], where m<32.
  1268. The sets are stored at the beginning of the data area for a
  1269. Reduced file, in reverse order, with S(255) first, and S(0)
  1270. last.
  1271. The sets are encoded as { N(j), S(j)[0],...,S(j)[N(j)-1] },
  1272. where N(j) is the size of set S(j). N(j) can be 0, in which
  1273. case the follower set for S(j) is empty. Each N(j) value is
  1274. encoded in 6 bits, followed by N(j) eight bit character values
  1275. corresponding to S(j)[0] to S(j)[N(j)-1] respectively. If
  1276. N(j) is 0, then no values for S(j) are stored, and the value
  1277. for N(j-1) immediately follows.
  1278. Immediately after the follower sets, is the compressed data
  1279. stream. The compressed data stream can be interpreted for the
  1280. probabilistic decompression as follows:
  1281. let Last-Character <- 0.
  1282. loop until done
  1283. if the follower set S(Last-Character) is empty then
  1284. read 8 bits from the input stream, and copy this
  1285. value to the output stream.
  1286. otherwise if the follower set S(Last-Character) is non-empty then
  1287. read 1 bit from the input stream.
  1288. if this bit is not zero then
  1289. read 8 bits from the input stream, and copy this
  1290. value to the output stream.
  1291. otherwise if this bit is zero then
  1292. read B(N(Last-Character)) bits from the input
  1293. stream, and assign this value to I.
  1294. Copy the value of S(Last-Character)[I] to the
  1295. output stream.
  1296. assign the last value placed on the output stream to
  1297. Last-Character.
  1298. end loop
  1299. B(N(j)) is defined as the minimal number of bits required to
  1300. encode the value N(j)-1.
  1301. The decompressed stream from above can then be expanded to
  1302. re-create the original file as follows:
  1303. let State <- 0.
  1304. loop until done
  1305. read 8 bits from the input stream into C.
  1306. case State of
  1307. 0: if C is not equal to DLE (144 decimal) then
  1308. copy C to the output stream.
  1309. otherwise if C is equal to DLE then
  1310. let State <- 1.
  1311. 1: if C is non-zero then
  1312. let V <- C.
  1313. let Len <- L(V)
  1314. let State <- F(Len).
  1315. otherwise if C is zero then
  1316. copy the value 144 (decimal) to the output stream.
  1317. let State <- 0
  1318. 2: let Len <- Len + C
  1319. let State <- 3.
  1320. 3: move backwards D(V,C) bytes in the output stream
  1321. (if this position is before the start of the output
  1322. stream, then assume that all the data before the
  1323. start of the output stream is filled with zeros).
  1324. copy Len+3 bytes from this position to the output stream.
  1325. let State <- 0.
  1326. end case
  1327. end loop
  1328. The functions F,L, and D are dependent on the 'compression
  1329. factor', 1 through 4, and are defined as follows:
  1330. For compression factor 1:
  1331. L(X) equals the lower 7 bits of X.
  1332. F(X) equals 2 if X equals 127 otherwise F(X) equals 3.
  1333. D(X,Y) equals the (upper 1 bit of X) * 256 + Y + 1.
  1334. For compression factor 2:
  1335. L(X) equals the lower 6 bits of X.
  1336. F(X) equals 2 if X equals 63 otherwise F(X) equals 3.
  1337. D(X,Y) equals the (upper 2 bits of X) * 256 + Y + 1.
  1338. For compression factor 3:
  1339. L(X) equals the lower 5 bits of X.
  1340. F(X) equals 2 if X equals 31 otherwise F(X) equals 3.
  1341. D(X,Y) equals the (upper 3 bits of X) * 256 + Y + 1.
  1342. For compression factor 4:
  1343. L(X) equals the lower 4 bits of X.
  1344. F(X) equals 2 if X equals 15 otherwise F(X) equals 3.
  1345. D(X,Y) equals the (upper 4 bits of X) * 256 + Y + 1.
  1346. Imploding - Method 6
  1347. --------------------
  1348. The Imploding algorithm is actually a combination of two distinct
  1349. algorithms. The first algorithm compresses repeated byte
  1350. sequences using a sliding dictionary. The second algorithm is
  1351. used to compress the encoding of the sliding dictionary output,
  1352. using multiple Shannon-Fano trees.
  1353. The Imploding algorithm can use a 4K or 8K sliding dictionary
  1354. size. The dictionary size used can be determined by bit 1 in the
  1355. general purpose flag word; a 0 bit indicates a 4K dictionary
  1356. while a 1 bit indicates an 8K dictionary.
  1357. The Shannon-Fano trees are stored at the start of the compressed
  1358. file. The number of trees stored is defined by bit 2 in the
  1359. general purpose flag word; a 0 bit indicates two trees stored, a
  1360. 1 bit indicates three trees are stored. If 3 trees are stored,
  1361. the first Shannon-Fano tree represents the encoding of the
  1362. Literal characters, the second tree represents the encoding of
  1363. the Length information, the third represents the encoding of the
  1364. Distance information. When 2 Shannon-Fano trees are stored, the
  1365. Length tree is stored first, followed by the Distance tree.
  1366. The Literal Shannon-Fano tree, if present is used to represent
  1367. the entire ASCII character set, and contains 256 values. This
  1368. tree is used to compress any data not compressed by the sliding
  1369. dictionary algorithm. When this tree is present, the Minimum
  1370. Match Length for the sliding dictionary is 3. If this tree is
  1371. not present, the Minimum Match Length is 2.
  1372. The Length Shannon-Fano tree is used to compress the Length part
  1373. of the (length,distance) pairs from the sliding dictionary
  1374. output. The Length tree contains 64 values, ranging from the
  1375. Minimum Match Length, to 63 plus the Minimum Match Length.
  1376. The Distance Shannon-Fano tree is used to compress the Distance
  1377. part of the (length,distance) pairs from the sliding dictionary
  1378. output. The Distance tree contains 64 values, ranging from 0 to
  1379. 63, representing the upper 6 bits of the distance value. The
  1380. distance values themselves will be between 0 and the sliding
  1381. dictionary size, either 4K or 8K.
  1382. The Shannon-Fano trees themselves are stored in a compressed
  1383. format. The first byte of the tree data represents the number of
  1384. bytes of data representing the (compressed) Shannon-Fano tree
  1385. minus 1. The remaining bytes represent the Shannon-Fano tree
  1386. data encoded as:
  1387. High 4 bits: Number of values at this bit length + 1. (1 - 16)
  1388. Low 4 bits: Bit Length needed to represent value + 1. (1 - 16)
  1389. The Shannon-Fano codes can be constructed from the bit lengths
  1390. using the following algorithm:
  1391. 1) Sort the Bit Lengths in ascending order, while retaining the
  1392. order of the original lengths stored in the file.
  1393. 2) Generate the Shannon-Fano trees:
  1394. Code <- 0
  1395. CodeIncrement <- 0
  1396. LastBitLength <- 0
  1397. i <- number of Shannon-Fano codes - 1 (either 255 or 63)
  1398. loop while i >= 0
  1399. Code = Code + CodeIncrement
  1400. if BitLength(i) <> LastBitLength then
  1401. LastBitLength=BitLength(i)
  1402. CodeIncrement = 1 shifted left (16 - LastBitLength)
  1403. ShannonCode(i) = Code
  1404. i <- i - 1
  1405. end loop
  1406. 3) Reverse the order of all the bits in the above ShannonCode()
  1407. vector, so that the most significant bit becomes the least
  1408. significant bit. For example, the value 0x1234 (hex) would
  1409. become 0x2C48 (hex).
  1410. 4) Restore the order of Shannon-Fano codes as originally stored
  1411. within the file.
  1412. Example:
  1413. This example will show the encoding of a Shannon-Fano tree
  1414. of size 8. Notice that the actual Shannon-Fano trees used
  1415. for Imploding are either 64 or 256 entries in size.
  1416. Example: 0x02, 0x42, 0x01, 0x13
  1417. The first byte indicates 3 values in this table. Decoding the
  1418. bytes:
  1419. 0x42 = 5 codes of 3 bits long
  1420. 0x01 = 1 code of 2 bits long
  1421. 0x13 = 2 codes of 4 bits long
  1422. This would generate the original bit length array of:
  1423. (3, 3, 3, 3, 3, 2, 4, 4)
  1424. There are 8 codes in this table for the values 0 thru 7. Using
  1425. the algorithm to obtain the Shannon-Fano codes produces:
  1426. Reversed Order Original
  1427. Val Sorted Constructed Code Value Restored Length
  1428. --- ------ ----------------- -------- -------- ------
  1429. 0: 2 1100000000000000 11 101 3
  1430. 1: 3 1010000000000000 101 001 3
  1431. 2: 3 1000000000000000 001 110 3
  1432. 3: 3 0110000000000000 110 010 3
  1433. 4: 3 0100000000000000 010 100 3
  1434. 5: 3 0010000000000000 100 11 2
  1435. 6: 4 0001000000000000 1000 1000 4
  1436. 7: 4 0000000000000000 0000 0000 4
  1437. The values in the Val, Order Restored and Original Length columns
  1438. now represent the Shannon-Fano encoding tree that can be used for
  1439. decoding the Shannon-Fano encoded data. How to parse the
  1440. variable length Shannon-Fano values from the data stream is beyond
  1441. the scope of this document. (See the references listed at the end of
  1442. this document for more information.) However, traditional decoding
  1443. schemes used for Huffman variable length decoding, such as the
  1444. Greenlaw algorithm, can be successfully applied.
  1445. The compressed data stream begins immediately after the
  1446. compressed Shannon-Fano data. The compressed data stream can be
  1447. interpreted as follows:
  1448. loop until done
  1449. read 1 bit from input stream.
  1450. if this bit is non-zero then (encoded data is literal data)
  1451. if Literal Shannon-Fano tree is present
  1452. read and decode character using Literal Shannon-Fano tree.
  1453. otherwise
  1454. read 8 bits from input stream.
  1455. copy character to the output stream.
  1456. otherwise (encoded data is sliding dictionary match)
  1457. if 8K dictionary size
  1458. read 7 bits for offset Distance (lower 7 bits of offset).
  1459. otherwise
  1460. read 6 bits for offset Distance (lower 6 bits of offset).
  1461. using the Distance Shannon-Fano tree, read and decode the
  1462. upper 6 bits of the Distance value.
  1463. using the Length Shannon-Fano tree, read and decode
  1464. the Length value.
  1465. Length <- Length + Minimum Match Length
  1466. if Length = 63 + Minimum Match Length
  1467. read 8 bits from the input stream,
  1468. add this value to Length.
  1469. move backwards Distance+1 bytes in the output stream, and
  1470. copy Length characters from this position to the output
  1471. stream. (if this position is before the start of the output
  1472. stream, then assume that all the data before the start of
  1473. the output stream is filled with zeros).
  1474. end loop
  1475. Tokenizing - Method 7
  1476. ---------------------
  1477. This method is not used by PKZIP.
  1478. Deflating - Method 8
  1479. --------------------
  1480. The Deflate algorithm is similar to the Implode algorithm using
  1481. a sliding dictionary of up to 32K with secondary compression
  1482. from Huffman/Shannon-Fano codes.
  1483. The compressed data is stored in blocks with a header describing
  1484. the block and the Huffman codes used in the data block. The header
  1485. format is as follows:
  1486. Bit 0: Last Block bit This bit is set to 1 if this is the last
  1487. compressed block in the data.
  1488. Bits 1-2: Block type
  1489. 00 (0) - Block is stored - All stored data is byte aligned.
  1490. Skip bits until next byte, then next word = block
  1491. length, followed by the one's complement of the block
  1492. length word. Remaining data in block is the stored
  1493. data.
  1494. 01 (1) - Use fixed Huffman codes for literal and distance codes.
  1495. Lit Code Bits Dist Code Bits
  1496. --------- ---- --------- ----
  1497. 0 - 143 8 0 - 31 5
  1498. 144 - 255 9
  1499. 256 - 279 7
  1500. 280 - 287 8
  1501. Literal codes 286-287 and distance codes 30-31 are
  1502. never used but participate in the Huffman construction.
  1503. 10 (2) - Dynamic Huffman codes. (See expanding Huffman codes)
  1504. 11 (3) - Reserved - Flag a "Error in compressed data" if seen.
  1505. Expanding Huffman Codes
  1506. -----------------------
  1507. If the data block is stored with dynamic Huffman codes, the Huffman
  1508. codes are sent in the following compressed format:
  1509. 5 Bits: # of Literal codes sent - 257 (257 - 286)
  1510. All other codes are never sent.
  1511. 5 Bits: # of Dist codes - 1 (1 - 32)
  1512. 4 Bits: # of Bit Length codes - 3 (3 - 19)
  1513. The Huffman codes are sent as bit lengths and the codes are built as
  1514. described in the implode algorithm. The bit lengths themselves are
  1515. compressed with Huffman codes. There are 19 bit length codes:
  1516. 0 - 15: Represent bit lengths of 0 - 15
  1517. 16: Copy the previous bit length 3 - 6 times.
  1518. The next 2 bits indicate repeat length (0 = 3, ... ,3 = 6)
  1519. Example: Codes 8, 16 (+2 bits 11), 16 (+2 bits 10) will
  1520. expand to 12 bit lengths of 8 (1 + 6 + 5)
  1521. 17: Repeat a bit length of 0 for 3 - 10 times. (3 bits of length)
  1522. 18: Repeat a bit length of 0 for 11 - 138 times (7 bits of length)
  1523. The lengths of the bit length codes are sent packed 3 bits per value
  1524. (0 - 7) in the following order:
  1525. 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
  1526. The Huffman codes should be built as described in the Implode algorithm
  1527. except codes are assigned starting at the shortest bit length, i.e. the
  1528. shortest code should be all 0's rather than all 1's. Also, codes with
  1529. a bit length of zero do not participate in the tree construction. The
  1530. codes are then used to decode the bit lengths for the literal and
  1531. distance tables.
  1532. The bit lengths for the literal tables are sent first with the number
  1533. of entries sent described by the 5 bits sent earlier. There are up
  1534. to 286 literal characters; the first 256 represent the respective 8
  1535. bit character, code 256 represents the End-Of-Block code, the remaining
  1536. 29 codes represent copy lengths of 3 thru 258. There are up to 30
  1537. distance codes representing distances from 1 thru 32k as described
  1538. below.
  1539. Length Codes
  1540. ------------
  1541. Extra Extra Extra Extra
  1542. Code Bits Length Code Bits Lengths Code Bits Lengths Code Bits Length(s)
  1543. ---- ---- ------ ---- ---- ------- ---- ---- ------- ---- ---- ---------
  1544. 257 0 3 265 1 11,12 273 3 35-42 281 5 131-162
  1545. 258 0 4 266 1 13,14 274 3 43-50 282 5 163-194
  1546. 259 0 5 267 1 15,16 275 3 51-58 283 5 195-226
  1547. 260 0 6 268 1 17,18 276 3 59-66 284 5 227-257
  1548. 261 0 7 269 2 19-22 277 4 67-82 285 0 258
  1549. 262 0 8 270 2 23-26 278 4 83-98
  1550. 263 0 9 271 2 27-30 279 4 99-114
  1551. 264 0 10 272 2 31-34 280 4 115-130
  1552. Distance Codes
  1553. --------------
  1554. Extra Extra Extra Extra
  1555. Code Bits Dist Code Bits Dist Code Bits Distance Code Bits Distance
  1556. ---- ---- ---- ---- ---- ------ ---- ---- -------- ---- ---- --------
  1557. 0 0 1 8 3 17-24 16 7 257-384 24 11 4097-6144
  1558. 1 0 2 9 3 25-32 17 7 385-512 25 11 6145-8192
  1559. 2 0 3 10 4 33-48 18 8 513-768 26 12 8193-12288
  1560. 3 0 4 11 4 49-64 19 8 769-1024 27 12 12289-16384
  1561. 4 1 5,6 12 5 65-96 20 9 1025-1536 28 13 16385-24576
  1562. 5 1 7,8 13 5 97-128 21 9 1537-2048 29 13 24577-32768
  1563. 6 2 9-12 14 6 129-192 22 10 2049-3072
  1564. 7 2 13-16 15 6 193-256 23 10 3073-4096
  1565. The compressed data stream begins immediately after the
  1566. compressed header data. The compressed data stream can be
  1567. interpreted as follows:
  1568. do
  1569. read header from input stream.
  1570. if stored block
  1571. skip bits until byte aligned
  1572. read count and 1's complement of count
  1573. copy count bytes data block
  1574. otherwise
  1575. loop until end of block code sent
  1576. decode literal character from input stream
  1577. if literal < 256
  1578. copy character to the output stream
  1579. otherwise
  1580. if literal = end of block
  1581. break from loop
  1582. otherwise
  1583. decode distance from input stream
  1584. move backwards distance bytes in the output stream, and
  1585. copy length characters from this position to the output
  1586. stream.
  1587. end loop
  1588. while not last block
  1589. if data descriptor exists
  1590. skip bits until byte aligned
  1591. read crc and sizes
  1592. endif
  1593. Enhanced Deflating - Method 9
  1594. -----------------------------
  1595. The Enhanced Deflating algorithm is similar to Deflate but
  1596. uses a sliding dictionary of up to 64K. Deflate64(tm) is supported
  1597. by the Deflate extractor.
  1598. BZIP2 - Method 12
  1599. -----------------
  1600. BZIP2 is an open-source data compression algorithm developed by
  1601. Julian Seward. Information and source code for this algorithm
  1602. can be found on the internet.
  1603. LZMA - Method 14 (EFS)
  1604. ----------------------
  1605. LZMA is a block-oriented, general purpose data compression algorithm
  1606. developed and maintained by Igor Pavlov. It is a derivative of LZ77
  1607. that utilizes Markov chains and a range coder. Information and
  1608. source code for this algorithm can be found on the internet. Consult
  1609. with the author of this algorithm for information on terms or
  1610. restrictions on use.
  1611. Support for LZMA within the ZIP format is defined as follows:
  1612. The Compression method field within the ZIP Local and Central
  1613. Header records will be set to the value 14 to indicate data was
  1614. compressed using LZMA.
  1615. The Version needed to extract field within the ZIP Local and
  1616. Central Header records will be set to 6.3 to indicate the
  1617. minimum ZIP format version supporting this feature.
  1618. File data compressed using the LZMA algorithm must be placed
  1619. immediately following the Local Header for the file. If a
  1620. standard ZIP encryption header is required, it will follow
  1621. the Local Header and will precede the LZMA compressed file
  1622. data segment. The location of LZMA compressed data segment
  1623. within the ZIP format will be as shown:
  1624. [local header file 1]
  1625. [encryption header file 1]
  1626. [LZMA compressed data segment for file 1]
  1627. [data descriptor 1]
  1628. [local header file 2]
  1629. The encryption header and data descriptor records may
  1630. be conditionally present. The LZMA Compressed Data Segment
  1631. will consist of an LZMA Properties Header followed by the
  1632. LZMA Compressed Data as shown:
  1633. [LZMA properties header for file 1]
  1634. [LZMA compressed data for file 1]
  1635. The LZMA Compressed Data will be stored as provided by the
  1636. LZMA compression library. Compressed size, uncompressed
  1637. size and other file characteristics about the file being
  1638. compressed must be stored in standard ZIP storage format.
  1639. The LZMA Properties Header will store specific data required to
  1640. decompress the LZMA compressed Data. This data is set by the
  1641. LZMA compression engine using the function WriteCoderProperties()
  1642. as documented within the LZMA SDK.
  1643. Storage fields for the property information within the LZMA
  1644. Properties Header are as follows:
  1645. LZMA Version Information 2 bytes
  1646. LZMA Properties Size 2 bytes
  1647. LZMA Properties Data variable, defined by "LZMA Properties Size"
  1648. LZMA Version Information - this field identifies which version of
  1649. the LZMA SDK was used to compress a file. The first byte will
  1650. store the major version number of the LZMA SDK and the second
  1651. byte will store the minor number.
  1652. LZMA Properties Size - this field defines the size of the remaining
  1653. property data. Typically this size should be determined by the
  1654. version of the SDK. This size field is included as a convenience
  1655. and to help avoid any ambiguity should it arise in the future due
  1656. to changes in this compression algorithm.
  1657. LZMA Properties Data - this variable sized field records the required
  1658. values for the decompressor as defined by the LZMA SDK. The
  1659. data stored in this field should be obtained using the
  1660. WriteCoderProperties() function in the version of the SDK defined by
  1661. the "LZMA Version Information" field.
  1662. The layout of the "LZMA Properties Data" field is a function of the
  1663. LZMA compression algorithm. It is possible that this layout may be
  1664. changed by the author over time. The data layout in version 4.32
  1665. of the LZMA SDK defines a 5 byte array that uses 4 bytes to store
  1666. the dictionary size in little-endian order. This is preceded by a
  1667. single packed byte as the first element of the array that contains
  1668. the following fields:
  1669. PosStateBits
  1670. LiteralPosStateBits
  1671. LiteralContextBits
  1672. Refer to the LZMA documentation for a more detailed explanation of
  1673. these fields.
  1674. Data compressed with method 14, LZMA, may include an end-of-stream
  1675. (EOS) marker ending the compressed data stream. This marker is not
  1676. required, but its use is highly recommended to facilitate processing
  1677. and implementers should include the EOS marker whenever possible.
  1678. When the EOS marker is used, general purpose bit 1 must be set. If
  1679. general purpose bit 1 is not set, the EOS marker is not present.
  1680. WavPack - Method 97
  1681. -------------------
  1682. Information describing the use of compression method 97 is
  1683. provided by WinZIP International, LLC. This method relies on the
  1684. open source WavPack audio compression utility developed by David Bryant.
  1685. Information on WavPack is available at www.wavpack.com. Please consult
  1686. with the author of this algorithm for information on terms and
  1687. restrictions on use.
  1688. WavPack data for a file begins immediately after the end of the
  1689. local header data. This data is the output from WavPack compression
  1690. routines. Within the ZIP file, the use of WavPack compression is
  1691. indicated by setting the compression method field to a value of 97
  1692. in both the local header and the central directory header. The Version
  1693. needed to extract and version made by fields use the same values as are
  1694. used for data compressed using the Deflate algorithm.
  1695. An implementation note for storing digital sample data when using
  1696. WavPack compression within ZIP files is that all of the bytes of
  1697. the sample data should be compressed. This includes any unused
  1698. bits up to the byte boundary. An example is a 2 byte sample that
  1699. uses only 12 bits for the sample data with 4 unused bits. If only
  1700. 12 bits are passed as the sample size to the WavPack routines, the 4
  1701. unused bits will be set to 0 on extraction regardless of their original
  1702. state. To avoid this, the full 16 bits of the sample data size
  1703. should be provided.
  1704. PPMd - Method 98
  1705. ----------------
  1706. PPMd is a data compression algorithm developed by Dmitry Shkarin
  1707. which includes a carryless rangecoder developed by Dmitry Subbotin.
  1708. This algorithm is based on prediction by partial matching on multiple
  1709. order contexts. Information and source code for this algorithm
  1710. can be found on the internet. Consult with the author of this
  1711. algorithm for information on terms or restrictions on use.
  1712. Support for PPMd within the ZIP format currently is provided only
  1713. for version I, revision 1 of the algorithm. Storage requirements
  1714. for using this algorithm are as follows:
  1715. Parameters needed to control the algorithm are stored in the two
  1716. bytes immediately preceding the compressed data. These bytes are
  1717. used to store the following fields:
  1718. Model order - sets the maximum model order, default is 8, possible
  1719. values are from 2 to 16 inclusive
  1720. Sub-allocator size - sets the size of sub-allocator in MB, default is 50,
  1721. possible values are from 1MB to 256MB inclusive
  1722. Model restoration method - sets the method used to restart context
  1723. model at memory insufficiency, values are:
  1724. 0 - restarts model from scratch - default
  1725. 1 - cut off model - decreases performance by as much as 2x
  1726. 2 - freeze context tree - not recommended
  1727. An example for packing these fields into the 2 byte storage field is
  1728. illustrated below. These values are stored in Intel low-byte/high-byte
  1729. order.
  1730. wPPMd = (Model order - 1) +
  1731. ((Sub-allocator size - 1) << 4) +
  1732. (Model restoration method << 12)
  1733. VII. Traditional PKWARE Encryption
  1734. ----------------------------------
  1735. The following information discusses the decryption steps
  1736. required to support traditional PKWARE encryption. This
  1737. form of encryption is considered weak by today's standards
  1738. and its use is recommended only for situations with
  1739. low security needs or for compatibility with older .ZIP
  1740. applications.
  1741. Decryption
  1742. ----------
  1743. PKWARE is grateful to Mr. Roger Schlafly for his expert contribution
  1744. towards the development of PKWARE's traditional encryption.
  1745. PKZIP encrypts the compressed data stream. Encrypted files must
  1746. be decrypted before they can be extracted.
  1747. Each encrypted file has an extra 12 bytes stored at the start of
  1748. the data area defining the encryption header for that file. The
  1749. encryption header is originally set to random values, and then
  1750. itself encrypted, using three, 32-bit keys. The key values are
  1751. initialized using the supplied encryption password. After each byte
  1752. is encrypted, the keys are then updated using pseudo-random number
  1753. generation techniques in combination with the same CRC-32 algorithm
  1754. used in PKZIP and described elsewhere in this document.
  1755. The following are the basic steps required to decrypt a file:
  1756. 1) Initialize the three 32-bit keys with the password.
  1757. 2) Read and decrypt the 12-byte encryption header, further
  1758. initializing the encryption keys.
  1759. 3) Read and decrypt the compressed data stream using the
  1760. encryption keys.
  1761. Step 1 - Initializing the encryption keys
  1762. -----------------------------------------
  1763. Key(0) <- 305419896
  1764. Key(1) <- 591751049
  1765. Key(2) <- 878082192
  1766. loop for i <- 0 to length(password)-1
  1767. update_keys(password(i))
  1768. end loop
  1769. Where update_keys() is defined as:
  1770. update_keys(char):
  1771. Key(0) <- crc32(key(0),char)
  1772. Key(1) <- Key(1) + (Key(0) & 000000ffH)
  1773. Key(1) <- Key(1) * 134775813 + 1
  1774. Key(2) <- crc32(key(2),key(1) >> 24)
  1775. end update_keys
  1776. Where crc32(old_crc,char) is a routine that given a CRC value and a
  1777. character, returns an updated CRC value after applying the CRC-32
  1778. algorithm described elsewhere in this document.
  1779. Step 2 - Decrypting the encryption header
  1780. -----------------------------------------
  1781. The purpose of this step is to further initialize the encryption
  1782. keys, based on random data, to render a plaintext attack on the
  1783. data ineffective.
  1784. Read the 12-byte encryption header into Buffer, in locations
  1785. Buffer(0) thru Buffer(11).
  1786. loop for i <- 0 to 11
  1787. C <- buffer(i) ^ decrypt_byte()
  1788. update_keys(C)
  1789. buffer(i) <- C
  1790. end loop
  1791. Where decrypt_byte() is defined as:
  1792. unsigned char decrypt_byte()
  1793. local unsigned short temp
  1794. temp <- Key(2) | 2
  1795. decrypt_byte <- (temp * (temp ^ 1)) >> 8
  1796. end decrypt_byte
  1797. After the header is decrypted, the last 1 or 2 bytes in Buffer
  1798. should be the high-order word/byte of the CRC for the file being
  1799. decrypted, stored in Intel low-byte/high-byte order. Versions of
  1800. PKZIP prior to 2.0 used a 2 byte CRC check; a 1 byte CRC check is
  1801. used on versions after 2.0. This can be used to test if the password
  1802. supplied is correct or not.
  1803. Step 3 - Decrypting the compressed data stream
  1804. ----------------------------------------------
  1805. The compressed data stream can be decrypted as follows:
  1806. loop until done
  1807. read a character into C
  1808. Temp <- C ^ decrypt_byte()
  1809. update_keys(temp)
  1810. output Temp
  1811. end loop
  1812. VIII. Strong Encryption Specification
  1813. -------------------------------------
  1814. The Strong Encryption technology defined in this specification is
  1815. covered under a pending patent application. The use or implementation
  1816. in a product of certain technological aspects set forth in the current
  1817. APPNOTE, including those with regard to strong encryption, patching,
  1818. or extended tape operations requires a license from PKWARE. Portions
  1819. of this Strong Encryption technology are available for use at no charge.
  1820. Contact PKWARE for licensing terms and conditions. Refer to section II
  1821. of this APPNOTE (Contacting PKWARE) for information on how to
  1822. contact PKWARE.
  1823. Version 5.x of this specification introduced support for strong
  1824. encryption algorithms. These algorithms can be used with either
  1825. a password or an X.509v3 digital certificate to encrypt each file.
  1826. This format specification supports either password or certificate
  1827. based encryption to meet the security needs of today, to enable
  1828. interoperability between users within both PKI and non-PKI
  1829. environments, and to ensure interoperability between different
  1830. computing platforms that are running a ZIP program.
  1831. Password based encryption is the most common form of encryption
  1832. people are familiar with. However, inherent weaknesses with
  1833. passwords (e.g. susceptibility to dictionary/brute force attack)
  1834. as well as password management and support issues make certificate
  1835. based encryption a more secure and scalable option. Industry
  1836. efforts and support are defining and moving towards more advanced
  1837. security solutions built around X.509v3 digital certificates and
  1838. Public Key Infrastructures (PKI) because of the greater scalability,
  1839. administrative options, and more robust security over traditional
  1840. password based encryption.
  1841. Most standard encryption algorithms are supported with this
  1842. specification. Reference implementations for many of these
  1843. algorithms are available from either commercial or open source
  1844. distributors. Readily available cryptographic toolkits make
  1845. implementation of the encryption features straight-forward.
  1846. This document is not intended to provide a treatise on data
  1847. encryption principles or theory. Its purpose is to document the
  1848. data structures required for implementing interoperable data
  1849. encryption within the .ZIP format. It is strongly recommended that
  1850. you have a good understanding of data encryption before reading
  1851. further.
  1852. The algorithms introduced in Version 5.0 of this specification
  1853. include:
  1854. RC2 40 bit, 64 bit, and 128 bit
  1855. RC4 40 bit, 64 bit, and 128 bit
  1856. DES
  1857. 3DES 112 bit and 168 bit
  1858. Version 5.1 adds support for the following:
  1859. AES 128 bit, 192 bit, and 256 bit
  1860. Version 6.1 introduces encryption data changes to support
  1861. interoperability with Smartcard and USB Token certificate storage
  1862. methods which do not support the OAEP strengthening standard.
  1863. Version 6.2 introduces support for encrypting metadata by compressing
  1864. and encrypting the central directory data structure to reduce information
  1865. leakage. Information leakage can occur in legacy ZIP applications
  1866. through exposure of information about a file even though that file is
  1867. stored encrypted. The information exposed consists of file
  1868. characteristics stored within the records and fields defined by this
  1869. specification. This includes data such as a file's name, its original
  1870. size, timestamp and CRC32 value.
  1871. Version 6.3 introduces support for encrypting data using the Blowfish
  1872. and Twofish algorithms. These are symmetric block ciphers developed
  1873. by Bruce Schneier. Blowfish supports using a variable length key from
  1874. 32 to 448 bits. Block size is 64 bits. Implementations should use 16
  1875. rounds and the only mode supported within ZIP files is CBC. Twofish
  1876. supports key sizes 128, 192 and 256 bits. Block size is 128 bits.
  1877. Implementations should use 16 rounds and the only mode supported within
  1878. ZIP files is CBC. Information and source code for both Blowfish and
  1879. Twofish algorithms can be found on the internet. Consult with the author
  1880. of these algorithms for information on terms or restrictions on use.
  1881. Central Directory Encryption provides greater protection against
  1882. information leakage by encrypting the Central Directory structure and
  1883. by masking key values that are replicated in the unencrypted Local
  1884. Header. ZIP compatible programs that cannot interpret an encrypted
  1885. Central Directory structure cannot rely on the data in the corresponding
  1886. Local Header for decompression information.
  1887. Extra Field records that may contain information about a file that should
  1888. not be exposed should not be stored in the Local Header and should only
  1889. be written to the Central Directory where they can be encrypted. This
  1890. design currently does not support streaming. Information in the End of
  1891. Central Directory record, the Zip64 End of Central Directory Locator,
  1892. and the Zip64 End of Central Directory records are not encrypted. Access
  1893. to view data on files within a ZIP file with an encrypted Central Directory
  1894. requires the appropriate password or private key for decryption prior to
  1895. viewing any files, or any information about the files, in the archive.
  1896. Older ZIP compatible programs not familiar with the Central Directory
  1897. Encryption feature will no longer be able to recognize the Central
  1898. Directory and may assume the ZIP file is corrupt. Programs that
  1899. attempt streaming access using Local Headers will see invalid
  1900. information for each file. Central Directory Encryption need not be
  1901. used for every ZIP file. Its use is recommended for greater security.
  1902. ZIP files not using Central Directory Encryption should operate as
  1903. in the past.
  1904. This strong encryption feature specification is intended to provide for
  1905. scalable, cross-platform encryption needs ranging from simple password
  1906. encryption to authenticated public/private key encryption.
  1907. Encryption provides data confidentiality and privacy. It is
  1908. recommended that you combine X.509 digital signing with encryption
  1909. to add authentication and non-repudiation.
  1910. Single Password Symmetric Encryption Method:
  1911. -------------------------------------------
  1912. The Single Password Symmetric Encryption Method using strong
  1913. encryption algorithms operates similarly to the traditional
  1914. PKWARE encryption defined in this format. Additional data
  1915. structures are added to support the processing needs of the
  1916. strong algorithms.
  1917. The Strong Encryption data structures are:
  1918. 1. General Purpose Bits - Bits 0 and 6 of the General Purpose bit
  1919. flag in both local and central header records. Both bits set
  1920. indicates strong encryption. Bit 13, when set indicates the Central
  1921. Directory is encrypted and that selected fields in the Local Header
  1922. are masked to hide their actual value.
  1923. 2. Extra Field 0x0017 in central header only.
  1924. Fields to consider in this record are:
  1925. Format - the data format identifier for this record. The only
  1926. value allowed at this time is the integer value 2.
  1927. AlgId - integer identifier of the encryption algorithm from the
  1928. following range
  1929. 0x6601 - DES
  1930. 0x6602 - RC2 (version needed to extract < 5.2)
  1931. 0x6603 - 3DES 168
  1932. 0x6609 - 3DES 112
  1933. 0x660E - AES 128
  1934. 0x660F - AES 192
  1935. 0x6610 - AES 256
  1936. 0x6702 - RC2 (version needed to extract >= 5.2)
  1937. 0x6720 - Blowfish
  1938. 0x6721 - Twofish
  1939. 0x6801 - RC4
  1940. 0xFFFF - Unknown algorithm
  1941. Bitlen - Explicit bit length of key
  1942. 32 - 448 bits
  1943. Flags - Processing flags needed for decryption
  1944. 0x0001 - Password is required to decrypt
  1945. 0x0002 - Certificates only
  1946. 0x0003 - Password or certificate required to decrypt
  1947. Values > 0x0003 reserved for certificate processing
  1948. 3. Decryption header record preceding compressed file data.
  1949. -Decryption Header:
  1950. Value Size Description
  1951. ----- ---- -----------
  1952. IVSize 2 bytes Size of initialization vector (IV)
  1953. IVData IVSize Initialization vector for this file
  1954. Size 4 bytes Size of remaining decryption header data
  1955. Format 2 bytes Format definition for this record
  1956. AlgID 2 bytes Encryption algorithm identifier
  1957. Bitlen 2 bytes Bit length of encryption key
  1958. Flags 2 bytes Processing flags
  1959. ErdSize 2 bytes Size of Encrypted Random Data
  1960. ErdData ErdSize Encrypted Random Data
  1961. Reserved1 4 bytes Reserved certificate processing data
  1962. Reserved2 (var) Reserved for certificate processing data
  1963. VSize 2 bytes Size of password validation data
  1964. VData VSize-4 Password validation data
  1965. VCRC32 4 bytes Standard ZIP CRC32 of password validation data
  1966. IVData - The size of the IV should match the algorithm block size.
  1967. The IVData can be completely random data. If the size of
  1968. the randomly generated data does not match the block size
  1969. it should be complemented with zeros or truncated as
  1970. necessary. If IVSize is 0, then IV = CRC32 + Uncompressed
  1971. File Size (as a 64 bit little-endian, unsigned integer value).
  1972. Format - the data format identifier for this record. The only
  1973. value allowed at this time is the integer value 3.
  1974. AlgId - integer identifier of the encryption algorithm from the
  1975. following range
  1976. 0x6601 - DES
  1977. 0x6602 - RC2 (version needed to extract < 5.2)
  1978. 0x6603 - 3DES 168
  1979. 0x6609 - 3DES 112
  1980. 0x660E - AES 128
  1981. 0x660F - AES 192
  1982. 0x6610 - AES 256
  1983. 0x6702 - RC2 (version needed to extract >= 5.2)
  1984. 0x6720 - Blowfish
  1985. 0x6721 - Twofish
  1986. 0x6801 - RC4
  1987. 0xFFFF - Unknown algorithm
  1988. Bitlen - Explicit bit length of key
  1989. 32 - 448 bits
  1990. Flags - Processing flags needed for decryption
  1991. 0x0001 - Password is required to decrypt
  1992. 0x0002 - Certificates only
  1993. 0x0003 - Password or certificate required to decrypt
  1994. Values > 0x0003 reserved for certificate processing
  1995. ErdData - Encrypted random data is used to store random data that
  1996. is used to generate a file session key for encrypting
  1997. each file. SHA1 is used to calculate hash data used to
  1998. derive keys. File session keys are derived from a master
  1999. session key generated from the user-supplied password.
  2000. If the Flags field in the decryption header contains
  2001. the value 0x4000, then the ErdData field must be
  2002. decrypted using 3DES. If the value 0x4000 is not set,
  2003. then the ErdData field must be decrypted using AlgId.
  2004. Reserved1 - Reserved for certificate processing, if value is
  2005. zero, then Reserved2 data is absent. See the explanation
  2006. under the Certificate Processing Method for details on
  2007. this data structure.
  2008. Reserved2 - If present, the size of the Reserved2 data structure
  2009. is located by skipping the first 4 bytes of this field
  2010. and using the next 2 bytes as the remaining size. See
  2011. the explanation under the Certificate Processing Method
  2012. for details on this data structure.
  2013. VSize - This size value will always include the 4 bytes of the
  2014. VCRC32 data and will be greater than 4 bytes.
  2015. VData - Random data for password validation. This data is VSize
  2016. in length and VSize must be a multiple of the encryption
  2017. block size. VCRC32 is a checksum value of VData.
  2018. VData and VCRC32 are stored encrypted and start the
  2019. stream of encrypted data for a file.
  2020. 4. Useful Tips
  2021. Strong Encryption is always applied to a file after compression. The
  2022. block oriented algorithms all operate in Cipher Block Chaining (CBC)
  2023. mode. The block size used for AES encryption is 16. All other block
  2024. algorithms use a block size of 8. Two ID's are defined for RC2 to
  2025. account for a discrepancy found in the implementation of the RC2
  2026. algorithm in the cryptographic library on Windows XP SP1 and all
  2027. earlier versions of Windows. It is recommended that zero length files
  2028. not be encrypted, however programs should be prepared to extract them
  2029. if they are found within a ZIP file.
  2030. A pseudo-code representation of the encryption process is as follows:
  2031. Password = GetUserPassword()
  2032. MasterSessionKey = DeriveKey(SHA1(Password))
  2033. RD = CryptographicStrengthRandomData()
  2034. For Each File
  2035. IV = CryptographicStrengthRandomData()
  2036. VData = CryptographicStrengthRandomData()
  2037. VCRC32 = CRC32(VData)
  2038. FileSessionKey = DeriveKey(SHA1(IV + RD))
  2039. ErdData = Encrypt(RD,MasterSessionKey,IV)
  2040. Encrypt(VData + VCRC32 + FileData, FileSessionKey,IV)
  2041. Done
  2042. The function names and parameter requirements will depend on
  2043. the choice of the cryptographic toolkit selected. Almost any
  2044. toolkit supporting the reference implementations for each
  2045. algorithm can be used. The RSA BSAFE(r), OpenSSL, and Microsoft
  2046. CryptoAPI libraries are all known to work well.
  2047. Single Password - Central Directory Encryption:
  2048. -----------------------------------------------
  2049. Central Directory Encryption is achieved within the .ZIP format by
  2050. encrypting the Central Directory structure. This encapsulates the metadata
  2051. most often used for processing .ZIP files. Additional metadata is stored for
  2052. redundancy in the Local Header for each file. The process of concealing
  2053. metadata by encrypting the Central Directory does not protect the data within
  2054. the Local Header. To avoid information leakage from the exposed metadata
  2055. in the Local Header, the fields containing information about a file are masked.
  2056. Local Header:
  2057. Masking replaces the true content of the fields for a file in the Local
  2058. Header with false information. When masked, the Local Header is not
  2059. suitable for streaming access and the options for data recovery of damaged
  2060. archives are reduced. Extra Data fields that may contain confidential
  2061. data should not be stored within the Local Header. The value set into
  2062. the Version needed to extract field should be the correct value needed to
  2063. extract the file without regard to Central Directory Encryption. The fields
  2064. within the Local Header targeted for masking when the Central Directory is
  2065. encrypted are:
  2066. Field Name Mask Value
  2067. ------------------ ---------------------------
  2068. compression method 0
  2069. last mod file time 0
  2070. last mod file date 0
  2071. crc-32 0
  2072. compressed size 0
  2073. uncompressed size 0
  2074. file name (variable size) Base 16 value from the
  2075. range 1 - 0xFFFFFFFFFFFFFFFF
  2076. represented as a string whose
  2077. size will be set into the
  2078. file name length field
  2079. The Base 16 value assigned as a masked file name is simply a sequentially
  2080. incremented value for each file starting with 1 for the first file.
  2081. Modifications to a ZIP file may cause different values to be stored for
  2082. each file. For compatibility, the file name field in the Local Header
  2083. should never be left blank. As of Version 6.2 of this specification,
  2084. the Compression Method and Compressed Size fields are not yet masked.
  2085. Fields having a value of 0xFFFF or 0xFFFFFFFF for the ZIP64 format
  2086. should not be masked.
  2087. Encrypting the Central Directory:
  2088. Encryption of the Central Directory does not include encryption of the
  2089. Central Directory Signature data, the Zip64 End of Central Directory
  2090. record, the Zip64 End of Central Directory Locator, or the End
  2091. of Central Directory record. The ZIP file comment data is never
  2092. encrypted.
  2093. Before encrypting the Central Directory, it may optionally be compressed.
  2094. Compression is not required, but for storage efficiency it is assumed
  2095. this structure will be compressed before encrypting. Similarly, this
  2096. specification supports compressing the Central Directory without
  2097. requiring that it also be encrypted. Early implementations of this
  2098. feature will assume the encryption method applied to files matches the
  2099. encryption applied to the Central Directory.
  2100. Encryption of the Central Directory is done in a manner similar to
  2101. that of file encryption. The encrypted data is preceded by a
  2102. decryption header. The decryption header is known as the Archive
  2103. Decryption Header. The fields of this record are identical to
  2104. the decryption header preceding each encrypted file. The location
  2105. of the Archive Decryption Header is determined by the value in the
  2106. Start of the Central Directory field in the Zip64 End of Central
  2107. Directory record. When the Central Directory is encrypted, the
  2108. Zip64 End of Central Directory record will always be present.
  2109. The layout of the Zip64 End of Central Directory record for all
  2110. versions starting with 6.2 of this specification will follow the
  2111. Version 2 format. The Version 2 format is as follows:
  2112. The leading fixed size fields within the Version 1 format for this
  2113. record remain unchanged. The record signature for both Version 1
  2114. and Version 2 will be 0x06064b50. Immediately following the last
  2115. byte of the field known as the Offset of Start of Central
  2116. Directory With Respect to the Starting Disk Number will begin the
  2117. new fields defining Version 2 of this record.
  2118. New fields for Version 2:
  2119. Note: all fields stored in Intel low-byte/high-byte order.
  2120. Value Size Description
  2121. ----- ---- -----------
  2122. Compression Method 2 bytes Method used to compress the
  2123. Central Directory
  2124. Compressed Size 8 bytes Size of the compressed data
  2125. Original Size 8 bytes Original uncompressed size
  2126. AlgId 2 bytes Encryption algorithm ID
  2127. BitLen 2 bytes Encryption key length
  2128. Flags 2 bytes Encryption flags
  2129. HashID 2 bytes Hash algorithm identifier
  2130. Hash Length 2 bytes Length of hash data
  2131. Hash Data (variable) Hash data
  2132. The Compression Method accepts the same range of values as the
  2133. corresponding field in the Central Header.
  2134. The Compressed Size and Original Size values will not include the
  2135. data of the Central Directory Signature which is compressed or
  2136. encrypted.
  2137. The AlgId, BitLen, and Flags fields accept the same range of values
  2138. as the corresponding fields within the 0x0017 record.
  2139. Hash ID identifies the algorithm used to hash the Central Directory
  2140. data. This data does not have to be hashed, in which case the
  2141. values for both the HashID and Hash Length will be 0. Possible
  2142. values for HashID are:
  2143. Value Algorithm
  2144. ------ ---------
  2145. 0x0000 none
  2146. 0x0001 CRC32
  2147. 0x8003 MD5
  2148. 0x8004 SHA1
  2149. 0x8007 RIPEMD160
  2150. 0x800C SHA256
  2151. 0x800D SHA384
  2152. 0x800E SHA512
  2153. When the Central Directory data is signed, the same hash algorithm
  2154. used to hash the Central Directory for signing should be used.
  2155. This is recommended for processing efficiency, however, it is
  2156. permissible for any of the above algorithms to be used independent
  2157. of the signing process.
  2158. The Hash Data will contain the hash data for the Central Directory.
  2159. The length of this data will vary depending on the algorithm used.
  2160. The Version Needed to Extract should be set to 62.
  2161. The value for the Total Number of Entries on the Current Disk will
  2162. be 0. These records will no longer support random access when
  2163. encrypting the Central Directory.
  2164. When the Central Directory is compressed and/or encrypted, the
  2165. End of Central Directory record will store the value 0xFFFFFFFF
  2166. as the value for the Total Number of Entries in the Central
  2167. Directory. The value stored in the Total Number of Entries in
  2168. the Central Directory on this Disk field will be 0. The actual
  2169. values will be stored in the equivalent fields of the Zip64
  2170. End of Central Directory record.
  2171. Decrypting and decompressing the Central Directory is accomplished
  2172. in the same manner as decrypting and decompressing a file.
  2173. Certificate Processing Method:
  2174. -----------------------------
  2175. The Certificate Processing Method for ZIP file encryption
  2176. defines the following additional data fields:
  2177. 1. Certificate Flag Values
  2178. Additional processing flags that can be present in the Flags field of both
  2179. the 0x0017 field of the central directory Extra Field and the Decryption
  2180. header record preceding compressed file data are:
  2181. 0x0007 - reserved for future use
  2182. 0x000F - reserved for future use
  2183. 0x0100 - Indicates non-OAEP key wrapping was used. If this
  2184. field is set, the version needed to extract must
  2185. be at least 61. This means OAEP key wrapping is not
  2186. used when generating a Master Session Key using
  2187. ErdData.
  2188. 0x4000 - ErdData must be decrypted using 3DES-168, otherwise use the
  2189. same algorithm used for encrypting the file contents.
  2190. 0x8000 - reserved for future use
  2191. 2. CertData - Extra Field 0x0017 record certificate data structure
  2192. The data structure used to store certificate data within the section
  2193. of the Extra Field defined by the CertData field of the 0x0017
  2194. record is as shown:
  2195. Value Size Description
  2196. ----- ---- -----------
  2197. RCount 4 bytes Number of recipients.
  2198. HashAlg 2 bytes Hash algorithm identifier
  2199. HSize 2 bytes Hash size
  2200. SRList (var) Simple list of recipients' hashed public keys
  2201. RCount This defines the number of intended recipients whose
  2202. public keys were used for encryption. This identifies
  2203. the number of elements in the SRList.
  2204. HashAlg This defines the hash algorithm used to calculate
  2205. the public key hash of each public key used
  2206. for encryption. This field currently supports
  2207. only the following value for SHA-1
  2208. 0x8004 - SHA1
  2209. HSize This defines the size of a hashed public key.
  2210. SRList This is a variable length list of the hashed
  2211. public keys for each intended recipient. Each
  2212. element in this list is HSize. The total size of
  2213. SRList is determined using RCount * HSize.
  2214. 3. Reserved1 - Certificate Decryption Header Reserved1 Data:
  2215. Value Size Description
  2216. ----- ---- -----------
  2217. RCount 4 bytes Number of recipients.
  2218. RCount This defines the number of intended recipients whose
  2219. public keys were used for encryption. This defines
  2220. the number of elements in the REList field defined below.
  2221. 4. Reserved2 - Certificate Decryption Header Reserved2 Data Structures:
  2222. Value Size Description
  2223. ----- ---- -----------
  2224. HashAlg 2 bytes Hash algorithm identifier
  2225. HSize 2 bytes Hash size
  2226. REList (var) List of recipient data elements
  2227. HashAlg This defines the hash algorithm used to calculate
  2228. the public key hash of each public key used
  2229. for encryption. This field currently supports
  2230. only the following value for SHA-1
  2231. 0x8004 - SHA1
  2232. HSize This defines the size of a hashed public key
  2233. defined in REHData.
  2234. REList This is a variable length list of recipient data.
  2235. Each element in this list consists of a Recipient
  2236. Element data structure as follows:
  2237. Recipient Element (REList) Data Structure:
  2238. Value Size Description
  2239. ----- ---- -----------
  2240. RESize 2 bytes Size of REHData + REKData
  2241. REHData HSize Hash of recipient's public key
  2242. REKData (var) Simple key blob
  2243. RESize This defines the size of an individual REList
  2244. element. This value is the combined size of the
  2245. REHData field + REKData field. REHData is defined by
  2246. HSize. REKData is variable and can be calculated
  2247. for each REList element using RESize and HSize.
  2248. REHData Hashed public key for this recipient.
  2249. REKData Simple Key Blob. The format of this data structure
  2250. is identical to that defined in the Microsoft
  2251. CryptoAPI and generated using the CryptExportKey()
  2252. function. The version of the Simple Key Blob
  2253. supported at this time is 0x02 as defined by
  2254. Microsoft.
  2255. Certificate Processing - Central Directory Encryption:
  2256. ------------------------------------------------------
  2257. Central Directory Encryption using Digital Certificates will
  2258. operate in a manner similar to that of Single Password Central
  2259. Directory Encryption. This record will only be present when there
  2260. is data to place into it. Currently, data is placed into this
  2261. record when digital certificates are used for either encrypting
  2262. or signing the files within a ZIP file. When only password
  2263. encryption is used with no certificate encryption or digital
  2264. signing, this record is not currently needed. When present, this
  2265. record will appear before the start of the actual Central Directory
  2266. data structure and will be located immediately after the Archive
  2267. Decryption Header if the Central Directory is encrypted.
  2268. The Archive Extra Data record will be used to store the following
  2269. information. Additional data may be added in future versions.
  2270. Extra Data Fields:
  2271. 0x0014 - PKCS#7 Store for X.509 Certificates
  2272. 0x0016 - X.509 Certificate ID and Signature for central directory
  2273. 0x0019 - PKCS#7 Encryption Recipient Certificate List
  2274. The 0x0014 and 0x0016 Extra Data records would otherwise be
  2275. located in the first record of the Central Directory for digital
  2276. certificate processing. When encrypting or compressing the Central
  2277. Directory, the 0x0014 and 0x0016 records must be located in the
  2278. Archive Extra Data record and they should not remain in the first
  2279. Central Directory record. The Archive Extra Data record will also
  2280. be used to store the 0x0019 data.
  2281. When present, the size of the Archive Extra Data record will be
  2282. included in the size of the Central Directory. The data of the
  2283. Archive Extra Data record will also be compressed and encrypted
  2284. along with the Central Directory data structure.
  2285. Certificate Processing Differences:
  2286. The Certificate Processing Method of encryption differs from the
  2287. Single Password Symmetric Encryption Method as follows. Instead
  2288. of using a user-defined password to generate a master session key,
  2289. cryptographically random data is used. The key material is then
  2290. wrapped using standard key-wrapping techniques. This key material
  2291. is wrapped using the public key of each recipient that will need
  2292. to decrypt the file using their corresponding private key.
  2293. This specification currently assumes digital certificates will follow
  2294. the X.509 V3 format for 1024 bit and higher RSA format digital
  2295. certificates. Implementation of this Certificate Processing Method
  2296. requires supporting logic for key access and management. This logic
  2297. is outside the scope of this specification.
  2298. OAEP Processing with Certificate-based Encryption:
  2299. OAEP stands for Optimal Asymmetric Encryption Padding. It is a
  2300. strengthening technique used for small encoded items such as decryption
  2301. keys. This is commonly applied in cryptographic key-wrapping techniques
  2302. and is supported by PKCS #1. Versions 5.0 and 6.0 of this specification
  2303. were designed to support OAEP key-wrapping for certificate-based
  2304. decryption keys for additional security.
  2305. Support for private keys stored on Smartcards or Tokens introduced
  2306. a conflict with this OAEP logic. Most card and token products do
  2307. not support the additional strengthening applied to OAEP key-wrapped
  2308. data. In order to resolve this conflict, versions 6.1 and above of this
  2309. specification will no longer support OAEP when encrypting using
  2310. digital certificates.
  2311. Versions of PKZIP available during initial development of the
  2312. certificate processing method set a value of 61 into the
  2313. version needed to extract field for a file. This indicates that
  2314. non-OAEP key wrapping is used. This affects certificate encryption
  2315. only, and password encryption functions should not be affected by
  2316. this value. This means values of 61 may be found on files encrypted
  2317. with certificates only, or on files encrypted with both password
  2318. encryption and certificate encryption. Files encrypted with both
  2319. methods can safely be decrypted using the password methods documented.
  2320. IX. Change Process
  2321. ------------------
  2322. In order for the .ZIP file format to remain a viable definition, this
  2323. specification should be considered as open for periodic review and
  2324. revision. Although this format was originally designed with a
  2325. certain level of extensibility, not all changes in technology
  2326. (present or future) were or will be necessarily considered in its
  2327. design. If your application requires new definitions to the
  2328. extensible sections in this format, or if you would like to
  2329. submit new data structures, please forward your request to
  2330. zipformat@pkware.com. All submissions will be reviewed by the
  2331. ZIP File Specification Committee for possible inclusion into
  2332. future versions of this specification. Periodic revisions
  2333. to this specification will be published to ensure interoperability.
  2334. We encourage comments and feedback that may help improve clarity
  2335. or content.
  2336. X. Incorporating PKWARE Proprietary Technology into Your Product
  2337. ----------------------------------------------------------------
  2338. PKWARE is committed to the interoperability and advancement of the
  2339. .ZIP format. PKWARE offers a free license for certain technological
  2340. aspects described above under certain restrictions and conditions.
  2341. However, the use or implementation in a product of certain technological
  2342. aspects set forth in the current APPNOTE, including those with regard to
  2343. strong encryption, patching, or extended tape operations requires a
  2344. license from PKWARE. Please contact PKWARE with regard to acquiring
  2345. a license.
  2346. XI. Acknowledgements
  2347. ---------------------
  2348. In addition to the above mentioned contributors to PKZIP and PKUNZIP,
  2349. I would like to extend special thanks to Robert Mahoney for suggesting
  2350. the extension .ZIP for this software.
  2351. XII. References
  2352. ---------------
  2353. Fiala, Edward R., and Greene, Daniel H., "Data compression with
  2354. finite windows", Communications of the ACM, Volume 32, Number 4,
  2355. April 1989, pages 490-505.
  2356. Held, Gilbert, "Data Compression, Techniques and Applications,
  2357. Hardware and Software Considerations", John Wiley & Sons, 1987.
  2358. Huffman, D.A., "A method for the construction of minimum-redundancy
  2359. codes", Proceedings of the IRE, Volume 40, Number 9, September 1952,
  2360. pages 1098-1101.
  2361. Nelson, Mark, "LZW Data Compression", Dr. Dobbs Journal, Volume 14,
  2362. Number 10, October 1989, pages 29-37.
  2363. Nelson, Mark, "The Data Compression Book", M&T Books, 1991.
  2364. Storer, James A., "Data Compression, Methods and Theory",
  2365. Computer Science Press, 1988.
  2366. Welch, Terry, "A Technique for High-Performance Data Compression",
  2367. IEEE Computer, Volume 17, Number 6, June 1984, pages 8-19.
  2368. Ziv, J. and Lempel, A., "A universal algorithm for sequential data
  2369. compression", IEEE Transactions on Information Theory,
  2370. Volume 23, Number 3, May 1977, pages 337-343.
  2371. Ziv, J. and Lempel, A., "Compression of individual sequences via
  2372. variable-rate coding", IEEE Transactions on Information Theory,
  2373. Volume 24, Number 5, September 1978, pages 530-536.
  2374. APPENDIX A - AS/400 Extra Field (0x0065) Attribute Definitions
  2375. --------------------------------------------------------------
  2376. Field Definition Structure:
  2377. a. field length including length 2 bytes
  2378. b. field code 2 bytes
  2379. c. data x bytes
  2380. Field Code Description
  2381. 4001 Source type i.e. CLP etc
  2382. 4002 The text description of the library
  2383. 4003 The text description of the file
  2384. 4004 The text description of the member
  2385. 4005 x'F0' or 0 is PF-DTA, x'F1' or 1 is PF-SRC
  2386. 4007 Database Type Code 1 byte
  2387. 4008 Database file and fields definition
  2388. 4009 GZIP file type 2 bytes
  2389. 400B IFS code page 2 bytes
  2390. 400C IFS Creation Time 4 bytes
  2391. 400D IFS Access Time 4 bytes
  2392. 400E IFS Modification time 4 bytes
  2393. 005C Length of the records in the file 2 bytes
  2394. 0068 GZIP two words 8 bytes
  2395. APPENDIX B - z/OS Extra Field (0x0065) Attribute Definitions
  2396. ------------------------------------------------------------
  2397. Field Definition Structure:
  2398. a. field length including length 2 bytes
  2399. b. field code 2 bytes
  2400. c. data x bytes
  2401. Field Code Description
  2402. 0001 File Type 2 bytes
  2403. 0002 NonVSAM Record Format 1 byte
  2404. 0003 Reserved
  2405. 0004 NonVSAM Block Size 2 bytes Big Endian
  2406. 0005 Primary Space Allocation 3 bytes Big Endian
  2407. 0006 Secondary Space Allocation 3 bytes Big Endian
  2408. 0007 Space Allocation Type 1 byte flag
  2409. 0008 Modification Date Retired with PKZIP 5.0 +
  2410. 0009 Expiration Date Retired with PKZIP 5.0 +
  2411. 000A PDS Directory Block Allocation 3 bytes Big Endian binary value
  2412. 000B NonVSAM Volume List variable
  2413. 000C UNIT Reference Retired with PKZIP 5.0 +
  2414. 000D DF/SMS Management Class 8 bytes EBCDIC Text Value
  2415. 000E DF/SMS Storage Class 8 bytes EBCDIC Text Value
  2416. 000F DF/SMS Data Class 8 bytes EBCDIC Text Value
  2417. 0010 PDS/PDSE Member Info. 30 bytes
  2418. 0011 VSAM sub-filetype 2 bytes
  2419. 0012 VSAM LRECL 13 bytes EBCDIC "(num_avg num_max)"
  2420. 0013 VSAM Cluster Name Retired with PKZIP 5.0 +
  2421. 0014 VSAM KSDS Key Information 13 bytes EBCDIC "(num_length num_position)"
  2422. 0015 VSAM Average LRECL 5 bytes EBCDIC num_value padded with blanks
  2423. 0016 VSAM Maximum LRECL 5 bytes EBCDIC num_value padded with blanks
  2424. 0017 VSAM KSDS Key Length 5 bytes EBCDIC num_value padded with blanks
  2425. 0018 VSAM KSDS Key Position 5 bytes EBCDIC num_value padded with blanks
  2426. 0019 VSAM Data Name 1-44 bytes EBCDIC text string
  2427. 001A VSAM KSDS Index Name 1-44 bytes EBCDIC text string
  2428. 001B VSAM Catalog Name 1-44 bytes EBCDIC text string
  2429. 001C VSAM Data Space Type 9 bytes EBCDIC text string
  2430. 001D VSAM Data Space Primary 9 bytes EBCDIC num_value left-justified
  2431. 001E VSAM Data Space Secondary 9 bytes EBCDIC num_value left-justified
  2432. 001F VSAM Data Volume List variable EBCDIC text list of 6-character Volume IDs
  2433. 0020 VSAM Data Buffer Space 8 bytes EBCDIC num_value left-justified
  2434. 0021 VSAM Data CISIZE 5 bytes EBCDIC num_value left-justified
  2435. 0022 VSAM Erase Flag 1 byte flag
  2436. 0023 VSAM Free CI % 3 bytes EBCDIC num_value left-justified
  2437. 0024 VSAM Free CA % 3 bytes EBCDIC num_value left-justified
  2438. 0025 VSAM Index Volume List variable EBCDIC text list of 6-character Volume IDs
  2439. 0026 VSAM Ordered Flag 1 byte flag
  2440. 0027 VSAM REUSE Flag 1 byte flag
  2441. 0028 VSAM SPANNED Flag 1 byte flag
  2442. 0029 VSAM Recovery Flag 1 byte flag
  2443. 002A VSAM WRITECHK Flag 1 byte flag
  2444. 002B VSAM Cluster/Data SHROPTS 3 bytes EBCDIC "n,y"
  2445. 002C VSAM Index SHROPTS 3 bytes EBCDIC "n,y"
  2446. 002D VSAM Index Space Type 9 bytes EBCDIC text string
  2447. 002E VSAM Index Space Primary 9 bytes EBCDIC num_value left-justified
  2448. 002F VSAM Index Space Secondary 9 bytes EBCDIC num_value left-justified
  2449. 0030 VSAM Index CISIZE 5 bytes EBCDIC num_value left-justified
  2450. 0031 VSAM Index IMBED 1 byte flag
  2451. 0032 VSAM Index Ordered Flag 1 byte flag
  2452. 0033 VSAM REPLICATE Flag 1 byte flag
  2453. 0034 VSAM Index REUSE Flag 1 byte flag
  2454. 0035 VSAM Index WRITECHK Flag 1 byte flag Retired with PKZIP 5.0 +
  2455. 0036 VSAM Owner 8 bytes EBCDIC text string
  2456. 0037 VSAM Index Owner 8 bytes EBCDIC text string
  2457. 0038 Reserved
  2458. 0039 Reserved
  2459. 003A Reserved
  2460. 003B Reserved
  2461. 003C Reserved
  2462. 003D Reserved
  2463. 003E Reserved
  2464. 003F Reserved
  2465. 0040 Reserved
  2466. 0041 Reserved
  2467. 0042 Reserved
  2468. 0043 Reserved
  2469. 0044 Reserved
  2470. 0045 Reserved
  2471. 0046 Reserved
  2472. 0047 Reserved
  2473. 0048 Reserved
  2474. 0049 Reserved
  2475. 004A Reserved
  2476. 004B Reserved
  2477. 004C Reserved
  2478. 004D Reserved
  2479. 004E Reserved
  2480. 004F Reserved
  2481. 0050 Reserved
  2482. 0051 Reserved
  2483. 0052 Reserved
  2484. 0053 Reserved
  2485. 0054 Reserved
  2486. 0055 Reserved
  2487. 0056 Reserved
  2488. 0057 Reserved
  2489. 0058 PDS/PDSE Member TTR Info. 6 bytes Big Endian
  2490. 0059 PDS 1st LMOD Text TTR 3 bytes Big Endian
  2491. 005A PDS LMOD EP Rec # 4 bytes Big Endian
  2492. 005B Reserved
  2493. 005C Max Length of records 2 bytes Big Endian
  2494. 005D PDSE Flag 1 byte flag
  2495. 005E Reserved
  2496. 005F Reserved
  2497. 0060 Reserved
  2498. 0061 Reserved
  2499. 0062 Reserved
  2500. 0063 Reserved
  2501. 0064 Reserved
  2502. 0065 Last Date Referenced 4 bytes Packed Hex "yyyymmdd"
  2503. 0066 Date Created 4 bytes Packed Hex "yyyymmdd"
  2504. 0068 GZIP two words 8 bytes
  2505. 0071 Extended NOTE Location 12 bytes Big Endian
  2506. 0072 Archive device UNIT 6 bytes EBCDIC
  2507. 0073 Archive 1st Volume 6 bytes EBCDIC
  2508. 0074 Archive 1st VOL File Seq# 2 bytes Binary
  2509. APPENDIX C - Zip64 Extensible Data Sector Mappings (EFS)
  2510. --------------------------------------------------------
  2511. -Z390 Extra Field:
  2512. The following is the general layout of the attributes for the
  2513. ZIP 64 "extra" block for extended tape operations. Portions of
  2514. this extended tape processing technology are covered under a
  2515. pending patent application. The use or implementation in a
  2516. product of certain technological aspects set forth in the
  2517. current APPNOTE, including those with regard to strong encryption,
  2518. patching or extended tape operations, requires a license from
  2519. PKWARE. Please contact PKWARE with regard to acquiring a license.
  2520. Note: some fields are stored in Big Endian format. All text is
  2521. in EBCDIC format unless otherwise specified.
  2522. Value Size Description
  2523. ----- ---- -----------
  2524. (Z390) 0x0065 2 bytes Tag for this "extra" block type
  2525. Size 4 bytes Size for the following data block
  2526. Tag 4 bytes EBCDIC "Z390"
  2527. Length71 2 bytes Big Endian
  2528. Subcode71 2 bytes Enote type code
  2529. FMEPos 1 byte
  2530. Length72 2 bytes Big Endian
  2531. Subcode72 2 bytes Unit type code
  2532. Unit 1 byte Unit
  2533. Length73 2 bytes Big Endian
  2534. Subcode73 2 bytes Volume1 type code
  2535. FirstVol 1 byte Volume
  2536. Length74 2 bytes Big Endian
  2537. Subcode74 2 bytes FirstVol file sequence
  2538. FileSeq 2 bytes Sequence
  2539. APPENDIX D - Language Encoding (EFS)
  2540. ------------------------------------
  2541. The ZIP format has historically supported only the original IBM PC character
  2542. encoding set, commonly referred to as IBM Code Page 437. This limits storing
  2543. file name characters to only those within the original MS-DOS range of values
  2544. and does not properly support file names in other character encodings, or
  2545. languages. To address this limitation, this specification will support the
  2546. following change.
  2547. If general purpose bit 11 is unset, the file name and comment should conform
  2548. to the original ZIP character encoding. If general purpose bit 11 is set, the
  2549. filename and comment must support The Unicode Standard, Version 4.1.0 or
  2550. greater using the character encoding form defined by the UTF-8 storage
  2551. specification. The Unicode Standard is published by The Unicode
  2552. Consortium (www.unicode.org). UTF-8 encoded data stored within ZIP files
  2553. is expected to not include a byte order mark (BOM).
  2554. Applications may choose to supplement this file name storage through the use
  2555. of the 0x0008 Extra Field. Storage for this optional field is currently
  2556. undefined, however it will be used to allow storing extended information
  2557. on source or target encoding that may further assist applications with file
  2558. name, or file content encoding tasks. Please contact PKWARE with any
  2559. requirements on how this field should be used.
  2560. The 0x0008 Extra Field storage may be used with either setting for general
  2561. purpose bit 11. Examples of the intended usage for this field are to store
  2562. whether "modified-UTF-8" (JAVA) is used, or UTF-8-MAC. Similarly, other
  2563. commonly used character encoding (code page) designations can be indicated
  2564. through this field. Formalized values for use of the 0x0008 record remain
  2565. undefined at this time. The definition for the layout of the 0x0008 field
  2566. will be published when available. Use of the 0x0008 Extra Field provides
  2567. for storing data within a ZIP file in an encoding other than IBM Code
  2568. Page 437 or UTF-8.
  2569. General purpose bit 11 will not imply any encoding of file content or
  2570. password. Values defining character encoding for file content or
  2571. password must be stored within the 0x0008 Extended Language Encoding
  2572. Extra Field.
  2573. Ed Gordon of the Info-ZIP group has defined a pair of "extra field" records
  2574. that can be used to store UTF-8 file name and file comment fields. These
  2575. records can be used for cases when the general purpose bit 11 method
  2576. for storing UTF-8 data in the standard file name and comment fields is
  2577. not desirable. A common case for this alternate method is if backward
  2578. compatibility with older programs is required.
  2579. Definitions for the record structure of these fields are included above
  2580. in the section on 3rd party mappings for "extra field" records. These
  2581. records are identified by Header IDs 0x6375 (Info-ZIP Unicode Comment
  2582. Extra Field) and 0x7075 (Info-ZIP Unicode Path Extra Field).
  2583. The choice of which storage method to use when writing a ZIP file is left
  2584. to the implementation. Developers should expect that a ZIP file may
  2585. contain either method and should provide support for reading data in
  2586. either format. Use of general purpose bit 11 reduces storage requirements
  2587. for file name data by not requiring additional "extra field" data for
  2588. each file, but can result in older ZIP programs not being able to extract
  2589. files. Use of the 0x6375 and 0x7075 records will result in a ZIP file
  2590. that should always be readable by older ZIP programs, but requires more
  2591. storage per file to write file name and/or file comment fields.