sstruct.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. """sstruct.py -- SuperStruct
  2. Higher level layer on top of the struct module, enabling to
  3. bind names to struct elements. The interface is similar to
  4. struct, except the objects passed and returned are not tuples
  5. (or argument lists), but dictionaries or instances.
  6. Just like struct, we use fmt strings to describe a data
  7. structure, except we use one line per element. Lines are
  8. separated by newlines or semi-colons. Each line contains
  9. either one of the special struct characters ('@', '=', '<',
  10. '>' or '!') or a 'name:formatchar' combo (eg. 'myFloat:f').
  11. Repetitions, like the struct module offers them are not useful
  12. in this context, except for fixed length strings (eg. 'myInt:5h'
  13. is not allowed but 'myString:5s' is). The 'x' fmt character
  14. (pad byte) is treated as 'special', since it is by definition
  15. anonymous. Extra whitespace is allowed everywhere.
  16. The sstruct module offers one feature that the "normal" struct
  17. module doesn't: support for fixed point numbers. These are spelled
  18. as "n.mF", where n is the number of bits before the point, and m
  19. the number of bits after the point. Fixed point numbers get
  20. converted to floats.
  21. pack(fmt, object):
  22. 'object' is either a dictionary or an instance (or actually
  23. anything that has a __dict__ attribute). If it is a dictionary,
  24. its keys are used for names. If it is an instance, its
  25. attributes are used to grab struct elements from. Returns
  26. a bytes object containing the packed data.
  27. unpack(fmt, data, object=None)
  28. If 'object' is omitted (or None), a new dictionary will be
  29. returned. If 'object' is a dictionary, it will be used to add
  30. struct elements to. If it is an instance (or in fact anything
  31. that has a __dict__ attribute), an attribute will be added for
  32. each struct element. In the latter two cases, 'object' itself
  33. is returned.
  34. unpack2(fmt, data, object=None)
  35. Convenience function. Same as unpack, except data may be longer
  36. than needed. The returned value is a tuple: (object, leftoverdata).
  37. calcsize(fmt)
  38. like struct.calcsize(), but uses our own fmt strings:
  39. it returns the size of the data in bytes.
  40. """
  41. from fontTools.misc.fixedTools import fixedToFloat as fi2fl, floatToFixed as fl2fi
  42. from fontTools.misc.textTools import tobytes, tostr
  43. import struct
  44. import re
  45. __version__ = "1.2"
  46. __copyright__ = "Copyright 1998, Just van Rossum <just@letterror.com>"
  47. class Error(Exception):
  48. pass
  49. def pack(fmt, obj):
  50. formatstring, names, fixes = getformat(fmt, keep_pad_byte=True)
  51. elements = []
  52. if not isinstance(obj, dict):
  53. obj = obj.__dict__
  54. string_index = formatstring
  55. if formatstring.startswith(">"):
  56. string_index = formatstring[1:]
  57. for ix, name in enumerate(names.keys()):
  58. value = obj[name]
  59. if name in fixes:
  60. # fixed point conversion
  61. value = fl2fi(value, fixes[name])
  62. elif isinstance(value, str):
  63. value = tobytes(value)
  64. elements.append(value)
  65. # Check it fits
  66. try:
  67. struct.pack(names[name], value)
  68. except Exception as e:
  69. raise ValueError(
  70. "Value %s does not fit in format %s for %s" % (value, names[name], name)
  71. ) from e
  72. data = struct.pack(*(formatstring,) + tuple(elements))
  73. return data
  74. def unpack(fmt, data, obj=None):
  75. if obj is None:
  76. obj = {}
  77. data = tobytes(data)
  78. formatstring, names, fixes = getformat(fmt)
  79. if isinstance(obj, dict):
  80. d = obj
  81. else:
  82. d = obj.__dict__
  83. elements = struct.unpack(formatstring, data)
  84. for i in range(len(names)):
  85. name = list(names.keys())[i]
  86. value = elements[i]
  87. if name in fixes:
  88. # fixed point conversion
  89. value = fi2fl(value, fixes[name])
  90. elif isinstance(value, bytes):
  91. try:
  92. value = tostr(value)
  93. except UnicodeDecodeError:
  94. pass
  95. d[name] = value
  96. return obj
  97. def unpack2(fmt, data, obj=None):
  98. length = calcsize(fmt)
  99. return unpack(fmt, data[:length], obj), data[length:]
  100. def calcsize(fmt):
  101. formatstring, names, fixes = getformat(fmt)
  102. return struct.calcsize(formatstring)
  103. # matches "name:formatchar" (whitespace is allowed)
  104. _elementRE = re.compile(
  105. r"\s*" # whitespace
  106. r"([A-Za-z_][A-Za-z_0-9]*)" # name (python identifier)
  107. r"\s*:\s*" # whitespace : whitespace
  108. r"([xcbB?hHiIlLqQfd]|" # formatchar...
  109. r"[0-9]+[ps]|" # ...formatchar...
  110. r"([0-9]+)\.([0-9]+)(F))" # ...formatchar
  111. r"\s*" # whitespace
  112. r"(#.*)?$" # [comment] + end of string
  113. )
  114. # matches the special struct fmt chars and 'x' (pad byte)
  115. _extraRE = re.compile(r"\s*([x@=<>!])\s*(#.*)?$")
  116. # matches an "empty" string, possibly containing whitespace and/or a comment
  117. _emptyRE = re.compile(r"\s*(#.*)?$")
  118. _fixedpointmappings = {8: "b", 16: "h", 32: "l"}
  119. _formatcache = {}
  120. def getformat(fmt, keep_pad_byte=False):
  121. fmt = tostr(fmt, encoding="ascii")
  122. try:
  123. formatstring, names, fixes = _formatcache[fmt]
  124. except KeyError:
  125. lines = re.split("[\n;]", fmt)
  126. formatstring = ""
  127. names = {}
  128. fixes = {}
  129. for line in lines:
  130. if _emptyRE.match(line):
  131. continue
  132. m = _extraRE.match(line)
  133. if m:
  134. formatchar = m.group(1)
  135. if formatchar != "x" and formatstring:
  136. raise Error("a special fmt char must be first")
  137. else:
  138. m = _elementRE.match(line)
  139. if not m:
  140. raise Error("syntax error in fmt: '%s'" % line)
  141. name = m.group(1)
  142. formatchar = m.group(2)
  143. if keep_pad_byte or formatchar != "x":
  144. names[name] = formatchar
  145. if m.group(3):
  146. # fixed point
  147. before = int(m.group(3))
  148. after = int(m.group(4))
  149. bits = before + after
  150. if bits not in [8, 16, 32]:
  151. raise Error("fixed point must be 8, 16 or 32 bits long")
  152. formatchar = _fixedpointmappings[bits]
  153. names[name] = formatchar
  154. assert m.group(5) == "F"
  155. fixes[name] = after
  156. formatstring += formatchar
  157. _formatcache[fmt] = formatstring, names, fixes
  158. return formatstring, names, fixes
  159. def _test():
  160. fmt = """
  161. # comments are allowed
  162. > # big endian (see documentation for struct)
  163. # empty lines are allowed:
  164. ashort: h
  165. along: l
  166. abyte: b # a byte
  167. achar: c
  168. astr: 5s
  169. afloat: f; adouble: d # multiple "statements" are allowed
  170. afixed: 16.16F
  171. abool: ?
  172. apad: x
  173. """
  174. print("size:", calcsize(fmt))
  175. class foo(object):
  176. pass
  177. i = foo()
  178. i.ashort = 0x7FFF
  179. i.along = 0x7FFFFFFF
  180. i.abyte = 0x7F
  181. i.achar = "a"
  182. i.astr = "12345"
  183. i.afloat = 0.5
  184. i.adouble = 0.5
  185. i.afixed = 1.5
  186. i.abool = True
  187. data = pack(fmt, i)
  188. print("data:", repr(data))
  189. print(unpack(fmt, data))
  190. i2 = foo()
  191. unpack(fmt, data, i2)
  192. print(vars(i2))
  193. if __name__ == "__main__":
  194. _test()