O_S_2f_2.py 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752
  1. from fontTools.misc import sstruct
  2. from fontTools.misc.roundTools import otRound
  3. from fontTools.misc.textTools import safeEval, num2binary, binary2num
  4. from fontTools.ttLib.tables import DefaultTable
  5. import bisect
  6. import logging
# Module-level logger, used for the warnings emitted while decompiling and
# compiling the table.
log = logging.getLogger(__name__)

# panose classification
# sstruct format of the PANOSE record: ten consecutive fields, each unpacked
# with the struct code "B".  The field names listed here become the attribute
# names of Panose instances.
panoseFormat = """
bFamilyType: B
bSerifStyle: B
bWeight: B
bProportion: B
bContrast: B
bStrokeVariation: B
bArmStyle: B
bLetterForm: B
bMidline: B
bXHeight: B
"""
  21. class Panose(object):
  22. def __init__(self, **kwargs):
  23. _, names, _ = sstruct.getformat(panoseFormat)
  24. for name in names:
  25. setattr(self, name, kwargs.pop(name, 0))
  26. for k in kwargs:
  27. raise TypeError(f"Panose() got an unexpected keyword argument {k!r}")
  28. def toXML(self, writer, ttFont):
  29. formatstring, names, fixes = sstruct.getformat(panoseFormat)
  30. for name in names:
  31. writer.simpletag(name, value=getattr(self, name))
  32. writer.newline()
  33. def fromXML(self, name, attrs, content, ttFont):
  34. setattr(self, name, safeEval(attrs["value"]))
# 'sfnt' OS/2 and Windows Metrics table - 'OS/2'

# sstruct format of the fields that every table version starts with; it is
# the complete layout of a version 0 table.
OS2_format_0 = """
> # big endian
version: H # version
xAvgCharWidth: h # average character width
usWeightClass: H # degree of thickness of strokes
usWidthClass: H # aspect ratio
fsType: H # type flags
ySubscriptXSize: h # subscript horizontal font size
ySubscriptYSize: h # subscript vertical font size
ySubscriptXOffset: h # subscript x offset
ySubscriptYOffset: h # subscript y offset
ySuperscriptXSize: h # superscript horizontal font size
ySuperscriptYSize: h # superscript vertical font size
ySuperscriptXOffset: h # superscript x offset
ySuperscriptYOffset: h # superscript y offset
yStrikeoutSize: h # strikeout size
yStrikeoutPosition: h # strikeout position
sFamilyClass: h # font family class and subclass
panose: 10s # panose classification number
ulUnicodeRange1: L # character range
ulUnicodeRange2: L # character range
ulUnicodeRange3: L # character range
ulUnicodeRange4: L # character range
achVendID: 4s # font vendor identification
fsSelection: H # font selection flags
usFirstCharIndex: H # first unicode character index
usLastCharIndex: H # last unicode character index
sTypoAscender: h # typographic ascender
sTypoDescender: h # typographic descender
sTypoLineGap: h # typographic line gap
usWinAscent: H # Windows ascender
usWinDescent: H # Windows descender
"""

# Fields that follow the version 0 fields in a version 1 table.
OS2_format_1_addition = """
ulCodePageRange1: L
ulCodePageRange2: L
"""

# Fields that follow the version 0 fields in version 2, 3 and 4 tables
# (the version 1 additions plus five more fields).
OS2_format_2_addition = (
    OS2_format_1_addition
    + """
sxHeight: h
sCapHeight: h
usDefaultChar: H
usBreakChar: H
usMaxContext: H
"""
)

# Fields that follow the version 0 fields in a version 5 table
# (the version 2 additions plus the optical point size range).
OS2_format_5_addition = (
    OS2_format_2_addition
    + """
usLowerOpticalPointSize: H
usUpperOpticalPointSize: H
"""
)

bigendian = " > # big endian\n"

# Complete per-version formats.  They inherit the byte-order line from
# OS2_format_0, so they must be built BEFORE the *_addition strings are
# rebound below.
OS2_format_1 = OS2_format_0 + OS2_format_1_addition
OS2_format_2 = OS2_format_0 + OS2_format_2_addition
OS2_format_5 = OS2_format_0 + OS2_format_5_addition

# The *_addition formats are also unpacked on their own (from the bytes left
# over after the version 0 fields), so each one gets its own byte-order line.
OS2_format_1_addition = bigendian + OS2_format_1_addition
OS2_format_2_addition = bigendian + OS2_format_2_addition
OS2_format_5_addition = bigendian + OS2_format_5_addition
  97. class table_O_S_2f_2(DefaultTable.DefaultTable):
  98. """OS/2 and Windows Metrics table
  99. The ``OS/2`` table contains a variety of font-wide metrics and
  100. parameters that may be useful to an operating system or other
  101. software for system-integration purposes.
  102. See also https://learn.microsoft.com/en-us/typography/opentype/spec/os2
  103. """
  104. dependencies = ["head"]
  105. def decompile(self, data, ttFont):
  106. dummy, data = sstruct.unpack2(OS2_format_0, data, self)
  107. if self.version == 1:
  108. dummy, data = sstruct.unpack2(OS2_format_1_addition, data, self)
  109. elif self.version in (2, 3, 4):
  110. dummy, data = sstruct.unpack2(OS2_format_2_addition, data, self)
  111. elif self.version == 5:
  112. dummy, data = sstruct.unpack2(OS2_format_5_addition, data, self)
  113. self.usLowerOpticalPointSize /= 20
  114. self.usUpperOpticalPointSize /= 20
  115. elif self.version != 0:
  116. from fontTools import ttLib
  117. raise ttLib.TTLibError(
  118. "unknown format for OS/2 table: version %s" % self.version
  119. )
  120. if len(data):
  121. log.warning("too much 'OS/2' table data")
  122. self.panose = sstruct.unpack(panoseFormat, self.panose, Panose())
  123. def compile(self, ttFont):
  124. self.updateFirstAndLastCharIndex(ttFont)
  125. panose = self.panose
  126. head = ttFont["head"]
  127. if (self.fsSelection & 1) and not (head.macStyle & 1 << 1):
  128. log.warning(
  129. "fsSelection bit 0 (italic) and "
  130. "head table macStyle bit 1 (italic) should match"
  131. )
  132. if (self.fsSelection & 1 << 5) and not (head.macStyle & 1):
  133. log.warning(
  134. "fsSelection bit 5 (bold) and "
  135. "head table macStyle bit 0 (bold) should match"
  136. )
  137. if (self.fsSelection & 1 << 6) and (self.fsSelection & 1 + (1 << 5)):
  138. log.warning(
  139. "fsSelection bit 6 (regular) is set, "
  140. "bits 0 (italic) and 5 (bold) must be clear"
  141. )
  142. if self.version < 4 and self.fsSelection & 0b1110000000:
  143. log.warning(
  144. "fsSelection bits 7, 8 and 9 are only defined in "
  145. "OS/2 table version 4 and up: version %s",
  146. self.version,
  147. )
  148. self.panose = sstruct.pack(panoseFormat, self.panose)
  149. if self.version == 0:
  150. data = sstruct.pack(OS2_format_0, self)
  151. elif self.version == 1:
  152. data = sstruct.pack(OS2_format_1, self)
  153. elif self.version in (2, 3, 4):
  154. data = sstruct.pack(OS2_format_2, self)
  155. elif self.version == 5:
  156. d = self.__dict__.copy()
  157. d["usLowerOpticalPointSize"] = round(self.usLowerOpticalPointSize * 20)
  158. d["usUpperOpticalPointSize"] = round(self.usUpperOpticalPointSize * 20)
  159. data = sstruct.pack(OS2_format_5, d)
  160. else:
  161. from fontTools import ttLib
  162. raise ttLib.TTLibError(
  163. "unknown format for OS/2 table: version %s" % self.version
  164. )
  165. self.panose = panose
  166. return data
  167. def toXML(self, writer, ttFont):
  168. writer.comment(
  169. "The fields 'usFirstCharIndex' and 'usLastCharIndex'\n"
  170. "will be recalculated by the compiler"
  171. )
  172. writer.newline()
  173. if self.version == 1:
  174. format = OS2_format_1
  175. elif self.version in (2, 3, 4):
  176. format = OS2_format_2
  177. elif self.version == 5:
  178. format = OS2_format_5
  179. else:
  180. format = OS2_format_0
  181. formatstring, names, fixes = sstruct.getformat(format)
  182. for name in names:
  183. value = getattr(self, name)
  184. if name == "panose":
  185. writer.begintag("panose")
  186. writer.newline()
  187. value.toXML(writer, ttFont)
  188. writer.endtag("panose")
  189. elif name in (
  190. "ulUnicodeRange1",
  191. "ulUnicodeRange2",
  192. "ulUnicodeRange3",
  193. "ulUnicodeRange4",
  194. "ulCodePageRange1",
  195. "ulCodePageRange2",
  196. ):
  197. writer.simpletag(name, value=num2binary(value))
  198. elif name in ("fsType", "fsSelection"):
  199. writer.simpletag(name, value=num2binary(value, 16))
  200. elif name == "achVendID":
  201. writer.simpletag(name, value=repr(value)[1:-1])
  202. else:
  203. writer.simpletag(name, value=value)
  204. writer.newline()
  205. def fromXML(self, name, attrs, content, ttFont):
  206. if name == "panose":
  207. self.panose = panose = Panose()
  208. for element in content:
  209. if isinstance(element, tuple):
  210. name, attrs, content = element
  211. panose.fromXML(name, attrs, content, ttFont)
  212. elif name in (
  213. "ulUnicodeRange1",
  214. "ulUnicodeRange2",
  215. "ulUnicodeRange3",
  216. "ulUnicodeRange4",
  217. "ulCodePageRange1",
  218. "ulCodePageRange2",
  219. "fsType",
  220. "fsSelection",
  221. ):
  222. setattr(self, name, binary2num(attrs["value"]))
  223. elif name == "achVendID":
  224. setattr(self, name, safeEval("'''" + attrs["value"] + "'''"))
  225. else:
  226. setattr(self, name, safeEval(attrs["value"]))
  227. def updateFirstAndLastCharIndex(self, ttFont):
  228. if "cmap" not in ttFont:
  229. return
  230. codes = set()
  231. for table in getattr(ttFont["cmap"], "tables", []):
  232. if table.isUnicode():
  233. codes.update(table.cmap.keys())
  234. if codes:
  235. minCode = min(codes)
  236. maxCode = max(codes)
  237. # USHORT cannot hold codepoints greater than 0xFFFF
  238. self.usFirstCharIndex = min(0xFFFF, minCode)
  239. self.usLastCharIndex = min(0xFFFF, maxCode)
  240. # misspelled attributes kept for legacy reasons
  241. @property
  242. def usMaxContex(self):
  243. return self.usMaxContext
  244. @usMaxContex.setter
  245. def usMaxContex(self, value):
  246. self.usMaxContext = value
  247. @property
  248. def fsFirstCharIndex(self):
  249. return self.usFirstCharIndex
  250. @fsFirstCharIndex.setter
  251. def fsFirstCharIndex(self, value):
  252. self.usFirstCharIndex = value
  253. @property
  254. def fsLastCharIndex(self):
  255. return self.usLastCharIndex
  256. @fsLastCharIndex.setter
  257. def fsLastCharIndex(self, value):
  258. self.usLastCharIndex = value
  259. def getUnicodeRanges(self):
  260. """Return the set of 'ulUnicodeRange*' bits currently enabled."""
  261. bits = set()
  262. ul1, ul2 = self.ulUnicodeRange1, self.ulUnicodeRange2
  263. ul3, ul4 = self.ulUnicodeRange3, self.ulUnicodeRange4
  264. for i in range(32):
  265. if ul1 & (1 << i):
  266. bits.add(i)
  267. if ul2 & (1 << i):
  268. bits.add(i + 32)
  269. if ul3 & (1 << i):
  270. bits.add(i + 64)
  271. if ul4 & (1 << i):
  272. bits.add(i + 96)
  273. return bits
  274. def setUnicodeRanges(self, bits):
  275. """Set the 'ulUnicodeRange*' fields to the specified 'bits'."""
  276. ul1, ul2, ul3, ul4 = 0, 0, 0, 0
  277. for bit in bits:
  278. if 0 <= bit < 32:
  279. ul1 |= 1 << bit
  280. elif 32 <= bit < 64:
  281. ul2 |= 1 << (bit - 32)
  282. elif 64 <= bit < 96:
  283. ul3 |= 1 << (bit - 64)
  284. elif 96 <= bit < 123:
  285. ul4 |= 1 << (bit - 96)
  286. else:
  287. raise ValueError("expected 0 <= int <= 122, found: %r" % bit)
  288. self.ulUnicodeRange1, self.ulUnicodeRange2 = ul1, ul2
  289. self.ulUnicodeRange3, self.ulUnicodeRange4 = ul3, ul4
  290. def recalcUnicodeRanges(self, ttFont, pruneOnly=False):
  291. """Intersect the codepoints in the font's Unicode cmap subtables with
  292. the Unicode block ranges defined in the OpenType specification (v1.7),
  293. and set the respective 'ulUnicodeRange*' bits if there is at least ONE
  294. intersection.
  295. If 'pruneOnly' is True, only clear unused bits with NO intersection.
  296. """
  297. unicodes = set()
  298. for table in ttFont["cmap"].tables:
  299. if table.isUnicode():
  300. unicodes.update(table.cmap.keys())
  301. if pruneOnly:
  302. empty = intersectUnicodeRanges(unicodes, inverse=True)
  303. bits = self.getUnicodeRanges() - empty
  304. else:
  305. bits = intersectUnicodeRanges(unicodes)
  306. self.setUnicodeRanges(bits)
  307. return bits
  308. def getCodePageRanges(self):
  309. """Return the set of 'ulCodePageRange*' bits currently enabled."""
  310. bits = set()
  311. if self.version < 1:
  312. return bits
  313. ul1, ul2 = self.ulCodePageRange1, self.ulCodePageRange2
  314. for i in range(32):
  315. if ul1 & (1 << i):
  316. bits.add(i)
  317. if ul2 & (1 << i):
  318. bits.add(i + 32)
  319. return bits
  320. def setCodePageRanges(self, bits):
  321. """Set the 'ulCodePageRange*' fields to the specified 'bits'."""
  322. ul1, ul2 = 0, 0
  323. for bit in bits:
  324. if 0 <= bit < 32:
  325. ul1 |= 1 << bit
  326. elif 32 <= bit < 64:
  327. ul2 |= 1 << (bit - 32)
  328. else:
  329. raise ValueError(f"expected 0 <= int <= 63, found: {bit:r}")
  330. if self.version < 1:
  331. self.version = 1
  332. self.ulCodePageRange1, self.ulCodePageRange2 = ul1, ul2
  333. def recalcCodePageRanges(self, ttFont, pruneOnly=False):
  334. unicodes = set()
  335. for table in ttFont["cmap"].tables:
  336. if table.isUnicode():
  337. unicodes.update(table.cmap.keys())
  338. bits = calcCodePageRanges(unicodes)
  339. if pruneOnly:
  340. bits &= self.getCodePageRanges()
  341. # when no codepage ranges can be enabled, fall back to enabling bit 0
  342. # (Latin 1) so that the font works in MS Word:
  343. # https://github.com/googlei18n/fontmake/issues/468
  344. if not bits:
  345. bits = {0}
  346. self.setCodePageRanges(bits)
  347. return bits
  348. def recalcAvgCharWidth(self, ttFont):
  349. """Recalculate xAvgCharWidth using metrics from ttFont's 'hmtx' table.
  350. Set it to 0 if the unlikely event 'hmtx' table is not found.
  351. """
  352. avg_width = 0
  353. hmtx = ttFont.get("hmtx")
  354. if hmtx is not None:
  355. widths = [width for width, _ in hmtx.metrics.values() if width > 0]
  356. if widths:
  357. avg_width = otRound(sum(widths) / len(widths))
  358. self.xAvgCharWidth = avg_width
  359. return avg_width
# Unicode ranges data from the OpenType OS/2 table specification v1.7
#
# The position of an entry in the outer tuple is its 'ulUnicodeRange' bit
# number.  Each entry is a tuple of one or more (block name, (first codepoint,
# last codepoint)) pairs; both codepoints are inclusive.
OS2_UNICODE_RANGES = (
    # bits 0-31 (ulUnicodeRange1)
    (("Basic Latin", (0x0000, 0x007F)),),
    (("Latin-1 Supplement", (0x0080, 0x00FF)),),
    (("Latin Extended-A", (0x0100, 0x017F)),),
    (("Latin Extended-B", (0x0180, 0x024F)),),
    (
        ("IPA Extensions", (0x0250, 0x02AF)),
        ("Phonetic Extensions", (0x1D00, 0x1D7F)),
        ("Phonetic Extensions Supplement", (0x1D80, 0x1DBF)),
    ),
    (
        ("Spacing Modifier Letters", (0x02B0, 0x02FF)),
        ("Modifier Tone Letters", (0xA700, 0xA71F)),
    ),
    (
        ("Combining Diacritical Marks", (0x0300, 0x036F)),
        ("Combining Diacritical Marks Supplement", (0x1DC0, 0x1DFF)),
    ),
    (("Greek and Coptic", (0x0370, 0x03FF)),),
    (("Coptic", (0x2C80, 0x2CFF)),),
    (
        ("Cyrillic", (0x0400, 0x04FF)),
        ("Cyrillic Supplement", (0x0500, 0x052F)),
        ("Cyrillic Extended-A", (0x2DE0, 0x2DFF)),
        ("Cyrillic Extended-B", (0xA640, 0xA69F)),
    ),
    (("Armenian", (0x0530, 0x058F)),),
    (("Hebrew", (0x0590, 0x05FF)),),
    (("Vai", (0xA500, 0xA63F)),),
    (("Arabic", (0x0600, 0x06FF)), ("Arabic Supplement", (0x0750, 0x077F))),
    (("NKo", (0x07C0, 0x07FF)),),
    (("Devanagari", (0x0900, 0x097F)),),
    (("Bengali", (0x0980, 0x09FF)),),
    (("Gurmukhi", (0x0A00, 0x0A7F)),),
    (("Gujarati", (0x0A80, 0x0AFF)),),
    (("Oriya", (0x0B00, 0x0B7F)),),
    (("Tamil", (0x0B80, 0x0BFF)),),
    (("Telugu", (0x0C00, 0x0C7F)),),
    (("Kannada", (0x0C80, 0x0CFF)),),
    (("Malayalam", (0x0D00, 0x0D7F)),),
    (("Thai", (0x0E00, 0x0E7F)),),
    (("Lao", (0x0E80, 0x0EFF)),),
    (("Georgian", (0x10A0, 0x10FF)), ("Georgian Supplement", (0x2D00, 0x2D2F))),
    (("Balinese", (0x1B00, 0x1B7F)),),
    (("Hangul Jamo", (0x1100, 0x11FF)),),
    (
        ("Latin Extended Additional", (0x1E00, 0x1EFF)),
        ("Latin Extended-C", (0x2C60, 0x2C7F)),
        ("Latin Extended-D", (0xA720, 0xA7FF)),
    ),
    (("Greek Extended", (0x1F00, 0x1FFF)),),
    (
        ("General Punctuation", (0x2000, 0x206F)),
        ("Supplemental Punctuation", (0x2E00, 0x2E7F)),
    ),
    # bits 32-63 (ulUnicodeRange2)
    (("Superscripts And Subscripts", (0x2070, 0x209F)),),
    (("Currency Symbols", (0x20A0, 0x20CF)),),
    (("Combining Diacritical Marks For Symbols", (0x20D0, 0x20FF)),),
    (("Letterlike Symbols", (0x2100, 0x214F)),),
    (("Number Forms", (0x2150, 0x218F)),),
    (
        ("Arrows", (0x2190, 0x21FF)),
        ("Supplemental Arrows-A", (0x27F0, 0x27FF)),
        ("Supplemental Arrows-B", (0x2900, 0x297F)),
        ("Miscellaneous Symbols and Arrows", (0x2B00, 0x2BFF)),
    ),
    (
        ("Mathematical Operators", (0x2200, 0x22FF)),
        ("Supplemental Mathematical Operators", (0x2A00, 0x2AFF)),
        ("Miscellaneous Mathematical Symbols-A", (0x27C0, 0x27EF)),
        ("Miscellaneous Mathematical Symbols-B", (0x2980, 0x29FF)),
    ),
    (("Miscellaneous Technical", (0x2300, 0x23FF)),),
    (("Control Pictures", (0x2400, 0x243F)),),
    (("Optical Character Recognition", (0x2440, 0x245F)),),
    (("Enclosed Alphanumerics", (0x2460, 0x24FF)),),
    (("Box Drawing", (0x2500, 0x257F)),),
    (("Block Elements", (0x2580, 0x259F)),),
    (("Geometric Shapes", (0x25A0, 0x25FF)),),
    (("Miscellaneous Symbols", (0x2600, 0x26FF)),),
    (("Dingbats", (0x2700, 0x27BF)),),
    (("CJK Symbols And Punctuation", (0x3000, 0x303F)),),
    (("Hiragana", (0x3040, 0x309F)),),
    (
        ("Katakana", (0x30A0, 0x30FF)),
        ("Katakana Phonetic Extensions", (0x31F0, 0x31FF)),
    ),
    (("Bopomofo", (0x3100, 0x312F)), ("Bopomofo Extended", (0x31A0, 0x31BF))),
    (("Hangul Compatibility Jamo", (0x3130, 0x318F)),),
    (("Phags-pa", (0xA840, 0xA87F)),),
    (("Enclosed CJK Letters And Months", (0x3200, 0x32FF)),),
    (("CJK Compatibility", (0x3300, 0x33FF)),),
    (("Hangul Syllables", (0xAC00, 0xD7AF)),),
    (("Non-Plane 0 *", (0xD800, 0xDFFF)),),
    (("Phoenician", (0x10900, 0x1091F)),),
    (
        ("CJK Unified Ideographs", (0x4E00, 0x9FFF)),
        ("CJK Radicals Supplement", (0x2E80, 0x2EFF)),
        ("Kangxi Radicals", (0x2F00, 0x2FDF)),
        ("Ideographic Description Characters", (0x2FF0, 0x2FFF)),
        ("CJK Unified Ideographs Extension A", (0x3400, 0x4DBF)),
        ("CJK Unified Ideographs Extension B", (0x20000, 0x2A6DF)),
        ("Kanbun", (0x3190, 0x319F)),
    ),
    (("Private Use Area (plane 0)", (0xE000, 0xF8FF)),),
    (
        ("CJK Strokes", (0x31C0, 0x31EF)),
        ("CJK Compatibility Ideographs", (0xF900, 0xFAFF)),
        ("CJK Compatibility Ideographs Supplement", (0x2F800, 0x2FA1F)),
    ),
    (("Alphabetic Presentation Forms", (0xFB00, 0xFB4F)),),
    (("Arabic Presentation Forms-A", (0xFB50, 0xFDFF)),),
    # bits 64-95 (ulUnicodeRange3)
    (("Combining Half Marks", (0xFE20, 0xFE2F)),),
    (
        ("Vertical Forms", (0xFE10, 0xFE1F)),
        ("CJK Compatibility Forms", (0xFE30, 0xFE4F)),
    ),
    (("Small Form Variants", (0xFE50, 0xFE6F)),),
    (("Arabic Presentation Forms-B", (0xFE70, 0xFEFF)),),
    (("Halfwidth And Fullwidth Forms", (0xFF00, 0xFFEF)),),
    (("Specials", (0xFFF0, 0xFFFF)),),
    (("Tibetan", (0x0F00, 0x0FFF)),),
    (("Syriac", (0x0700, 0x074F)),),
    (("Thaana", (0x0780, 0x07BF)),),
    (("Sinhala", (0x0D80, 0x0DFF)),),
    (("Myanmar", (0x1000, 0x109F)),),
    (
        ("Ethiopic", (0x1200, 0x137F)),
        ("Ethiopic Supplement", (0x1380, 0x139F)),
        ("Ethiopic Extended", (0x2D80, 0x2DDF)),
    ),
    (("Cherokee", (0x13A0, 0x13FF)),),
    (("Unified Canadian Aboriginal Syllabics", (0x1400, 0x167F)),),
    (("Ogham", (0x1680, 0x169F)),),
    (("Runic", (0x16A0, 0x16FF)),),
    (("Khmer", (0x1780, 0x17FF)), ("Khmer Symbols", (0x19E0, 0x19FF))),
    (("Mongolian", (0x1800, 0x18AF)),),
    (("Braille Patterns", (0x2800, 0x28FF)),),
    (("Yi Syllables", (0xA000, 0xA48F)), ("Yi Radicals", (0xA490, 0xA4CF))),
    (
        ("Tagalog", (0x1700, 0x171F)),
        ("Hanunoo", (0x1720, 0x173F)),
        ("Buhid", (0x1740, 0x175F)),
        ("Tagbanwa", (0x1760, 0x177F)),
    ),
    (("Old Italic", (0x10300, 0x1032F)),),
    (("Gothic", (0x10330, 0x1034F)),),
    (("Deseret", (0x10400, 0x1044F)),),
    (
        ("Byzantine Musical Symbols", (0x1D000, 0x1D0FF)),
        ("Musical Symbols", (0x1D100, 0x1D1FF)),
        ("Ancient Greek Musical Notation", (0x1D200, 0x1D24F)),
    ),
    (("Mathematical Alphanumeric Symbols", (0x1D400, 0x1D7FF)),),
    (
        ("Private Use (plane 15)", (0xF0000, 0xFFFFD)),
        ("Private Use (plane 16)", (0x100000, 0x10FFFD)),
    ),
    (
        ("Variation Selectors", (0xFE00, 0xFE0F)),
        ("Variation Selectors Supplement", (0xE0100, 0xE01EF)),
    ),
    (("Tags", (0xE0000, 0xE007F)),),
    (("Limbu", (0x1900, 0x194F)),),
    (("Tai Le", (0x1950, 0x197F)),),
    (("New Tai Lue", (0x1980, 0x19DF)),),
    # bits 96-122 (ulUnicodeRange4)
    (("Buginese", (0x1A00, 0x1A1F)),),
    (("Glagolitic", (0x2C00, 0x2C5F)),),
    (("Tifinagh", (0x2D30, 0x2D7F)),),
    (("Yijing Hexagram Symbols", (0x4DC0, 0x4DFF)),),
    (("Syloti Nagri", (0xA800, 0xA82F)),),
    (
        ("Linear B Syllabary", (0x10000, 0x1007F)),
        ("Linear B Ideograms", (0x10080, 0x100FF)),
        ("Aegean Numbers", (0x10100, 0x1013F)),
    ),
    (("Ancient Greek Numbers", (0x10140, 0x1018F)),),
    (("Ugaritic", (0x10380, 0x1039F)),),
    (("Old Persian", (0x103A0, 0x103DF)),),
    (("Shavian", (0x10450, 0x1047F)),),
    (("Osmanya", (0x10480, 0x104AF)),),
    (("Cypriot Syllabary", (0x10800, 0x1083F)),),
    (("Kharoshthi", (0x10A00, 0x10A5F)),),
    (("Tai Xuan Jing Symbols", (0x1D300, 0x1D35F)),),
    (
        ("Cuneiform", (0x12000, 0x123FF)),
        ("Cuneiform Numbers and Punctuation", (0x12400, 0x1247F)),
    ),
    (("Counting Rod Numerals", (0x1D360, 0x1D37F)),),
    (("Sundanese", (0x1B80, 0x1BBF)),),
    (("Lepcha", (0x1C00, 0x1C4F)),),
    (("Ol Chiki", (0x1C50, 0x1C7F)),),
    (("Saurashtra", (0xA880, 0xA8DF)),),
    (("Kayah Li", (0xA900, 0xA92F)),),
    (("Rejang", (0xA930, 0xA95F)),),
    (("Cham", (0xAA00, 0xAA5F)),),
    (("Ancient Symbols", (0x10190, 0x101CF)),),
    (("Phaistos Disc", (0x101D0, 0x101FF)),),
    (
        ("Carian", (0x102A0, 0x102DF)),
        ("Lycian", (0x10280, 0x1029F)),
        ("Lydian", (0x10920, 0x1093F)),
    ),
    (("Domino Tiles", (0x1F030, 0x1F09F)), ("Mahjong Tiles", (0x1F000, 0x1F02F))),
)
  566. _unicodeStarts = []
  567. _unicodeValues = [None]
  568. def _getUnicodeRanges():
  569. # build the ranges of codepoints for each unicode range bit, and cache result
  570. if not _unicodeStarts:
  571. unicodeRanges = [
  572. (start, (stop, bit))
  573. for bit, blocks in enumerate(OS2_UNICODE_RANGES)
  574. for _, (start, stop) in blocks
  575. ]
  576. for start, (stop, bit) in sorted(unicodeRanges):
  577. _unicodeStarts.append(start)
  578. _unicodeValues.append((stop, bit))
  579. return _unicodeStarts, _unicodeValues
  580. def intersectUnicodeRanges(unicodes, inverse=False):
  581. """Intersect a sequence of (int) Unicode codepoints with the Unicode block
  582. ranges defined in the OpenType specification v1.7, and return the set of
  583. 'ulUnicodeRanges' bits for which there is at least ONE intersection.
  584. If 'inverse' is True, return the the bits for which there is NO intersection.
  585. >>> intersectUnicodeRanges([0x0410]) == {9}
  586. True
  587. >>> intersectUnicodeRanges([0x0410, 0x1F000]) == {9, 57, 122}
  588. True
  589. >>> intersectUnicodeRanges([0x0410, 0x1F000], inverse=True) == (
  590. ... set(range(len(OS2_UNICODE_RANGES))) - {9, 57, 122})
  591. True
  592. """
  593. unicodes = set(unicodes)
  594. unicodestarts, unicodevalues = _getUnicodeRanges()
  595. bits = set()
  596. for code in unicodes:
  597. stop, bit = unicodevalues[bisect.bisect(unicodestarts, code)]
  598. if code <= stop:
  599. bits.add(bit)
  600. # The spec says that bit 57 ("Non Plane 0") implies that there's
  601. # at least one codepoint beyond the BMP; so I also include all
  602. # the non-BMP codepoints here
  603. if any(0x10000 <= code < 0x110000 for code in unicodes):
  604. bits.add(57)
  605. return set(range(len(OS2_UNICODE_RANGES))) - bits if inverse else bits
  606. def calcCodePageRanges(unicodes):
  607. """Given a set of Unicode codepoints (integers), calculate the
  608. corresponding OS/2 CodePage range bits.
  609. This is a direct translation of FontForge implementation:
  610. https://github.com/fontforge/fontforge/blob/7b2c074/fontforge/tottf.c#L3158
  611. """
  612. bits = set()
  613. hasAscii = set(range(0x20, 0x7E)).issubset(unicodes)
  614. hasLineart = ord("┤") in unicodes
  615. for uni in unicodes:
  616. if uni == ord("Þ") and hasAscii:
  617. bits.add(0) # Latin 1
  618. elif uni == ord("Ľ") and hasAscii:
  619. bits.add(1) # Latin 2: Eastern Europe
  620. if hasLineart:
  621. bits.add(58) # Latin 2
  622. elif uni == ord("Б"):
  623. bits.add(2) # Cyrillic
  624. if ord("Ѕ") in unicodes and hasLineart:
  625. bits.add(57) # IBM Cyrillic
  626. if ord("╜") in unicodes and hasLineart:
  627. bits.add(49) # MS-DOS Russian
  628. elif uni == ord("Ά"):
  629. bits.add(3) # Greek
  630. if hasLineart and ord("½") in unicodes:
  631. bits.add(48) # IBM Greek
  632. if hasLineart and ord("√") in unicodes:
  633. bits.add(60) # Greek, former 437 G
  634. elif uni == ord("İ") and hasAscii:
  635. bits.add(4) # Turkish
  636. if hasLineart:
  637. bits.add(56) # IBM turkish
  638. elif uni == ord("א"):
  639. bits.add(5) # Hebrew
  640. if hasLineart and ord("√") in unicodes:
  641. bits.add(53) # Hebrew
  642. elif uni == ord("ر"):
  643. bits.add(6) # Arabic
  644. if ord("√") in unicodes:
  645. bits.add(51) # Arabic
  646. if hasLineart:
  647. bits.add(61) # Arabic; ASMO 708
  648. elif uni == ord("ŗ") and hasAscii:
  649. bits.add(7) # Windows Baltic
  650. if hasLineart:
  651. bits.add(59) # MS-DOS Baltic
  652. elif uni == ord("₫") and hasAscii:
  653. bits.add(8) # Vietnamese
  654. elif uni == ord("ๅ"):
  655. bits.add(16) # Thai
  656. elif uni == ord("エ"):
  657. bits.add(17) # JIS/Japan
  658. elif uni == ord("ㄅ"):
  659. bits.add(18) # Chinese: Simplified
  660. elif uni == ord("ㄱ"):
  661. bits.add(19) # Korean wansung
  662. elif uni == ord("央"):
  663. bits.add(20) # Chinese: Traditional
  664. elif uni == ord("곴"):
  665. bits.add(21) # Korean Johab
  666. elif uni == ord("♥") and hasAscii:
  667. bits.add(30) # OEM Character Set
  668. # TODO: Symbol bit has a special meaning (check the spec), we need
  669. # to confirm if this is wanted by default.
  670. # elif chr(0xF000) <= char <= chr(0xF0FF):
  671. # codepageRanges.add(31) # Symbol Character Set
  672. elif uni == ord("þ") and hasAscii and hasLineart:
  673. bits.add(54) # MS-DOS Icelandic
  674. elif uni == ord("╚") and hasAscii:
  675. bits.add(62) # WE/Latin 1
  676. bits.add(63) # US
  677. elif hasAscii and hasLineart and ord("√") in unicodes:
  678. if uni == ord("Å"):
  679. bits.add(50) # MS-DOS Nordic
  680. elif uni == ord("é"):
  681. bits.add(52) # MS-DOS Canadian French
  682. elif uni == ord("õ"):
  683. bits.add(55) # MS-DOS Portuguese
  684. if hasAscii and ord("‰") in unicodes and ord("∑") in unicodes:
  685. bits.add(29) # Macintosh Character Set (US Roman)
  686. return bits
  687. if __name__ == "__main__":
  688. import doctest, sys
  689. sys.exit(doctest.testmod().failed)