# __init__.py: yamaker project description for contrib/libs/apache/arrow.
  1. import os
  2. import shutil
  3. from devtools.yamaker.arcpath import ArcPath
  4. from devtools.yamaker import pathutil
  5. from devtools.yamaker import fileutil
  6. from devtools.yamaker.modules import GLOBAL, Linkable, Switch
  7. from devtools.yamaker.project import CMakeNinjaNixProject
  8. def _iterate_yamakes_section(yamakes, section_name, handler):
  9. for prj, m in list(yamakes.items()):
  10. if not hasattr(m, section_name):
  11. continue
  12. section = getattr(m, section_name)
  13. handler(section)
  14. def _remove_yamakes_section_entries(yamakes, section_name, entries):
  15. def _remove_entry(section):
  16. for entry_name in entries:
  17. if entry_name not in section:
  18. continue
  19. section.remove(entry_name)
  20. _iterate_yamakes_section(yamakes, section_name, _remove_entry)
  21. def _replace_yamakes_section_entries(yamakes, section_name, from_to_entries):
  22. def _replace_entry(section):
  23. for from_entry, to_entry in from_to_entries:
  24. if from_entry not in section:
  25. continue
  26. section.remove(from_entry)
  27. section.add(to_entry)
  28. _iterate_yamakes_section(yamakes, section_name, _replace_entry)
  29. def post_build(self):
  30. fileutil.copy(f"{self.srcdir}/cpp/src/arrow/ipc/feather.fbs", f"{self.dstdir}/cpp/src/generated")
  31. fileutil.copy(f"{self.srcdir}/format/File.fbs", f"{self.dstdir}/cpp/src/generated")
  32. fileutil.copy(f"{self.srcdir}/format/Message.fbs", f"{self.dstdir}/cpp/src/generated")
  33. fileutil.copy(f"{self.srcdir}/format/Schema.fbs", f"{self.dstdir}/cpp/src/generated")
  34. fileutil.copy(f"{self.srcdir}/format/SparseTensor.fbs", f"{self.dstdir}/cpp/src/generated")
  35. fileutil.copy(f"{self.srcdir}/format/Tensor.fbs", f"{self.dstdir}/cpp/src/generated")
  36. def post_install(self):
  37. with self.yamakes["."] as arrow:
  38. arrow.CFLAGS = [
  39. GLOBAL("-DARROW_STATIC"),
  40. GLOBAL("-DPARQUET_STATIC"),
  41. ] + arrow.CFLAGS
  42. # Building python extension for arrow automatically enables HDFS dependency.
  43. # We will patch the source code to avoid this dependency.
  44. arrow.CFLAGS.remove("-DARROW_HDFS")
  45. arrow.CFLAGS.remove("-DHAVE_NETINET_IN_H")
  46. arrow.after(
  47. "CFLAGS",
  48. Switch({"NOT OS_WINDOWS": Linkable(CFLAGS=["-DHAVE_NETINET_IN_H"])}),
  49. )
  50. with self.yamakes["."] as arrow:
  51. arrow.SRCS |= set(
  52. [
  53. "cpp/src/generated/feather.fbs",
  54. "cpp/src/generated/File.fbs",
  55. "cpp/src/generated/Message.fbs",
  56. "cpp/src/generated/Schema.fbs",
  57. "cpp/src/generated/SparseTensor.fbs",
  58. "cpp/src/generated/Tensor.fbs",
  59. ]
  60. )
  61. arrow.ADDINCL += [
  62. ArcPath(self.arcdir + "/cpp/src", GLOBAL=True, build=True),
  63. ]
  64. arrow.FLATC_FLAGS.add("--scoped-enums")
  65. with self.yamakes["cpp/src/arrow/python"] as python:
  66. python.module = "PY3_LIBRARY"
  67. # Unvendor fast_float
  68. shutil.rmtree(f"{self.dstdir}/cpp/src/arrow/vendored/fast_float")
  69. with self.yamakes["."] as arrow:
  70. arrow.PEERDIR.add("contrib/restricted/fast_float")
  71. fileutil.re_sub_dir(
  72. self.dstdir,
  73. "arrow/vendored/fast_float",
  74. "contrib/restricted/fast_float/include/fast_float",
  75. test=pathutil.is_preprocessable,
  76. )
  77. fileutil.re_sub_dir(
  78. self.dstdir,
  79. "::arrow_vendored::fast_float::",
  80. "fast_float::",
  81. test=pathutil.is_preprocessable,
  82. )
  83. # Unvendor double-conversion
  84. fileutil.re_sub_dir(
  85. f"{self.dstdir}/cpp/src/arrow/util",
  86. "arrow/vendored/double-conversion",
  87. "contrib/libs/double-conversion/double-conversion",
  88. )
  89. with self.yamakes["."] as arrow:
  90. arrow.PEERDIR.add("contrib/libs/double-conversion")
  91. double_conversion_path = f"{self.dstdir}/cpp/src/arrow/vendored/double-conversion"
  92. double_conversion_files = fileutil.files(double_conversion_path, rel=self.dstdir)
  93. _remove_yamakes_section_entries(self.yamakes, "SRCS", double_conversion_files)
  94. shutil.rmtree(double_conversion_path)
  95. # Unvendor uriparser
  96. fileutil.re_sub_dir(
  97. f"{self.dstdir}/cpp/src/arrow/util",
  98. "arrow/vendored/uriparser",
  99. "contrib/restricted/uriparser/include/uriparser",
  100. )
  101. with self.yamakes["."] as arrow:
  102. arrow.PEERDIR.add("contrib/restricted/uriparser")
  103. uriparser_path = f"{self.dstdir}/cpp/src/arrow/vendored/uriparser"
  104. uriparser_files = fileutil.files(uriparser_path, rel=self.dstdir)
  105. _remove_yamakes_section_entries(self.yamakes, "SRCS", uriparser_files)
  106. shutil.rmtree(uriparser_path)
  107. # Unvendor xxhash
  108. fileutil.re_sub_dir(
  109. f"{self.dstdir}/cpp/src/arrow/util",
  110. "arrow/vendored/xxhash.h",
  111. "contrib/libs/xxhash/xxhash.h",
  112. )
  113. with self.yamakes["."] as arrow:
  114. arrow.PEERDIR.add("contrib/libs/xxhash")
  115. xxhash_path = f"{self.dstdir}/cpp/src/arrow/vendored/xxhash"
  116. # NOTE: There are no SRCS for xxhash, skipped removing from yamakes
  117. os.remove(f"{self.dstdir}/cpp/src/arrow/vendored/xxhash.h")
  118. shutil.rmtree(xxhash_path)
  119. os.remove(f"{self.dstdir}/cpp/src/arrow/vendored/optional.hpp")
  120. # Unbundle header-only usage of flatbuffers
  121. fb_include_original = f"{self.arcdir}/cpp/thirdparty/flatbuffers/include"
  122. fb_include_ours = "contrib/libs/flatbuffers/include"
  123. _replace_yamakes_section_entries(
  124. self.yamakes,
  125. "ADDINCL",
  126. [
  127. (fb_include_original, fb_include_ours),
  128. ],
  129. )
  130. shutil.rmtree(f"{self.dstdir}/cpp/thirdparty/flatbuffers")
  131. # Cleanup unused hdfs include files
  132. thirdparty_hadoop_include = f"{self.arcdir}/cpp/thirdparty/hadoop/include"
  133. _remove_yamakes_section_entries(self.yamakes, "ADDINCL", [thirdparty_hadoop_include])
  134. shutil.rmtree(f"{self.dstdir}/cpp/thirdparty/hadoop")
  135. # Cleanup unused hdfs io files
  136. hadoop_related_sources = [
  137. "cpp/src/arrow/filesystem/hdfs.cc",
  138. "cpp/src/arrow/io/hdfs.cc",
  139. "cpp/src/arrow/io/hdfs_internal.cc",
  140. ]
  141. hadoop_related_headers = [
  142. "cpp/src/arrow/filesystem/hdfs.h",
  143. "cpp/src/arrow/io/hdfs.h",
  144. "cpp/src/arrow/io/hdfs_internal.h",
  145. ]
  146. hadoop_related_files = hadoop_related_sources + hadoop_related_headers
  147. _remove_yamakes_section_entries(self.yamakes, "SRCS", hadoop_related_sources)
  148. for fname in hadoop_related_files:
  149. os.remove(f"{self.dstdir}/{fname}")
  150. # remove bundled apache_orc interface
  151. _remove_yamakes_section_entries(self.yamakes, "ADDINCL", [f"{self.arcdir}/orc_ep-install/include"])
  152. with self.yamakes["."] as arrow:
  153. arrow.ADDINCL.add("contrib/libs/apache/orc/c++/include")
  154. arrow.PEERDIR.add("contrib/libs/apache/orc")
  155. shutil.rmtree(f"{self.dstdir}/orc_ep-install")
  156. # Cleanup thirdparty (will fail if not empty)
  157. shutil.rmtree(f"{self.dstdir}/cpp/thirdparty")
# yamaker project description for Apache Arrow (arcdir contrib/libs/apache/arrow).
# The post_build and post_install functions defined in this file are passed to
# the project through the keyword arguments of the same names.
apache_arrow = CMakeNinjaNixProject(
    owners=["primorial", "g:cpp-contrib"],
    arcdir="contrib/libs/apache/arrow",
    nixattr="arrow-cpp",
    ignore_commands=["cmake"],
    install_targets={"arrow", "parquet", "arrow_python"},
    # NOTE(review): presumably merges the "parquet" target into "arrow" and
    # places "arrow" at the project root ("."); confirm against yamaker.
    put_with={"arrow": ["parquet"]},
    put={"arrow": "."},
    addincl_global={".": {"./cpp/src", "./src"}},
    # NOTE(review): presumably extra files (mostly api.h headers) to copy in
    # addition to what the build itself references; confirm against yamaker.
    copy_sources=[
        "cpp/src/arrow/api.h",
        "cpp/src/arrow/io/api.h",
        "cpp/src/arrow/io/mman.h",
        "cpp/src/arrow/ipc/api.h",
        "cpp/src/arrow/compute/api.h",
        "cpp/src/arrow/csv/api.h",
        "cpp/src/arrow/vendored/datetime/ios.*",
        "cpp/src/arrow/python/api.h",
        "cpp/src/arrow/filesystem/api.h",
        "cpp/src/parquet/api/*.h",
    ],
    # The hdfs headers listed here are the same files that post_install
    # deletes from the destination tree.
    disable_includes=[
        "knownfolders.h",
        "boost/multiprecision/cpp_int.hpp",
        "boost/shared_ptr.hpp",
        "curl.h",
        "curl/curl.h",
        "mimalloc.h",
        "jemalloc_ep/dist/include/jemalloc/jemalloc.h",
        "glog/logging.h",
        "xsimd/xsimd.hpp",
        "arrow/filesystem/s3fs.h",
        "arrow/filesystem/hdfs.h",
        "arrow/io/hdfs.h",
        "arrow/io/hdfs_internal.h",
        "arrow/util/bpacking_avx2.h",
        "arrow/util/bpacking_avx512.h",
        "arrow/util/bpacking_neon.h",
        # if defined(__sun__)
        "sys/byteorder.h",
    ],
    post_build=post_build,
    post_install=post_install,
)