123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241 |
- import os
- import shutil
- from devtools.yamaker.arcpath import ArcPath
- from devtools.yamaker import pathutil
- from devtools.yamaker import fileutil
- from devtools.yamaker.modules import GLOBAL, Linkable, Switch
- from devtools.yamaker.project import CMakeNinjaNixProject
- def _iterate_yamakes_section(yamakes, section_name, handler):
- for prj, m in list(yamakes.items()):
- if not hasattr(m, section_name):
- continue
- section = getattr(m, section_name)
- handler(section)
- def _remove_yamakes_section_entries(yamakes, section_name, entries):
- def _remove_entry(section):
- for entry_name in entries:
- if entry_name not in section:
- continue
- section.remove(entry_name)
- _iterate_yamakes_section(yamakes, section_name, _remove_entry)
- def _replace_yamakes_section_entries(yamakes, section_name, from_to_entries):
- def _replace_entry(section):
- for from_entry, to_entry in from_to_entries:
- if from_entry not in section:
- continue
- section.remove(from_entry)
- section.add(to_entry)
- _iterate_yamakes_section(yamakes, section_name, _replace_entry)
def post_build(self):
    """Copy the flatbuffers schema files into ``cpp/src/generated``.

    ``post_install`` later registers these ``.fbs`` files as SRCS so that
    flatc regenerates the headers during the build instead of relying on
    upstream's pre-generated code.
    """
    generated = f"{self.dstdir}/cpp/src/generated"
    # feather.fbs lives in the C++ tree; the rest come from the top-level
    # format/ directory of the upstream repository.
    fileutil.copy(f"{self.srcdir}/cpp/src/arrow/ipc/feather.fbs", generated)
    for schema in ("File", "Message", "Schema", "SparseTensor", "Tensor"):
        fileutil.copy(f"{self.srcdir}/format/{schema}.fbs", generated)
def post_install(self):
    """Fix up the generated ya.make files and copied sources after install.

    In order:
      * export ARROW_STATIC/PARQUET_STATIC defines and drop the HDFS define;
      * register the flatbuffers schemas copied by ``post_build`` as SRCS;
      * switch the python bindings module to PY3_LIBRARY;
      * unvendor fast_float, double-conversion, uriparser, xxhash, the bundled
        flatbuffers headers, hadoop headers and the orc interface, pointing
        sources at the contrib copies instead;
      * remove the then-empty ``cpp/thirdparty`` tree.
    """
    with self.yamakes["."] as arrow:
        # GLOBAL: these defines must reach every dependent that includes the
        # arrow/parquet headers, not just this library's own compilation.
        arrow.CFLAGS = [
            GLOBAL("-DARROW_STATIC"),
            GLOBAL("-DPARQUET_STATIC"),
        ] + arrow.CFLAGS
        # Building python extension for arrow automatically enables HDFS dependency.
        # We will patch the source code to avoid this dependency.
        arrow.CFLAGS.remove("-DARROW_HDFS")
        # <netinet/in.h> is not available on Windows: re-add the define only
        # for non-Windows platforms instead of unconditionally.
        arrow.CFLAGS.remove("-DHAVE_NETINET_IN_H")
        arrow.after(
            "CFLAGS",
            Switch({"NOT OS_WINDOWS": Linkable(CFLAGS=["-DHAVE_NETINET_IN_H"])}),
        )
    with self.yamakes["."] as arrow:
        # Schemas copied into cpp/src/generated by post_build; listed as SRCS
        # so flatc compiles them during the build.
        arrow.SRCS |= set(
            [
                "cpp/src/generated/feather.fbs",
                "cpp/src/generated/File.fbs",
                "cpp/src/generated/Message.fbs",
                "cpp/src/generated/Schema.fbs",
                "cpp/src/generated/SparseTensor.fbs",
                "cpp/src/generated/Tensor.fbs",
            ]
        )
        arrow.ADDINCL += [
            ArcPath(self.arcdir + "/cpp/src", GLOBAL=True, build=True),
        ]
        # Generate scoped (enum class) enums in the flatc output.
        arrow.FLATC_FLAGS.add("--scoped-enums")
    with self.yamakes["cpp/src/arrow/python"] as python:
        python.module = "PY3_LIBRARY"

    # Unvendor fast_float
    shutil.rmtree(f"{self.dstdir}/cpp/src/arrow/vendored/fast_float")
    with self.yamakes["."] as arrow:
        arrow.PEERDIR.add("contrib/restricted/fast_float")
    # Rewrite includes, then drop the vendored namespace qualification.
    fileutil.re_sub_dir(
        self.dstdir,
        "arrow/vendored/fast_float",
        "contrib/restricted/fast_float/include/fast_float",
        test=pathutil.is_preprocessable,
    )
    fileutil.re_sub_dir(
        self.dstdir,
        "::arrow_vendored::fast_float::",
        "fast_float::",
        test=pathutil.is_preprocessable,
    )

    # Unvendor double-conversion
    fileutil.re_sub_dir(
        f"{self.dstdir}/cpp/src/arrow/util",
        "arrow/vendored/double-conversion",
        "contrib/libs/double-conversion/double-conversion",
    )
    with self.yamakes["."] as arrow:
        arrow.PEERDIR.add("contrib/libs/double-conversion")
    # Collect the vendored file list *before* deleting the directory so the
    # corresponding SRCS entries can be removed from the yamakes.
    double_conversion_path = f"{self.dstdir}/cpp/src/arrow/vendored/double-conversion"
    double_conversion_files = fileutil.files(double_conversion_path, rel=self.dstdir)
    _remove_yamakes_section_entries(self.yamakes, "SRCS", double_conversion_files)
    shutil.rmtree(double_conversion_path)

    # Unvendor uriparser
    fileutil.re_sub_dir(
        f"{self.dstdir}/cpp/src/arrow/util",
        "arrow/vendored/uriparser",
        "contrib/restricted/uriparser/include/uriparser",
    )
    with self.yamakes["."] as arrow:
        arrow.PEERDIR.add("contrib/restricted/uriparser")
    uriparser_path = f"{self.dstdir}/cpp/src/arrow/vendored/uriparser"
    uriparser_files = fileutil.files(uriparser_path, rel=self.dstdir)
    _remove_yamakes_section_entries(self.yamakes, "SRCS", uriparser_files)
    shutil.rmtree(uriparser_path)

    # Unvendor xxhash
    fileutil.re_sub_dir(
        f"{self.dstdir}/cpp/src/arrow/util",
        "arrow/vendored/xxhash.h",
        "contrib/libs/xxhash/xxhash.h",
    )
    with self.yamakes["."] as arrow:
        arrow.PEERDIR.add("contrib/libs/xxhash")
    xxhash_path = f"{self.dstdir}/cpp/src/arrow/vendored/xxhash"
    # NOTE: There are no SRCS for xxhash, skipped removing from yamakes
    os.remove(f"{self.dstdir}/cpp/src/arrow/vendored/xxhash.h")
    shutil.rmtree(xxhash_path)
    os.remove(f"{self.dstdir}/cpp/src/arrow/vendored/optional.hpp")

    # Unbundle header-only usage of flatbuffers
    fb_include_original = f"{self.arcdir}/cpp/thirdparty/flatbuffers/include"
    fb_include_ours = "contrib/libs/flatbuffers/include"
    _replace_yamakes_section_entries(
        self.yamakes,
        "ADDINCL",
        [
            (fb_include_original, fb_include_ours),
        ],
    )
    shutil.rmtree(f"{self.dstdir}/cpp/thirdparty/flatbuffers")

    # Cleanup unused hdfs include files
    thirdparty_hadoop_include = f"{self.arcdir}/cpp/thirdparty/hadoop/include"
    _remove_yamakes_section_entries(self.yamakes, "ADDINCL", [thirdparty_hadoop_include])
    shutil.rmtree(f"{self.dstdir}/cpp/thirdparty/hadoop")

    # Cleanup unused hdfs io files
    hadoop_related_sources = [
        "cpp/src/arrow/filesystem/hdfs.cc",
        "cpp/src/arrow/io/hdfs.cc",
        "cpp/src/arrow/io/hdfs_internal.cc",
    ]
    hadoop_related_headers = [
        "cpp/src/arrow/filesystem/hdfs.h",
        "cpp/src/arrow/io/hdfs.h",
        "cpp/src/arrow/io/hdfs_internal.h",
    ]
    hadoop_related_files = hadoop_related_sources + hadoop_related_headers
    # Only the .cc files appear in SRCS; headers are just deleted from disk.
    _remove_yamakes_section_entries(self.yamakes, "SRCS", hadoop_related_sources)
    for fname in hadoop_related_files:
        os.remove(f"{self.dstdir}/{fname}")

    # remove bundled apache_orc interface
    _remove_yamakes_section_entries(self.yamakes, "ADDINCL", [f"{self.arcdir}/orc_ep-install/include"])
    with self.yamakes["."] as arrow:
        arrow.ADDINCL.add("contrib/libs/apache/orc/c++/include")
        arrow.PEERDIR.add("contrib/libs/apache/orc")
    shutil.rmtree(f"{self.dstdir}/orc_ep-install")

    # Cleanup thirdparty (will fail if not empty)
    shutil.rmtree(f"{self.dstdir}/cpp/thirdparty")
# Importer definition: yamaker drives the upstream CMake/Ninja build of the
# nix "arrow-cpp" package and lays the result out under
# contrib/libs/apache/arrow, applying the post_build/post_install hooks above.
apache_arrow = CMakeNinjaNixProject(
    owners=["primorial", "g:cpp-contrib"],
    arcdir="contrib/libs/apache/arrow",
    nixattr="arrow-cpp",
    # NOTE(review): presumably "cmake" invocations in the captured build log
    # are skipped when reconstructing compile commands — confirm against
    # yamaker documentation.
    ignore_commands=["cmake"],
    install_targets={"arrow", "parquet", "arrow_python"},
    # parquet sources are merged into the arrow library, which itself is
    # placed at the arcdir root (".").
    put_with={"arrow": ["parquet"]},
    put={"arrow": "."},
    addincl_global={".": {"./cpp/src", "./src"}},
    # Extra files to copy verbatim even though they are not compiled:
    # public api.h umbrella headers and the vendored datetime iOS shims.
    copy_sources=[
        "cpp/src/arrow/api.h",
        "cpp/src/arrow/io/api.h",
        "cpp/src/arrow/io/mman.h",
        "cpp/src/arrow/ipc/api.h",
        "cpp/src/arrow/compute/api.h",
        "cpp/src/arrow/csv/api.h",
        "cpp/src/arrow/vendored/datetime/ios.*",
        "cpp/src/arrow/python/api.h",
        "cpp/src/arrow/filesystem/api.h",
        "cpp/src/parquet/api/*.h",
    ],
    # Includes that must not be resolved: headers of optional features that
    # this build disables (S3, HDFS, glog, mimalloc, jemalloc, xsimd, boost,
    # curl, SIMD bpacking variants). The hdfs headers are additionally
    # deleted from disk in post_install.
    disable_includes=[
        "knownfolders.h",
        "boost/multiprecision/cpp_int.hpp",
        "boost/shared_ptr.hpp",
        "curl.h",
        "curl/curl.h",
        "mimalloc.h",
        "jemalloc_ep/dist/include/jemalloc/jemalloc.h",
        "glog/logging.h",
        "xsimd/xsimd.hpp",
        "arrow/filesystem/s3fs.h",
        "arrow/filesystem/hdfs.h",
        "arrow/io/hdfs.h",
        "arrow/io/hdfs_internal.h",
        "arrow/util/bpacking_avx2.h",
        "arrow/util/bpacking_avx512.h",
        "arrow/util/bpacking_neon.h",
        # if defined(__sun__)
        "sys/byteorder.h",
    ],
    post_build=post_build,
    post_install=post_install,
)
|