Browse Source

add apache arrow python

heretic 2 years ago
parent
commit
8393683e8c

+ 16 - 0
contrib/libs/apache/arrow/CMakeLists.txt

@@ -38,6 +38,7 @@ target_include_directories(libs-apache-arrow PRIVATE
   ${CMAKE_SOURCE_DIR}/contrib/libs/apache/orc/c++/include
   ${CMAKE_SOURCE_DIR}/contrib/libs/flatbuffers/include
   ${CMAKE_SOURCE_DIR}/contrib/libs/lz4
+  ${CMAKE_SOURCE_DIR}/contrib/libs/rapidjson/include
   ${CMAKE_SOURCE_DIR}/contrib/libs/re2
   ${CMAKE_SOURCE_DIR}/contrib/libs/utf8proc
   ${CMAKE_SOURCE_DIR}/contrib/libs/zstd/include
@@ -50,6 +51,7 @@ target_link_libraries(libs-apache-arrow PUBLIC
   libs-brotli-enc
   contrib-libs-double-conversion
   contrib-libs-lz4
+  contrib-libs-rapidjson
   contrib-libs-re2
   contrib-libs-snappy
   contrib-libs-utf8proc
@@ -146,6 +148,11 @@ target_sources(libs-apache-arrow PRIVATE
   ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/datum.cc
   ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/device.cc
   ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/extension_type.cc
+  ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/filesystem.cc
+  ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/localfs.cc
+  ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/mockfs.cc
+  ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/path_util.cc
+  ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/util_internal.cc
   ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/io/buffered.cc
   ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/io/caching.cc
   ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/io/compressed.cc
@@ -157,11 +164,20 @@ target_sources(libs-apache-arrow PRIVATE
   ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/io/transform.cc
   ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/ipc/dictionary.cc
   ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/ipc/feather.cc
+  ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/ipc/json_simple.cc
   ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/ipc/message.cc
   ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/ipc/metadata_internal.cc
   ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/ipc/options.cc
   ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/ipc/reader.cc
   ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/ipc/writer.cc
+  ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/json/chunked_builder.cc
+  ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/json/chunker.cc
+  ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/json/converter.cc
+  ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/json/object_parser.cc
+  ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/json/object_writer.cc
+  ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/json/options.cc
+  ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/json/parser.cc
+  ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/json/reader.cc
   ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/memory_pool.cc
   ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/pretty_print.cc
   ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/record_batch.cc

+ 761 - 0
contrib/libs/apache/arrow/cpp/src/arrow/filesystem/filesystem.cc

@@ -0,0 +1,761 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <sstream>
+#include <utility>
+
+#include "arrow/util/config.h"
+
+#include "arrow/filesystem/filesystem.h"
+#ifdef ARROW_HDFS
+#error #include "arrow/filesystem/hdfs.h"
+#endif
+#ifdef ARROW_S3
+#error #include "arrow/filesystem/s3fs.h"
+#endif
+#include "arrow/filesystem/localfs.h"
+#include "arrow/filesystem/mockfs.h"
+#include "arrow/filesystem/path_util.h"
+#include "arrow/filesystem/util_internal.h"
+#include "arrow/io/slow.h"
+#include "arrow/io/util_internal.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/parallel.h"
+#include "arrow/util/uri.h"
+#include "arrow/util/vector.h"
+#include "arrow/util/windows_fixup.h"
+
+namespace arrow {
+
+using internal::checked_pointer_cast;
+using internal::TaskHints;
+using internal::Uri;
+using io::internal::SubmitIO;
+
+namespace fs {
+
+using internal::ConcatAbstractPath;
+using internal::EnsureTrailingSlash;
+using internal::GetAbstractPathParent;
+using internal::kSep;
+using internal::RemoveLeadingSlash;
+using internal::RemoveTrailingSlash;
+using internal::ToSlashes;
+
+/// \brief Map a FileType enumerator to a short lowercase label.
+///
+/// Values outside the four handled enumerators are reported through
+/// ARROW_LOG(FATAL); "???" is the value left to return in that case.
+std::string ToString(FileType ftype) {
+  const char* label = "???";
+  switch (ftype) {
+    case FileType::NotFound:
+      label = "not-found";
+      break;
+    case FileType::Unknown:
+      label = "unknown";
+      break;
+    case FileType::File:
+      label = "file";
+      break;
+    case FileType::Directory:
+      label = "directory";
+      break;
+    default:
+      ARROW_LOG(FATAL) << "Invalid FileType value: " << static_cast<int>(ftype);
+      break;
+  }
+  return label;
+}
+
+// For googletest: prints the enumerator as "FileType::<Name>".
+ARROW_EXPORT std::ostream& operator<<(std::ostream& os, FileType ftype) {
+  switch (ftype) {
+    case FileType::NotFound:
+      os << "FileType::NotFound";
+      break;
+    case FileType::Unknown:
+      os << "FileType::Unknown";
+      break;
+    case FileType::File:
+      os << "FileType::File";
+      break;
+    case FileType::Directory:
+      os << "FileType::Directory";
+      break;
+    default:
+      // Not one of the four handled enumerators.
+      ARROW_LOG(FATAL) << "Invalid FileType value: " << static_cast<int>(ftype);
+  }
+  return os;
+}
+
+/// Second component of GetAbstractPathParent(path_).
+std::string FileInfo::base_name() const {
+  auto parent_and_leaf = internal::GetAbstractPathParent(path_);
+  return std::move(parent_and_leaf.second);
+}
+
+/// First component of GetAbstractPathParent(path_).
+std::string FileInfo::dir_name() const {
+  auto parent_and_leaf = internal::GetAbstractPathParent(path_);
+  return std::move(parent_and_leaf.first);
+}
+
+// Debug helper: renders this FileInfo through its stream insertion operator.
+std::string FileInfo::ToString() const {
+  std::stringstream rendered;
+  rendered << *this;
+  return rendered.str();
+}
+
+/// Writes "FileInfo(<type>, <path>)" to `os`.
+std::ostream& operator<<(std::ostream& os, const FileInfo& info) {
+  os << "FileInfo(" << info.type() << ", " << info.path() << ")";
+  return os;
+}
+
+/// Result of internal::GetAbstractPathExtension() applied to path_.
+std::string FileInfo::extension() const {
+  std::string ext = internal::GetAbstractPathExtension(path_);
+  return ext;
+}
+
+//////////////////////////////////////////////////////////////////////////
+// FileSystem default method implementations
+
+// Nothing to release at this level.
+FileSystem::~FileSystem() = default;
+
+/// Default implementation: hands the path back unchanged.
+Result<std::string> FileSystem::NormalizePath(std::string path) {
+  return path;
+}
+
+/// Default multi-path lookup: calls the single-path GetFileInfo() for each entry
+/// in order and stops at the first error.
+Result<std::vector<FileInfo>> FileSystem::GetFileInfo(
+    const std::vector<std::string>& paths) {
+  std::vector<FileInfo> infos;
+  infos.reserve(paths.size());
+  for (const std::string& one_path : paths) {
+    ARROW_ASSIGN_OR_RAISE(FileInfo entry, GetFileInfo(one_path));
+    infos.push_back(std::move(entry));
+  }
+  return infos;
+}
+
+namespace {
+
+// Runs `func(self)` where `self` is fs->shared_from_this().
+// With `synchronous` set, the call happens inline; otherwise it is submitted via
+// io::internal::SubmitIO on the filesystem's IO context and wrapped by DeferNotOk.
+template <typename DeferredFunc>
+auto FileSystemDefer(FileSystem* fs, bool synchronous, DeferredFunc&& func)
+    -> decltype(DeferNotOk(
+        fs->io_context().executor()->Submit(func, std::shared_ptr<FileSystem>{}))) {
+  auto self = fs->shared_from_this();
+  if (!synchronous) {
+    return DeferNotOk(io::internal::SubmitIO(
+        fs->io_context(), std::forward<DeferredFunc>(func), std::move(self)));
+  }
+  return std::forward<DeferredFunc>(func)(std::move(self));
+}
+
+}  // namespace
+
+/// Default async lookup: defers the multi-path GetFileInfo() through FileSystemDefer.
+/// `paths` is copied into the task.
+Future<std::vector<FileInfo>> FileSystem::GetFileInfoAsync(
+    const std::vector<std::string>& paths) {
+  auto lookup = [paths](std::shared_ptr<FileSystem> self) {
+    return self->GetFileInfo(paths);
+  };
+  return FileSystemDefer(this, default_async_is_sync_, std::move(lookup));
+}
+
+/// Default generator: defers the selector-based GetFileInfo() and exposes the
+/// resulting future through MakeSingleFutureGenerator.
+FileInfoGenerator FileSystem::GetFileInfoGenerator(const FileSelector& select) {
+  auto listing = [select](std::shared_ptr<FileSystem> self) {
+    return self->GetFileInfo(select);
+  };
+  auto pending = FileSystemDefer(this, default_async_is_sync_, std::move(listing));
+  return MakeSingleFutureGenerator(std::move(pending));
+}
+
+/// Default bulk delete: calls DeleteFile() on every path (no early exit) and
+/// folds the individual statuses together with `&=`.
+Status FileSystem::DeleteFiles(const std::vector<std::string>& paths) {
+  Status combined = Status::OK();
+  for (const std::string& target : paths) {
+    combined &= DeleteFile(target);
+  }
+  return combined;
+}
+
+namespace {
+
+// Checks that `info` may be opened for reading: NotFound yields PathNotFound,
+// File and Unknown are accepted, anything else yields NotAFile.
+Status ValidateInputFileInfo(const FileInfo& info) {
+  switch (info.type()) {
+    case FileType::NotFound:
+      return internal::PathNotFound(info.path());
+    case FileType::File:
+    case FileType::Unknown:
+      return Status::OK();
+    default:
+      return internal::NotAFile(info.path());
+  }
+}
+
+}  // namespace
+
+/// Default FileInfo overload: validates `info`, then opens by path.
+Result<std::shared_ptr<io::InputStream>> FileSystem::OpenInputStream(
+    const FileInfo& info) {
+  RETURN_NOT_OK(ValidateInputFileInfo(info));
+  const std::string& target = info.path();
+  return OpenInputStream(target);
+}
+
+/// Default FileInfo overload: validates `info`, then opens by path.
+Result<std::shared_ptr<io::RandomAccessFile>> FileSystem::OpenInputFile(
+    const FileInfo& info) {
+  RETURN_NOT_OK(ValidateInputFileInfo(info));
+  const std::string& target = info.path();
+  return OpenInputFile(target);
+}
+
+/// Default async open: defers OpenInputStream(path); `path` is copied into the task.
+Future<std::shared_ptr<io::InputStream>> FileSystem::OpenInputStreamAsync(
+    const std::string& path) {
+  auto open_task = [path](std::shared_ptr<FileSystem> self) {
+    return self->OpenInputStream(path);
+  };
+  return FileSystemDefer(this, default_async_is_sync_, std::move(open_task));
+}
+
+/// Default async open from a FileInfo: validation happens immediately in the
+/// caller's thread; the open itself is deferred with a copy of `info`.
+Future<std::shared_ptr<io::InputStream>> FileSystem::OpenInputStreamAsync(
+    const FileInfo& info) {
+  RETURN_NOT_OK(ValidateInputFileInfo(info));
+  auto open_task = [info](std::shared_ptr<FileSystem> self) {
+    return self->OpenInputStream(info);
+  };
+  return FileSystemDefer(this, default_async_is_sync_, std::move(open_task));
+}
+
+/// Default async open: defers OpenInputFile(path); `path` is copied into the task.
+Future<std::shared_ptr<io::RandomAccessFile>> FileSystem::OpenInputFileAsync(
+    const std::string& path) {
+  auto open_task = [path](std::shared_ptr<FileSystem> self) {
+    return self->OpenInputFile(path);
+  };
+  return FileSystemDefer(this, default_async_is_sync_, std::move(open_task));
+}
+
+/// Default async open from a FileInfo: validation happens immediately in the
+/// caller's thread; the open itself is deferred with a copy of `info`.
+Future<std::shared_ptr<io::RandomAccessFile>> FileSystem::OpenInputFileAsync(
+    const FileInfo& info) {
+  RETURN_NOT_OK(ValidateInputFileInfo(info));
+  auto open_task = [info](std::shared_ptr<FileSystem> self) {
+    return self->OpenInputFile(info);
+  };
+  return FileSystemDefer(this, default_async_is_sync_, std::move(open_task));
+}
+
+/// Path-only convenience overload: forwards with a null metadata pointer.
+Result<std::shared_ptr<io::OutputStream>> FileSystem::OpenOutputStream(
+    const std::string& path) {
+  const std::shared_ptr<const KeyValueMetadata> no_metadata{};
+  return OpenOutputStream(path, no_metadata);
+}
+
+/// Path-only convenience overload: forwards with a null metadata pointer.
+Result<std::shared_ptr<io::OutputStream>> FileSystem::OpenAppendStream(
+    const std::string& path) {
+  const std::shared_ptr<const KeyValueMetadata> no_metadata{};
+  return OpenAppendStream(path, no_metadata);
+}
+
+//////////////////////////////////////////////////////////////////////////
+// SubTreeFileSystem implementation
+
+/// Builds a view of `base_fs` rooted at `base_path`.
+///
+/// Shares the IO context of `base_fs`. The base path is normalized through
+/// NormalizeBasePath(); ValueOrDie() is applied to that result, so a
+/// normalization error is not reported back to the caller as a Status.
+SubTreeFileSystem::SubTreeFileSystem(const std::string& base_path,
+                                     std::shared_ptr<FileSystem> base_fs)
+    : FileSystem(base_fs->io_context()),
+      base_path_(NormalizeBasePath(base_path, base_fs).ValueOrDie()),
+      base_fs_(base_fs) {
+}
+
+// Members clean up after themselves.
+SubTreeFileSystem::~SubTreeFileSystem() = default;
+
+/// Normalizes `base_path` with the wrapped filesystem, then passes it through
+/// EnsureTrailingSlash().
+Result<std::string> SubTreeFileSystem::NormalizeBasePath(
+    std::string base_path, const std::shared_ptr<FileSystem>& base_fs) {
+  ARROW_ASSIGN_OR_RAISE(auto normalized, base_fs->NormalizePath(std::move(base_path)));
+  return EnsureTrailingSlash(std::move(normalized));
+}
+
+/// Two subtree filesystems are equal when they are the same object, or when
+/// they have the same type name, the same base path and equal base filesystems.
+bool SubTreeFileSystem::Equals(const FileSystem& other) const {
+  if (&other == this) {
+    return true;
+  }
+  if (type_name() != other.type_name()) {
+    return false;
+  }
+  // The type name matched, so the downcast is expected to be valid.
+  const auto& rhs = ::arrow::internal::checked_cast<const SubTreeFileSystem&>(other);
+  if (base_path_ != rhs.base_path_) {
+    return false;
+  }
+  return base_fs_->Equals(rhs.base_fs_);
+}
+
+/// Returns `s` prefixed with the base path; an empty `s` maps to the base path itself.
+std::string SubTreeFileSystem::PrependBase(const std::string& s) const {
+  if (!s.empty()) {
+    return ConcatAbstractPath(base_path_, s);
+  }
+  return base_path_;
+}
+
+/// In-place variant of PrependBase() that rejects an empty path with an IOError.
+Status SubTreeFileSystem::PrependBaseNonEmpty(std::string* s) const {
+  if (s->empty()) {
+    return Status::IOError("Empty path");
+  }
+  *s = ConcatAbstractPath(base_path_, *s);
+  return Status::OK();
+}
+
+/// Removes the base path prefix from `s`.
+///
+/// Returns UnknownError when `s` does not start with the base path.
+Result<std::string> SubTreeFileSystem::StripBase(const std::string& s) const {
+  const auto base_len = base_path_.length();
+  // Note base_path_ ends with a slash (if not empty)
+  const bool starts_with_base =
+      s.length() >= base_len && s.compare(0, base_len, base_path_) == 0;
+  if (!starts_with_base) {
+    return Status::UnknownError("Underlying filesystem returned path '", s,
+                                "', which is not a subpath of '", base_path_, "'");
+  }
+  return s.substr(base_len);
+}
+
+/// Rewrites the path stored in `info` so it no longer carries the base path prefix.
+Status SubTreeFileSystem::FixInfo(FileInfo* info) const {
+  ARROW_ASSIGN_OR_RAISE(auto relative_path, StripBase(info->path()));
+  info->set_path(std::move(relative_path));
+  return Status::OK();
+}
+
+/// Normalizes `path` in the wrapped filesystem's namespace and converts the
+/// result back to a subtree-relative path.
+Result<std::string> SubTreeFileSystem::NormalizePath(std::string path) {
+  const std::string in_base = PrependBase(path);
+  ARROW_ASSIGN_OR_RAISE(auto base_normalized, base_fs_->NormalizePath(in_base));
+  return StripBase(base_normalized);
+}
+
+/// Looks up `path` under the base path and reports it with a subtree-relative path.
+Result<FileInfo> SubTreeFileSystem::GetFileInfo(const std::string& path) {
+  ARROW_ASSIGN_OR_RAISE(FileInfo entry, base_fs_->GetFileInfo(PrependBase(path)));
+  RETURN_NOT_OK(FixInfo(&entry));
+  return entry;
+}
+
+/// Runs the selector against the wrapped filesystem with its base_dir rebased,
+/// then strips the base path from every returned entry.
+Result<std::vector<FileInfo>> SubTreeFileSystem::GetFileInfo(const FileSelector& select) {
+  FileSelector rebased(select);
+  rebased.base_dir = PrependBase(select.base_dir);
+  ARROW_ASSIGN_OR_RAISE(auto entries, base_fs_->GetFileInfo(rebased));
+  for (auto& entry : entries) {
+    RETURN_NOT_OK(FixInfo(&entry));
+  }
+  return entries;
+}
+
+/// Generator counterpart of the selector-based GetFileInfo(): wraps the base
+/// filesystem's generator with a mapping step that strips the base path from
+/// each batch of entries.
+FileInfoGenerator SubTreeFileSystem::GetFileInfoGenerator(const FileSelector& select) {
+  FileSelector rebased(select);
+  rebased.base_dir = PrependBase(select.base_dir);
+  auto upstream = base_fs_->GetFileInfoGenerator(rebased);
+
+  // The mapper holds a shared_ptr to this filesystem.
+  auto self = checked_pointer_cast<SubTreeFileSystem>(shared_from_this());
+
+  using InfoVector = std::vector<FileInfo>;
+  std::function<Result<InfoVector>(const InfoVector& infos)> strip_base =
+      [self](InfoVector batch) -> Result<InfoVector> {
+    for (auto& entry : batch) {
+      RETURN_NOT_OK(self->FixInfo(&entry));
+    }
+    return batch;
+  };
+  return MakeMappedGenerator(upstream, strip_base);
+}
+
+/// Creates `path` under the base path; an empty `path` is rejected.
+Status SubTreeFileSystem::CreateDir(const std::string& path, bool recursive) {
+  std::string full_path(path);
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_path));
+  return base_fs_->CreateDir(full_path, recursive);
+}
+
+/// Deletes directory `path` under the base path; an empty `path` is rejected.
+Status SubTreeFileSystem::DeleteDir(const std::string& path) {
+  std::string full_path(path);
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_path));
+  return base_fs_->DeleteDir(full_path);
+}
+
+/// Deletes the contents of `path` under the base path. A path that
+/// internal::IsEmptyPath() considers empty yields InvalidDeleteDirContents.
+Status SubTreeFileSystem::DeleteDirContents(const std::string& path) {
+  if (internal::IsEmptyPath(path)) {
+    return internal::InvalidDeleteDirContents(path);
+  }
+  const std::string full_path = PrependBase(path);
+  return base_fs_->DeleteDirContents(full_path);
+}
+
+/// Deletes everything in the subtree root: the base directory's contents, or the
+/// wrapped filesystem's root contents when the base path is empty.
+Status SubTreeFileSystem::DeleteRootDirContents() {
+  if (!base_path_.empty()) {
+    return base_fs_->DeleteDirContents(base_path_);
+  }
+  return base_fs_->DeleteRootDirContents();
+}
+
+/// Deletes file `path` under the base path; an empty `path` is rejected.
+Status SubTreeFileSystem::DeleteFile(const std::string& path) {
+  std::string full_path(path);
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_path));
+  return base_fs_->DeleteFile(full_path);
+}
+
+/// Moves `src` to `dest`, both taken relative to the base path. The source is
+/// checked for emptiness before the destination.
+Status SubTreeFileSystem::Move(const std::string& src, const std::string& dest) {
+  std::string full_src(src);
+  std::string full_dest(dest);
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_src));
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_dest));
+  return base_fs_->Move(full_src, full_dest);
+}
+
+/// Copies `src` to `dest`, both taken relative to the base path. The source is
+/// checked for emptiness before the destination.
+Status SubTreeFileSystem::CopyFile(const std::string& src, const std::string& dest) {
+  std::string full_src(src);
+  std::string full_dest(dest);
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_src));
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_dest));
+  return base_fs_->CopyFile(full_src, full_dest);
+}
+
+/// Opens `path` (relative to the base path) for sequential reading.
+Result<std::shared_ptr<io::InputStream>> SubTreeFileSystem::OpenInputStream(
+    const std::string& path) {
+  std::string full_path(path);
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_path));
+  return base_fs_->OpenInputStream(full_path);
+}
+
+/// FileInfo overload: forwards a copy of `info` whose path has been rebased.
+Result<std::shared_ptr<io::InputStream>> SubTreeFileSystem::OpenInputStream(
+    const FileInfo& info) {
+  std::string full_path = info.path();
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_path));
+  FileInfo rebased_info(info);
+  rebased_info.set_path(std::move(full_path));
+  return base_fs_->OpenInputStream(rebased_info);
+}
+
+/// Async open of `path` (relative to the base path); an empty path fails
+/// before anything is forwarded to the wrapped filesystem.
+Future<std::shared_ptr<io::InputStream>> SubTreeFileSystem::OpenInputStreamAsync(
+    const std::string& path) {
+  std::string full_path(path);
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_path));
+  return base_fs_->OpenInputStreamAsync(full_path);
+}
+
+/// Async FileInfo overload: forwards a copy of `info` whose path has been rebased.
+Future<std::shared_ptr<io::InputStream>> SubTreeFileSystem::OpenInputStreamAsync(
+    const FileInfo& info) {
+  std::string full_path = info.path();
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_path));
+  FileInfo rebased_info(info);
+  rebased_info.set_path(std::move(full_path));
+  return base_fs_->OpenInputStreamAsync(rebased_info);
+}
+
+/// Opens `path` (relative to the base path) for random-access reading.
+Result<std::shared_ptr<io::RandomAccessFile>> SubTreeFileSystem::OpenInputFile(
+    const std::string& path) {
+  std::string full_path(path);
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_path));
+  return base_fs_->OpenInputFile(full_path);
+}
+
+/// FileInfo overload: forwards a copy of `info` whose path has been rebased.
+Result<std::shared_ptr<io::RandomAccessFile>> SubTreeFileSystem::OpenInputFile(
+    const FileInfo& info) {
+  std::string full_path = info.path();
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_path));
+  FileInfo rebased_info(info);
+  rebased_info.set_path(std::move(full_path));
+  return base_fs_->OpenInputFile(rebased_info);
+}
+
+/// Async open of `path` (relative to the base path); an empty path fails
+/// before anything is forwarded to the wrapped filesystem.
+Future<std::shared_ptr<io::RandomAccessFile>> SubTreeFileSystem::OpenInputFileAsync(
+    const std::string& path) {
+  std::string full_path(path);
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_path));
+  return base_fs_->OpenInputFileAsync(full_path);
+}
+
+/// Async FileInfo overload: forwards a copy of `info` whose path has been rebased.
+Future<std::shared_ptr<io::RandomAccessFile>> SubTreeFileSystem::OpenInputFileAsync(
+    const FileInfo& info) {
+  std::string full_path = info.path();
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_path));
+  FileInfo rebased_info(info);
+  rebased_info.set_path(std::move(full_path));
+  return base_fs_->OpenInputFileAsync(rebased_info);
+}
+
+/// Opens `path` (relative to the base path) for writing, passing `metadata` through.
+Result<std::shared_ptr<io::OutputStream>> SubTreeFileSystem::OpenOutputStream(
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
+  std::string full_path(path);
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_path));
+  return base_fs_->OpenOutputStream(full_path, metadata);
+}
+
+/// Opens `path` (relative to the base path) for appending, passing `metadata` through.
+Result<std::shared_ptr<io::OutputStream>> SubTreeFileSystem::OpenAppendStream(
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
+  std::string full_path(path);
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_path));
+  return base_fs_->OpenAppendStream(full_path, metadata);
+}
+
+//////////////////////////////////////////////////////////////////////////
+// SlowFileSystem implementation
+
+/// Wraps `base_fs`, using the supplied latency generator. Shares the IO context
+/// of `base_fs`.
+SlowFileSystem::SlowFileSystem(std::shared_ptr<FileSystem> base_fs,
+                               std::shared_ptr<io::LatencyGenerator> latencies)
+    : FileSystem(base_fs->io_context()),
+      base_fs_(std::move(base_fs)),
+      latencies_(std::move(latencies)) {}
+
+/// Wraps `base_fs` with a latency generator built by
+/// io::LatencyGenerator::Make(average_latency).
+SlowFileSystem::SlowFileSystem(std::shared_ptr<FileSystem> base_fs,
+                               double average_latency)
+    : FileSystem(base_fs->io_context()),
+      base_fs_(std::move(base_fs)),
+      latencies_(io::LatencyGenerator::Make(average_latency)) {}
+
+/// Wraps `base_fs` with a latency generator built by
+/// io::LatencyGenerator::Make(average_latency, seed).
+SlowFileSystem::SlowFileSystem(std::shared_ptr<FileSystem> base_fs,
+                               double average_latency, int32_t seed)
+    : FileSystem(base_fs->io_context()),
+      base_fs_(std::move(base_fs)),
+      latencies_(io::LatencyGenerator::Make(average_latency, seed)) {}
+
+/// Identity comparison only: equal exactly when `other` is this same object.
+bool SlowFileSystem::Equals(const FileSystem& other) const {
+  const FileSystem* rhs = &other;
+  return rhs == this;
+}
+
+/// Calls latencies_->Sleep(), then forwards to the wrapped filesystem.
+Result<FileInfo> SlowFileSystem::GetFileInfo(const std::string& path) {
+  latencies_->Sleep();
+  Result<FileInfo> info = base_fs_->GetFileInfo(path);
+  return info;
+}
+
+/// Calls latencies_->Sleep(), then forwards to the wrapped filesystem.
+Result<std::vector<FileInfo>> SlowFileSystem::GetFileInfo(const FileSelector& selector) {
+  latencies_->Sleep();
+  Result<std::vector<FileInfo>> infos = base_fs_->GetFileInfo(selector);
+  return infos;
+}
+
+/// Calls latencies_->Sleep(), then forwards to the wrapped filesystem.
+Status SlowFileSystem::CreateDir(const std::string& path, bool recursive) {
+  latencies_->Sleep();
+  Status outcome = base_fs_->CreateDir(path, recursive);
+  return outcome;
+}
+
+/// Calls latencies_->Sleep(), then forwards to the wrapped filesystem.
+Status SlowFileSystem::DeleteDir(const std::string& path) {
+  latencies_->Sleep();
+  Status outcome = base_fs_->DeleteDir(path);
+  return outcome;
+}
+
+/// Calls latencies_->Sleep(), then forwards to the wrapped filesystem.
+Status SlowFileSystem::DeleteDirContents(const std::string& path) {
+  latencies_->Sleep();
+  Status outcome = base_fs_->DeleteDirContents(path);
+  return outcome;
+}
+
+/// Calls latencies_->Sleep(), then forwards to the wrapped filesystem.
+Status SlowFileSystem::DeleteRootDirContents() {
+  latencies_->Sleep();
+  Status outcome = base_fs_->DeleteRootDirContents();
+  return outcome;
+}
+
+/// Calls latencies_->Sleep(), then forwards to the wrapped filesystem.
+Status SlowFileSystem::DeleteFile(const std::string& path) {
+  latencies_->Sleep();
+  Status outcome = base_fs_->DeleteFile(path);
+  return outcome;
+}
+
+/// Calls latencies_->Sleep(), then forwards to the wrapped filesystem.
+Status SlowFileSystem::Move(const std::string& src, const std::string& dest) {
+  latencies_->Sleep();
+  Status outcome = base_fs_->Move(src, dest);
+  return outcome;
+}
+
+/// Calls latencies_->Sleep(), then forwards to the wrapped filesystem.
+Status SlowFileSystem::CopyFile(const std::string& src, const std::string& dest) {
+  latencies_->Sleep();
+  Status outcome = base_fs_->CopyFile(src, dest);
+  return outcome;
+}
+
+/// Calls latencies_->Sleep(), opens the stream on the wrapped filesystem, and
+/// wraps it in an io::SlowInputStream sharing the same latency generator.
+Result<std::shared_ptr<io::InputStream>> SlowFileSystem::OpenInputStream(
+    const std::string& path) {
+  latencies_->Sleep();
+  ARROW_ASSIGN_OR_RAISE(auto inner, base_fs_->OpenInputStream(path));
+  return std::make_shared<io::SlowInputStream>(inner, latencies_);
+}
+
+/// FileInfo overload: calls latencies_->Sleep(), opens the stream on the wrapped
+/// filesystem, and wraps it in an io::SlowInputStream.
+Result<std::shared_ptr<io::InputStream>> SlowFileSystem::OpenInputStream(
+    const FileInfo& info) {
+  latencies_->Sleep();
+  ARROW_ASSIGN_OR_RAISE(auto inner, base_fs_->OpenInputStream(info));
+  return std::make_shared<io::SlowInputStream>(inner, latencies_);
+}
+
+/// Calls latencies_->Sleep(), opens the file on the wrapped filesystem, and
+/// wraps it in an io::SlowRandomAccessFile sharing the same latency generator.
+Result<std::shared_ptr<io::RandomAccessFile>> SlowFileSystem::OpenInputFile(
+    const std::string& path) {
+  latencies_->Sleep();
+  ARROW_ASSIGN_OR_RAISE(auto inner, base_fs_->OpenInputFile(path));
+  return std::make_shared<io::SlowRandomAccessFile>(inner, latencies_);
+}
+
+/// FileInfo overload: calls latencies_->Sleep(), opens the file on the wrapped
+/// filesystem, and wraps it in an io::SlowRandomAccessFile.
+Result<std::shared_ptr<io::RandomAccessFile>> SlowFileSystem::OpenInputFile(
+    const FileInfo& info) {
+  latencies_->Sleep();
+  ARROW_ASSIGN_OR_RAISE(auto inner, base_fs_->OpenInputFile(info));
+  return std::make_shared<io::SlowRandomAccessFile>(inner, latencies_);
+}
+
+/// Calls latencies_->Sleep(), then returns the wrapped filesystem's output
+/// stream as-is (the stream itself is not wrapped).
+Result<std::shared_ptr<io::OutputStream>> SlowFileSystem::OpenOutputStream(
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
+  latencies_->Sleep();
+  // XXX Should we have a SlowOutputStream that waits on Flush() and Close()?
+  Result<std::shared_ptr<io::OutputStream>> sink =
+      base_fs_->OpenOutputStream(path, metadata);
+  return sink;
+}
+
+/// Calls latencies_->Sleep(), then returns the wrapped filesystem's append
+/// stream as-is (the stream itself is not wrapped).
+Result<std::shared_ptr<io::OutputStream>> SlowFileSystem::OpenAppendStream(
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
+  latencies_->Sleep();
+  Result<std::shared_ptr<io::OutputStream>> sink =
+      base_fs_->OpenAppendStream(path, metadata);
+  return sink;
+}
+
+/// \brief Copy sources[i] to destinations[i] for every index.
+///
+/// Returns Invalid when the two vectors differ in length. When source and
+/// destination filesystems compare equal, the filesystem's own CopyFile() is
+/// used; otherwise the source is streamed into a freshly opened output stream
+/// (carrying over the source's metadata) in chunks of `chunk_size`.
+/// Iteration goes through OptionalParallelFor with `use_threads` and the
+/// executor of `io_context`.
+Status CopyFiles(const std::vector<FileLocator>& sources,
+                 const std::vector<FileLocator>& destinations,
+                 const io::IOContext& io_context, int64_t chunk_size, bool use_threads) {
+  if (sources.size() != destinations.size()) {
+    return Status::Invalid("Trying to copy ", sources.size(), " files into ",
+                           destinations.size(), " paths.");
+  }
+
+  auto copy_at = [&](int index) -> Status {
+    const FileLocator& from = sources[index];
+    const FileLocator& to = destinations[index];
+
+    // Same filesystem on both ends: let it copy natively.
+    if (from.filesystem->Equals(to.filesystem)) {
+      return from.filesystem->CopyFile(from.path, to.path);
+    }
+
+    ARROW_ASSIGN_OR_RAISE(auto in_stream, from.filesystem->OpenInputStream(from.path));
+    ARROW_ASSIGN_OR_RAISE(const auto metadata, in_stream->ReadMetadata());
+
+    ARROW_ASSIGN_OR_RAISE(auto out_stream,
+                          to.filesystem->OpenOutputStream(to.path, metadata));
+    RETURN_NOT_OK(internal::CopyStream(in_stream, out_stream, chunk_size, io_context));
+    return out_stream->Close();
+  };
+
+  const int num_files = static_cast<int>(sources.size());
+  return ::arrow::internal::OptionalParallelFor(use_threads, num_files,
+                                                std::move(copy_at),
+                                                io_context.executor());
+}
+
+/// \brief Copy everything matched by `source_sel` into `destination_base_dir`.
+///
+/// Each selected entry's path is made relative to the selector's base_dir and
+/// re-rooted under `destination_base_dir`. Directories are created first (after
+/// reduction through MinimalCreateDirSet), then regular files are copied via the
+/// locator-based CopyFiles(). Entries that are neither files nor directories
+/// are skipped. Returns Invalid if an entry lies outside the selector's base_dir.
+Status CopyFiles(const std::shared_ptr<FileSystem>& source_fs,
+                 const FileSelector& source_sel,
+                 const std::shared_ptr<FileSystem>& destination_fs,
+                 const std::string& destination_base_dir, const io::IOContext& io_context,
+                 int64_t chunk_size, bool use_threads) {
+  ARROW_ASSIGN_OR_RAISE(auto entries, source_fs->GetFileInfo(source_sel));
+  if (entries.empty()) {
+    return Status::OK();
+  }
+
+  std::vector<FileLocator> file_sources, file_destinations;
+  std::vector<std::string> dir_paths;
+
+  for (const FileInfo& entry : entries) {
+    auto relative = internal::RemoveAncestor(source_sel.base_dir, entry.path());
+    if (!relative.has_value()) {
+      return Status::Invalid("GetFileInfo() yielded path '", entry.path(),
+                             "', which is outside base dir '", source_sel.base_dir, "'");
+    }
+
+    auto target_path =
+        internal::ConcatAbstractPath(destination_base_dir, relative->to_string());
+
+    if (entry.IsDirectory()) {
+      dir_paths.push_back(target_path);
+    } else if (entry.IsFile()) {
+      file_sources.push_back({source_fs, entry.path()});
+      file_destinations.push_back({destination_fs, target_path});
+    }
+  }
+
+  dir_paths = internal::MinimalCreateDirSet(std::move(dir_paths));
+
+  auto make_dir_at = [&](int index) { return destination_fs->CreateDir(dir_paths[index]); };
+  const int num_dirs = static_cast<int>(dir_paths.size());
+  RETURN_NOT_OK(::arrow::internal::OptionalParallelFor(
+      use_threads, num_dirs, std::move(make_dir_at), io_context.executor()));
+
+  return CopyFiles(file_sources, file_destinations, io_context, chunk_size, use_threads);
+}
+
+namespace {
+
+// Parses `uri_string` into a Uri.
+// On Windows a failed parse is retried after converting backslashes to slashes;
+// the retry is only accepted for the "file" scheme, otherwise the original
+// parse error is returned.
+Result<Uri> ParseFileSystemUri(const std::string& uri_string) {
+  Uri parsed;
+  auto first_attempt = parsed.Parse(uri_string);
+  if (!first_attempt.ok()) {
+#ifdef _WIN32
+    // Could be a "file:..." URI with backslashes instead of regular slashes.
+    RETURN_NOT_OK(parsed.Parse(ToSlashes(uri_string)));
+    if (parsed.scheme() != "file") {
+      return first_attempt;
+    }
+#else
+    return first_attempt;
+#endif
+  }
+  return std::move(parsed);
+}
+
+// Instantiates the filesystem that matches the scheme of an already-parsed URI.
+//
+// Handled schemes: "file", "hdfs"/"viewfs" (only with ARROW_HDFS), "s3" (only
+// with ARROW_S3) and "mock". When `out_path` is non-null it receives the path
+// component to use with the returned filesystem. `uri_string` is only used for
+// the error message on an unrecognized scheme.
+Result<std::shared_ptr<FileSystem>> FileSystemFromUriReal(const Uri& uri,
+                                                          const std::string& uri_string,
+                                                          const io::IOContext& io_context,
+                                                          std::string* out_path) {
+  const auto scheme = uri.scheme();
+  const bool wants_path = (out_path != nullptr);
+
+  if (scheme == "file") {
+    std::string local_path;
+    ARROW_ASSIGN_OR_RAISE(auto options,
+                          LocalFileSystemOptions::FromUri(uri, &local_path));
+    if (wants_path) {
+      *out_path = local_path;
+    }
+    return std::make_shared<LocalFileSystem>(options, io_context);
+  }
+
+  if (scheme == "hdfs" || scheme == "viewfs") {
+#ifdef ARROW_HDFS
+    ARROW_ASSIGN_OR_RAISE(auto options, HdfsOptions::FromUri(uri));
+    if (wants_path) {
+      *out_path = uri.path();
+    }
+    ARROW_ASSIGN_OR_RAISE(auto hdfs, HadoopFileSystem::Make(options, io_context));
+    return hdfs;
+#else
+    return Status::NotImplemented("Got HDFS URI but Arrow compiled without HDFS support");
+#endif
+  }
+
+  if (scheme == "s3") {
+#ifdef ARROW_S3
+    RETURN_NOT_OK(EnsureS3Initialized());
+    ARROW_ASSIGN_OR_RAISE(auto options, S3Options::FromUri(uri, out_path));
+    ARROW_ASSIGN_OR_RAISE(auto s3fs, S3FileSystem::Make(options, io_context));
+    return s3fs;
+#else
+    return Status::NotImplemented("Got S3 URI but Arrow compiled without S3 support");
+#endif
+  }
+
+  if (scheme == "mock") {
+    // MockFileSystem does not have an absolute / relative path distinction,
+    // normalize path by removing leading slash.
+    if (wants_path) {
+      *out_path = std::string(RemoveLeadingSlash(uri.path()));
+    }
+    return std::make_shared<internal::MockFileSystem>(internal::CurrentTimePoint(),
+                                                      io_context);
+  }
+
+  return Status::Invalid("Unrecognized filesystem type in URI: ", uri_string);
+}
+
+}  // namespace
+
+/// Convenience overload that uses io::default_io_context().
+Result<std::shared_ptr<FileSystem>> FileSystemFromUri(const std::string& uri_string,
+                                                      std::string* out_path) {
+  const io::IOContext& default_context = io::default_io_context();
+  return FileSystemFromUri(uri_string, default_context, out_path);
+}
+
+/// Parses `uri_string` and instantiates the filesystem matching its scheme.
+/// `out_path`, when non-null, receives the path inside that filesystem.
+Result<std::shared_ptr<FileSystem>> FileSystemFromUri(const std::string& uri_string,
+                                                      const io::IOContext& io_context,
+                                                      std::string* out_path) {
+  ARROW_ASSIGN_OR_RAISE(auto parsed_uri, ParseFileSystemUri(uri_string));
+  return FileSystemFromUriReal(parsed_uri, uri_string, io_context, out_path);
+}
+
+/// Convenience overload that uses io::default_io_context().
+Result<std::shared_ptr<FileSystem>> FileSystemFromUriOrPath(const std::string& uri_string,
+                                                            std::string* out_path) {
+  const io::IOContext& default_context = io::default_io_context();
+  return FileSystemFromUriOrPath(uri_string, default_context, out_path);
+}
+
+/// \brief Create a filesystem from either a URI or an absolute local path.
+///
+/// When internal::DetectAbsolutePath() recognizes `uri_string` as an absolute
+/// path, a LocalFileSystem is returned and `out_path` (if non-null) receives the
+/// path with separators converted by ToSlashes(). Otherwise the string is
+/// handled as a URI by FileSystemFromUri().
+///
+/// \param uri_string URI or absolute local path
+/// \param io_context IO context given to the created filesystem in both branches
+/// \param out_path   optional output: path inside the returned filesystem
+Result<std::shared_ptr<FileSystem>> FileSystemFromUriOrPath(
+    const std::string& uri_string, const io::IOContext& io_context,
+    std::string* out_path) {
+  if (internal::DetectAbsolutePath(uri_string)) {
+    // Normalize path separators
+    if (out_path != nullptr) {
+      *out_path = ToSlashes(uri_string);
+    }
+    // Pass the caller's io_context along: the URI branch below honours it, and
+    // a default-constructed LocalFileSystem would silently drop it.
+    return std::make_shared<LocalFileSystem>(LocalFileSystemOptions::Defaults(),
+                                             io_context);
+  }
+  return FileSystemFromUri(uri_string, io_context, out_path);
+}
+
+/// Status-returning variant: stores the created filesystem in `*out_fs`.
+Status FileSystemFromUri(const std::string& uri, std::shared_ptr<FileSystem>* out_fs,
+                         std::string* out_path) {
+  auto maybe_fs = FileSystemFromUri(uri, out_path);
+  return std::move(maybe_fs).Value(out_fs);
+}
+
+/// Stores `options` in internal::global_options. Always returns OK.
+Status Initialize(const FileSystemGlobalOptions& options) {
+  auto& current_options = internal::global_options;
+  current_options = options;
+  return Status::OK();
+}
+
+}  // namespace fs
+}  // namespace arrow

+ 532 - 0
contrib/libs/apache/arrow/cpp/src/arrow/filesystem/filesystem.h

@@ -0,0 +1,532 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <chrono>
+#include <cstdint>
+#include <functional>
+#include <iosfwd>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/filesystem/type_fwd.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/compare.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/type_fwd.h"
+#include "arrow/util/visibility.h"
+#include "arrow/util/windows_fixup.h"
+
+namespace arrow {
+namespace fs {
+
+// A system clock time point expressed as a 64-bit (or more) number of
+// nanoseconds since the epoch.
+using TimePoint =
+    std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds>;
+
+ARROW_EXPORT std::string ToString(FileType);  // string form of a FileType value
+
+ARROW_EXPORT std::ostream& operator<<(std::ostream& os, FileType);
+
+static const int64_t kNoSize = -1;  // sentinel; default value of FileInfo::size_
+static const TimePoint kNoTime = TimePoint(TimePoint::duration(-1));  // sentinel; default of FileInfo::mtime_
+
+/// \brief FileSystem entry info
+struct ARROW_EXPORT FileInfo : public util::EqualityComparable<FileInfo> {
+  FileInfo() = default;  // empty path, type Unknown, size kNoSize, mtime kNoTime
+  FileInfo(FileInfo&&) = default;
+  FileInfo& operator=(FileInfo&&) = default;
+  FileInfo(const FileInfo&) = default;
+  FileInfo& operator=(const FileInfo&) = default;
+
+  explicit FileInfo(std::string path, FileType type = FileType::Unknown)
+      : path_(std::move(path)), type_(type) {}  // size and mtime keep their defaults
+
+  /// The file type
+  FileType type() const { return type_; }
+  void set_type(FileType type) { type_ = type; }
+
+  /// The full file path in the filesystem
+  const std::string& path() const { return path_; }
+  void set_path(std::string path) { path_ = std::move(path); }
+
+  /// The file base name (component after the last directory separator)
+  std::string base_name() const;
+
+  /// The directory base name (component before the file base name).
+  std::string dir_name() const;
+
+  /// The size in bytes, if available (kNoSize unless set)
+  ///
+  /// Only regular files are guaranteed to have a size.
+  int64_t size() const { return size_; }
+  void set_size(int64_t size) { size_ = size; }
+
+  /// The file extension (excluding the dot)
+  std::string extension() const;
+
+  /// The time of last modification, if available (kNoTime unless set)
+  TimePoint mtime() const { return mtime_; }
+  void set_mtime(TimePoint mtime) { mtime_ = mtime; }
+
+  bool IsFile() const { return type_ == FileType::File; }
+  bool IsDirectory() const { return type_ == FileType::Directory; }
+
+  bool Equals(const FileInfo& other) const {  // field-wise: type, path, size, mtime
+    return type() == other.type() && path() == other.path() && size() == other.size() &&
+           mtime() == other.mtime();
+  }
+
+  std::string ToString() const;
+
+  /// Function object implementing less-than comparison and hashing by
+  /// path, to support sorting infos, using them as keys, and other
+  /// interactions with the STL.  Only path() takes part; type, size and
+  /// mtime are ignored by both operators.
+  struct ByPath {
+    bool operator()(const FileInfo& l, const FileInfo& r) const {  // ordering
+      return l.path() < r.path();
+    }
+
+    size_t operator()(const FileInfo& i) const {  // hashing
+      return std::hash<std::string>{}(i.path());
+    }
+  };
+
+ protected:
+  std::string path_;
+  FileType type_ = FileType::Unknown;
+  int64_t size_ = kNoSize;
+  TimePoint mtime_ = kNoTime;
+};
+
+ARROW_EXPORT std::ostream& operator<<(std::ostream& os, const FileInfo&);
+
+/// \brief File selector for filesystem APIs
+struct ARROW_EXPORT FileSelector {
+  /// Directory whose entries are selected.
+  /// It should be an error if the path exists but is not a directory.
+  std::string base_dir;
+  /// What happens when `base_dir` is missing from the filesystem:
+  /// true yields an empty selection, false (the default) yields an error.
+  bool allow_not_found = false;
+  /// Whether subdirectories are traversed as well (off by default).
+  bool recursive = false;
+  /// The maximum number of subdirectories to recurse into
+  /// (INT32_MAX by default).
+  int32_t max_recursion = INT32_MAX;
+
+  // Kept user-provided so that the struct remains a non-aggregate type.
+  FileSelector() {}
+};
+
+/// \brief FileSystem, path pair
+struct ARROW_EXPORT FileLocator {
+  std::shared_ptr<FileSystem> filesystem;  // filesystem half of the pair
+  std::string path;  // path half of the pair
+};
+
+using FileInfoVector = std::vector<FileInfo>;  // a batch of FileInfo entries
+using FileInfoGenerator = std::function<Future<FileInfoVector>()>;  // each call returns a Future of one batch
+
+}  // namespace fs
+
+/// Iteration traits for batches of FileInfo: an empty vector serves as the
+/// end-of-iteration marker.
+template <>
+struct IterationTraits<fs::FileInfoVector> {
+  static fs::FileInfoVector End() { return fs::FileInfoVector(); }
+  static bool IsEnd(const fs::FileInfoVector& val) { return val.size() == 0; }
+};
+
+namespace fs {
+
+/// \brief Abstract file system API
+class ARROW_EXPORT FileSystem : public std::enable_shared_from_this<FileSystem> {
+ public:
+  virtual ~FileSystem();
+
+  virtual std::string type_name() const = 0;  // implementation identifier, e.g. "subtree", "slow"
+
+  /// EXPERIMENTAL: The IOContext associated with this filesystem.
+  const io::IOContext& io_context() const { return io_context_; }
+
+  /// Normalize path for the given filesystem
+  ///
+  /// The default implementation of this method is a no-op, but subclasses
+  /// may allow normalizing irregular path forms (such as Windows local paths).
+  virtual Result<std::string> NormalizePath(std::string path);
+
+  virtual bool Equals(const FileSystem& other) const = 0;
+
+  virtual bool Equals(const std::shared_ptr<FileSystem>& other) const {
+    return Equals(*other);  // dereferences unconditionally: `other` must be non-null
+  }
+
+  /// Get info for the given target.
+  ///
+  /// Any symlink is automatically dereferenced, recursively.
+  /// A nonexistent or unreachable file returns an Ok status and
+  /// has a FileType of value NotFound.  An error status indicates
+  /// a truly exceptional condition (low-level I/O error, etc.).
+  virtual Result<FileInfo> GetFileInfo(const std::string& path) = 0;
+  /// Same, for many targets at once.
+  virtual Result<FileInfoVector> GetFileInfo(const std::vector<std::string>& paths);
+  /// Same, according to a selector.
+  ///
+  /// The selector's base directory will not be part of the results, even if
+  /// it exists.
+  /// If it doesn't exist, see `FileSelector::allow_not_found`.
+  virtual Result<FileInfoVector> GetFileInfo(const FileSelector& select) = 0;
+
+  /// EXPERIMENTAL: async version of GetFileInfo
+  virtual Future<FileInfoVector> GetFileInfoAsync(const std::vector<std::string>& paths);
+
+  /// EXPERIMENTAL: streaming async version of GetFileInfo
+  ///
+  /// The returned generator is not async-reentrant, i.e. you need to wait for
+  /// the returned future to complete before calling the generator again.
+  virtual FileInfoGenerator GetFileInfoGenerator(const FileSelector& select);
+
+  /// Create a directory and subdirectories.
+  ///
+  /// This function succeeds if the directory already exists.
+  virtual Status CreateDir(const std::string& path, bool recursive = true) = 0;
+
+  /// Delete a directory and its contents, recursively.
+  virtual Status DeleteDir(const std::string& path) = 0;
+
+  /// Delete a directory's contents, recursively.
+  ///
+  /// Like DeleteDir, but doesn't delete the directory itself.
+  /// Passing an empty path ("" or "/") is disallowed, see DeleteRootDirContents.
+  virtual Status DeleteDirContents(const std::string& path) = 0;
+
+  /// EXPERIMENTAL: Delete the root directory's contents, recursively.
+  ///
+  /// Implementations may decide to raise an error if this operation is
+  /// too dangerous.
+  // NOTE: may decide to remove this if it's deemed not useful
+  virtual Status DeleteRootDirContents() = 0;
+
+  /// Delete a file.
+  virtual Status DeleteFile(const std::string& path) = 0;
+  /// Delete many files.
+  ///
+  /// The default implementation issues individual delete operations in sequence.
+  virtual Status DeleteFiles(const std::vector<std::string>& paths);
+
+  /// Move / rename a file or directory.
+  ///
+  /// If the destination exists:
+  /// - if it is a non-empty directory, an error is returned
+  /// - otherwise, if it has the same type as the source, it is replaced
+  /// - otherwise, behavior is unspecified (implementation-dependent).
+  virtual Status Move(const std::string& src, const std::string& dest) = 0;
+
+  /// Copy a file.
+  ///
+  /// If the destination exists and is a directory, an error is returned.
+  /// Otherwise, it is replaced.
+  virtual Status CopyFile(const std::string& src, const std::string& dest) = 0;
+
+  /// Open an input stream for sequential reading.
+  virtual Result<std::shared_ptr<io::InputStream>> OpenInputStream(
+      const std::string& path) = 0;
+  /// Open an input stream for sequential reading.
+  ///
+  /// This overload assumes the given FileInfo validly represents the file's
+  /// characteristics, and may optimize access depending on them (for example
+  /// avoid querying the file size or its existence).
+  virtual Result<std::shared_ptr<io::InputStream>> OpenInputStream(const FileInfo& info);
+
+  /// Open an input file for random access reading.
+  virtual Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
+      const std::string& path) = 0;
+  /// Open an input file for random access reading.
+  ///
+  /// This overload assumes the given FileInfo validly represents the file's
+  /// characteristics, and may optimize access depending on them (for example
+  /// avoid querying the file size or its existence).
+  virtual Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
+      const FileInfo& info);
+
+  /// EXPERIMENTAL: async version of OpenInputStream
+  virtual Future<std::shared_ptr<io::InputStream>> OpenInputStreamAsync(
+      const std::string& path);
+  /// EXPERIMENTAL: async version of OpenInputStream
+  virtual Future<std::shared_ptr<io::InputStream>> OpenInputStreamAsync(
+      const FileInfo& info);
+
+  /// EXPERIMENTAL: async version of OpenInputFile
+  virtual Future<std::shared_ptr<io::RandomAccessFile>> OpenInputFileAsync(
+      const std::string& path);
+  /// EXPERIMENTAL: async version of OpenInputFile
+  virtual Future<std::shared_ptr<io::RandomAccessFile>> OpenInputFileAsync(
+      const FileInfo& info);
+
+  /// Open an output stream for sequential writing.
+  ///
+  /// If the target already exists, existing data is truncated.
+  virtual Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata) = 0;
+  Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(const std::string& path);  // non-virtual, path-only overload
+
+  /// Open an output stream for appending.
+  ///
+  /// If the target doesn't exist, a new empty file is created.
+  virtual Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata) = 0;
+  Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(const std::string& path);  // non-virtual, path-only overload
+
+ protected:
+  // Only subclasses can construct a FileSystem; the IO context defaults to
+  // io::default_io_context() and is stored by value.
+  explicit FileSystem(const io::IOContext& io_context = io::default_io_context())
+      : io_context_(io_context) {}
+
+  io::IOContext io_context_;
+  // Whether metadata operations (such as GetFileInfo or OpenInputStream)
+  // are cheap enough that the default async variants don't bother with
+  // a thread pool.
+  bool default_async_is_sync_ = true;
+};
+
+/// \brief A FileSystem implementation that delegates to another
+/// implementation after prepending a fixed base path.
+///
+/// This is useful to expose a logical view of a subtree of a filesystem,
+/// for example a directory in a LocalFileSystem.
+/// This works on abstract paths, i.e. paths using forward slashes
+/// and a single root "/".  Windows paths are not guaranteed to work.
+/// This makes no security guarantee.  For example, symlinks may allow to
+/// "escape" the subtree and access other parts of the underlying filesystem.
+class ARROW_EXPORT SubTreeFileSystem : public FileSystem {
+ public:
+  // This constructor may abort if base_path is invalid.
+  explicit SubTreeFileSystem(const std::string& base_path,
+                             std::shared_ptr<FileSystem> base_fs);
+  ~SubTreeFileSystem() override;
+
+  std::string type_name() const override { return "subtree"; }
+  std::string base_path() const { return base_path_; }  // returns a copy of the stored base path
+  std::shared_ptr<FileSystem> base_fs() const { return base_fs_; }  // the filesystem being delegated to
+
+  Result<std::string> NormalizePath(std::string path) override;
+
+  bool Equals(const FileSystem& other) const override;
+
+  /// \cond FALSE
+  using FileSystem::GetFileInfo;  // keep the base-class overloads visible next to the overrides below
+  /// \endcond
+  Result<FileInfo> GetFileInfo(const std::string& path) override;
+  Result<FileInfoVector> GetFileInfo(const FileSelector& select) override;
+
+  FileInfoGenerator GetFileInfoGenerator(const FileSelector& select) override;
+
+  Status CreateDir(const std::string& path, bool recursive = true) override;
+
+  Status DeleteDir(const std::string& path) override;
+  Status DeleteDirContents(const std::string& path) override;
+  Status DeleteRootDirContents() override;
+
+  Status DeleteFile(const std::string& path) override;
+
+  Status Move(const std::string& src, const std::string& dest) override;
+
+  Status CopyFile(const std::string& src, const std::string& dest) override;
+
+  Result<std::shared_ptr<io::InputStream>> OpenInputStream(
+      const std::string& path) override;
+  Result<std::shared_ptr<io::InputStream>> OpenInputStream(const FileInfo& info) override;
+  Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
+      const std::string& path) override;
+  Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
+      const FileInfo& info) override;
+
+  Future<std::shared_ptr<io::InputStream>> OpenInputStreamAsync(
+      const std::string& path) override;
+  Future<std::shared_ptr<io::InputStream>> OpenInputStreamAsync(
+      const FileInfo& info) override;
+  Future<std::shared_ptr<io::RandomAccessFile>> OpenInputFileAsync(
+      const std::string& path) override;
+  Future<std::shared_ptr<io::RandomAccessFile>> OpenInputFileAsync(
+      const FileInfo& info) override;
+
+  // Unlike the base-class declarations, `metadata` has a default value here.
+  Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
+  Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
+
+ protected:
+  SubTreeFileSystem() {}  // leaves base_path_ empty and base_fs_ null
+
+  const std::string base_path_;  // const: fixed at construction
+  std::shared_ptr<FileSystem> base_fs_;
+
+  // Path helpers; their definitions live outside this header.
+  // NOTE(review): presumably they translate between subtree-relative paths
+  // and paths of base_fs_ — confirm against the implementation.
+  std::string PrependBase(const std::string& s) const;
+  Status PrependBaseNonEmpty(std::string* s) const;
+  Result<std::string> StripBase(const std::string& s) const;
+  Status FixInfo(FileInfo* info) const;
+
+  static Result<std::string> NormalizeBasePath(
+      std::string base_path, const std::shared_ptr<FileSystem>& base_fs);
+};
+
+/// \brief A FileSystem implementation that delegates to another
+/// implementation but inserts latencies at various points.
+class ARROW_EXPORT SlowFileSystem : public FileSystem {
+ public:
+  // Three ways to specify the latencies: an explicit generator, an average
+  // latency, or an average latency plus a seed.
+  // NOTE(review): the unit of `average_latency` is not stated here — confirm.
+  SlowFileSystem(std::shared_ptr<FileSystem> base_fs,
+                 std::shared_ptr<io::LatencyGenerator> latencies);
+  SlowFileSystem(std::shared_ptr<FileSystem> base_fs, double average_latency);
+  SlowFileSystem(std::shared_ptr<FileSystem> base_fs, double average_latency,
+                 int32_t seed);
+
+  std::string type_name() const override { return "slow"; }
+  bool Equals(const FileSystem& other) const override;
+
+  using FileSystem::GetFileInfo;  // keep the base-class overloads visible next to the overrides below
+  Result<FileInfo> GetFileInfo(const std::string& path) override;
+  Result<FileInfoVector> GetFileInfo(const FileSelector& select) override;
+
+  Status CreateDir(const std::string& path, bool recursive = true) override;
+
+  Status DeleteDir(const std::string& path) override;
+  Status DeleteDirContents(const std::string& path) override;
+  Status DeleteRootDirContents() override;
+
+  Status DeleteFile(const std::string& path) override;
+
+  Status Move(const std::string& src, const std::string& dest) override;
+
+  Status CopyFile(const std::string& src, const std::string& dest) override;
+
+  Result<std::shared_ptr<io::InputStream>> OpenInputStream(
+      const std::string& path) override;
+  Result<std::shared_ptr<io::InputStream>> OpenInputStream(const FileInfo& info) override;
+  Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
+      const std::string& path) override;
+  Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
+      const FileInfo& info) override;
+  // Unlike the base-class declarations, `metadata` has a default value here.
+  Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
+  Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
+
+ protected:
+  std::shared_ptr<FileSystem> base_fs_;  // the wrapped filesystem
+  std::shared_ptr<io::LatencyGenerator> latencies_;  // source of the inserted latencies
+};
+
+/// \defgroup filesystem-factories Functions for creating FileSystem instances
+///
+/// @{
+
+/// \brief Create a new FileSystem by URI
+///
+/// Recognized schemes are "file", "mock", "hdfs" and "s3fs".
+///
+/// \param[in] uri a URI-based path, ex: file:///some/local/path
+/// \param[out] out_path (optional) Path inside the filesystem.
+/// \return the FileSystem instance.
+ARROW_EXPORT
+Result<std::shared_ptr<FileSystem>> FileSystemFromUri(const std::string& uri,
+                                                      std::string* out_path = NULLPTR);
+
+/// \brief Create a new FileSystem by URI with a custom IO context
+///
+/// Recognized schemes are "file", "mock", "hdfs" and "s3fs".
+///
+/// \param[in] uri a URI-based path, ex: file:///some/local/path
+/// \param[in] io_context an IOContext which will be associated with the filesystem
+/// \param[out] out_path (optional) Path inside the filesystem.
+/// \return the FileSystem instance.
+ARROW_EXPORT
+Result<std::shared_ptr<FileSystem>> FileSystemFromUri(const std::string& uri,
+                                                      const io::IOContext& io_context,
+                                                      std::string* out_path = NULLPTR);
+
+/// \brief Create a new FileSystem by URI
+///
+/// Same as FileSystemFromUri, but in addition also recognize non-URIs
+/// and treat them as local filesystem paths.  Only absolute local filesystem
+/// paths are allowed.
+ARROW_EXPORT
+Result<std::shared_ptr<FileSystem>> FileSystemFromUriOrPath(
+    const std::string& uri, std::string* out_path = NULLPTR);
+
+/// \brief Create a new FileSystem by URI with a custom IO context
+///
+/// Same as FileSystemFromUri, but in addition also recognize non-URIs
+/// and treat them as local filesystem paths.  Only absolute local filesystem
+/// paths are allowed.
+ARROW_EXPORT
+Result<std::shared_ptr<FileSystem>> FileSystemFromUriOrPath(
+    const std::string& uri, const io::IOContext& io_context,
+    std::string* out_path = NULLPTR);
+
+/// @}
+
+/// \brief Copy files, including from one FileSystem to another
+///
+/// If a source and destination are resident in the same FileSystem FileSystem::CopyFile
+/// will be used, otherwise the file will be opened as a stream in both FileSystems and
+/// chunks copied from the source to the destination. No directories will be created.
+ARROW_EXPORT
+Status CopyFiles(const std::vector<FileLocator>& sources,
+                 const std::vector<FileLocator>& destinations,
+                 const io::IOContext& io_context = io::default_io_context(),
+                 int64_t chunk_size = 1024 * 1024, bool use_threads = true);
+
+/// \brief Copy selected files, including from one FileSystem to another
+///
+/// Directories will be created under the destination base directory as needed.
+ARROW_EXPORT
+Status CopyFiles(const std::shared_ptr<FileSystem>& source_fs,
+                 const FileSelector& source_sel,
+                 const std::shared_ptr<FileSystem>& destination_fs,
+                 const std::string& destination_base_dir,
+                 const io::IOContext& io_context = io::default_io_context(),
+                 int64_t chunk_size = 1024 * 1024, bool use_threads = true);
+
+struct FileSystemGlobalOptions {  // options accepted by fs::Initialize()
+  /// Path to a single PEM file holding all TLS CA certificates
+  ///
+  /// If empty, the underlying TLS library's defaults will be used.
+  std::string tls_ca_file_path;
+
+  /// Path to a directory holding TLS CA certificates in individual PEM files
+  /// named according to the OpenSSL "hashed" format.
+  ///
+  /// If empty, the underlying TLS library's defaults will be used.
+  std::string tls_ca_dir_path;
+};
+
+/// Experimental: optional global initialization routine
+///
+/// This is for environments (such as manylinux) where the path
+/// to TLS CA certificates needs to be configured at runtime.
+ARROW_EXPORT
+Status Initialize(const FileSystemGlobalOptions& options);
+
+}  // namespace fs
+}  // namespace arrow

+ 448 - 0
contrib/libs/apache/arrow/cpp/src/arrow/filesystem/localfs.cc

@@ -0,0 +1,448 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <chrono>
+#include <cstring>
+#include <sstream>
+#include <utility>
+
+#ifdef _WIN32
+#include "arrow/util/windows_compatibility.h"
+#else
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#endif
+
+#include "arrow/filesystem/localfs.h"
+#include "arrow/filesystem/path_util.h"
+#include "arrow/filesystem/util_internal.h"
+#include "arrow/io/file.h"
+#include "arrow/util/io_util.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/uri.h"
+#include "arrow/util/windows_fixup.h"
+
+namespace arrow {
+namespace fs {
+
+using ::arrow::internal::IOErrorFromErrno;
+#ifdef _WIN32
+using ::arrow::internal::IOErrorFromWinError;
+#endif
+using ::arrow::internal::NativePathString;
+using ::arrow::internal::PlatformFilename;
+
+namespace internal {
+
+#ifdef _WIN32
+// Returns true if `c` is an ASCII letter, i.e. a possible drive letter.
+static bool IsDriveLetter(char c) {
+  // Plain range checks: the C/C++ stdlib classifiers are locale-dependent
+  const bool is_upper = 'A' <= c && c <= 'Z';
+  const bool is_lower = 'a' <= c && c <= 'z';
+  return is_upper || is_lower;
+}
+#endif
+
+// Returns true if `s` looks like an absolute local filesystem path.
+//
+// A leading '/' always qualifies.  On Windows builds, a leading '\' and the
+// drive-letter form ("C:\..." or "C:/...") qualify as well.
+bool DetectAbsolutePath(const std::string& s) {
+  if (s.empty()) {
+    return false;
+  }
+  // Is it a /-prefixed local path?
+  if (s.front() == '/') {
+    return true;
+  }
+#ifdef _WIN32
+  // Is it a \-prefixed local path?
+  if (s.front() == '\\') {
+    return true;
+  }
+  // Does it start with a drive letter in addition to being /- or \-prefixed,
+  // e.g. "C:\..."?
+  const bool has_drive_prefix = s.length() >= 3 && s[1] == ':' &&
+                                (s[2] == '/' || s[2] == '\\') && IsDriveLetter(s[0]);
+  if (has_drive_prefix) {
+    return true;
+  }
+#endif
+  return false;
+}
+
+}  // namespace internal
+
+namespace {
+
+#ifdef _WIN32
+
+// Converts a native path string to std::string via PlatformFilename.
+std::string NativeToString(const NativePathString& ns) {
+  return PlatformFilename(ns).ToString();
+}
+
+// Converts a Windows FILETIME (a count of 100 ns intervals since
+// January 1, 1601 UTC) into a TimePoint relative to the Unix epoch.
+TimePoint ToTimePoint(FILETIME ft) {
+  // Hundreds of nanoseconds between January 1, 1601 (UTC) and the Unix epoch.
+  static constexpr int64_t kFileTimeEpoch = 11644473600LL * 10000000;
+
+  const int64_t high_part = static_cast<int64_t>(ft.dwHighDateTime) << 32;
+  // Hundreds of ns since the Unix epoch
+  const int64_t ticks = high_part + ft.dwLowDateTime - kFileTimeEpoch;
+  const std::chrono::nanoseconds since_epoch(100 * ticks);
+  return TimePoint(std::chrono::duration_cast<TimePoint::duration>(since_epoch));
+}
+
+// Builds a FileInfo (type, size, mtime; no path) from Win32 handle information.
+// Anything that is not flagged as a directory is reported as a regular file.
+FileInfo FileInformationToFileInfo(const BY_HANDLE_FILE_INFORMATION& information) {
+  const bool is_directory =
+      (information.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0;
+
+  // Directories carry no size; files combine the two 32-bit halves
+  int64_t size = kNoSize;
+  if (!is_directory) {
+    size = (static_cast<int64_t>(information.nFileSizeHigh) << 32) +
+           information.nFileSizeLow;
+  }
+
+  FileInfo result;
+  result.set_type(is_directory ? FileType::Directory : FileType::File);
+  result.set_size(size);
+  result.set_mtime(ToTimePoint(information.ftLastWriteTime));
+  return result;
+}
+
+// Queries type, size and mtime of `path` through a Win32 file handle.
+//
+// A path that cannot be found (ERROR_FILE_NOT_FOUND / ERROR_PATH_NOT_FOUND)
+// is not an error: a FileInfo of type NotFound is returned.  Any other
+// failure is returned as an IOError built from the Windows error code.
+Result<FileInfo> StatFile(const std::wstring& path) {
+  std::string bytes_path = NativeToString(path);
+
+  /* Inspired by CPython, see Modules/posixmodule.c */
+  HANDLE h = CreateFileW(path.c_str(), FILE_READ_ATTRIBUTES, /* desired access */
+                         0,                                  /* share mode */
+                         NULL,                               /* security attributes */
+                         OPEN_EXISTING,
+                         /* FILE_FLAG_BACKUP_SEMANTICS is required to open a directory */
+                         FILE_ATTRIBUTE_NORMAL | FILE_FLAG_BACKUP_SEMANTICS, NULL);
+
+  if (h == INVALID_HANDLE_VALUE) {
+    // Read the error code once and reuse it for both the test and the report
+    const DWORD err = GetLastError();
+    if (err == ERROR_FILE_NOT_FOUND || err == ERROR_PATH_NOT_FOUND) {
+      FileInfo info;
+      info.set_path(bytes_path);
+      info.set_type(FileType::NotFound);
+      info.set_mtime(kNoTime);
+      info.set_size(kNoSize);
+      return info;
+    }
+    return IOErrorFromWinError(err, "Failed querying information for path '",
+                               bytes_path, "'");
+  }
+  BY_HANDLE_FILE_INFORMATION information;
+  if (!GetFileInformationByHandle(h, &information)) {
+    // Capture the error before CloseHandle() gets a chance to overwrite it
+    const DWORD err = GetLastError();
+    CloseHandle(h);
+    return IOErrorFromWinError(err, "Failed querying information for path '",
+                               bytes_path, "'");
+  }
+  CloseHandle(h);
+  FileInfo info = FileInformationToFileInfo(information);
+  info.set_path(bytes_path);
+  return info;
+}
+
+#else  // POSIX systems
+
+// Converts a POSIX timespec (seconds + nanoseconds) into a TimePoint.
+TimePoint ToTimePoint(const struct timespec& s) {
+  const auto whole_seconds = static_cast<int64_t>(s.tv_sec);
+  const auto extra_nanos = static_cast<int64_t>(s.tv_nsec);
+  const std::chrono::nanoseconds since_epoch(whole_seconds * 1000000000 + extra_nanos);
+  return TimePoint(std::chrono::duration_cast<TimePoint::duration>(since_epoch));
+}
+
+// Builds a FileInfo (type, size, mtime; no path) from a `struct stat`.
+// Only regular files get a size; anything that is neither a regular file
+// nor a directory is reported as Unknown.
+FileInfo StatToFileInfo(const struct stat& s) {
+  FileType type = FileType::Unknown;
+  int64_t size = kNoSize;
+  if (S_ISREG(s.st_mode)) {
+    type = FileType::File;
+    size = static_cast<int64_t>(s.st_size);
+  } else if (S_ISDIR(s.st_mode)) {
+    type = FileType::Directory;
+  }
+
+  FileInfo result;
+  result.set_type(type);
+  result.set_size(size);
+#ifdef __APPLE__
+  // macOS doesn't use the POSIX-compliant spelling
+  result.set_mtime(ToTimePoint(s.st_mtimespec));
+#else
+  result.set_mtime(ToTimePoint(s.st_mtim));
+#endif
+  return result;
+}
+
+// Queries type, size and mtime of `path` with stat().
+//
+// ENOENT, ENOTDIR and ELOOP are not errors: they produce a FileInfo of type
+// NotFound.  Any other stat() failure is returned as an IOError.
+Result<FileInfo> StatFile(const std::string& path) {
+  struct stat st;
+  FileInfo info;
+  if (stat(path.c_str(), &st) != -1) {
+    info = StatToFileInfo(st);
+  } else {
+    const int err = errno;
+    const bool missing = (err == ENOENT || err == ENOTDIR || err == ELOOP);
+    if (!missing) {
+      return IOErrorFromErrno(err, "Failed stat()ing path '", path, "'");
+    }
+    info.set_type(FileType::NotFound);
+    info.set_mtime(kNoTime);
+    info.set_size(kNoSize);
+  }
+  info.set_path(path);
+  return info;
+}
+
+#endif
+
+// Appends to `out` the FileInfo of every entry found in `dir_fn`, descending
+// into subdirectories when `select.recursive` is set and `nesting_depth` is
+// still below `select.max_recursion`.
+//
+// If listing the directory fails with an IOError, `select.allow_not_found` is
+// set and the directory does not exist, this returns OK without adding
+// anything.  Entries that stat as NotFound are skipped.
+Status StatSelector(const PlatformFilename& dir_fn, const FileSelector& select,
+                    int32_t nesting_depth, std::vector<FileInfo>* out) {
+  auto result = ListDir(dir_fn);
+  if (!result.ok()) {
+    auto status = result.status();
+    if (select.allow_not_found && status.IsIOError()) {
+      ARROW_ASSIGN_OR_RAISE(bool exists, FileExists(dir_fn));
+      if (!exists) {
+        return Status::OK();
+      }
+    }
+    return status;
+  }
+
+  for (const auto& child_fn : *result) {
+    PlatformFilename full_fn = dir_fn.Join(child_fn);
+    ARROW_ASSIGN_OR_RAISE(FileInfo info, StatFile(full_fn.ToNative()));
+    // Read the type up front: `info` must not be inspected once it has been
+    // moved into `out`.
+    const FileType type = info.type();
+    if (type != FileType::NotFound) {
+      out->push_back(std::move(info));
+    }
+    if (nesting_depth < select.max_recursion && select.recursive &&
+        type == FileType::Directory) {
+      RETURN_NOT_OK(StatSelector(full_fn, select, nesting_depth + 1, out));
+    }
+  }
+  return Status::OK();
+}
+
+}  // namespace
+
+LocalFileSystemOptions LocalFileSystemOptions::Defaults() {  // the default options
+  return LocalFileSystemOptions();  // value-initialized instance
+}
+
+bool LocalFileSystemOptions::Equals(const LocalFileSystemOptions& other) const {
+  return use_mmap == other.use_mmap;  // use_mmap is the only field compared
+}
+
+// Derives local filesystem options and the local path from a parsed URI.
+//
+// URIs carrying a username or password are rejected.  A hostname is only
+// accepted on Windows builds, where it yields a "//host/path" style path; on
+// other platforms it is an error.  Without a hostname the URI path is used
+// as is.  `out_path` may be null, in which case the path is not reported.
+Result<LocalFileSystemOptions> LocalFileSystemOptions::FromUri(
+    const ::arrow::internal::Uri& uri, std::string* out_path) {
+  if (!uri.username().empty() || !uri.password().empty()) {
+    return Status::Invalid("Unsupported username or password in local URI: '",
+                           uri.ToString(), "'");
+  }
+  const auto host = uri.host();
+  if (!host.empty()) {
+#ifdef _WIN32
+    if (out_path != nullptr) {
+      std::stringstream ss;
+      ss << "//" << host << "/" << internal::RemoveLeadingSlash(uri.path());
+      *out_path = ss.str();
+    }
+#else
+    return Status::Invalid("Unsupported hostname in non-Windows local URI: '",
+                           uri.ToString(), "'");
+#endif
+  } else if (out_path != nullptr) {
+    *out_path = uri.path();
+  }
+
+  // TODO handle use_mmap option
+  return LocalFileSystemOptions();
+}
+
+LocalFileSystem::LocalFileSystem(const io::IOContext& io_context)
+    : FileSystem(io_context), options_(LocalFileSystemOptions::Defaults()) {}  // default options
+
+LocalFileSystem::LocalFileSystem(const LocalFileSystemOptions& options,
+                                 const io::IOContext& io_context)
+    : FileSystem(io_context), options_(options) {}  // caller-supplied options, copied
+
+LocalFileSystem::~LocalFileSystem() {}
+
+// Normalizes `path` by converting it to a PlatformFilename and back to a
+// string.  A conversion failure is returned as an error.
+Result<std::string> LocalFileSystem::NormalizePath(std::string path) {
+  ARROW_ASSIGN_OR_RAISE(auto platform_fn, PlatformFilename::FromString(path));
+  return platform_fn.ToString();
+}
+
+// Two filesystems are equal when `other` reports the same type name and its
+// options compare equal to ours.
+bool LocalFileSystem::Equals(const FileSystem& other) const {
+  if (other.type_name() != type_name()) {
+    return false;
+  }
+  // Same type name, so the downcast is expected to be valid
+  const auto& other_local =
+      ::arrow::internal::checked_cast<const LocalFileSystem&>(other);
+  return options_.Equals(other_local.options());
+}
+
+// Stats a single path after converting it to its native representation.
+Result<FileInfo> LocalFileSystem::GetFileInfo(const std::string& path) {
+  ARROW_ASSIGN_OR_RAISE(auto target_fn, PlatformFilename::FromString(path));
+  return StatFile(target_fn.ToNative());
+}
+
+// Collects the infos selected by `select`, starting at its base directory
+// with a nesting depth of zero.
+Result<std::vector<FileInfo>> LocalFileSystem::GetFileInfo(const FileSelector& select) {
+  ARROW_ASSIGN_OR_RAISE(auto base_fn, PlatformFilename::FromString(select.base_dir));
+  std::vector<FileInfo> infos;
+  RETURN_NOT_OK(StatSelector(base_fn, select, /*nesting_depth=*/0, &infos));
+  return infos;
+}
+
+// Creates `path`; with `recursive` the whole directory tree is created,
+// otherwise only the final directory.
+Status LocalFileSystem::CreateDir(const std::string& path, bool recursive) {
+  ARROW_ASSIGN_OR_RAISE(auto dir_fn, PlatformFilename::FromString(path));
+  const auto created = recursive ? ::arrow::internal::CreateDirTree(dir_fn)
+                                 : ::arrow::internal::CreateDir(dir_fn);
+  return created.status();
+}
+
+// Delete the directory `path` and everything below it. A missing directory is
+// an error (allow_not_found is false); failures are re-reported with the
+// offending path prepended to the underlying message.
+Status LocalFileSystem::DeleteDir(const std::string& path) {
+  ARROW_ASSIGN_OR_RAISE(auto dir_name, PlatformFilename::FromString(path));
+  const Status status =
+      ::arrow::internal::DeleteDirTree(dir_name, /*allow_not_found=*/false).status();
+  if (status.ok()) {
+    return Status::OK();
+  }
+  // TODO Status::WithPrefix()?
+  std::stringstream message;
+  message << "Cannot delete directory '" << path << "': " << status.message();
+  return status.WithMessage(message.str());
+}
+
+// Delete everything inside directory `path`. An empty path is rejected up
+// front via InvalidDeleteDirContents; a missing directory is an error
+// (allow_not_found is false). Failures are re-reported with the offending
+// path prepended to the underlying message.
+Status LocalFileSystem::DeleteDirContents(const std::string& path) {
+  if (internal::IsEmptyPath(path)) {
+    return internal::InvalidDeleteDirContents(path);
+  }
+  ARROW_ASSIGN_OR_RAISE(auto dir_name, PlatformFilename::FromString(path));
+  const Status status =
+      ::arrow::internal::DeleteDirContents(dir_name, /*allow_not_found=*/false).status();
+  if (status.ok()) {
+    return Status::OK();
+  }
+  std::stringstream message;
+  message << "Cannot delete directory contents in '" << path << "': " << status.message();
+  return status.WithMessage(message.str());
+}
+
+// Wiping the root of the local filesystem is never performed: this
+// unconditionally returns Status::Invalid and touches nothing on disk.
+Status LocalFileSystem::DeleteRootDirContents() {
+  return Status::Invalid("LocalFileSystem::DeleteRootDirContents is strictly forbidden");
+}
+
+Status LocalFileSystem::DeleteFile(const std::string& path) {
+  ARROW_ASSIGN_OR_RAISE(auto fn, PlatformFilename::FromString(path));
+  return ::arrow::internal::DeleteFile(fn, /*allow_not_found=*/false).status();
+}
+
+// Rename `src` to `dest`. On Windows this uses MoveFileExW with
+// MOVEFILE_REPLACE_EXISTING; elsewhere it uses rename(). Failures are turned
+// into an IOError naming both paths.
+Status LocalFileSystem::Move(const std::string& src, const std::string& dest) {
+  ARROW_ASSIGN_OR_RAISE(auto from, PlatformFilename::FromString(src));
+  ARROW_ASSIGN_OR_RAISE(auto to, PlatformFilename::FromString(dest));
+
+#ifdef _WIN32
+  const bool renamed = MoveFileExW(from.ToNative().c_str(), to.ToNative().c_str(),
+                                   MOVEFILE_REPLACE_EXISTING) != 0;
+  if (!renamed) {
+    // GetLastError() is read first, before the message pieces are built.
+    return IOErrorFromWinError(GetLastError(), "Failed renaming '", from.ToString(),
+                               "' to '", to.ToString(), "'");
+  }
+#else
+  const int rc = rename(from.ToNative().c_str(), to.ToNative().c_str());
+  if (rc == -1) {
+    // errno is read first, before the message pieces are built.
+    return IOErrorFromErrno(errno, "Failed renaming '", from.ToString(), "' to '",
+                            to.ToString(), "'");
+  }
+#endif
+  return Status::OK();
+}
+
+// Copy the file `src` to `dest`. When both resolve to the same native path
+// the call is a no-op. On Windows the copy is delegated to CopyFileW (an
+// existing destination is overwritten); elsewhere the data is streamed from
+// an input stream to an output stream in 1 MiB chunks.
+Status LocalFileSystem::CopyFile(const std::string& src, const std::string& dest) {
+  ARROW_ASSIGN_OR_RAISE(auto from, PlatformFilename::FromString(src));
+  ARROW_ASSIGN_OR_RAISE(auto to, PlatformFilename::FromString(dest));
+  // XXX should we use fstat() to compare inodes?
+  const bool same_native_path = (from.ToNative() == to.ToNative());
+  if (same_native_path) {
+    return Status::OK();
+  }
+
+#ifdef _WIN32
+  const bool copied = CopyFileW(from.ToNative().c_str(), to.ToNative().c_str(),
+                                FALSE /* bFailIfExists */) != 0;
+  if (!copied) {
+    return IOErrorFromWinError(GetLastError(), "Failed copying '", from.ToString(),
+                               "' to '", to.ToString(), "'");
+  }
+  return Status::OK();
+#else
+  ARROW_ASSIGN_OR_RAISE(auto input, OpenInputStream(src));
+  ARROW_ASSIGN_OR_RAISE(auto output, OpenOutputStream(dest));
+  RETURN_NOT_OK(
+      internal::CopyStream(input, output, 1024 * 1024 /* chunk_size */, io_context()));
+  // Close the destination first so that write errors are reported.
+  RETURN_NOT_OK(output->Close());
+  return input->Close();
+#endif
+}
+
+namespace {
+
+// Open `path` for reading, as a memory-mapped file when options.use_mmap is
+// set and as a regular ReadableFile (allocating from io_context's pool)
+// otherwise. The result is converted to the requested InputStreamType.
+template <typename InputStreamType>
+Result<std::shared_ptr<InputStreamType>> OpenInputStreamGeneric(
+    const std::string& path, const LocalFileSystemOptions& options,
+    const io::IOContext& io_context) {
+  if (!options.use_mmap) {
+    return io::ReadableFile::Open(path, io_context.pool());
+  }
+  return io::MemoryMappedFile::Open(path, io::FileMode::READ);
+}
+
+}  // namespace
+
+// Open `path` as a sequential input stream; whether the file is memory-mapped
+// is decided by options_.use_mmap inside OpenInputStreamGeneric.
+Result<std::shared_ptr<io::InputStream>> LocalFileSystem::OpenInputStream(
+    const std::string& path) {
+  return OpenInputStreamGeneric<io::InputStream>(path, options_, io_context());
+}
+
+// Open `path` as a random-access file; whether the file is memory-mapped is
+// decided by options_.use_mmap inside OpenInputStreamGeneric.
+Result<std::shared_ptr<io::RandomAccessFile>> LocalFileSystem::OpenInputFile(
+    const std::string& path) {
+  return OpenInputStreamGeneric<io::RandomAccessFile>(path, options_, io_context());
+}
+
+namespace {
+
+// Open `path` write-only with the given truncate/append flags and wrap the
+// resulting file descriptor in a FileOutputStream. If wrapping fails the
+// descriptor is closed here (best effort) so that it does not leak.
+Result<std::shared_ptr<io::OutputStream>> OpenOutputStreamGeneric(const std::string& path,
+                                                                  bool truncate,
+                                                                  bool append) {
+  ARROW_ASSIGN_OR_RAISE(auto file_name, PlatformFilename::FromString(path));
+  const bool write_only = true;
+  ARROW_ASSIGN_OR_RAISE(
+      int fd, ::arrow::internal::FileOpenWritable(file_name, write_only, truncate, append));
+  auto stream_result = io::FileOutputStream::Open(fd);
+  if (!stream_result.ok()) {
+    // The stream never took ownership of fd; the close status is ignored.
+    ARROW_UNUSED(::arrow::internal::FileClose(fd));
+  }
+  return stream_result;
+}
+
+}  // namespace
+
+// Open `path` for writing, truncating any existing content. `metadata` is
+// accepted for interface compatibility but not used here.
+Result<std::shared_ptr<io::OutputStream>> LocalFileSystem::OpenOutputStream(
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
+  return OpenOutputStreamGeneric(path, /*truncate=*/true, /*append=*/false);
+}
+
+// Open `path` for appending, without truncating existing content. `metadata`
+// is accepted for interface compatibility but not used here.
+Result<std::shared_ptr<io::OutputStream>> LocalFileSystem::OpenAppendStream(
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
+  return OpenOutputStreamGeneric(path, /*truncate=*/false, /*append=*/true);
+}
+
+}  // namespace fs
+}  // namespace arrow

+ 113 - 0
contrib/libs/apache/arrow/cpp/src/arrow/filesystem/localfs.h

@@ -0,0 +1,113 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/filesystem/filesystem.h"
+
+namespace arrow {
+namespace internal {
+
+class Uri;
+
+}
+
+namespace fs {
+
+/// Options for the LocalFileSystem implementation.
+struct ARROW_EXPORT LocalFileSystemOptions {
+  /// Whether OpenInputStream and OpenInputFile return a mmap'ed file,
+  /// or a regular one.
+  bool use_mmap = false;
+
+  /// \brief Initialize with defaults
+  static LocalFileSystemOptions Defaults();
+
+  /// \brief Compare two option sets; used by LocalFileSystem::Equals.
+  bool Equals(const LocalFileSystemOptions& other) const;
+
+  /// \brief Build options from a URI.
+  ///
+  /// NOTE(review): presumably the filesystem path extracted from `uri` is
+  /// written to `out_path`; confirm against the definition in localfs.cc.
+  static Result<LocalFileSystemOptions> FromUri(const ::arrow::internal::Uri& uri,
+                                                std::string* out_path);
+};
+
+/// \brief A FileSystem implementation accessing files on the local machine.
+///
+/// This class handles only `/`-separated paths.  If desired, conversion
+/// from Windows backslash-separated paths should be done by the caller.
+/// Details such as symlinks are abstracted away (symlinks are always
+/// followed, except when deleting an entry).
+class ARROW_EXPORT LocalFileSystem : public FileSystem {
+ public:
+  /// Construct with LocalFileSystemOptions::Defaults().
+  explicit LocalFileSystem(const io::IOContext& = io::default_io_context());
+  /// Construct with the given options (copied into the filesystem).
+  explicit LocalFileSystem(const LocalFileSystemOptions&,
+                           const io::IOContext& = io::default_io_context());
+  ~LocalFileSystem() override;
+
+  std::string type_name() const override { return "local"; }
+
+  /// Round-trip the path through the platform filename representation.
+  Result<std::string> NormalizePath(std::string path) override;
+
+  /// Equal when `other` has the same type_name() and equal options.
+  bool Equals(const FileSystem& other) const override;
+
+  /// Return a copy of the options this filesystem was created with.
+  LocalFileSystemOptions options() const { return options_; }
+
+  /// \cond FALSE
+  using FileSystem::GetFileInfo;
+  /// \endcond
+  Result<FileInfo> GetFileInfo(const std::string& path) override;
+  Result<std::vector<FileInfo>> GetFileInfo(const FileSelector& select) override;
+
+  /// Create a directory; with `recursive` the whole directory tree is created.
+  Status CreateDir(const std::string& path, bool recursive = true) override;
+
+  /// Delete a directory tree; a missing directory is an error.
+  Status DeleteDir(const std::string& path) override;
+  /// Delete the contents of a directory; an empty path is rejected.
+  Status DeleteDirContents(const std::string& path) override;
+  /// Always fails with Status::Invalid: wiping the root is forbidden.
+  Status DeleteRootDirContents() override;
+
+  /// Delete a file; a missing file is an error.
+  Status DeleteFile(const std::string& path) override;
+
+  /// Rename `src` to `dest` (rename() or, on Windows, MoveFileExW with
+  /// MOVEFILE_REPLACE_EXISTING).
+  Status Move(const std::string& src, const std::string& dest) override;
+
+  /// Copy `src` to `dest`; a no-op when both have the same native path.
+  Status CopyFile(const std::string& src, const std::string& dest) override;
+
+  /// Input files are memory-mapped when options().use_mmap is set.
+  Result<std::shared_ptr<io::InputStream>> OpenInputStream(
+      const std::string& path) override;
+  Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
+      const std::string& path) override;
+  /// Open for writing, truncating existing content. `metadata` is not used
+  /// by the implementation.
+  Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
+  /// Open for appending. `metadata` is not used by the implementation.
+  Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
+
+ protected:
+  // Options captured at construction time.
+  LocalFileSystemOptions options_;
+};
+
+namespace internal {
+
+// Return whether the string is detected as a local absolute path.
+// NOTE(review): the detection rules are defined in localfs.cc and are not
+// visible from this header; consult the definition before relying on how
+// drive letters or backslashes are treated.
+ARROW_EXPORT
+bool DetectAbsolutePath(const std::string& s);
+
+}  // namespace internal
+
+}  // namespace fs
+}  // namespace arrow

+ 780 - 0
contrib/libs/apache/arrow/cpp/src/arrow/filesystem/mockfs.cc

@@ -0,0 +1,780 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <algorithm>
+#include <iterator>
+#include <map>
+#include <mutex>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/buffer.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/filesystem/mockfs.h"
+#include "arrow/filesystem/path_util.h"
+#include "arrow/filesystem/util_internal.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/io/memory.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/future.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/variant.h"
+#include "arrow/util/windows_fixup.h"
+
+namespace arrow {
+namespace fs {
+namespace internal {
+
+namespace {
+
+////////////////////////////////////////////////////////////////////////////
+// Filesystem structure
+
+class Entry;
+
+// An in-memory file: a name, a modification time, optional contents and
+// optional key/value metadata.
+struct File {
+  TimePoint mtime;
+  std::string name;
+  // Null until some content has been stored.
+  std::shared_ptr<Buffer> data;
+  std::shared_ptr<const KeyValueMetadata> metadata;
+
+  File(TimePoint mtime, std::string name) : mtime(mtime), name(std::move(name)) {}
+
+  // Number of stored bytes; a file without a buffer counts as empty.
+  int64_t size() const {
+    if (!data) {
+      return 0;
+    }
+    return data->size();
+  }
+
+  // View of the stored bytes; an empty view when there is no buffer.
+  explicit operator util::string_view() const {
+    return data ? util::string_view(*data) : util::string_view("");
+  }
+};
+
+// An in-memory directory: a name, a modification time and its children,
+// keyed (and therefore ordered) by child name. Movable but not copyable.
+struct Directory {
+  std::string name;
+  TimePoint mtime;
+  std::map<std::string, std::unique_ptr<Entry>> entries;
+
+  Directory(std::string name, TimePoint mtime) : name(std::move(name)), mtime(mtime) {}
+  Directory(Directory&& other) noexcept
+      : name(std::move(other.name)),
+        mtime(other.mtime),
+        entries(std::move(other.entries)) {}
+
+  Directory& operator=(Directory&& other) noexcept {
+    name = std::move(other.name);
+    mtime = other.mtime;
+    entries = std::move(other.entries);
+    return *this;
+  }
+
+  // Look up a direct child by name; nullptr when there is none.
+  Entry* Find(const std::string& s) {
+    const auto found = entries.find(s);
+    return (found == entries.end()) ? nullptr : found->second.get();
+  }
+
+  // Insert a new child. Returns false (and leaves the directory untouched)
+  // when a child with that name already exists.
+  bool CreateEntry(const std::string& s, std::unique_ptr<Entry> entry) {
+    DCHECK(!s.empty());
+    return entries.emplace(s, std::move(entry)).second;
+  }
+
+  // Insert a child, replacing any existing child with the same name.
+  void AssignEntry(const std::string& s, std::unique_ptr<Entry> entry) {
+    DCHECK(!s.empty());
+    entries[s] = std::move(entry);
+  }
+
+  // Remove a direct child; returns whether something was removed.
+  bool DeleteEntry(const std::string& s) { return entries.erase(s) != 0; }
+
+ private:
+  ARROW_DISALLOW_COPY_AND_ASSIGN(Directory);
+};
+
+// A filesystem entry: the variant storage underlying Entry, which holds
+// either a File or a Directory (std::nullptr_t is the first alternative).
+using EntryBase = util::Variant<std::nullptr_t, File, Directory>;
+
+// A node of the mock filesystem tree: either a File or a Directory.
+// Entries are movable but not copyable.
+class Entry : public EntryBase {
+ public:
+  Entry(Entry&&) = default;
+  Entry& operator=(Entry&&) = default;
+  explicit Entry(Directory&& v) : EntryBase(std::move(v)) {}
+  explicit Entry(File&& v) : EntryBase(std::move(v)) {}
+
+  bool is_dir() const { return util::holds_alternative<Directory>(*this); }
+
+  bool is_file() const { return util::holds_alternative<File>(*this); }
+
+  Directory& as_dir() { return util::get<Directory>(*this); }
+
+  File& as_file() { return util::get<File>(*this); }
+
+  // Describe this entry (type, mtime and, for files, size).
+  // The path() property of the returned info is left unset.
+  FileInfo GetInfo() {
+    FileInfo info;
+    if (!is_dir()) {
+      DCHECK(is_file());
+      File& file = as_file();
+      info.set_type(FileType::File);
+      info.set_mtime(file.mtime);
+      info.set_size(file.size());
+      return info;
+    }
+    Directory& dir = as_dir();
+    info.set_type(FileType::Directory);
+    info.set_mtime(dir.mtime);
+    return info;
+  }
+
+  // Describe this entry, with its path set to `base_path` joined with the
+  // entry's own name.
+  FileInfo GetInfo(const std::string& base_path) {
+    FileInfo info = GetInfo();
+    const std::string& own_name = is_dir() ? as_dir().name : as_file().name;
+    info.set_path(ConcatAbstractPath(base_path, own_name));
+    return info;
+  }
+
+  // Rename the entry in place (the key in the parent directory is not
+  // touched here).
+  void SetName(const std::string& name) {
+    if (is_dir()) {
+      as_dir().name = name;
+      return;
+    }
+    DCHECK(is_file());
+    as_file().name = name;
+  }
+
+ private:
+  ARROW_DISALLOW_COPY_AND_ASSIGN(Entry);
+};
+
+////////////////////////////////////////////////////////////////////////////
+// Streams
+
+// Output stream that accumulates written bytes in a BufferBuilder and
+// publishes them into the target File when the stream is closed.
+class MockFSOutputStream : public io::OutputStream {
+ public:
+  MockFSOutputStream(File* file, MemoryPool* pool)
+      : file_(file), builder_(pool), closed_(false) {}
+
+  ~MockFSOutputStream() override = default;
+
+  // Implement the OutputStream interface
+
+  // Store the accumulated bytes as the file's data. Closing twice is a no-op.
+  Status Close() override {
+    if (closed_) {
+      return Status::OK();
+    }
+    RETURN_NOT_OK(builder_.Finish(&file_->data));
+    closed_ = true;
+    return Status::OK();
+  }
+
+  // Discard the accumulated bytes and replace the file's data with a marker
+  // message. Aborting an already closed stream is a no-op.
+  Status Abort() override {
+    if (closed_) {
+      return Status::OK();
+    }
+    // MockFSOutputStream is mainly used for debugging and testing, so
+    // mark an aborted file's contents explicitly.
+    std::stringstream marker;
+    marker << "MockFSOutputStream aborted after " << file_->size() << " bytes written";
+    file_->data = Buffer::FromString(marker.str());
+    closed_ = true;
+    return Status::OK();
+  }
+
+  bool closed() const override { return closed_; }
+
+  // Number of bytes accumulated so far; invalid once the stream is closed.
+  Result<int64_t> Tell() const override {
+    if (!closed_) {
+      return builder_.length();
+    }
+    return Status::Invalid("Invalid operation on closed stream");
+  }
+
+  // Append `nbytes` bytes from `data`; invalid once the stream is closed.
+  Status Write(const void* data, int64_t nbytes) override {
+    if (!closed_) {
+      return builder_.Append(data, nbytes);
+    }
+    return Status::Invalid("Invalid operation on closed stream");
+  }
+
+ protected:
+  // Non-owning pointer to the file being written.
+  File* file_;
+  BufferBuilder builder_;
+  bool closed_;
+};
+
+// Reader over a mock file: a BufferReader on the file's data buffer that
+// additionally remembers the file's metadata as of construction time.
+class MockFSInputStream : public io::BufferReader {
+ public:
+  // Shares `file.data` and `file.metadata`; no bytes are copied here.
+  explicit MockFSInputStream(const File& file)
+      : io::BufferReader(file.data), metadata_(file.metadata) {}
+
+  // Return the metadata captured at construction (may be null).
+  Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata() override {
+    return metadata_;
+  }
+
+ protected:
+  std::shared_ptr<const KeyValueMetadata> metadata_;
+};
+
+}  // namespace
+
+// Print a directory description as: '<full_path>' [mtime=<ticks since epoch>]
+std::ostream& operator<<(std::ostream& os, const MockDirInfo& di) {
+  const auto mtime_ticks = di.mtime.time_since_epoch().count();
+  os << "'" << di.full_path << "' [mtime=" << mtime_ticks << "]";
+  return os;
+}
+
+// Print a file description as:
+// '<full_path>' [mtime=<ticks since epoch>, size=<data length>]
+std::ostream& operator<<(std::ostream& os, const MockFileInfo& di) {
+  const auto mtime_ticks = di.mtime.time_since_epoch().count();
+  os << "'" << di.full_path << "' [mtime=" << mtime_ticks;
+  os << ", size=" << di.data.length() << "]";
+  return os;
+}
+
+////////////////////////////////////////////////////////////////////////////
+// MockFileSystem implementation
+
+// Private state and helpers of MockFileSystem: the in-memory tree rooted at
+// `root`, the timestamp stamped onto new entries, and the mutex protecting
+// the tree. None of the helpers below lock `mutex` themselves; the
+// MockFileSystem methods in this file take lock_guard() before calling them.
+class MockFileSystem::Impl {
+ public:
+  // Timestamp assigned to entries created or rewritten through this object.
+  TimePoint current_time;
+  // Memory pool handed to output streams for buffering written data.
+  MemoryPool* pool;
+
+  // The root directory
+  Entry root;
+  std::mutex mutex;
+
+  // The root is a directory with an empty name.
+  Impl(TimePoint current_time, MemoryPool* pool)
+      : current_time(current_time), pool(pool), root(Directory("", current_time)) {}
+
+  // Acquire `mutex`; it is released when the returned lock goes out of scope.
+  std::unique_lock<std::mutex> lock_guard() {
+    return std::unique_lock<std::mutex>(mutex);
+  }
+
+  Directory& RootDir() { return root.as_dir(); }
+
+  // Walk the tree from the root following the path components [it, end).
+  // The walk stops at the first component that does not exist, or right after
+  // reaching a file. `*nconsumed` receives the number of components that were
+  // matched, and the last matched entry (the root if none) is returned; the
+  // result is never null.
+  template <typename It>
+  Entry* FindEntry(It it, It end, size_t* nconsumed) {
+    size_t consumed = 0;
+    Entry* entry = &root;
+
+    for (; it != end; ++it) {
+      const std::string& part = *it;
+      DCHECK(entry->is_dir());
+      Entry* child = entry->as_dir().Find(part);
+      if (child == nullptr) {
+        // Partial find only
+        break;
+      }
+      ++consumed;
+      entry = child;
+      if (entry->is_file()) {
+        // Cannot go any further
+        break;
+      }
+      // Recurse
+    }
+    *nconsumed = consumed;
+    return entry;
+  }
+
+  // Find an entry, allowing partial matching
+  Entry* FindEntry(const std::vector<std::string>& parts, size_t* nconsumed) {
+    return FindEntry(parts.begin(), parts.end(), nconsumed);
+  }
+
+  // Find an entry, only full matching allowed
+  // (returns nullptr unless every component was matched).
+  Entry* FindEntry(const std::vector<std::string>& parts) {
+    size_t consumed;
+    auto entry = FindEntry(parts, &consumed);
+    return (consumed == parts.size()) ? entry : nullptr;
+  }
+
+  // Find the parent entry, only full matching allowed
+  // Returns nullptr for an empty path (the root has no parent) or when some
+  // ancestor is missing. The returned entry is not checked to be a directory;
+  // callers test is_dir() themselves.
+  Entry* FindParent(const std::vector<std::string>& parts) {
+    if (parts.size() == 0) {
+      return nullptr;
+    }
+    size_t consumed;
+    auto last = parts.end();
+    last--;
+    auto entry = FindEntry(parts.begin(), last, &consumed);
+    return (consumed == parts.size() - 1) ? entry : nullptr;
+  }
+
+  // Append to `infos` a FileInfo for every child of `base_dir`, with paths
+  // built relative to `base_path`. Descends into child directories while
+  // select.recursive is set and nesting_depth < select.max_recursion.
+  void GatherInfos(const FileSelector& select, const std::string& base_path,
+                   const Directory& base_dir, int32_t nesting_depth,
+                   std::vector<FileInfo>* infos) {
+    for (const auto& pair : base_dir.entries) {
+      Entry* child = pair.second.get();
+      infos->push_back(child->GetInfo(base_path));
+      if (select.recursive && nesting_depth < select.max_recursion && child->is_dir()) {
+        Directory& child_dir = child->as_dir();
+        // The child's full path was just computed for its FileInfo; reuse it.
+        std::string child_path = infos->back().path();
+        GatherInfos(select, std::move(child_path), child_dir, nesting_depth + 1, infos);
+      }
+    }
+  }
+
+  // Append `dir` and, recursively, all directories below it to `out`, parents
+  // before children. A directory whose full path is empty (the root, when
+  // called with an empty prefix) is not emitted itself.
+  void DumpDirs(const std::string& prefix, const Directory& dir,
+                std::vector<MockDirInfo>* out) {
+    std::string path = prefix + dir.name;
+    if (!path.empty()) {
+      out->push_back({path, dir.mtime});
+      path += "/";
+    }
+    for (const auto& pair : dir.entries) {
+      Entry* child = pair.second.get();
+      if (child->is_dir()) {
+        DumpDirs(path, child->as_dir(), out);
+      }
+    }
+  }
+
+  // Append every file found under `dir` (recursively) to `out`. The `data`
+  // view of each MockFileInfo is produced by File's string_view conversion,
+  // i.e. it refers to the file's buffer rather than owning a copy.
+  void DumpFiles(const std::string& prefix, const Directory& dir,
+                 std::vector<MockFileInfo>* out) {
+    std::string path = prefix + dir.name;
+    if (!path.empty()) {
+      path += "/";
+    }
+    for (const auto& pair : dir.entries) {
+      Entry* child = pair.second.get();
+      if (child->is_file()) {
+        auto& file = child->as_file();
+        out->push_back({path + file.name, file.mtime, util::string_view(file)});
+      } else if (child->is_dir()) {
+        DumpFiles(path, child->as_dir(), out);
+      }
+    }
+  }
+
+  // Open `path` for writing, creating the file if needed. The parent must
+  // exist and be a directory (otherwise PathNotFound); an existing directory
+  // at `path` yields NotAFile. The file's metadata is replaced by `metadata`,
+  // and with `append` the current contents are written into the new stream
+  // first. The returned stream keeps a raw pointer to the File entry.
+  Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
+      const std::string& path, bool append,
+      const std::shared_ptr<const KeyValueMetadata>& metadata) {
+    auto parts = SplitAbstractPath(path);
+    RETURN_NOT_OK(ValidateAbstractPathParts(parts));
+
+    Entry* parent = FindParent(parts);
+    if (parent == nullptr || !parent->is_dir()) {
+      return PathNotFound(path);
+    }
+    // Find the file in the parent dir, or create it
+    const auto& name = parts.back();
+    Entry* child = parent->as_dir().Find(name);
+    File* file;
+    if (child == nullptr) {
+      // Ownership of the new entry passes to the parent directory on the
+      // next line; `child` stays valid as a non-owning pointer.
+      child = new Entry(File(current_time, name));
+      parent->as_dir().AssignEntry(name, std::unique_ptr<Entry>(child));
+      file = &child->as_file();
+    } else if (child->is_file()) {
+      file = &child->as_file();
+      file->mtime = current_time;
+    } else {
+      return NotAFile(path);
+    }
+    file->metadata = metadata;
+    auto ptr = std::make_shared<MockFSOutputStream>(file, pool);
+    if (append && file->data) {
+      RETURN_NOT_OK(ptr->Write(file->data->data(), file->data->size()));
+    }
+    return ptr;
+  }
+
+  // Open the file at `path` for reading. Fails with PathNotFound when the
+  // path does not resolve fully and with NotAFile when it names a directory.
+  Result<std::shared_ptr<io::BufferReader>> OpenInputReader(const std::string& path) {
+    auto parts = SplitAbstractPath(path);
+    RETURN_NOT_OK(ValidateAbstractPathParts(parts));
+
+    Entry* entry = FindEntry(parts);
+    if (entry == nullptr) {
+      return PathNotFound(path);
+    }
+    if (!entry->is_file()) {
+      return NotAFile(path);
+    }
+    return std::make_shared<MockFSInputStream>(entry->as_file());
+  }
+};
+
+// Defaulted here, in the translation unit where Impl is fully defined
+// (presumably required because impl_ is a smart pointer to Impl, which the
+// header only forward-declares; confirm against mockfs.h).
+MockFileSystem::~MockFileSystem() = default;
+
+// Create an empty mock filesystem whose entries are stamped with
+// `current_time`; only the memory pool of `io_context` is used.
+MockFileSystem::MockFileSystem(TimePoint current_time, const io::IOContext& io_context) {
+  impl_.reset(new Impl(current_time, io_context.pool()));
+}
+
+// Identity comparison: a mock filesystem is only equal to itself.
+bool MockFileSystem::Equals(const FileSystem& other) const { return this == &other; }
+
+// Create the directory `path`. Creating an existing directory succeeds
+// without changes. Without `recursive`, at most the last path component may
+// be missing; with it, every missing ancestor is created as well. Fails if
+// an existing component of the path is a file.
+Status MockFileSystem::CreateDir(const std::string& path, bool recursive) {
+  auto parts = SplitAbstractPath(path);
+  RETURN_NOT_OK(ValidateAbstractPathParts(parts));
+
+  auto guard = impl_->lock_guard();
+
+  // Resolve as much of the path as already exists.
+  size_t num_existing;
+  Entry* current = impl_->FindEntry(parts, &num_existing);
+  if (!current->is_dir()) {
+    auto file_path = JoinAbstractPath(parts.begin(), parts.begin() + num_existing);
+    return Status::IOError("Cannot create directory '", path, "': ", "ancestor '",
+                           file_path, "' is not a directory");
+  }
+  const size_t num_missing = parts.size() - num_existing;
+  if (!recursive && num_missing > 1) {
+    return Status::IOError("Cannot create directory '", path,
+                           "': ", "parent does not exist");
+  }
+  // Create one directory per missing component, descending as we go.
+  for (auto it = parts.begin() + num_existing; it != parts.end(); ++it) {
+    const std::string& name = *it;
+    std::unique_ptr<Entry> new_dir(new Entry(Directory(name, impl_->current_time)));
+    Entry* new_dir_ptr = new_dir.get();
+    bool inserted = current->as_dir().CreateEntry(name, std::move(new_dir));
+    // No race condition on insertion is possible, as all operations are locked
+    DCHECK(inserted);
+    current = new_dir_ptr;
+  }
+  return Status::OK();
+}
+
+// Delete the directory `path` together with everything it contains.
+// Fails with PathNotFound when the path (or its parent) does not exist, and
+// with NotADir when it names a file. The root (empty path) has no parent and
+// is therefore reported as not found.
+Status MockFileSystem::DeleteDir(const std::string& path) {
+  auto parts = SplitAbstractPath(path);
+  RETURN_NOT_OK(ValidateAbstractPathParts(parts));
+
+  auto guard = impl_->lock_guard();
+
+  Entry* parent = impl_->FindParent(parts);
+  const bool parent_is_dir = (parent != nullptr) && parent->is_dir();
+  if (!parent_is_dir) {
+    return PathNotFound(path);
+  }
+  Directory& parent_dir = parent->as_dir();
+  const std::string& dir_name = parts.back();
+  Entry* target = parent_dir.Find(dir_name);
+  if (target == nullptr) {
+    return PathNotFound(path);
+  }
+  if (!target->is_dir()) {
+    return NotADir(path);
+  }
+
+  bool deleted = parent_dir.DeleteEntry(dir_name);
+  DCHECK(deleted);
+  return Status::OK();
+}
+
+// Remove every entry inside the directory `path`, keeping the directory
+// itself. An empty path (the root) is not cleared here; it is reported
+// through InvalidDeleteDirContents instead.
+Status MockFileSystem::DeleteDirContents(const std::string& path) {
+  auto parts = SplitAbstractPath(path);
+  RETURN_NOT_OK(ValidateAbstractPathParts(parts));
+
+  auto guard = impl_->lock_guard();
+
+  if (parts.empty()) {
+    // This would wipe the whole filesystem: refuse.
+    return internal::InvalidDeleteDirContents(path);
+  }
+
+  Entry* target = impl_->FindEntry(parts);
+  if (target == nullptr) {
+    return PathNotFound(path);
+  }
+  if (!target->is_dir()) {
+    return NotADir(path);
+  }
+  Directory& dir = target->as_dir();
+  dir.entries.clear();
+  return Status::OK();
+}
+
+// Remove every entry of the root directory, i.e. empty the whole filesystem.
+Status MockFileSystem::DeleteRootDirContents() {
+  auto guard = impl_->lock_guard();
+
+  Directory& root_dir = impl_->RootDir();
+  root_dir.entries.clear();
+  return Status::OK();
+}
+
+// Delete the file `path`. Fails with PathNotFound when the path (or its
+// parent) does not exist, and with NotAFile when it names a directory.
+Status MockFileSystem::DeleteFile(const std::string& path) {
+  auto parts = SplitAbstractPath(path);
+  RETURN_NOT_OK(ValidateAbstractPathParts(parts));
+
+  auto guard = impl_->lock_guard();
+
+  Entry* parent = impl_->FindParent(parts);
+  const bool parent_is_dir = (parent != nullptr) && parent->is_dir();
+  if (!parent_is_dir) {
+    return PathNotFound(path);
+  }
+  Directory& parent_dir = parent->as_dir();
+  const std::string& file_name = parts.back();
+  Entry* target = parent_dir.Find(file_name);
+  if (target == nullptr) {
+    return PathNotFound(path);
+  }
+  if (!target->is_file()) {
+    return NotAFile(path);
+  }
+  bool deleted = parent_dir.DeleteEntry(file_name);
+  DCHECK(deleted);
+  return Status::OK();
+}
+
+// Describe the entry at `path`. A path that does not resolve is not an
+// error: the returned info then has type FileType::NotFound. In every case
+// the info's path is set to `path` exactly as given.
+Result<FileInfo> MockFileSystem::GetFileInfo(const std::string& path) {
+  auto parts = SplitAbstractPath(path);
+  RETURN_NOT_OK(ValidateAbstractPathParts(parts));
+
+  auto guard = impl_->lock_guard();
+
+  FileInfo info;
+  if (Entry* found = impl_->FindEntry(parts)) {
+    info = found->GetInfo();
+  } else {
+    info.set_type(FileType::NotFound);
+  }
+  info.set_path(path);
+  return info;
+}
+
+// List the entries under selector.base_dir (recursively if the selector asks
+// for it). A missing base directory yields an empty result when
+// selector.allow_not_found is set and PathNotFound otherwise; a base path
+// naming a file yields NotADir.
+Result<FileInfoVector> MockFileSystem::GetFileInfo(const FileSelector& selector) {
+  auto parts = SplitAbstractPath(selector.base_dir);
+  RETURN_NOT_OK(ValidateAbstractPathParts(parts));
+
+  auto guard = impl_->lock_guard();
+
+  FileInfoVector results;
+
+  Entry* base_entry = impl_->FindEntry(parts);
+  if (base_entry == nullptr) {
+    // Base directory does not exist
+    if (!selector.allow_not_found) {
+      return PathNotFound(selector.base_dir);
+    }
+    return results;
+  }
+  if (!base_entry->is_dir()) {
+    return NotADir(selector.base_dir);
+  }
+
+  Directory& base_dir = base_entry->as_dir();
+  impl_->GatherInfos(selector, selector.base_dir, base_dir, 0, &results);
+  return results;
+}
+
+namespace {
+
+// Helper for binary operations (move, copy)
+//
+// Bundles everything a two-path operation needs once both paths have been
+// resolved: the split paths, the two parent directories, the final path
+// components, and the current entries at source and destination (either may
+// be null when nothing exists there yet).
+struct BinaryOp {
+  std::vector<std::string> src_parts;
+  std::vector<std::string> dest_parts;
+  Directory& src_dir;
+  Directory& dest_dir;
+  std::string src_name;
+  std::string dest_name;
+  Entry* src_entry;
+  Entry* dest_entry;
+
+  // Resolve `src` and `dest`, then invoke `op_func` with the resulting
+  // BinaryOp. The filesystem lock is taken here and is still held while
+  // `op_func` runs. Fails with PathNotFound if either path has no parent
+  // directory (this includes empty paths, for which FindParent returns null).
+  template <typename OpFunc>
+  static Status Run(MockFileSystem::Impl* impl, const std::string& src,
+                    const std::string& dest, OpFunc&& op_func) {
+    auto src_parts = SplitAbstractPath(src);
+    auto dest_parts = SplitAbstractPath(dest);
+    RETURN_NOT_OK(ValidateAbstractPathParts(src_parts));
+    RETURN_NOT_OK(ValidateAbstractPathParts(dest_parts));
+
+    auto guard = impl->lock_guard();
+
+    // Both source and destination must have valid parents
+    Entry* src_parent = impl->FindParent(src_parts);
+    if (src_parent == nullptr || !src_parent->is_dir()) {
+      return PathNotFound(src);
+    }
+    Entry* dest_parent = impl->FindParent(dest_parts);
+    if (dest_parent == nullptr || !dest_parent->is_dir()) {
+      return PathNotFound(dest);
+    }
+    Directory& src_dir = src_parent->as_dir();
+    Directory& dest_dir = dest_parent->as_dir();
+    DCHECK_GE(src_parts.size(), 1);
+    DCHECK_GE(dest_parts.size(), 1);
+    const auto& src_name = src_parts.back();
+    const auto& dest_name = dest_parts.back();
+
+    // NOTE(review): src_name/dest_name are references to the last elements of
+    // the local vectors, which are moved into the first two members below
+    // before those references are read again. This relies on the braced
+    // initializers being evaluated in order and on vector move construction
+    // handing over the elements, so the references stay valid. Keep the
+    // initializer order unchanged.
+    BinaryOp op{std::move(src_parts),
+                std::move(dest_parts),
+                src_dir,
+                dest_dir,
+                src_name,
+                dest_name,
+                src_dir.Find(src_name),
+                dest_dir.Find(dest_name)};
+
+    return op_func(std::move(op));
+  }
+};
+
+}  // namespace
+
+// Move (rename) the entry at `src` to `dest`.
+//
+// Fails when the source does not exist, when the destination is an existing
+// directory, when a directory would replace an existing file, or when `dest`
+// lies underneath `src`. An existing destination file is replaced.
+Status MockFileSystem::Move(const std::string& src, const std::string& dest) {
+  return BinaryOp::Run(impl_.get(), src, dest, [&](const BinaryOp& op) -> Status {
+    if (op.src_entry == nullptr) {
+      return PathNotFound(src);
+    }
+    const bool dest_exists = (op.dest_entry != nullptr);
+    if (dest_exists && op.dest_entry->is_dir()) {
+      return Status::IOError("Cannot replace destination '", dest,
+                             "', which is a directory");
+    }
+    if (dest_exists && op.dest_entry->is_file() && op.src_entry->is_dir()) {
+      return Status::IOError("Cannot replace destination '", dest,
+                             "', which is a file, with directory '", src, "'");
+    }
+    if (op.src_parts.size() < op.dest_parts.size()) {
+      // Check if dest is a child of src, i.e. src is a prefix of dest
+      const bool src_is_prefix =
+          std::equal(op.src_parts.begin(), op.src_parts.end(), op.dest_parts.begin());
+      if (src_is_prefix) {
+        return Status::IOError("Cannot move '", src, "' into child path '", dest, "'");
+      }
+    }
+
+    // Move the original entry into a fresh node carrying the new name, drop
+    // the old node, then install the new one at the destination.
+    std::unique_ptr<Entry> moved(new Entry(std::move(*op.src_entry)));
+    moved->SetName(op.dest_name);
+    bool deleted = op.src_dir.DeleteEntry(op.src_name);
+    DCHECK(deleted);
+    op.dest_dir.AssignEntry(op.dest_name, std::move(moved));
+    return Status::OK();
+  });
+}
+
+// Copy the file at `src` to `dest`, replacing an existing destination file.
+// Fails when the source does not exist or is not a file, or when the
+// destination is an existing directory.
+Status MockFileSystem::CopyFile(const std::string& src, const std::string& dest) {
+  return BinaryOp::Run(impl_.get(), src, dest, [&](const BinaryOp& op) -> Status {
+    if (op.src_entry == nullptr) {
+      return PathNotFound(src);
+    }
+    if (!op.src_entry->is_file()) {
+      return NotAFile(src);
+    }
+    const bool dest_is_dir = (op.dest_entry != nullptr) && op.dest_entry->is_dir();
+    if (dest_is_dir) {
+      return Status::IOError("Cannot replace destination '", dest,
+                             "', which is a directory");
+    }
+
+    // Duplicate the source File, rename the duplicate and install it.
+    std::unique_ptr<Entry> duplicate(new Entry(File(op.src_entry->as_file())));
+    duplicate->SetName(op.dest_name);
+    op.dest_dir.AssignEntry(op.dest_name, std::move(duplicate));
+    return Status::OK();
+  });
+}
+
+// Open the file at `path` for sequential reading. The filesystem lock is
+// held only while the reader is being created.
+Result<std::shared_ptr<io::InputStream>> MockFileSystem::OpenInputStream(
+    const std::string& path) {
+  auto guard = impl_->lock_guard();
+
+  return impl_->OpenInputReader(path);
+}
+
+// Open the file at `path` for random-access reading. The filesystem lock is
+// held only while the reader is being created.
+Result<std::shared_ptr<io::RandomAccessFile>> MockFileSystem::OpenInputFile(
+    const std::string& path) {
+  auto guard = impl_->lock_guard();
+
+  return impl_->OpenInputReader(path);
+}
+
+// Open `path` for writing from scratch (append is false), attaching
+// `metadata` to the file. The filesystem lock is held only while the stream
+// is being created, not for the stream's lifetime.
+Result<std::shared_ptr<io::OutputStream>> MockFileSystem::OpenOutputStream(
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
+  auto guard = impl_->lock_guard();
+
+  return impl_->OpenOutputStream(path, /*append=*/false, metadata);
+}
+
+// Open `path` for appending (existing contents are carried over into the new
+// stream), attaching `metadata` to the file. The filesystem lock is held only
+// while the stream is being created, not for the stream's lifetime.
+Result<std::shared_ptr<io::OutputStream>> MockFileSystem::OpenAppendStream(
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
+  auto guard = impl_->lock_guard();
+
+  return impl_->OpenOutputStream(path, /*append=*/true, metadata);
+}
+
+// Snapshot of every directory in the filesystem (the root itself excluded),
+// parents listed before their children.
+std::vector<MockDirInfo> MockFileSystem::AllDirs() {
+  auto guard = impl_->lock_guard();
+
+  std::vector<MockDirInfo> dirs;
+  Directory& root_dir = impl_->RootDir();
+  impl_->DumpDirs("", root_dir, &dirs);
+  return dirs;
+}
+
+// Snapshot of every file in the filesystem, found by walking the tree from
+// the root.
+std::vector<MockFileInfo> MockFileSystem::AllFiles() {
+  auto guard = impl_->lock_guard();
+
+  std::vector<MockFileInfo> files;
+  Directory& root_dir = impl_->RootDir();
+  impl_->DumpFiles("", root_dir, &files);
+  return files;
+}
+
+// Create (or overwrite) the file `path` with `contents`. The parent
+// directory is created first, honouring `recursive`, unless the file lives
+// directly in the root.
+Status MockFileSystem::CreateFile(const std::string& path, util::string_view contents,
+                                  bool recursive) {
+  const std::string parent_dir = fs::internal::GetAbstractPathParent(path).first;
+
+  if (!parent_dir.empty()) {
+    RETURN_NOT_OK(CreateDir(parent_dir, recursive));
+  }
+
+  ARROW_ASSIGN_OR_RAISE(auto stream, OpenOutputStream(path));
+  RETURN_NOT_OK(stream->Write(contents));
+  return stream->Close();
+}
+
+// Build a mock filesystem pre-populated from `infos`: a directory is created
+// for each Directory info and an empty file for each File info (missing
+// parents are created as needed). Infos of any other type are ignored.
+Result<std::shared_ptr<FileSystem>> MockFileSystem::Make(
+    TimePoint current_time, const std::vector<FileInfo>& infos) {
+  auto mock_fs = std::make_shared<MockFileSystem>(current_time);
+  for (const auto& info : infos) {
+    const FileType type = info.type();
+    if (type == FileType::Directory) {
+      RETURN_NOT_OK(mock_fs->CreateDir(info.path(), /*recursive*/ true));
+    } else if (type == FileType::File) {
+      RETURN_NOT_OK(mock_fs->CreateFile(info.path(), "", /*recursive*/ true));
+    }
+  }
+
+  return mock_fs;
+}
+
+// Generator flavour of GetFileInfo(select): the listing is computed eagerly
+// and then handed out one FileInfo per chunk. A failed listing produces a
+// generator that reports that failure.
+FileInfoGenerator MockAsyncFileSystem::GetFileInfoGenerator(const FileSelector& select) {
+  auto maybe_infos = GetFileInfo(select);
+  if (!maybe_infos.ok()) {
+    return MakeFailingGenerator(maybe_infos);
+  }
+  // Return the FileInfo entries one by one
+  const auto& infos = *maybe_infos;
+  std::vector<FileInfoVector> chunks;
+  chunks.reserve(infos.size());
+  for (const FileInfo& info : infos) {
+    chunks.push_back(FileInfoVector{info});
+  }
+  return MakeVectorGenerator(std::move(chunks));
+}
+
+}  // namespace internal
+}  // namespace fs
+}  // namespace arrow

+ 132 - 0
contrib/libs/apache/arrow/cpp/src/arrow/filesystem/mockfs.h

@@ -0,0 +1,132 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <iosfwd>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/filesystem/filesystem.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/windows_fixup.h"
+
+namespace arrow {
+namespace fs {
+namespace internal {
+
+// Description of one directory, as returned by MockFileSystem::AllDirs().
+struct MockDirInfo {
+  // Full path of the directory.
+  std::string full_path;
+  // Modification time of the directory.
+  TimePoint mtime;
+
+  // Two entries compare equal when both the mtime and the full path match.
+  bool operator==(const MockDirInfo& other) const {
+    return mtime == other.mtime && full_path == other.full_path;
+  }
+
+  // Stream output operator, declared here and defined out of line.
+  friend ARROW_EXPORT std::ostream& operator<<(std::ostream&, const MockDirInfo&);
+};
+
+// Description of one file, as returned by MockFileSystem::AllFiles().
+struct MockFileInfo {
+  // Full path of the file.
+  std::string full_path;
+  // Modification time of the file.
+  TimePoint mtime;
+  // File contents.
+  // NOTE(review): this is a string_view, so it presumably does not own the
+  // bytes it refers to -- confirm how long the view stays valid.
+  util::string_view data;
+
+  // Two entries compare equal when mtime, full path and contents all match.
+  bool operator==(const MockFileInfo& other) const {
+    return mtime == other.mtime && full_path == other.full_path && data == other.data;
+  }
+
+  // Stream output operator, declared here and defined out of line.
+  friend ARROW_EXPORT std::ostream& operator<<(std::ostream&, const MockFileInfo&);
+};
+
+/// A mock FileSystem implementation that holds its contents in memory.
+///
+/// Useful for validating the FileSystem API, writing conformance suites,
+/// and bootstrapping FileSystem-based APIs.
+class ARROW_EXPORT MockFileSystem : public FileSystem {
+ public:
+  explicit MockFileSystem(TimePoint current_time,
+                          const io::IOContext& = io::default_io_context());
+  ~MockFileSystem() override;
+
+  // Type name reported for this filesystem.
+  std::string type_name() const override { return "mock"; }
+
+  bool Equals(const FileSystem& other) const override;
+
+  // XXX It's not very practical to have to explicitly declare inheritance
+  // of default overrides.
+  using FileSystem::GetFileInfo;
+  Result<FileInfo> GetFileInfo(const std::string& path) override;
+  Result<std::vector<FileInfo>> GetFileInfo(const FileSelector& select) override;
+
+  Status CreateDir(const std::string& path, bool recursive = true) override;
+
+  Status DeleteDir(const std::string& path) override;
+  Status DeleteDirContents(const std::string& path) override;
+  Status DeleteRootDirContents() override;
+
+  Status DeleteFile(const std::string& path) override;
+
+  Status Move(const std::string& src, const std::string& dest) override;
+
+  Status CopyFile(const std::string& src, const std::string& dest) override;
+
+  Result<std::shared_ptr<io::InputStream>> OpenInputStream(
+      const std::string& path) override;
+  Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
+      const std::string& path) override;
+  Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
+  Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
+
+  // Contents-dumping helpers to ease testing.
+  // Output is lexicographically-ordered by full path.
+  std::vector<MockDirInfo> AllDirs();
+  std::vector<MockFileInfo> AllFiles();
+
+  // Create a file whose content is taken from a string.  If `path` has a
+  // non-empty parent, that directory is created first, passing `recursive`
+  // on to CreateDir().
+  Status CreateFile(const std::string& path, util::string_view content,
+                    bool recursive = true);
+
+  // Create a MockFileSystem out of (empty) FileInfo. The content of every
+  // file is empty and of size 0. All directories will be created recursively.
+  // Entries that are neither files nor directories are ignored.
+  static Result<std::shared_ptr<FileSystem>> Make(TimePoint current_time,
+                                                  const std::vector<FileInfo>& infos);
+
+  // Implementation class, only forward-declared in this header.
+  class Impl;
+
+ protected:
+  // Owning pointer to the implementation object.
+  std::unique_ptr<Impl> impl_;
+};
+
+// MockFileSystem variant that overrides GetFileInfoGenerator() so that
+// listing results are delivered through a generator, one FileInfo per chunk.
+class ARROW_EXPORT MockAsyncFileSystem : public MockFileSystem {
+ public:
+  explicit MockAsyncFileSystem(TimePoint current_time,
+                               const io::IOContext& io_context = io::default_io_context())
+      : MockFileSystem(current_time, io_context) {
+    // NOTE(review): flag inherited from a base class; presumably it marks the
+    // async entry points as more than thin wrappers over the sync ones --
+    // confirm against FileSystem.
+    default_async_is_sync_ = false;
+  }
+
+  // Listing results are produced by the synchronous GetFileInfo(select).
+  FileInfoGenerator GetFileInfoGenerator(const FileSelector& select) override;
+};
+
+}  // namespace internal
+}  // namespace fs
+}  // namespace arrow

+ 271 - 0
contrib/libs/apache/arrow/cpp/src/arrow/filesystem/path_util.cc

@@ -0,0 +1,271 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <algorithm>
+
+#include "arrow/filesystem/path_util.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/string_view.h"
+
+namespace arrow {
+namespace fs {
+namespace internal {
+
+// XXX How does this encode Windows UNC paths?
+
+// Split `path` at every separator.  At most one trailing and one leading
+// separator are dropped beforehand; any further consecutive separators yield
+// empty components.  An empty (or separator-only) remainder yields no parts.
+std::vector<std::string> SplitAbstractPath(const std::string& path) {
+  util::string_view rest(path);
+  // Trailing separator first, then leading separator.
+  if (!rest.empty() && rest.back() == kSep) {
+    rest.remove_suffix(1);
+  }
+  if (!rest.empty() && rest.front() == kSep) {
+    rest.remove_prefix(1);
+  }
+
+  std::vector<std::string> components;
+  if (rest.empty()) {
+    return components;
+  }
+
+  size_t begin = 0;
+  while (true) {
+    const size_t sep_pos = rest.find(kSep, begin);
+    if (sep_pos == util::string_view::npos) {
+      // Last component: everything up to the end.
+      components.push_back(std::string(rest.substr(begin)));
+      break;
+    }
+    components.push_back(std::string(rest.substr(begin, sep_pos - begin)));
+    begin = sep_pos + 1;
+  }
+  return components;
+}
+
+// Split `s` at its last separator into (parent, basename).  Without any
+// separator the parent is empty and the basename is `s` itself.
+std::pair<std::string, std::string> GetAbstractPathParent(const std::string& s) {
+  // XXX should strip trailing slash?
+
+  const auto sep_pos = s.rfind(kSep);
+  if (sep_pos != std::string::npos) {
+    return {s.substr(0, sep_pos), s.substr(sep_pos + 1)};
+  }
+  // Empty parent
+  return {std::string(), s};
+}
+
+// Return the text following the last '.' that appears after the last
+// separator of `s`, or an empty string if there is no such '.'.
+std::string GetAbstractPathExtension(const std::string& s) {
+  util::string_view tail(s);
+  const auto sep_pos = tail.rfind(kSep);
+  if (sep_pos != util::string_view::npos) {
+    // Only look at the last path component (the separator itself is kept,
+    // which does not affect the search for '.').
+    tail.remove_prefix(sep_pos);
+  }
+  const auto dot_pos = tail.rfind('.');
+  if (dot_pos != util::string_view::npos) {
+    return std::string(tail.substr(dot_pos + 1));
+  }
+  // Empty extension
+  return std::string();
+}
+
+// Check that every component is non-empty and free of separators; the first
+// offending component determines the returned error.
+Status ValidateAbstractPathParts(const std::vector<std::string>& parts) {
+  for (const std::string& component : parts) {
+    if (component.empty()) {
+      return Status::Invalid("Empty path component");
+    }
+    const bool has_separator = component.find(kSep) != std::string::npos;
+    if (has_separator) {
+      return Status::Invalid("Separator in component '", component, "'");
+    }
+  }
+  return Status::OK();
+}
+
+// Join `base` and `stem` with exactly one separator between them.  An empty
+// `base` returns `stem` unchanged; `stem` itself must not be empty.
+std::string ConcatAbstractPath(const std::string& base, const std::string& stem) {
+  DCHECK(!stem.empty());
+  if (base.empty()) {
+    return stem;
+  }
+  std::string joined = EnsureTrailingSlash(base);
+  joined += std::string(RemoveLeadingSlash(stem));
+  return joined;
+}
+
+// Return a copy of `v` that ends with a separator.  An empty `v` is returned
+// unchanged (still empty).
+std::string EnsureTrailingSlash(util::string_view v) {
+  std::string out(v);
+  // XXX How about "C:" on Windows?  We probably don't want to turn it into "C:/"...
+  // Unless the local filesystem always uses absolute paths
+  if (!out.empty() && out.back() != kSep) {
+    out.push_back(kSep);
+  }
+  return out;
+}
+
+// Return a copy of `v` that starts with a separator.  An empty `v` becomes a
+// lone separator.
+std::string EnsureLeadingSlash(util::string_view v) {
+  const bool already_rooted = !v.empty() && v.front() == kSep;
+  if (already_rooted) {
+    return std::string(v);
+  }
+  // XXX How about "C:" on Windows?  We probably don't want to turn it into "/C:"...
+  return std::string(1, kSep) + std::string(v);
+}
+// Drop every trailing separator from `key`.
+util::string_view RemoveTrailingSlash(util::string_view key) {
+  // find_last_not_of() yields npos when `key` is empty or consists solely of
+  // separators; npos + 1 wraps to 0, which selects the empty prefix.
+  return key.substr(0, key.find_last_not_of(kSep) + 1);
+}
+
+// Drop every leading separator from `key`.
+util::string_view RemoveLeadingSlash(util::string_view key) {
+  const auto first_kept = key.find_first_not_of(kSep);
+  if (first_kept == util::string_view::npos) {
+    // Empty or separators only: nothing remains.
+    return key.substr(key.size());
+  }
+  return key.substr(first_kept);
+}
+
+// Strip `base` from the front of `path` and return the remainder without
+// leading separators.  Fails with Status::Invalid when `base` is not absolute
+// or when `path` does not lie at or below `base`.
+Result<std::string> MakeAbstractPathRelative(const std::string& base,
+                                             const std::string& path) {
+  const bool base_is_absolute = !base.empty() && base.front() == kSep;
+  if (!base_is_absolute) {
+    return Status::Invalid("MakeAbstractPathRelative called with non-absolute base '",
+                           base, "'");
+  }
+  // Normalized base: trailing separators removed, but the root stays "/".
+  const std::string prefix = EnsureLeadingSlash(RemoveTrailingSlash(base));
+  const util::string_view full_path(path);
+  if (!full_path.starts_with(util::string_view(prefix))) {
+    return Status::Invalid("Path '", path, "' is not relative to '", base, "'");
+  }
+  const util::string_view remainder = full_path.substr(prefix.size());
+  // The prefix has to end on a component boundary: "/ab" is not below "/a".
+  const bool on_boundary =
+      remainder.empty() || remainder.front() == kSep || prefix.back() == kSep;
+  if (!on_boundary) {
+    return Status::Invalid("Path '", path, "' is not relative to '", base, "'");
+  }
+  return std::string(RemoveLeadingSlash(remainder));
+}
+
+// Return whether `ancestor` is a prefix of `descendant` that ends on a
+// component boundary.  Trailing separators on either argument are ignored, a
+// path counts as its own ancestor, and the root (empty) path is an ancestor of
+// everything.
+bool IsAncestorOf(util::string_view ancestor, util::string_view descendant) {
+  const util::string_view base = RemoveTrailingSlash(ancestor);
+  if (base.empty()) {
+    // everything is a descendant of the root directory
+    return true;
+  }
+
+  util::string_view rest = RemoveTrailingSlash(descendant);
+  if (!rest.starts_with(base)) {
+    // an ancestor path is a prefix of descendant paths
+    return false;
+  }
+  rest.remove_prefix(base.size());
+
+  // "/hello" is an ancestor of "/hello", but
+  // "/hello/w" is not an ancestor of "/hello/world"
+  return rest.empty() || rest.front() == kSep;
+}
+
+// If `ancestor` is an ancestor of `descendant` (as decided by IsAncestorOf),
+// return the part of `descendant` that follows it, without leading
+// separators.  Otherwise return nullopt.
+util::optional<util::string_view> RemoveAncestor(util::string_view ancestor,
+                                                 util::string_view descendant) {
+  if (!IsAncestorOf(ancestor, descendant)) {
+    return util::nullopt;
+  }
+
+  // IsAncestorOf() ignores trailing separators on `ancestor`, so the prefix
+  // to drop must be measured the same way.  Using the untrimmed length would
+  // overshoot: ("a/b/", "a/b") would call substr() past the end, and
+  // ("a/b//", "a/b/c") would swallow the "c".
+  const auto prefix_length = RemoveTrailingSlash(ancestor).size();
+  auto relative_to_ancestor = descendant.substr(prefix_length);
+  return RemoveLeadingSlash(relative_to_ancestor);
+}
+
+// List the intermediate directories strictly between `base_path` and
+// `descendant`, from shallowest to deepest.  Returns an empty vector when
+// `descendant` is not below `base_path`, is `base_path` itself, or is a direct
+// child of it.
+std::vector<std::string> AncestorsFromBasePath(util::string_view base_path,
+                                               util::string_view descendant) {
+  std::vector<std::string> ancestry;
+  if (auto relative = RemoveAncestor(base_path, descendant)) {
+    auto relative_segments = fs::internal::SplitAbstractPath(std::string(*relative));
+
+    if (relative_segments.empty()) {
+      // `descendant` is `base_path` itself: there is no last segment to drop,
+      // and calling pop_back() on an empty vector is undefined behaviour.
+      return {};
+    }
+
+    // the last segment indicates descendant
+    relative_segments.pop_back();
+
+    if (relative_segments.empty()) {
+      // no missing parent
+      return {};
+    }
+
+    for (auto&& relative_segment : relative_segments) {
+      // `base_path` may view the previous element of `ancestry`; it is copied
+      // into a std::string before push_back() can reallocate the vector.
+      ancestry.push_back(JoinAbstractPath(
+          std::vector<std::string>{std::string(base_path), std::move(relative_segment)}));
+      base_path = ancestry.back();
+    }
+  }
+  return ancestry;
+}
+
+// Reduce `dirs` to the directories that still have to be passed to a recursive
+// CreateDir(): a directory is dropped when the entry sorted right after it is
+// one of its descendants (or a duplicate).  The result is sorted.
+std::vector<std::string> MinimalCreateDirSet(std::vector<std::string> dirs) {
+  // Sorting places an ancestor before its descendants.
+  std::sort(dirs.begin(), dirs.end());
+
+  for (auto ancestor = dirs.begin(); ancestor != dirs.end(); ++ancestor) {
+    auto descendant = ancestor;
+    auto descendants_end = descendant + 1;
+
+    // Follow the chain ancestor -> child -> grandchild ... for as long as each
+    // entry is an ancestor of the entry immediately after it.
+    while (descendants_end != dirs.end() && IsAncestorOf(*descendant, *descendants_end)) {
+      ++descendant;
+      ++descendants_end;
+    }
+
+    // Keep only the deepest entry of the chain.  erase() returns an iterator
+    // to that kept entry, which the loop increment then steps past.
+    // NOTE(review): a sibling such as "a-b" sorts between "a" and "a/b" and
+    // ends the chain early, so "a" is kept even though "a/b" implies it.  The
+    // result is still sufficient, just not minimal -- confirm whether callers
+    // care.
+    ancestor = dirs.erase(ancestor, descendants_end - 1);
+  }
+
+  // the root directory need not be created
+  if (dirs.size() == 1 && IsAncestorOf(dirs[0], "")) {
+    return {};
+  }
+
+  return dirs;
+}
+
+// Return a copy of `v` in which every '/' is replaced by a backslash.
+std::string ToBackslashes(util::string_view v) {
+  std::string converted(v);
+  std::replace(converted.begin(), converted.end(), '/', '\\');
+  return converted;
+}
+
+// Return a copy of `v`; on Windows builds every backslash is replaced by '/',
+// elsewhere the copy is returned untouched.
+std::string ToSlashes(util::string_view v) {
+  std::string converted(v);
+#ifdef _WIN32
+  std::replace(converted.begin(), converted.end(), '\\', '/');
+#endif
+  return converted;
+}
+
+// A path is considered empty when it contains nothing but '/' characters
+// (which includes the empty string).
+bool IsEmptyPath(util::string_view v) {
+  return std::all_of(v.begin(), v.end(), [](char c) { return c == '/'; });
+}
+
+}  // namespace internal
+}  // namespace fs
+}  // namespace arrow

+ 130 - 0
contrib/libs/apache/arrow/cpp/src/arrow/filesystem/path_util.h

@@ -0,0 +1,130 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/type_fwd.h"
+#include "arrow/util/optional.h"
+#include "arrow/util/string_view.h"
+
+namespace arrow {
+namespace fs {
+namespace internal {
+
+// Separator between the components of an abstract path.
+constexpr char kSep = '/';
+
+// Computations on abstract paths (not local paths with system-dependent behaviour).
+// Abstract paths are typically used in URIs.
+
+// Split an abstract path into its individual components.  At most one leading
+// and one trailing separator are ignored.
+ARROW_EXPORT
+std::vector<std::string> SplitAbstractPath(const std::string& s);
+
+// Return the extension of the file: the text after the last '.' found after
+// the last separator, or an empty string if there is none.
+ARROW_EXPORT
+std::string GetAbstractPathExtension(const std::string& s);
+
+// Return the parent directory and basename of an abstract path.  Both values may be
+// empty.
+ARROW_EXPORT
+std::pair<std::string, std::string> GetAbstractPathParent(const std::string& s);
+
+// Validate the components of an abstract path: each one must be non-empty and
+// must not contain a separator.
+ARROW_EXPORT
+Status ValidateAbstractPathParts(const std::vector<std::string>& parts);
+
+// Append a non-empty stem to an abstract path.
+ARROW_EXPORT
+std::string ConcatAbstractPath(const std::string& base, const std::string& stem);
+
+// Make path relative to base, if it starts with base.  Otherwise error out.
+// The base must be absolute, i.e. start with a separator.
+ARROW_EXPORT
+Result<std::string> MakeAbstractPathRelative(const std::string& base,
+                                             const std::string& path);
+
+// Return a copy of `s` that starts with a separator (an empty `s` yields "/").
+ARROW_EXPORT
+std::string EnsureLeadingSlash(util::string_view s);
+
+// Return `s` without any of its leading separators.
+ARROW_EXPORT
+util::string_view RemoveLeadingSlash(util::string_view s);
+
+// Return a copy of `s` that ends with a separator.  An empty `s` stays empty.
+ARROW_EXPORT
+std::string EnsureTrailingSlash(util::string_view s);
+
+// Return `s` without any of its trailing separators.
+ARROW_EXPORT
+util::string_view RemoveTrailingSlash(util::string_view s);
+
+// Return whether `ancestor` is a prefix of `descendant` ending on a component
+// boundary.  Trailing separators are ignored, a path is an ancestor of itself,
+// and the empty (root) path is an ancestor of every path.
+ARROW_EXPORT
+bool IsAncestorOf(util::string_view ancestor, util::string_view descendant);
+
+// If `ancestor` is an ancestor of `descendant`, return the remainder of
+// `descendant` without leading separators; otherwise return nullopt.
+ARROW_EXPORT
+util::optional<util::string_view> RemoveAncestor(util::string_view ancestor,
+                                                 util::string_view descendant);
+
+/// Return a vector of ancestors between a base path and a descendant.
+/// For example,
+///
+/// AncestorsFromBasePath("a/b", "a/b/c/d/e") -> ["a/b/c", "a/b/c/d"]
+ARROW_EXPORT
+std::vector<std::string> AncestorsFromBasePath(util::string_view base_path,
+                                               util::string_view descendant);
+
+/// Given a vector of paths of directories which must be created, produce the minimal
+/// subset for passing to CreateDir(recursive=true) by removing redundant parent
+/// directories
+ARROW_EXPORT
+std::vector<std::string> MinimalCreateDirSet(std::vector<std::string> dirs);
+
+// Join the components of an abstract path, placing one separator between
+// consecutive non-empty components.  Empty components are skipped.
+template <class StringIt>
+std::string JoinAbstractPath(StringIt it, StringIt end) {
+  std::string joined;
+  for (; it != end; ++it) {
+    const auto& component = *it;
+    if (!component.empty()) {
+      if (!joined.empty()) {
+        joined += kSep;
+      }
+      joined += component;
+    }
+  }
+  return joined;
+}
+
+// Convenience overload joining every element of a range.
+template <class StringRange>
+std::string JoinAbstractPath(const StringRange& range) {
+  return JoinAbstractPath(range.begin(), range.end());
+}
+
+/// Convert slashes to backslashes, on all platforms.  Mostly useful for testing.
+ARROW_EXPORT
+std::string ToBackslashes(util::string_view s);
+
+/// Ensure a local path is abstract, by converting backslashes to regular slashes
+/// on Windows.  Return the path unchanged on other systems.
+ARROW_EXPORT
+std::string ToSlashes(util::string_view s);
+
+/// Return whether a path is empty, i.e. contains no character other than '/'.
+ARROW_EXPORT
+bool IsEmptyPath(util::string_view s);
+
+}  // namespace internal
+}  // namespace fs
+}  // namespace arrow

+ 49 - 0
contrib/libs/apache/arrow/cpp/src/arrow/filesystem/type_fwd.h

@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+// int8_t (the underlying type of FileType) comes from <cstdint>; include it
+// here so that this header compiles on its own.
+#include <cstdint>
+
+namespace arrow {
+namespace fs {
+
+/// \brief FileSystem entry type
+enum class FileType : int8_t {
+  /// Entry is not found
+  NotFound,
+  /// Entry exists but its type is unknown
+  ///
+  /// This can designate a special file such as a Unix socket or character
+  /// device, or Windows NUL / CON / ...
+  Unknown,
+  /// Entry is a regular file
+  File,
+  /// Entry is a directory
+  Directory
+};
+
+// Forward declarations of the filesystem types.
+struct FileInfo;
+
+struct FileSelector;
+
+class FileSystem;
+class SubTreeFileSystem;
+class SlowFileSystem;
+class LocalFileSystem;
+class S3FileSystem;
+
+}  // namespace fs
+}  // namespace arrow

Some files were not shown because too many files changed in this diff