summaryrefslogblamecommitdiff
path: root/gitstatus/src/dir.cc
blob: 1817e1d3f26e25c0c14b7c06d84081aefb237dcd (plain) (tree)





















































































































                                                                                                 



                                                               
     






                                                                                     









































































































                                                                                            
// Copyright 2019 Roman Perepelitsa.
//
// This file is part of GitStatus.
//
// GitStatus is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// GitStatus is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with GitStatus. If not, see <https://www.gnu.org/licenses/>.

#include "dir.h"

#include <algorithm>
#include <atomic>
#include <cerrno>
#include <cstring>

#include <dirent.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <unistd.h>

#ifdef __linux__
#include <endian.h>
#include <sys/syscall.h>
#endif

#ifdef __APPLE__
#include <iconv.h>
#endif

#include "bits.h"
#include "check.h"
#include "scope_guard.h"
#include "string_cmp.h"
#include "tribool.h"

namespace gitstatus {

namespace {

// Returns true iff `name` is exactly "." or "..", i.e. one of the two
// pseudo-entries every directory listing contains.
bool Dots(const char* name) {
  if (*name != '.') return false;
  const char second = name[1];
  if (second == '\0') return true;
  // Only look at the third byte once the second one is known to be a dot, so
  // that nothing past the terminating NUL is ever read.
  return second == '.' && name[2] == '\0';
}

}  // namespace

// The linux-specific implementation is about 20% faster than the generic (posix) implementation.
#ifdef __linux__

// Loads the 8 bytes starting at `p` into a uint64_t using the host byte order.
// Going through memcpy means `p` does not have to be 8-byte aligned.
uint64_t Read64(const void* p) {
  uint64_t value = 0;
  std::memcpy(&value, p, sizeof(value));
  return value;
}

// Stores `x` into the 8 bytes starting at `p` using the host byte order.
// Going through memcpy means `p` does not have to be 8-byte aligned.
void Write64(uint64_t x, void* p) {
  std::memcpy(p, &x, sizeof(x));
}

// Reverses, in place, the order of the first 8 bytes of every string in
// [begin, end) when compiling for a little-endian target. On big-endian
// targets the function does nothing; any other byte order is rejected at
// compile time.
//
// Applying the function twice restores the original bytes.
void SwapBytes(char** begin, char** end) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  for (char** it = begin; it != end; ++it) {
    char* const entry = *it;
    Write64(__builtin_bswap64(Read64(entry)), entry);
  }
#elif __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
#error "sorry, not implemented"
#endif
}

// Sorts the strings in [begin, end) in ascending byte-wise order.
//
// The primary template may only be instantiated with kCaseSensitive == true
// (enforced by the static_assert); SortEntries<false> is a separate
// specialization.
//
// Each string's first 8 bytes are temporarily byte-swapped (on little-endian
// targets) so that a single 64-bit integer comparison orders strings the same
// way a byte-wise comparison of those 8 bytes would. The swap is undone once
// sorting is finished.
template <bool kCaseSensitive>
void SortEntries(char** begin, char** end) {
  static_assert(kCaseSensitive, "");

  const auto less = [](const char* lhs, const char* rhs) {
    const uint64_t lhs_head = Read64(lhs);
    const uint64_t rhs_head = Read64(rhs);
    if (lhs_head != rhs_head) return lhs_head < rhs_head;
    // The heads are equal here, so bytes 5..7 compare equal and the outcome is
    // decided from byte 8 onwards.
    //
    // NOTE(review): the fixed length of 256 reads past each string's NUL
    // terminator. This appears to rely on the caller keeping slack after the
    // strings (the Linux ListDir leaves 256 bytes of its buffer unused) and on
    // distinct entries differing at or before the terminator -- confirm.
    return std::memcmp(lhs + 5, rhs + 5, 256) < 0;
  };

  SwapBytes(begin, end);
  std::sort(begin, end, less);
  SwapBytes(begin, end);
}

// Specialization selected when entries must not be ordered case-sensitively:
// sorts the strings in [begin, end) with the StrLt<false> comparator.
template <>
void SortEntries<false>(char** begin, char** end) {
  const auto less = StrLt<false>();
  std::sort(begin, end, less);
}

// Linux implementation of ListDir, built directly on the getdents64 syscall.
//
// Replaces the contents of `entries` with the names of all entries of the
// directory open at `dir_fd`, except "." and "..", and sorts them
// (SortEntries<true> when `case_sensitive`, SortEntries<false> otherwise).
//
// Every pointer stored in `entries` refers to the d_name field of a record
// inside a buffer allocated from `arena`, so the record's d_type byte sits
// directly in front of the name.
//
// `precompose_unicode` is not used by this implementation.
//
// Returns true on success. If the syscall fails, `entries` is left empty and
// false is returned.
bool ListDir(int dir_fd, Arena& arena, std::vector<char*>& entries, bool precompose_unicode,
             bool case_sensitive) {
  // Record layout produced by getdents64.
  struct linux_dirent64 {
    ino64_t d_ino;
    off64_t d_off;
    unsigned short d_reclen;
    unsigned char d_type;
    char d_name[];
  };

  constexpr size_t kBufSize = 8 << 10;
  entries.clear();

  for (;;) {
    // A fresh buffer per syscall: the names collected so far point into the
    // previous buffers and must stay valid.
    char* const chunk = static_cast<char*>(arena.Allocate(kBufSize, alignof(linux_dirent64)));
    // The kernel is offered 256 bytes less than were allocated, leaving slack
    // at the end of every buffer.
    // NOTE(review): presumably this keeps the fixed 256-byte memcmp in
    // SortEntries<true> inside the allocation -- confirm.
    const int filled = syscall(SYS_getdents64, dir_fd, chunk, kBufSize - 256);
    if (filled < 0) {
      entries.clear();
      return false;
    }
    // Zero bytes means the end of the directory has been reached.
    //
    // Stopping earlier, as soon as a read leaves enough room for another
    // record, i.e.
    //
    //   if (filled + sizeof(linux_dirent64) + 512 <= kBufSize) break;
    //
    // would only be correct if getdents64 always returned as many records as
    // fit, which the specification does not promise. The shortcut has no
    // measurable effect on gitstatus performance, so it is not used.
    if (filled == 0) break;

    int offset = 0;
    while (offset < filled) {
      auto* record = reinterpret_cast<linux_dirent64*>(chunk + offset);
      if (!Dots(record->d_name)) entries.push_back(record->d_name);
      offset += record->d_reclen;
    }
  }

  char** const first = entries.data();
  char** const last = first + entries.size();
  if (case_sensitive) {
    SortEntries<true>(first, last);
  } else {
    SortEntries<false>(first, last);
  }

  return true;
}

#else  // __linux__

namespace {

// Copies the name of `ent` into memory obtained from `arena`, preceded by one
// byte holding ent.d_type.
//
// `len` is the length of ent.d_name without the terminating NUL; len + 1 bytes
// are copied so the terminator comes along.
//
// Returns a pointer to the copied name. The type byte is at index -1 relative
// to the returned pointer.
char* DirentDup(Arena& arena, const struct dirent& ent, size_t len) {
  char* const storage = arena.Allocate<char>(len + 2);
  storage[0] = ent.d_type;
  char* const name = storage + 1;
  std::memcpy(name, ent.d_name, len + 1);
  return name;
}

#ifdef __APPLE__

// True until an iconv_open() failure has been logged. IConvTry() flips it to
// false when it logs, so the error message is emitted at most once.
std::atomic<bool> g_iconv_error(true);

// Converts the `ins` bytes at `inp` from "UTF-8-MAC" to "UTF-8", writing the
// result followed by a terminating NUL into the `outs`-byte buffer at `outp`.
//
// Returns:
//   Tribool::kTrue    - the conversion succeeded and the output is
//                       NUL-terminated.
//   Tribool::kUnknown - the output buffer is too small (`outs` is 0, or
//                       iconv() failed with E2BIG); retrying with a larger
//                       buffer may succeed.
//   Tribool::kFalse   - iconv_open() failed, or iconv() failed for a reason
//                       other than E2BIG.
Tribool IConvTry(char* inp, size_t ins, char* outp, size_t outs) {
  if (outs == 0) return Tribool::kUnknown;
  iconv_t ic = iconv_open("UTF-8", "UTF-8-MAC");
  if (ic == (iconv_t)-1) {
    // Log only the first failure: the cheap load skips the exchange once the
    // flag has been cleared, and the exchange lets exactly one caller see
    // the old `true` value.
    if (g_iconv_error.load(std::memory_order_relaxed) &&
        g_iconv_error.exchange(false, std::memory_order_relaxed)) {
      LOG(ERROR) << "iconv_open(\"UTF-8\", \"UTF-8-MAC\") failed";
    }
    return Tribool::kFalse;
  }
  ON_SCOPE_EXIT(&) { CHECK(iconv_close(ic) == 0) << Errno(); };
  // Keep one byte in reserve for the terminating NUL.
  --outs;
  // iconv() returns size_t and signals failure with (size_t)-1. Because the
  // type is unsigned, a `>= 0` test can never detect the failure; the result
  // has to be compared against the sentinel.
  if (iconv(ic, &inp, &ins, &outp, &outs) != static_cast<size_t>(-1)) {
    *outp = 0;
    return Tribool::kTrue;
  }
  return errno == E2BIG ? Tribool::kUnknown : Tribool::kFalse;
}

// Copies the name of `ent` into `arena`, preceded by one byte holding
// ent.d_type, and returns a pointer to the copied name.
//
// When `do_convert` is true and the name contains at least one byte with the
// high bit set, the name is passed through IConvTry() and the converted form
// is stored instead. If the conversion fails, the name is copied unchanged.
char* DirenvConvert(Arena& arena, struct dirent& ent, bool do_convert) {
  if (!do_convert) return DirentDup(arena, ent, std::strlen(ent.d_name));

  // One pass over the name yields both its length and whether it contains any
  // byte outside the 7-bit range.
  const char* const name = ent.d_name;
  size_t name_len = 0;
  bool has_high_bit = false;
  while (name[name_len] != '\0') {
    if (static_cast<unsigned char>(name[name_len]) & 0x80) has_high_bit = true;
    ++name_len;
  }
  if (!has_high_bit) return DirentDup(arena, ent, name_len);

  // The first byte of every buffer is reserved for d_type; the rest is handed
  // to IConvTry(). Whenever IConvTry() answers kUnknown, a buffer twice as
  // large is allocated and the conversion is attempted again.
  for (size_t capacity = NextPow2(name_len + 2);; capacity *= 2) {
    char* const storage = arena.Allocate<char>(capacity);
    const Tribool status = IConvTry(ent.d_name, name_len, storage + 1, capacity - 1);
    if (status == Tribool::kTrue) {
      storage[0] = ent.d_type;
      return storage + 1;
    }
    if (status == Tribool::kFalse) return DirentDup(arena, ent, name_len);
  }
}

#else  // __APPLE__

// Copies the name of `ent` into `arena`, preceded by one byte holding
// ent.d_type, and returns a pointer to the copied name. No conversion is
// performed in this variant; `do_convert` is ignored.
char* DirenvConvert(Arena& arena, struct dirent& ent, bool do_convert) {
  const size_t name_len = std::strlen(ent.d_name);
  return DirentDup(arena, ent, name_len);
}

#endif  // __APPLE__

}  // namespace

// Generic POSIX implementation of ListDir, built on fdopendir()/readdir().
//
// Replaces the contents of `entries` with the names of all entries of the
// directory open at `dir_fd`, except "." and "..", and sorts them with
// StrSort() according to `case_sensitive`. Every name is stored in `arena` by
// DirenvConvert(), which receives `precompose_unicode` as its conversion flag.
//
// Returns true on success. Returns false, with `entries` empty, if the
// directory stream cannot be opened or reading from it fails.
bool ListDir(int dir_fd, Arena& arena, std::vector<char*>& entries, bool precompose_unicode,
             bool case_sensitive) {
  // Clear first so that no failure path can hand stale names back to the
  // caller.
  entries.clear();
  // closedir() also closes the descriptor the stream was opened on, so the
  // stream is created on a duplicate and the caller's descriptor stays open.
  VERIFY((dir_fd = dup(dir_fd)) >= 0);
  DIR* dir = fdopendir(dir_fd);
  if (!dir) {
    CHECK(!close(dir_fd)) << Errno();
    // Must be `false`: any non-zero integer would convert to `true` and
    // report the failure as success.
    return false;
  }
  ON_SCOPE_EXIT(&) { CHECK(!closedir(dir)) << Errno(); };
  // readdir() returns null both at end of directory and on error. errno is
  // zeroed before every call so the two can be told apart afterwards.
  while (struct dirent* ent = (errno = 0, readdir(dir))) {
    if (Dots(ent->d_name)) continue;
    entries.push_back(DirenvConvert(arena, *ent, precompose_unicode));
  }
  if (errno) {
    entries.clear();
    return false;
  }
  StrSort(entries.data(), entries.data() + entries.size(), case_sensitive);
  return true;
}

#endif  // __linux__

}  // namespace gitstatus