diff options
Diffstat (limited to 'gitstatus/src')
42 files changed, 5244 insertions, 0 deletions
namespace gitstatus {

// Walks [begin, end) from the back looking for `val`.
//
// Requires: Iter is a BidirectionalIterator.
//
// Returns an iterator to the last element that compares equal to `val`. If no element matches
// (which includes the empty range), the result is `begin`. Note that a match on the very first
// element also yields `begin`, so callers that need to tell the two cases apart have to inspect
// `*begin` themselves.
template <class Iter, class T>
Iter FindLast(Iter begin, Iter end, const T& val) {
  while (begin != end) {
    --end;
    if (*end == val) break;
  }
  return end;
}

}  // namespace gitstatus
+// +// GitStatus is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with GitStatus. If not, see <https://www.gnu.org/licenses/>. + +#include "arena.h" + +#include <algorithm> +#include <type_traits> + +#include "bits.h" +#include "check.h" + +namespace gitstatus { + +namespace { + +size_t Clamp(size_t min, size_t val, size_t max) { return std::min(max, std::max(min, val)); } + +static const uintptr_t kSingularity = reinterpret_cast<uintptr_t>(&kSingularity); + +} // namespace + +// Triple singularity. We are all fucked. +Arena::Block Arena::g_empty_block = {kSingularity, kSingularity, kSingularity}; + +Arena::Arena(Arena::Options opt) : opt_(std::move(opt)), top_(&g_empty_block) { + CHECK(opt_.min_block_size <= opt_.max_block_size); +} + +Arena::Arena(Arena&& other) : Arena() { *this = std::move(other); } + +Arena::~Arena() { + // See comments in Makefile for the reason sized deallocation is not used. + for (const Block& b : blocks_) ::operator delete(reinterpret_cast<void*>(b.start)); +} + +Arena& Arena::operator=(Arena&& other) { + if (this != &other) { + // In case std::vector ever gets small object optimization. + size_t idx = other.reusable_ ? other.top_ - other.blocks_.data() : 0; + opt_ = other.opt_; + blocks_ = std::move(other.blocks_); + reusable_ = other.reusable_; + top_ = reusable_ ? blocks_.data() + idx : &g_empty_block; + other.blocks_.clear(); + other.reusable_ = 0; + other.top_ = &g_empty_block; + } + return *this; +} + +void Arena::Reuse(size_t num_blocks) { + reusable_ = std::min(reusable_, num_blocks); + for (size_t i = reusable_; i != blocks_.size(); ++i) { + const Block& b = blocks_[i]; + // See comments in Makefile for the reason sized deallocation is not used. 
+ ::operator delete(reinterpret_cast<void*>(b.start)); + } + blocks_.resize(reusable_); + if (reusable_) { + top_ = blocks_.data(); + top_->tip = top_->start; + } else { + top_ = &g_empty_block; + } +} + +void Arena::AddBlock(size_t size, size_t alignment) { + if (alignment > alignof(std::max_align_t)) { + size += alignment - 1; + } else { + size = std::max(size, alignment); + } + if (size <= top_->size() && top_ < blocks_.data() + reusable_ - 1) { + assert(blocks_.front().size() == top_->size()); + ++top_; + top_->tip = top_->start; + return; + } + if (size <= opt_.max_alloc_threshold) { + size = + std::max(size, Clamp(opt_.min_block_size, NextPow2(top_->size() + 1), opt_.max_block_size)); + } + + auto p = reinterpret_cast<uintptr_t>(::operator new(size)); + blocks_.push_back(Block{p, p, p + size}); + if (reusable_) { + if (size < blocks_.front().size()) { + top_ = &blocks_.back(); + return; + } + if (size > blocks_.front().size()) reusable_ = 0; + } + std::swap(blocks_.back(), blocks_[reusable_]); + top_ = &blocks_[reusable_++]; +} + +void* Arena::AllocateSlow(size_t size, size_t alignment) { + assert(alignment && !(alignment & (alignment - 1))); + AddBlock(size, alignment); + assert(Align(top_->tip, alignment) + size <= top_->end); + return Allocate(size, alignment); +} + +} // namespace gitstatus diff --git a/gitstatus/src/arena.h b/gitstatus/src/arena.h new file mode 100644 index 00000000..569833ca --- /dev/null +++ b/gitstatus/src/arena.h @@ -0,0 +1,273 @@ +// Copyright 2019 Roman Perepelitsa. +// +// This file is part of GitStatus. +// +// GitStatus is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// GitStatus is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with GitStatus. If not, see <https://www.gnu.org/licenses/>. + +#ifndef ROMKATV_GITSTATUS_ARENA_H_ +#define ROMKATV_GITSTATUS_ARENA_H_ + +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <limits> +#include <new> +#include <type_traits> +#include <vector> + +#include "string_view.h" + +namespace gitstatus { + +// Thread-compatible. Very fast and very flexible w.r.t. allocation size and alignment. +// +// Natural API extensions: +// +// // Donates a block to the arena. When the time comes, it'll be freed with +// // free(p, size, userdata). +// void Donate(void* p, size_t size, void* userdata, void(*free)(void*, size_t, void*)); +class Arena { + public: + struct Options { + // The first call to Allocate() will allocate a block of this size. There is one exception when + // the first requested allocation size is larger than this limit. Subsequent blocks will be + // twice as large as the last until they saturate at max_block_size. + size_t min_block_size = 64; + + // Allocate blocks at most this large. There is one exception when the requested allocation + // size is larger than this limit. + size_t max_block_size = 8 << 10; + + // When the size of the first allocation in a block is larger than this threshold, the block + // size will be equal to the allocation size. This is meant to reduce memory waste when making + // many allocations with sizes slightly over max_block_size / 2. With max_alloc_threshold equal + // to max_block_size / N, the upper bound on wasted memory when making many equally-sized + // allocations is 100.0 / (N + 1) percent. 
When making allocations of different sizes, the upper + // bound on wasted memory is 50%. + size_t max_alloc_threshold = 1 << 10; + + // Natural extensions: + // + // void* userdata; + // void (*alloc)(size_t size, size_t alignment, void* userdata); + // void (*free)(void* p, size_t size, void* userdata); + }; + + // Requires: opt.min_block_size <= opt.max_block_size. + // + // Doesn't allocate any memory. + Arena(Options opt); + Arena() : Arena(Options()) {} + Arena(Arena&&); + ~Arena(); + + Arena& operator=(Arena&& other); + + // Requires: alignment is a power of 2. + // + // Result is never null and always aligned. If size is zero, the result may be equal to the last. + // Alignment above alignof(std::max_align_t) is supported. There is no requirement for alignment + // to be less than size or to divide it. + inline void* Allocate(size_t size, size_t alignment) { + assert(alignment && !(alignment & (alignment - 1))); + uintptr_t p = Align(top_->tip, alignment); + uintptr_t e = p + size; + if (e <= top_->end) { + top_->tip = e; + return reinterpret_cast<void*>(p); + } + return AllocateSlow(size, alignment); + } + + template <class T> + inline T* Allocate(size_t n) { + static_assert(!std::is_reference<T>(), ""); + return static_cast<T*>(Allocate(n * sizeof(T), alignof(T))); + } + + template <class T> + inline T* Allocate() { + return Allocate<T>(1); + } + + inline char* MemDup(const char* p, size_t len) { + char* res = Allocate<char>(len); + std::memcpy(res, p, len); + return res; + } + + // Copies the null-terminated string (including the trailing null character) to the arena and + // returns a pointer to the copy. + inline char* StrDup(const char* s) { + size_t len = std::strlen(s); + return MemDup(s, len + 1); + } + + // Guarantees: !StrDup(p, len)[len]. + inline char* StrDup(const char* p, size_t len) { + char* res = Allocate<char>(len + 1); + std::memcpy(res, p, len); + res[len] = 0; + return res; + } + + // Guarantees: !StrDup(s)[s.len]. 
+ inline char* StrDup(StringView s) { + return StrDup(s.ptr, s.len); + } + + template <class... Ts> + inline char* StrCat(const Ts&... ts) { + return [&](std::initializer_list<StringView> ss) { + size_t len = 0; + for (StringView s : ss) len += s.len; + char* p = Allocate<char>(len + 1); + for (StringView s : ss) { + std::memcpy(p, s.ptr, s.len); + p += s.len; + } + *p = 0; + return p - len; + }({ts...}); + } + + // Copies/moves `val` to the arena and returns a pointer to it. + template <class T> + inline std::remove_const_t<std::remove_reference_t<T>>* Dup(T&& val) { + return DirectInit<std::remove_const_t<std::remove_reference_t<T>>>(std::forward<T>(val)); + } + + // The same as `new T{args...}` but on the arena. + template <class T, class... Args> + inline T* DirectInit(Args&&... args) { + T* res = Allocate<T>(); + ::new (const_cast<void*>(static_cast<const void*>(res))) T(std::forward<Args>(args)...); + return res; + } + + // The same as `new T(args...)` but on the arena. + template <class T, class... Args> + inline T* BraceInit(Args&&... args) { + T* res = Allocate<T>(); + ::new (const_cast<void*>(static_cast<const void*>(res))) T{std::forward<Args>(args)...}; + return res; + } + + // Tip() and TipSize() allow you to allocate the remainder of the current block. They can be + // useful if you are flexible w.r.t. the allocation size. + // + // Invariant: + // + // const void* tip = Tip(); + // void* p = Allocate(TipSize(), 1); // grab the remainder of the current block + // assert(p == tip); + const void* Tip() const { return reinterpret_cast<const void*>(top_->tip); } + size_t TipSize() const { return top_->end - top_->tip; } + + // Invalidates all allocations (without running destructors of allocated objects) and frees all + // blocks except at most the specified number of blocks. The retained blocks will be used to + // fulfil future allocation requests. 
+ void Reuse(size_t num_blocks = std::numeric_limits<size_t>::max()); + + private: + struct Block { + size_t size() const { return end - start; } + uintptr_t start; + uintptr_t tip; + uintptr_t end; + }; + + inline static size_t Align(size_t n, size_t m) { return (n + m - 1) & ~(m - 1); }; + + void AddBlock(size_t size, size_t alignment); + bool ReuseBlock(size_t size, size_t alignment); + + __attribute__((noinline)) void* AllocateSlow(size_t size, size_t alignment); + + Options opt_; + std::vector<Block> blocks_; + // Invariant: !blocks_.empty() <= reusable_ && reusable_ <= blocks_.size(). + size_t reusable_ = 0; + // Invariant: (top_ == &g_empty_block) == blocks_.empty(). + // Invariant: blocks_.empty() || top_ == &blocks_.back() || top_ < blocks_.data() + reusable_. + Block* top_; + + static Block g_empty_block; +}; + +// Copies of ArenaAllocator use the same thread-compatible Arena without synchronization. +template <class T> +class ArenaAllocator { + public: + using value_type = T; + using pointer = T*; + using const_pointer = const T*; + using reference = T&; + using const_reference = const T&; + using size_type = size_t; + using difference_type = ptrdiff_t; + using propagate_on_container_move_assignment = std::true_type; + template <class U> + struct rebind { + using other = ArenaAllocator<U>; + }; + using is_always_equal = std::false_type; + + ArenaAllocator(Arena* arena = nullptr) : arena_(*arena) {} + + Arena& arena() const { return arena_; } + + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } + pointer allocate(size_type n, const void* hint = nullptr) { return arena_.Allocate<T>(n); } + void deallocate(T* p, std::size_t n) {} + size_type max_size() const { return std::numeric_limits<size_type>::max() / sizeof(value_type); } + + template <class U, class... Args> + void construct(U* p, Args&&... 
args) { + ::new (const_cast<void*>(static_cast<const void*>(p))) U(std::forward<Args>(args)...); + } + + template <class U> + void destroy(U* p) { + p->~U(); + } + + bool operator==(const ArenaAllocator& other) const { return &arena_ == &other.arena_; } + bool operator!=(const ArenaAllocator& other) const { return &arena_ != &other.arena_; } + + private: + Arena& arena_; +}; + +template <class C> +struct LazyWithArena; + +template <template <class, class> class C, class T1, class A> +struct LazyWithArena<C<T1, A>> { + using type = C<T1, ArenaAllocator<typename C<T1, A>::value_type>>; +}; + +template <template <class, class, class> class C, class T1, class T2, class A> +struct LazyWithArena<C<T1, T2, A>> { + using type = C<T1, T2, ArenaAllocator<typename C<T1, T2, A>::value_type>>; +}; + +template <class C> +using WithArena = typename LazyWithArena<C>::type; + +} // namespace gitstatus + +#endif // ROMKATV_GITSTATUS_DIR_H_ diff --git a/gitstatus/src/bits.h b/gitstatus/src/bits.h new file mode 100644 index 00000000..c1a7dcb6 --- /dev/null +++ b/gitstatus/src/bits.h @@ -0,0 +1,29 @@ +// Copyright 2019 Roman Perepelitsa. +// +// This file is part of GitStatus. +// +// GitStatus is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// GitStatus is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with GitStatus. If not, see <https://www.gnu.org/licenses/>. + +#ifndef ROMKATV_GITSTATUS_BITS_H_ +#define ROMKATV_GITSTATUS_BITS_H_ + +#include <cstddef> + +namespace gitstatus { + +inline size_t NextPow2(size_t n) { return n < 2 ? 
1 : (~size_t{0} >> __builtin_clzll(n - 1)) + 1; } + +} // namespace gitstatus + +#endif // ROMKATV_GITSTATUS_BITS_H_ diff --git a/gitstatus/src/check.h b/gitstatus/src/check.h new file mode 100644 index 00000000..82dceae1 --- /dev/null +++ b/gitstatus/src/check.h @@ -0,0 +1,61 @@ +// Copyright 2019 Roman Perepelitsa. +// +// This file is part of GitStatus. +// +// GitStatus is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// GitStatus is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with GitStatus. If not, see <https://www.gnu.org/licenses/>. + +#ifndef ROMKATV_GITSTATUS_CHECK_H_ +#define ROMKATV_GITSTATUS_CHECK_H_ + +#include "logging.h" + +#include <stdexcept> + +// The argument must be an expression convertible to bool. +// Does nothing if the expression evalutes to true. Otherwise +// it's equivalent to LOG(FATAL). +#define CHECK(cond...) \ + static_cast<void>(0), (!!(cond)) ? static_cast<void>(0) : LOG(FATAL) << #cond << ": " + +#define VERIFY(cond...) \ + static_cast<void>(0), ::gitstatus::internal_check::Thrower(!(cond)) \ + ? 
static_cast<void>(0) \ + : LOG(ERROR) << #cond << ": " + +namespace gitstatus { + +struct Exception : std::exception { + const char* what() const noexcept override { return "Exception"; } +}; + +namespace internal_check { + +class Thrower { + public: + Thrower(bool should_throw) : throw_(should_throw) {} + Thrower(Thrower&&) = delete; + explicit operator bool() const { return !throw_; } + ~Thrower() noexcept(false) { + if (throw_) throw Exception(); + } + + private: + bool throw_; +}; + +} // namespace internal_check + +} // namespace gitstatus + +#endif // ROMKATV_GITSTATUS_CHECK_H_ diff --git a/gitstatus/src/check_dir_mtime.cc b/gitstatus/src/check_dir_mtime.cc new file mode 100644 index 00000000..bb60ffe5 --- /dev/null +++ b/gitstatus/src/check_dir_mtime.cc @@ -0,0 +1,157 @@ +// Copyright 2019 Roman Perepelitsa. +// +// This file is part of GitStatus. +// +// GitStatus is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// GitStatus is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with GitStatus. If not, see <https://www.gnu.org/licenses/>. 
namespace gitstatus {

namespace {

// Name prefix of the scratch directories that CheckDirMtime() creates. The prefix is appended
// directly to root_dir, and the random suffix chosen by mkdtemp() follows it.
constexpr char kDirPrefix[] = ".gitstatus.";

// Creates the file `path` with mode 0444 via creat() and closes it right away. A creat() failure
// is reported through VERIFY (which throws Exception), a close() failure through CHECK.
void Touch(const char* path) {
  int fd = creat(path, 0444);
  VERIFY(fd >= 0) << Errno();
  CHECK(!close(fd)) << Errno();
}

// Returns true if the current lstat() of `path` differs from `prev` according to StatEq().
// An lstat() failure is reported through VERIFY (which throws Exception).
bool StatChanged(const char* path, const struct stat& prev) {
  struct stat cur;
  VERIFY(!lstat(path, &cur)) << Errno();
  return !StatEq(prev, cur);
}

// Best-effort removal of scratch directories left under root_dir by earlier runs. Only entries
// whose name starts with kDirPrefix and whose mtime is at least 10 seconds old are touched.
// Failures are logged as warnings (or silently ignored if root_dir cannot be opened or listed);
// nothing is reported to the caller.
void RemoveStaleDirs(const char* root_dir) {
  int dir_fd = open(root_dir, O_DIRECTORY | O_CLOEXEC);
  if (dir_fd < 0) return;
  ON_SCOPE_EXIT(&) { CHECK(!close(dir_fd)) << Errno(); };

  Arena arena;
  std::vector<char*> entries;
  const std::time_t now = std::time(nullptr);
  if (!ListDir(dir_fd, arena, entries,
               /* precompose_unicode = */ false,
               /* case_sensitive = */ true)) {
    return;
  }

  // `path` is reused for every entry: it is truncated back to root_dir and extended as needed.
  std::string path = root_dir;
  const size_t root_dir_len = path.size();

  for (const char* entry : entries) {
    // Skip everything that doesn't start with kDirPrefix.
    if (std::strlen(entry) < std::strlen(kDirPrefix)) continue;
    if (std::memcmp(entry, kDirPrefix, std::strlen(kDirPrefix))) continue;

    struct stat st;
    if (fstatat(dir_fd, entry, &st, AT_SYMLINK_NOFOLLOW)) {
      LOG(WARN) << "Cannot stat " << Print(entry) << " in " << Print(root_dir) << ": " << Errno();
      continue;
    }
    // Leave recently modified directories alone.
    // NOTE(review): presumably they may belong to a check that is still running -- confirm.
    if (MTim(st).tv_sec + 10 > now) continue;

    path.resize(root_dir_len);
    path += entry;
    size_t dir_len = path.size();

    // The only regular file CheckDirMtime() creates is b/1; it has to go before b can be removed.
    path += "/b/1";
    if (unlink(path.c_str()) && errno != ENOENT) {
      LOG(WARN) << "Cannot unlink " << Print(path) << ": " << Errno();
      continue;
    }

    // Remove the directories innermost first; "" stands for the scratch directory itself.
    for (const char* d : {"/a/1", "/a", "/b", ""}) {
      path.resize(dir_len);
      path += d;
      if (rmdir(path.c_str()) && errno != ENOENT) {
        LOG(WARN) << "Cannot remove " << Print(path) << ": " << Errno();
        break;
      }
    }
  }
}

}  // namespace

// Tests whether creating a directory and creating a file each change the stat of the parent
// directory on the filesystem that hosts root_dir.
//
// root_dir is concatenated with the scratch directory name as is, so it has to end with a slash
// for the scratch directory to be created inside of it.
//
// Layout created (and removed again by the scope guards, in reverse order of declaration):
//
//   <root_dir>.gitstatus.XXXXXX/
//     a/    stat'ed, then a/1 (directory) is created a second later
//     b/    stat'ed, then b/1 (file) is created a second later
//
// Returns true if both checks pass. Returns false if either check fails or if any of the
// VERIFY-guarded system calls fails (the resulting Exception is caught here).
bool CheckDirMtime(const char* root_dir) {
  try {
    RemoveStaleDirs(root_dir);

    std::string tmp = std::string() + root_dir + kDirPrefix + "XXXXXX";
    // mkdtemp() replaces the XXXXXX suffix in place.
    VERIFY(mkdtemp(&tmp[0])) << Errno();
    ON_SCOPE_EXIT(&) { rmdir(tmp.c_str()); };

    std::string a_dir = tmp + "/a";
    VERIFY(!mkdir(a_dir.c_str(), 0755)) << Errno();
    ON_SCOPE_EXIT(&) { rmdir(a_dir.c_str()); };
    struct stat a_st;
    VERIFY(!lstat(a_dir.c_str(), &a_st)) << Errno();

    std::string b_dir = tmp + "/b";
    VERIFY(!mkdir(b_dir.c_str(), 0755)) << Errno();
    ON_SCOPE_EXIT(&) { rmdir(b_dir.c_str()); };
    struct stat b_st;
    VERIFY(!lstat(b_dir.c_str(), &b_st)) << Errno();

    // One sleep serves both checks. sleep() returns non-zero when it is interrupted, in which
    // case it is restarted.
    while (sleep(1)) {
      // zzzz
    }

    std::string a1 = a_dir + "/1";
    VERIFY(!mkdir(a1.c_str(), 0755)) << Errno();
    ON_SCOPE_EXIT(&) { rmdir(a1.c_str()); };
    if (!StatChanged(a_dir.c_str(), a_st)) {
      LOG(WARN) << "Creating a directory doesn't change mtime of the parent: " << Print(root_dir);
      return false;
    }

    std::string b1 = b_dir + "/1";
    Touch(b1.c_str());
    ON_SCOPE_EXIT(&) { unlink(b1.c_str()); };
    if (!StatChanged(b_dir.c_str(), b_st)) {
      LOG(WARN) << "Creating a file doesn't change mtime of the parent: " << Print(root_dir);
      return false;
    }

    LOG(INFO) << "All mtime checks have passes. Enabling untracked cache: " << Print(root_dir);
    return true;
  } catch (const Exception&) {
    LOG(WARN) << "Error while testing for mtime capability: " << Print(root_dir);
    return false;
  }
}

}  // namespace gitstatus
namespace gitstatus {

// Similar to `git update-index --test-untracked-cache` but performs all tests
// in parallel, so the total testing time is one second regardless of the number
// of tests. It also performs fewer tests because gitstatus imposes fewer
// requirements on the filesystem in order to take advantage of untracked cache.
//
// The tests create a temporary `.gitstatus.XXXXXX` directory whose name is appended
// directly to root_dir, so root_dir must end with a slash for the directory to be
// created inside of it. The temporary directory is removed before returning.
//
// Returns true if all tests pass. Returns false if any test fails or if an error
// occurs while running the tests. Does not throw gitstatus::Exception.
bool CheckDirMtime(const char* root_dir);

}  // namespace gitstatus
+// +// You should have received a copy of the GNU General Public License +// along with GitStatus. If not, see <https://www.gnu.org/licenses/>. + +#include "dir.h" + +#include <algorithm> +#include <atomic> +#include <cerrno> +#include <cstring> + +#include <dirent.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <unistd.h> + +#ifdef __linux__ +#include <endian.h> +#include <sys/syscall.h> +#endif + +#ifdef __APPLE__ +#include <iconv.h> +#endif + +#include "bits.h" +#include "check.h" +#include "scope_guard.h" +#include "string_cmp.h" +#include "tribool.h" + +namespace gitstatus { + +namespace { + +bool Dots(const char* name) { + if (name[0] == '.') { + if (name[1] == 0) return true; + if (name[1] == '.' && name[2] == 0) return true; + } + return false; +} + +} // namespace + +// The linux-specific implementation is about 20% faster than the generic (posix) implementation. +#ifdef __linux__ + +uint64_t Read64(const void* p) { + uint64_t res; + std::memcpy(&res, p, 8); + return res; +} + +void Write64(uint64_t x, void* p) { std::memcpy(p, &x, 8); } + +void SwapBytes(char** begin, char** end) { +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + for (; begin != end; ++begin) Write64(__builtin_bswap64(Read64(*begin)), *begin); +#elif __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ +#error "sorry, not implemented" +#endif +} + +template <bool kCaseSensitive> +void SortEntries(char** begin, char** end) { + static_assert(kCaseSensitive, ""); + SwapBytes(begin, end); + std::sort(begin, end, [](const char* a, const char* b) { + uint64_t x = Read64(a); + uint64_t y = Read64(b); + // Add 5 for good luck. 
+ return x < y || (x == y && std::memcmp(a + 5, b + 5, 256) < 0); + }); + SwapBytes(begin, end); +} + +template <> +void SortEntries<false>(char** begin, char** end) { + std::sort(begin, end, StrLt<false>()); +} + +bool ListDir(int dir_fd, Arena& arena, std::vector<char*>& entries, bool precompose_unicode, + bool case_sensitive) { + struct linux_dirent64 { + ino64_t d_ino; + off64_t d_off; + unsigned short d_reclen; + unsigned char d_type; + char d_name[]; + }; + + constexpr size_t kBufSize = 8 << 10; + const size_t orig_size = entries.size(); + + while (true) { + char* buf = static_cast<char*>(arena.Allocate(kBufSize, alignof(linux_dirent64))); + // Save 256 bytes for the rainy day. + int n = syscall(SYS_getdents64, dir_fd, buf, kBufSize - 256); + if (n < 0) { + entries.resize(orig_size); + return false; + } + for (int pos = 0; pos < n;) { + auto* ent = reinterpret_cast<linux_dirent64*>(buf + pos); + if (!Dots(ent->d_name)) entries.push_back(ent->d_name); + pos += ent->d_reclen; + } + if (n == 0) break; + // The following optimization relies on SYS_getdents64 always returning as many + // entries as would fit. This is not guaranteed by the specification and I don't + // know if this is true in practice. The optimization has no measurable effect on + // gitstatus performance, so it's turned off. 
+ // + // if (n + sizeof(linux_dirent64) + 512 <= kBufSize) break; + } + + if (case_sensitive) { + SortEntries<true>(entries.data() + orig_size, entries.data() + entries.size()); + } else { + SortEntries<false>(entries.data() + orig_size, entries.data() + entries.size()); + } + + return true; +} + +#else // __linux__ + +namespace { + +char* DirentDup(Arena& arena, const struct dirent& ent, size_t len) { + char* p = arena.Allocate<char>(len + 2); + *p++ = ent.d_type; + std::memcpy(p, ent.d_name, len + 1); + return p; +} + +#ifdef __APPLE__ + +std::atomic<bool> g_iconv_error(true); + +Tribool IConvTry(char* inp, size_t ins, char* outp, size_t outs) { + if (outs == 0) return Tribool::kUnknown; + iconv_t ic = iconv_open("UTF-8", "UTF-8-MAC"); + if (ic == (iconv_t)-1) { + if (g_iconv_error.load(std::memory_order_relaxed) && + g_iconv_error.exchange(false, std::memory_order_relaxed)) { + LOG(ERROR) << "iconv_open(\"UTF-8\", \"UTF-8-MAC\") failed"; + } + return Tribool::kFalse; + } + ON_SCOPE_EXIT(&) { CHECK(iconv_close(ic) == 0) << Errno(); }; + --outs; + if (iconv(ic, &inp, &ins, &outp, &outs) >= 0) { + *outp = 0; + return Tribool::kTrue; + } + return errno == E2BIG ? 
Tribool::kUnknown : Tribool::kFalse; +} + +char* DirenvConvert(Arena& arena, struct dirent& ent, bool do_convert) { + if (!do_convert) return DirentDup(arena, ent, std::strlen(ent.d_name)); + + size_t len = 0; + do_convert = false; + for (unsigned char c; (c = ent.d_name[len]); ++len) { + if (c & 0x80) do_convert = true; + } + if (!do_convert) return DirentDup(arena, ent, len); + + size_t n = NextPow2(len + 2); + while (true) { + char* p = arena.Allocate<char>(n); + switch (IConvTry(ent.d_name, len, p + 1, n - 1)) { + case Tribool::kFalse: + return DirentDup(arena, ent, len); + case Tribool::kTrue: + *p = ent.d_type; + return p + 1; + case Tribool::kUnknown: + break; + } + n *= 2; + } +} + +#else // __APPLE__ + +char* DirenvConvert(Arena& arena, struct dirent& ent, bool do_convert) { + return DirentDup(arena, ent, std::strlen(ent.d_name)); +} + +#endif // __APPLE__ + +} // namespace + +bool ListDir(int dir_fd, Arena& arena, std::vector<char*>& entries, bool precompose_unicode, + bool case_sensitive) { + const size_t orig_size = entries.size(); + dir_fd = dup(dir_fd); + if (dir_fd < 0) return false; + DIR* dir = fdopendir(dir_fd); + if (!dir) { + CHECK(!close(dir_fd)) << Errno(); + return false; + } + ON_SCOPE_EXIT(&) { CHECK(!closedir(dir)) << Errno(); }; + while (struct dirent* ent = (errno = 0, readdir(dir))) { + if (Dots(ent->d_name)) continue; + entries.push_back(DirenvConvert(arena, *ent, precompose_unicode)); + } + if (errno) { + entries.resize(orig_size); + return false; + } + StrSort(entries.data() + orig_size, entries.data() + entries.size(), case_sensitive); + return true; +} + +#endif // __linux__ + +} // namespace gitstatus diff --git a/gitstatus/src/dir.h b/gitstatus/src/dir.h new file mode 100644 index 00000000..4d4cf3da --- /dev/null +++ b/gitstatus/src/dir.h @@ -0,0 +1,50 @@ +// Copyright 2019 Roman Perepelitsa. +// +// This file is part of GitStatus. 
+// +// GitStatus is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// GitStatus is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with GitStatus. If not, see <https://www.gnu.org/licenses/>. + +#ifndef ROMKATV_GITSTATUS_DIR_H_ +#define ROMKATV_GITSTATUS_DIR_H_ + +#include <cstddef> +#include <vector> + +#include "arena.h" + +namespace gitstatus { + +// On error, leaves entries unchanged and returns false. Does not throw. +// +// On success, appends names of files from the specified directory to entries and returns true. +// Every appended entry is a null-terminated string. At -1 offset is its d_type. All elements +// point into the arena. They are sorted either by strcmp or strcasecmp depending on case_sensitive. +// +// Does not close dir_fd. +// +// There are two distinct implementations of ListDir -- one for Linux and another for everything +// else. The linux-specific implementation is 20% faster. +// +// The reason sorting is bundled with directory listing is performance on Linux. The API of +// getdents64 allows for much faster sorting than what can be done with a plain vector<char*>. +// For the POSIX implementation there is no need to bundle sorting in this way. In fact, it's +// done at the end with a generic StrSort() call. +// +// For best results, reuse the arena and vector for multiple calls to avoid heap allocations. 
+bool ListDir(int dir_fd, Arena& arena, std::vector<char*>& entries, bool precompose_unicode, + bool case_sensitive); + +} // namespace gitstatus + +#endif // ROMKATV_GITSTATUS_DIR_H_ diff --git a/gitstatus/src/git.cc b/gitstatus/src/git.cc new file mode 100644 index 00000000..552100cb --- /dev/null +++ b/gitstatus/src/git.cc @@ -0,0 +1,250 @@ +// Copyright 2019 Roman Perepelitsa. +// +// This file is part of GitStatus. +// +// GitStatus is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// GitStatus is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with GitStatus. If not, see <https://www.gnu.org/licenses/>. + +#include "git.h" + +#include <cstdlib> +#include <cstring> +#include <fstream> +#include <sstream> +#include <utility> + +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "arena.h" +#include "check.h" +#include "print.h" +#include "scope_guard.h" + +namespace gitstatus { + +const char* GitError() { + const git_error* err = git_error_last(); + return err && err->message ? err->message : "unknown error"; +} + +std::string RepoState(git_repository* repo) { + Arena arena; + StringView gitdir(git_repository_path(repo)); + + // These names mostly match gitaction in vcs_info: + // https://github.com/zsh-users/zsh/blob/master/Functions/VCS_Info/Backends/VCS_INFO_get_data_git. 
+ auto State = [&]() { + switch (git_repository_state(repo)) { + case GIT_REPOSITORY_STATE_NONE: + return ""; + case GIT_REPOSITORY_STATE_MERGE: + return "merge"; + case GIT_REPOSITORY_STATE_REVERT: + return "revert"; + case GIT_REPOSITORY_STATE_REVERT_SEQUENCE: + return "revert-seq"; + case GIT_REPOSITORY_STATE_CHERRYPICK: + return "cherry"; + case GIT_REPOSITORY_STATE_CHERRYPICK_SEQUENCE: + return "cherry-seq"; + case GIT_REPOSITORY_STATE_BISECT: + return "bisect"; + case GIT_REPOSITORY_STATE_REBASE: + return "rebase"; + case GIT_REPOSITORY_STATE_REBASE_INTERACTIVE: + return "rebase-i"; + case GIT_REPOSITORY_STATE_REBASE_MERGE: + return "rebase-m"; + case GIT_REPOSITORY_STATE_APPLY_MAILBOX: + return "am"; + case GIT_REPOSITORY_STATE_APPLY_MAILBOX_OR_REBASE: + return "am/rebase"; + } + return "action"; + }; + + auto DirExists = [&](StringView name) { + int fd = open(arena.StrCat(gitdir, "/", name), O_DIRECTORY | O_CLOEXEC); + if (fd < 0) return false; + CHECK(!close(fd)) << Errno(); + return true; + }; + + auto ReadFile = [&](StringView name) { + std::ifstream strm(arena.StrCat(gitdir, "/", name)); + std::string res; + strm >> res; + return res; + }; + + std::string next; + std::string last; + + if (DirExists("rebase-merge")) { + next = ReadFile("rebase-merge/msgnum"); + last = ReadFile("rebase-merge/end"); + } else if (DirExists("rebase-apply")) { + next = ReadFile("rebase-apply/next"); + last = ReadFile("rebase-apply/last"); + } + + std::ostringstream res; + res << State(); + if (!next.empty() && !last.empty()) res << ' ' << next << '/' << last; + return res.str(); +} + +size_t CountRange(git_repository* repo, const std::string& range) { + git_revwalk* walk = nullptr; + VERIFY(!git_revwalk_new(&walk, repo)) << GitError(); + ON_SCOPE_EXIT(=) { git_revwalk_free(walk); }; + VERIFY(!git_revwalk_push_range(walk, range.c_str())) << GitError(); + size_t res = 0; + while (true) { + git_oid oid; + switch (git_revwalk_next(&oid, walk)) { + case 0: + ++res; + break; + case 
GIT_ITEROVER: + return res; + default: + LOG(ERROR) << "git_revwalk_next: " << range << ": " << GitError(); + throw Exception(); + } + } +} + +size_t NumStashes(git_repository* repo) { + size_t res = 0; + auto* cb = +[](size_t index, const char* message, const git_oid* stash_id, void* payload) { + ++*static_cast<size_t*>(payload); + return 0; + }; + if (!git_stash_foreach(repo, cb, &res)) return res; + // Example error: failed to parse signature - malformed e-mail. + // See https://github.com/romkatv/powerlevel10k/issues/216. + LOG(WARN) << "git_stash_foreach: " << GitError(); + return 0; +} + +git_reference* Head(git_repository* repo) { + git_reference* symbolic = nullptr; + switch (git_reference_lookup(&symbolic, repo, "HEAD")) { + case 0: + break; + case GIT_ENOTFOUND: + return nullptr; + default: + LOG(ERROR) << "git_reference_lookup: " << GitError(); + throw Exception(); + } + + git_reference* direct = nullptr; + if (git_reference_resolve(&direct, symbolic)) { + LOG(INFO) << "Empty git repo (no HEAD)"; + return symbolic; + } + git_reference_free(symbolic); + return direct; +} + +const char* LocalBranchName(const git_reference* ref) { + CHECK(ref); + git_reference_t type = git_reference_type(ref); + switch (type) { + case GIT_REFERENCE_DIRECT: { + return git_reference_is_branch(ref) ? 
git_reference_shorthand(ref) : ""; + } + case GIT_REFERENCE_SYMBOLIC: { + static constexpr char kHeadPrefix[] = "refs/heads/"; + const char* target = git_reference_symbolic_target(ref); + if (!target) return ""; + size_t len = std::strlen(target); + if (len < sizeof(kHeadPrefix)) return ""; + if (std::memcmp(target, kHeadPrefix, sizeof(kHeadPrefix) - 1)) return ""; + return target + (sizeof(kHeadPrefix) - 1); + } + case GIT_REFERENCE_INVALID: + case GIT_REFERENCE_ALL: + break; + } + LOG(ERROR) << "Invalid reference type: " << type; + throw Exception(); +} + +RemotePtr GetRemote(git_repository* repo, const git_reference* local) { + git_remote* remote; + git_buf symref = {}; + if (git_branch_remote(&remote, &symref, repo, git_reference_name(local))) return nullptr; + ON_SCOPE_EXIT(&) { + git_remote_free(remote); + git_buf_free(&symref); + }; + + git_reference* ref; + if (git_reference_lookup(&ref, repo, symref.ptr)) return nullptr; + ON_SCOPE_EXIT(&) { if (ref) git_reference_free(ref); }; + + const char* branch = nullptr; + std::string name = remote ? git_remote_name(remote) : "."; + if (git_branch_name(&branch, ref)) { + branch = ""; + } else if (remote) { + VERIFY(std::strstr(branch, name.c_str()) == branch); + VERIFY(branch[name.size()] == '/'); + branch += name.size() + 1; + } + + auto res = std::make_unique<Remote>(); + res->name = std::move(name); + res->branch = branch; + res->url = remote ? 
(git_remote_url(remote) ?: "") : ""; + res->ref = std::exchange(ref, nullptr); + return RemotePtr(res.release()); +} + +PushRemotePtr GetPushRemote(git_repository* repo, const git_reference* local) { + git_remote* remote; + git_buf symref = {}; + if (git_branch_push_remote(&remote, &symref, repo, git_reference_name(local))) return nullptr; + ON_SCOPE_EXIT(&) { + git_remote_free(remote); + git_buf_free(&symref); + }; + + git_reference* ref; + if (git_reference_lookup(&ref, repo, symref.ptr)) return nullptr; + ON_SCOPE_EXIT(&) { if (ref) git_reference_free(ref); }; + + std::string name = remote ? git_remote_name(remote) : "."; + + auto res = std::make_unique<PushRemote>(); + res->name = std::move(name); + res->url = remote ? (git_remote_url(remote) ?: "") : ""; + res->ref = std::exchange(ref, nullptr); + return PushRemotePtr(res.release()); +} + +CommitMessage GetCommitMessage(git_repository* repo, const git_oid& id) { + git_commit* commit; + VERIFY(!git_commit_lookup(&commit, repo, &id)) << GitError(); + ON_SCOPE_EXIT(=) { git_commit_free(commit); }; + return {.encoding = git_commit_message_encoding(commit) ?: "", + .summary = git_commit_summary(commit) ?: ""}; +} + +} // namespace gitstatus diff --git a/gitstatus/src/git.h b/gitstatus/src/git.h new file mode 100644 index 00000000..b85f09f7 --- /dev/null +++ b/gitstatus/src/git.h @@ -0,0 +1,115 @@ +// Copyright 2019 Roman Perepelitsa. +// +// This file is part of GitStatus. +// +// GitStatus is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// GitStatus is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with GitStatus. If not, see <https://www.gnu.org/licenses/>. + +#ifndef ROMKATV_GITSTATUS_GIT_H_ +#define ROMKATV_GITSTATUS_GIT_H_ + +#include <git2.h> + +#include <cstddef> +#include <memory> +#include <string> + +namespace gitstatus { + +// Not null. +const char* GitError(); + +// Returns the name of the operation in progress (e.g., "merge"), followed by " N/M" progress +// when rebase-merge or rebase-apply counters are present. The name is empty when the repository +// state is GIT_REPOSITORY_STATE_NONE. +std::string RepoState(git_repository* repo); + +// Returns the number of commits in the range. +size_t CountRange(git_repository* repo, const std::string& range); + +// How many stashes are there? +size_t NumStashes(git_repository* repo); + +// Returns the origin URL or an empty string. Not null. +std::string RemoteUrl(git_repository* repo, const git_reference* ref); + +// Returns reference to HEAD or null if not found. The reference is symbolic if the repo is empty +// and direct otherwise. +git_reference* Head(git_repository* repo); + +// Returns the name of the local branch, or an empty string. +const char* LocalBranchName(const git_reference* ref); + +struct CommitMessage { + // Can be empty, meaning "UTF-8". + std::string encoding; + // The first paragraph of the commit's message as a one-liner. + std::string summary; +}; + +CommitMessage GetCommitMessage(git_repository* repo, const git_oid& id); + +struct Remote { + // Tip of the remote branch. + git_reference* ref; + + // Name of the tracking remote. For example, "origin". + std::string name; + + // Name of the tracking remote branch. For example, "master". + std::string branch; + + // URL of the tracking remote. For example, "https://foo.com/repo.git". + std::string url; + + // Note: pushurl is not exposed (but could be). + + struct Free { + void operator()(const Remote* p) const { + if (p) { + if (p->ref) git_reference_free(p->ref); + delete p; + } + } + }; +}; + +struct PushRemote { + // Tip of the remote branch. + git_reference* ref; + + // Name of the push remote. For example, "origin". 
+ std::string name; + + // URL of the push remote. For example, "https://foo.com/repo.git". + std::string url; + + // Note: pushurl is not exposed (but could be). + + struct Free { + void operator()(const PushRemote* p) const { + if (p) { + if (p->ref) git_reference_free(p->ref); + delete p; + } + } + }; +}; + +using RemotePtr = std::unique_ptr<Remote, Remote::Free>; +using PushRemotePtr = std::unique_ptr<PushRemote, PushRemote::Free>; + +RemotePtr GetRemote(git_repository* repo, const git_reference* local); +PushRemotePtr GetPushRemote(git_repository* repo, const git_reference* local); + +} // namespace gitstatus + +#endif // ROMKATV_GITSTATUS_GIT_H_ diff --git a/gitstatus/src/gitstatus.cc b/gitstatus/src/gitstatus.cc new file mode 100644 index 00000000..81399ea7 --- /dev/null +++ b/gitstatus/src/gitstatus.cc @@ -0,0 +1,219 @@ +// Copyright 2019 Roman Perepelitsa. +// +// This file is part of GitStatus. +// +// GitStatus is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// GitStatus is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with GitStatus. If not, see <https://www.gnu.org/licenses/>. 
+ +#include <time.h> + +#include <cstddef> +#include <future> +#include <string> + +#include <git2.h> + +#include "check.h" +#include "git.h" +#include "logging.h" +#include "options.h" +#include "print.h" +#include "repo.h" +#include "repo_cache.h" +#include "request.h" +#include "response.h" +#include "scope_guard.h" +#include "thread_pool.h" +#include "timer.h" + +namespace gitstatus { +namespace { + +using namespace std::string_literals; + +void Truncate(std::string& s, size_t max_len) { + if (s.size() > max_len) s.resize(max_len); +} + +void ProcessRequest(const Options& opts, RepoCache& cache, Request req) { + Timer timer; + ON_SCOPE_EXIT(&) { timer.Report("request"); }; + + ResponseWriter resp(req.id); + Repo* repo = cache.Open(req.dir, req.from_dotgit); + if (!repo) return; + + git_config* cfg; + VERIFY(!git_repository_config(&cfg, repo->repo())) << GitError(); + ON_SCOPE_EXIT(=) { git_config_free(cfg); }; + VERIFY(!git_config_refresh(cfg)) << GitError(); + + // Symbolic reference if and only if the repo is empty. + git_reference* head = Head(repo->repo()); + if (!head) return; + ON_SCOPE_EXIT(=) { git_reference_free(head); }; + + // Null if and only if the repo is empty. + const git_oid* head_target = git_reference_target(head); + + // Looking up tags may take some time. Do it in the background while we check for stuff. + // Note that GetTagName() doesn't access index, so it'll overlap with index reading and + // parsing. + std::future<std::string> tag = repo->GetTagName(head_target); + ON_SCOPE_EXIT(&) { + if (tag.valid()) { + try { + tag.wait(); + } catch (const Exception&) { + } + } + }; + + // Repository working directory. Absolute; no trailing slash. E.g., "/home/romka/gitstatus". + StringView workdir(git_repository_workdir(repo->repo())); + if (workdir.len == 0) return; + if (workdir.len > 1 && workdir.ptr[workdir.len - 1] == '/') --workdir.len; + resp.Print(workdir); + + // Revision. Either 40 hex digits or an empty string for empty repo. 
+ resp.Print(head_target ? git_oid_tostr_s(head_target) : ""); + + // Local branch name (e.g., "master") or empty string if not on a branch. + resp.Print(LocalBranchName(head)); + + // Remote tracking branch or null. + RemotePtr remote = GetRemote(repo->repo(), head); + + // Tracking remote branch name (e.g., "master") or empty string if there is no tracking remote. + resp.Print(remote ? remote->branch : ""); + + // Tracking remote name (e.g., "origin") or empty string if there is no tracking remote. + resp.Print(remote ? remote->name : ""); + + // Tracking remote URL or empty string if there is no tracking remote. + resp.Print(remote ? remote->url : ""); + + // Repository state, A.K.A. action. For example, "merge". + resp.Print(RepoState(repo->repo())); + + IndexStats stats; + // Look for staged, unstaged and untracked. This is where most of the time is spent. + if (req.diff) stats = repo->GetIndexStats(head_target, cfg); + + // The number of files in the index. + resp.Print(stats.index_size); + // The number of staged changes. At most opts.max_num_staged. + resp.Print(stats.num_staged); + // The number of unstaged changes. At most opts.max_num_unstaged. 0 if index is too large. + resp.Print(stats.num_unstaged); + // The number of conflicted changes. At most opts.max_num_conflicted. 0 if index is too large. + resp.Print(stats.num_conflicted); + // The number of untracked changes. At most opts.max_num_untracked. 0 if index is too large. + resp.Print(stats.num_untracked); + + if (remote && remote->ref) { + const char* ref = git_reference_name(remote->ref); + // Number of commits we are ahead of upstream. Non-negative integer. + resp.Print(CountRange(repo->repo(), ref + "..HEAD"s)); + // Number of commits we are behind upstream. Non-negative integer. + resp.Print(CountRange(repo->repo(), "HEAD.."s + ref)); + } else { + resp.Print("0"); + resp.Print("0"); + } + + // Number of stashes. Non-negative integer. 
+ resp.Print(NumStashes(repo->repo())); + + // Tag that points to HEAD (e.g., "v4.2") or empty string if there aren't any. The same as + // `git describe --tags --exact-match`. + resp.Print(tag.get()); + + // The number of unstaged deleted files. At most stats.num_unstaged. + resp.Print(stats.num_unstaged_deleted); + // The number of staged new files. At most stats.num_staged. + resp.Print(stats.num_staged_new); + // The number of staged deleted files. At most stats.num_staged. + resp.Print(stats.num_staged_deleted); + + // Push remote or null. + PushRemotePtr push_remote = GetPushRemote(repo->repo(), head); + + // Push remote name (e.g., "origin") or empty string if there is no push remote. + resp.Print(push_remote ? push_remote->name : ""); + + // Push remote URL or empty string if there is no push remote. + resp.Print(push_remote ? push_remote->url : ""); + + if (push_remote && push_remote->ref) { + const char* ref = git_reference_name(push_remote->ref); + // Number of commits we are ahead of push remote. Non-negative integer. + resp.Print(CountRange(repo->repo(), ref + "..HEAD"s)); + // Number of commits we are behind push remote. Non-negative integer. + resp.Print(CountRange(repo->repo(), "HEAD.."s + ref)); + } else { + resp.Print("0"); + resp.Print("0"); + } + + // The number of files in the index with skip-worktree bit set. + resp.Print(stats.num_skip_worktree); + // The number of files in the index with assume-unchanged bit set. + resp.Print(stats.num_assume_unchanged); + + CommitMessage msg = head_target ? 
GetCommitMessage(repo->repo(), *head_target) : CommitMessage(); + Truncate(msg.summary, opts.max_commit_summary_length); + resp.Print(msg.encoding); + resp.Print(msg.summary); + + resp.Dump("with git status"); +} + +int GitStatus(int argc, char** argv) { + tzset(); + Options opts = ParseOptions(argc, argv); + g_min_log_level = opts.log_level; + for (int i = 0; i != argc; ++i) LOG(INFO) << "argv[" << i << "]: " << Print(argv[i]); + RequestReader reader(fileno(stdin), opts.lock_fd, opts.parent_pid); + RepoCache cache(opts); + + InitGlobalThreadPool(opts.num_threads); + git_libgit2_opts(GIT_OPT_ENABLE_STRICT_HASH_VERIFICATION, 0); + git_libgit2_opts(GIT_OPT_DISABLE_INDEX_CHECKSUM_VERIFICATION, 1); + git_libgit2_opts(GIT_OPT_DISABLE_INDEX_FILEPATH_VALIDATION, 1); + git_libgit2_opts(GIT_OPT_DISABLE_READNG_PACKED_TAGS, 1); + git_libgit2_init(); + + while (true) { + try { + Request req; + if (reader.ReadRequest(req)) { + LOG(INFO) << "Processing request: " << req; + try { + ProcessRequest(opts, cache, req); + LOG(INFO) << "Successfully processed request: " << req; + } catch (const Exception&) { + LOG(ERROR) << "Error processing request: " << req; + } + } else if (opts.repo_ttl >= Duration()) { + cache.Free(Clock::now() - opts.repo_ttl); + } + } catch (const Exception&) { + } + } +} + +} // namespace +} // namespace gitstatus + +int main(int argc, char** argv) { gitstatus::GitStatus(argc, argv); } diff --git a/gitstatus/src/index.cc b/gitstatus/src/index.cc new file mode 100644 index 00000000..4d66876b --- /dev/null +++ b/gitstatus/src/index.cc @@ -0,0 +1,456 @@ +// Copyright 2019 Roman Perepelitsa. +// +// This file is part of GitStatus. +// +// GitStatus is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// GitStatus is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with GitStatus. If not, see <https://www.gnu.org/licenses/>. + +#include "index.h" + +#include <dirent.h> +#include <fcntl.h> +#include <unistd.h> + +#include <algorithm> +#include <condition_variable> +#include <cstdint> +#include <cstring> +#include <iomanip> +#include <iterator> +#include <mutex> +#include <stack> + +#include "algorithm.h" +#include "check.h" +#include "dir.h" +#include "git.h" +#include "index.h" +#include "print.h" +#include "scope_guard.h" +#include "stat.h" +#include "string_cmp.h" +#include "thread_pool.h" + +namespace gitstatus { + +namespace { + +void CommonDir(Str<> str, const char* a, const char* b, size_t* dir_len, size_t* dir_depth) { + *dir_len = 0; + *dir_depth = 0; + for (size_t i = 1; str.Eq(*a, *b) && *a; ++i, ++a, ++b) { + if (*a == '/') { + *dir_len = i; + ++*dir_depth; + } + } +} + +size_t Weight(const IndexDir& dir) { return 1 + dir.subdirs.size() + dir.files.size(); } + +bool MTimeEq(const git_index_time& index, const struct timespec& workdir) { + if (index.seconds != workdir.tv_sec) return false; + if (int64_t{index.nanoseconds} == workdir.tv_nsec) return true; +#ifdef GITSTATUS_ZERO_NSEC + return index.nanoseconds == 0; +#else + return false; +#endif +} + +bool IsModified(const git_index_entry* entry, const struct stat& st, const RepoCaps& caps) { + mode_t mode = st.st_mode; + if (S_ISREG(mode)) { + if (!caps.has_symlinks && S_ISLNK(entry->mode)) { + mode = entry->mode; + } else if (!caps.trust_filemode) { + mode = entry->mode; + } else { + mode = S_IFREG | (mode & 0100 ? 0755 : 0644); + } + } else { + mode &= S_IFMT; + } + + bool res = false; + +#define COND(field, cond...) 
\ + if (cond) { \ + } else \ + res = true, \ + LOG(DEBUG) << "Dirty candidate (modified): " << Print(entry->path) << ": " #field " " + + COND(ino, !entry->ino || entry->ino == static_cast<std::uint32_t>(st.st_ino)) + << entry->ino << " => " << static_cast<std::uint32_t>(st.st_ino); + + COND(stage, GIT_INDEX_ENTRY_STAGE(entry) == 0) << "=> " << GIT_INDEX_ENTRY_STAGE(entry); + COND(fsize, int64_t{entry->file_size} == st.st_size) << entry->file_size << " => " << st.st_size; + COND(mtime, MTimeEq(entry->mtime, MTim(st))) << Print(entry->mtime) << " => " << Print(MTim(st)); + COND(mode, entry->mode == mode) << std::oct << entry->mode << " => " << std::oct << mode; + +#undef COND + + return res; +} + +int OpenDir(int parent_fd, const char* name) { + return openat(parent_fd, name, O_RDONLY | O_DIRECTORY | O_CLOEXEC); +} + +void OpenTail(int* fds, size_t nfds, int root_fd, StringView dirname, Arena& arena) { + CHECK(fds && nfds && root_fd >= 0); + std::fill(fds, fds + nfds, -1); + if (!dirname.len) return; + CHECK(dirname.len > 1); + CHECK(dirname.ptr[0] != '/'); + CHECK(dirname.ptr[dirname.len - 1] == '/'); + + char* begin = arena.StrDup(dirname.ptr, dirname.len - 1); + WithArena<std::vector<const char*>> subdirs(&arena); + subdirs.reserve(nfds + 1); + + for (char* sep = begin + dirname.len - 1; subdirs.size() < nfds;) { + sep = FindLast(begin, sep, '/'); + if (sep == begin) break; + *sep = 0; + subdirs.push_back(sep + 1); + } + subdirs.push_back(begin); + if (subdirs.size() < nfds + 1) subdirs.push_back("."); + CHECK(subdirs.size() <= nfds + 1); + + for (size_t i = subdirs.size(); i != 1; --i) { + const char* path = subdirs[i - 1]; + if ((root_fd = OpenDir(root_fd, path)) < 0) { + for (; i != subdirs.size(); ++i) { + CHECK(!close(fds[i - 1])) << Errno(); + fds[i - 1] = -1; + } + return; + } + fds[i - 2] = root_fd; + } +} + +std::vector<const char*> ScanDirs(git_index* index, int root_fd, IndexDir* const* begin, + IndexDir* const* end, const RepoCaps& caps, + const 
ScanOpts& opts) { + const Str<> str(caps.case_sensitive); + + Arena arena; + std::vector<const char*> dirty_candidates; + std::vector<char*> entries; + entries.reserve(128); + + auto AddCandidate = [&](const char* kind, const char* path) { + if (kind) LOG(DEBUG) << "Dirty candidate (" << kind << "): " << Print(path); + dirty_candidates.push_back(path); + }; + + constexpr ssize_t kDirStackSize = 5; + int dir_fd[kDirStackSize]; + std::fill(std::begin(dir_fd), std::end(dir_fd), -1); + auto Close = [](int& fd) { + if (fd >= 0) { + CHECK(!close(fd)) << Errno(); + fd = -1; + } + }; + auto CloseAll = [&] { std::for_each(std::begin(dir_fd), std::end(dir_fd), Close); }; + ON_SCOPE_EXIT(&) { CloseAll(); }; + if (begin != end) OpenTail(dir_fd, kDirStackSize, root_fd, (*begin)->path, arena); + + for (IndexDir* const* it = begin; it != end; ++it) { + IndexDir& dir = **it; + + auto Basename = [&](const git_index_entry* e) { return e->path + dir.path.len; }; + + auto AddUnmached = [&](StringView basename) { + if (!basename.len) { + dir.st = {}; + dir.unmatched.clear(); + dir.arena.Reuse(); + } else if (str.Eq(basename, StringView(".git/"))) { + return; + } + char* path = dir.arena.StrCat(dir.path, basename); + dir.unmatched.push_back(path); + AddCandidate(basename.len ? "new" : "unreadable", path); + }; + + auto StatFiles = [&]() { + struct stat st; + for (const git_index_entry* file : dir.files) { + if (fstatat(*dir_fd, Basename(file), &st, AT_SYMLINK_NOFOLLOW)) { + AddCandidate(errno == ENOENT ? "deleted" : "unreadable", file->path); + } else if (IsModified(file, st, caps)) { + AddCandidate(nullptr, file->path); + } + } + }; + + ssize_t d = 0; + if ((it == begin || (d = it[-1]->depth + 1 - dir.depth) < kDirStackSize) && dir_fd[d] >= 0) { + CHECK(d >= 0); + int fd = OpenDir(dir_fd[d], arena.StrDup(dir.basename.ptr, dir.basename.len)); + for (ssize_t i = 0; i != d; ++i) Close(dir_fd[i]); + std::rotate(dir_fd, dir_fd + (d ? 
d : kDirStackSize) - 1, dir_fd + kDirStackSize); + Close(*dir_fd); + *dir_fd = fd; + } else { + CloseAll(); + if (dir.path.len) { + CHECK(dir.path.ptr[0] != '/'); + CHECK(dir.path.ptr[dir.path.len - 1] == '/'); + *dir_fd = OpenDir(root_fd, arena.StrDup(dir.path.ptr, dir.path.len - 1)); + } else { + VERIFY((*dir_fd = dup(root_fd)) >= 0) << Errno(); + } + } + if (*dir_fd < 0) { + CloseAll(); + AddUnmached(""); + continue; + } + + if (!opts.include_untracked) { + StatFiles(); + continue; + } + + if (opts.untracked_cache != Tribool::kFalse) { + struct stat st; + if (fstat(*dir_fd, &st)) { + AddUnmached(""); + continue; + } + if (opts.untracked_cache == Tribool::kTrue && StatEq(st, dir.st)) { + StatFiles(); + for (const char* path : dir.unmatched) AddCandidate("new", path); + continue; + } + dir.st = st; + } + + entries.clear(); + arena.Reuse(); + if (!ListDir(*dir_fd, arena, entries, caps.precompose_unicode, caps.case_sensitive)) { + AddUnmached(""); + continue; + } + dir.unmatched.clear(); + dir.arena.Reuse(); + + const git_index_entry* const* file = dir.files.data(); + const git_index_entry* const* file_end = file + dir.files.size(); + const StringView* subdir = dir.subdirs.data(); + const StringView* subdir_end = subdir + dir.subdirs.size(); + + for (char* entry : entries) { + bool matched = false; + + for (; file != file_end; ++file) { + int cmp = str.Cmp(Basename(*file), entry); + if (cmp < 0) { + AddCandidate("deleted", (*file)->path); + } else if (cmp == 0) { + struct stat st; + if (fstatat(*dir_fd, entry, &st, AT_SYMLINK_NOFOLLOW)) { + AddCandidate("unreadable", (*file)->path); + } else if (IsModified(*file, st, caps)) { + AddCandidate(nullptr, (*file)->path); + } + matched = true; + ++file; + break; + } else { + break; + } + } + + if (matched) continue; + + for (; subdir != subdir_end; ++subdir) { + int cmp = str.Cmp(*subdir, entry); + if (cmp > 0) break; + if (cmp == 0) { + matched = true; + ++subdir; + break; + } + } + + if (!matched) { + StringView 
basename(entry); + if (entry[-1] == DT_DIR) entry[basename.len++] = '/'; + AddUnmached(basename); + } + } + + for (; file != file_end; ++file) AddCandidate("deleted", (*file)->path); + } + + return dirty_candidates; +} + +} // namespace + +RepoCaps::RepoCaps(git_repository* repo, git_index* index) { + trust_filemode = git_index_is_filemode_trustworthy(index); + has_symlinks = git_index_supports_symlinks(index); + case_sensitive = git_index_is_case_sensitive(index); + precompose_unicode = git_index_precompose_unicode(index); + LOG(DEBUG) << "Repository capabilities for " << Print(git_repository_workdir(repo)) << ": " + << "is_filemode_trustworthy = " << std::boolalpha << trust_filemode << ", " + << "index_supports_symlinks = " << std::boolalpha << has_symlinks << ", " + << "index_is_case_sensitive = " << std::boolalpha << case_sensitive << ", " + << "precompose_unicode = " << std::boolalpha << precompose_unicode; +} + +Index::Index(git_repository* repo, git_index* index) + : dirs_(&arena_), + splits_(&arena_), + git_index_(index), + root_dir_(git_repository_workdir(repo)), + caps_(repo, index) { + size_t total_weight = InitDirs(index); + InitSplits(total_weight); +} + +size_t Index::InitDirs(git_index* index) { + const Str<> str(git_index_is_case_sensitive(index)); + const size_t index_size = git_index_entrycount(index); + dirs_.reserve(index_size / 8); + std::stack<IndexDir*> stack; + stack.push(arena_.DirectInit<IndexDir>(&arena_)); + + size_t total_weight = 0; + auto PopDir = [&] { + CHECK(!stack.empty()); + IndexDir* top = stack.top(); + CHECK(top->depth + 1 == stack.size()); + if (!std::is_sorted(top->subdirs.begin(), top->subdirs.end(), str.Lt)) { + StrSort(top->subdirs.begin(), top->subdirs.end(), str.case_sensitive); + } + total_weight += Weight(*top); + dirs_.push_back(top); + stack.pop(); + }; + + for (size_t i = 0; i != index_size; ++i) { + const git_index_entry* entry = git_index_get_byindex_no_sort(index, i); + IndexDir* prev = stack.top(); + size_t 
common_len, common_depth; + CommonDir(str, prev->path.ptr, entry->path, &common_len, &common_depth); + CHECK(common_depth <= prev->depth); + + for (size_t i = common_depth; i != prev->depth; ++i) PopDir(); + + for (const char* p = entry->path + common_len; (p = std::strchr(p, '/')); ++p) { + IndexDir* top = stack.top(); + StringView subdir(entry->path + top->path.len, p); + top->subdirs.push_back(subdir); + IndexDir* dir = arena_.DirectInit<IndexDir>(&arena_); + dir->path = StringView(entry->path, p - entry->path + 1); + dir->basename = subdir; + dir->depth = stack.size(); + CHECK(dir->path.ptr[dir->path.len - 1] == '/'); + stack.push(dir); + } + + CHECK(!stack.empty()); + IndexDir* dir = stack.top(); + dir->files.push_back(entry); + } + + CHECK(!stack.empty()); + do { + PopDir(); + } while (!stack.empty()); + std::reverse(dirs_.begin(), dirs_.end()); + + return total_weight; +} + +void Index::InitSplits(size_t total_weight) { + constexpr size_t kMinShardWeight = 512; + const size_t kNumShards = 16 * GlobalThreadPool()->num_threads(); + const size_t shard_weight = std::max(kMinShardWeight, total_weight / kNumShards); + + splits_.reserve(kNumShards + 1); + splits_.push_back(0); + + for (size_t i = 0, w = 0; i != dirs_.size(); ++i) { + w += Weight(*dirs_[i]); + if (w >= shard_weight) { + w = 0; + splits_.push_back(i + 1); + } + } + + if (splits_.back() != dirs_.size()) splits_.push_back(dirs_.size()); + CHECK(splits_.size() <= kNumShards + 1); + CHECK(std::is_sorted(splits_.begin(), splits_.end())); + CHECK(std::adjacent_find(splits_.begin(), splits_.end()) == splits_.end()); +} + +std::vector<const char*> Index::GetDirtyCandidates(const ScanOpts& opts) { + int root_fd = open(root_dir_, O_RDONLY | O_DIRECTORY | O_CLOEXEC); + VERIFY(root_fd >= 0); + ON_SCOPE_EXIT(&) { CHECK(!close(root_fd)) << Errno(); }; + + CHECK(!splits_.empty()); + + std::mutex mutex; + std::condition_variable cv; + size_t inflight = splits_.size() - 1; + bool error = false; + std::vector<const 
char*> res; + + for (size_t i = 0; i != splits_.size() - 1; ++i) { + size_t from = splits_[i]; + size_t to = splits_[i + 1]; + + GlobalThreadPool()->Schedule([&, from, to]() { + ON_SCOPE_EXIT(&) { + std::unique_lock<std::mutex> lock(mutex); + CHECK(inflight); + if (--inflight == 0) cv.notify_one(); + }; + try { + std::vector<const char*> candidates = + ScanDirs(git_index_, root_fd, dirs_.data() + from, dirs_.data() + to, caps_, opts); + if (!candidates.empty()) { + std::unique_lock<std::mutex> lock(mutex); + res.insert(res.end(), candidates.begin(), candidates.end()); + } + } catch (const Exception&) { + std::unique_lock<std::mutex> lock(mutex); + error = true; + } + }); + } + + { + std::unique_lock<std::mutex> lock(mutex); + while (inflight) cv.wait(lock); + } + + VERIFY(!error); + StrSort(res.begin(), res.end(), git_index_is_case_sensitive(git_index_)); + auto StrEq = [](const char* a, const char* b) { return !strcmp(a, b); }; + res.erase(std::unique(res.begin(), res.end(), StrEq), res.end()); + return res; +} + +} // namespace gitstatus diff --git a/gitstatus/src/index.h b/gitstatus/src/index.h new file mode 100644 index 00000000..bbf95673 --- /dev/null +++ b/gitstatus/src/index.h @@ -0,0 +1,84 @@ +// Copyright 2019 Roman Perepelitsa. +// +// This file is part of GitStatus. +// +// GitStatus is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// GitStatus is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with GitStatus. If not, see <https://www.gnu.org/licenses/>. 

#ifndef ROMKATV_GITSTATUS_INDEX_H_
#define ROMKATV_GITSTATUS_INDEX_H_

#include <sys/stat.h>

#include <git2.h>

#include <cstddef>
#include <string>
#include <vector>

#include "arena.h"
#include "options.h"
#include "string_view.h"
#include "tribool.h"

namespace gitstatus {

// Capabilities of a repository, computed once by the constructor from the repository and its
// index.
// NOTE(review): the field names suggest they mirror git's core.fileMode, core.symlinks,
// core.ignoreCase and core.precomposeUnicode settings -- confirm against the constructor.
struct RepoCaps {
  RepoCaps(git_repository* repo, git_index* index);

  bool trust_filemode;
  bool has_symlinks;
  bool case_sensitive;
  bool precompose_unicode;
};

// Per-scan options passed to Index::GetDirtyCandidates().
struct ScanOpts {
  // Whether the scan should also look for untracked files.
  bool include_untracked;
  // Tri-state flag forwarded to the directory scan.
  // NOTE(review): presumably tells whether directory mtimes can be trusted -- confirm.
  Tribool untracked_cache;
};

// A directory that contains at least one index entry, directly or indirectly.
struct IndexDir {
  // files and subdirs allocate from the supplied arena.
  explicit IndexDir(Arena* arena) : files(arena), subdirs(arena) {}

  // Path of the directory relative to the repository root; ends with '/' for directories created
  // while walking index entries.
  StringView path;
  // Name of the directory relative to its parent directory.
  StringView basename;
  // Nesting level of the directory.
  size_t depth = 0;
  struct stat st = {};
  // Index entries that live directly in this directory.
  WithArena<std::vector<const git_index_entry*>> files;
  // Names of the immediate subdirectories, relative to this directory.
  WithArena<std::vector<StringView>> subdirs;

  // NOTE(review): arena and unmatched look like per-directory scratch state of the scan --
  // confirm against the scanning code.
  Arena arena;
  std::vector<const char*> unmatched;
};

// Directory-structured view of a git index that can be scanned in parallel for paths that may
// differ from the index.
class Index {
 public:
  Index(git_repository* repo, git_index* index);

  // Scans the working directory on the global thread pool and returns a sorted, deduplicated list
  // of candidate paths. Blocks until the scan is finished.
  std::vector<const char*> GetDirtyCandidates(const ScanOpts& opts);

 private:
  // Populates dirs_ from the index and returns the total weight that InitSplits() expects.
  size_t InitDirs(git_index* index);
  // Populates splits_ with shard boundaries over dirs_.
  void InitSplits(size_t total_weight);

  Arena arena_;
  WithArena<std::vector<IndexDir*>> dirs_;
  // Strictly increasing indices into dirs_; adjacent pairs delimit one shard of work.
  WithArena<std::vector<size_t>> splits_;
  git_index* git_index_;
  const char* root_dir_;
  RepoCaps caps_;
};

}  // namespace gitstatus

#endif  // ROMKATV_GITSTATUS_INDEX_H_
diff --git a/gitstatus/src/logging.cc b/gitstatus/src/logging.cc new file mode 100644 index 00000000..fb9ac9ea --- /dev/null +++ b/gitstatus/src/logging.cc @@ -0,0 +1,139 @@
// Copyright 2019 Roman Perepelitsa.
//
// This file is part of GitStatus.
//
// GitStatus is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
+// +// GitStatus is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with GitStatus. If not, see <https://www.gnu.org/licenses/>. + +#include "logging.h" + +#include <pthread.h> +#include <time.h> + +#include <cerrno> +#include <cstdio> +#include <cstring> +#include <ctime> +#include <mutex> +#include <string> + +namespace gitstatus { + +namespace internal_logging { + +namespace { + +std::mutex g_log_mutex; + +constexpr char kHexLower[] = {'0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; + +void FormatThreadId(char (&out)[2 * sizeof(std::uintptr_t) + 1]) { + std::uintptr_t tid = (std::uintptr_t)pthread_self(); + char* p = out + sizeof(out) - 1; + *p = 0; + do { + --p; + *p = kHexLower[tid & 0xF]; + tid >>= 4; + } while (p != out); +} + +void FormatCurrentTime(char (&out)[64]) { + std::time_t time = std::time(nullptr); + struct tm tm; + if (localtime_r(&time, &tm) != &tm || std::strftime(out, sizeof(out), "%F %T", &tm) == 0) { + std::strcpy(out, "undef"); + } +} + +} // namespace + +LogStreamBase::LogStreamBase(const char* file, int line, LogLevel lvl) + : errno_(errno), file_(file), line_(line), lvl_(LogLevelStr(lvl)) { + strm_ = std::make_unique<std::ostringstream>(); +} + +void LogStreamBase::Flush() { + { + std::string msg = strm_->str(); + char tid[2 * sizeof(std::uintptr_t) + 1]; + FormatThreadId(tid); + char time[64]; + FormatCurrentTime(time); + + std::unique_lock<std::mutex> lock(g_log_mutex); + std::fprintf(stderr, "[%s %s %s %s:%d] %s\n", time, tid, lvl_, file_, line_, msg.c_str()); + } + strm_.reset(); + errno = errno_; +} + +std::ostream& operator<<(std::ostream& strm, Errno e) { + // GNU C Library uses a buffer of 1024 characters for strerror(). 
Mimic to avoid truncations. + char buf[1024]; + auto x = strerror_r(e.err, buf, sizeof(buf)); + // There are two versions of strerror_r with different semantics. We can figure out which + // one we've got by looking at the result type. + if (std::is_same<decltype(x), int>::value) { + // XSI-compliant version. + strm << (x ? "unknown error" : buf); + } else if (std::is_same<decltype(x), char*>::value) { + // GNU-specific version. + strm << x; + } else { + // Something else entirely. + strm << "unknown error"; + } + return strm; +} + +} // namespace internal_logging + +LogLevel g_min_log_level = INFO; + +const char* LogLevelStr(LogLevel lvl) { + switch (lvl) { + case DEBUG: + return "DEBUG"; + case INFO: + return "INFO"; + case WARN: + return "WARN"; + case ERROR: + return "ERROR"; + case FATAL: + return "FATAL"; + } + return "UNKNOWN"; +} + +bool ParseLogLevel(const char* s, LogLevel& lvl) { + if (!s) + return false; + else if (!std::strcmp(s, "DEBUG")) + lvl = DEBUG; + else if (!std::strcmp(s, "INFO")) + lvl = INFO; + else if (!std::strcmp(s, "WARN")) + lvl = WARN; + else if (!std::strcmp(s, "ERROR")) + lvl = ERROR; + else if (!std::strcmp(s, "FATAL")) + lvl = FATAL; + else + return false; + return true; +} + +} // namespace gitstatus diff --git a/gitstatus/src/logging.h b/gitstatus/src/logging.h new file mode 100644 index 00000000..6ddb2e16 --- /dev/null +++ b/gitstatus/src/logging.h @@ -0,0 +1,124 @@ +// Copyright 2019 Roman Perepelitsa. +// +// This file is part of GitStatus. +// +// GitStatus is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// GitStatus is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with GitStatus. If not, see <https://www.gnu.org/licenses/>.

#ifndef ROMKATV_GITSTATUS_LOGGING_H_
#define ROMKATV_GITSTATUS_LOGGING_H_

#include <cstdlib>
#include <memory>
#include <ostream>
#include <sstream>

// Usage: LOG(INFO) << "message " << value;
//
// `severity` is the unqualified name of a gitstatus::LogLevel enumerator. The extra level of
// indirection through LOG_I lets `severity` be macro-expanded before it is pasted after
// `::gitstatus::`.
#define LOG(severity) LOG_I(severity)

// Expands to a conditional expression of type void. When the severity is below g_min_log_level
// the streamed operands are never evaluated; otherwise a temporary LogStream collects them and
// emits the message from its destructor at the end of the full expression. Assignable's
// operator= has lower precedence than <<, so it swallows the result of the whole << chain and
// makes both branches of ?: void.
#define LOG_I(severity)                                                                         \
  (::gitstatus::severity < ::gitstatus::g_min_log_level)                                        \
      ? static_cast<void>(0)                                                                    \
      : ::gitstatus::internal_logging::Assignable() =                                           \
            ::gitstatus::internal_logging::LogStream<::gitstatus::severity>(__FILE__, __LINE__, \
                                                                            ::gitstatus::severity) \
                .ref()

namespace gitstatus {

// Log levels in increasing order of severity.
enum LogLevel {
  DEBUG,
  INFO,
  WARN,
  ERROR,
  FATAL,
};

// Returns the upper-case name of the level.
const char* LogLevelStr(LogLevel lvl);
// Parses an upper-case level name into `lvl`; returns false if `s` is not a level name.
bool ParseLogLevel(const char* s, LogLevel& lvl);

// Messages below this level are dropped by LOG().
extern LogLevel g_min_log_level;

namespace internal_logging {

// Helper for LOG_I: accepts anything on the right-hand side of = and yields void.
struct Assignable {
  template <class T>
  void operator=(const T&) const {}
};

// Accumulates one log message. Derived classes call Flush() from their destructors.
class LogStreamBase {
 public:
  LogStreamBase(const char* file, int line, LogLevel lvl);

  // Turns a temporary into an lvalue so that the operator<< overloads below can bind to it.
  LogStreamBase& ref() { return *this; }
  std::ostream& strm() { return *strm_; }
  // Value of errno at the time the stream was constructed.
  int stashed_errno() const { return errno_; }

 protected:
  // Emits the accumulated message and restores errno to stashed_errno().
  void Flush();

 private:
  int errno_;
  const char* file_;
  int line_;
  const char* lvl_;
  std::unique_ptr<std::ostringstream> strm_;
};

// Log stream for non-fatal levels: emits the message on destruction.
template <LogLevel>
class LogStream : public LogStreamBase {
 public:
  using LogStreamBase::LogStreamBase;
  ~LogStream() { this->Flush(); }
};

// Log stream for FATAL: emits the message and aborts the process on destruction.
template <>
class LogStream<FATAL> : public LogStreamBase {
 public:
  using LogStreamBase::LogStreamBase;
  ~LogStream() __attribute__((noreturn)) {
    this->Flush();
    std::abort();
  }
};

// Forwards any streamable value to the underlying std::ostream.
template <class T>
LogStreamBase& operator<<(LogStreamBase& strm, const T& val) {
  strm.strm() << val;
  return strm;
}

// Forwards stream manipulators such as std::endl, which the template above cannot deduce.
inline LogStreamBase& operator<<(LogStreamBase& strm, std::ostream& (*manip)(std::ostream&)) {
  strm.strm() << manip;
  return strm;
}

// Streamable wrapper around an error number; prints the error description.
struct Errno {
  int err;
};

std::ostream& operator<<(std::ostream& strm, Errno e);

// Streamable tag that prints the description of the errno value stashed by the log stream.
struct StashedErrno {};

inline LogStreamBase& operator<<(LogStreamBase& strm, StashedErrno) {
  return strm << Errno{strm.stashed_errno()};
}

}  // namespace internal_logging

// Errno(err) prints the description of `err`; Errno() prints the description of the errno value
// that was current when the log statement started.
inline internal_logging::Errno Errno(int err) { return {err}; }
inline internal_logging::StashedErrno Errno() { return {}; }

}  // namespace gitstatus

#endif  // ROMKATV_GITSTATUS_LOGGING_H_
diff --git a/gitstatus/src/options.cc b/gitstatus/src/options.cc new file mode 100644 index 00000000..46f3845c --- /dev/null +++ b/gitstatus/src/options.cc @@ -0,0 +1,362 @@
// Copyright 2019 Roman Perepelitsa.
//
// This file is part of GitStatus.
//
// GitStatus is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// GitStatus is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with GitStatus. If not, see <https://www.gnu.org/licenses/>.
+ +#include "options.h" + +#include <fnmatch.h> +#include <getopt.h> +#include <unistd.h> + +#include <algorithm> +#include <climits> +#include <cstdlib> +#include <cstring> +#include <iostream> + +#include "print.h" + +namespace gitstatus { + +namespace { + +long ParseLong(const char* s) { + errno = 0; + char* end = nullptr; + long res = std::strtol(s, &end, 10); + if (*end || end == s || errno) { + std::cerr << "gitstatusd: not an integer: " << s << std::endl; + std::exit(10); + } + return res; +} + +long ParseInt(const char* s) { + long res = ParseLong(s); + if (res < INT_MIN || res > INT_MAX) { + std::cerr << "gitstatusd: integer out of bounds: " << s << std::endl; + std::exit(10); + } + return res; +} + +size_t ParseSizeT(const char* s) { + static_assert(sizeof(long) <= sizeof(size_t), ""); + long res = ParseLong(s); + return res >= 0 ? res : -1; +} + +void PrintUsage() { + std::cout << "Usage: gitstatusd [OPTION]...\n" + << "Print machine-readable status of the git repos for directores in stdin.\n" + << "\n" + << "OPTIONS\n" + << " -l, --lock-fd=NUM [default=-1]\n" + << " If non-negative, check whether the specified file descriptor is locked when\n" + << " not receiving any requests for one second; exit if it isn't locked.\n" + << "\n" + << " -p, --parent-pid=NUM [default=-1]\n" + << " If non-negative, send signal 0 to the specified PID when not receiving any\n" + << " requests for one second; exit if signal sending fails.\n" + << "\n" + << " -t, --num-threads=NUM [default=1]\n" + << " Use this many threads to scan git workdir for unstaged and untracked files.\n" + << " Empirically, setting this parameter to twice the number of virtual CPU yields\n" + << " maximum performance.\n" + << "\n" + << " -v, --log-level=STR [default=INFO]\n" + << " Don't write entires to log whose log level is below this. 
Log levels in\n" + << " increasing order: DEBUG, INFO, WARN, ERROR, FATAL.\n" + << "\n" + << " -r, --repo-ttl-seconds=NUM [default=3600]\n" + << " Close git repositories that haven't been used for this long. This is meant to\n" + << " release resources such as memory and file descriptors. The next request for a\n" + << " repo that's been closed is much slower than for a repo that hasn't been.\n" + << " Negative value means infinity.\n" + << "\n" + << " -z, --max-commit-summary-length=NUM [default=256]\n" + << " Truncate commit summary if it's longer than this many bytes.\n" + << "\n" + << " -s, --max-num-staged=NUM [default=1]\n" + << " Report at most this many staged changes; negative value means infinity.\n" + << "\n" + << " -u, --max-num-unstaged=NUM [default=1]\n" + << " Report at most this many unstaged changes; negative value means infinity.\n" + << "\n" + << " -c, --max-num-conflicted=NUM [default=1]\n" + << " Report at most this many conflicted changes; negative value means infinity.\n" + << "\n" + << " -d, --max-num-untracked=NUM [default=1]\n" + << " Report at most this many untracked files; negative value means infinity.\n" + << "\n" + << " -m, --dirty-max-index-size=NUM [default=-1]\n" + << " If a repo has more files in its index than this, override --max-num-unstaged\n" + << " and --max-num-untracked (but not --max-num-staged) with zeros; negative value\n" + << " means infinity.\n" + << "\n" + << " -e, --recurse-untracked-dirs\n" + << " Count files within untracked directories like `git status --untracked-files`.\n" + << "\n" + << " -U, --ignore-status-show-untracked-files\n" + << " Unless this option is specified, report zero untracked files for repositories\n" + << " with status.showUntrackedFiles = false.\n" + << "\n" + << " -W, --ignore-bash-show-untracked-files\n" + << " Unless this option is specified, report zero untracked files for repositories\n" + << " with bash.showUntrackedFiles = false.\n" + << "\n" + << " -D, 
--ignore-bash-show-dirty-state\n" + << " Unless this option is specified, report zero staged, unstaged and conflicted\n" + << " changes for repositories with bash.showDirtyState = false.\n" + << "\n" + << " -V, --version\n" + << " Print gitstatusd version and exit.\n" + << "\n" + << " -G, --version-glob=STR [default=*]\n" + << " Immediately exit with code 11 if gitstatusd version (see --version) doesn't\n" + << " does not match the specified pattern. Matching is done with fnmatch(3)\n" + << " without flags.\n" + << "\n" + << " -h, --help\n" + << " Display this help and exit.\n" + << "\n" + << "INPUT\n" + << "\n" + << " Requests are read from stdin, separated by ascii 30 (record separator). Each\n" + << " request is made of the following fields, in the specified order, separated by\n" + << " ascii 31 (unit separator):\n" + << "\n" + << " 1. Request ID. Any string. Can be empty.\n" + << " 2. Path to the directory for which git stats are being requested.\n" + << " If the first character is ':', it is removed and the remaning path\n" + << " is treated as GIT_DIR.\n" + << " 3. (Optional) '1' to disable computation of anything that requires reading\n" + << " git index; '0' for the default behavior of computing everything.\n" + << "\n" + << "OUTPUT\n" + << "\n" + << " For every request read from stdin there is response written to stdout.\n" + << " Responses are separated by ascii 30 (record separator). Each response is made\n" + << " of the following fields, in the specified order, separated by ascii 31\n" + << " (unit separator):\n" + << "\n" + << " 1. Request id. The same as the first field in the request.\n" + << " 2. 0 if the directory isn't a git repo, 1 otherwise. If 0, all the\n" + << " following fields are missing.\n" + << " 3. Absolute path to the git repository workdir.\n" + << " 4. Commit hash that HEAD is pointing to. 40 hex digits.\n" + << " 5. Local branch name or empty if not on a branch.\n" + << " 6. Upstream branch name. Can be empty.\n" + << " 7. 
The remote name, e.g. \"upstream\" or \"origin\".\n" + << " 8. Remote URL. Can be empty.\n" + << " 9. Repository state, A.K.A. action. Can be empty.\n" + << " 10. The number of files in the index.\n" + << " 11. The number of staged changes.\n" + << " 12. The number of unstaged changes.\n" + << " 13. The number of conflicted changes.\n" + << " 14. The number of untracked files.\n" + << " 15. Number of commits the current branch is ahead of upstream.\n" + << " 16. Number of commits the current branch is behind upstream.\n" + << " 17. The number of stashes.\n" + << " 18. The last tag (in lexicographical order) that points to the same\n" + << " commit as HEAD.\n" + << " 19. The number of unstaged deleted files.\n" + << " 20. The number of staged new files.\n" + << " 21. The number of staged deleted files.\n" + << " 22. The push remote name, e.g. \"upstream\" or \"origin\".\n" + << " 23. Push remote URL. Can be empty.\n" + << " 24. Number of commits the current branch is ahead of push remote.\n" + << " 25. Number of commits the current branch is behind push remote.\n" + << " 26. Number of files in the index with skip-worktree bit set.\n" + << " 27. Number of files in the index with assume-unchanged bit set.\n" + << " 28. Encoding of the HEAD's commit message. Empty value means UTF-8.\n" + << " 29. 
The first paragraph of the HEAD's commit message as one line.\n" + << "\n" + << "Note: Renamed files are reported as deleted plus new.\n" + << "\n" + << "EXAMPLE\n" + << "\n" + << " Send a single request and print response (zsh syntax):\n" + << "\n" + << " local req_id=id\n" + << " local dir=$PWD\n" + << " echo -nE $req_id$'\\x1f'$dir$'\\x1e' | ./gitstatusd | {\n" + << " local resp\n" + << " IFS=$'\\x1f' read -rd $'\\x1e' -A resp && print -lr -- \"${(@qq)resp}\"\n" + << " }\n" + << "\n" + << " Output:" + << "\n" + << " 'id'\n" + << " '1'\n" + << " '/home/romka/gitstatus'\n" + << " 'bf46bf03dbab7108801b53f8a720caee8464c9c3'\n" + << " 'master'\n" + << " 'master'\n" + << " 'origin'\n" + << " 'git@github.com:romkatv/gitstatus.git'\n" + << " ''\n" + << " '70'\n" + << " '1'\n" + << " '0'\n" + << " '0'\n" + << " '2'\n" + << " '0'\n" + << " '0'\n" + << " ''\n" + << " '0'\n" + << " '0'\n" + << " '0'\n" + << " ''\n" + << " ''\n" + << " '0'\n" + << " '0'\n" + << " '0'\n" + << " '0'\n" + << " ''\n" + << " 'add a build server for darwin-arm64'\n" + << "\n" + << "EXIT STATUS\n" + << "\n" + << " The command returns zero on success (when printing help or on EOF),\n" + << " non-zero on failure. In the latter case the output is unspecified.\n" + << "\n" + << "COPYRIGHT\n" + << "\n" + << " Copyright 2019 Roman Perepelitsa\n" + << " This is free software; see https://github.com/romkatv/gitstatus for copying\n" + << " conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR\n" + << " A PARTICULAR PURPOSE." 
<< std::endl; +} + +const char* Version() { +#define _INTERNAL_GITSTATUS_STRINGIZE(x) _INTERNAL_GITSTATUS_STRINGIZE_I(x) +#define _INTERNAL_GITSTATUS_STRINGIZE_I(x) #x + return _INTERNAL_GITSTATUS_STRINGIZE(GITSTATUS_VERSION); +#undef _INTERNAL_GITSTATUS_STRINGIZE_I +#undef _INTERNAL_GITSTATUS_STRINGIZE +} + +} // namespace + +Options ParseOptions(int argc, char** argv) { + const struct option opts[] = {{"help", no_argument, nullptr, 'h'}, + {"version", no_argument, nullptr, 'V'}, + {"version-glob", required_argument, nullptr, 'G'}, + {"lock-fd", required_argument, nullptr, 'l'}, + {"parent-pid", required_argument, nullptr, 'p'}, + {"num-threads", required_argument, nullptr, 't'}, + {"log-level", required_argument, nullptr, 'v'}, + {"repo-ttl-seconds", required_argument, nullptr, 'r'}, + {"max-commit-summary-length", required_argument, nullptr, 'z'}, + {"max-num-staged", required_argument, nullptr, 's'}, + {"max-num-unstaged", required_argument, nullptr, 'u'}, + {"max-num-conflicted", required_argument, nullptr, 'c'}, + {"max-num-untracked", required_argument, nullptr, 'd'}, + {"dirty-max-index-size", required_argument, nullptr, 'm'}, + {"recurse-untracked-dirs", no_argument, nullptr, 'e'}, + {"ignore-status-show-untracked-files", no_argument, nullptr, 'U'}, + {"ignore-bash-show-untracked-files", no_argument, nullptr, 'W'}, + {"ignore-bash-show-dirty-state", no_argument, nullptr, 'D'}, + {}}; + Options res; + while (true) { + switch (getopt_long(argc, argv, "hVG:l:p:t:v:r:z:s:u:c:d:m:eUWD", opts, nullptr)) { + case -1: + if (optind != argc) { + std::cerr << "unexpected positional argument: " << argv[optind] << std::endl; + std::exit(10); + } + return res; + case 'h': + PrintUsage(); + std::exit(0); + case 'V': + std::cout << Version() << std::endl; + std::exit(0); + case 'G': + if (int err = fnmatch(optarg, Version(), 0)) { + if (err != FNM_NOMATCH) { + std::cerr << "Cannot match " << Print(Version()) << " against pattern " + << Print(optarg) << ": error " << err; 
+ std::exit(10); + } + std::cerr << "Version mismatch. Wanted (pattern): " << Print(optarg) + << ". Actual: " << Print(Version()) << "." << std::endl; + std::exit(11); + } + break; + case 'l': + res.lock_fd = ParseInt(optarg); + break; + case 'p': + res.parent_pid = ParseInt(optarg); + break; + case 'v': + if (!ParseLogLevel(optarg, res.log_level)) { + std::cerr << "invalid log level: " << optarg << std::endl; + std::exit(10); + } + break; + case 'r': + res.repo_ttl = std::chrono::seconds(ParseLong(optarg)); + break; + case 't': { + long n = ParseLong(optarg); + if (n <= 0) { + std::cerr << "invalid number of threads: " << n << std::endl; + std::exit(10); + } + res.num_threads = n; + break; + } + case 'z': + res.max_commit_summary_length = ParseSizeT(optarg); + break; + case 's': + res.max_num_staged = ParseSizeT(optarg); + break; + case 'u': + res.max_num_unstaged = ParseSizeT(optarg); + break; + case 'c': + res.max_num_conflicted = ParseSizeT(optarg); + break; + case 'd': + res.max_num_untracked = ParseSizeT(optarg); + break; + case 'm': + res.dirty_max_index_size = ParseSizeT(optarg); + break; + case 'e': + res.recurse_untracked_dirs = true; + break; + case 'U': + res.ignore_status_show_untracked_files = true; + break; + case 'W': + res.ignore_bash_show_untracked_files = true; + break; + case 'D': + res.ignore_bash_show_dirty_state = true; + break; + default: + std::exit(10); + } + } +} + +} // namespace gitstatus diff --git a/gitstatus/src/options.h b/gitstatus/src/options.h new file mode 100644 index 00000000..fd561e11 --- /dev/null +++ b/gitstatus/src/options.h @@ -0,0 +1,78 @@ +// Copyright 2019 Roman Perepelitsa. +// +// This file is part of GitStatus. +// +// GitStatus is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
//
// GitStatus is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with GitStatus. If not, see <https://www.gnu.org/licenses/>.

#ifndef ROMKATV_GITSTATUS_OPTIONS_H_
#define ROMKATV_GITSTATUS_OPTIONS_H_

#include <chrono>
#include <string>

#include "logging.h"
#include "time.h"

namespace gitstatus {

// Limits on what gets computed and reported for a repository.
//
// For the size_t fields, "infinity" is represented by the largest size_t value: the flag parser
// maps negative command line values to it.
struct Limits {
  // Truncate commit summary if it's longer than this many bytes.
  size_t max_commit_summary_length = 256;
  // Report at most this many staged changes.
  size_t max_num_staged = 1;
  // Report at most this many unstaged changes.
  size_t max_num_unstaged = 1;
  // Report at most this many conflicted changes.
  size_t max_num_conflicted = 1;
  // Report at most this many untracked files.
  size_t max_num_untracked = 1;
  // If a repo has more files in its index than this, override max_num_unstaged and
  // max_num_untracked (but not max_num_staged) with zeros.
  size_t dirty_max_index_size = -1;
  // If true, report untracked files like `git status --untracked-files`.
  bool recurse_untracked_dirs = false;
  // Unless true, report zero untracked files for repositories with
  // status.showUntrackedFiles = false.
  bool ignore_status_show_untracked_files = false;
  // Unless true, report zero untracked files for repositories with
  // bash.showUntrackedFiles = false.
  bool ignore_bash_show_untracked_files = false;
  // Unless true, report zero staged, unstaged and conflicted changes for repositories with
  // bash.showDirtyState = false.
  bool ignore_bash_show_dirty_state = false;
};

// Everything that can be configured through command line flags.
struct Options : Limits {
  // Use this many threads to scan git workdir for unstaged and untracked files. Must be positive.
  size_t num_threads = 1;
  // If non-negative, check whether the specified file descriptor is locked when not receiving any
  // requests for one second; exit if it isn't locked.
  int lock_fd = -1;
  // If non-negative, send signal 0 to the specified PID when not receiving any requests for one
  // second; exit if signal sending fails.
  int parent_pid = -1;
  // Don't write entries to log whose log level is below this. Log levels in increasing order:
  // DEBUG, INFO, WARN, ERROR, FATAL.
  LogLevel log_level = INFO;
  // Close git repositories that haven't been used for this long. This is meant to release resources
  // such as memory and file descriptors. The next request for a repo that's been closed is much
  // slower than for a repo that hasn't been. Negative value means infinity.
  Duration repo_ttl = std::chrono::seconds(3600);
};

// Parses command line flags. Terminates the process on --help, --version and invalid input.
Options ParseOptions(int argc, char** argv);

}  // namespace gitstatus

#endif  // ROMKATV_GITSTATUS_OPTIONS_H_
diff --git a/gitstatus/src/print.h b/gitstatus/src/print.h new file mode 100644 index 00000000..949f946b --- /dev/null +++ b/gitstatus/src/print.h @@ -0,0 +1,101 @@
// Copyright 2019 Roman Perepelitsa.
//
// This file is part of GitStatus.
//
// GitStatus is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// GitStatus is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with GitStatus. If not, see <https://www.gnu.org/licenses/>.
+ +#ifndef ROMKATV_GITSTATUS_PRINT_H_ +#define ROMKATV_GITSTATUS_PRINT_H_ + +#include <sys/stat.h> + +#include <iomanip> +#include <ostream> +#include <string> +#include <type_traits> +#include <utility> +#include <vector> + +#include <git2.h> + +#include "string_view.h" +#include "strings.h" + +namespace gitstatus { + +template <class T> +struct Printable { + const T& value; +}; + +template <class T> +Printable<T> Print(const T& val) { + return {val}; +} + +template <class T> +std::ostream& operator<<(std::ostream& strm, const Printable<T>& p) { + static_assert(!std::is_pointer<std::decay_t<T>>(), ""); + return strm << p.value; +} + +inline std::ostream& operator<<(std::ostream& strm, const Printable<StringView>& p) { + Quote(strm, p.value.ptr, p.value.ptr + p.value.len); + return strm; +} + +inline std::ostream& operator<<(std::ostream& strm, const Printable<std::string>& p) { + Quote(strm, p.value.data(), p.value.data() + p.value.size()); + return strm; +} + +inline std::ostream& operator<<(std::ostream& strm, const Printable<const char*>& p) { + Quote(strm, p.value, p.value ? p.value + std::strlen(p.value) : nullptr); + return strm; +} + +inline std::ostream& operator<<(std::ostream& strm, const Printable<char*>& p) { + Quote(strm, p.value, p.value ? p.value + std::strlen(p.value) : nullptr); + return strm; +} + +template <class T, class U> +std::ostream& operator<<(std::ostream& strm, const Printable<std::pair<T, U>>& p) { + return strm << '{' << Print(p.value.first) << ", " << Print(p.value.second) << '}'; +} + +template <class T> +std::ostream& operator<<(std::ostream& strm, const Printable<std::vector<T>>& p) { + strm << '['; + for (size_t i = 0; i != p.value.size(); ++i) { + if (i) strm << ", "; + strm << Print(p.value[i]); + } + strm << ']'; + return strm; +} + +inline std::ostream& operator<<(std::ostream& strm, const Printable<struct timespec>& p) { + strm << p.value.tv_sec << '.' 
<< std::setw(9) << std::setfill('0') << p.value.tv_nsec; + return strm; +} + +inline std::ostream& operator<<(std::ostream& strm, const Printable<git_index_time>& p) { + strm << p.value.seconds << '.' << std::setw(9) << std::setfill('0') << p.value.nanoseconds; + return strm; +} + +} // namespace gitstatus + +#endif // ROMKATV_GITSTATUS_PRINT_H_ diff --git a/gitstatus/src/repo.cc b/gitstatus/src/repo.cc new file mode 100644 index 00000000..d7ea7d3e --- /dev/null +++ b/gitstatus/src/repo.cc @@ -0,0 +1,503 @@ +// Copyright 2019 Roman Perepelitsa. +// +// This file is part of GitStatus. +// +// GitStatus is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// GitStatus is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with GitStatus. If not, see <https://www.gnu.org/licenses/>. 

#include "repo.h"

#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <algorithm>
#include <atomic>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <iterator>
#include <memory>
#include <type_traits>
#include <utility>

#include "arena.h"
#include "check.h"
#include "check_dir_mtime.h"
#include "dir.h"
#include "git.h"
#include "print.h"
#include "scope_guard.h"
#include "stat.h"
#include "string_cmp.h"
#include "thread_pool.h"
#include "timer.h"

namespace gitstatus {

namespace {

using namespace std::string_literals;

// Thin wrappers around std::atomic operations that always use relaxed memory ordering.

// Returns the current value of x.
template <class T>
T Load(const std::atomic<T>& x) {
  return x.load(std::memory_order_relaxed);
}

// Sets x to v.
template <class T>
void Store(std::atomic<T>& x, T v) {
  x.store(v, std::memory_order_relaxed);
}

// Adds `by` to x and returns the value x had before the addition.
template <class T>
T Inc(std::atomic<T>& x, T by = 1) {
  return x.fetch_add(by, std::memory_order_relaxed);
}

// Subtracts one from x and returns the value x had before the subtraction.
template <class T>
T Dec(std::atomic<T>& x) {
  return x.fetch_sub(1, std::memory_order_relaxed);
}

// Sets x to v and returns the value x had before.
template <class T>
T Exchange(std::atomic<T>& x, T v) {
  return x.exchange(v, std::memory_order_relaxed);
}

// Returns a lower-case name for the libgit2 delta type, or "unknown" for values that are not
// listed below.
const char* DeltaStr(git_delta_t t) {
  switch (t) {
    case GIT_DELTA_UNMODIFIED: return "unmodified";
    case GIT_DELTA_ADDED: return "added";
    case GIT_DELTA_DELETED: return "deleted";
    case GIT_DELTA_MODIFIED: return "modified";
    case GIT_DELTA_RENAMED: return "renamed";
    case GIT_DELTA_COPIED: return "copied";
    case GIT_DELTA_IGNORED: return "ignored";
    case GIT_DELTA_UNTRACKED: return "untracked";
    case GIT_DELTA_TYPECHANGE: return "typechange";
    case GIT_DELTA_UNREADABLE: return "unreadable";
    case GIT_DELTA_CONFLICTED: return "conflicted";
  }
  return "unknown";
}

}  // namespace

// Returns true if `path` belongs to this shard under the ordering defined by `str`.
//
// A path belongs to the shard if it does not compare less than start_s and, unless end_s is
// empty (in which case there is no upper bound), its prefix of at most end_s.size() bytes does
// not compare greater than end_s.
bool Repo::Shard::Contains(Str<> str, StringView path) const {
  if (str.Lt(path, start_s)) return false;
  if (end_s.empty()) return true;
  // Compare only the prefix of path that is as long as end_s.
  path.len = std::min(path.len, end_s.size());
  return !str.Lt(end_s, path);
}

// Takes ownership of `repo` (it is freed in the destructor). Unless untracked files are disabled
// by the limits, starts a background CheckDirMtime() probe whose outcome is published through
// untracked_cache_.
Repo::Repo(git_repository* repo, Limits lim) : lim_(std::move(lim)), repo_(repo), tag_db_(repo) {
  if (lim_.max_num_untracked) {
    // The task captures `this`; the destructor waits for it to finish before tearing down.
    GlobalThreadPool()->Schedule([this] {
      bool check = CheckDirMtime(git_repository_path(repo_));
      std::unique_lock<std::mutex> lock(mutex_);
      CHECK(Load(untracked_cache_) == Tribool::kUnknown);
      Store(untracked_cache_, check ? Tribool::kTrue : Tribool::kFalse);
      cv_.notify_one();
    });
  } else {
    untracked_cache_ = Tribool::kFalse;
  }
}

// Waits for the background probe started by the constructor (if any) and frees the libgit2
// handles.
Repo::~Repo() {
  {
    // The probe task references members of this object, so it has to be done before they are
    // destroyed.
    std::unique_lock<std::mutex> lock(mutex_);
    while (untracked_cache_ == Tribool::kUnknown) cv_.wait(lock);
  }
  if (git_index_) git_index_free(git_index_);
  git_repository_free(repo_);
}

IndexStats Repo::GetIndexStats(const git_oid* head, git_config* cfg) {
  ON_SCOPE_EXIT(this, orig_lim = lim_) { lim_ = orig_lim; };
  auto Off = [&](const char* name) {
    int val;
    if (git_config_get_bool(&val, cfg, name) || val) return false;
    LOG(INFO) << "Honoring git config option: " << name << " = false";
    return true;
  };
  if (!lim_.ignore_status_show_untracked_files && Off("status.showUntrackedFiles")) {
    lim_.max_num_untracked = 0;
  }
  if (!lim_.ignore_bash_show_untracked_files && Off("bash.showUntrackedFiles")) {
    lim_.max_num_untracked = 0;
  }
  if (!lim_.ignore_bash_show_dirty_state && Off("bash.showDirtyState")) {
    lim_.max_num_staged = 0;
    lim_.max_num_unstaged = 0;
    lim_.max_num_conflicted = 0;
  }

  if (git_index_) {
    int new_index;
    VERIFY(!git_index_read_ex(git_index_, 0, &new_index)) << GitError();
    if (new_index) {
      head_ = {};
      index_.reset();
    }
  } else {
    VERIFY(!git_repository_index(&git_index_, repo_)) << GitError();
    // Query an attribute (doesn't matter which) to initialize repo's attribute
    // cache. It's a workaround for synchronization bugs (data races) in libgit2
    // that result from lazy cache initialization without synchronization.
    // Thankfully, subsequent cache reads and writes are properly synchronized.
+ const char* attr; + VERIFY(!git_attr_get(&attr, repo_, 0, "x", "x")) << GitError(); + } + + UpdateShards(); + Store(error_, false); + Store(unstaged_, {}); + Store(untracked_, {}); + Store(unstaged_deleted_, {}); + + std::vector<const char*> dirty_candidates; + const size_t index_size = git_index_entrycount(git_index_); + + if (!lim_.max_num_staged && !lim_.max_num_conflicted) { + head_ = {}; + Store(staged_, {}); + Store(conflicted_, {}); + Store(staged_new_, {}); + Store(staged_deleted_, {}); + Store(skip_worktree_, {}); + Store(assume_unchanged_, {}); + } else if (head) { + if (git_oid_equal(head, &head_)) { + LOG(INFO) << "Index and HEAD unchanged; staged = " << Load(staged_) + << ", conflicted = " << Load(conflicted_); + } else { + head_ = *head; + Store(staged_, {}); + Store(conflicted_, {}); + Store(staged_new_, {}); + Store(staged_deleted_, {}); + Store(skip_worktree_, {}); + Store(assume_unchanged_, {}); + StartStagedScan(head); + } + } else { + head_ = {}; + size_t staged = 0; + size_t skip_worktree = 0; + size_t assume_unchanged = 0; + for (size_t i = 0; i != index_size; ++i) { + const git_index_entry* entry = git_index_get_byindex_no_sort(git_index_, i); + if (!(entry->flags_extended & GIT_INDEX_ENTRY_INTENT_TO_ADD)) ++staged; + if (entry->flags_extended & GIT_INDEX_ENTRY_SKIP_WORKTREE) ++skip_worktree; + if (entry->flags & GIT_INDEX_ENTRY_VALID) ++assume_unchanged; + } + Store(staged_, staged); + Store(conflicted_, {}); + Store(staged_new_, staged); + Store(staged_deleted_, {}); + Store(skip_worktree_, skip_worktree); + Store(assume_unchanged_, assume_unchanged); + } + + if (index_size <= lim_.dirty_max_index_size && + (lim_.max_num_unstaged || lim_.max_num_untracked)) { + if (!index_) index_ = std::make_unique<Index>(repo_, git_index_); + dirty_candidates = index_->GetDirtyCandidates({.include_untracked = lim_.max_num_untracked > 0, + .untracked_cache = Load(untracked_cache_)}); + if (dirty_candidates.empty()) { + LOG(INFO) << "Clean repo: no dirty 
candidates"; + } else { + LOG(INFO) << "Found " << dirty_candidates.size() << " dirty candidate(s) spanning from " + << Print(dirty_candidates.front()) << " to " << Print(dirty_candidates.back()); + } + StartDirtyScan(dirty_candidates); + } + + Wait(); + VERIFY(!Load(error_)); + + size_t num_staged = std::min(Load(staged_), lim_.max_num_staged); + size_t num_unstaged = std::min(Load(unstaged_), lim_.max_num_unstaged); + return {.index_size = index_size, + .num_staged = num_staged, + .num_unstaged = num_unstaged, + .num_conflicted = std::min(Load(conflicted_), lim_.max_num_conflicted), + .num_untracked = std::min(Load(untracked_), lim_.max_num_untracked), + .num_staged_new = std::min(Load(staged_new_), num_staged), + .num_staged_deleted = std::min(Load(staged_deleted_), num_staged), + .num_unstaged_deleted = std::min(Load(unstaged_deleted_), num_unstaged), + .num_skip_worktree = Load(skip_worktree_), + .num_assume_unchanged = Load(assume_unchanged_)}; +} + +int Repo::OnDelta(const char* type, const git_diff_delta& d, std::atomic<size_t>& c1, size_t m1, + const std::atomic<size_t>& c2, size_t m2) { + auto Msg = [&]() { + const char* status = DeltaStr(d.status); + std::ostringstream strm; + strm << "Found " << type << " file"; + if (strcmp(status, type)) strm << " (" << status << ")"; + strm << ": " << Print(d.new_file.path); + return strm.str(); + }; + + size_t v = Inc(c1); + if (v) { + LOG(DEBUG) << Msg(); + } else { + LOG(INFO) << Msg(); + } + if (v + 1 < m1) return GIT_DIFF_DELTA_DO_NOT_INSERT; + if (Load(c2) < m2) return GIT_DIFF_DELTA_DO_NOT_INSERT | GIT_DIFF_DELTA_SKIP_TYPE; + return GIT_EUSER; +} + +void Repo::StartDirtyScan(const std::vector<const char*>& paths) { + if (paths.empty()) return; + + git_diff_options opt = GIT_DIFF_OPTIONS_INIT; + opt.payload = this; + opt.flags = GIT_DIFF_INCLUDE_TYPECHANGE_TREES | GIT_DIFF_SKIP_BINARY_CHECK | + GIT_DIFF_DISABLE_PATHSPEC_MATCH | GIT_DIFF_EXEMPLARS; + if (lim_.max_num_untracked) { + opt.flags |= 
// Kicks off the HEAD-tree-vs-index comparison that feeds the staged_ and
// conflicted_ counters (plus staged_new_, staged_deleted_, skip_worktree_ and
// assume_unchanged_). One task per shard is queued through RunAsync(); this
// function returns without waiting for them.
//
// `head` is the commit whose tree is compared against git_index_. Failures to
// look up the commit or its tree are reported through VERIFY.
void Repo::StartStagedScan(const git_oid* head) {
  git_commit* commit = nullptr;
  VERIFY(!git_commit_lookup(&commit, repo_, head)) << GitError();
  ON_SCOPE_EXIT(=) { git_commit_free(commit); };
  git_tree* tree = nullptr;
  VERIFY(!git_commit_tree(&tree, commit)) << GitError();
  // NOTE(review): `tree` is handed to the async tasks below and no
  // git_tree_free() is visible in this function — confirm who releases it.

  git_diff_options opt = GIT_DIFF_OPTIONS_INIT;
  opt.flags = GIT_DIFF_EXEMPLARS | GIT_DIFF_INCLUDE_TYPECHANGE_TREES;
  opt.payload = this;
  // Unary plus turns the captureless lambda into a plain function pointer.
  opt.notify_cb = +[](const git_diff* diff, const git_diff_delta* delta,
                      const char* matched_pathspec, void* payload) -> int {
    Repo* repo = static_cast<Repo*>(payload);
    // Another task already failed: stop this diff as well.
    if (Load(repo->error_)) return GIT_EUSER;
    if (delta->status == GIT_DELTA_CONFLICTED) {
      return repo->OnDelta("conflicted", *delta, repo->conflicted_, repo->lim_.max_num_conflicted,
                           repo->staged_, repo->lim_.max_num_staged);
    } else {
      if (delta->status == GIT_DELTA_ADDED) Inc(repo->staged_new_);
      if (delta->status == GIT_DELTA_DELETED) Inc(repo->staged_deleted_);
      return repo->OnDelta("staged", *delta, repo->staged_, repo->lim_.max_num_staged,
                           repo->conflicted_, repo->lim_.max_num_conflicted);
    }
  };

  for (const Shard& shard : shards_) {
    // Each task gets its own copy of `opt` and `shard`; `mutable` lets it set
    // the per-shard range on that copy.
    RunAsync([this, tree, opt, shard]() mutable {
      // Count flagged entries in this shard's slice of the index locally and
      // publish the totals with a single atomic add each.
      size_t skip_worktree = 0;
      size_t assume_unchanged = 0;
      for (size_t i = shard.start_i; i != shard.end_i; ++i) {
        const git_index_entry* entry = git_index_get_byindex_no_sort(git_index_, i);
        if (entry->flags_extended & GIT_INDEX_ENTRY_SKIP_WORKTREE) ++skip_worktree;
        if (entry->flags & GIT_INDEX_ENTRY_VALID) ++assume_unchanged;
      }
      Inc(skip_worktree_, skip_worktree);
      Inc(assume_unchanged_, assume_unchanged);
      // Restrict the diff to this shard's path range.
      opt.range_start = shard.start_s.c_str();
      opt.range_end = shard.end_s.c_str();
      git_diff* diff = nullptr;
      LOG(DEBUG) << "git_diff_tree_to_index from " << Print(opt.range_start) << " to "
                 << Print(opt.range_end);
      switch (git_diff_tree_to_index(&diff, repo_, tree, git_index_, &opt)) {
        case 0:
          git_diff_free(diff);
          break;
        case GIT_EUSER:
          // The notify callback asked to stop early; not an error.
          break;
        default:
          LOG(ERROR) << "git_diff_tree_to_index: " << GitError();
          throw Exception();
      }
    });
  }
}
= 512; + + const Str<> str(git_index_is_case_sensitive(git_index_)); + size_t index_size = git_index_entrycount(git_index_); + ON_SCOPE_EXIT(&) { + LOG(INFO) << "Splitting " << index_size << " object(s) into " << shards_.size() << " shard(s)"; + }; + + if (index_size <= kEntriesPerShard || GlobalThreadPool()->num_threads() < 2) { + shards_ = {{ + .start_s = "", + .end_s = "", + .start_i = 0, + .end_i = index_size}}; + return; + } + + size_t shards = + std::min(index_size / kEntriesPerShard + 1, 2 * GlobalThreadPool()->num_threads()); + shards_.clear(); + shards_.reserve(shards); + std::string last_s; + size_t last_i = 0; + + for (size_t i = 0; i != shards - 1; ++i) { + size_t idx = (i + 1) * index_size / shards; + std::string split = git_index_get_byindex_no_sort(git_index_, idx)->path; + auto pos = split.find_last_of('/'); + if (pos == std::string::npos) continue; + split = split.substr(0, pos + 1); + Shard shard; + shard.end_s = split; + --shard.end_s.back(); + if (!str.Lt(last_s, shard.end_s)) continue; + shard.start_s = std::move(last_s); + last_s = std::move(split); + shard.start_i = last_i; + shard.end_i = idx; + last_i = idx; + shards_.push_back(std::move(shard)); + } + shards_.push_back({ + .start_s = std::move(last_s), + .end_s = "", + .start_i = last_i, + .end_i = index_size}); + + CHECK(!shards_.empty()); + CHECK(shards_.size() <= shards); + CHECK(shards_.front().start_s.empty()); + CHECK(shards_.front().start_i == 0); + CHECK(shards_.back().end_s.empty()); + CHECK(shards_.back().end_i == index_size); + for (size_t i = 0; i != shards_.size(); ++i) { + if (i) { + const git_index_entry* entry = git_index_get_byindex_no_sort(git_index_, shards_[i].start_i); + CHECK(!std::memcmp(shards_[i].start_s.c_str(), entry->path, shards_[i].start_s.size())); + CHECK(str.Lt(shards_[i - 1].end_s, shards_[i].start_s)); + CHECK(shards_[i - 1].end_i == shards_[i].start_i); + } + if (i != shards_.size() - 1) { + CHECK(shards_[i].start_i < shards_[i].end_i); + 
CHECK(str.Lt(shards_[i].start_s, shards_[i].end_s)); + } + } +} + +void Repo::DecInflight() { + std::unique_lock<std::mutex> lock(mutex_); + CHECK(Load(inflight_) > 0); + if (Dec(inflight_) == 1) cv_.notify_one(); +} + +void Repo::RunAsync(std::function<void()> f) { + Inc(inflight_); + try { + GlobalThreadPool()->Schedule([this, f = std::move(f)] { + try { + ON_SCOPE_EXIT(&) { DecInflight(); }; + f(); + } catch (const Exception&) { + if (!Load(error_)) { + std::unique_lock<std::mutex> lock(mutex_); + if (!Load(error_)) { + Store(error_, true); + cv_.notify_one(); + } + } + } + }); + } catch (...) { + DecInflight(); + throw; + } +} + +void Repo::Wait() { + std::unique_lock<std::mutex> lock(mutex_); + while (inflight_) cv_.wait(lock); +} + +std::future<std::string> Repo::GetTagName(const git_oid* target) { + auto* promise = new std::promise<std::string>; + std::future<std::string> res = promise->get_future(); + + GlobalThreadPool()->Schedule([=] { + ON_SCOPE_EXIT(&) { delete promise; }; + if (!target) { + promise->set_value(""); + return; + } + try { + promise->set_value(tag_db_.TagForCommit(*target)); + } catch (const Exception&) { + promise->set_exception(std::current_exception()); + } + }); + + return res; +} + +} // namespace gitstatus diff --git a/gitstatus/src/repo.h b/gitstatus/src/repo.h new file mode 100644 index 00000000..f243f86e --- /dev/null +++ b/gitstatus/src/repo.h @@ -0,0 +1,126 @@ +// Copyright 2019 Roman Perepelitsa. +// +// This file is part of GitStatus. +// +// GitStatus is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// GitStatus is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
// Result of Repo::GetIndexStats().
//
// num_staged, num_unstaged, num_conflicted and num_untracked are capped by the
// matching Limits::max_num_* value. num_staged_new and num_staged_deleted are
// capped by num_staged, and num_unstaged_deleted by num_unstaged.
// num_skip_worktree and num_assume_unchanged are reported uncapped.
struct IndexStats {
  size_t index_size = 0;  // number of entries in the git index
  size_t num_staged = 0;
  size_t num_unstaged = 0;
  size_t num_conflicted = 0;
  size_t num_untracked = 0;
  size_t num_staged_new = 0;
  size_t num_staged_deleted = 0;
  size_t num_unstaged_deleted = 0;
  size_t num_skip_worktree = 0;
  size_t num_assume_unchanged = 0;
};

// A git repository together with the cached state needed to answer status
// queries. The scans run as tasks on the global thread pool.
class Repo {
 public:
  // The destructor calls git_repository_free() on `repo`.
  explicit Repo(git_repository* repo, Limits lim);
  Repo(Repo&& other) = delete;
  ~Repo();

  git_repository* repo() const { return repo_; }

  // Head can be null. In that case no HEAD diff is run: every index entry that is not
  // marked intent-to-add is counted as both staged and staged-new.
  IndexStats GetIndexStats(const git_oid* head, git_config* cfg);

  // Returns the last tag in lexicographical order whose target is equal to the given, or an
  // empty string. Target can be null, in which case the tag is empty.
  std::future<std::string> GetTagName(const git_oid* target);

 private:
  // One slice of the index, processed by one async task.
  struct Shard {
    bool Contains(Str<> str, StringView path) const;
    std::string start_s;  // path bound at which the shard starts; empty for the first shard
    std::string end_s;    // path bound at which the shard ends; empty for the last shard
    size_t start_i;       // index of the first entry in the shard
    size_t end_i;         // index one past the last entry in the shard
  };

  // Recomputes shards_ from the current index.
  void UpdateShards();

  // Counter bookkeeping shared by the diff notify callbacks; the result is returned to libgit2.
  int OnDelta(const char* type, const git_diff_delta& d, std::atomic<size_t>& c1, size_t m1,
              const std::atomic<size_t>& c2, size_t m2);

  // Queue the HEAD-vs-index and index-vs-workdir diffs via RunAsync().
  void StartStagedScan(const git_oid* head);
  void StartDirtyScan(const std::vector<const char*>& paths);

  // RunAsync() schedules f on the global thread pool and tracks it in inflight_;
  // DecInflight() marks one task as finished; Wait() blocks until none are left.
  void DecInflight();
  void RunAsync(std::function<void()> f);
  void Wait();

  Limits lim_;
  git_repository* const repo_;
  git_index* git_index_ = nullptr;  // opened on the first GetIndexStats() call
  std::vector<Shard> shards_;
  git_oid head_ = {};  // HEAD oid that the cached staged/conflicted counters belong to
  TagDb tag_db_;

  std::unique_ptr<Index> index_;  // created on demand to compute dirty candidates

  // mutex_ and cv_ coordinate waiting on inflight_, error_ and untracked_cache_.
  std::mutex mutex_;
  std::condition_variable cv_;
  std::atomic<size_t> inflight_{0};  // RunAsync() tasks that have not finished yet
  std::atomic<bool> error_{false};   // set when an async task throws Exception
  std::atomic<size_t> staged_{0};
  std::atomic<size_t> unstaged_{0};
  std::atomic<size_t> conflicted_{0};
  std::atomic<size_t> untracked_{0};
  std::atomic<size_t> staged_new_{0};
  std::atomic<size_t> staged_deleted_{0};
  std::atomic<size_t> unstaged_deleted_{0};
  std::atomic<size_t> skip_worktree_{0};
  std::atomic<size_t> assume_unchanged_{0};
  // Outcome of the CheckDirMtime() probe started by the constructor; kUnknown until it reports.
  std::atomic<Tribool> untracked_cache_{Tribool::kUnknown};
};
// Discovers the repository that `dir` belongs to and stores its git directory
// and working directory in `gitdir` and `workdir`.
//
// - On success both outputs are non-empty absolute paths ending in '/'
//   (enforced with VERIFY).
// - If no repository is found (GIT_ENOTFOUND) both outputs are cleared.
// - Any other libgit2 error is logged and reported by throwing Exception.
//
// With `from_dotgit` set, `dir` is treated as the git directory itself: no
// parent search and no ".git" suffix probing.
void GitDirs(const char* dir, bool from_dotgit, std::string& gitdir, std::string& workdir) {
  git_buf gitdir_buf = {};
  git_buf workdir_buf = {};
  ON_SCOPE_EXIT(&) {
    git_buf_free(&gitdir_buf);
    git_buf_free(&workdir_buf);
  };
  int flags = from_dotgit ? GIT_REPOSITORY_OPEN_NO_SEARCH | GIT_REPOSITORY_OPEN_NO_DOTGIT : 0;
  switch (git_repository_discover_ex(&gitdir_buf, &workdir_buf, nullptr, nullptr, dir, flags,
                                     nullptr)) {
    case 0:
      gitdir.assign(gitdir_buf.ptr, gitdir_buf.size);
      workdir.assign(workdir_buf.ptr, workdir_buf.size);
      VERIFY(!gitdir.empty() && gitdir.front() == '/' && gitdir.back() == '/');
      VERIFY(!workdir.empty() && workdir.front() == '/' && workdir.back() == '/');
      break;
    case GIT_ENOTFOUND:
      gitdir.clear();
      workdir.clear();
      break;
    default:
      // Name the function that actually failed; the message used to blame
      // git_repository_open_ext, which is what OpenRepo() calls.
      LOG(ERROR) << "git_repository_discover_ex: " << Print(dir) << ": " << GitError();
      throw Exception();
  }
}
// Returns the parent directory of `path`, including its trailing '/'.
//
// Trailing slashes of `path` are ignored. The result is empty when `path` is
// empty, consists only of slashes, or has no '/' before its last component.
// Examples: "/a/b/" -> "/a/", "/a" -> "/", "a" -> "", "/" -> "".
std::string DirName(std::string path) {
  // Last character that belongs to the final path component.
  const std::size_t component_end = path.find_last_not_of('/');
  if (component_end == std::string::npos) return "";

  // Slash that separates the final component from its parent.
  const std::size_t parent_slash = path.rfind('/', component_end);
  if (parent_slash == std::string::npos) return "";

  path.resize(parent_slash + 1);
  return path;
}
OpenRepo(gitdir, true) : OpenRepo(dir, from_dotgit); + if (!repo) return nullptr; + ON_SCOPE_EXIT(&) { + if (repo) git_repository_free(repo); + }; + if (git_repository_is_bare(repo)) return nullptr; + workdir = git_repository_workdir(repo) ?: ""; + if (workdir.empty()) return nullptr; + VERIFY(workdir.front() == '/' && workdir.back() == '/') << Print(workdir); + + auto x = cache_.emplace(gitdir, nullptr); + std::unique_ptr<Entry>& elem = x.first->second; + if (elem) { + lru_.erase(elem->lru); + } else { + LOG(INFO) << "Initializing new repository: " << Print(gitdir); + + // Libgit2 initializes odb and refdb lazily with double-locking. To avoid useless work + // when multiple threads attempt to initialize the same db at the same time, we trigger + // initialization manually before threads are in play. + git_odb* odb; + VERIFY(!git_repository_odb(&odb, repo)) << GitError(); + git_odb_free(odb); + + git_refdb* refdb; + VERIFY(!git_repository_refdb(&refdb, repo)) << GitError(); + git_refdb_free(refdb); + + elem = std::make_unique<Entry>(std::exchange(repo, nullptr), lim_); + } + elem->lru = lru_.insert({Clock::now(), x.first}); + return elem.get(); +} + +void RepoCache::Free(Time cutoff) { + while (true) { + if (lru_.empty()) break; + auto it = lru_.begin(); + if (it->first > cutoff) break; + Erase(it->second); + } +} + +void RepoCache::Erase(Cache::iterator it) { + if (it == cache_.end()) return; + LOG(INFO) << "Closing repository: " << Print(it->first); + lru_.erase(it->second->lru); + cache_.erase(it); +} + +} // namespace gitstatus diff --git a/gitstatus/src/repo_cache.h b/gitstatus/src/repo_cache.h new file mode 100644 index 00000000..9d14ec06 --- /dev/null +++ b/gitstatus/src/repo_cache.h @@ -0,0 +1,60 @@ +// Copyright 2019 Roman Perepelitsa. +// +// This file is part of GitStatus. 
// Cache of open repositories keyed by git directory, with least-recently-used
// bookkeeping so that idle repositories can be closed.
class RepoCache {
 public:
  // `lim` is passed to every Repo the cache creates.
  explicit RepoCache(Limits lim) : lim_(std::move(lim)) {}

  // Returns the repository that `dir` belongs to, opening and caching it if
  // necessary, and marks it as just used. Returns null when `dir` is empty or
  // not absolute, when no repository is found, or when the repository is bare
  // or has no working directory. The returned object is owned by the cache.
  Repo* Open(const std::string& dir, bool from_dotgit);

  // Closes repositories whose last-use time is not later than `cutoff`.
  void Free(Time cutoff);

 private:
  struct Entry;
  // Git directory path -> cached repository.
  using Cache = std::unordered_map<std::string, std::unique_ptr<Entry>>;
  // Last-use time -> cache entry.
  using LRU = std::multimap<Time, Cache::iterator>;

  // Drops `it` from both cache_ and lru_; a no-op for cache_.end().
  void Erase(Cache::iterator it);

  Limits lim_;
  Cache cache_;
  LRU lru_;

  // A Repo plus the position of its record in lru_.
  struct Entry : Repo {
    using Repo::Repo;
    LRU::iterator lru;
  };
};
+// +// GitStatus is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// GitStatus is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with GitStatus. If not, see <https://www.gnu.org/licenses/>. + +#include "request.h" + +#include <fcntl.h> +#include <signal.h> +#include <sys/select.h> +#include <sys/types.h> +#include <unistd.h> + +#include <algorithm> +#include <cstdlib> +#include <iostream> + +#include "check.h" +#include "logging.h" +#include "print.h" +#include "serialization.h" + +namespace gitstatus { + +namespace { + +Request ParseRequest(const std::string& s) { + Request res; + auto begin = s.begin(), end = s.end(), sep = std::find(begin, end, kFieldSep); + VERIFY(sep != end) << "Malformed request: " << s; + res.id.assign(begin, sep); + + begin = sep + 1; + if (*begin == ':') { + res.from_dotgit = true; + ++begin; + } + sep = std::find(begin, end, kFieldSep); + res.dir.assign(begin, sep); + if (sep == end) return res; + + begin = sep + 1; + VERIFY(begin + 1 == end && (*begin == '0' || *begin == '1')) << "Malformed request: " << s; + res.diff = *begin == '0'; + return res; +} + +bool IsLockedFd(int fd) { + CHECK(fd >= 0); + struct flock flock = {}; + flock.l_type = F_RDLCK; + flock.l_whence = SEEK_SET; + CHECK(fcntl(fd, F_GETLK, &flock) != -1) << Errno(); + return flock.l_type != F_UNLCK; +} + +} // namespace + +std::ostream& operator<<(std::ostream& strm, const Request& req) { + strm << Print(req.id) << " for " << Print(req.dir); + if (req.from_dotgit) strm << " [from-dotgit]"; + if (!req.diff) strm << " 
// Reads the next request from fd_.
//
// Returns true and fills `req` once a complete message (terminated by kMsgSep)
// is available. Returns false with `req` reset if nothing arrived within one
// second. The process exits with status 0 when fd_ reaches EOF, when the lock
// on lock_fd_ is gone, or when parent_pid_ can no longer be signalled.
bool RequestReader::ReadRequest(Request& req) {
  // A previous read may already have buffered a complete message.
  auto eol = std::find(read_.begin(), read_.end(), kMsgSep);
  if (eol != read_.end()) {
    std::string msg(read_.begin(), eol);
    read_.erase(read_.begin(), eol + 1);
    req = ParseRequest(msg);
    return true;
  }

  char buf[256];
  while (true) {
    fd_set fds;
    FD_ZERO(&fds);
    FD_SET(fd_, &fds);
    struct timeval timeout = {.tv_sec = 1};

    // NOTE(review): select() and read() return -1 when interrupted by a signal
    // (EINTR), which trips the CHECKs below — confirm this is intended.
    int n;
    CHECK((n = select(fd_ + 1, &fds, NULL, NULL, &timeout)) >= 0) << Errno();
    if (n == 0) {
      // Timed out: use the idle moment to check whether anyone still needs us.
      if (lock_fd_ >= 0 && !IsLockedFd(lock_fd_)) {
        LOG(INFO) << "Lock on fd " << lock_fd_ << " is gone. Exiting.";
        std::exit(0);
      }
      if (parent_pid_ >= 0 && kill(parent_pid_, 0)) {
        LOG(INFO) << "Unable to send signal 0 to " << parent_pid_ << ". Exiting.";
        std::exit(0);
      }
      req = {};
      return false;
    }

    CHECK((n = read(fd_, buf, sizeof(buf))) >= 0) << Errno();
    if (n == 0) {
      LOG(INFO) << "EOF. Exiting.";
      std::exit(0);
    }
    read_.insert(read_.end(), buf, buf + n);
    // Only the freshly read bytes can contain a separator: the buffered prefix
    // was searched before the loop or on an earlier iteration.
    int eol = std::find(buf, buf + n, kMsgSep) - buf;
    if (eol != n) {
      // The message is everything buffered up to the separator; bytes after it
      // stay in read_ for the next call.
      std::string msg(read_.begin(), read_.end() - (n - eol));
      read_.erase(read_.begin(), read_.begin() + msg.size() + 1);
      req = ParseRequest(msg);
      return true;
    }
  }
}
// A parsed status request.
struct Request {
  std::string id;            // request id
  std::string dir;           // directory the request is about
  bool from_dotgit = false;  // set when dir was prefixed with ':' in the request
  bool diff = true;          // cleared when the optional flag field is '1'
};

// Prints a human-readable description of the request.
std::ostream& operator<<(std::ostream& strm, const Request& req);

// Reads requests from a file descriptor.
class RequestReader {
 public:
  // `fd` is the descriptor requests are read from and must differ from
  // `lock_fd`. `lock_fd` and `parent_pid` are liveness probes checked while
  // idle; a negative value disables the respective probe.
  RequestReader(int fd, int lock_fd, int parent_pid);

  // Returns true and fills `req` when a complete request is available; returns
  // false with `req` reset when none arrived within the read timeout.
  bool ReadRequest(Request& req);

 private:
  int fd_;
  int lock_fd_;
  int parent_pid_;
  std::deque<char> read_;  // bytes received but not yet consumed as a request
};
+ +#include "response.h" + +#include <cctype> +#include <cstring> +#include <iostream> + +#include "check.h" +#include "serialization.h" + +namespace gitstatus { + +namespace { + +constexpr char kUnreadable = '?'; + +void SafePrint(std::ostream& strm, StringView s) { + for (size_t i = 0; i != s.len; ++i) { + char c = s.ptr[i]; + strm << (c > 127 || std::isprint(c) ? c : kUnreadable); + } +} + +} // namespace + +ResponseWriter::ResponseWriter(std::string request_id) : request_id_(std::move(request_id)) { + SafePrint(strm_, request_id_); + Print(1); +} + +ResponseWriter::~ResponseWriter() { + if (!done_) { + strm_.str(""); + SafePrint(strm_, request_id_); + Print("0"); + Dump("without git status"); + } +} + +void ResponseWriter::Print(ssize_t val) { + strm_ << kFieldSep; + strm_ << val; +} + +void ResponseWriter::Print(StringView val) { + strm_ << kFieldSep; + SafePrint(strm_, val); +} + +void ResponseWriter::Dump(const char* log) { + CHECK(!done_); + done_ = true; + LOG(INFO) << "Replying " << log; + std::cout << strm_.str() << kMsgSep << std::flush; +} + +} // namespace gitstatus diff --git a/gitstatus/src/response.h b/gitstatus/src/response.h new file mode 100644 index 00000000..12de765a --- /dev/null +++ b/gitstatus/src/response.h @@ -0,0 +1,50 @@ +// Copyright 2019 Roman Perepelitsa. +// +// This file is part of GitStatus. +// +// GitStatus is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// GitStatus is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with GitStatus. If not, see <https://www.gnu.org/licenses/>. 
// Builds one response message and writes it to standard output.
//
// Fields are separated by kFieldSep and passed through a sanitizer that
// replaces non-printable ASCII bytes. The message starts with the request id
// followed by the field 1.
class ResponseWriter {
 public:
  ResponseWriter(std::string request_id);
  ResponseWriter(ResponseWriter&&) = delete;

  // If Dump() was never called, discards the accumulated fields and sends a
  // response consisting of the request id and the field 0 instead.
  ~ResponseWriter();

  // Each Print() appends one field, preceded by kFieldSep.
  void Print(ssize_t val);
  void Print(StringView val);
  void Print(const char* val) { Print(StringView(val)); }

  // Writes the accumulated message followed by kMsgSep to std::cout and
  // flushes. `log` is used only in the accompanying log line. Must not be
  // called more than once.
  void Dump(const char* log);

 private:
  bool done_ = false;  // set by Dump()
  std::string request_id_;
  std::ostringstream strm_;  // message under construction
};
namespace gitstatus {
namespace internal_scope_guard {

// Declared but never defined on purpose: referencing it from ScopeGuard's move
// constructor turns any move that the compiler fails to elide into a link error.
void Undefined();

// Runs the stored callable exactly once, when the guard is destroyed.
template <class Fn>
class ScopeGuard {
 public:
  explicit ScopeGuard(Fn callback) : callback_(std::move(callback)) {}

  // Present only so that returning a guard by value compiles; see Undefined().
  ScopeGuard(ScopeGuard&& other) : callback_(std::move(other.callback_)) { Undefined(); }

  ~ScopeGuard() { std::move(callback_)(); }

 private:
  Fn callback_;
};

// Helper whose assignment operator wraps a callable into a ScopeGuard, so that
// a macro can end with `Generator() = [capture]()` and accept a lambda body.
struct ScopeGuardGenerator {
  template <class Fn>
  ScopeGuard<Fn> operator=(Fn callback) const {
    return ScopeGuard<Fn>(std::move(callback));
  }
};

}  // namespace internal_scope_guard
}  // namespace gitstatus
#ifndef ROMKATV_GITSTATUS_SERIALIZATION_H_
#define ROMKATV_GITSTATUS_SERIALIZATION_H_

namespace gitstatus {

// Separator bytes used when serializing messages.
constexpr char kFieldSep = 31;  // ascii 31 is unit separator
constexpr char kMsgSep = 30;    // ascii 30 is record separator

}  // namespace gitstatus

#endif  // ROMKATV_GITSTATUS_SERIALIZATION_H_

// ---- gitstatus/src/stat.h ----
#ifndef ROMKATV_GITSTATUS_STAT_H_
#define ROMKATV_GITSTATUS_STAT_H_

#include <sys/stat.h>

namespace gitstatus {

// Returns the modification timestamp of `s`. The field has a different name
// on Apple platforms, hence the conditional.
inline const struct timespec& MTim(const struct stat& s) {
#if defined(__APPLE__)
  return s.st_mtimespec;
#else
  return s.st_mtim;
#endif
}

// True when `x` and `y` agree on modification time (seconds and nanoseconds),
// size, inode number and mode. No other stat fields are compared.
inline bool StatEq(const struct stat& x, const struct stat& y) {
  const struct timespec& mtime_x = MTim(x);
  const struct timespec& mtime_y = MTim(y);
  if (mtime_x.tv_sec != mtime_y.tv_sec) return false;
  if (mtime_x.tv_nsec != mtime_y.tv_nsec) return false;
  if (x.st_size != y.st_size) return false;
  if (x.st_ino != y.st_ino) return false;
  return x.st_mode == y.st_mode;
}

}  // namespace gitstatus

#endif  // ROMKATV_GITSTATUS_STAT_H_

// ---- gitstatus/src/string_cmp.h ----
// Copyright 2019 Roman Perepelitsa.
//
// This file is part of GitStatus.
//
// GitStatus is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// GitStatus is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with GitStatus. If not, see <https://www.gnu.org/licenses/>.
+ +#ifndef ROMKATV_GITSTATUS_STRING_CMP_H_ +#define ROMKATV_GITSTATUS_STRING_CMP_H_ + +#include <string.h> // because there is no std::strcasecmp in C++ + +#include <algorithm> +#include <cctype> +#include <cstddef> +#include <cstring> + +#include "string_view.h" + +namespace gitstatus { + +// WARNING: These routines assume no embedded null characters in StringView. Violations cause UB. + +template <int kCaseSensitive = -1> +struct StrCmp; + +template <> +struct StrCmp<0> { + int operator()(StringView x, StringView y) const { + size_t n = std::min(x.len, y.len); + int cmp = strncasecmp(x.ptr, y.ptr, n); + if (cmp) return cmp; + return static_cast<ssize_t>(x.len) - static_cast<ssize_t>(y.len); + } + + int operator()(StringView x, const char* y) const { + for (const char *p = x.ptr, *e = p + x.len; p != e; ++p, ++y) { + if (int cmp = std::tolower(*p) - std::tolower(*y)) return cmp; + } + return 0 - *y; + } + + int operator()(char x, char y) const { return std::tolower(x) - std::tolower(y); } + int operator()(const char* x, const char* y) const { return strcasecmp(x, y); } + int operator()(const char* x, StringView y) const { return -operator()(y, x); } +}; + +template <> +struct StrCmp<1> { + int operator()(StringView x, StringView y) const { + size_t n = std::min(x.len, y.len); + int cmp = std::memcmp(x.ptr, y.ptr, n); + if (cmp) return cmp; + return static_cast<ssize_t>(x.len) - static_cast<ssize_t>(y.len); + } + + int operator()(StringView x, const char* y) const { + for (const char *p = x.ptr, *e = p + x.len; p != e; ++p, ++y) { + if (int cmp = *p - *y) return cmp; + } + return 0 - *y; + } + + int operator()(char x, char y) const { return x - y; } + int operator()(const char* x, const char* y) const { return std::strcmp(x, y); } + int operator()(const char* x, StringView y) const { return -operator()(y, x); } +}; + +template <> +struct StrCmp<-1> { + explicit StrCmp(bool case_sensitive) : case_sensitive(case_sensitive) {} + + template <class X, class Y> + int 
operator()(const X& x, const Y& y) const { + return case_sensitive ? StrCmp<1>()(x, y) : StrCmp<0>()(x, y); + } + + bool case_sensitive; +}; + +template <int kCaseSensitive = -1> +struct StrLt : private StrCmp<kCaseSensitive> { + using StrCmp<kCaseSensitive>::StrCmp; + + template <class X, class Y> + bool operator()(const X& x, const Y& y) const { + return StrCmp<kCaseSensitive>::operator()(x, y) < 0; + } +}; + +template <int kCaseSensitive = -1> +struct StrEq : private StrCmp<kCaseSensitive> { + using StrCmp<kCaseSensitive>::StrCmp; + + template <class X, class Y> + bool operator()(const X& x, const Y& y) const { + return StrCmp<kCaseSensitive>::operator()(x, y) == 0; + } + + bool operator()(const StringView& x, const StringView& y) const { + return x.len == y.len && StrCmp<kCaseSensitive>::operator()(x, y) == 0; + } +}; + +template <int kCaseSensitive = -1> +struct Str { + static_assert(kCaseSensitive == 0 || kCaseSensitive == 1, ""); + + static const bool case_sensitive = kCaseSensitive; + + StrCmp<kCaseSensitive> Cmp; + StrLt<kCaseSensitive> Lt; + StrEq<kCaseSensitive> Eq; +}; + +template <int kCaseSensitive> +const bool Str<kCaseSensitive>::case_sensitive; + +template <> +struct Str<-1> { + explicit Str(bool case_sensitive) + : case_sensitive(case_sensitive), + Cmp(case_sensitive), + Lt(case_sensitive), + Eq(case_sensitive) {} + + bool case_sensitive; + + StrCmp<-1> Cmp; + StrLt<-1> Lt; + StrEq<-1> Eq; +}; + +template <class Iter> +void StrSort(Iter begin, Iter end, bool case_sensitive) { + case_sensitive ? std::sort(begin, end, StrLt<true>()) : std::sort(begin, end, StrLt<false>()); +} + +} // namespace gitstatus + +#endif // ROMKATV_GITSTATUS_STRING_CMP_H_ diff --git a/gitstatus/src/string_view.h b/gitstatus/src/string_view.h new file mode 100644 index 00000000..e29414b5 --- /dev/null +++ b/gitstatus/src/string_view.h @@ -0,0 +1,77 @@ +// Copyright 2019 Roman Perepelitsa. +// +// This file is part of GitStatus. 
//
// GitStatus is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// GitStatus is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with GitStatus. If not, see <https://www.gnu.org/licenses/>.

#ifndef ROMKATV_GITSTATUS_STRING_VIEW_H_
#define ROMKATV_GITSTATUS_STRING_VIEW_H_

#include <algorithm>
#include <cstddef>
#include <cstring>
#include <ostream>
#include <string>

namespace gitstatus {

// Non-owning view of a character range: a pointer plus a length.
//
// WARNING: StringView must not have embedded null characters. Violations cause UB.
struct StringView {
  // Empty view that points at a string literal (never null).
  StringView() : StringView("") {}

  // Requires: !memchr(s.data(), 0, s.size()).
  //
  // WARNING: The existence of this requirement and the fact that this constructor is implicit
  // means it's dangerous to have std::string instances with embedded null characters anywhere
  // in the program. If you have an std::string `s` with embedded nulls, an innocent-looking
  // `F(s)` might perform an implicit conversion to StringView and land you squarely in the
  // Undefined Behavior land.
  StringView(const std::string& s) : StringView(s.c_str(), s.size()) {}

  // Requires: !memchr(ptr, 0, len).
  StringView(const char* ptr, size_t len) : ptr(ptr), len(len) {}

  // Requires: end >= begin && !memchr(begin, 0, end - begin).
  StringView(const char* begin, const char* end) : StringView(begin, end - begin) {}

  // Requires: strchr(s, 0) == s + N.
  template <size_t N>
  StringView(const char (&s)[N]) : StringView(s, N - 1) {
    static_assert(N, "");
  }

  // Explicit because it's the only constructor that isn't O(1).
  // Are you sure you don't already know the string's length?
  //
  // A null `ptr` yields a view with a null pointer and zero length.
  explicit StringView(const char* ptr) : StringView(ptr, ptr ? std::strlen(ptr) : 0) {}

  // True if this view begins with `prefix`. Every view starts with the empty prefix.
  bool StartsWith(StringView prefix) const {
    // Handle the empty prefix up front: either pointer may be null when the
    // length is zero, and passing a null pointer to memcmp is undefined even
    // for a zero byte count.
    if (prefix.len == 0) return true;
    return len >= prefix.len && !std::memcmp(ptr, prefix.ptr, prefix.len);
  }

  // True if this view ends with `suffix`. Every view ends with the empty suffix.
  bool EndsWith(StringView suffix) const {
    // Same null-pointer consideration as in StartsWith().
    if (suffix.len == 0) return true;
    return len >= suffix.len && !std::memcmp(ptr + (len - suffix.len), suffix.ptr, suffix.len);
  }

  const char* ptr;
  size_t len;
};

// Writes the viewed characters to `strm`; a view with a null pointer writes nothing.
inline std::ostream& operator<<(std::ostream& strm, StringView s) {
  if (s.ptr) strm.write(s.ptr, s.len);
  return strm;
}

}  // namespace gitstatus

#endif  // ROMKATV_GITSTATUS_STRING_VIEW_H_

// ---- gitstatus/src/strings.cc ----
// Copyright 2019 Roman Perepelitsa.
//
// This file is part of GitStatus.
//
// GitStatus is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// GitStatus is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with GitStatus. If not, see <https://www.gnu.org/licenses/>.
#include <cassert>

#include "strings.h"

namespace gitstatus {

namespace {

// Returns the backslash escape for bytes that have a short named form, or
// nullptr when `byte` has none.
const char* NamedEscape(unsigned char byte) {
  switch (byte) {
    case '\t':
      return "\\t";
    case '\n':
      return "\\n";
    case '\r':
      return "\\r";
    case '"':
      return "\\\"";
    case '\'':
      return "\\'";
    case '\\':
      return "\\\\";
    default:
      return nullptr;
  }
}

}  // namespace

// Writes [begin, end) to `strm` with C-style escaping:
//   * tab, newline, carriage return, both quote characters and backslash
//     become two-character backslash sequences;
//   * other printable ASCII (codes 32..126) is written verbatim;
//   * every remaining byte becomes a backslash followed by three octal digits.
// Null pointers produce no output.
void CEscape(std::ostream& strm, const char* begin, const char* end) {
  assert(!begin == !end);
  if (begin == nullptr) return;
  for (const char* cur = begin; cur != end; ++cur) {
    const unsigned char byte = *cur;
    if (const char* named = NamedEscape(byte)) {
      strm << named;
    } else if (byte > 31 && byte < 127) {
      strm << byte;
    } else {
      strm << '\\';
      strm << static_cast<char>('0' + ((byte >> 6) & 7));
      strm << static_cast<char>('0' + ((byte >> 3) & 7));
      strm << static_cast<char>('0' + ((byte >> 0) & 7));
    }
  }
}

// Writes [begin, end) as a double-quoted, C-escaped string. Null pointers are
// written as the bare word null.
void Quote(std::ostream& strm, const char* begin, const char* end) {
  assert(!begin == !end);
  if (begin == nullptr) {
    strm << "null";
    return;
  }
  strm << '"';
  CEscape(strm, begin, end);
  strm << '"';
}

}  // namespace gitstatus

// ---- gitstatus/src/strings.h ----
// Copyright 2019 Roman Perepelitsa.
//
// This file is part of GitStatus.
//
// GitStatus is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// GitStatus is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with GitStatus. If not, see <https://www.gnu.org/licenses/>.
#ifndef ROMKATV_GITSTATUS_STRINGS_H_
#define ROMKATV_GITSTATUS_STRINGS_H_

#include <ostream>

namespace gitstatus {

// Writes the bytes in [begin, end) to `strm` with C-style escaping: tab,
// newline, carriage return, single and double quotes and backslash are written
// as two-character backslash sequences, other printable ASCII is written as is,
// and every remaining byte is written as a backslash plus three octal digits.
//
// If the pointers are null, prints nothing.
//
// Requires: !begin == !end.
void CEscape(std::ostream& strm, const char* begin, const char* end);

// Writes [begin, end) to `strm` as a double-quoted string whose contents are
// escaped the same way as by CEscape().
//
// If the pointers are null, prints null without quotes.
//
// Requires: !begin == !end.
void Quote(std::ostream& strm, const char* begin, const char* end);

}  // namespace gitstatus

#endif  // ROMKATV_GITSTATUS_STRINGS_H_

// ---- gitstatus/src/tag_db.cc ----
// Copyright 2019 Roman Perepelitsa.
//
// This file is part of GitStatus.
//
// GitStatus is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// GitStatus is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with GitStatus. If not, see <https://www.gnu.org/licenses/>.
+ +#include "tag_db.h" + +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include <algorithm> +#include <cstdlib> +#include <cstring> +#include <iterator> +#include <utility> + +#include "check.h" +#include "dir.h" +#include "git.h" +#include "print.h" +#include "scope_guard.h" +#include "stat.h" +#include "string_cmp.h" +#include "thread_pool.h" +#include "timer.h" + +namespace gitstatus { + +namespace { + +using namespace std::string_literals; + +static constexpr char kTagPrefix[] = "refs/tags/"; + +constexpr int8_t kUnhex[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2 + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, // 3 + 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 5 + 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0 // 6 +}; + +struct { + bool operator()(const Tag* x, const git_oid& y) const { + return std::memcmp(x->id.id, y.id, GIT_OID_RAWSZ) < 0; + } + bool operator()(const git_oid& x, const Tag* y) const { + return std::memcmp(x.id, y->id.id, GIT_OID_RAWSZ) < 0; + } + bool operator()(const Tag* x, const Tag* y) const { + return std::memcmp(x->id.id, y->id.id, GIT_OID_RAWSZ) < 0; + } +} constexpr ById = {}; + +struct { + bool operator()(const Tag* x, const char* y) const { + return std::strcmp(x->name, y) < 0; + } + bool operator()(const char* x, const Tag* y) const { + return std::strcmp(x, y->name) < 0; + } + bool operator()(const Tag* x, const Tag* y) const { + return std::strcmp(x->name, y->name) < 0; + } +} constexpr ByName = {}; + +void ParseOid(unsigned char* oid, const char* begin, const char* end) { + VERIFY(end >= begin + GIT_OID_HEXSZ); + for (size_t i = 0; i != GIT_OID_HEXSZ; i += 2) { + *oid++ = kUnhex[+begin[i]] << 4 | kUnhex[+begin[i + 1]]; + } +} + +const char* StripTag(const char* ref) { + for (size_t 
i = 0; i != sizeof(kTagPrefix) - 1; ++i) { + if (*ref++ != kTagPrefix[i]) return nullptr; + } + return ref; +} + +git_refdb* RefDb(git_repository* repo) { + git_refdb* res; + VERIFY(!git_repository_refdb(&res, repo)) << GitError(); + return res; +} + +} // namespace + +TagDb::TagDb(git_repository* repo) + : repo_(repo), + refdb_(RefDb(repo)), + pack_(&pack_arena_), + name2id_(&pack_arena_), + id2name_(&pack_arena_) { + CHECK(repo_ && refdb_); +} + +TagDb::~TagDb() { + Wait(); + git_refdb_free(refdb_); +} + +std::string TagDb::TagForCommit(const git_oid& oid) { + ReadLooseTags(); + UpdatePack(); + + std::string res; + + std::string ref = "refs/tags/"; + size_t prefix_len = ref.size(); + for (const char* tag : loose_tags_) { + ref.resize(prefix_len); + ref += tag; + if (res < tag && TagHasTarget(ref.c_str(), &oid)) res = tag; + } + + if ((std::unique_lock<std::mutex>(mutex_), id2name_dirty_)) { + for (auto it = name2id_.rbegin(); it != name2id_.rend(); ++it) { + if (!memcmp((*it)->id.id, oid.id, GIT_OID_RAWSZ) && !IsLooseTag((*it)->name)) { + if (res < (*it)->name) res = (*it)->name; + break; + } + } + } else { + auto r = std::equal_range(id2name_.begin(), id2name_.end(), oid, ById); + for (auto it = r.first; it != r.second; ++it) { + if (!IsLooseTag((*it)->name) && res < (*it)->name) res = (*it)->name; + } + } + + return res; +} + +void TagDb::ReadLooseTags() { + loose_tags_.clear(); + loose_arena_.Reuse(); + + std::string dirname = git_repository_path(repo_) + "refs/tags"s; + int dir_fd = open(dirname.c_str(), O_RDONLY | O_DIRECTORY | O_CLOEXEC); + if (dir_fd < 0) return; + ON_SCOPE_EXIT(&) { CHECK(!close(dir_fd)) << Errno(); }; + // TODO: recursively traverse directories so that the file refs/tags/foo/bar gets interpreted + // as the tag foo/bar. See https://github.com/romkatv/gitstatus/issues/254. 
+ (void)ListDir(dir_fd, loose_arena_, loose_tags_, /* precompose_unicode = */ false, + /* case_sensitive = */ true); +} + +void TagDb::UpdatePack() { + auto Reset = [&] { + auto Wipe = [](auto& x) { + x.clear(); + x.shrink_to_fit(); + }; + Wait(); + Wipe(pack_); + Wipe(name2id_); + Wipe(id2name_); + pack_arena_.Reuse(); + std::memset(&pack_stat_, 0, sizeof(pack_stat_)); + }; + + std::string pack_path = git_repository_path(repo_) + "packed-refs"s; + struct stat st; + if (stat(pack_path.c_str(), &st)) { + Reset(); + return; + } + if (StatEq(pack_stat_, st)) return; + + Reset(); + + try { + while (true) { + LOG(INFO) << "Parsing " << Print(pack_path); + int fd = open(pack_path.c_str(), O_RDONLY | O_CLOEXEC); + VERIFY(fd >= 0); + ON_SCOPE_EXIT(&) { CHECK(!close(fd)) << Errno(); }; + pack_.resize(st.st_size + 1); + ssize_t n = read(fd, &pack_[0], st.st_size + 1); + VERIFY(n >= 0) << Errno(); + VERIFY(!fstat(fd, &pack_stat_)) << Errno(); + if (!StatEq(st, pack_stat_)) { + st = pack_stat_; + continue; + } + VERIFY(n == st.st_size); + pack_.pop_back(); + break; + } + ParsePack(); + } catch (const Exception&) { + Reset(); + throw; + } +} + +void TagDb::ParsePack() { + char* p = &pack_[0]; + char* e = p + pack_.size(); + + // Usually packed-refs starts with the following line: + // + // # pack-refs with: peeled fully-peeled sorted + // + // However, some users can produce pack-refs without this line. + // See https://github.com/romkatv/powerlevel10k/issues/1428. + // I don't know how they do it. Without the header line we cannot + // assume that refs are sorted, which isn't a big deal because we + // can just sort them. What's worse is that refs cannot be assumed + // to be fully-peeled. We don't want to peel them, so we just drop + // all tags. + if (*p != '#') { + LOG(WARN) << "packed-refs doesn't have a header. 
Won't resolve tags."; + return; + } + + char* eol = std::strchr(p, '\n'); + if (!eol) return; + *eol = 0; + if (!std::strstr(p, " fully-peeled") || !std::strstr(p, " sorted")) { + LOG(WARN) << "packed-refs has unexpected header. Won't resolve tags."; + } + p = eol + 1; + + name2id_.reserve(pack_.size() / 128); + id2name_.reserve(pack_.size() / 128); + + std::vector<Tag*> idx; + idx.reserve(pack_.size() / 128); + + while (p != e) { + Tag* tag = pack_arena_.Allocate<Tag>(); + ParseOid(tag->id.id, p, e); + p += GIT_OID_HEXSZ; + VERIFY(*p++ == ' '); + const char* ref = p; + VERIFY(p = std::strchr(p, '\n')); + p[p[-1] == '\r' ? -1 : 0] = 0; + ++p; + if (*p == '^') { + ParseOid(tag->id.id, p + 1, e); + p += GIT_OID_HEXSZ + 1; + if (p != e) { + VERIFY((p = std::strchr(p, '\n'))); + ++p; + } + } + tag->name = StripTag(ref); + if (!tag->name) continue; + name2id_.push_back(tag); + id2name_.push_back(tag); + } + + if (!std::is_sorted(name2id_.begin(), name2id_.end(), ByName)) { + // "sorted" in the header of packed-refs promisses that this won't trigger. 
+ std::sort(name2id_.begin(), name2id_.end(), ByName); + } + + id2name_dirty_ = true; + GlobalThreadPool()->Schedule([this] { + std::sort(id2name_.begin(), id2name_.end(), ById); + std::unique_lock<std::mutex> lock(mutex_); + CHECK(id2name_dirty_); + id2name_dirty_ = false; + cv_.notify_one(); + }); +} + +void TagDb::Wait() { + std::unique_lock<std::mutex> lock(mutex_); + while (id2name_dirty_) cv_.wait(lock); +} + +bool TagDb::IsLooseTag(const char* name) const { + return std::binary_search(loose_tags_.begin(), loose_tags_.end(), name, + [](const char* a, const char* b) { return std::strcmp(a, b) < 0; }); +} + +bool TagDb::TagHasTarget(const char* name, const git_oid* target) const { + static constexpr size_t kMaxDerefCount = 10; + + git_reference* ref; + if (git_refdb_lookup(&ref, refdb_, name)) return false; + ON_SCOPE_EXIT(&) { git_reference_free(ref); }; + + for (int i = 0; i != kMaxDerefCount && git_reference_type(ref) == GIT_REFERENCE_SYMBOLIC; ++i) { + git_reference* dst; + const char* ref_name = git_reference_name(ref); + if (git_refdb_lookup(&dst, refdb_, ref_name)) { + const char* tag_name = StripTag(ref_name); + auto it = std::lower_bound(name2id_.begin(), name2id_.end(), tag_name, ByName); + return it != name2id_.end() && !strcmp((*it)->name, tag_name) && !IsLooseTag(tag_name) && + git_oid_equal(&(*it)->id, target); + } + git_reference_free(ref); + ref = dst; + } + + if (git_reference_type(ref) == GIT_REFERENCE_SYMBOLIC) return false; + const git_oid* oid = git_reference_target_peel(ref) ?: git_reference_target(ref); + if (git_oid_equal(oid, target)) return true; + + for (int i = 0; i != kMaxDerefCount; ++i) { + git_tag* tag; + if (git_tag_lookup(&tag, repo_, oid)) return false; + ON_SCOPE_EXIT(&) { git_tag_free(tag); }; + if (git_tag_target_type(tag) == GIT_OBJECT_COMMIT) { + return git_oid_equal(git_tag_target_id(tag), target); + } + oid = git_tag_target_id(tag); + } + + return false; +} + +} // namespace gitstatus diff --git a/gitstatus/src/tag_db.h 
b/gitstatus/src/tag_db.h new file mode 100644 index 00000000..b5b14a48 --- /dev/null +++ b/gitstatus/src/tag_db.h @@ -0,0 +1,79 @@ +// Copyright 2019 Roman Perepelitsa. +// +// This file is part of GitStatus. +// +// GitStatus is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// GitStatus is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with GitStatus. If not, see <https://www.gnu.org/licenses/>. + +#ifndef ROMKATV_GITSTATUS_TAG_DB_H_ +#define ROMKATV_GITSTATUS_TAG_DB_H_ + +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include <git2.h> + +#include <condition_variable> +#include <cstring> +#include <mutex> +#include <string> +#include <vector> + +#include "arena.h" + +namespace gitstatus { + +struct Tag { + const char* name; + git_oid id; +}; + +class TagDb { + public: + explicit TagDb(git_repository* repo); + TagDb(TagDb&&) = delete; + ~TagDb(); + + std::string TagForCommit(const git_oid& oid); + + private: + void ReadLooseTags(); + void UpdatePack(); + void ParsePack(); + void Wait(); + + bool IsLooseTag(const char* name) const; + + bool TagHasTarget(const char* name, const git_oid* target) const; + + git_repository* const repo_; + git_refdb* const refdb_; + + Arena pack_arena_; + struct stat pack_stat_ = {}; + WithArena<std::string> pack_; + WithArena<std::vector<const Tag*>> name2id_; + WithArena<std::vector<const Tag*>> id2name_; + + Arena loose_arena_; + std::vector<char*> loose_tags_; + + std::mutex mutex_; + std::condition_variable cv_; + bool id2name_dirty_ = false; +}; + +} // 
namespace gitstatus + +#endif // ROMKATV_GITSTATUS_TAG_DB_H_ diff --git a/gitstatus/src/thread_pool.cc b/gitstatus/src/thread_pool.cc new file mode 100644 index 00000000..b37eb203 --- /dev/null +++ b/gitstatus/src/thread_pool.cc @@ -0,0 +1,87 @@ +#include "thread_pool.h" + +#include <cassert> +#include <utility> + +#include "check.h" +#include "logging.h" + +namespace gitstatus { + +ThreadPool::ThreadPool(size_t num_threads) : num_inflight_(num_threads) { + for (size_t i = 0; i != num_threads; ++i) { + threads_.emplace_back([=]() { Loop(i + 1); }); + } +} + +ThreadPool::~ThreadPool() { + { + std::lock_guard<std::mutex> lock(mutex_); + exit_ = true; + } + cv_.notify_all(); + sleeper_cv_.notify_one(); + for (std::thread& t : threads_) t.join(); +} + +void ThreadPool::Schedule(Time t, std::function<void()> f) { + std::condition_variable* wake = nullptr; + { + std::unique_lock<std::mutex> lock(mutex_); + work_.push(Work{std::move(t), ++last_idx_, std::move(f)}); + if (work_.top().idx == last_idx_) wake = have_sleeper_ ? 
&sleeper_cv_ : &cv_; + } + if (wake) wake->notify_one(); +} + +void ThreadPool::Loop(size_t tid) { + auto Next = [&]() -> std::function<void()> { + std::unique_lock<std::mutex> lock(mutex_); + --num_inflight_; + if (work_.empty() && num_inflight_ == 0) idle_cv_.notify_all(); + while (true) { + if (exit_) return nullptr; + if (work_.empty()) { + cv_.wait(lock); + continue; + } + Time now = Clock::now(); + const Work& top = work_.top(); + if (top.t <= now) { + std::function<void()> res = std::move(top.f); + work_.pop(); + ++num_inflight_; + bool notify = !work_.empty() && !have_sleeper_; + lock.unlock(); + if (notify) cv_.notify_one(); + return res; + } + if (have_sleeper_) { + cv_.wait(lock); + continue; + } + have_sleeper_ = true; + sleeper_cv_.wait_until(lock, top.t); + assert(have_sleeper_); + have_sleeper_ = false; + } + }; + while (std::function<void()> f = Next()) f(); +} + +void ThreadPool::Wait() { + std::unique_lock<std::mutex> lock(mutex_); + idle_cv_.wait(lock, [&] { return work_.empty() && num_inflight_ == 0; }); +} + +static ThreadPool* g_thread_pool = nullptr; + +void InitGlobalThreadPool(size_t num_threads) { + CHECK(!g_thread_pool); + LOG(INFO) << "Spawning " << num_threads << " thread(s)"; + g_thread_pool = new ThreadPool(num_threads); +} + +ThreadPool* GlobalThreadPool() { return g_thread_pool; } + +} // namespace gitstatus diff --git a/gitstatus/src/thread_pool.h b/gitstatus/src/thread_pool.h new file mode 100644 index 00000000..1e39b915 --- /dev/null +++ b/gitstatus/src/thread_pool.h @@ -0,0 +1,74 @@ +#ifndef ROMKATV_GITSTATUS_THREAD_POOL_H_ +#define ROMKATV_GITSTATUS_THREAD_POOL_H_ + +#include <condition_variable> +#include <cstddef> +#include <cstdint> +#include <functional> +#include <mutex> +#include <queue> +#include <thread> +#include <tuple> +#include <utility> + +#include "time.h" + +namespace gitstatus { + +class ThreadPool { + public: + explicit ThreadPool(size_t num_threads); + ThreadPool(ThreadPool&&) = delete; + + // Waits for the 
currently running functions to finish. + // Does NOT wait for the queue of functions to drain. + // If you want the latter, call Wait() manually. + ~ThreadPool(); + + // Runs `f` on one of the threads at or after time `t`. Can be called + // from any thread. Can be called concurrently. + // + // Does not block. + void Schedule(Time t, std::function<void()> f); + + void Schedule(std::function<void()> f) { Schedule(Clock::now(), std::move(f)); } + + // Blocks until the work queue is empty and there are no currently + // running functions. + void Wait(); + + size_t num_threads() const { return threads_.size(); } + + private: + struct Work { + bool operator<(const Work& w) const { return std::tie(w.t, w.idx) < std::tie(t, idx); } + Time t; + int64_t idx; + mutable std::function<void()> f; + }; + + void Loop(size_t tid); + + int64_t last_idx_ = 0; + int64_t num_inflight_; + bool exit_ = false; + // Do we have a thread waiting on sleeper_cv_? + bool have_sleeper_ = false; + std::mutex mutex_; + // Any number of threads can wait on this condvar. Always without a timeout. + std::condition_variable cv_; + // At most one thread can wait on this condvar at a time. Always with a timeout. + std::condition_variable sleeper_cv_; + // Signalled when the work queue is empty and there is nothing inflight. 
+ std::condition_variable idle_cv_; + std::priority_queue<Work> work_; + std::vector<std::thread> threads_; +}; + +void InitGlobalThreadPool(size_t num_threads); + +ThreadPool* GlobalThreadPool(); + +} // namespace gitstatus + +#endif // ROMKATV_GITSTATUS_THREAD_POOL_H_ diff --git a/gitstatus/src/time.h b/gitstatus/src/time.h new file mode 100644 index 00000000..cdd5fa27 --- /dev/null +++ b/gitstatus/src/time.h @@ -0,0 +1,14 @@ +#ifndef ROMKATV_GITSTATUS_TIME_H_ +#define ROMKATV_GITSTATUS_TIME_H_ + +#include <chrono> + +namespace gitstatus { + +using Clock = std::chrono::steady_clock; +using Time = Clock::time_point; +using Duration = Clock::duration; + +} // namespace gitstatus + +#endif // ROMKATV_GITSTATUS_TIME_H_ diff --git a/gitstatus/src/timer.cc b/gitstatus/src/timer.cc new file mode 100644 index 00000000..0e9f64e9 --- /dev/null +++ b/gitstatus/src/timer.cc @@ -0,0 +1,72 @@ +// Copyright 2019 Roman Perepelitsa. +// +// This file is part of GitStatus. +// +// GitStatus is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// GitStatus is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with GitStatus. If not, see <https://www.gnu.org/licenses/>. 
+ +#include "timer.h" + +#include <sys/resource.h> +#include <sys/time.h> +#include <time.h> + +#include <cmath> +#include <limits> + +#include "check.h" +#include "logging.h" + +namespace gitstatus { + +namespace { + +double CpuTimeMs() { + auto ToMs = [](const timeval& tv) { return 1e3 * tv.tv_sec + 1e-3 * tv.tv_usec; }; + rusage usage = {}; + CHECK(getrusage(RUSAGE_SELF, &usage) == 0) << Errno(); + return ToMs(usage.ru_utime) + ToMs(usage.ru_stime); +} + +double WallTimeMs() { + // An attempt to call clock_gettime on an ancient version of MacOS fails at runtime. + // It's possible to detect the presence of clock_gettime at runtime but I don't have + // an ancient MacOS to test the code. Hence this. +#ifdef __APPLE__ + return std::numeric_limits<double>::quiet_NaN(); +#else + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return 1e3 * ts.tv_sec + 1e-6 * ts.tv_nsec; +#endif +} + +} // namespace + +void Timer::Start() { + cpu_ = CpuTimeMs(); + wall_ = WallTimeMs(); +} + +void Timer::Report(const char* msg) { + double cpu = CpuTimeMs() - cpu_; + if (std::isnan(wall_)) { + LOG(INFO) << "Timing for: " << msg << ": " << cpu << "ms cpu"; + } else { + double wall = WallTimeMs() - wall_; + LOG(INFO) << "Timing for: " << msg << ": " << cpu << "ms cpu, " << wall << "ms wall"; + } + Start(); +} + +} // namespace gitstatus diff --git a/gitstatus/src/timer.h b/gitstatus/src/timer.h new file mode 100644 index 00000000..51c557ca --- /dev/null +++ b/gitstatus/src/timer.h @@ -0,0 +1,36 @@ +// Copyright 2019 Roman Perepelitsa. +// +// This file is part of GitStatus. +// +// GitStatus is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
//
// GitStatus is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with GitStatus. If not, see <https://www.gnu.org/licenses/>.

#ifndef ROMKATV_GITSTATUS_TIMER_H_
#define ROMKATV_GITSTATUS_TIMER_H_

namespace gitstatus {

// Stopwatch that tracks CPU time and wall-clock time, both in milliseconds.
class Timer {
 public:
  // Starts timing on construction, which also initializes both members.
  Timer() { Start(); }
  // Records the current CPU and wall-clock readings as the reference point.
  void Start();
  // Logs the time elapsed since the last Start() under the label `msg`, then
  // calls Start() again.
  void Report(const char* msg);

 private:
  double cpu_;   // CPU time reading taken by Start()
  double wall_;  // wall-clock reading taken by Start(); NaN when unavailable
};

}  // namespace gitstatus

#endif  // ROMKATV_GITSTATUS_TIMER_H_

// ---- gitstatus/src/tribool.h ----
// Copyright 2019 Roman Perepelitsa.
//
// This file is part of GitStatus.
//
// GitStatus is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// GitStatus is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with GitStatus. If not, see <https://www.gnu.org/licenses/>.

#ifndef ROMKATV_GITSTATUS_TRIBOOL_H_
#define ROMKATV_GITSTATUS_TRIBOOL_H_

namespace gitstatus {

// Three-state boolean. kFalse and kTrue have the same numeric values as
// `false` and `true`; kUnknown is -1.
enum class Tribool : int { kFalse = 0, kTrue = 1, kUnknown = -1 };

}  // namespace gitstatus

#endif  // ROMKATV_GITSTATUS_TRIBOOL_H_