diff options
author | romkatv <roman.perepelitsa@gmail.com> | 2020-05-10 16:58:05 +0300 |
---|---|---|
committer | romkatv <roman.perepelitsa@gmail.com> | 2020-05-10 16:58:05 +0300 |
commit | 1531d6e5439daae01627b2645684876b75eaf5eb (patch) | |
tree | 05ed820f16dbb04be4b4b8ec3055f93431518773 /src/repo.cc |
Squashed 'gitstatus/' content from commit 6b9ba17
git-subtree-dir: gitstatus
git-subtree-split: 6b9ba179c6655286c4c399e7926d5098dd6bd706
Diffstat (limited to 'src/repo.cc')
-rw-r--r-- | src/repo.cc | 503 |
1 files changed, 503 insertions, 0 deletions
diff --git a/src/repo.cc b/src/repo.cc new file mode 100644 index 00000000..d7ea7d3e --- /dev/null +++ b/src/repo.cc @@ -0,0 +1,503 @@ +// Copyright 2019 Roman Perepelitsa. +// +// This file is part of GitStatus. +// +// GitStatus is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// GitStatus is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with GitStatus. If not, see <https://www.gnu.org/licenses/>. + +#include "repo.h" + +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include <algorithm> +#include <atomic> +#include <cstdlib> +#include <cstring> +#include <exception> +#include <iterator> +#include <memory> +#include <type_traits> +#include <utility> + +#include "arena.h" +#include "check.h" +#include "check_dir_mtime.h" +#include "dir.h" +#include "git.h" +#include "print.h" +#include "scope_guard.h" +#include "stat.h" +#include "string_cmp.h" +#include "thread_pool.h" +#include "timer.h" + +namespace gitstatus { + +namespace { + +using namespace std::string_literals; + +template <class T> +T Load(const std::atomic<T>& x) { + return x.load(std::memory_order_relaxed); +} + +template <class T> +void Store(std::atomic<T>& x, T v) { + x.store(v, std::memory_order_relaxed); +} + +template <class T> +T Inc(std::atomic<T>& x, T by = 1) { + return x.fetch_add(by, std::memory_order_relaxed); +} + +template <class T> +T Dec(std::atomic<T>& x) { + return x.fetch_sub(1, std::memory_order_relaxed); +} + +template <class T> +T Exchange(std::atomic<T>& x, T v) { + return x.exchange(v, std::memory_order_relaxed); +} + +const char* DeltaStr(git_delta_t t) { + switch (t) { + case GIT_DELTA_UNMODIFIED: return "unmodified"; + case GIT_DELTA_ADDED: return "added"; + case GIT_DELTA_DELETED: return "deleted"; + case GIT_DELTA_MODIFIED: return "modified"; + case GIT_DELTA_RENAMED: return "renamed"; + case GIT_DELTA_COPIED: return "copied"; + case GIT_DELTA_IGNORED: return "ignored"; + case GIT_DELTA_UNTRACKED: return "untracked"; + case GIT_DELTA_TYPECHANGE: return "typechange"; + case GIT_DELTA_UNREADABLE: return "unreadable"; + case GIT_DELTA_CONFLICTED: return "conflicted"; + } + return "unknown"; +} + +} // namespace + +bool Repo::Shard::Contains(Str<> str, StringView path) const { + if (str.Lt(path, start_s)) return false; + if (end_s.empty()) return true; + path.len = std::min(path.len, end_s.size()); + return !str.Lt(end_s, path); +} + +Repo::Repo(git_repository* repo, Limits lim) : lim_(std::move(lim)), repo_(repo), tag_db_(repo) { + if (lim_.max_num_untracked) { + GlobalThreadPool()->Schedule([this] { + bool check = CheckDirMtime(git_repository_path(repo_)); + std::unique_lock<std::mutex> lock(mutex_); + CHECK(Load(untracked_cache_) == Tribool::kUnknown); + Store(untracked_cache_, check ? Tribool::kTrue : Tribool::kFalse); + cv_.notify_one(); + }); + } else { + untracked_cache_ = Tribool::kFalse; + } +} + +Repo::~Repo() { + { + std::unique_lock<std::mutex> lock(mutex_); + while (untracked_cache_ == Tribool::kUnknown) cv_.wait(lock); + } + if (git_index_) git_index_free(git_index_); + git_repository_free(repo_); +} + +IndexStats Repo::GetIndexStats(const git_oid* head, git_config* cfg) { + ON_SCOPE_EXIT(this, orig_lim = lim_) { lim_ = orig_lim; }; + auto Off = [&](const char* name) { + int val; + if (git_config_get_bool(&val, cfg, name) || val) return false; + LOG(INFO) << "Honoring git config option: " << name << " = false"; + return true; + }; + if (!lim_.ignore_status_show_untracked_files && Off("status.showUntrackedFiles")) { + lim_.max_num_untracked = 0; + } + if (!lim_.ignore_bash_show_untracked_files && Off("bash.showUntrackedFiles")) { + lim_.max_num_untracked = 0; + } + if (!lim_.ignore_bash_show_dirty_state && Off("bash.showDirtyState")) { + lim_.max_num_staged = 0; + lim_.max_num_unstaged = 0; + lim_.max_num_conflicted = 0; + } + + if (git_index_) { + int new_index; + VERIFY(!git_index_read_ex(git_index_, 0, &new_index)) << GitError(); + if (new_index) { + head_ = {}; + index_.reset(); + } + } else { + VERIFY(!git_repository_index(&git_index_, repo_)) << GitError(); + // Query an attribute (doesn't matter which) to initialize repo's attribute + // cache. It's a workaround for synchronization bugs (data races) in libgit2 + // that result from lazy cache initialization without synchrnonization. + // Thankfully, subsequent cache reads and writes are properly synchronized. + const char* attr; + VERIFY(!git_attr_get(&attr, repo_, 0, "x", "x")) << GitError(); + } + + UpdateShards(); + Store(error_, false); + Store(unstaged_, {}); + Store(untracked_, {}); + Store(unstaged_deleted_, {}); + + std::vector<const char*> dirty_candidates; + const size_t index_size = git_index_entrycount(git_index_); + + if (!lim_.max_num_staged && !lim_.max_num_conflicted) { + head_ = {}; + Store(staged_, {}); + Store(conflicted_, {}); + Store(staged_new_, {}); + Store(staged_deleted_, {}); + Store(skip_worktree_, {}); + Store(assume_unchanged_, {}); + } else if (head) { + if (git_oid_equal(head, &head_)) { + LOG(INFO) << "Index and HEAD unchanged; staged = " << Load(staged_) + << ", conflicted = " << Load(conflicted_); + } else { + head_ = *head; + Store(staged_, {}); + Store(conflicted_, {}); + Store(staged_new_, {}); + Store(staged_deleted_, {}); + Store(skip_worktree_, {}); + Store(assume_unchanged_, {}); + StartStagedScan(head); + } + } else { + head_ = {}; + size_t staged = 0; + size_t skip_worktree = 0; + size_t assume_unchanged = 0; + for (size_t i = 0; i != index_size; ++i) { + const git_index_entry* entry = git_index_get_byindex_no_sort(git_index_, i); + if (!(entry->flags_extended & GIT_INDEX_ENTRY_INTENT_TO_ADD)) ++staged; + if (entry->flags_extended & GIT_INDEX_ENTRY_SKIP_WORKTREE) ++skip_worktree; + if (entry->flags & GIT_INDEX_ENTRY_VALID) ++assume_unchanged; + } + Store(staged_, staged); + Store(conflicted_, {}); + Store(staged_new_, staged); + Store(staged_deleted_, {}); + Store(skip_worktree_, skip_worktree); + Store(assume_unchanged_, assume_unchanged); + } + + if (index_size <= lim_.dirty_max_index_size && + (lim_.max_num_unstaged || lim_.max_num_untracked)) { + if (!index_) index_ = std::make_unique<Index>(repo_, git_index_); + dirty_candidates = index_->GetDirtyCandidates({.include_untracked = lim_.max_num_untracked > 0, + .untracked_cache = Load(untracked_cache_)}); + if (dirty_candidates.empty()) { + LOG(INFO) << "Clean repo: no dirty candidates"; + } else { + LOG(INFO) << "Found " << dirty_candidates.size() << " dirty candidate(s) spanning from " + << Print(dirty_candidates.front()) << " to " << Print(dirty_candidates.back()); + } + StartDirtyScan(dirty_candidates); + } + + Wait(); + VERIFY(!Load(error_)); + + size_t num_staged = std::min(Load(staged_), lim_.max_num_staged); + size_t num_unstaged = std::min(Load(unstaged_), lim_.max_num_unstaged); + return {.index_size = index_size, + .num_staged = num_staged, + .num_unstaged = num_unstaged, + .num_conflicted = std::min(Load(conflicted_), lim_.max_num_conflicted), + .num_untracked = std::min(Load(untracked_), lim_.max_num_untracked), + .num_staged_new = std::min(Load(staged_new_), num_staged), + .num_staged_deleted = std::min(Load(staged_deleted_), num_staged), + .num_unstaged_deleted = std::min(Load(unstaged_deleted_), num_unstaged), + .num_skip_worktree = Load(skip_worktree_), + .num_assume_unchanged = Load(assume_unchanged_)}; +} + +int Repo::OnDelta(const char* type, const git_diff_delta& d, std::atomic<size_t>& c1, size_t m1, + const std::atomic<size_t>& c2, size_t m2) { + auto Msg = [&]() { + const char* status = DeltaStr(d.status); + std::ostringstream strm; + strm << "Found " << type << " file"; + if (strcmp(status, type)) strm << " (" << status << ")"; + strm << ": " << Print(d.new_file.path); + return strm.str(); + }; + + size_t v = Inc(c1); + if (v) { + LOG(DEBUG) << Msg(); + } else { + LOG(INFO) << Msg(); + } + if (v + 1 < m1) return GIT_DIFF_DELTA_DO_NOT_INSERT; + if (Load(c2) < m2) return GIT_DIFF_DELTA_DO_NOT_INSERT | GIT_DIFF_DELTA_SKIP_TYPE; + return GIT_EUSER; +} + +void Repo::StartDirtyScan(const std::vector<const char*>& paths) { + if (paths.empty()) return; + + git_diff_options opt = GIT_DIFF_OPTIONS_INIT; + opt.payload = this; + opt.flags = GIT_DIFF_INCLUDE_TYPECHANGE_TREES | GIT_DIFF_SKIP_BINARY_CHECK | + GIT_DIFF_DISABLE_PATHSPEC_MATCH | GIT_DIFF_EXEMPLARS; + if (lim_.max_num_untracked) { + opt.flags |= GIT_DIFF_INCLUDE_UNTRACKED; + if (lim_.recurse_untracked_dirs) opt.flags |= GIT_DIFF_RECURSE_UNTRACKED_DIRS; + } else { + opt.flags |= GIT_DIFF_ENABLE_FAST_UNTRACKED_DIRS; + } + opt.ignore_submodules = GIT_SUBMODULE_IGNORE_DIRTY; + opt.notify_cb = +[](const git_diff* diff, const git_diff_delta* delta, + const char* matched_pathspec, void* payload) -> int { + if (delta->status == GIT_DELTA_CONFLICTED) return GIT_DIFF_DELTA_DO_NOT_INSERT; + Repo* repo = static_cast<Repo*>(payload); + if (Load(repo->error_)) return GIT_EUSER; + if (delta->status == GIT_DELTA_UNTRACKED) { + return repo->OnDelta("untracked", *delta, repo->untracked_, repo->lim_.max_num_untracked, + repo->unstaged_, repo->lim_.max_num_unstaged); + } else { + if (delta->status == GIT_DELTA_DELETED) Inc(repo->unstaged_deleted_); + return repo->OnDelta("unstaged", *delta, repo->unstaged_, repo->lim_.max_num_unstaged, + repo->untracked_, repo->lim_.max_num_untracked); + } + }; + + const Str<> str(git_index_is_case_sensitive(git_index_)); + auto shard = shards_.begin(); + for (auto p = paths.begin(); p != paths.end();) { + opt.range_start = *p; + opt.range_end = *p; + opt.pathspec.strings = const_cast<char**>(&*p); + opt.pathspec.count = 1; + while (!shard->Contains(str, StringView(*p))) ++shard; + while (++p != paths.end() && shard->Contains(str, StringView(*p))) { + opt.range_end = *p; + ++opt.pathspec.count; + } + RunAsync([this, opt]() { + git_diff* diff = nullptr; + LOG(DEBUG) << "git_diff_index_to_workdir from " << Print(opt.range_start) << " to " + << Print(opt.range_end); + switch (git_diff_index_to_workdir(&diff, repo_, git_index_, &opt)) { + case 0: + git_diff_free(diff); + break; + case GIT_EUSER: + break; + default: + LOG(ERROR) << "git_diff_index_to_workdir: " << GitError(); + throw Exception(); + } + }); + } +} + +void Repo::StartStagedScan(const git_oid* head) { + git_commit* commit = nullptr; + VERIFY(!git_commit_lookup(&commit, repo_, head)) << GitError(); + ON_SCOPE_EXIT(=) { git_commit_free(commit); }; + git_tree* tree = nullptr; + VERIFY(!git_commit_tree(&tree, commit)) << GitError(); + + git_diff_options opt = GIT_DIFF_OPTIONS_INIT; + opt.flags = GIT_DIFF_EXEMPLARS | GIT_DIFF_INCLUDE_TYPECHANGE_TREES; + opt.payload = this; + opt.notify_cb = +[](const git_diff* diff, const git_diff_delta* delta, + const char* matched_pathspec, void* payload) -> int { + Repo* repo = static_cast<Repo*>(payload); + if (Load(repo->error_)) return GIT_EUSER; + if (delta->status == GIT_DELTA_CONFLICTED) { + return repo->OnDelta("conflicted", *delta, repo->conflicted_, repo->lim_.max_num_conflicted, + repo->staged_, repo->lim_.max_num_staged); + } else { + if (delta->status == GIT_DELTA_ADDED) Inc(repo->staged_new_); + if (delta->status == GIT_DELTA_DELETED) Inc(repo->staged_deleted_); + return repo->OnDelta("staged", *delta, repo->staged_, repo->lim_.max_num_staged, + repo->conflicted_, repo->lim_.max_num_conflicted); + } + }; + + for (const Shard& shard : shards_) { + RunAsync([this, tree, opt, shard]() mutable { + size_t skip_worktree = 0; + size_t assume_unchanged = 0; + for (size_t i = shard.start_i; i != shard.end_i; ++i) { + const git_index_entry* entry = git_index_get_byindex_no_sort(git_index_, i); + if (entry->flags_extended & GIT_INDEX_ENTRY_SKIP_WORKTREE) ++skip_worktree; + if (entry->flags & GIT_INDEX_ENTRY_VALID) ++assume_unchanged; + } + Inc(skip_worktree_, skip_worktree); + Inc(assume_unchanged_, assume_unchanged); + opt.range_start = shard.start_s.c_str(); + opt.range_end = shard.end_s.c_str(); + git_diff* diff = nullptr; + LOG(DEBUG) << "git_diff_tree_to_index from " << Print(opt.range_start) << " to " + << Print(opt.range_end); + switch (git_diff_tree_to_index(&diff, repo_, tree, git_index_, &opt)) { + case 0: + git_diff_free(diff); + break; + case GIT_EUSER: + break; + default: + LOG(ERROR) << "git_diff_tree_to_index: " << GitError(); + throw Exception(); + } + }); + } +} + +void Repo::UpdateShards() { + constexpr size_t kEntriesPerShard = 512; + + const Str<> str(git_index_is_case_sensitive(git_index_)); + size_t index_size = git_index_entrycount(git_index_); + ON_SCOPE_EXIT(&) { + LOG(INFO) << "Splitting " << index_size << " object(s) into " << shards_.size() << " shard(s)"; + }; + + if (index_size <= kEntriesPerShard || GlobalThreadPool()->num_threads() < 2) { + shards_ = {{ + .start_s = "", + .end_s = "", + .start_i = 0, + .end_i = index_size}}; + return; + } + + size_t shards = + std::min(index_size / kEntriesPerShard + 1, 2 * GlobalThreadPool()->num_threads()); + shards_.clear(); + shards_.reserve(shards); + std::string last_s; + size_t last_i = 0; + + for (size_t i = 0; i != shards - 1; ++i) { + size_t idx = (i + 1) * index_size / shards; + std::string split = git_index_get_byindex_no_sort(git_index_, idx)->path; + auto pos = split.find_last_of('/'); + if (pos == std::string::npos) continue; + split = split.substr(0, pos + 1); + Shard shard; + shard.end_s = split; + --shard.end_s.back(); + if (!str.Lt(last_s, shard.end_s)) continue; + shard.start_s = std::move(last_s); + last_s = std::move(split); + shard.start_i = last_i; + shard.end_i = idx; + last_i = idx; + shards_.push_back(std::move(shard)); + } + shards_.push_back({ + .start_s = std::move(last_s), + .end_s = "", + .start_i = last_i, + .end_i = index_size}); + + CHECK(!shards_.empty()); + CHECK(shards_.size() <= shards); + CHECK(shards_.front().start_s.empty()); + CHECK(shards_.front().start_i == 0); + CHECK(shards_.back().end_s.empty()); + CHECK(shards_.back().end_i == index_size); + for (size_t i = 0; i != shards_.size(); ++i) { + if (i) { + const git_index_entry* entry = git_index_get_byindex_no_sort(git_index_, shards_[i].start_i); + CHECK(!std::memcmp(shards_[i].start_s.c_str(), entry->path, shards_[i].start_s.size())); + CHECK(str.Lt(shards_[i - 1].end_s, shards_[i].start_s)); + CHECK(shards_[i - 1].end_i == shards_[i].start_i); + } + if (i != shards_.size() - 1) { + CHECK(shards_[i].start_i < shards_[i].end_i); + CHECK(str.Lt(shards_[i].start_s, shards_[i].end_s)); + } + } +} + +void Repo::DecInflight() { + std::unique_lock<std::mutex> lock(mutex_); + CHECK(Load(inflight_) > 0); + if (Dec(inflight_) == 1) cv_.notify_one(); +} + +void Repo::RunAsync(std::function<void()> f) { + Inc(inflight_); + try { + GlobalThreadPool()->Schedule([this, f = std::move(f)] { + try { + ON_SCOPE_EXIT(&) { DecInflight(); }; + f(); + } catch (const Exception&) { + if (!Load(error_)) { + std::unique_lock<std::mutex> lock(mutex_); + if (!Load(error_)) { + Store(error_, true); + cv_.notify_one(); + } + } + } + }); + } catch (...) { + DecInflight(); + throw; + } +} + +void Repo::Wait() { + std::unique_lock<std::mutex> lock(mutex_); + while (inflight_) cv_.wait(lock); +} + +std::future<std::string> Repo::GetTagName(const git_oid* target) { + auto* promise = new std::promise<std::string>; + std::future<std::string> res = promise->get_future(); + + GlobalThreadPool()->Schedule([=] { + ON_SCOPE_EXIT(&) { delete promise; }; + if (!target) { + promise->set_value(""); + return; + } + try { + promise->set_value(tag_db_.TagForCommit(*target)); + } catch (const Exception&) { + promise->set_exception(std::current_exception()); + } + }); + + return res; +} + +} // namespace gitstatus |